Ejemplo n.º 1
0
                random_rollout_data=True,
                use_cached=False,
                vae_dataset_specific_kwargs=dict(),
                show=False,
            ),
            vae_kwargs=dict(input_channels=3, ),
            algo_kwargs=dict(
                do_scatterplot=False,
                batch_size=128,
                lr=1e-3,
            ),
            save_period=5,
        ),
        env_class=Point2DWallEnv,
        env_kwargs=dict(
            render_onscreen=False,
            ball_radius=1,
            images_are_rgb=True,
            show_goal=False,
        ),
        algorithm='RIG',
    )

    run_experiment(
        grill_her_td3_full_experiment,
        exp_prefix='rlkit-pointmass-rig-example',
        mode='here_no_doodad',
        variant=variant,
        # use_gpu=True,  # Turn on if you have a GPU
    )
Ejemplo n.º 2
0
            num_trains_per_train_loop=100,
            min_num_steps_before_training=100,
        )
        variant['save_video'] = True
        variant['save_video_kwargs']['rows'] = 1
        variant['save_video_kwargs']['save_video_period'] = 1

    # Launch every swept variant `n_seeds` times. Sparse-reward variants are
    # only run with the 'fixed_standard_laplace' dynamics model; any other
    # sparse combination is skipped.
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        if variant['reward_type'] == 'sparse':
            if variant['dynamics_model_version'] != 'fixed_standard_laplace':
                continue
        for seed in range(n_seeds):
            variant['exp_id'] = exp_id
            # variant['seed'] = seed
            # Built fresh for each launch so no dict is shared between calls.
            gpu_kwargs = dict(
                gpu_model='nvidia-tesla-k80',
                num_gpu=1,
            )
            gcp_kwargs = dict(zone='us-east1-c', gpu_kwargs=gpu_kwargs)
            run_experiment(
                probabilistic_goal_reaching_experiment,
                exp_name=exp_name,
                mode=mode,
                variant=variant,
                use_gpu=False,
                num_exps_per_instance=2,
                slurm_config_name='cpu_co',
                gcp_kwargs=gcp_kwargs,
                time_in_mins=10 * 60,
            )
Ejemplo n.º 3
0
    )

    n_seeds = 1
    mode = 'local'
    exp_name = 'dev-{}'.format(
        __file__.replace('/', '-').replace('_', '-').split('.')[0])

    n_seeds = 2
    mode = 'sss'
    exp_name = 'pnp-img-obs-enc-d-rew-many-heads--sweep-random-init-do-not-encode-state'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for seed in range(n_seeds):
            variant['exp_id'] = exp_id
            # variant['seed'] = seed
            run_experiment(
                encoder_goal_conditioned_sac_experiment,
                exp_name=exp_name,
                mode=mode,
                variant=variant,
                use_gpu=True,
                num_exps_per_instance=3,
                # slurm_config_name='cpu_co',
                gcp_kwargs=dict(zone='us-east1-c',
                                gpu_kwargs=dict(
                                    gpu_model='nvidia-tesla-k80',
                                    num_gpu=1,
                                )),
                time_in_mins=int(2.5 * 24 * 60),
            )
Ejemplo n.º 4
0
        algorithm='HER-tSAC',
        version='normal',
        observation_key='observation',
        desired_goal_key='desired_goal',
    )
    search_space = {}
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    n_seeds = 1
    mode = 'local'
    exp_prefix = 'dev'

    # n_seeds = 5
    # mode = 'ec2'
    # exp_prefix = 'fetch-push-test'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for i in range(n_seeds):
            run_experiment(
                relabeling_tsac_experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                time_in_mins=23 * 60,
                snapshot_mode='gap_and_last',
                snapshot_gap=100,
            )
Ejemplo n.º 5
0
                dict(num_examples=1024, version='circle', radius=3),
                # dict(num_examples=1024, version='circle', radius=0),
            ], ),
    )

    n_seeds = 1
    mode = 'local'
    exp_name = 'dev-{}'.format(
        __file__.replace('/', '-').replace('_', '-').split('.')[0])

    # n_seeds = 3
    # mode = 'sss'
    # exp_name = __file__.split('/')[-1].split('.')[0].replace('_', '-')
    # print('exp_name', exp_name)
    exp_name = 'dev-set-vae-2d-two-circles'

    search_space = {}
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                train_2d_set_vae,
                exp_name=exp_name,
                mode=mode,
                variant=variant,
                use_gpu=True,
            )
Ejemplo n.º 6
0
        'grill_variant.algo_kwargs.base_kwargs.num_updates_per_env_step':
        [1, 2, 4, 6],
        'grill_variant.algo_kwargs.base_kwargs.max_path_length': [100],
        'grill_variant.algo_kwargs.online_vae_kwargs.oracle_data': [False],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    n_seeds = 1
    mode = 'local'
    exp_prefix = 'dev'

    n_seeds = 2
    mode = 'ec2'
    exp_prefix = 'online-vae-pushing-parallel-sweep-NUPO'
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                grill_her_td3_online_vae_full_experiment,
                exp_id=exp_id,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                use_gpu=True,
                snapshot_gap=200,
                snapshot_mode='gap_and_last',
                num_exps_per_instance=1,
            )
Ejemplo n.º 7
0
                do_scatterplot=False,
                batch_size=128,
                lr=1e-3,
            ),
            save_period=5,
        ),

        env_class=Point2DWallEnv,
        env_kwargs=dict(
            render_onscreen=False,
            ball_radius=1,
            images_are_rgb=True,
            show_goal=False,
        ),

        algorithm='RIG',
    )

    n_seeds = 1
    mode = 'here_no_doodad'
    exp_prefix = 'rlkit-pointmass-rig-example'

    for _ in range(n_seeds):
        run_experiment(
            grill_her_td3_full_experiment,
            exp_prefix=exp_prefix,
            mode=mode,
            variant=variant,
            # use_gpu=True,  # Turn on if you have a GPU
        )
Ejemplo n.º 8
0
        ],
        'create_vae_kwargs.decoder_distribution': [
            'gaussian_learned_global_image_variance',
        ],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        beta = variant['vae_trainer_kwargs']['beta']
        slw = variant['vae_trainer_kwargs']['set_loss_weight']
        for _ in range(n_seeds):
            variant['logger_config'] = dict(
                trial_dir_suffix='beta-{}-slw{}'.format(
                    beta,
                    slw,
                ))
            run_experiment(
                train_set_vae,
                variant=variant,
                exp_name='vae-encoder-set-loss-sweep',
                mode='sss',
                # exp_name='dev-vae-encoder-sweep',
                # mode='here_no_doodad',
                # slurm_config_name='gpu_fc',
                # slurm_config_name='gpu_low_pri',
                # exp_name='vae-bernoulli-decoder',
                use_gpu=True,
            )
Ejemplo n.º 9
0
        ),
    )
    search_space = {
        'env_id':['SawyerDoorHookEnv-v0', 'SawyerDoorHookResetFreeEnv-v0'],
        'grill_variant.exploration_noise':[0.3, .8],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )

    # n_seeds = 1
    # mode = 'local'
    # exp_prefix = 'test'

    n_seeds = 1
    mode = 'ec2'
    exp_prefix = 'sawyer_door_state_her_td3'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for i in range(n_seeds):
            run_experiment(
                grill_her_td3_full_experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                snapshot_mode='gap_and_last',
                snapshot_gap=50,
                variant=variant,
                use_gpu=True,
                num_exps_per_instance=4,
            )
Ejemplo n.º 10
0
         'algo_params.discount': [0.99, 0.9, 0.5],
         'algo_params.policy_learning_rate': [1e-4, 1e-3, 1e-2],
         'algo_params.qf_learning_rate': [1e-4, 1e-3, 1e-2],
         'algo_params.target_hard_update_period': [10, 100, 1000],
     }
     sweeper = hyp.DeterministicHyperparameterSweeper(
         search_space, default_parameters=variant)
     for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
         for i in range(n_seeds):
             seed = random.randint(0, 10000)
             run_experiment(
                 experiment,
                 exp_prefix=exp_prefix,
                 seed=seed,
                 mode=mode,
                 variant=variant,
                 exp_id=exp_id,
                 sync_s3_log=True,
                 sync_s3_pkl=True,
                 periodic_sync_interval=600,
             )
 if run_mode == 'random':
     hyperparameters = [
         hyp.LinearFloatParam('algo_params.discount', 0, 1),
         hyp.LogFloatParam('algo_params.policy_learning_rate', 1e-7, 1e-1),
         hyp.LogFloatParam('algo_params.qf_learning_rate', 1e-7, 1e-1),
         hyp.LogIntParam('algo_params.target_hard_update_period', 1, 1000),
     ]
     sweeper = hyp.RandomHyperparameterSweeper(
         hyperparameters,
         default_kwargs=variant,
Ejemplo n.º 11
0
    Make sure to hardcode this in vae_launcher.py
    ungrouped_imgs = generate_images(
        env, renderer, num_images=num_ungrouped_images, set=train_sets[0])
    """)
    for _ in range(n_seeds):
        for exp_id, variant in enumerate(variants):
            variant['vae_trainer_kwargs']['beta'] = (
                    1. / variant['create_vae_kwargs']['latent_dim']
            )
            variant['vae_trainer_kwargs']['debug_bad_recons'] = (
                    variant['create_vae_kwargs']['decoder_distribution'] ==
                    'gaussian_learned_global_scalar_variance'
            )
            if mode == 'local':
                variant['vae_algo_kwargs']['num_iters'] = 1
                variant['vae_algo_kwargs']['num_epochs_per_iter'] = 1
                # variant['generate_train_set_kwargs']['saved_filename'] = (
                #     'manual-upload/sets/hand2xy_hand2x_1obj2xy_1obj2x_num_objs_1.pickle'
                # )
            run_experiment(
                train_set_vae,
                exp_name=exp_name,
                prepend_date_to_exp_name=True,
                num_exps_per_instance=2,
                mode=mode,
                variant=variant,
                # slurm_config_name='cpu',
                use_gpu=True,
                # gpu_id=1,
            )
Ejemplo n.º 12
0
            output_size=8,
            hidden_sizes=[8]))
    algo_search_space = copy.deepcopy(algo_variant)
    algo_search_space = {k: [v] for k, v in algo_search_space.items()}
    algo_search_space.update(
        # insert sweep params here
    )

    env_sweeper = hyp.DeterministicHyperparameterSweeper(
        env_search_space,
        default_parameters=env_variant,
    )
    algo_sweeper = hyp.DeterministicHyperparameterSweeper(
        algo_search_space,
        default_parameters=algo_variant,
    )

    for exp_id, env_vari in enumerate(env_sweeper.iterate_hyperparameters()):
        for algo_vari in algo_sweeper.iterate_hyperparameters():
            variant = {'algo_kwargs': algo_vari, 'env_kwargs': env_vari}
            for _ in range(n_seeds):
                run_experiment(experiment,
                               exp_prefix=exp_prefix,
                               mode=mode,
                               variant=variant,
                               use_gpu=use_gpu,
                               region='us-west-2',
                               num_exps_per_instance=3,
                               snapshot_mode='gap',
                               snapshot_gap=10)
Ejemplo n.º 13
0
        #     2,
        #     5,
        #     10,
        # ],
        'vae_kwargs.weight_loss': [
            True,
        ],
        'vae_kwargs.skew_sampling': [
            False,
        ],
        'append_all_data': [
            # True,
            False,
        ],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                train_from_variant,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                exp_id=exp_id,
                # skip_wait=True,
                use_gpu=True,
            )
Ejemplo n.º 14
0
            epoch_length=num_steps_per_iteration,
            eval_samples=100,
            max_path_length=H,
            discount=1,
        ),
        env_params=dict(num_steps=H,
                        # use_small_maze=True,
                        ),
        ou_params=dict(
            max_sigma=1,
            min_sigma=None,
        ),
        exp_prefix=exp_prefix,
        env_class=env_class,
        version="DDPG")
    # Launch one run per seed. `exp_id` starts at -1 and is incremented at the
    # top of every iteration, so the first run gets exp_id 0 and, inside the
    # loop, exp_id always equals `seed`.
    exp_id = -1
    for seed in range(n_seeds):
        exp_id += 1
        # NOTE(review): presumably seeds the launching process's RNGs -- confirm
        # against set_seed's definition.
        set_seed(seed)
        # The seed and exp_id are stored in the variant and also passed to
        # run_experiment as keyword arguments below.
        variant['seed'] = seed
        variant['exp_id'] = exp_id

        run_experiment(
            run_linear_ocm_exp,
            exp_prefix=exp_prefix,
            seed=seed,
            mode=mode,
            variant=variant,
            exp_id=exp_id,
        )
Ejemplo n.º 15
0
        #     False,
        # ],
        'have_no_disentangled_encoder': [
            True,
        ],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    n_seeds = 1
    mode = 'local'
    exp_prefix = '{}'.format(
        __file__.replace('/', '-').replace('_', '-').split('.')[0])

    # n_seeds = 5
    # mode = 'sss'
    # exp_prefix = 'disentangled-basic-test-envs-new-vae'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                disentangled_grill_her_twin_sac_experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                use_gpu=True,
                time_in_mins=int(2.5 * 24 * 60),
            )
Ejemplo n.º 16
0
            discount=0.99,
            qf_learning_rate=1e-3,
            policy_learning_rate=1e-4,
        ),
        version="DDPG",
        epoch_discount_schedule_class=LinearSchedule,
        epoch_discount_schedule_params=dict(
            min_value=0.,
            max_value=0.99,
            ramp_duration=99,
        ),
    )
    for env_class in [
            SwimmerEnv,
            HalfCheetahEnv,
            AntEnv,
            HopperEnv,
    ]:
        variant['env_class'] = env_class
        variant['version'] = str(env_class)
        for _ in range(5):
            seed = random.randint(0, 999999)
            run_experiment(
                experiment,
                exp_prefix="ddpg-increase-gamma",
                seed=seed,
                mode='ec2',
                variant=variant,
                use_gpu=False,
            )
Ejemplo n.º 17
0
        default_parameters=variant,
    )

    n_seeds = 1
    mode = 'local'
    exp_name = 'dev-{}'.format(
        __file__.replace('/', '-').replace('_', '-').split('.')[0])

    n_seeds = 1
    mode = 'sss'
    exp_name = 'one-obj-img-obs-state-reward-sweep-round3-black-background'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for seed in range(n_seeds):
            variant['exp_id'] = exp_id
            # variant['seed'] = seed
            run_experiment(
                image_based_goal_conditioned_sac_experiment,
                exp_name=exp_name,
                mode=mode,
                variant=variant,
                use_gpu=True,
                num_exps_per_instance=3,
                gcp_kwargs=dict(zone='us-east1-c',
                                gpu_kwargs=dict(
                                    gpu_model='nvidia-tesla-k80',
                                    num_gpu=1,
                                )),
                time_in_mins=int(10 * 60),
            )
Ejemplo n.º 18
0
    n_seeds = 2
    # mode = 'sss'
    exp_prefix = 'exp2-single-set-local-mode'

    search_space = {
        # 'vae_algo_kwargs.num_iters': [1],
        'vae_trainer_kwargs.set_loss_weight': [
            0,
            1,
            100,
        ],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    variants = list(sweeper.iterate_hyperparameters())
    for _ in range(n_seeds):
        for exp_id, variant in enumerate(variants):
            variant['exp_id'] = exp_id
            run_experiment(
                disco_experiment,
                exp_name=exp_prefix,
                num_exps_per_instance=2,
                mode=mode,
                variant=variant,
                slurm_config_name='cpu',
                use_gpu=True,
                gpu_id=1,
            )
Ejemplo n.º 19
0
        policy_kwargs=dict(hidden_sizes=[400, 300], ),
        algorithm='SAC',
        version='SAC',
        env_class=HalfCheetahEnv,
    )
    search_space = {
        'env_class': [
            HalfCheetahEnv,
            AntEnv,
            HopperEnv,
            Walker2dEnv,
        ],
        'algo_kwargs.reward_scale': [0.1, 1, 10],
        # 'algo_kwargs.num_updates_per_env_step': [1, 5],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(2):
            run_experiment(
                experiment,
                # exp_prefix="dev-sac-sweep",
                exp_prefix="sac-sweep-try-reparameterization",
                mode='ec2',
                exp_id=exp_id,
                variant=variant,
                use_gpu=False,
            )
            "hinge_cabinet",
            # "light_switch",
        ],
        "actor_kwargs.hidden_size": [100, 512],
        "algorithm_kwargs.clip_param": [0.1, 0.2],
        "actor_kwargs.hidden_activation": ["relu", "tanh"],
        "use_linear_lr_decay": [True, False],
        "rollout_kwargs.use_proper_time_limits": [True, False],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    # Resolve the interpreter path once up front: the result of `which python`
    # does not depend on the variant or the seed, so there is no need to spawn
    # a shell for every launched experiment. `[:-1]` drops the trailing
    # newline that `which` prints after the path.
    python_cmd = subprocess.check_output(
        "which python", shell=True).decode("utf-8")[:-1]

    # Launch each swept variant `args.num_seeds` times, drawing a fresh random
    # seed for every run.
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(args.num_seeds):
            seed = random.randint(0, 100000)
            # The seed and exp_id are recorded in the variant and also passed
            # to run_experiment as keyword arguments.
            variant["seed"] = seed
            variant["exp_id"] = exp_id
            run_experiment(
                experiment,
                exp_prefix=args.exp_prefix,
                mode=args.mode,
                variant=variant,
                use_gpu=False,
                snapshot_mode="none",
                python_cmd=python_cmd,
                seed=seed,
                exp_id=exp_id,
            )
Ejemplo n.º 21
0
        epsilon=0.5,
        tau=0.001,
    ),
                   env_params=dict(),
                   qf_kwargs=dict(hidden_sizes=[32, 32], ))
    search_space = {
        # 'env_params.num_bins': [3, 5, 10],
        # 'env_params.reward_position': [False, True],
        # 'algo_params.tau': [0.01, 0.001],
        # 'algo_params.reward_scale': [0.1, 1, 10],
        # 'algo_params.epsilon': [0.1, 0.5],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for i in range(n_seeds):
            seed = random.randint(0, 10000)
            run_experiment(
                experiment,
                seed=seed,
                # exp_prefix="dqn-swimmer-sweep",
                # mode='ec2',
                # use_gpu=False,
                exp_prefix="dev-dqn-swimmer",
                mode='local',
                use_gpu=True,
                variant=variant,
            )
Ejemplo n.º 22
0
    algo_search_space = {k: [v] for k, v in algo_search_space.items()}
    algo_search_space.update(
        # insert sweep params here
    )

    env_sweeper = hyp.DeterministicHyperparameterSweeper(
        env_search_space,
        default_parameters=env_variant,
    )
    algo_sweeper = hyp.DeterministicHyperparameterSweeper(
        algo_search_space,
        default_parameters=algo_variant,
    )

    for exp_id, env_vari in enumerate(env_sweeper.iterate_hyperparameters()):
        for algo_vari in algo_sweeper.iterate_hyperparameters():
            variant = {'algo_kwargs': algo_vari, 'env_kwargs': env_vari}
            for _ in range(n_seeds):
                run_experiment(
                    experiment,
                    exp_prefix=exp_prefix,
                    mode=mode,
                    variant=variant,
                    use_gpu=use_gpu,
                    region='us-east-2',
                    num_exps_per_instance=1,
                    snapshot_mode='gap',
                    snapshot_gap=10,
                    # instance_type='c5.large',
                    spot_price=0.08)
Ejemplo n.º 23
0
        init_camera=sawyer_pick_and_place_camera,
    )

    search_space = {}
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    n_seeds = 1
    mode = 'local'
    exp_prefix = 'dev-{}'.format(__file__.replace('/', '-').replace('_', '-').split('.')[0])

    # n_seeds = 3
    # mode = 'gcp'
    # exp_prefix = 'skew-fit-pickup-reference-post-refactor'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                skewfit_full_experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                use_gpu=True,
                snapshot_gap=200,
                snapshot_mode='gap_and_last',
                num_exps_per_instance=3,
                gcp_kwargs=dict(zone='us-west1-b', ),
            )
Ejemplo n.º 24
0
        time.sleep(1)
    logger.log("end")
    logger.log('Local date & time is: {}'.format(date.strftime(date_format)))

    logger.log("start mujoco")
    from gym.envs.mujoco import HalfCheetahEnv
    e = HalfCheetahEnv()
    img = e.sim.render(32, 32)
    logger.log(str(sum(img)))
    logger.log("end mujoco_py")


if __name__ == "__main__":
    # Launch five copies of `example` in 'gcp' mode, one per seed 0..4.
    #
    # `date` and `date_format` are kept as module-level names because the
    # logging code above this block formats `date` with `date_format`.
    # noinspection PyTypeChecker
    date_format = '%m/%d/%Y %H:%M:%S %Z'
    date = datetime.now(tz=pytz.utc)
    # Every run shares the same launch timestamp, so format it only once.
    launch_time = str(date.strftime(date_format))
    for seed in range(5):
        variant = {
            'num_seconds': 10,
            'launch_time': launch_time,
            'logger_config': {},
            'seed': seed,
        }
        run_experiment(
            example,
            exp_name='gcp-doodad-easy-launch-example',
            mode='gcp',
            variant=variant,
            use_gpu=False,
        )
Ejemplo n.º 25
0
    # n_seeds = 1
    # mode = 'ec2'
    # exp_prefix = 'sawyer-new-pusher'

    search_space = {
        # 'env_kwargs.randomize_goals': [True, False],
        'algo_kwargs.max_path_length': [100],
        'env_kwargs.reward_info.type': [
            # 'hand_to_object_only',
            'shaped',
        ],
        'exploration_type': [
            'ou',
            # 'epsilon',
            # 'gaussian',
        ],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                td3_experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                exp_id=exp_id,
            )
Ejemplo n.º 26
0
            AntEnv,
            HopperEnv,
            Walker2dEnv,
        ],
        'algo_kwargs.reward_scale': [10000, 100, 1, 0.01],
        'algo_kwargs.optimizer_class': [
            optim.Adam,
        ],
        'algo_kwargs.tau': [
            1e-2,
        ],
        'algo_kwargs.num_updates_per_env_step': [
            1,
        ],
        'es_kwargs.max_sigma': [0.01, 0.1, 0.5],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(1):
            run_experiment(
                example,
                exp_prefix="n3dpg-sweep-hard-tasks",
                mode='ec2',
                exp_id=exp_id,
                variant=variant,
                use_gpu=False,
            )
            0,
        ],
        'generate_set_for_rl_kwargs.saved_filename': [
            '6sets128samples_xy_x_y.pickle',
            '9sets128samples_xy_x_y.pickle',
        ],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    variants = list(sweeper.iterate_hyperparameters())
    # The swept `saved_filename` values include the '.pickle' extension, so the
    # comparison has to include it too. Compared against the bare stem, the
    # six-set branch never matches and every variant is given 9000 eval steps.
    six_set_filename = '6sets128samples_xy_x_y.pickle'
    for _ in range(n_seeds):
        for exp_id, variant in enumerate(variants):
            saved_filename = (
                variant['generate_set_for_rl_kwargs']['saved_filename'])
            # 6000 eval steps per epoch for the six-set file, 9000 otherwise
            # (i.e. for the nine-set file in this sweep).
            if saved_filename == six_set_filename:
                variant['algo_kwargs']['num_eval_steps_per_epoch'] = 6000
            else:
                variant['algo_kwargs']['num_eval_steps_per_epoch'] = 9000
            run_experiment(
                disco_experiment,
                exp_name=exp_prefix,
                prepend_date_to_exp_name=False,
                num_exps_per_instance=2,
                mode=mode,
                variant=variant,
                # slurm_config_name='cpu',
                use_gpu=True,
                # gpu_id=1,
            )
Ejemplo n.º 28
0

if __name__ == "__main__":
    # Configuration handed to `main` through run_experiment: VAE constructor
    # arguments, training-algorithm arguments, and top-level run settings.
    variant = {
        'vae_kwargs': {
            'imsize': 64,
            'representation_size': 128,
            'input_channels': 3,
            'decoder_distribution': 'gaussian_identity_variance',
            'beta': 1,
            'K': 7,
            'T': 10,
        },
        'algo_kwargs': {
            'gamma': 0.5,
            'batch_size': 8,
            'lr': 1e-4,
            'log_interval': 0,
        },
        'num_epochs': 10000,
        'algorithm': 'VAE',
        'save_period': 5,
        'physics': True,
    }

    # Single launch in 'here_no_doodad' mode.
    run_experiment(
        main,
        exp_prefix='iodine-blocks-mpc',
        mode='here_no_doodad',
        variant=variant,
        use_gpu=True,  # Turn on if you have a GPU
    )
Ejemplo n.º 29
0
            hidden_sizes=[300, 300],
            structure='norm_difference',
        ),
        policy_kwargs=dict(hidden_sizes=[300, 300], ),
        es_kwargs=dict(
            theta=0.1,
            max_sigma=0.1,
            min_sigma=0.1,
        ),
        qf_criterion_class=HuberLoss,
        algorithm="DDPG-TDM",
    )

    search_space = {
        'algo_kwargs.base_kwargs.num_updates_per_env_step': [1, 5, 10],
        'algo_kwargs.tdm_kwargs.max_tau': [0, 5],
        'env_class': [SawyerXYEnv, SawyerReachXYEnv],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
            )
Ejemplo n.º 30
0
            'obj_distance',
            'obj_success',
            'hand_and_obj_distance',
            'hand_and_obj_success',
        ],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    n_seeds = 1
    mode = 'local'
    exp_prefix = 'dev-{}'.format(
        __file__.replace('/', '-').replace('_', '-').split('.')[0])

    n_seeds = 3
    mode = 'sss'
    exp_prefix = 'her-td3-pick-reward-sweep-take2'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                use_gpu=False,
                time_in_mins=int(2.5 * 24 * 60),
            )