Exemple #1
0
def main():
    """Sweep hyperparameters for ``experiment`` and launch every configuration.

    A default ``algo_kwargs`` variant is combined with a grid of overrides by
    ``hyp.DeterministicHyperparameterSweeper``. Each configuration yielded by
    the sweeper is submitted once through ``run_experiment`` with
    ``mode='ec2'``; its position in the sweep is used as ``exp_id``.
    """
    default_variant = {
        'algo_kwargs': {
            'lr': 1e-3,
            'max_timesteps': int(1E7),
            'buffer_size': int(5E5),
            'exploration_fraction': 0.1,
            'exploration_final_eps': 0.02,
            'print_freq': 1000,
            'gamma': 0.99,
        },
    }
    # Dotted keys address entries nested inside the default variant.
    grid = {
        'algo_kwargs.prioritized_replay': [True, False],
        'algo_kwargs.lr': [1e-2, 1e-3, 1e-4],
        'algo_kwargs.exploration_fraction': [0.1, 0.5],
        'algo_kwargs.exploration_final_eps': [0.2, 0.02],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        grid,
        default_parameters=default_variant,
    )
    # For a local dry run use
    # exp_prefix="dev-openai-baselines-dqn-cartpole-2" and mode='local'.
    swept_variants = sweeper.iterate_hyperparameters()
    for sweep_index, swept_variant in enumerate(swept_variants):
        run_experiment(
            experiment,
            exp_id=sweep_index,
            variant=swept_variant,
            exp_prefix="openai-baselines-dqn-cartpole-sweep-2",
            mode='ec2',
        )
Exemple #2
0
def run_algorithm(launch_settings,
                  env_params,
                  exp_prefix,
                  seed,
                  exp_id=1,
                  **kwargs):
    """
    Assemble the experiment variant and launch it with run_experiment.

    The dict stored under ``launch_settings['variant']`` is updated in place
    with the environment parameters, algorithm parameters, batch-norm
    parameters, experiment ID and the environment name before being passed on.

    :param launch_settings: See get_launch_settings_list_from_args. Must
        provide the keys 'variant', 'algo_params', 'batch_norm_params' and
        'algorithm_launcher'.
    :param env_params: See get_env_settings; also expanded as its keyword
        arguments.
    :param exp_prefix: Experiment prefix
    :param seed: Experiment seed
    :param exp_id: Experiment ID # to identify it later (e.g. for plotting data)
    :param kwargs: Other kwargs forwarded to run_experiment
    :return: None
    """
    variant = launch_settings['variant']
    variant.update(
        env_params=env_params,
        algo_params=launch_settings['algo_params'],
        batch_norm_params=launch_settings['batch_norm_params'],
        exp_id=exp_id,
    )
    # Record the environment's name as reported by get_env_settings.
    variant['Environment'] = get_env_settings(**env_params)['name']

    run_experiment(
        launch_settings['algorithm_launcher'],
        exp_prefix,
        seed,
        variant,
        **kwargs
    )
Exemple #3
0
def main():
    """Launch ``bptt_launcher`` once per seed with a fixed variant.

    For each of ``n_seeds`` runs a random seed in [0, 999999] is drawn,
    applied through ``set_seed``, written into the shared ``variant`` dict
    together with the zero-based ``exp_id``, and the run is submitted via
    ``run_experiment``.
    """
    # Alternative launch settings kept for reference:
    # n_seeds = 10
    # mode = "ec2"
    # exp_prefix = "dev-sl"
    n_seeds = 1
    mode = "here"
    exp_prefix = "paper-6-14-HL-sl-H25"

    H = 25

    # An alternative cell configuration was
    # rnn_cell_class=LSTMCell with rnn_cell_params=dict(use_peepholes=True).
    rnn_cell_params = dict(
        use_peepholes=True,
        env_noise_std=0,
        memory_noise_std=0,
        output_nonlinearity=tf.nn.tanh,  # alternative: tf.nn.softmax
        env_hidden_sizes=[],
        output_dim=1,
    )
    algo_params = dict(
        num_batches_per_epoch=100,
        num_epochs=30,
        learning_rate=1e-3,
        batch_size=1000,
        eval_num_episodes=64,
        lstm_state_size=10,
        rnn_cell_class=SeparateLstmLinearCell,
        rnn_cell_params=rnn_cell_params,
        softmax=False,
    )
    # noinspection PyTypeChecker
    variant = dict(
        H=H,
        exp_prefix=exp_prefix,
        algo_params=algo_params,
        version='Supervised Learning',
        env_class=HighLow,  # alternative: OneCharMemory
        env_params=dict(horizon=H),
    )

    for exp_id in range(n_seeds):
        seed = random.randint(0, 999999)
        set_seed(seed)
        # The same variant dict is reused; only seed and exp_id change.
        variant['seed'] = seed
        variant['exp_id'] = exp_id

        run_experiment(
            bptt_launcher,
            exp_prefix=exp_prefix,
            seed=seed,
            mode=mode,
            variant=variant,
            exp_id=exp_id,
        )
def main():
    """Launch ``my_ddpg_launcher`` on randomly sampled hyperparameters.

    Draws ``num_hyperparameters`` samples from a
    ``hp.RandomHyperparameterSweeper``, merges each sample into the fixed
    ``algo_params`` and submits one ``run_experiment`` call per sample with
    ``mode="ec2"``. The sample index doubles as the seed.
    """
    num_hyperparameters = 40
    layer_norm = True

    sampled_params = [
        hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-1),
        hp.LogFloatParam("policy_learning_rate", 1e-5, 1e-1),
        # NOTE(review): bounds are given high-to-low here, unlike the other
        # parameters — confirm LogFloatParam accepts that ordering.
        hp.LogFloatParam("reward_scale", 10.0, 0.001),
        hp.LogFloatParam("discount", 0.5, 0.99),
    ]
    sweeper = hp.RandomHyperparameterSweeper(sampled_params)

    def build_variant(sampled):
        # Fresh dicts are created on every call so variants never share state.
        return dict(
            algo_params=dict(
                batch_size=128,
                n_epochs=50,
                epoch_length=1000,
                eval_samples=1000,
                replay_pool_size=1000000,
                min_pool_size=256,
                max_path_length=1000,
                qf_weight_decay=0.00,
                n_updates_per_time_step=5,
                soft_target_tau=0.01,
                **sampled
            ),
            env_params=dict(
                env_id='cart',
                normalize_env=True,
                gym_name="",
            ),
            policy_params=dict(layer_norm=layer_norm),
            qf_params=dict(layer_norm=layer_norm),
        )

    for seed in range(num_hyperparameters):
        variant = build_variant(sweeper.generate_random_hyperparameters())
        run_experiment(
            my_ddpg_launcher,
            exp_prefix="3-16-cartpole-ddpg-sweep-test",
            seed=seed,
            variant=variant,
            mode="ec2",
        )
Exemple #5
0
    )

    # n_seeds = 1
    # mode = 'local'
    # exp_prefix = 'test'

    n_seeds = 3
    mode = 'ec2'
    exp_prefix = 'sawyer_pusher_offline_ae_final'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        if variant['env_id'] == 'SawyerPushAndReachXYEnv-No-Arena-v0':
            variant['train_vae_variant']['generate_vae_dataset_kwargs']['dataset_path'] = \
                'datasets/SawyerPushAndReachXYEnv-No-Arena-v0_N5000_sawyer_pusher_camera_upright_v3_imsize84_random_oracle_split_0.npy'
            variant['grill_variant'][
                'presampled_goals_path'] = 'goals/goals_n5000_VAEWrappedEnv(ImageEnv(<SawyerPushAndReachXYEnv<SawyerPushAndReachXYEnv-No-Arena-v0>>)).npy'
        else:
            variant['train_vae_variant']['generate_vae_dataset_kwargs']['dataset_path'] = \
                'datasets/SawyerPushAndReachXYEnv-No-Arena-v1_N5000_sawyer_pusher_camera_upright_v3_imsize84_random_oracle_split_0.npy'
            variant['grill_variant'][
                'presampled_goals_path'] = 'goals/goals_n5000_VAEWrappedEnv(ImageEnv(<SawyerPushAndReachXYEnv<SawyerPushAndReachXYEnv-No-Arena-v1>>)).npy'
        for _ in range(n_seeds):
            run_experiment(
                grill_her_td3_full_experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                use_gpu=True,
                num_exps_per_instance=2,
            )
Exemple #6
0
            # 10,
            # 100,
        ],
        'algo_params.tdm_kwargs.max_tau': [
            1,
            # 15,
            # 20,
        ],
        'algo_params.supervised_weight': [
            # 0,
            .2,
            # .4,
            # .6,
            # .8,
        ]
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        run_experiment(
            experiment,
            seed=np.random.randint(1, 10004),
            variant=variant,
            exp_id=exp_id,
            # exp_prefix='tdm_rl_supervised_combo',
            exp_prefix='tdm_rl_supervised_combo',
            mode='local',
        )
Exemple #7
0
            input_channels=3,
            imsize=48,
            architecture=architecture,
            decoder_distribution='beta',
        ),
        save_period=10,
        beta=2.5,
        representation_size=16,
    )

    search_space = {
        'beta':[.5, 1, 2.5, 5]
        # 'algo_kwargs.normalize_log_probs':[True],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )
    for _ in range(n_seeds):
        for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
            run_experiment(
                experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                use_gpu=use_gpu,
                num_exps_per_instance=2,
                snapshot_mode='gap_and_last',
                snapshot_gap=100,
                # skip_wait=True,
            )
    search_space = {
        'grill_variant.replay_buffer_kwargs.power':
        [1 / 10000, 1 / 1000, 1 / 100, 1 / 70, 1 / 50],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    # n_seeds = 1
    # mode = 'local'
    # exp_prefix = 'test'

    n_seeds = 6
    mode = 'gcp'
    exp_prefix = 'reacher-skew-fit-final-fixed-power-bug'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(grill_her_twin_sac_online_vae_full_experiment,
                           exp_prefix=exp_prefix,
                           mode=mode,
                           variant=variant,
                           use_gpu=True,
                           num_exps_per_instance=2,
                           gcp_kwargs=dict(zone='us-west1-b',
                                           gpu_kwargs=dict(
                                               gpu_model='nvidia-tesla-p100',
                                               num_gpu=1,
                                           )))
        # init_camera=sawyer_door_env_camera,
        # save_video=True,
    )

    n_seeds = 1
    mode = 'local'
    exp_prefix = 'test'

    # n_seeds = 3
    # mode = 'ec2'
    # exp_prefix = 'sawyer_door_push_and_pull_open_her_td3_full_state_reset'

    search_space = {
        'es_kwargs.max_sigma':[.3, .8],
        'env_kwargs.num_resets_before_door_reset':[1, int(1e6)],
        'env_kwargs.num_resets_before_hand_reset':[1, int(1e6)],
        'env_kwargs.reset_hand_with_door':[True, False],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                her_td3_experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                exp_id=exp_id,
            )
            False,
        ],
        'algo_kwargs.base_kwargs.discount': [
            0.99,
        ],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    n_seeds = 1
    mode = 'local'
    exp_prefix = 'dev'

    # n_seeds = 5
    # mode = 'ec2'
    exp_prefix = 'point2d-test'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for i in range(n_seeds):
            run_experiment(
                tdm_twin_sac_experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                time_in_mins=23 * 60,
                snapshot_mode='gap_and_last',
                snapshot_gap=100,
            )
Exemple #11
0
    n_seeds = 3
    # mode = 'ec2'
    exp_prefix = 'online-match-hps-point2d-33x33-img-all-fc-goal00-resnet18'

    search_space = {
        'shared_qf_conv': [
            True,
            # False,
        ],
        'collection_mode': [
            # 'batch',
            'online',
        ]
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                exp_id=exp_id,
                use_gpu=True,
                gpu_id=0,
            )
            # 'MountainCar-v0',
        ],
        'algo_class': [
            DDPG,
        ],
        # 'algo_params.use_hard_updates': [True, False],
        'qf_criterion_class': [
            #nn.MSELoss,
            HuberLoss,
        ],
        'algo_params.collection_mode': ['online-parallel']
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for i in range(3):
            run_experiment(
                experiment,
                variant=variant,
                exp_id=exp_id,
                exp_prefix=
                "DDPG-online-parallel-tests-switch-to-multiprocessing-2",
                mode='local',
                use_gpu=False,
                # exp_prefix="double-vs-dqn-huber-sweep-cartpole",
                # mode='local',
                # use_gpu=True,
            )
            replay_buffer_size=int(2e4),
        ),
        cnn_params=dict(
            kernel_sizes=[5, 5, 3],
            n_channels=[32, 32, 32],
            strides=[3, 3, 2],
            # pool_sizes=[1, 1, 1], this param is giving an error?
            hidden_sizes=[400, 300],
            paddings=[0, 0, 0],
            # use_batch_norm=True, this param is giving an error?
        ),
        qf_criterion_class=HuberLoss,
    )

    PARALLEL = 1
    SERIES = 10

    for j in range(SERIES):

        for i in range(PARALLEL):

            run_experiment(
                experiment,
                variant=variant,
                exp_id=i + PARALLEL * j,
                exp_prefix=
                "sac-image-reacher-brandon-softlearning-hyperparameters-{0}".
                format(i + PARALLEL * j),
                mode='local',
                skip_wait=i != PARALLEL - 1)
Exemple #14
0
    for launcher in [
            # trpo_launcher,
            # mem_trpo_launcher,
            # rtrpo_launcher,
            ddpg_launcher,
            mem_ddpg_launcher,
            rdpg_launcher,
    ]:
        search_space = {
            # 'env_class': [WaterMaze1D, WaterMazeEasy1D, WaterMazeMemory1D],
        }
        sweeper = DeterministicHyperparameterSweeper(
            search_space, default_parameters=variant)
        for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
            for seed in range(n_seeds):
                exp_id += 1
                set_seed(seed)
                variant['seed'] = seed
                variant['exp_id'] = exp_id

                run_experiment(
                    launcher,
                    exp_prefix=exp_prefix,
                    seed=seed,
                    mode=mode,
                    variant=variant,
                    exp_id=exp_id,
                    snapshot_mode='last',
                    use_gpu=use_gpu,
                )
        default_parameters=variant,
    )

    n_seeds = 1
    mode = 'local'
    exp_name = 'dev-{}'.format(
        __file__.replace('/', '-').replace('_', '-').split('.')[0])

    n_seeds = 3
    mode = 'sss'
    exp_name = 'reference-skew-fit-brc-push'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                grill_her_twin_sac_online_vae_full_experiment,
                unpack_variant=False,
                exp_name=exp_name,
                mode=mode,
                variant=variant,
                use_gpu=True,
                num_exps_per_instance=2,
                gcp_kwargs=dict(terminate=True,
                                zone='us-east1-c',
                                gpu_kwargs=dict(
                                    gpu_model='nvidia-tesla-k80',
                                    num_gpu=1,
                                )),
                time_in_mins=int(2.5 * 24 * 60),
            )
Exemple #16
0
        'grill_variant.algo_kwargs.base_kwargs.num_updates_per_env_step': [4],
        'grill_variant.exploration_noise': [.3, .5],
        'env_kwargs.random_init': [False],
        'env_kwargs.action_scale': [.02],
        'init_camera': [
            sawyer_pick_and_place_camera,
        ],


    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )

    n_seeds = 4
    mode = 'ec2'
    exp_prefix = 'pickup-offline-autoencoder-grill-paper-final'
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                grill_her_td3_full_experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                use_gpu=True,
                # trial_dir_suffix='n1000-{}--zoomed-{}'.format(n1000, zoomed),
                snapshot_gap=200,
                snapshot_mode='gap_and_last',
                num_exps_per_instance=2,
            )
            save_period=10,
        ),
    )

    search_space = {}

    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    # n_seeds = 1
    # mode = 'local'
    # exp_prefix = 'test'

    n_seeds = 1
    mode = 'ec2'
    exp_prefix = 'sawyer_xy_reacher_her_td3_state'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for i in range(n_seeds):
            run_experiment(
                grill_her_td3_full_experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                snapshot_mode='gap_and_last',
                snapshot_gap=50,
                variant=variant,
                use_gpu=True,
                num_exps_per_instance=5,
            )
Exemple #18
0
            save_video_period=1,
        ),
        logger_config=dict(
            snapshot_gap=10,
        ),
        dump_buffer_kwargs=dict(
            dump_buffer_period=50,
        ),
        replay_buffer_size=int(5E5),
        expl_path_collector_kwargs=dict(),
        eval_path_collector_kwargs=dict(),
        shared_qf_conv=False,
        use_robot_state=False,
        randomize_env=True,
        batch_rl=True,
    )
    n_seeds = 1
    mode = 'local'
    mode = 'here_no_doodad'
    exp_prefix = 'railrl-bear-SAC-carla-{}-{}'.format(args.env, args.obs)

    run_experiment(
        experiment,
        exp_name=exp_prefix,
        mode=mode,
        variant=variant,
        use_gpu=True,
        gpu_id=args.gpu,
        unpack_variant=False,
    )
        ],
        'algo_params.soft_target_tau': [
            .01,
            .001,
        ],
        'env_params.randomize_goal_on_reset': [
            True,
            False,
        ],
        'net_size': [
            200,
            300,
            400,
        ]
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    n_seeds = 1
    for variant in sweeper.iterate_hyperparameters():
        exp_prefix = 'sawyer_simulated_sac_reaching_pos_cntrl'
        mode = 'here_no_doodad'
        for i in range(n_seeds):
            run_experiment(
                experiment,
                mode=mode,
                exp_prefix=exp_prefix,
                variant=variant,
            )
Exemple #20
0
        # 0.001,
        # ],
        'sac_tdm_kwargs.tdm_kwargs.sample_rollout_goals_from': [
            # 'fixed',
            'environment',
        ],
        'sac_tdm_kwargs.tdm_kwargs.max_tau': [
            0,
        ],
        'sac_tdm_kwargs.base_kwargs.num_updates_per_env_step': [
            1,
        ],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for i in range(n_seeds):
            variant['multitask'] = (variant['sac_tdm_kwargs']['tdm_kwargs']
                                    ['sample_rollout_goals_from'] != 'fixed')
            seed = random.randint(0, 10000)
            run_experiment(
                experiment,
                mode=mode,
                exp_prefix=exp_prefix,
                seed=seed,
                variant=variant,
                exp_id=exp_id,
            )
         # 'es_params.memory_es_class': [GaussianStrategy, OUStrategy],
         # 'es_params.env_es_class': [GaussianStrategy, OUStrategy],
         # 'es_params.memory_es_params.max_sigma': [0.1, 0.3, 1],
         # 'es_params.memory_es_params.min_sigma': [1],
         # 'es_params.env_es_params.max_sigma': [0.1, 0.3, 1],
         # 'es_params.env_es_params.min_sigma': [1],
         # 'replay_buffer_params.keep_old_fraction': [0, 0.5, 0.9],
     }
     sweeper = DeterministicHyperparameterSweeper(
         search_space, default_parameters=variant)
     for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
         for i in range(n_seeds):
             run_experiment(
                 get_ocm_score,
                 exp_prefix=exp_prefix,
                 seed=i,
                 mode=mode,
                 variant=variant,
                 exp_id=exp_id,
             )
 elif run_mode == 'random':
     sweeper = RandomHyperparameterSweeper(
         hyperparameters=[
             LinearFloatParam('policy_params.rnn_cell_params.env_noise_std',
                              0, 1),
             LinearFloatParam(
                 'policy_params.rnn_cell_params.memory_noise_std', 0, 1),
             LogFloatParam('ddpg_params.bpt_bellman_error_weight',
                           1,
                           1001,
                           offset=-1),
             LogFloatParam('meta_params.meta_qf_learning_rate', 1e-5, 1e-2),
def example(*_):
    """Build a DDPG setup on ``DoublePendulumEnv`` and train it.

    Any positional arguments are accepted and ignored. The critic and the
    policy are created under the scopes "critic" and "actor" respectively.
    """
    pendulum = DoublePendulumEnv()
    exploration = OUStrategy(env_spec=pendulum.spec)
    critic = FeedForwardCritic(name_or_scope="critic", env_spec=pendulum.spec)
    actor = FeedForwardPolicy(name_or_scope="actor", env_spec=pendulum.spec)
    # Positional order expected here: env, exploration strategy object,
    # policy, then the critic.
    DDPG(
        pendulum,
        exploration,
        actor,
        critic,
        n_epochs=30,
        batch_size=1024,
    ).train()


if __name__ == "__main__":
    # Script entry point: submit `example` through run_experiment with a
    # fixed seed and mode 'here'.
    run_experiment(
        example, exp_prefix="ddpg-double-pendulum", seed=0, mode='here',
    )
        'grill_variant.algo_kwargs.base_kwargs.num_updates_per_env_step': [2],
        'grill_variant.algo_kwargs.base_kwargs.max_path_length': [100],
        'grill_variant.algo_kwargs.online_vae_kwargs.oracle_data': [False],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    n_seeds = 1
    mode = 'local'
    exp_prefix = 'dev'

    n_seeds = 2
    mode = 'ec2'
    exp_prefix = 'pusher-test-pnp-merge-2'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                grill_her_td3_online_vae_full_experiment,
                exp_id=exp_id,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                use_gpu=True,
                snapshot_gap=200,
                snapshot_mode='gap_and_last',
                num_exps_per_instance=2,
            )
    exp_prefix = "her-twin-sac-" + args.env
    if len(args.label) > 0:
        exp_prefix = exp_prefix + "-" + args.label

    search_space = common_params
    search_space.update(env_params[args.env])
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )
    if args.mode == 'ec2' and args.gpu:
        num_exps_per_instance = args.num_seeds
        num_outer_loops = 1
    else:
        num_exps_per_instance = 1
        num_outer_loops = args.num_seeds

    for _ in range(num_outer_loops):
        for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
            run_experiment(
                grill_her_twin_sac_experiment,
                exp_prefix=exp_prefix,
                mode=args.mode,
                exp_id=exp_id,
                variant=variant,
                use_gpu=args.gpu,
                num_exps_per_instance=num_exps_per_instance,
                snapshot_gap=int(math.ceil(variant['algo_kwargs']['base_kwargs']['num_epochs'] / 10)),
                snapshot_mode='gap_and_last',
            )
Exemple #25
0
        ],
        'trainer_kwargs.awr_min_q': [
            True,
        ],
        'trainer_kwargs.q_weight_decay': [0],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    # n_seeds = 1
    # mode = 'local'
    # exp_prefix = 'awr_sac_offline_ant_v1'

    n_seeds = 2
    mode = 'ec2'
    exp_prefix = 'awr_sac_ant_offline_online_short_pretraining_len_v1'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(
                experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                num_exps_per_instance=2,
                use_gpu=True,
                gcp_kwargs=dict(preemptible=False, ),
            )
        optimizer_params=dict(base_eps=1e-5, ),
        policy_kwargs=dict(hidden_sizes=(100, 100), ),
        multitask=False,
    )
    search_space = {
        'env_class': [
            DiscreteReacher2D,
            # MountainCar,
            # CartPole,
            # CartPoleAngleOnly,
        ],
        'multitask': [False, True],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            seed = random.randint(0, 999999)
            run_experiment(
                experiment,
                exp_prefix=exp_prefix,
                seed=seed,
                mode=mode,
                variant=variant,
                use_gpu=False,
                snapshot_mode='gap',
                snapshot_gap=5,
            )
        ),
        algo_class=DDPG,
        qf_criterion_class=HuberLoss,
    )
    search_space = {
        # 'algo_params.use_hard_updates': [True, False],
        'qf_criterion_class': [
            HuberLoss,
        ],
        'history': [2, 3]
    }
    #    setup_logger('dqn-images-experiment', variant=variant)
    #    experiment(variant)

    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        #        for i in range(2):
        run_experiment(
            experiment,
            variant=variant,
            exp_id=exp_id,
            exp_prefix="DDPG-images-pusher-partial-param-check-batch-norm",
            mode='ec2',
            # exp_prefix="double-vs-dqn-huber-sweep-cartpole",
            # mode='local',
            #use_gpu=True,
        )
        lr=3e-4,
        normalize=True,
        num_epochs=200,
        weight_decay=0,
        num_divisions=1,
        vae=None  #load vae here
    )

    search_space = {
        'batch_size': [256],
        'hidden_sizes': [[100], [100, 100], [300, 300, 300]],
        'weight_decay': [.001, .01, .1],
        'lr': [1e-3, 1e-4],
        'normalize': [True],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    for _ in range(n_seeds):
        for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
            run_experiment(
                experiment,
                exp_prefix=exp_prefix,
                mode=mode,
                variant=variant,
                use_gpu=use_gpu,
                snapshot_mode='gap',
                snapshot_gap=20,
            )
Exemple #29
0
        # plotter=plotter,
        # render_eval_paths=True,
        **variant['algo_params'])
    algorithm.to(ptu.device)
    algorithm.train()


if __name__ == "__main__":
    # Script entry point: launch `experiment` once, locally and without GPU,
    # using a randomly drawn seed.
    # noinspection PyTypeChecker
    variant = {
        'algo_params': {
            'num_epochs': 10,
            'num_steps_per_epoch': 1000,
            'num_steps_per_eval': 300,
            'batch_size': 64,
            'max_path_length': 30,
            'reward_scale': 0.3,
            'discount': 0.99,
            'soft_target_tau': 0.001,
        },
    }
    for _ in range(1):
        seed = random.randint(0, 999999)
        # An alternative prefix used for profiling was "dev-profile".
        run_experiment(
            experiment,
            seed=seed,
            variant=variant,
            exp_prefix="dev-sac-multigoal",
            mode='local',
            use_gpu=False,
        )
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    n_seeds = 1
    mode = 'local'
    exp_prefix = 'dev'

    n_seeds = 2
    mode = 'sss'
    exp_prefix = 'pusher-sf-steven-reference-script-rb-size-sweep'

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(n_seeds):
            run_experiment(grill_her_twin_sac_online_vae_full_experiment,
                           exp_prefix=exp_prefix,
                           mode=mode,
                           variant=variant,
                           use_gpu=True,
                           num_exps_per_instance=3,
                           time_in_mins=int(2.8 * 24 * 60),
                           snapshot_gap=100,
                           snapshot_mode='gap_and_last',
                           gcp_kwargs=dict(terminate=True,
                                           zone='us-east1-c',
                                           gpu_kwargs=dict(
                                               gpu_model='nvidia-tesla-k80',
                                               num_gpu=1,
                                           )))