def main():
    """Sweep DQN hyperparameters and launch every combination once.

    The default ``algo_kwargs`` are combined with each point of the search
    space by ``hyp.DeterministicHyperparameterSweeper``; every resulting
    variant is handed to ``run_experiment`` together with its index in the
    sweep as ``exp_id``.
    """
    default_variant = {
        'algo_kwargs': {
            'lr': 1e-3,
            'max_timesteps': int(1E7),
            'buffer_size': int(5E5),
            'exploration_fraction': 0.1,
            'exploration_final_eps': 0.02,
            'print_freq': 1000,
            'gamma': 0.99,
        },
    }
    search_space = {
        'algo_kwargs.prioritized_replay': [True, False],
        'algo_kwargs.lr': [1e-2, 1e-3, 1e-4],
        'algo_kwargs.exploration_fraction': [0.1, 0.5],
        'algo_kwargs.exploration_final_eps': [0.2, 0.02],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=default_variant,
    )

    # Number of launches per swept configuration.
    runs_per_variant = 1
    for exp_id, swept in enumerate(sweeper.iterate_hyperparameters()):
        for _ in range(runs_per_variant):
            # Local alternative kept from the original script:
            #   exp_prefix="dev-openai-baselines-dqn-cartpole-2", mode='local'
            run_experiment(
                experiment,
                exp_id=exp_id,
                variant=swept,
                exp_prefix="openai-baselines-dqn-cartpole-sweep-2",
                mode='ec2',
            )
def run_algorithm(launch_settings, env_params, exp_prefix, seed, exp_id=1,
                  **kwargs):
    """
    Complete the variant held in ``launch_settings`` and launch the algorithm.

    The dict stored under ``launch_settings['variant']`` is updated in place
    with the environment parameters, algorithm parameters, batch-norm
    parameters, experiment id and environment name before being passed on.

    :param launch_settings: Mapping that provides the ``'variant'``,
        ``'algo_params'``, ``'batch_norm_params'`` and
        ``'algorithm_launcher'`` entries. See
        get_launch_settings_list_from_args
    :param env_params: Keyword arguments for ``get_env_settings``; also
        stored in the variant. See get_env_settings
    :param exp_prefix: Experiment prefix
    :param seed: Experiment seed
    :param exp_id: Experiment ID # to identify it later (e.g. for plotting
        data)
    :param kwargs: Other kwargs forwarded to ``run_experiment``
    :return: None
    """
    variant = launch_settings['variant']
    variant['env_params'] = env_params
    # These two entries are copied over from the launch settings unchanged.
    for key in ('algo_params', 'batch_norm_params'):
        variant[key] = launch_settings[key]
    variant['exp_id'] = exp_id
    # Record the environment's name as reported by get_env_settings.
    variant['Environment'] = get_env_settings(**env_params)['name']

    run_experiment(
        launch_settings['algorithm_launcher'],
        exp_prefix,
        seed,
        variant,
        **kwargs
    )
def main():
    """Launch ``bptt_launcher`` on the ``HighLow`` environment.

    One run is started per seed. Each run draws a random seed, seeds the
    process with ``set_seed`` and records both the seed and the run index
    in the shared variant before calling ``run_experiment``.
    """
    n_seeds = 1
    mode = "here"
    # The "dev-sl" prefix and the n_seeds = 10 / mode = "ec2" settings from
    # the original script are not in effect; this prefix is the one used.
    exp_prefix = "paper-6-14-HL-sl-H25"
    horizon = 25

    # Alternative cell kept from the original script:
    #   rnn_cell_class=LSTMCell, rnn_cell_params=dict(use_peepholes=True)
    rnn_cell_params = {
        'use_peepholes': True,
        'env_noise_std': 0,
        'memory_noise_std': 0,
        'output_nonlinearity': tf.nn.tanh,
        # 'output_nonlinearity': tf.nn.softmax,
        'env_hidden_sizes': [],
        'output_dim': 1,
    }
    algo_params = {
        'num_batches_per_epoch': 100,
        'num_epochs': 30,
        'learning_rate': 1e-3,
        'batch_size': 1000,
        'eval_num_episodes': 64,
        'lstm_state_size': 10,
        'rnn_cell_class': SeparateLstmLinearCell,
        'rnn_cell_params': rnn_cell_params,
        'softmax': False,
    }
    # noinspection PyTypeChecker
    variant = {
        'H': horizon,
        'exp_prefix': exp_prefix,
        'algo_params': algo_params,
        'version': 'Supervised Learning',
        'env_class': HighLow,
        'env_params': {'horizon': horizon},
        # 'env_class': OneCharMemory,
    }

    for exp_id in range(n_seeds):
        seed = random.randint(0, 999999)
        set_seed(seed)
        # The same dict is reused for every run; only these two keys change.
        variant['seed'] = seed
        variant['exp_id'] = exp_id
        run_experiment(
            bptt_launcher,
            exp_prefix=exp_prefix,
            seed=seed,
            mode=mode,
            variant=variant,
            exp_id=exp_id,
        )
def main():
    """Launch DDPG runs with randomly sampled hyperparameters.

    For each of ``num_hyperparameters`` iterations a fresh set of values is
    drawn from ``hp.RandomHyperparameterSweeper``, merged into the fixed
    algorithm parameters and submitted through ``run_experiment``. The
    iteration index is used as the run's seed.
    """
    num_hyperparameters = 40
    layer_norm = True

    sampled_params = [
        hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-1),
        hp.LogFloatParam("policy_learning_rate", 1e-5, 1e-1),
        # NOTE(review): bounds are given high-to-low here, unlike the other
        # parameters -- confirm LogFloatParam handles this as intended.
        hp.LogFloatParam("reward_scale", 10.0, 0.001),
        hp.LogFloatParam("discount", 0.5, 0.99),
    ]
    sweeper = hp.RandomHyperparameterSweeper(sampled_params)

    for seed in range(num_hyperparameters):
        params_dict = sweeper.generate_random_hyperparameters()
        # Fixed settings first, sampled settings appended afterwards.
        algo_params = dict(
            batch_size=128,
            n_epochs=50,
            epoch_length=1000,
            eval_samples=1000,
            replay_pool_size=1000000,
            min_pool_size=256,
            max_path_length=1000,
            qf_weight_decay=0.00,
            n_updates_per_time_step=5,
            soft_target_tau=0.01,
            **params_dict
        )
        variant = {
            'algo_params': algo_params,
            'env_params': {
                'env_id': 'cart',
                'normalize_env': True,
                'gym_name': "",
            },
            'policy_params': {'layer_norm': layer_norm},
            'qf_params': {'layer_norm': layer_norm},
        }
        run_experiment(
            my_ddpg_launcher,
            exp_prefix="3-16-cartpole-ddpg-sweep-test",
            seed=seed,
            variant=variant,
            mode="ec2",
        )
) # n_seeds = 1 # mode = 'local' # exp_prefix = 'test' n_seeds = 3 mode = 'ec2' exp_prefix = 'sawyer_pusher_offline_ae_final' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): if variant['env_id'] == 'SawyerPushAndReachXYEnv-No-Arena-v0': variant['train_vae_variant']['generate_vae_dataset_kwargs']['dataset_path'] = \ 'datasets/SawyerPushAndReachXYEnv-No-Arena-v0_N5000_sawyer_pusher_camera_upright_v3_imsize84_random_oracle_split_0.npy' variant['grill_variant'][ 'presampled_goals_path'] = 'goals/goals_n5000_VAEWrappedEnv(ImageEnv(<SawyerPushAndReachXYEnv<SawyerPushAndReachXYEnv-No-Arena-v0>>)).npy' else: variant['train_vae_variant']['generate_vae_dataset_kwargs']['dataset_path'] = \ 'datasets/SawyerPushAndReachXYEnv-No-Arena-v1_N5000_sawyer_pusher_camera_upright_v3_imsize84_random_oracle_split_0.npy' variant['grill_variant'][ 'presampled_goals_path'] = 'goals/goals_n5000_VAEWrappedEnv(ImageEnv(<SawyerPushAndReachXYEnv<SawyerPushAndReachXYEnv-No-Arena-v1>>)).npy' for _ in range(n_seeds): run_experiment( grill_her_td3_full_experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, use_gpu=True, num_exps_per_instance=2, )
# 10, # 100, ], 'algo_params.tdm_kwargs.max_tau': [ 1, # 15, # 20, ], 'algo_params.supervised_weight': [ # 0, .2, # .4, # .6, # .8, ] } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): run_experiment( experiment, seed=np.random.randint(1, 10004), variant=variant, exp_id=exp_id, # exp_prefix='tdm_rl_supervised_combo', exp_prefix='tdm_rl_supervised_combo', mode='local', )
input_channels=3, imsize=48, architecture=architecture, decoder_distribution='beta', ), save_period=10, beta=2.5, representation_size=16, ) search_space = { 'beta':[.5, 1, 2.5, 5] # 'algo_kwargs.normalize_log_probs':[True], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for _ in range(n_seeds): for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): run_experiment( experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, use_gpu=use_gpu, num_exps_per_instance=2, snapshot_mode='gap_and_last', snapshot_gap=100, # skip_wait=True, )
# Sweep over the replay buffer's ``power`` setting only.
search_space = {
    'grill_variant.replay_buffer_kwargs.power': [
        1 / 10000,
        1 / 1000,
        1 / 100,
        1 / 70,
        1 / 50,
    ],
}
sweeper = hyp.DeterministicHyperparameterSweeper(
    search_space,
    default_parameters=variant,
)

# Local test settings kept from the original script:
# n_seeds = 1
# mode = 'local'
# exp_prefix = 'test'

n_seeds = 6
mode = 'gcp'
exp_prefix = 'reacher-skew-fit-final-fixed-power-bug'

# Every swept configuration is launched n_seeds times.
for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
    for _ in range(n_seeds):
        run_experiment(
            grill_her_twin_sac_online_vae_full_experiment,
            exp_prefix=exp_prefix,
            mode=mode,
            variant=variant,
            use_gpu=True,
            num_exps_per_instance=2,
            gcp_kwargs={
                'zone': 'us-west1-b',
                'gpu_kwargs': {
                    'gpu_model': 'nvidia-tesla-p100',
                    'num_gpu': 1,
                },
            },
        )
# init_camera=sawyer_door_env_camera, # save_video=True, ) n_seeds = 1 mode = 'local' exp_prefix = 'test' # n_seeds = 3 # mode = 'ec2' # exp_prefix = 'sawyer_door_push_and_pull_open_her_td3_full_state_reset' search_space = { 'es_kwargs.max_sigma':[.3, .8], 'env_kwargs.num_resets_before_door_reset':[1, int(1e6)], 'env_kwargs.num_resets_before_hand_reset':[1, int(1e6)], 'env_kwargs.reset_hand_with_door':[True, False], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): run_experiment( her_td3_experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, exp_id=exp_id, )
False, ], 'algo_kwargs.base_kwargs.discount': [ 0.99, ], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) n_seeds = 1 mode = 'local' exp_prefix = 'dev' # n_seeds = 5 # mode = 'ec2' exp_prefix = 'point2d-test' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for i in range(n_seeds): run_experiment( tdm_twin_sac_experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, time_in_mins=23 * 60, snapshot_mode='gap_and_last', snapshot_gap=100, )
n_seeds = 3
# mode = 'ec2'
exp_prefix = 'online-match-hps-point2d-33x33-img-all-fc-goal00-resnet18'

# Each key currently lists a single active value; the disabled alternatives
# are kept as comments.
search_space = {
    'shared_qf_conv': [
        True,
        # False,
    ],
    'collection_mode': [
        # 'batch',
        'online',
    ],
}
sweeper = hyp.DeterministicHyperparameterSweeper(
    search_space,
    default_parameters=variant,
)

# Launch every swept configuration n_seeds times; exp_id is the index of the
# configuration within the sweep.
for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
    for _ in range(n_seeds):
        run_experiment(
            experiment,
            exp_prefix=exp_prefix,
            mode=mode,
            variant=variant,
            exp_id=exp_id,
            use_gpu=True,
            gpu_id=0,
        )
# 'MountainCar-v0', ], 'algo_class': [ DDPG, ], # 'algo_params.use_hard_updates': [True, False], 'qf_criterion_class': [ #nn.MSELoss, HuberLoss, ], 'algo_params.collection_mode': ['online-parallel'] } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for i in range(3): run_experiment( experiment, variant=variant, exp_id=exp_id, exp_prefix= "DDPG-online-parallel-tests-switch-to-multiprocessing-2", mode='local', use_gpu=False, # exp_prefix="double-vs-dqn-huber-sweep-cartpole", # mode='local', # use_gpu=True, )
replay_buffer_size=int(2e4), ), cnn_params=dict( kernel_sizes=[5, 5, 3], n_channels=[32, 32, 32], strides=[3, 3, 2], # pool_sizes=[1, 1, 1], this param is giving an error? hidden_sizes=[400, 300], paddings=[0, 0, 0], # use_batch_norm=True, this param is giving an error? ), qf_criterion_class=HuberLoss, ) PARALLEL = 1 SERIES = 10 for j in range(SERIES): for i in range(PARALLEL): run_experiment( experiment, variant=variant, exp_id=i + PARALLEL * j, exp_prefix= "sac-image-reacher-brandon-softlearning-hyperparameters-{0}". format(i + PARALLEL * j), mode='local', skip_wait=i != PARALLEL - 1)
# Run the same sweep once per launcher.
#
# ``variant`` holds the sweep defaults and is passed to every launcher's
# sweeper, so the inner loops bind each swept configuration to a separate
# name (``swept_variant``). Rebinding ``variant`` in the loop would hand the
# previous launcher's last configuration, including its ``seed`` and
# ``exp_id`` entries, to the next sweeper as its defaults.
for launcher in [
    # trpo_launcher,
    # mem_trpo_launcher,
    # rtrpo_launcher,
    ddpg_launcher,
    mem_ddpg_launcher,
    rdpg_launcher,
]:
    search_space = {
        # 'env_class': [WaterMaze1D, WaterMazeEasy1D, WaterMazeMemory1D],
    }
    sweeper = DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant)
    for variant_idx, swept_variant in enumerate(
            sweeper.iterate_hyperparameters()):
        for seed in range(n_seeds):
            # Unique id per (configuration, seed) pair within this launcher.
            # Incrementing the enumerate index in place made ids overlap
            # between configurations (1..n for the first, 2..n+1 for the
            # second, ...). The "+ 1" keeps the first configuration's ids
            # at 1..n_seeds.
            exp_id = variant_idx * n_seeds + seed + 1
            set_seed(seed)
            swept_variant['seed'] = seed
            swept_variant['exp_id'] = exp_id
            run_experiment(
                launcher,
                exp_prefix=exp_prefix,
                seed=seed,
                mode=mode,
                variant=swept_variant,
                exp_id=exp_id,
                snapshot_mode='last',
                use_gpu=use_gpu,
            )
default_parameters=variant, ) n_seeds = 1 mode = 'local' exp_name = 'dev-{}'.format( __file__.replace('/', '-').replace('_', '-').split('.')[0]) n_seeds = 3 mode = 'sss' exp_name = 'reference-skew-fit-brc-push' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): run_experiment( grill_her_twin_sac_online_vae_full_experiment, unpack_variant=False, exp_name=exp_name, mode=mode, variant=variant, use_gpu=True, num_exps_per_instance=2, gcp_kwargs=dict(terminate=True, zone='us-east1-c', gpu_kwargs=dict( gpu_model='nvidia-tesla-k80', num_gpu=1, )), time_in_mins=int(2.5 * 24 * 60), )
'grill_variant.algo_kwargs.base_kwargs.num_updates_per_env_step': [4], 'grill_variant.exploration_noise': [.3, .5], 'env_kwargs.random_init': [False], 'env_kwargs.action_scale': [.02], 'init_camera': [ sawyer_pick_and_place_camera, ], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) n_seeds = 4 mode = 'ec2' exp_prefix = 'pickup-offline-autoencoder-grill-paper-final' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): run_experiment( grill_her_td3_full_experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, use_gpu=True, # trial_dir_suffix='n1000-{}--zoomed-{}'.format(n1000, zoomed), snapshot_gap=200, snapshot_mode='gap_and_last', num_exps_per_instance=2, )
save_period=10, ), ) search_space = {} sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) # n_seeds = 1 # mode = 'local' # exp_prefix = 'test' n_seeds = 1 mode = 'ec2' exp_prefix = 'sawyer_xy_reacher_her_td3_state' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for i in range(n_seeds): run_experiment( grill_her_td3_full_experiment, exp_prefix=exp_prefix, mode=mode, snapshot_mode='gap_and_last', snapshot_gap=50, variant=variant, use_gpu=True, num_exps_per_instance=5, )
save_video_period=1, ), logger_config=dict( snapshot_gap=10, ), dump_buffer_kwargs=dict( dump_buffer_period=50, ), replay_buffer_size=int(5E5), expl_path_collector_kwargs=dict(), eval_path_collector_kwargs=dict(), shared_qf_conv=False, use_robot_state=False, randomize_env=True, batch_rl=True, ) n_seeds = 1 mode = 'local' mode = 'here_no_doodad' exp_prefix = 'railrl-bear-SAC-carla-{}-{}'.format(args.env, args.obs) run_experiment( experiment, exp_name=exp_prefix, mode=mode, variant=variant, use_gpu=True, gpu_id=args.gpu, unpack_variant=False, )
], 'algo_params.soft_target_tau': [ .01, .001, ], 'env_params.randomize_goal_on_reset': [ True, False, ], 'net_size': [ 200, 300, 400, ] } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) n_seeds = 1 for variant in sweeper.iterate_hyperparameters(): exp_prefix = 'sawyer_simulated_sac_reaching_pos_cntrl' mode = 'here_no_doodad' for i in range(n_seeds): run_experiment( experiment, mode=mode, exp_prefix=exp_prefix, variant=variant, )
# 0.001, # ], 'sac_tdm_kwargs.tdm_kwargs.sample_rollout_goals_from': [ # 'fixed', 'environment', ], 'sac_tdm_kwargs.tdm_kwargs.max_tau': [ 0, ], 'sac_tdm_kwargs.base_kwargs.num_updates_per_env_step': [ 1, ], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for i in range(n_seeds): variant['multitask'] = (variant['sac_tdm_kwargs']['tdm_kwargs'] ['sample_rollout_goals_from'] != 'fixed') seed = random.randint(0, 10000) run_experiment( experiment, mode=mode, exp_prefix=exp_prefix, seed=seed, variant=variant, exp_id=exp_id, )
# 'es_params.memory_es_class': [GaussianStrategy, OUStrategy], # 'es_params.env_es_class': [GaussianStrategy, OUStrategy], # 'es_params.memory_es_params.max_sigma': [0.1, 0.3, 1], # 'es_params.memory_es_params.min_sigma': [1], # 'es_params.env_es_params.max_sigma': [0.1, 0.3, 1], # 'es_params.env_es_params.min_sigma': [1], # 'replay_buffer_params.keep_old_fraction': [0, 0.5, 0.9], } sweeper = DeterministicHyperparameterSweeper( search_space, default_parameters=variant) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for i in range(n_seeds): run_experiment( get_ocm_score, exp_prefix=exp_prefix, seed=i, mode=mode, variant=variant, exp_id=exp_id, ) elif run_mode == 'random': sweeper = RandomHyperparameterSweeper( hyperparameters=[ LinearFloatParam('policy_params.rnn_cell_params.env_noise_std', 0, 1), LinearFloatParam( 'policy_params.rnn_cell_params.memory_noise_std', 0, 1), LogFloatParam('ddpg_params.bpt_bellman_error_weight', 1, 1001, offset=-1), LogFloatParam('meta_params.meta_qf_learning_rate', 1e-5, 1e-2),
def example(*_):
    """Build a DDPG learner for ``DoublePendulumEnv`` and train it.

    Any positional arguments are accepted and ignored.
    """
    env = DoublePendulumEnv()
    exploration_strategy = OUStrategy(env_spec=env.spec)
    critic = FeedForwardCritic(
        name_or_scope="critic",
        env_spec=env.spec,
    )
    actor = FeedForwardPolicy(
        name_or_scope="actor",
        env_spec=env.spec,
    )
    # Positional order expected by the call: env, exploration strategy,
    # policy, Q-function.
    DDPG(
        env,
        exploration_strategy,
        actor,
        critic,
        n_epochs=30,
        batch_size=1024,
    ).train()


if __name__ == "__main__":
    # Run the example through the experiment launcher with a fixed seed.
    run_experiment(
        example,
        exp_prefix="ddpg-double-pendulum",
        seed=0,
        mode='here',
    )
'grill_variant.algo_kwargs.base_kwargs.num_updates_per_env_step': [2], 'grill_variant.algo_kwargs.base_kwargs.max_path_length': [100], 'grill_variant.algo_kwargs.online_vae_kwargs.oracle_data': [False], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) n_seeds = 1 mode = 'local' exp_prefix = 'dev' n_seeds = 2 mode = 'ec2' exp_prefix = 'pusher-test-pnp-merge-2' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): run_experiment( grill_her_td3_online_vae_full_experiment, exp_id=exp_id, exp_prefix=exp_prefix, mode=mode, variant=variant, use_gpu=True, snapshot_gap=200, snapshot_mode='gap_and_last', num_exps_per_instance=2, )
# Experiment name: environment name plus an optional user-supplied label.
exp_prefix = "her-twin-sac-" + args.env
if len(args.label) > 0:
    exp_prefix += "-" + args.label

# search_space is the same object as common_params, so the per-environment
# entries are merged into common_params itself.
search_space = common_params
search_space.update(env_params[args.env])
sweeper = hyp.DeterministicHyperparameterSweeper(
    search_space,
    default_parameters=variant,
)

# On EC2 with a GPU, request all seeds in one pass via
# num_exps_per_instance; otherwise repeat the whole sweep once per seed.
num_exps_per_instance, num_outer_loops = (
    (args.num_seeds, 1)
    if args.mode == 'ec2' and args.gpu
    else (1, args.num_seeds)
)

for _ in range(num_outer_loops):
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        run_experiment(
            grill_her_twin_sac_experiment,
            exp_prefix=exp_prefix,
            mode=args.mode,
            exp_id=exp_id,
            variant=variant,
            use_gpu=args.gpu,
            num_exps_per_instance=num_exps_per_instance,
            # Snapshot gap is a tenth of the epoch count, rounded up.
            snapshot_gap=int(math.ceil(
                variant['algo_kwargs']['base_kwargs']['num_epochs'] / 10
            )),
            snapshot_mode='gap_and_last',
        )
], 'trainer_kwargs.awr_min_q': [ True, ], 'trainer_kwargs.q_weight_decay': [0], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) # n_seeds = 1 # mode = 'local' # exp_prefix = 'awr_sac_offline_ant_v1' n_seeds = 2 mode = 'ec2' exp_prefix = 'awr_sac_ant_offline_online_short_pretraining_len_v1' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): run_experiment( experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, num_exps_per_instance=2, use_gpu=True, gcp_kwargs=dict(preemptible=False, ), )
optimizer_params=dict(base_eps=1e-5, ), policy_kwargs=dict(hidden_sizes=(100, 100), ), multitask=False, ) search_space = { 'env_class': [ DiscreteReacher2D, # MountainCar, # CartPole, # CartPoleAngleOnly, ], 'multitask': [False, True], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): seed = random.randint(0, 999999) run_experiment( experiment, exp_prefix=exp_prefix, seed=seed, mode=mode, variant=variant, use_gpu=False, snapshot_mode='gap', snapshot_gap=5, )
), algo_class=DDPG, qf_criterion_class=HuberLoss, ) search_space = { # 'algo_params.use_hard_updates': [True, False], 'qf_criterion_class': [ HuberLoss, ], 'history': [2, 3] } # setup_logger('dqn-images-experiment', variant=variant) # experiment(variant) sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): # for i in range(2): run_experiment( experiment, variant=variant, exp_id=exp_id, exp_prefix="DDPG-images-pusher-partial-param-check-batch-norm", mode='ec2', # exp_prefix="double-vs-dqn-huber-sweep-cartpole", # mode='local', #use_gpu=True, )
lr=3e-4, normalize=True, num_epochs=200, weight_decay=0, num_divisions=1, vae=None #load vae here ) search_space = { 'batch_size': [256], 'hidden_sizes': [[100], [100, 100], [300, 300, 300]], 'weight_decay': [.001, .01, .1], 'lr': [1e-3, 1e-4], 'normalize': [True], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for _ in range(n_seeds): for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): run_experiment( experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, use_gpu=use_gpu, snapshot_mode='gap', snapshot_gap=20, )
# plotter=plotter, # render_eval_paths=True, **variant['algo_params']) algorithm.to(ptu.device) algorithm.train() if __name__ == "__main__": # noinspection PyTypeChecker variant = dict(algo_params=dict( num_epochs=10, num_steps_per_epoch=1000, num_steps_per_eval=300, batch_size=64, max_path_length=30, reward_scale=0.3, discount=0.99, soft_target_tau=0.001, ), ) for _ in range(1): seed = random.randint(0, 999999) run_experiment( experiment, seed=seed, variant=variant, exp_prefix="dev-sac-multigoal", # exp_prefix="dev-profile", mode='local', use_gpu=False, )
sweeper = hyp.DeterministicHyperparameterSweeper(
    search_space,
    default_parameters=variant,
)

# The original script first assigned the dev settings (n_seeds = 1,
# mode = 'local', exp_prefix = 'dev') and immediately overrode them; only
# the values that take effect are assigned here.
n_seeds = 2
mode = 'sss'
exp_prefix = 'pusher-sf-steven-reference-script-rb-size-sweep'

# Launch every swept configuration n_seeds times.
for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
    for _ in range(n_seeds):
        run_experiment(
            grill_her_twin_sac_online_vae_full_experiment,
            exp_prefix=exp_prefix,
            mode=mode,
            variant=variant,
            use_gpu=True,
            num_exps_per_instance=3,
            time_in_mins=int(2.8 * 24 * 60),
            snapshot_gap=100,
            snapshot_mode='gap_and_last',
            gcp_kwargs={
                'terminate': True,
                'zone': 'us-east1-c',
                'gpu_kwargs': {
                    'gpu_model': 'nvidia-tesla-k80',
                    'num_gpu': 1,
                },
            },
        )