random_rollout_data=True, use_cached=False, vae_dataset_specific_kwargs=dict(), show=False, ), vae_kwargs=dict(input_channels=3, ), algo_kwargs=dict( do_scatterplot=False, batch_size=128, lr=1e-3, ), save_period=5, ), env_class=Point2DWallEnv, env_kwargs=dict( render_onscreen=False, ball_radius=1, images_are_rgb=True, show_goal=False, ), algorithm='RIG', ) run_experiment( grill_her_td3_full_experiment, exp_prefix='rlkit-pointmass-rig-example', mode='here_no_doodad', variant=variant, # use_gpu=True, # Turn on if you have a GPU )
num_trains_per_train_loop=100, min_num_steps_before_training=100, ) variant['save_video'] = True variant['save_video_kwargs']['rows'] = 1 variant['save_video_kwargs']['save_video_period'] = 1 for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): if (variant['reward_type'] == 'sparse' and variant['dynamics_model_version'] != 'fixed_standard_laplace'): continue for seed in range(n_seeds): variant['exp_id'] = exp_id # variant['seed'] = seed run_experiment( probabilistic_goal_reaching_experiment, exp_name=exp_name, mode=mode, variant=variant, use_gpu=False, num_exps_per_instance=2, slurm_config_name='cpu_co', # slurm_config_name='cpu_co', gcp_kwargs=dict(zone='us-east1-c', gpu_kwargs=dict( gpu_model='nvidia-tesla-k80', num_gpu=1, )), time_in_mins=10 * 60, )
) n_seeds = 1 mode = 'local' exp_name = 'dev-{}'.format( __file__.replace('/', '-').replace('_', '-').split('.')[0]) n_seeds = 2 mode = 'sss' exp_name = 'pnp-img-obs-enc-d-rew-many-heads--sweep-random-init-do-not-encode-state' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for seed in range(n_seeds): variant['exp_id'] = exp_id # variant['seed'] = seed run_experiment( encoder_goal_conditioned_sac_experiment, exp_name=exp_name, mode=mode, variant=variant, use_gpu=True, num_exps_per_instance=3, # slurm_config_name='cpu_co', gcp_kwargs=dict(zone='us-east1-c', gpu_kwargs=dict( gpu_model='nvidia-tesla-k80', num_gpu=1, )), time_in_mins=int(2.5 * 24 * 60), )
algorithm='HER-tSAC', version='normal', observation_key='observation', desired_goal_key='desired_goal', ) search_space = {} sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) n_seeds = 1 mode = 'local' exp_prefix = 'dev' # n_seeds = 5 # mode = 'ec2' # exp_prefix = 'fetch-push-test' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for i in range(n_seeds): run_experiment( relabeling_tsac_experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, time_in_mins=23 * 60, snapshot_mode='gap_and_last', snapshot_gap=100, )
dict(num_examples=1024, version='circle', radius=3), # dict(num_examples=1024, version='circle', radius=0), ], ), ) n_seeds = 1 mode = 'local' exp_name = 'dev-{}'.format( __file__.replace('/', '-').replace('_', '-').split('.')[0]) # n_seeds = 3 # mode = 'sss' # exp_name = __file__.split('/')[-1].split('.')[0].replace('_', '-') # print('exp_name', exp_name) exp_name = 'dev-set-vae-2d-two-circles' search_space = {} sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): run_experiment( train_2d_set_vae, exp_name=exp_name, mode=mode, variant=variant, use_gpu=True, )
'grill_variant.algo_kwargs.base_kwargs.num_updates_per_env_step': [1, 2, 4, 6], 'grill_variant.algo_kwargs.base_kwargs.max_path_length': [100], 'grill_variant.algo_kwargs.online_vae_kwargs.oracle_data': [False], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) n_seeds = 1 mode = 'local' exp_prefix = 'dev' n_seeds = 2 mode = 'ec2' exp_prefix = 'online-vae-pushing-parallel-sweep-NUPO' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): run_experiment( grill_her_td3_online_vae_full_experiment, exp_id=exp_id, exp_prefix=exp_prefix, mode=mode, variant=variant, use_gpu=True, snapshot_gap=200, snapshot_mode='gap_and_last', num_exps_per_instance=1, )
do_scatterplot=False, batch_size=128, lr=1e-3, ), save_period=5, ), env_class=Point2DWallEnv, env_kwargs=dict( render_onscreen=False, ball_radius=1, images_are_rgb=True, show_goal=False, ), algorithm='RIG', ) n_seeds = 1 mode = 'here_no_doodad' exp_prefix = 'rlkit-pointmass-rig-example' for _ in range(n_seeds): run_experiment( grill_her_td3_full_experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, # use_gpu=True, # Turn on if you have a GPU )
], 'create_vae_kwargs.decoder_distribution': [ 'gaussian_learned_global_image_variance', ], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): beta = variant['vae_trainer_kwargs']['beta'] slw = variant['vae_trainer_kwargs']['set_loss_weight'] for _ in range(n_seeds): variant['logger_config'] = dict( trial_dir_suffix='beta-{}-slw{}'.format( beta, slw, )) run_experiment( train_set_vae, variant=variant, exp_name='vae-encoder-set-loss-sweep', mode='sss', # exp_name='dev-vae-encoder-sweep', # mode='here_no_doodad', # slurm_config_name='gpu_fc', # slurm_config_name='gpu_low_pri', # exp_name='vae-bernoulli-decoder', use_gpu=True, )
), ) search_space = { 'env_id':['SawyerDoorHookEnv-v0', 'SawyerDoorHookResetFreeEnv-v0'], 'grill_variant.exploration_noise':[0.3, .8], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) # n_seeds = 1 # mode = 'local' # exp_prefix = 'test' n_seeds = 1 mode = 'ec2' exp_prefix = 'sawyer_door_state_her_td3' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for i in range(n_seeds): run_experiment( grill_her_td3_full_experiment, exp_prefix=exp_prefix, mode=mode, snapshot_mode='gap_and_last', snapshot_gap=50, variant=variant, use_gpu=True, num_exps_per_instance=4, )
'algo_params.discount': [0.99, 0.9, 0.5], 'algo_params.policy_learning_rate': [1e-4, 1e-3, 1e-2], 'algo_params.qf_learning_rate': [1e-4, 1e-3, 1e-2], 'algo_params.target_hard_update_period': [10, 100, 1000], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for i in range(n_seeds): seed = random.randint(0, 10000) run_experiment( experiment, exp_prefix=exp_prefix, seed=seed, mode=mode, variant=variant, exp_id=exp_id, sync_s3_log=True, sync_s3_pkl=True, periodic_sync_interval=600, ) if run_mode == 'random': hyperparameters = [ hyp.LinearFloatParam('algo_params.discount', 0, 1), hyp.LogFloatParam('algo_params.policy_learning_rate', 1e-7, 1e-1), hyp.LogFloatParam('algo_params.qf_learning_rate', 1e-7, 1e-1), hyp.LogIntParam('algo_params.target_hard_update_period', 1, 1000), ] sweeper = hyp.RandomHyperparameterSweeper( hyperparameters, default_kwargs=variant,
Make sure to hardcode this in vae_launcher.py ungrouped_imgs = generate_images( env, renderer, num_images=num_ungrouped_images, set=train_sets[0]) """) for _ in range(n_seeds): for exp_id, variant in enumerate(variants): variant['vae_trainer_kwargs']['beta'] = ( 1. / variant['create_vae_kwargs']['latent_dim'] ) variant['vae_trainer_kwargs']['debug_bad_recons'] = ( variant['create_vae_kwargs']['decoder_distribution'] == 'gaussian_learned_global_scalar_variance' ) if mode == 'local': variant['vae_algo_kwargs']['num_iters'] = 1 variant['vae_algo_kwargs']['num_epochs_per_iter'] = 1 # variant['generate_train_set_kwargs']['saved_filename'] = ( # 'manual-upload/sets/hand2xy_hand2x_1obj2xy_1obj2x_num_objs_1.pickle' # ) run_experiment( train_set_vae, exp_name=exp_name, prepend_date_to_exp_name=True, num_exps_per_instance=2, mode=mode, variant=variant, # slurm_config_name='cpu', use_gpu=True, # gpu_id=1, )
output_size=8, hidden_sizes=[8])) algo_search_space = copy.deepcopy(algo_variant) algo_search_space = {k: [v] for k, v in algo_search_space.items()} algo_search_space.update( # insert sweep params here ) env_sweeper = hyp.DeterministicHyperparameterSweeper( env_search_space, default_parameters=env_variant, ) algo_sweeper = hyp.DeterministicHyperparameterSweeper( algo_search_space, default_parameters=algo_variant, ) for exp_id, env_vari in enumerate(env_sweeper.iterate_hyperparameters()): for algo_vari in algo_sweeper.iterate_hyperparameters(): variant = {'algo_kwargs': algo_vari, 'env_kwargs': env_vari} for _ in range(n_seeds): run_experiment(experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, use_gpu=use_gpu, region='us-west-2', num_exps_per_instance=3, snapshot_mode='gap', snapshot_gap=10)
# 2, # 5, # 10, # ], 'vae_kwargs.weight_loss': [ True, ], 'vae_kwargs.skew_sampling': [ False, ], 'append_all_data': [ # True, False, ], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): run_experiment( train_from_variant, exp_prefix=exp_prefix, mode=mode, variant=variant, exp_id=exp_id, # skip_wait=True, use_gpu=True, )
epoch_length=num_steps_per_iteration, eval_samples=100, max_path_length=H, discount=1, ), env_params=dict(num_steps=H, # use_small_maze=True, ), ou_params=dict( max_sigma=1, min_sigma=None, ), exp_prefix=exp_prefix, env_class=env_class, version="DDPG") exp_id = -1 for seed in range(n_seeds): exp_id += 1 set_seed(seed) variant['seed'] = seed variant['exp_id'] = exp_id run_experiment( run_linear_ocm_exp, exp_prefix=exp_prefix, seed=seed, mode=mode, variant=variant, exp_id=exp_id, )
# False, # ], 'have_no_disentangled_encoder': [ True, ], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) n_seeds = 1 mode = 'local' exp_prefix = '{}'.format( __file__.replace('/', '-').replace('_', '-').split('.')[0]) # n_seeds = 5 # mode = 'sss' # exp_prefix = 'disentangled-basic-test-envs-new-vae' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): run_experiment( disentangled_grill_her_twin_sac_experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, use_gpu=True, time_in_mins=int(2.5 * 24 * 60), )
discount=0.99, qf_learning_rate=1e-3, policy_learning_rate=1e-4, ), version="DDPG", epoch_discount_schedule_class=LinearSchedule, epoch_discount_schedule_params=dict( min_value=0., max_value=0.99, ramp_duration=99, ), ) for env_class in [ SwimmerEnv, HalfCheetahEnv, AntEnv, HopperEnv, ]: variant['env_class'] = env_class variant['version'] = str(env_class) for _ in range(5): seed = random.randint(0, 999999) run_experiment( experiment, exp_prefix="ddpg-increase-gamma", seed=seed, mode='ec2', variant=variant, use_gpu=False, )
default_parameters=variant, ) n_seeds = 1 mode = 'local' exp_name = 'dev-{}'.format( __file__.replace('/', '-').replace('_', '-').split('.')[0]) n_seeds = 1 mode = 'sss' exp_name = 'one-obj-img-obs-state-reward-sweep-round3-black-background' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for seed in range(n_seeds): variant['exp_id'] = exp_id # variant['seed'] = seed run_experiment( image_based_goal_conditioned_sac_experiment, exp_name=exp_name, mode=mode, variant=variant, use_gpu=True, num_exps_per_instance=3, gcp_kwargs=dict(zone='us-east1-c', gpu_kwargs=dict( gpu_model='nvidia-tesla-k80', num_gpu=1, )), time_in_mins=int(10 * 60), )
n_seeds = 2
# mode = 'sss'
exp_prefix = 'exp2-single-set-local-mode'

search_space = {
    # 'vae_algo_kwargs.num_iters': [1],
    'vae_trainer_kwargs.set_loss_weight': [0, 1, 100],
}
sweeper = hyp.DeterministicHyperparameterSweeper(
    search_space,
    default_parameters=variant,
)

# The variants are materialized once so that every repeat walks the same
# list in the same order.
variants = list(sweeper.iterate_hyperparameters())

# Arguments that are identical for every launch.
launch_kwargs = dict(
    exp_name=exp_prefix,
    num_exps_per_instance=2,
    mode=mode,
    slurm_config_name='cpu',
    use_gpu=True,
    gpu_id=1,
)
for _repeat in range(n_seeds):
    for exp_id, variant in enumerate(variants):
        variant['exp_id'] = exp_id
        run_experiment(disco_experiment, variant=variant, **launch_kwargs)
policy_kwargs=dict(hidden_sizes=[400, 300], ), algorithm='SAC', version='SAC', env_class=HalfCheetahEnv, ) search_space = { 'env_class': [ HalfCheetahEnv, AntEnv, HopperEnv, Walker2dEnv, ], 'algo_kwargs.reward_scale': [0.1, 1, 10], # 'algo_kwargs.num_updates_per_env_step': [1, 5], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(2): run_experiment( experiment, # exp_prefix="dev-sac-sweep", exp_prefix="sac-sweep-try-reparameterization", mode='ec2', exp_id=exp_id, variant=variant, use_gpu=False, )
"hinge_cabinet", # "light_switch", ], "actor_kwargs.hidden_size": [100, 512], "algorithm_kwargs.clip_param": [0.1, 0.2], "actor_kwargs.hidden_activation": ["relu", "tanh"], "use_linear_lr_decay": [True, False], "rollout_kwargs.use_proper_time_limits": [True, False], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(args.num_seeds): seed = random.randint(0, 100000) variant["seed"] = seed variant["exp_id"] = exp_id run_experiment( experiment, exp_prefix=args.exp_prefix, mode=args.mode, variant=variant, use_gpu=False, snapshot_mode="none", python_cmd=subprocess.check_output( "which python", shell=True).decode("utf-8")[:-1], seed=seed, exp_id=exp_id, )
epsilon=0.5, tau=0.001, ), env_params=dict(), qf_kwargs=dict(hidden_sizes=[32, 32], )) search_space = { # 'env_params.num_bins': [3, 5, 10], # 'env_params.reward_position': [False, True], # 'algo_params.tau': [0.01, 0.001], # 'algo_params.reward_scale': [0.1, 1, 10], # 'algo_params.epsilon': [0.1, 0.5], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for i in range(n_seeds): seed = random.randint(0, 10000) run_experiment( experiment, seed=seed, # exp_prefix="dqn-swimmer-sweep", # mode='ec2', # use_gpu=False, exp_prefix="dev-dqn-swimmer", mode='local', use_gpu=True, variant=variant, )
# Wrap each existing value in a one-element list, the form the search
# space uses for its values.
algo_search_space = dict(
    (key, [value]) for key, value in algo_search_space.items()
)
algo_search_space.update(
    # insert sweep params here
)

env_sweeper = hyp.DeterministicHyperparameterSweeper(
    env_search_space,
    default_parameters=env_variant,
)
algo_sweeper = hyp.DeterministicHyperparameterSweeper(
    algo_search_space,
    default_parameters=algo_variant,
)

# Arguments that are identical for every launch.
launch_kwargs = dict(
    exp_prefix=exp_prefix,
    mode=mode,
    use_gpu=use_gpu,
    region='us-east-2',
    num_exps_per_instance=1,
    snapshot_mode='gap',
    snapshot_gap=10,
    # instance_type='c5.large',
    spot_price=0.08,
)

# Every env setting is paired with every algorithm setting.
for exp_id, env_vari in enumerate(env_sweeper.iterate_hyperparameters()):
    for algo_vari in algo_sweeper.iterate_hyperparameters():
        variant = dict(algo_kwargs=algo_vari, env_kwargs=env_vari)
        for _repeat in range(n_seeds):
            run_experiment(experiment, variant=variant, **launch_kwargs)
init_camera=sawyer_pick_and_place_camera, ) search_space = {} sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) n_seeds = 1 mode = 'local' exp_prefix = 'dev-{}'.format(__file__.replace('/', '-').replace('_', '-').split('.')[0]) # n_seeds = 3 # mode = 'gcp' # exp_prefix = 'skew-fit-pickup-reference-post-refactor' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): run_experiment( skewfit_full_experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, use_gpu=True, snapshot_gap=200, snapshot_mode='gap_and_last', num_exps_per_instance=3, gcp_kwargs=dict(zone='us-west1-b', ), )
time.sleep(1) logger.log("end") logger.log('Local date & time is: {}'.format(date.strftime(date_format))) logger.log("start mujoco") from gym.envs.mujoco import HalfCheetahEnv e = HalfCheetahEnv() img = e.sim.render(32, 32) logger.log(str(sum(img))) logger.log("end mujoco_py") if __name__ == "__main__": # noinspection PyTypeChecker date_format = '%m/%d/%Y %H:%M:%S %Z' date = datetime.now(tz=pytz.utc) for seed in range(5): variant = dict( num_seconds=10, launch_time=str(date.strftime(date_format)), logger_config=dict(), seed=seed, ) run_experiment( example, exp_name='gcp-doodad-easy-launch-example', mode='gcp', variant=variant, use_gpu=False, )
# n_seeds = 1
# mode = 'ec2'
# exp_prefix = 'sawyer-new-pusher'

# Each active list holds a single value; the alternatives are kept as
# comments.
search_space = {
    # 'env_kwargs.randomize_goals': [True, False],
    'algo_kwargs.max_path_length': [100],
    'env_kwargs.reward_info.type': [
        # 'hand_to_object_only',
        'shaped',
    ],
    'exploration_type': [
        'ou',
        # 'epsilon',
        # 'gaussian',
    ],
}
sweeper = hyp.DeterministicHyperparameterSweeper(
    search_space,
    default_parameters=variant,
)

# Arguments that are identical for every launch.
shared_kwargs = dict(exp_prefix=exp_prefix, mode=mode)
for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
    for _repeat in range(n_seeds):
        run_experiment(
            td3_experiment,
            variant=variant,
            exp_id=exp_id,
            **shared_kwargs
        )
AntEnv, HopperEnv, Walker2dEnv, ], 'algo_kwargs.reward_scale': [10000, 100, 1, 0.01], 'algo_kwargs.optimizer_class': [ optim.Adam, ], 'algo_kwargs.tau': [ 1e-2, ], 'algo_kwargs.num_updates_per_env_step': [ 1, ], 'es_kwargs.max_sigma': [0.01, 0.1, 0.5], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(1): run_experiment( example, exp_prefix="n3dpg-sweep-hard-tasks", mode='ec2', exp_id=exp_id, variant=variant, use_gpu=False, )
0, ], 'generate_set_for_rl_kwargs.saved_filename': [ '6sets128samples_xy_x_y.pickle', '9sets128samples_xy_x_y.pickle', ], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = list(sweeper.iterate_hyperparameters()) for _ in range(n_seeds): for exp_id, variant in enumerate(variants): if variant['generate_set_for_rl_kwargs']['saved_filename'] == ( '6sets128samples_xy_x_y'): variant['algo_kwargs']['num_eval_steps_per_epoch'] = (6000) else: variant['algo_kwargs']['num_eval_steps_per_epoch'] = (9000) run_experiment( disco_experiment, exp_name=exp_prefix, prepend_date_to_exp_name=False, num_exps_per_instance=2, mode=mode, variant=variant, # slurm_config_name='cpu', use_gpu=True, # gpu_id=1, )
if __name__ == "__main__":
    # Model settings.
    vae_settings = {
        'imsize': 64,
        'representation_size': 128,
        'input_channels': 3,
        'decoder_distribution': 'gaussian_identity_variance',
        'beta': 1,
        'K': 7,
        'T': 10,
    }
    # Training settings.
    algo_settings = {
        'gamma': 0.5,
        'batch_size': 8,
        'lr': 1e-4,
        'log_interval': 0,
    }
    variant = {
        'vae_kwargs': vae_settings,
        'algo_kwargs': algo_settings,
        'num_epochs': 10000,
        'algorithm': 'VAE',
        'save_period': 5,
        'physics': True,
    }

    # Launch `main` once with the variant above.
    run_experiment(
        main,
        variant=variant,
        exp_prefix='iodine-blocks-mpc',
        mode='here_no_doodad',
        use_gpu=True,  # Turn on if you have a GPU
    )
hidden_sizes=[300, 300], structure='norm_difference', ), policy_kwargs=dict(hidden_sizes=[300, 300], ), es_kwargs=dict( theta=0.1, max_sigma=0.1, min_sigma=0.1, ), qf_criterion_class=HuberLoss, algorithm="DDPG-TDM", ) search_space = { 'algo_kwargs.base_kwargs.num_updates_per_env_step': [1, 5, 10], 'algo_kwargs.tdm_kwargs.max_tau': [0, 5], 'env_class': [SawyerXYEnv, SawyerReachXYEnv], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): run_experiment( experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, )
'obj_distance', 'obj_success', 'hand_and_obj_distance', 'hand_and_obj_success', ], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) n_seeds = 1 mode = 'local' exp_prefix = 'dev-{}'.format( __file__.replace('/', '-').replace('_', '-').split('.')[0]) n_seeds = 3 mode = 'sss' exp_prefix = 'her-td3-pick-reward-sweep-take2' for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): for _ in range(n_seeds): run_experiment( experiment, exp_prefix=exp_prefix, mode=mode, variant=variant, use_gpu=False, time_in_mins=int(2.5 * 24 * 60), )