use_linear_dynamics=False, is_auto_encoder=False, batch_size=64, lr=1e-3, ), save_period=50, ), num_exps_per_instance=3, ) search_space = { 'train_vae_variant.beta':[.5, 2.5], 'env_id':['SawyerPushAndReacherXYEnv-v0'], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()): # if variant['env_id'] == 'SawyerPushAndReachXYEnv-No-Arena-v0': # variant['train_vae_variant']['generate_vae_dataset_kwargs']['dataset_path'] = \ # 'datasets/SawyerPushAndReachXYEnv-No-Arena-v0_N5000_sawyer_pusher_camera_upright_v3_imsize84_random_oracle_split_0.npy' # else: # variant['train_vae_variant']['generate_vae_dataset_kwargs']['dataset_path'] = \ # 'datasets/SawyerPushAndReachXYEnv-No-Arena-v1_N5000_sawyer_pusher_camera_upright_v3_imsize84_random_oracle_split_0.npy' variants.append(variant) run_variants(grill_her_td3_full_experiment, variants, run_id=3)
# tensorboard=True, # ), load_demos=True, pretrain_policy=True, pretrain_rl=True, # save_pretrained_algorithm=True, # snapshot_mode="all", env='pen-sparse-v0', ) experiment(variant) if __name__ == "__main__": variant = dict() search_space = { 'seedid': range(3), } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for variant in sweeper.iterate_hyperparameters(): variants.append(variant) run_variants(local_exp, variants, run_id=0)
'seedid': range(5), 'algo_kwargs.base_kwargs.num_updates_per_env_step': [ 16, ], 'replay_buffer_kwargs.fraction_goals_rollout_goals': [ 0.1, ], 'replay_buffer_kwargs.fraction_goals_env_goals': [ 0.5, ], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) # n_seeds = 1 # mode = 'local' # exp_prefix = 'test' n_seeds = 1 mode = 'ec2' exp_prefix = 'sawyer_pusher_state_final' variants = [] for variant in sweeper.iterate_hyperparameters(): variants.append(variant) run_variants(her_td3_experiment, variants, run_id=0)
distance_weight=0, skew_dataset=False, priority_function_kwargs=dict( decoder_distribution='gaussian_identity_variance', sampling_method='importance_sampling', # sampling_method='true_prior_sampling', num_latents_to_sample=10, ), use_parallel_dataloading=False, ), save_period=25, ), ) search_space = { 'seedid': range(1), 'train_vae_variant.representation_size': [(16, 16)], #(3 * objects, 3 * colors) 'train_vae_variant.beta': [50], #THIS IS A LINEAR INTERPOLATION CURRENTLY # 'train_vae_variant.generate_vae_dataset_kwargs.n_random_steps': [2],#, 50, 100], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for variant in sweeper.iterate_hyperparameters(): variants.append(variant) run_variants(experiment, variants, run_id=101)
wrap_mujoco_env=True, track_qpos_goal=5, do_state_based_exp=False, exploration_noise=0.1, reward_params=dict( type="sparse", epsilon=20.0, ), save_video=False, ) n_seeds = 3 search_space = { 'exploration_type': ['epsilon'], 'env_kwargs.arm_range': [1.0], 'algo_kwargs.reward_scale': [1], 'algo_kwargs.discount': [0.99], 'exploration_noise': [0.2], 'reward_params.epsilon': [20.0, 100.0, 5.0], 'replay_kwargs.fraction_goals_are_env_goals': [0.0, 0.5, 1.0], 'replay_kwargs.fraction_goals_are_rollout_goals': [0.2, 1.0], # 'rdim': [2, 4, 8, 16], 'seedid': range(n_seeds), } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) run_variants(experiment, sweeper.iterate_hyperparameters(), run_id=1)
sampling_method='importance_sampling', # sampling_method='true_prior_sampling', num_latents_to_sample=10, ), use_parallel_dataloading=False, ), save_period=25, ), ) search_space = { 'seedid': range(3), 'grill_variant.reward_params.type': ['latent_distance'], 'train_vae_variant.representation_size': [ (4, 4), ], #(3 * objects, 3 * colors) 'train_vae_variant.beta': [50], 'train_vae_variant.generate_vae_dataset_kwargs.n_random_steps': [100] } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for variant in sweeper.iterate_hyperparameters(): variants.append(variant) run_variants(grill_her_twin_sac_online_vae_full_experiment, variants, run_id=10)
'env_id': [ 'SawyerPushDebugLEAP-v3', 'SawyerPushDebugLEAPPuckRew-v3', ], 'rl_variant.use_masks': [True, False], 'rl_variant.max_path_length': [200], 'init_camera': [sawyer_xyz_reacher_camera_v0], 'rl_variant.vis_kwargs.vis_list': [ [], ], # 'plt', # ]], 'rl_variant.algo_kwargs.num_trains_per_train_loop': [ 4000, ], 'rl_variant.es_kwargs.max_sigma': [0.2, 0.5, 0.8], 'seedid': range(3), } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for variant in sweeper.iterate_hyperparameters(): if not (variant['env_id'] == 'SawyerPushDebugLEAPPuckRew-v3' and variant['rl_variant']['use_masks'] == True): variants.append(variant) run_variants(rl_experiment, variants, run_id=0)
trainer_kwargs=dict( lr=1e-3, loss_weights=dict( mse=1.0, ) ), save_period=100, slurm_variant=dict( timeout_min=48 * 60, cpus_per_task=10, gpus_per_node=1, ), num_train_workers=8, ) search_space = { "dataset_kwargs.dataset_name": ["doors2", "pour1"], "model_kwargs.delta_features": [True, ], "model_kwargs.pretrained_features": [True, False, ], "model_kwargs.normalize": [True, ], "trainer_kwargs.loss_weights.mse": [0.0, 1.0], "trainer_kwargs.loss_weights.classification_gradients": [False, True, ], "trainer_class": [GeometricTimePredictorTrainer], "seedid": range(1), } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) run_variants(train_rfeatures_model, sweeper.iterate_hyperparameters(), run_id=1)
4000, ], 'env_kwargs.reward_type': [ 'hand_and_puck_distance', ], 'policy_kwargs.min_log_std': [ -6, ], 'expl_mask_distribution_kwargs.weights': [ (1, 1, 1), (0, 0, 1), (0, 1, 0), ], 'eval_mask_distribution_kwargs.weights': [ (1, 0, 0), (0, 1, 0), (0, 0, 1), ], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for variant in sweeper.iterate_hyperparameters(): variants.append(variant) run_variants(experiment, variants, process_args)
# # 'shaped', # 'euclidean', # ], # 'exploration_type': [ # 'epsilon', # 'ou', # 'gaussian', # ], 'seed_id': range(n_seeds), 'replay_buffer_kwargs.fraction_resampled_goals_are_env_goals': [ 0.0, 0.5, ], 'replay_buffer_kwargs.fraction_goals_are_rollout_goals': [ 0.2, 1.0, ], 'env_kwargs.randomize_position_on_reset': [False, True], 'env_kwargs.wall_shape': ["u"], 'algo_kwargs.her_kwargs.rollout_goal_params.exploration_temperature': [1.0, 100.0, 1e-2, 1e-4], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) run_variants(her_td3_experiment, sweeper.iterate_hyperparameters(), run_id=0)
skew_config=dict( method='vae_prob', power=0, ), skew_dataset=False, priority_function_kwargs=dict( decoder_distribution='gaussian_identity_variance', sampling_method='importance_sampling', # sampling_method='true_prior_sampling', num_latents_to_sample=10, ), use_parallel_dataloading=False, ), save_period=25, ), ) search_space = { 'seedid': range(5), } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for variant in sweeper.iterate_hyperparameters(): variants.append(variant) run_variants(arl_full_experiment, variants, run_id=0)
) search_space = { 'td3_bc_trainer_kwargs.use_awr': [False], # 'td3_bc_trainer_kwargs.demo_beta':[1, 10], 'td3_bc_trainer_kwargs.bc_weight': [1, 0], 'td3_bc_trainer_kwargs.rl_weight': [1], 'algo_kwargs.num_epochs': [1000], 'algo_kwargs.num_eval_steps_per_epoch': [100], 'algo_kwargs.num_expl_steps_per_train_loop': [100], 'algo_kwargs.min_num_steps_before_training': [0], # 'td3_bc_trainer_kwargs.add_demos_to_replay_buffer':[True, False], # 'td3_bc_trainer_kwargs.num_trains_per_train_loop':[1000, 2000, 4000, 10000, 16000], # 'exploration_noise':[0.1, .3, .5], # 'pretrain_rl':[True], # 'pretrain_policy':[False], 'pretrain_rl': [False], 'pretrain_policy': [False], 'seedid': range(5), } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for variant in sweeper.iterate_hyperparameters(): variants.append(variant) run_variants(state_td3bc_experiment, variants, run_id=0)
skew_config=dict( method='vae_prob', power=0, ), skew_dataset=False, priority_function_kwargs=dict( decoder_distribution='gaussian_identity_variance', sampling_method='importance_sampling', # sampling_method='true_prior_sampling', num_latents_to_sample=10, ), use_parallel_dataloading=False, ), save_period=25, ), ) search_space = { 'seedid': range(5), } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for variant in sweeper.iterate_hyperparameters(): variants.append(variant) run_variants(grill_her_twin_sac_full_experiment, variants, run_id=0)
decoder_distribution='gaussian_identity_variance', sampling_method='importance_sampling', # sampling_method='true_prior_sampling', num_latents_to_sample=10, ), use_parallel_dataloading=False, ), save_period=25, ), region="us-west-2", ) search_space = { 'seedid': range(2), 'grill_variant.algo_kwargs.rl_offpolicy_num_training_steps': [10, ], # 'grill_variant.reward_params.type':['latent_bound'], #, 'latent_distance' # 'train_vae_variant.representation_size': [(6, 8)], #(3 * objects, 3 * colors) # 'train_vae_variant.beta': [1], # 'train_vae_variant.generate_vae_dataset_kwargs.n_random_steps': [100] } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for variant in sweeper.iterate_hyperparameters(): variants.append(variant) run_variants(grill_her_td3_offpolicy_online_vae_full_experiment, variants, run_id=0)
), policy_kwargs=dict( hidden_sizes=[32, 32], ), save_video=True, dump_video_kwargs=dict( save_period=1, # imsize=(3, 500, 300), ), desired_trajectory="/home/anair/ros_ws/src/railrl-private/demos/door_demos_v3/demo_v3_0.pkl", logger_variant=dict( tensorboard=True, ), model_path="/home/anair/data/s3doodad/facebook/models/rfeatures/multitask1/run2/id2/itr_4000.pt", ) search_space = { 'seedid': range(1), } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for variant in sweeper.iterate_hyperparameters(): variants.append(variant) run_variants(encoder_wrapped_td3bc_experiment, variants, run_id=3)
method='vae_prob', power=0, ), skew_dataset=False, priority_function_kwargs=dict( decoder_distribution='gaussian_identity_variance', sampling_method='importance_sampling', # sampling_method='true_prior_sampling', num_latents_to_sample=10, ), use_parallel_dataloading=False, ), save_period=25, ), ) search_space = { 'seedid': range(5), 'train_vae_variant.representation_size': [2, 4, 8, 16], } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for variant in sweeper.iterate_hyperparameters(): variants.append(variant) run_variants(experiment, variants, run_id=2)
priority_function_kwargs=dict( decoder_distribution='gaussian_identity_variance', sampling_method='importance_sampling', # sampling_method='true_prior_sampling', num_latents_to_sample=10, ), use_parallel_dataloading=False, ), save_period=25, ), region="us-west-1", ) search_space = { 'seedid': range(5), # 'grill_variant.reward_params.type':['latent_bound'], #, 'latent_distance' # 'train_vae_variant.representation_size': [(6, 8)], #(3 * objects, 3 * colors) # 'train_vae_variant.beta': [1], # 'train_vae_variant.generate_vae_dataset_kwargs.n_random_steps': [100] } sweeper = hyp.DeterministicHyperparameterSweeper( search_space, default_parameters=variant, ) variants = [] for variant in sweeper.iterate_hyperparameters(): variants.append(variant) run_variants(grill_her_td3_online_vae_full_experiment, variants, run_id=2)