def main(mode, debug, dry):
    exp_prefix = '0118_walker_walk'
    env_arg_dict = {'walker-walk': {}}

    vg = VariantGenerator()
    vg.add('algorithm', ['dreamer', 'planet'])
    vg.add('env_name', ['walker-walk'])
    vg.add('env_kwargs', lambda env_name: [env_arg_dict[env_name]])
    vg.add('env_kwargs_camera_name', ['default_camera'])
    vg.add('image_dim', [64])  # Kept the same as the original paper
    vg.add('action_repeat', [2])
    vg.add('planning_horizon', [12])
    vg.add('max_episode_length', [1000])
    vg.add('use_value_function', [False])
    vg.add('seed', [100, 200, 300])
    if not debug:
        vg.add('collect_interval', [100])
        # Add possible vgs for non-debug purposes here
    else:
        vg.add('collect_interval', [1])
        exp_prefix += '_debug'

    print('Number of configurations: ', len(vg.variants()))

    sub_process_popens = []
    for idx, vv in enumerate(vg.variants()):
        # Keep at most two launched experiments running at a time.
        while len(sub_process_popens) >= 2:
            sub_process_popens = [x for x in sub_process_popens if x.poll() is None]
            time.sleep(10)
        if mode == 'seuss':
            if idx == 0:
                compile_script = 'compile.sh'  # For the first experiment, compile the current softgym
                wait_compile = None
            else:
                compile_script = None
                wait_compile = 120  # Wait 120 seconds for the compilation to finish
        else:
            compile_script = wait_compile = None
        cur_popen = run_experiment_lite(
            stub_method_call=run_task,
            variant=vv,
            mode=mode,
            dry=dry,
            use_gpu=True,
            exp_prefix=exp_prefix,
            wait_subprocess=debug,
            compile_script=compile_script,
            wait_compile=wait_compile,
        )
        if cur_popen is not None:
            sub_process_popens.append(cur_popen)
        if debug:
            break
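# Illustrative sketch (not part of the original launchers): every script in this collection
# relies on VariantGenerator expanding the vg.add(...) calls into the Cartesian product of the
# supplied value lists; a lambda receives previously added keys and must return a list.
# The import path below is an assumption; adjust it to wherever this repo defines the class.
from chester.run_exp import VariantGenerator


def _variant_grid_example():
    vg = VariantGenerator()
    vg.add('algorithm', ['dreamer', 'planet'])              # 2 values
    vg.add('seed', [100, 200, 300])                         # 3 values
    vg.add('tag', lambda algorithm: [algorithm + '_run'])   # derived from an earlier key
    # 2 algorithms x 3 seeds x 1 derived tag = 6 variant dicts.
    for vv in vg.variants():
        print(vv['algorithm'], vv['seed'], vv['tag'])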
def main(mode, debug, dry):
    exp_prefix = 'corl_camera_ready_qpg_cloth'

    vg = VariantGenerator()
    vg.add('algorithm', ['qpg'])
    vg.add('env_name', ['ClothFlatten', 'ClothFold'])
    vg.add('env_kwargs', lambda env_name: [env_arg_dict[env_name]])
    vg.add('env_kwargs_camera_name', ['default_camera'])
    vg.add('env_kwargs_render', [True])
    vg.add('env_kwargs_observation_mode', ['cam_rgb'])
    vg.add('env_kwargs_num_picker', [1])
    vg.add('env_kwargs_action_repeat', [1])
    vg.add('env_kwargs_horizon', [20])
    vg.add('env_kwargs_action_mode', ['picker_qpg'])
    vg.add('env_kwargs_reward_type',
           lambda env_name: ['index', 'bigraph'] if env_name == 'RopeAlphaBet' else [None])  # Only used for RopeAlphaBet
    vg.add('config_key', ['sac_pixels_cloth_corner_softgym'])
    vg.add('random_location', [True])
    vg.add('sac_module', ['sac_v2'])
    vg.add('sac_agent_module', ['sac_agent_v2'])
    vg.add('seed', [100, 200, 300])
    if not debug:
        # Add possible vgs for non-debug purposes here
        pass
    else:
        exp_prefix += '_debug'

    print('Number of configurations: ', len(vg.variants()))

    sub_process_popens = []
    for idx, vv in enumerate(vg.variants()):
        while len(sub_process_popens) >= 1:
            sub_process_popens = [x for x in sub_process_popens if x.poll() is None]
            time.sleep(10)
        if mode == 'seuss':
            if idx == 0:
                compile_script = 'compile_1.0.sh'  # For the first experiment, compile the current softgym
                wait_compile = None
            else:
                compile_script = None
                wait_compile = 120  # Wait 120 seconds for the compilation to finish
        else:
            compile_script = wait_compile = None
        cur_popen = run_experiment_lite(
            stub_method_call=run_task,
            variant=vv,
            mode=mode,
            dry=dry,
            use_gpu=True,
            exp_prefix=exp_prefix,
            wait_subprocess=debug,
            compile_script=compile_script,
            wait_compile=wait_compile,
        )
        if cur_popen is not None:
            sub_process_popens.append(cur_popen)
        if debug:
            break
    vg.add('env_id', ['HalfCheetah-v2', 'Hopper-v2', 'InvertedPendulum-v2'])
    # Select random seeds from 0 to 4
    vg.add('seed', [0, 1, 2, 3, 4])
    print('Number of configurations: ', len(vg.variants()))

    # Set the maximum number of experiments to run in parallel;
    # this number depends on the number of processors on the runner.
    maximum_launching_process = 5

    # Launch the experiments.
    sub_process_popens = []
    for vv in vg.variants():
        while len(sub_process_popens) >= maximum_launching_process:
            sub_process_popens = [x for x in sub_process_popens if x.poll() is None]
            time.sleep(10)
        # Import the launcher target: use your own run_task function here.
        from chester.examples.train import run_task
        cur_popen = run_experiment_lite(
            stub_method_call=run_task,
            variant=vv,
            mode='local',
            exp_prefix=exp_prefix,
            wait_subprocess=False,
        )
        if cur_popen is not None:
            sub_process_popens.append(cur_popen)
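# Hypothetical sketch (not from the original scripts): run_experiment_lite launches the stub
# method in a subprocess, so run_task must be importable at module level. The exact call
# signature is defined by your own training code; chester-style launchers commonly pass the
# variant dict plus a log directory and experiment name, which is assumed here.
import json
import os


def run_task(vv, log_dir=None, exp_name=None):
    """Minimal training entry point: `vv` is one variant dict produced by VariantGenerator."""
    if log_dir is not None:
        os.makedirs(log_dir, exist_ok=True)
        # Record the exact configuration used for this run.
        with open(os.path.join(log_dir, 'variant.json'), 'w') as f:
            json.dump(vv, f, indent=2, default=str)
    # ... build the environment / agent from vv and start training here ...
    print('Running {} with seed {}'.format(exp_name, vv.get('seed', 0)))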
def main(mode, debug, dry):
    vg = VariantGenerator()
    variant = dict(
        algorithm='Skew-Fit',
        double_algo=False,
        online_vae_exploration=False,
        imsize=48,
        init_camera=sawyer_init_camera_zoomed_in,
        env_id='SawyerPushNIPSEasy-v0',
        skewfit_variant=dict(
            env_kwargs={'reset_free': False},
            observation_mode='original_image',
            preprocess_obs_mlp_kwargs=dict(
                obs_preprocess_size=3,
                obs_preprocess_hidden_sizes=[32, 32],
                obs_preprocess_output_size=6,
            ),
            segmentation=True,  # if True, use segmentation; otherwise do not
            segmentation_method='unet',
            segmentation_kwargs=dict(
                dilation=False,
                show=False,
                robot_threshold=0.2,
                fg_threshold=120,
            ),
            keep_train_segmentation_lstm=True,
            save_video=True,
            custom_goal_sampler='replay_buffer',
            online_vae_trainer_kwargs=dict(
                beta=20,
                lr=1e-3,
            ),
            online_lstm_trainer_kwargs=dict(
                beta=0,
                recon_loss_coef=0,
                triplet_loss_coef=[],
                triplet_loss_type=[],
                triplet_loss_margin=1,
                matching_loss_coef=50,
                vae_matching_loss_coef=0,
                ae_loss_coef=0.5,
                lstm_kl_loss_coef=0,
                contrastive_loss_coef=0,
                adaptive_margin=0,
                negative_range=15,
                batch_size=16,
            ),
            save_video_period=100,
            qf_kwargs=dict(hidden_sizes=[400, 300]),
            policy_kwargs=dict(hidden_sizes=[400, 300]),
            vf_kwargs=dict(hidden_sizes=[400, 300]),
            max_path_length=50,
            algo_kwargs=dict(
                batch_size=1024,
                num_epochs=600,
                num_eval_steps_per_epoch=500,
                num_expl_steps_per_train_loop=500,
                num_trains_per_train_loop=1000,
                min_num_steps_before_training=10000,
                vae_training_schedule=vae_schedules.custom_schedule_2,
                lstm_training_schedule=LSTM_schedules.custom_schedule_2,
                oracle_data=False,
                vae_save_period=50,
                lstm_save_period=25,
                parallel_vae_train=False,
            ),
            twin_sac_trainer_kwargs=dict(
                discount=0.99,
                reward_scale=1,
                soft_target_tau=1e-3,
                target_update_period=1,
                use_automatic_entropy_tuning=True,
            ),
            replay_buffer_kwargs=dict(
                start_skew_epoch=10,
                max_size=int(100000),
                fraction_goals_rollout_goals=0.2,
                fraction_goals_env_goals=0.5,
                exploration_rewards_type='None',
                vae_priority_type='vae_prob',
                priority_function_kwargs=dict(
                    sampling_method='importance_sampling',
                    decoder_distribution='gaussian_identity_variance',
                    num_latents_to_sample=10,
                ),
                power=-1,
                relabeling_goal_sampling_mode='vae_prior',
            ),
            exploration_goal_sampling_mode='vae_prior',
            evaluation_goal_sampling_mode='reset_of_env',
            normalize=False,
            render=False,
            exploration_noise=0.0,
            exploration_type='ou',
            training_mode='train',
            testing_mode='test',
            reward_params=dict(type='latent_distance'),
            observation_key='latent_observation',
            desired_goal_key='latent_desired_goal',
            vae_wrapped_env_kwargs=dict(sample_from_true_prior=True),
        ),
        train_vae_variant=dict(  # actually the train-VAE-and-LSTM variant
            only_train_lstm=False,
            lstm_version=2,
            lstm_representation_size=6,
            vae_representation_size=6,
            beta=20,
            num_vae_epochs=2000,  # pretrain VAE epochs
            num_lstm_epochs=2000,  # pretrain LSTM epochs
            dump_skew_debug_plots=False,
            decoder_activation='gaussian',  # will later be replaced by identity; only sigmoid or identity
            seg_pretrain=True,  # whether to pretrain the segmentation LSTM
            ori_pretrain=True,  # whether to pretrain the original VAE
            lstm_pretrain_vae_only=True,  # if True, only use randomly sampled images (not trajectories) to train the first VAE part; no training of the ROLL part
            generate_lstm_data_fctn=generate_LSTM_vae_only_dataset,  # custom VAE dataset generation function
            generate_vae_dataset_kwargs=dict(
                N=2000,  # pretrain VAE dataset size
                test_p=.9,
                use_cached=False,
                show=False,
                oracle_dataset=True,
                oracle_dataset_using_set_to_goal=True,
                n_random_steps=100,
                non_presampled_goal_img_is_garbage=True,
            ),
            generate_lstm_dataset_kwargs=dict(
                N=2000,  # number of images for pretraining the VAE part of the LSTM
                test_p=.9,
                show=False,
                occlusion_prob=0.3,
                occlusion_level=0.5,
            ),
            vae_kwargs=dict(
                input_channels=3,
                architecture=vae_48_default_architecture,
                decoder_distribution='gaussian_identity_variance',
            ),
            lstm_kwargs=dict(
                input_channels=3,
                architecture=lstm_48_default_architecture,
                decoder_distribution='gaussian_identity_variance',
                detach_vae_output=True,
            ),
            # Pretrain LSTM and VAE kwargs
            algo_kwargs=dict(
                start_skew_epoch=5000,
                is_auto_encoder=False,
                batch_size=128,
                lr=1e-3,
                skew_config=dict(
                    method='vae_prob',
                    power=-1,
                ),
                recon_loss_coef=1,
                triplet_loss_coef=[],
                triplet_loss_type=[],
                triplet_loss_margin=1,
                matching_loss_coef=0,
                vae_matching_loss_coef=400,
                ae_loss_coef=0.5,
                lstm_kl_loss_coef=0,
                contrastive_loss_coef=0,
                adaptive_margin=0,
                negative_range=15,
                skew_dataset=False,
                priority_function_kwargs=dict(
                    decoder_distribution='gaussian_identity_variance',
                    sampling_method='importance_sampling',
                    num_latents_to_sample=10,
                ),
                use_parallel_dataloading=False,
            ),
            save_period=25,
        ),
    )

    if not debug:
        vg.add('seed', [[100], [200], [300], [400], [500], [600]])
    else:
        vg.add('seed', [[100]])

    exp_prefix = '11-20-ROLL-release-push'

    if variant['train_vae_variant']['lstm_version'] in (2, 3):
        lstm_48_default_architecture['LSTM_args']['input_size'] = \
            variant['train_vae_variant']['lstm_representation_size']
        variant['train_vae_variant']['lstm_kwargs']['architecture'] = lstm_48_default_architecture

    if debug:
        # Use very small parameters to make sure the code at least compiles and can run.
        exp_prefix = 'debug'
        vg.add('variant', [variant])
        for vv in vg.variants():
            vv['variant']['skewfit_variant']['algo_kwargs']['batch_size'] = 32
            vv['variant']['skewfit_variant']['algo_kwargs']['num_trains_per_train_loop'] = 10
            vv['variant']['skewfit_variant']['algo_kwargs']['min_num_steps_before_training'] = 100
            vv['variant']['skewfit_variant']['replay_buffer_kwargs']['max_size'] = 1000
            vv['variant']['train_vae_variant']['seg_pretrain'] = True
            vv['variant']['train_vae_variant']['ori_pretrain'] = True
            vv['variant']['train_vae_variant']['num_vae_epochs'] = 2
            vv['variant']['train_vae_variant']['num_lstm_epochs'] = 1
            vv['variant']['train_vae_variant']['save_period'] = 1
            vv['variant']['train_vae_variant']['generate_vae_dataset_kwargs']['N'] = 10
            vv['variant']['train_vae_variant']['generate_lstm_dataset_kwargs']['N'] = 3
    else:
        vg.add('variant', [variant])

    print("there are {} variants to run".format(len(vg.variants())))

    sub_process_popens = []
    for idx, vv in enumerate(vg.variants()):
        while len(sub_process_popens) >= 1:
            sub_process_popens = [x for x in sub_process_popens if x.poll() is None]
            time.sleep(10)
        compile_script = wait_compile = None
        cur_popen = run_experiment_lite(
            stub_method_call=run_task,
            variant=vv,
            mode=mode,
            dry=dry,
            use_gpu=True,
            exp_prefix=exp_prefix,
            wait_subprocess=debug,
            compile_script=compile_script,
            wait_compile=wait_compile,
        )
        if cur_popen is not None:
            sub_process_popens.append(cur_popen)
        if debug:
            break
def main(mode, debug, dry):
    exp_prefix = '0808_cem_cloth_ppp'
    vg = VariantGenerator()

    cem_plan_horizon = {
        'PassWater': 7,
        'PourWater': 40,
        'PourWaterAmount': 40,
        'ClothFold': 15,
        'ClothFoldPPP': 5,
        'ClothFoldCrumpled': 30,
        'ClothFoldDrop': 30,
        'ClothFlatten': 15,
        'ClothFlattenPPP': 5,
        'ClothDrop': 15,
        'RopeFlatten': 15,
        'RopeFlattenNew': 15,
        'RopeAlphaBet': 15,
        'RigidClothFold': 15,
        'RigidClothDrop': 15,
    }

    vg.add('env_kwargs_camera_name', ['default_camera'])
    vg.add('env_kwargs_render', [False])
    vg.add('env_kwargs_observation_mode', ['key_point'])
    vg.add('env_kwargs_reward_type',
           lambda env_name: ['index', 'bigraph'] if env_name == 'RopeAlphaBet' else [None])  # Only used for RopeAlphaBet
    vg.add('seed', [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000])
    vg.add('max_episode_length', [200])
    if not debug:
        vg.add('max_iters', [10])
        vg.add('plan_horizon', lambda env_name: [cem_plan_horizon[env_name]])
        vg.add('timestep_per_decision', [21000])
        vg.add('test_episodes', [1])
        vg.add('use_mpc', [True])
        # Add possible vgs for non-debug purposes here
    else:
        vg.add('max_iters', [1])
        vg.add('test_episodes', [1])
        vg.add('timestep_per_decision', [100])
        vg.add('use_mpc', [True])
        vg.add('plan_horizon', [7])
        exp_prefix += '_debug'

    print('Number of configurations: ', len(vg.variants()))

    sub_process_popens = []
    for idx, vv in enumerate(vg.variants()):
        while len(sub_process_popens) >= 1:
            sub_process_popens = [x for x in sub_process_popens if x.poll() is None]
            time.sleep(10)
        if mode == 'seuss':
            if idx == 0:
                compile_script = 'compile_1.0.sh'  # For the first experiment, compile the current softgym
                wait_compile = None
            else:
                compile_script = None
                wait_compile = 120  # Wait 120 seconds for the compilation to finish
        else:
            compile_script = wait_compile = None
        cur_popen = run_experiment_lite(
            stub_method_call=run_task,
            variant=vv,
            mode=mode,
            dry=dry,
            use_gpu=True,
            exp_prefix=exp_prefix,
            wait_subprocess=debug,
            compile_script=compile_script,
            wait_compile=wait_compile,
        )
        if cur_popen is not None:
            sub_process_popens.append(cur_popen)
        if debug:
            break
def main(mode, debug, dry):
    vg = VariantGenerator()
    vg.add('load_path', [None])
    vg.add('load_epoch', [2550])

    ### PDE settings
    vg.add('boundary_condition', ['periodic'])
    vg.add('Tscheme', ['euler'])
    vg.add('solution_data_path', ['data/local/solutions/9-24-50-eta-0.01-forcing-1'])
    vg.add('flux', ['u2'])
    vg.add('dx', [[0.02 * np.pi, 0.04 * np.pi]])
    vg.add('eta', [0.01])

    ### Training env settings
    vg.add('state_mode', ['normalize'])  # 'normalize', 'unnormalize', or 'mix'
    vg.add('state_dim', [7])
    vg.add('action_dim', [4])
    vg.add('weno_freq', [0.5])
    vg.add('no_done', [True])
    vg.add('same_time', [True, False])

    ### Training logic settings
    vg.add('test_interval', [100])
    vg.add('save_interval', [50])
    vg.add('train_epoch', [30000])

    ### Reward settings
    vg.add('reward_width', [0, 3])
    vg.add('reward_first_deriv_error_weight', [0])

    ### General RL algorithm parameters
    vg.add('gamma', [0.99])
    vg.add('actor_lr', [1e-4])
    vg.add('final_actor_lr', [1e-7])
    vg.add('critic_lr', [1e-3])
    vg.add('final_critic_lr', [1e-7])
    vg.add('batch_size', [64])
    vg.add('policy_hidden_layers', [[64, 64, 64, 64, 64, 64]])
    vg.add('critic_hidden_layers', [[64, 64, 64, 64, 64, 64, 64]])
    vg.add('max_grad_norm', [0.5])
    vg.add('clip_gradient', [0])
    vg.add('lr_decay_interval', [0, 2000])

    ### DDPG parameters
    vg.add('tau', [0.02])
    vg.add('replay_buffer_size', [1000000])
    vg.add('noise_beg', [0.2])
    vg.add('noise_end', [0.01])
    vg.add('noise_dec', [0.04])
    vg.add('noise_dec_every', [500])
    vg.add('ddpg_value_train_iter', [2])
    vg.add('batch_norm', [False])

    if not debug:
        vg.add('seed', [100])
    else:
        vg.add('seed', [100])

    exp_prefix = '9-25-many-64-multiple-dx-forcing-eta-0.01'

    print("there are {} variants to run".format(len(vg.variants())))

    sub_process_popens = []
    for idx, vv in enumerate(vg.variants()):
        while len(sub_process_popens) >= 1:
            sub_process_popens = [x for x in sub_process_popens if x.poll() is None]
            time.sleep(10)
        compile_script = wait_compile = None
        cur_popen = run_experiment_lite(
            stub_method_call=run_task,
            variant=vv,
            mode=mode,
            dry=dry,
            use_gpu=True,
            exp_prefix=exp_prefix,
            wait_subprocess=debug,
            compile_script=compile_script,
            wait_compile=wait_compile,
        )
        if cur_popen is not None:
            sub_process_popens.append(cur_popen)
        if debug:
            break
def main(mode, debug, dry):
    exp_prefix = '1122_planet'
    vg = VariantGenerator()
    vg.add('algorithm', ['planet'])
    if debug:
        vg.add('env_name', ['ClothFlatten', 'PourWater', 'ClothDrop', 'RopeFlatten'])
    else:
        vg.add('env_name', ['ClothDrop', 'PourWater', 'PassWater', 'ClothFlatten', 'RopeFlatten', 'ClothFold'])
    vg.add('env_kwargs', lambda env_name: [env_arg_dict[env_name]])
    vg.add('env_kwargs_camera_name', ['default_camera'])
    vg.add('env_kwargs_delta_reward', [False])
    vg.add('train_epoch', [1200])
    vg.add('planning_horizon', [24])
    vg.add('use_value_function', [False])
    if debug:
        vg.add('seed', [100])
    else:
        vg.add('seed', [100, 200, 300])
    if not debug:
        vg.add('collect_interval', [100])
        vg.add('test_interval', [10])
        vg.add('test_episodes', lambda env_name: [900 // env_arg_dict[env_name]['horizon']])
        vg.add('episodes_per_loop', lambda env_name: [900 // env_arg_dict[env_name]['horizon']])
        # Add possible vgs for non-debug purposes here
    else:
        vg.add('collect_interval', [1])
        vg.add('test_interval', [1])
        vg.add('test_episodes', [1])
        vg.add('episodes_per_loop', [1])
        exp_prefix += '_debug'

    print('Number of configurations: ', len(vg.variants()))

    sub_process_popens = []
    for idx, vv in enumerate(vg.variants()):
        while len(sub_process_popens) >= 1:
            sub_process_popens = [x for x in sub_process_popens if x.poll() is None]
            time.sleep(10)
        if mode == 'seuss':
            if idx == 0:
                compile_script = 'compile_1.0.sh'  # For the first experiment, compile the current softgym
                wait_compile = None
            else:
                compile_script = None
                wait_compile = 120  # Wait 120 seconds for the compilation to finish
        else:
            compile_script = wait_compile = None
        cur_popen = run_experiment_lite(
            stub_method_call=run_task,
            variant=vv,
            mode=mode,
            dry=dry,
            use_gpu=True,
            exp_prefix=exp_prefix,
            wait_subprocess=debug,
            compile_script=compile_script,
            wait_compile=wait_compile,
        )
        if cur_popen is not None:
            sub_process_popens.append(cur_popen)
        if debug:
            break
def main(mode, debug, dry):
    vg = VariantGenerator()
    vg.add("variant", [
        dict(
            algorithm='Skew-Fit',
            double_algo=False,
            online_vae_exploration=False,
            imsize=48,
            init_camera=sawyer_init_camera_zoomed_in,
            env_id='SawyerPushHurdle-v0',
            skewfit_variant=dict(
                # vae_path=[
                #     "./data/05-09-test-color-thresholding-vae/05-09-test_color_thresholding_vae_2020_05_09_19_20_04_0000--s-35695/vae_ori_pretrain.pkl",
                #     "./data/05-09-test-color-thresholding-vae/05-09-test_color_thresholding_vae_2020_05_09_19_20_04_0000--s-35695/vae_seg_pretrain.pkl",
                # ],
                segmentation=True,  # if True, use segmentation; otherwise do not
                segmentation_method='unet',
                segmentation_kwargs=dict(
                    dilation=False,
                    show=False,
                    robot_threshold=0.05,
                    fg_threshold=120,
                ),
                keep_train_segmentation_vae=False,
                save_video=True,
                custom_goal_sampler='replay_buffer',
                online_vae_trainer_kwargs=dict(
                    beta=20,
                    lr=1e-3,
                ),
                save_video_period=100,
                qf_kwargs=dict(hidden_sizes=[400, 300]),
                policy_kwargs=dict(hidden_sizes=[400, 300]),
                vf_kwargs=dict(hidden_sizes=[400, 300]),
                max_path_length=50,
                algo_kwargs=dict(
                    batch_size=1024,
                    num_epochs=800,
                    num_eval_steps_per_epoch=500,
                    num_expl_steps_per_train_loop=500,
                    num_trains_per_train_loop=1000,
                    min_num_steps_before_training=10000,
                    vae_training_schedule=vae_schedules.custom_schedule_2,
                    oracle_data=False,
                    vae_save_period=50,
                    parallel_vae_train=False,
                ),
                twin_sac_trainer_kwargs=dict(
                    discount=0.99,
                    reward_scale=1,
                    soft_target_tau=1e-3,
                    target_update_period=1,
                    use_automatic_entropy_tuning=True,
                ),
                replay_buffer_kwargs=dict(
                    start_skew_epoch=10,
                    max_size=int(100000),
                    fraction_goals_rollout_goals=0.2,
                    fraction_goals_env_goals=0.5,
                    exploration_rewards_type='None',
                    vae_priority_type='vae_prob',
                    priority_function_kwargs=dict(
                        sampling_method='importance_sampling',
                        decoder_distribution='gaussian_identity_variance',
                        num_latents_to_sample=10,
                    ),
                    power=-1,
                    relabeling_goal_sampling_mode='vae_prior',
                ),
                exploration_goal_sampling_mode='vae_prior',
                evaluation_goal_sampling_mode='reset_of_env',
                normalize=False,
                render=False,
                exploration_noise=0.0,
                exploration_type='ou',
                training_mode='train',
                testing_mode='test',
                reward_params=dict(type='latent_distance'),
                observation_key='latent_observation',
                desired_goal_key='latent_desired_goal',
                vae_wrapped_env_kwargs=dict(sample_from_true_prior=True),
            ),
            train_vae_variant=dict(
                representation_size=6,
                beta=20,
                num_epochs=2000,  # pretrain VAE epochs
                dump_skew_debug_plots=False,
                decoder_activation='gaussian',
                seg_pretrain=True,  # whether to pretrain the segmentation VAE
                ori_pretrain=True,  # whether to pretrain the original VAE
                generate_vae_data_fctn=generate_sawyerhurdle_dataset,  # custom VAE dataset generation function
                generate_vae_dataset_kwargs=dict(
                    N=2000,  # pretrain VAE dataset size
                    test_p=.9,
                    use_cached=False,
                    show=False,
                    oracle_dataset=True,
                    oracle_dataset_using_set_to_goal=True,
                    n_random_steps=100,
                    non_presampled_goal_img_is_garbage=True,
                ),
                vae_kwargs=dict(
                    input_channels=3,
                    architecture=imsize48_default_architecture,
                    decoder_distribution='gaussian_identity_variance',
                ),
                # The segmentation VAE starts from the same default architecture;
                # it is deep-copied and modified below.
                seg_vae_kwargs=dict(
                    input_channels=3,
                    architecture=imsize48_default_architecture,
                    decoder_distribution='gaussian_identity_variance',
                ),
                algo_kwargs=dict(
                    start_skew_epoch=5000,
                    is_auto_encoder=False,
                    batch_size=64,
                    lr=1e-3,
                    skew_config=dict(
                        method='vae_prob',
                        power=-1,
                    ),
                    matching_loss_coef=0,
                    skew_dataset=True,
                    priority_function_kwargs=dict(
                        decoder_distribution='gaussian_identity_variance',
                        sampling_method='importance_sampling',
                        num_latents_to_sample=10,
                    ),
                    use_parallel_dataloading=False,
                ),
                save_period=25,
            ),
        )
    ])

    if not debug:
        vg.add('seed', [[400], [500], [600]])
    else:
        vg.add('seed', [[100]])

    # exp_prefix = '6-5-skewfit-hurdlemiddle-segcolorpretrain'
    exp_prefix = '7-17-CoRL-sawyerhurlde-segvae-ablation'

    for vv in vg.variants():
        seg_imsize48_arch = copy.deepcopy(imsize48_default_architecture)
        seg_imsize48_arch['conv_args']['output_size'] = 6
        vv['variant']['train_vae_variant']['seg_vae_kwargs']['architecture'] = seg_imsize48_arch

    if debug:
        # Use very small parameters to make sure the code at least compiles and can run.
        print(debug)
        exp_prefix = 'debug'
        for vv in vg.variants():
            vv['variant']['skewfit_variant']['algo_kwargs']['batch_size'] = 32
            vv['variant']['skewfit_variant']['algo_kwargs']['num_trains_per_train_loop'] = 10
            vv['variant']['skewfit_variant']['algo_kwargs']['min_num_steps_before_training'] = 100
            vv['variant']['skewfit_variant']['replay_buffer_kwargs']['max_size'] = 1000
            vv['variant']['train_vae_variant']['seg_pretrain'] = False
            vv['variant']['train_vae_variant']['ori_pretrain'] = False
            vv['variant']['train_vae_variant']['num_epochs'] = 3
            vv['variant']['train_vae_variant']['generate_vae_dataset_kwargs']['N'] = 5

    print("there are {} variants to run".format(len(vg.variants())))

    sub_process_popens = []
    for idx, vv in enumerate(vg.variants()):
        while len(sub_process_popens) >= 1:
            sub_process_popens = [x for x in sub_process_popens if x.poll() is None]
            time.sleep(10)
        compile_script = wait_compile = None
        run_task = run_task_original
        cur_popen = run_experiment_lite(
            stub_method_call=run_task,
            variant=vv,
            mode=mode,
            dry=dry,
            use_gpu=True,
            exp_prefix=exp_prefix,
            wait_subprocess=debug,
            compile_script=compile_script,
            wait_compile=wait_compile,
        )
        if cur_popen is not None:
            sub_process_popens.append(cur_popen)
        if debug:
            break
def main(mode, debug, dry):
    vg = VariantGenerator()
    variant = dict(
        algorithm='Skew-Fit-SAC',
        double_algo=False,
        online_vae_exploration=False,
        imsize=48,
        env_id='SawyerDoorHookResetFreeEnv-v1',
        init_camera=sawyer_door_env_camera_v0,
        skewfit_variant=dict(
            observation_mode='original_image',
            segmentation=True,
            segmentation_method='unet',
            segmentation_kwargs=dict(
                dilation=True,
                dilation_size=2,
                show=False,
                save=False,
                save_path='data/local/debug',
                robot_threshold=0.99,
                fg_threshold=120,
            ),
            keep_train_segmentation_lstm=True,
            save_video=True,
            custom_goal_sampler='replay_buffer',
            online_vae_trainer_kwargs=dict(
                beta=20,
                lr=1e-3,
            ),
            online_lstm_trainer_kwargs=dict(
                beta=0,
                recon_loss_coef=0,
                triplet_loss_coef=[],
                triplet_loss_type=[],
                triplet_loss_margin=1,
                matching_loss_coef=50,
                vae_matching_loss_coef=0,
                ae_loss_coef=0.5,
                lstm_kl_loss_coef=0,
                contrastive_loss_coef=0,
                adaptive_margin=0,
                negative_range=15,
                batch_size=16,
            ),
            save_video_period=50,
            qf_kwargs=dict(hidden_sizes=[400, 300]),
            policy_kwargs=dict(hidden_sizes=[400, 300]),
            twin_sac_trainer_kwargs=dict(
                reward_scale=1,
                discount=0.99,
                soft_target_tau=1e-3,
                target_update_period=1,
                use_automatic_entropy_tuning=True,
            ),
            max_path_length=100,
            algo_kwargs=dict(
                batch_size=1024,
                num_epochs=500,
                num_eval_steps_per_epoch=500,
                num_expl_steps_per_train_loop=500,
                num_trains_per_train_loop=1000,
                min_num_steps_before_training=10000,
                vae_training_schedule=vae_schedules.custom_schedule,
                lstm_training_schedule=LSTM_schedules.custom_schedule_2,
                oracle_data=False,
                vae_save_period=50,
                lstm_save_period=25,
                parallel_vae_train=False,
            ),
            replay_buffer_kwargs=dict(
                start_skew_epoch=10,
                max_size=int(100000),
                fraction_goals_rollout_goals=0.2,
                fraction_goals_env_goals=0.5,
                exploration_rewards_type='None',
                vae_priority_type='vae_prob',
                priority_function_kwargs=dict(
                    sampling_method='importance_sampling',
                    decoder_distribution='gaussian_identity_variance',
                    num_latents_to_sample=10,
                ),
                power=-0.5,
                relabeling_goal_sampling_mode='custom_goal_sampler',
            ),
            exploration_goal_sampling_mode='vae_prior',
            evaluation_goal_sampling_mode='presampled',
            training_mode='train',
            testing_mode='test',
            reward_params=dict(type='latent_distance'),
            observation_key='latent_observation',
            desired_goal_key='latent_desired_goal',
            presampled_goals_path='data/local/goals/SawyerDoorHookResetFreeEnv-v1-goal.npy',
            presample_goals=True,
            vae_wrapped_env_kwargs=dict(sample_from_true_prior=True),
        ),
        train_vae_variant=dict(
            vae_representation_size=16,
            lstm_representation_size=6,
            lstm_path=None,
            only_train_lstm=False,  # if True, terminate the process right after training the LSTM
            lstm_version=2,
            beta=20,
            num_vae_epochs=2000,  # pretrain VAE epochs
            num_lstm_epochs=2000,  # pretrain LSTM epochs
            dump_skew_debug_plots=False,
            seg_pretrain=True,  # whether to pretrain the segmentation LSTM
            ori_pretrain=True,  # whether to pretrain the original VAE
            lstm_pretrain_vae_only=True,  # if True, only use randomly sampled images (not trajectories) to train the VAE part; no training of the LSTM
            decoder_activation='gaussian',
            generate_vae_dataset_kwargs=dict(
                dataset_path="data/local/pre-train-vae/door_original_dataset.npy",
                N=2,
                test_p=.9,
                use_cached=True,
                show=False,
                oracle_dataset=False,
                n_random_steps=1,
                non_presampled_goal_img_is_garbage=True,
            ),
            generate_lstm_dataset_kwargs=dict(
                N=1000,  # pretrain LSTM dataset size
                test_p=.9,
                show=False,
                occlusion_prob=0,
                occlusion_level=0,
            ),
            vae_kwargs=dict(
                decoder_distribution='gaussian_identity_variance',
                input_channels=3,
                architecture=vae_48_default_architecture,
            ),
            lstm_kwargs=dict(
                input_channels=3,
                architecture=lstm_48_default_architecture,
                decoder_distribution='gaussian_identity_variance',
                detach_vae_output=True,
            ),
            algo_kwargs=dict(
                start_skew_epoch=5000,
                is_auto_encoder=False,
                batch_size=16,
                lr=1e-3,
                skew_config=dict(
                    method='vae_prob',
                    power=-0.5,
                ),
                recon_loss_coef=1,
                triplet_loss_coef=[],
                triplet_loss_type=[],
                triplet_loss_margin=1,
                matching_loss_coef=0,
                vae_matching_loss_coef=50,
                ae_loss_coef=0.5,
                lstm_kl_loss_coef=0,
                contrastive_loss_coef=0,
                matching_loss_one_side=False,
                adaptive_margin=0,
                negative_range=15,
                skew_dataset=False,
                priority_function_kwargs=dict(
                    decoder_distribution='gaussian_identity_variance',
                    sampling_method='importance_sampling',
                    num_latents_to_sample=10,
                ),
                use_parallel_dataloading=False,
            ),
            save_period=25,
        ),
    )

    if not debug:
        vg.add('seed', [[100], [200], [300], [400], [500], [600]])
    else:
        vg.add('seed', [[100]])

    exp_prefix = '11-20-ROLL-release-door'

    if variant['train_vae_variant']['lstm_version'] in (2, 3):
        lstm_48_default_architecture['LSTM_args']['input_size'] = \
            variant['train_vae_variant']['lstm_representation_size']
        lstm_48_default_architecture['conv_args']['output_size'] = 6
        variant['train_vae_variant']['lstm_kwargs']['architecture'] = lstm_48_default_architecture

    # Handle online & pretrain LSTM settings.
    if variant['skewfit_variant']['keep_train_segmentation_lstm']:
        variant['train_vae_variant']['lstm_pretrain_vae_only'] = True
        variant['train_vae_variant']['generate_lstm_data_fctn'] = generate_LSTM_vae_only_dataset
        variant['train_vae_variant']['generate_lstm_dataset_kwargs']['N'] = 1000
        variant['train_vae_variant']['algo_kwargs']['batch_size'] = 128

    if debug:
        # Use very small parameters to make sure the code at least compiles and can run.
        exp_prefix = 'debug'
        vg.add('variant', [variant])
        for vv in vg.variants():
            vv['variant']['skewfit_variant']['algo_kwargs']['batch_size'] = 32
            vv['variant']['skewfit_variant']['algo_kwargs']['num_trains_per_train_loop'] = 10
            vv['variant']['skewfit_variant']['algo_kwargs']['min_num_steps_before_training'] = 100
            vv['variant']['skewfit_variant']['replay_buffer_kwargs']['max_size'] = 1000
            vv['variant']['train_vae_variant']['seg_pretrain'] = True
            vv['variant']['train_vae_variant']['ori_pretrain'] = True
            vv['variant']['train_vae_variant']['num_vae_epochs'] = 2
            vv['variant']['train_vae_variant']['num_lstm_epochs'] = 1
            vv['variant']['train_vae_variant']['save_period'] = 1
            # vv['variant']['train_vae_variant']['generate_vae_dataset_kwargs']['N'] = 10
            # vv['variant']['train_vae_variant']['generate_lstm_dataset_kwargs']['N'] = 3
    else:
        vg.add('variant', [variant])

    print("there are {} variants to run".format(len(vg.variants())))

    sub_process_popens = []
    for idx, vv in enumerate(vg.variants()):
        while len(sub_process_popens) >= 1:
            sub_process_popens = [x for x in sub_process_popens if x.poll() is None]
            time.sleep(10)
        compile_script = wait_compile = None
        cur_popen = run_experiment_lite(
            stub_method_call=run_task,
            variant=vv,
            mode=mode,
            dry=dry,
            use_gpu=True,
            exp_prefix=exp_prefix,
            wait_subprocess=debug,
            compile_script=compile_script,
            wait_compile=wait_compile,
        )
        if cur_popen is not None:
            sub_process_popens.append(cur_popen)
        if debug:
            break
def main(mode, debug, dry):
    exp_prefix = '0910-drq-first-try'

    reward_scales = {
        'PourWater': 20.0,
        'PassWaterTorus': 20.0,
        'TransportTorus': 20.0,
        'PassWater': 20.0,
        'PourWaterAmount': 20.0,
        'ClothFold': 50.0,
        'ClothFoldCrumpled': 50.0,
        'ClothFoldDrop': 50.0,
        'ClothFlatten': 50.0,
        'ClothDrop': 50.0,
        'RopeFlatten': 50.0,
        'RopeFlattenNew': 50.0,
        'RopeAlphaBet': 50.0,
        'RigidClothFold': 50.0,
        'RigidClothDrop': 50.0,
    }

    clip_obs = {
        'PassWater': None,
        'PourWater': None,
        'PassWaterTorus': None,
        'TransportTorus': None,
        'PourWaterAmount': None,
        'ClothFold': None,  # (-3, 3)
        'ClothFoldCrumpled': None,
        'ClothFoldDrop': None,
        'ClothFlatten': None,  # (-2, 2)
        'ClothDrop': None,
        'RopeFlatten': None,
        'RopeFlattenNew': None,  # (-3, 3)
        'RopeAlphaBet': None,
        'RigidClothFold': None,  # (-3, 3)
        'RigidClothDrop': None,
    }

    def get_critic_lr(env_name, obs_mode):
        # Previously tuned per environment / observation mode; currently a single value.
        # if env_name in ['ClothFold', 'RigidClothFold', 'PassWaterTorus'] or (env_name == 'RopeFlattenNew' and obs_mode == 'point_cloud'):
        #     if obs_mode == 'cam_rgb':
        #         return 1e-4
        #     else:
        #         return 5e-4
        # if obs_mode == 'cam_rgb':
        #     return 3e-4
        # else:
        #     return 1e-3
        return 1e-3

    def get_alpha_lr(env_name, obs_mode):
        # if env_name in ['RigidClothFold', 'ClothFold']:
        #     return 2e-5
        # else:
        #     return 1e-3
        return 1e-3

    def get_lr_decay(env_name, obs_mode):
        # if env_name == 'RopeFlattenNew' or (env_name == 'ClothFlatten' and obs_mode == 'cam_rgb') \
        #         or (env_name == 'RigidClothFold' and obs_mode == 'key_point'):
        #     return 0.01
        # elif obs_mode == 'point_cloud':
        #     return 0.01
        # elif env_name == 'PassWaterTorus':
        #     return 0.01
        # else:
        #     return None
        return None

    vg = VariantGenerator()
    vg.add('env_name', ['PassWater'])
    vg.add('env_kwargs', lambda env_name: [env_arg_dict[env_name]])
    vg.add('env_kwargs_observation_mode', ['cam_rgb'])
    # vg.add('algorithm', ['CURL'])
    # vg.add('alpha_fixed', [False])
    # vg.add('critic_lr', lambda env_name, env_kwargs_observation_mode: [get_critic_lr(env_name, env_kwargs_observation_mode)])
    # vg.add('actor_lr', lambda critic_lr: [critic_lr])
    # vg.add('alpha_lr', lambda env_name, env_kwargs_observation_mode: [get_alpha_lr(env_name, env_kwargs_observation_mode)])
    # vg.add('lr_decay', lambda env_name, env_kwargs_observation_mode: [get_lr_decay(env_name, env_kwargs_observation_mode)])
    # vg.add('init_temperature', lambda env_kwargs_observation_mode: [0.1] if env_kwargs_observation_mode == 'cam_rgb' else [0.1])
    vg.add('replay_buffer_capacity',
           lambda env_kwargs_observation_mode: [10000] if env_kwargs_observation_mode == 'cam_rgb' else [100000])
    vg.add('num_train_steps',
           lambda env_kwargs_observation_mode: [1000000] if env_kwargs_observation_mode == 'cam_rgb' else [1000000])
    vg.add('scale_reward', lambda env_name: [reward_scales[env_name]])
    vg.add('clip_obs',
           lambda env_name, env_kwargs_observation_mode: [clip_obs[env_name]]
           if env_kwargs_observation_mode == 'key_point' else [None])
    vg.add('batch_size', [128])
    vg.add('im_size', [128])
    vg.add('env_kwargs_deterministic', [False])
    vg.add('log_save_tb', [False])
    vg.add('save_video', [True])
    vg.add('save_model', [True])
    vg.add('log_interval', [1])
    if not debug:
        vg.add('seed', [100, 200, 300])
    else:
        vg.add('seed', [100])
        exp_prefix += '_debug'

    print('Number of configurations: ', len(vg.variants()))
    print("exp_prefix: ", exp_prefix)

    hostname = socket.gethostname()
    gpu_num = torch.cuda.device_count()

    sub_process_popens = []
    for idx, vv in enumerate(vg.variants()):
        while len(sub_process_popens) >= 10:
            sub_process_popens = [x for x in sub_process_popens if x.poll() is None]
            time.sleep(10)
        if mode in ['seuss', 'autobot']:
            if idx == 0:
                compile_script = 'compile_1.0.sh'  # For the first experiment, compile the current softgym
                wait_compile = None
            else:
                compile_script = None
                wait_compile = 120  # Wait 120 seconds for the compilation to finish
        elif mode == 'ec2':
            compile_script = 'compile_1.0.sh'
            wait_compile = None
        else:
            compile_script = wait_compile = None
        if hostname.startswith('autobot') and gpu_num > 0:
            env_var = {'CUDA_VISIBLE_DEVICES': str(idx % gpu_num)}
        else:
            env_var = None
        cur_popen = run_experiment_lite(
            stub_method_call=run_task,
            variant=vv,
            mode=mode,
            dry=dry,
            use_gpu=True,
            exp_prefix=exp_prefix,
            wait_subprocess=debug,
            compile_script=compile_script,
            wait_compile=wait_compile,
            env=env_var,
        )
        if cur_popen is not None:
            sub_process_popens.append(cur_popen)
        if debug:
            break
def main(mode, debug, dry):
    vg = VariantGenerator()
    vg.add("variant", [
        dict(
            algorithm='Skew-Fit',
            imsize=48,
            double_algo=False,
            env_id="SawyerPickupEnvYZEasy-v0",
            skewfit_variant=dict(
                sample_goals_from_buffer=True,
                save_video=True,
                segmentation=False,
                keep_train_segmentation_vae=False,
                segmentation_method='color_pickup',  # or 'unet'
                save_video_period=50,
                presample_goals=True,
                custom_goal_sampler='replay_buffer',
                online_vae_trainer_kwargs=dict(
                    beta=30,
                    lr=1e-3,
                ),
                presampled_goals_path='data/local/goals/SawyerPickupEnvYZEasy-v0-goal-500.npy',
                qf_kwargs=dict(hidden_sizes=[400, 300]),
                policy_kwargs=dict(hidden_sizes=[400, 300]),
                vf_kwargs=dict(hidden_sizes=[400, 300]),
                max_path_length=50,
                algo_kwargs=dict(
                    batch_size=1024,
                    num_epochs=800,
                    num_eval_steps_per_epoch=500,
                    num_expl_steps_per_train_loop=500,
                    num_trains_per_train_loop=1000,
                    min_num_steps_before_training=10000,
                    vae_training_schedule=vae_schedules.custom_schedule,
                    oracle_data=False,
                    vae_save_period=50,
                    parallel_vae_train=False,
                ),
                twin_sac_trainer_kwargs=dict(
                    reward_scale=1,
                    discount=0.99,
                    soft_target_tau=1e-3,
                    target_update_period=1,
                    use_automatic_entropy_tuning=True,
                ),
                replay_buffer_kwargs=dict(
                    start_skew_epoch=10,
                    max_size=int(100000),
                    fraction_goals_rollout_goals=0.2,
                    fraction_goals_env_goals=0.5,
                    exploration_rewards_type='None',
                    vae_priority_type='vae_prob',
                    priority_function_kwargs=dict(
                        sampling_method='importance_sampling',
                        decoder_distribution='gaussian_identity_variance',
                        num_latents_to_sample=10,
                    ),
                    # power=0,  # Use RIG
                    power=-1,  # Use Skew-Fit
                    relabeling_goal_sampling_mode='custom_goal_sampler',
                ),
                exploration_goal_sampling_mode='custom_goal_sampler',
                evaluation_goal_sampling_mode='env',
                normalize=False,
                render=False,
                exploration_noise=0.0,
                exploration_type='ou',
                training_mode='train',
                testing_mode='test',
                reward_params=dict(type='latent_distance'),
                observation_key='latent_observation',
                desired_goal_key='latent_desired_goal',
                vae_wrapped_env_kwargs=dict(sample_from_true_prior=True),
            ),
            train_vae_variant=dict(
                representation_size=16,
                beta=30,
                num_epochs=2000,
                dump_skew_debug_plots=False,
                decoder_activation='gaussian',
                seg_pretrain=True,  # whether to pretrain the segmentation VAE
                ori_pretrain=True,  # whether to pretrain the original VAE
                vae_kwargs=dict(
                    input_channels=3,
                    architecture=imsize48_default_architecture,
                    decoder_distribution='gaussian_identity_variance',
                ),
                generate_vae_dataset_kwargs=dict(
                    dataset_path='data/local/pre-train-vae/pickup-original-dataset.npy',
                    N=2000,
                    oracle_dataset=True,
                    use_cached=False,
                    num_channels=3,
                ),
                algo_kwargs=dict(
                    start_skew_epoch=12000,
                    is_auto_encoder=False,
                    batch_size=64,
                    lr=1e-3,
                    skew_config=dict(
                        method='vae_prob',
                        # power=0,  # Use RIG
                        power=-1,  # Use Skew-Fit
                    ),
                    skew_dataset=True,
                    priority_function_kwargs=dict(
                        decoder_distribution='gaussian_identity_variance',
                        sampling_method='true_prior_sampling',
                        num_latents_to_sample=10,
                    ),
                    use_parallel_dataloading=False,
                ),
                save_period=25,
            ),
            init_camera=sawyer_pick_and_place_camera,
        )
    ])

    if not debug and mode == 'seuss':
        vg.add('seed', [[200], [400], [500]])
    else:
        vg.add('seed', [[100]])

    # exp_prefix = '8-6-pickup-baseline-separate'
    exp_prefix = '10-21-pickup-baseline-rerun'

    if debug:
        # Use very small parameters to make sure the code at least compiles and can run.
        exp_prefix = 'debug'
        for vv in vg.variants():
            vv['variant']['skewfit_variant']['algo_kwargs']['batch_size'] = 32
            vv['variant']['skewfit_variant']['algo_kwargs']['num_trains_per_train_loop'] = 10
            vv['variant']['skewfit_variant']['algo_kwargs']['min_num_steps_before_training'] = 100
            vv['variant']['skewfit_variant']['replay_buffer_kwargs']['max_size'] = 100
            vv['variant']['train_vae_variant']['seg_pretrain'] = False
            vv['variant']['train_vae_variant']['ori_pretrain'] = False
            vv['variant']['train_vae_variant']['num_epochs'] = 0
            vv['variant']['train_vae_variant']['generate_vae_dataset_kwargs']['N'] = 50

    print("there are {} variants to run".format(len(vg.variants())))

    sub_process_popens = []
    for idx, vv in enumerate(vg.variants()):
        while len(sub_process_popens) >= 1:
            sub_process_popens = [x for x in sub_process_popens if x.poll() is None]
            time.sleep(10)
        compile_script = wait_compile = None
        run_task = run_task_original
        cur_popen = run_experiment_lite(
            stub_method_call=run_task,
            variant=vv,
            mode=mode,
            dry=dry,
            use_gpu=True,
            exp_prefix=exp_prefix,
            wait_subprocess=debug,
            compile_script=compile_script,
            wait_compile=wait_compile,
        )
        if cur_popen is not None:
            sub_process_popens.append(cur_popen)
        if debug:
            break
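# Hypothetical sketch (not from the original scripts): these main(mode, debug, dry) launchers
# are normally driven from the command line. One common pattern is a click wrapper like the
# one below; the exact CLI wiring in this repository may differ.
import click


@click.command()
@click.argument('mode', type=str, default='local')
@click.option('--debug/--no-debug', default=True, help='Tiny settings; launch only the first variant.')
@click.option('--dry/--no-dry', default=False, help='Print what would be launched without running it.')
def cli(mode, debug, dry):
    main(mode, debug, dry)


if __name__ == '__main__':
    cli()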