def main():
    global policy
    global baseline

    # Load pre-trained network if available
    args = parse_arguments()
    if args.network is not None:
        data = joblib.load(args.network)
        policy = data['policy']
        baseline = data['baseline']
        use_pretrained = True
    else:
        use_pretrained = False

    # Run multiple experiment variants at once
    vg = VariantGenerator()

    # Non-configurable parameters (do not change)
    vg.add('trajectory', ['Circle'])
    vg.add('objective', ['Fast'])
    if args.algo == 'trpo':
        vg.add('algo', ['TRPO'])
    else:
        vg.add('algo', ['CPO'])

    # Configurable parameters
    # Options for model_type: 'BrushTireModel', 'LinearTireModel'
    # Options for robot_type: 'MRZR', 'RCCar'
    # Note: There is no notion of a target velocity in CPO, but it does
    # control the distribution of the initial state. See the function
    # get_initial_state() in envs/circle/circle_env.py for more
    # information.
    robot_type = 'RCCar'
    seeds = [100, 200, 300, 400, 500]
    vg.add('seed', seeds)
    vg.add('target_velocity', [1.0])
    vg.add('radius', [1.0])
    vg.add('dt', [0.1])
    vg.add('eps', [0.05])
    vg.add('model_type', ['BrushTireModel'])
    vg.add('robot_type', [robot_type])
    vg.add('mu_s', [1.37])
    vg.add('mu_k', [1.96])
    vg.add('pretrained', [use_pretrained])
    print('Number of Configurations: ', len(vg.variants()))

    # Run each experiment variant
    for vv in vg.variants():
        run_experiment_lite(stub_method_call=run_task,
                            variant=vv,
                            n_parallel=4,
                            snapshot_mode='last',
                            seed=vv['seed'])
def main():
    global policy
    global baseline

    # Load pre-trained network if available
    args = parse_arguments()
    if args.network is not None:
        data = joblib.load(args.network)
        policy = data['policy']
        baseline = data['baseline']
        use_pretrained = True
    else:
        use_pretrained = False

    # Run multiple experiment variants at once
    vg = VariantGenerator()

    # Non-configurable parameters (do not change)
    vg.add('trajectory', ['Straight'])
    vg.add('objective', ['TargetVelocity'])
    if args.algo == 'trpo':
        vg.add('algo', ['TRPO'])
    else:
        vg.add('algo', ['CPO'])

    # Configurable parameters
    # Options for model_type: 'BrushTireModel', 'LinearTireModel'
    # Options for robot_type: 'MRZR', 'RCCar'
    seeds = [102, 201, 54, 304]
    robot_type = 'RCCar'
    use_ros = False
    vg.add('seed', seeds)
    vg.add('target_velocity', [1.0])
    vg.add('dt', [0.02])
    vg.add('model_type', ['BrushTireModel'])
    vg.add('robot_type', [robot_type])
    vg.add('mu_s', [1.37])
    vg.add('mu_k', [1.96])
    vg.add('use_ros', [use_ros])
    vg.add('pretrained', [use_pretrained])
    print('Number of Configurations: ', len(vg.variants()))

    # Run each experiment variant
    # for vv in vg.variants():
    #     run_task(vv)
    for vv in vg.variants():
        run_experiment_lite(stub_method_call=run_task,
                            variant=vv,
                            n_parallel=4,
                            snapshot_mode='last',
                            seed=vv['seed'])
def main():
    # Set up multiple experiments at once
    vg = VariantGenerator()
    vg.add('target_velocity', [0.7])
    vg.add('seed', [100])
    print('Number of Configurations: ', len(vg.variants()))

    # Run each experiment variant
    for vv in vg.variants():
        run_experiment_lite(stub_method_call=run_task,
                            variant=vv,
                            n_parallel=1,
                            snapshot_mode='last',
                            seed=vv['seed'])
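# Minimal illustrative sketch of the VariantGenerator pattern used throughout
# these launchers (assumes rllab, where VariantGenerator is typically imported
# from rllab.misc.instrument; the keys and values below are hypothetical):
# each vg.add(name, values) adds one axis, and vg.variants() yields one dict
# per element of the cross product of all axes.
from rllab.misc.instrument import VariantGenerator

def _variant_sweep_sketch():
    vg = VariantGenerator()
    vg.add('target_velocity', [0.7, 1.0])
    vg.add('seed', [100, 200])
    # 2 x 2 = 4 variants, e.g. {'target_velocity': 0.7, 'seed': 100, ...}
    for vv in vg.variants():
        print(vv['target_velocity'], vv['seed'])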
def experiment(variant):
    # we have to generate the combinations for the env_specs
    env_specs = variant['env_specs']
    env_specs_vg = VariantGenerator()
    env_spec_constants = {}
    for k, v in env_specs.items():
        if isinstance(v, list):
            env_specs_vg.add(k, v)
        else:
            env_spec_constants[k] = v

    env_specs_list = []
    for es in env_specs_vg.variants():
        del es['_hidden_keys']
        es.update(env_spec_constants)
        env_specs_list.append(es)
    print(env_specs_list)
    print(env_specs_list[0])

    env_sampler = EnvSampler(env_specs_list)

    # set up similar to non-meta version
    sample_env, _ = env_sampler()
    if variant['algo_params']['concat_env_params_to_obs']:
        meta_params_dim = sample_env.env_meta_params.shape[0]
    else:
        meta_params_dim = 0
    obs_dim = int(np.prod(sample_env.observation_space.shape))
    action_dim = int(np.prod(sample_env.action_space.shape))

    net_size = variant['net_size']
    qf = FlattenMlp(
        hidden_sizes=[net_size, net_size],
        input_size=obs_dim + action_dim + meta_params_dim,
        output_size=1,
    )
    vf = FlattenMlp(
        hidden_sizes=[net_size, net_size],
        input_size=obs_dim + meta_params_dim,
        output_size=1,
    )
    policy = TanhGaussianPolicy(
        hidden_sizes=[net_size, net_size],
        obs_dim=obs_dim + meta_params_dim,
        action_dim=action_dim,
    )
    algorithm = MetaSoftActorCritic(env_sampler=env_sampler,
                                    policy=policy,
                                    qf=qf,
                                    vf=vf,
                                    **variant['algo_params'])
    if ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()

    return 1
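# Equivalent sketch of the env_specs expansion above, written with itertools
# instead of VariantGenerator to make the intent explicit (the example
# env_specs values are hypothetical): list-valued entries are swept over,
# scalar entries are held fixed in every combination handed to EnvSampler.
import itertools

def _expand_env_specs_sketch(env_specs):
    swept = {k: v for k, v in env_specs.items() if isinstance(v, list)}
    constants = {k: v for k, v in env_specs.items() if not isinstance(v, list)}
    keys = sorted(swept)
    combos = itertools.product(*(swept[k] for k in keys))
    return [dict(zip(keys, combo), **constants) for combo in combos]

# _expand_env_specs_sketch({'friction': [0.5, 1.0], 'gravity': -9.81})
# -> [{'friction': 0.5, 'gravity': -9.81}, {'friction': 1.0, 'gravity': -9.81}]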
    report.add_text(
        'Outer iteration: {}, disc loss: {}, gen loss: {}'.format(
            outer_iter, dloss, gloss))
    generated_samples, _ = gan.sample_generator(50)
    report.add_image(plot_samples(generated_samples))
    report.add_image(plot_dicriminator(gan))
    report.save()


if __name__ == '__main__':
    vg = VariantGenerator()
    # vg.add('generator_init', ['xavier', 0.02, 0.1, 0.005])
    # vg.add('generator_iters', [40, 20, 5, 2])
    # vg.add('discriminator_iters', [20, 5, 1])
    # vg.add('generator_learning_rate', [0.0003, 0.001, 0.003, 0.01, 0.1])
    # vg.add('discriminator_learning_rate', [0.0003, 0.001, 0.003, 0.01, 0.1])
    vg.add('outer_iters', [500])
    for variant in vg.variants(randomized=False):
        run_experiment_lite(
            stub_method_call=run_task,
            mode='local',
            n_parallel=1,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            seed=int(time.time()),
            exp_prefix='debug_simple_circle_gan',
            variant=variant,
            # exp_name=exp_name,
        )
def experiment(variant):
    # we have to generate the combinations for the env_specs
    if variant['on_the_fly']:
        env_specs = variant['env_specs']
        env_sampler = OnTheFlyEnvSampler(env_specs)
    else:
        env_specs = variant['env_specs']
        env_specs_vg = VariantGenerator()
        env_spec_constants = {}
        env_spec_ranges = {}
        for k, v in env_specs.items():
            if isinstance(v, list):
                env_specs_vg.add(k, v)
                env_spec_ranges[k] = v
            else:
                env_spec_constants[k] = v

        env_specs_list = []
        for es in env_specs_vg.variants():
            del es['_hidden_keys']
            es.update(env_spec_constants)
            env_specs_list.append(es)

        env_sampler = EnvSampler(env_specs_list)

    # set up the neural process
    np_path = exp_specs['neural_process_load_path']
    if np_path == '':
        raise NotImplementedError()
    else:
        neural_process = joblib.load(np_path)['neural_process']

    # set up similar to non-meta version
    sample_env, _ = env_sampler()
    obs_dim = int(np.prod(sample_env.observation_space.shape))
    action_dim = int(np.prod(sample_env.action_space.shape))

    if variant['algo_params']['latent_repr_mode'] == 'concat_params':
        extra_obs_dim = 2 * neural_process.z_dim
    else:  # concat samples
        extra_obs_dim = (variant['algo_params']['num_latent_samples']
                         * neural_process.z_dim)

    net_size = variant['net_size']
    vf = FlattenMlp(
        hidden_sizes=[net_size, net_size],
        input_size=obs_dim + extra_obs_dim,
        output_size=1,
    )
    policy = TanhGaussianPolicy(
        hidden_sizes=[net_size, net_size],
        obs_dim=obs_dim + extra_obs_dim,
        action_dim=action_dim,
    )
    qf = FlattenMlp(
        hidden_sizes=[net_size, net_size],
        input_size=obs_dim + action_dim + extra_obs_dim,
        output_size=1,
    )
    algorithm = NPMetaSoftActorCritic(
        env_sampler=env_sampler,
        neural_process=neural_process,
        policy=policy,
        qf=qf,
        vf=vf,
        **variant['algo_params']
    )
    if ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()

    return 1
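# Worked example for the extra_obs_dim computation in the experiment above
# (hypothetical sizes): with neural_process.z_dim = 5 and
# variant['algo_params']['num_latent_samples'] = 3,
#   latent_repr_mode == 'concat_params'  ->  extra_obs_dim = 2 * 5 = 10
#   otherwise (concat samples)           ->  extra_obs_dim = 3 * 5 = 15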
def main(train_bool, manual_edit_params, load_saved_params, saved_config_path):
    #################################
    ######## Set parameters #########
    #################################

    # read in default config
    config = yaml.load(open("../config.yaml"))

    if load_saved_params:
        saved_config = yaml.load(open(saved_config_path, "r"))
        # IPython.embed()
        if train_bool:
            # replace training config only
            config["training"] = recursive_dict_merge(config["training"],
                                                      saved_config["training"])
        else:
            # replace testing config only
            config["testing"] = recursive_dict_merge(config["testing"],
                                                     saved_config["testing"])
        IPython.embed()

    vg = VariantGenerator()
    vg.add('config', [config])

    if manual_edit_params:
        # For testing, you must fill out:
        vg.add('previous_dynamics_model', [
            "/home/anagabandi/rllab-private/data/local/experiment/MAML_roach/9_11_optimization/_ubs_23_ulr_0.0num_updates1_layers_2_x500_task_list_turf_styrofoam_carpet_mlr_0.001_mbs_64_num-sgd-steps_1_reg_weight_0.001_dim_bias_5_metatrain_lr_False/model_aggIter0_epoch45"
        ])
        # vg.add('previous_dynamics_model', ["/home/anagabandi/rllab-private/data/local/experiment/MAML_roach/9_7_optimization/_ubs_23_ulr_2.0num_updates2_layers_2_x500_task_list_turf_styrofoam_carpet_mlr_0.001_mbs_64_num-sgd-steps_1_reg_weight_0.001_dim_bias_5_metatrain_lr_False/model_aggIter0_epoch45"])
        vg.add('restore_previous_dynamics_model', [True])

        # For testing, please customize these:
        vg.add('num_steps_per_rollout', [110])
        vg.add('desired_shape_for_rollout', ["straight"])
        vg.add('save_rollout_run_num', [1])
        vg.add('dynamic_evaluation', [True])  #####~!!!!!!!!!!!!!!!!!!

        vg.add('meta_batch_size', [64])
        vg.add('meta_lr', [0.001])
        vg.add('update_batch_size', [23])
        vg.add('max_runs_per_surface', [5])  # 396
        vg.add('num_updates', [1])
        vg.add('update_lr', [0.1])
        vg.add("task_list", [["all"]])  # "all"
        vg.add('max_epochs', [50])
        vg.add('num_sgd_steps', [1])

        # Aggregation
        vg.add('ratio_new', [0.9])
        vg.add('curr_agg_iter', [0])  # 0, 1, 2, etc.

        # Misc
        vg.add('horizon', [5])
        # vg.add('use_reg', [True])
        vg.add('seed', [0])
        vg.add('nonlinearity', ['relu'])
        if config['training']['use_reg']:
            vg.add('regularization_weight', [0.001])
        vg.add('use_clip', [True])
        vg.add("weight_initializer", ["xavier"])
        vg.add("dim_hidden", [[500, 500]])
        vg.add('optimizer', ["adam"])
        vg.add('dim_bias', [5])
        vg.add('use_momentum', [False])
        vg.add('learn_inner_loss', [False])

    for v in vg.variants():
        time.sleep(1.)

        if manual_edit_params:
            _v = v.copy()
            del _v['config'], _v['_hidden_keys']
            v['config'] = replace_in_dict(v['config'], _v)

        if train_bool:
            # Want the testing parameters to match the training parameters,
            # so you can easily load this saved config for testing
            v['config']['testing'] = recursive_dict_merge(
                v['config']['testing'], v['config']['training'])

        # Example foldername if training
        # v['exp_name'] = "MAML_roach/9_11_optimization/" + "_ubs_" + str(v['config']['training']['update_batch_size']) + "_ulr_" + str(v['config']['training']['update_lr']) + "num_updates" + str(v['config']['training']['num_updates']) + "_layers_" + str(len(v['config']['model']['dim_hidden'])) + "_x" + str((v['config']['model']['dim_hidden'])[0]) + "_task_list_" + "_".join(v['config']['training']['task_list']) + "_mlr_" + str(v['config']['training']['meta_lr']) + "_mbs_" + str(v['config']['testing']['meta_batch_size']) + "_num-sgd-steps_" + str(v['config']['training']['num_sgd_steps']) + '_reg_weight_' + str(v['config']['training']['regularization_weight']) + "_dim_bias_" + str(v['config']['model']['dim_bias'])

        # Example foldername if testing (i.e. you can place the rollouts in the
        # same folder as the model used)
        # v['exp_name'] = "/home/anagabandi/rllab-private/data/local/experiment/MAML_roach/9_11_optimization/_ubs_23_ulr_0.0num_updates1_layers_2_x500_task_list_turf_styrofoam_carpet_mlr_0.001_mbs_64_num-sgd-steps_1_reg_weight_0.001_dim_bias_5_metatrain_lr_False/video"
        v['exp_name'] = "/home/anagabandi/roach_workspace/src/gbac_roach/videos/de/shell_shift"
        v['train_bool'] = train_bool

        run_experiment_lite(
            run,
            sync_s3_pkl=True,
            periodic_sync=True,
            variant=v,
            snapshot_mode="all",
            mode="local",
            use_cloudpickle=True,
            exp_name=v['exp_name'],
            use_gpu=False,
            pre_commands=[
                # "yes | pip install --upgrade pip",
                "yes | pip install tensorflow=='1.4.1'",
                "yes | pip install --upgrade cloudpickle"
            ],
            seed=v['config']['seed'])
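# recursive_dict_merge is a project helper defined elsewhere; a minimal sketch
# of the behavior assumed here (values from `update` override those in `base`,
# recursing into nested dicts rather than replacing them wholesale):
def _recursive_dict_merge_sketch(base, update):
    merged = dict(base)
    for k, v in update.items():
        if isinstance(v, dict) and isinstance(merged.get(k), dict):
            merged[k] = _recursive_dict_merge_sketch(merged[k], v)
        else:
            merged[k] = v
    return merged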
def run_experiment(argv):
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()
    vg.add('n_itr', [5000])
    vg.add('fixed_gains', [True, False])
    vg.add('stability_cost_coef', [0.0, 0.01])
    vg.add('ctrl_cost_coef', [0, 0.0005, 0.001, 0.005])
    vg.add('alive_bonus', [0, 1])
    vg.add('step_size', [0.02])
    vg.add('seed', [1, 11])
    vg.add('discount', [0.99])
    vg.add('path_length', [200])
    vg.add('batch_size', [50000])
    vg.add('hidden_nonlinearity', ['tanh'])
    vg.add('hidden_sizes', [(64, 64)])

    variants = vg.variants()

    # ----------------------- AWS configuration ------------------------------
    if args.mode == 'ec2':
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        n_parallel = info['vCPU']
    else:
        n_parallel = 12

    if args.mode == 'ec2':
        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])
        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    # ----------------------- TRAINING ---------------------------------------
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "trpo_train_cassie_mujoco_%.3f_%i_%i_id_%i" % (
            v['step_size'], v['batch_size'], v['seed'], exp_id)
        v = instantiate_class_stings(v)

        subnet = random.choice(subnets)
        config.AWS_REGION_NAME = subnet[:-1]
        config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
            config.AWS_REGION_NAME]
        config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
            config.AWS_REGION_NAME]
        config.AWS_SECURITY_GROUP_IDS = \
            config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            # Specifies the seed for the experiment. If this is not provided,
            # a random seed will be used
            seed=v["seed"],
            # sync_all_data_node_to_s3=True,
            python_command="python3",
            pre_commands=[
                "yes | pip install --upgrade pip",
                "yes | pip install tensorflow=='1.6.0'",
                "yes | pip install --upgrade cloudpickle"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
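# Worked example of the exp_name pattern used in the launcher above
# (the exp_id value 123 is hypothetical):
#   "trpo_train_cassie_mujoco_%.3f_%i_%i_id_%i" % (0.02, 50000, 1, 123)
#   -> "trpo_train_cassie_mujoco_0.020_50000_1_id_123"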
def run_experiment(argv):
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument('--n_gpu', type=int, default=0, help='Number of GPUs')
    parser.add_argument('--ctx', type=int, default=4,
                        help='Number of tasks per GPU')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()
    vg.add('seed', [22, 33])

    # env spec
    vg.add('env', ['HalfCheetahEnvRandParams'])  # HalfCheetahEnvRandParams
    vg.add('log_scale_limit', [0.0])
    vg.add('path_length', [200, 500])

    # Model-based MAML algo spec
    vg.add('n_itr', [100])
    vg.add('step_size', [0.01])
    vg.add('discount', [0.99])
    vg.add('batch_size_env_samples', [4000])
    vg.add('initial_random_samples', [4000])
    vg.add('num_models', [5, 10])
    vg.add('n_candidates', [1000])
    vg.add('horizon', [10])

    # neural network configuration
    vg.add('hidden_nonlinearity_model', ['relu'])
    vg.add('hidden_sizes_model', [(512, 512)])
    vg.add('dynamic_model_epochs', [(200, 200)])
    vg.add('weight_normalization_model', [True])
    vg.add('reinit_model_cycle', [0])
    vg.add('valid_split_ratio', [0.2])
    vg.add('rolling_average_persitency', [0.99])

    # other stuff
    vg.add('exp_prefix', [EXP_PREFIX])

    variants = vg.variants()

    default_dict = dict(exp_prefix=EXP_PREFIX,
                        snapshot_mode="gap",
                        snapshot_gap=5,
                        periodic_sync=True,
                        sync_s3_pkl=True,
                        sync_s3_log=True,
                        python_command="python3",
                        pre_commands=[
                            "yes | pip install tensorflow=='1.6.0'",
                            "pip list",
                            "yes | pip install --upgrade cloudpickle"
                        ],
                        use_cloudpickle=True,
                        variants=variants)

    # ----------------------- AWS configuration ------------------------------
    if args.mode == 'ec2':
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        n_parallel = int(info["vCPU"] / 2)  # make the default 4 if not using ec2
    else:
        n_parallel = 6

    if args.mode == 'ec2':
        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])
        subnets = cheapest_subnets(ec2_instance, num_subnets=NUM_EC2_SUBNETS)
        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    # ----------------------- TRAINING ---------------------------------------
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "model_based_mpc_train_env_%s_%i_%i_%i_id_%i" % (
            v['env'], v['path_length'], v['batch_size_env_samples'], v['seed'],
            exp_id)
        v = instantiate_class_stings(v)

        if args.mode == 'ec2':
            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            snapshot_mode="gap",
            snapshot_gap=5,
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            # Specifies the seed for the experiment. If this is not provided,
            # a random seed will be used
            seed=v["seed"],
            python_command='python3',
            pre_commands=[
                "yes | pip install tensorflow=='1.6.0'",
                "pip list",
                "yes | pip install --upgrade cloudpickle"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
def run_experiment(argv):
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()
    vg.add('env', [
        'HalfCheetahEnvRandParams', 'AntEnvRandParams', 'WalkerEnvRandomParams',
        'SwimmerEnvRandParams', 'HopperEnvRandParams', 'PR2EnvRandParams'
    ])
    vg.add('total_timesteps', [int(10**8)])
    vg.add('seed', [31, 41, 32])
    vg.add('discount', [0.99])
    vg.add('path_length', [200])
    vg.add('batch_size', [5000])
    vg.add('num_timesteps', [10**7])
    vg.add('hidden_nonlinearity', ['tanh'])
    vg.add('hidden_sizes', [(32, 32)])

    variants = vg.variants()
    from pprint import pprint
    pprint(variants)

    # ----------------------- AWS configuration ------------------------------
    if args.mode == 'ec2':
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        n_parallel = int(info["vCPU"] / 2)  # make the default 4 if not using ec2
    else:
        n_parallel = 6

    if args.mode == 'ec2':
        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
        print(
            'Running on type {}, with price {}, parallel {} on the subnets: '.
            format(config.AWS_INSTANCE_TYPE, config.AWS_SPOT_PRICE, n_parallel),
            *subnets)

    # ----------------------- TRAINING ---------------------------------------
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "acktr_%s_%i_%i_id_%i" % (v['env'], v['batch_size'],
                                             v['seed'], exp_id)
        v['exp_name'] = exp_name
        v['exp_prefix'] = EXP_PREFIX
        v = instantiate_class_stings(v)

        subnet = random.choice(subnets)
        config.AWS_REGION_NAME = subnet[:-1]
        config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
            config.AWS_REGION_NAME]
        config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
            config.AWS_REGION_NAME]
        config.AWS_SECURITY_GROUP_IDS = \
            config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            sync_s3_pkl=True,
            periodic_sync=True,
            # Specifies the seed for the experiment. If this is not provided,
            # a random seed will be used
            seed=v["seed"],
            # sync_all_data_node_to_s3=True,
            python_command="python3",  # sys.executable
            pre_commands=[
                "yes | pip install tensorflow=='1.6.0'",
                "yes | pip install --upgrade cloudpickle",
                "yes | pip install gym==0.10.5"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
    # policy initialization
    vg.add('output_gain', [0.1])
    vg.add('policy_init_std', [1])
    vg.add('learn_std', [False])  # 2
    vg.add('adaptive_std', [False])
    vg.add('discount', [0.998])
    vg.add('seed_with', ['only_goods'])
    vg.add('seed', [args.seed])
    if args.scratch_dir:
        vg.add('scratch_dir', [args.scratch_dir])

    exp_prefix = 'ant-startgen-smartreplay4'
    print("\n" + "**********" * 10 +
          "\nexp_prefix: {}\nvariants: {}".format(exp_prefix, vg.size))

    variants = vg.variants()
    assert len(variants) == 1
    vv = variants[0]
    run_experiment_lite(
        # use_cloudpickle=False,
        stub_method_call=run_task,
        variant=vv,
        mode='local',
        n_parallel=8,
        snapshot_mode="last",
        seed=vv['seed'],
        exp_prefix=exp_prefix,
        # exp_name=exp_name,
        log_dir=args.log_dir,
    )
def experiment(variant):
    # we have to generate the combinations for the env_specs
    env_specs = variant['env_specs']
    env_specs_vg = VariantGenerator()
    env_spec_constants = {}
    env_spec_ranges = {}
    for k, v in env_specs.items():
        if isinstance(v, list):
            env_specs_vg.add(k, v)
            env_spec_ranges[k] = v
        else:
            env_spec_constants[k] = v

    env_specs_list = []
    for es in env_specs_vg.variants():
        del es['_hidden_keys']
        es.update(env_spec_constants)
        env_specs_list.append(es)

    env_sampler = EnvSampler(env_specs_list)

    # make the normalizer function for the env_params
    mean = []
    half_diff = []
    for k in sorted(env_spec_ranges.keys()):
        r = env_spec_ranges[k]
        if len(r) == 1:
            mean.append(0)
            half_diff.append(r[0])
        else:
            mean.append((r[0] + r[1]) / 2.0)
            half_diff.append((r[1] - r[0]) / 2.0)
    mean = np.array(mean)
    half_diff = np.array(half_diff)

    def env_params_normalizer(params):
        return (params - mean) / half_diff

    variant['algo_params']['env_params_normalizer'] = env_params_normalizer

    # set up similar to non-meta version
    sample_env, _ = env_sampler()
    if variant['algo_params']['concat_env_params_to_obs']:
        meta_params_dim = sample_env.env_meta_params.shape[0]
    else:
        meta_params_dim = 0
    obs_dim = int(np.prod(sample_env.observation_space.shape))
    action_dim = int(np.prod(sample_env.action_space.shape))

    net_size = variant['net_size']
    vf = FlattenMlp(
        hidden_sizes=[net_size, net_size],
        input_size=obs_dim + meta_params_dim,
        output_size=1,
    )
    if exp_specs['use_new_sac']:
        qf1 = FlattenMlp(
            hidden_sizes=[net_size, net_size],
            input_size=obs_dim + action_dim + meta_params_dim,
            output_size=1,
        )
        qf2 = FlattenMlp(
            hidden_sizes=[net_size, net_size],
            input_size=obs_dim + action_dim + meta_params_dim,
            output_size=1,
        )
        policy = ReparamTanhMultivariateGaussianPolicy(
            hidden_sizes=[net_size, net_size],
            obs_dim=obs_dim + meta_params_dim,
            action_dim=action_dim,
        )
        algorithm = NewMetaSoftActorCritic(env_sampler=env_sampler,
                                           policy=policy,
                                           qf1=qf1,
                                           qf2=qf2,
                                           vf=vf,
                                           **variant['algo_params'])
    else:
        policy = TanhGaussianPolicy(
            hidden_sizes=[net_size, net_size],
            obs_dim=obs_dim + meta_params_dim,
            action_dim=action_dim,
        )
        qf = FlattenMlp(
            hidden_sizes=[net_size, net_size],
            input_size=obs_dim + action_dim + meta_params_dim,
            output_size=1,
        )
        algorithm = MetaSoftActorCritic(env_sampler=env_sampler,
                                        policy=policy,
                                        qf=qf,
                                        vf=vf,
                                        **variant['algo_params'])

    if ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()

    return 1
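# Worked example for env_params_normalizer above (hypothetical range):
# a swept range [lo, hi] maps to [-1, 1] via mean = (lo + hi) / 2 and
# half_diff = (hi - lo) / 2; a single-valued range is simply divided by it.
#   env_spec_ranges = {'friction': [0.5, 1.5]}  ->  mean = [1.0], half_diff = [0.5]
#   env_params_normalizer(np.array([0.5]))  ->  array([-1.0])
#   env_params_normalizer(np.array([1.5]))  ->  array([ 1.0])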
def run_experiment(argv):
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()
    vg.add('env', ['SawyerPushAndReachXYZEnv'])
    vg.add('fix_goal', [False])
    vg.add('goal_slack', [0.0, 0.05, 0.1])
    vg.add('init_slack', [0.0, 0.05])
    vg.add('reward_type', ['puck_distance_hand_distance_after_success'])
    vg.add('seed', [1, 10])
    vg.add('n_itr', [1001])
    vg.add('fast_lr', [0.1])
    vg.add('outer_lr', [1e-3])
    vg.add('meta_batch_size', [40])
    vg.add('num_grad_updates', [1])
    vg.add('fast_batch_size', [20])
    vg.add('discount', [0.99])
    vg.add('path_length', [200])
    vg.add('hidden_nonlinearity', ['tanh'])
    vg.add('hidden_sizes', [(64, 64)])
    vg.add('trainable_step_size', [False])
    vg.add('bias_transform', [False])
    vg.add('entropy_bonus', [0])

    # PPO-MAML params
    vg.add('clip_eps', [0.5])
    vg.add('clip_outer', [True])
    vg.add('target_outer_step', [0])
    vg.add('init_outer_kl_penalty', [0])
    vg.add('adaptive_outer_kl_penalty', [False])
    vg.add('target_inner_step', [1e-2])
    vg.add('init_inner_kl_penalty', [1e-3])
    vg.add('adaptive_inner_kl_penalty', [True])
    vg.add('max_epochs', [5])
    vg.add('num_batches', [1])
    vg.add('parallel_sampler', [True])

    variants = vg.variants()

    # ----------------------- AWS configuration ------------------------------
    if args.mode == 'ec2':
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        n_parallel = info['vCPU']
    else:
        n_parallel = 8

    if args.mode == 'ec2':
        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])
        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    # ----------------------- TRAINING ---------------------------------------
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "%s_%s_%.1f_%.3f_%i_%i_id_%i" % (
            EXP_PREFIX, v['env'], v['clip_eps'], v['target_inner_step'],
            v['max_epochs'], v['seed'], exp_id)
        v = instantiate_class_stings(v)

        subnet = random.choice(subnets)
        config.AWS_REGION_NAME = subnet[:-1]
        config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
            config.AWS_REGION_NAME]
        config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
            config.AWS_REGION_NAME]
        config.AWS_SECURITY_GROUP_IDS = \
            config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            # Keep snapshot parameters every snapshot_gap iterations
            snapshot_mode="gap",
            snapshot_gap=200,
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            # Specifies the seed for the experiment. If this is not provided,
            # a random seed will be used
            seed=v["seed"],
            # sync_all_data_node_to_s3=True,
            python_command="python3",
            pre_commands=[
                "yes | pip install --upgrade pip",
                "yes | pip install --upgrade cloudpickle"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
def main(config_path, extra_config):
    #################################
    ## INIT config and vars
    #################################

    # read in config vars
    config = yaml.load(open(config_path))
    config = replace_in_dict(config, extra_config)

    vg = VariantGenerator()
    vg.add('config', [config])
    ## vg.add('batch_size', [2000])  ######### to do: use this to decide how much data to read in from disk
    vg.add('meta_batch_size', [64])  # 1300
    vg.add('meta_lr', [0.001])
    vg.add('update_batch_size', [16])
    vg.add('update_lr', [1.0])  # [1.0, 0.1, 0.01, 0.001]
    vg.add('num_updates', [3])
    # vg.add('max_epochs', [50])
    vg.add('horizon', [5])
    vg.add('curr_agg_iter', [0])
    # vg.add('use_reg', [True, False])  # This only changes the save filename! The config.yaml var needs to agree with this one if True
    vg.add('use_reg', [True])  # This only changes the save filename! The config.yaml var needs to agree with this one if True
    vg.add('seed', [0])
    vg.add('nonlinearity', ['relu'])
    if config['training']['use_reg']:
        vg.add('regularization_weight', [0.000000001])  # no reg for carp on carp: 0.000000001
    vg.add('use_clip', [True])
    vg.add("weight_initializer", ["truncated_normal"])
    vg.add("dim_hidden", [[800], [800, 800]])
    vg.add("task_list", [["all"]])
    # vg.add('max_runs_per_surface', [5])
    # vg.add('backward_discouragement', [10, 11])

    # IPython.embed()
    ## print("\n" + "**********" * 10 + "\nexp_prefix: {}\nvariants: {}".format('MAML', vg.size))

    for v in vg.variants():
        time.sleep(1.)
        # IPython.embed()
        _v = v.copy()
        del _v['config'], _v['_hidden_keys']
        v['config'] = replace_in_dict(v['config'], _v)
        # IPython.embed()

        # Alternative exp_name schemes, kept for reference:
        # v['exp_name'] = exp_name = v['config']['logging']['log_dir'] + '__'.join([v['config']['experiment_type']] + [key + '_' + str(val) for key, val in _v.items() if key not in ['name', 'experiment_type', 'dim_hidden']])
        # v['exp_name'] = exp_name = v['config']['logging']['log_dir'] + v['config']['experiment_type'] + '__max_epochs_5__meta_batch_size_40__batch_size_2000__update_batch_size_20__horizon_5'
        # v['exp_name'] = v['config']['logging']['log_dir'] + v['config']['experiment_type'] + "_all_terrain_mbs_" + str(v['config']['training']['meta_batch_size']) + "_ubs_" + str(v['config']['training']['update_batch_size']) + "NON_GBAC"
        # if v['config']['training']['use_reg']:
        #     v['exp_name'] = v['exp_name'] + "_reg_" + str(v['config']['training']['regularization_weight'])
        # v['exp_name'] = "MAML_roach/terrain_types__regularization_weight_0.001__use_reg_True__meta_batch_size_250__meta_lr_0.001__horizon_5__max_epochs_80__update_lr_0.1__curr_agg_iter_0__update_batch_size_16"
        # v['exp_name'] = "MAML_roach/thorough_debug/" + "ulr_" + str(v['config']['training']['update_lr']) + "_use_reg_" + str(v['config']['training']['use_reg']) + "_use_clip_" + str(v['config']['training']['use_clip']) + "_use_clf_" + str(v['config']['training']['use_clf']) + "_nonx_001"
        # v['exp_name'] = "MAML_roach_copy/Tuesday_optimization/all_terrains_with_carpet_on_carpet_params_except_lr_" + str(v['config']['training']['update_lr'])
        # v['exp_name'] = "MAML_roach_copy/Tuesday_optimization/num_updates_2/num_updates_" + str(v['config']['training']['num_updates']) + "_lr_" + str(v['config']['training']['update_lr']) + "_ubs_" + str(v['config']['training']['update_batch_size']) + "_reg_weight_" + str(v['config']['training']['regularization_weight'])
        # v['exp_name'] = "MAML_roach_copy/Tuesday_optimization/averaging_debug"
        # v['exp_name'] = "MAML_roach_copy/Tuesday_optimization/" + '__'.join([v['config']['experiment_type']] + [key + '_' + str(val) for key, val in _v.items() if key not in ['name', 'experiment_type', 'dim_hidden']])
        # IPython.embed()
        v['exp_name'] = ("MAML_roach_copy/Tuesday_night_optimization/"
                         + "_ubs_" + str(v['config']['training']['update_batch_size'])
                         + "_ulr_" + str(v['config']['training']['update_lr'])
                         + "num_updates" + str(v['config']['training']['num_updates'])
                         + "_layers_" + str(len(v['config']['model']['dim_hidden']))
                         + "_x" + str((v['config']['model']['dim_hidden'])[0]))

        run_experiment_lite(
            run,
            sync_s3_pkl=True,
            periodic_sync=True,
            variant=v,
            snapshot_mode="all",
            mode="local",
            use_cloudpickle=True,
            exp_name=v['exp_name'],
            use_gpu=False,
            pre_commands=[
                # "yes | pip install --upgrade pip",
                "yes | pip install tensorflow=='1.4.1'",
                "yes | pip install --upgrade cloudpickle"
            ],
            seed=v['config']['seed']
        )
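# replace_in_dict is likewise a project helper defined elsewhere; a minimal
# sketch of the behavior assumed above (push each flat variant key into the
# nested config, overwriting any matching key at any depth):
def _replace_in_dict_sketch(d, replacements):
    out = {}
    for k, v in d.items():
        if isinstance(v, dict):
            out[k] = _replace_in_dict_sketch(v, replacements)
        elif k in replacements:
            out[k] = replacements[k]
        else:
            out[k] = v
    return out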