def run_experiment(argv):

    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
             'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument('--n_gpu', type=int, default=0,
                        help='Number of GPUs')
    parser.add_argument('--ctx', type=int, default=4,
                        help='Number of tasks per GPU')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()
    vg.add('seed', [22, 23, 24])

    # env spec
    vg.add('env', ['WalkerEnvRandomParams'])
    vg.add('log_scale_limit', [0.0])
    vg.add('target_velocity', [None])
    vg.add('path_length_env', [200])

    # Model-based MAML algo spec
    vg.add('n_itr', [500])
    vg.add('fast_lr', [0.001])
    vg.add('outer_lr', [1e-3])
    vg.add('meta_batch_size', [20])  # must be a multiple of num_models
    vg.add('discount', [0.99])
    vg.add('entropy_bonus', [0])
    vg.add('clip_eps', [0.5, 0.7])
    vg.add('target_inner_step', [3e-3, 1e-2, 3e-2])
    vg.add('init_kl_penalty', [1e-10])
    vg.add('adaptive_kl_penalty', [True])
    vg.add('max_epochs', [8])
    vg.add('num_batches', [1])
    vg.add('batch_size_env_samples', [1])
    vg.add('batch_size_dynamics_samples', [50])
    vg.add('initial_random_samples', [5000])
    vg.add('num_maml_steps_per_iter', [5, 15, 25])
    vg.add('retrain_model_when_reward_decreases', [False])
    vg.add('reset_from_env_traj', [False])
    vg.add('trainable_step_size', [False])
    vg.add('num_models', [5])

    # neural network configuration
    vg.add('hidden_nonlinearity_policy', ['tanh'])
    vg.add('hidden_nonlinearity_model', ['relu'])
    vg.add('hidden_sizes_policy', [(32, 32)])
    vg.add('hidden_sizes_model', [(512, 512, 512)])
    vg.add('weight_normalization_model', [True])
    vg.add('reset_policy_std', [False])
    vg.add('reinit_model_cycle', [0])
    vg.add('optimizer_model', ['adam'])
    vg.add('policy', ['MAMLImprovedGaussianMLPPolicy'])
    vg.add('bias_transform', [False])
    vg.add('param_noise_std', [0.0])
    vg.add('dynamic_model_max_epochs', [(500, 500)])
    vg.add('valid_split_ratio', [0.2])
    vg.add('rolling_average_persitency', [0.95])

    # other stuff
    vg.add('exp_prefix', [EXP_PREFIX])

    variants = vg.variants()

    default_dict = dict(exp_prefix=EXP_PREFIX,
                        snapshot_mode="gap",
                        snapshot_gap=5,
                        periodic_sync=True,
                        sync_s3_pkl=True,
                        sync_s3_log=True,
                        python_command="python3",
                        pre_commands=["yes | pip install tensorflow=='1.6.0'",
                                      "pip list",
                                      "yes | pip install --upgrade cloudpickle"],
                        use_cloudpickle=True,
                        variants=variants)

    if args.mode == 'mgpu':
        current_path = os.path.dirname(os.path.abspath(__file__))
        script_path = os.path.join(current_path, 'gpu-mb-mpo-train.py')
        n_gpu = args.n_gpu
        if n_gpu == 0:
            # fall back to counting the GPUs exposed by the NVIDIA driver
            n_gpu = len(os.listdir('/proc/driver/nvidia/gpus'))
        run_multi_gpu(script_path, default_dict, n_gpu=n_gpu,
                      ctx_per_gpu=args.ctx)

    else:
        # ----------------------- AWS configuration --------------------------
        if args.mode == 'ec2':
            info = config.INSTANCE_TYPE_INFO[ec2_instance]
            n_parallel = int(info["vCPU"])
        else:
            n_parallel = 12

        if args.mode == 'ec2':
            config.AWS_INSTANCE_TYPE = ec2_instance
            config.AWS_SPOT_PRICE = str(info["price"])
            subnets = cheapest_subnets(ec2_instance,
                                       num_subnets=NUM_EC2_SUBNETS)

            print("\n" + "**********" * 10 +
                  "\nexp_prefix: {}\nvariants: {}".format('PPO', len(variants)))
            print('Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

        # ----------------------- TRAINING -----------------------------------
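        # The swept axes above (3 seeds x 2 clip_eps x 3 target_inner_step
        # x 3 num_maml_steps_per_iter) expand to 54 variants; sampling the
        # exp_ids below without replacement from range(1, 1000) keeps the
        # id of each variant unique (valid while len(variants) <= 999).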
        exp_ids = random.sample(range(1, 1000), len(variants))
        for v, exp_id in zip(variants, exp_ids):
            exp_name = "model_ensemble_maml_train_env_%s_%i_%i_%i_%i_id_%i" % (
                v['env'], v['path_length_env'], v['num_models'],
                v['batch_size_env_samples'], v['seed'], exp_id)
            v = instantiate_class_stings(v)

            if args.mode == 'ec2':
                subnet = random.choice(subnets)
                config.AWS_REGION_NAME = subnet[:-1]
                config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                    config.AWS_REGION_NAME]
                config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                    config.AWS_REGION_NAME]
                config.AWS_SECURITY_GROUP_IDS = \
                    config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                        config.AWS_REGION_NAME]

            run_experiment_lite(
                run_train_task,
                exp_prefix=EXP_PREFIX,
                exp_name=exp_name,
                # Number of parallel workers for sampling
                n_parallel=n_parallel,
                snapshot_mode="gap",
                snapshot_gap=5,
                periodic_sync=True,
                sync_s3_pkl=True,
                sync_s3_log=True,
                # Specifies the seed for the experiment. If this is not
                # provided, a random seed will be used
                seed=v["seed"],
                python_command="python3",
                pre_commands=["yes | pip install tensorflow=='1.6.0'",
                              "pip list",
                              "yes | pip install --upgrade cloudpickle"],
                mode=args.mode,
                use_cloudpickle=True,
                variant=v,
            )
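
# `vg.variants()` above expands the added axes into the Cartesian product of
# all value lists. Below is a minimal, self-contained sketch of that expansion;
# the helper `_expand_variants` is hypothetical and simpler than rllab's actual
# VariantGenerator (which also supports dependent variants via decorators):
import itertools


def _expand_variants(axes):
    """Yield one variant dict per point in the Cartesian product of `axes`.

    `axes` maps a variant key to its list of candidate values, e.g.
    {'seed': [22, 23, 24], 'clip_eps': [0.5, 0.7]} yields 6 variant dicts.
    """
    keys = list(axes.keys())
    for values in itertools.product(*(axes[k] for k in keys)):
        yield dict(zip(keys, values))
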
def run_experiment(argv):

    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
             'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument('--n_gpu', type=int, default=0,
                        help='Number of GPUs')
    parser.add_argument('--ctx', type=int, default=4,
                        help='Number of tasks per GPU')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()
    vg.add('env', ['HalfCheetahEnvRandParams'])  # TODO: also try ReacherEnvRandParams
    vg.add('n_itr', [40])
    vg.add('log_scale_limit', [0.0])
    vg.add('step_size', [0.01])
    vg.add('seed', [22, 33, 55])  # TODO: set back to [1, 11, 21, 31, 41]
    vg.add('discount', [0.99])
    vg.add('path_length', [100])
    vg.add('batch_size_env_samples', [4000])
    vg.add('batch_size_dynamics_samples', [100000])
    vg.add('initial_random_samples', [None])
    vg.add('dynamic_model_epochs', [(1000, 1000)])  # TODO
    vg.add('num_gradient_steps_per_iter', [30])  # TODO
    vg.add('hidden_nonlinearity_policy', ['tanh'])
    vg.add('hidden_nonlinearity_model', ['relu'])
    vg.add('hidden_sizes_policy', [(32, 32)])
    vg.add('hidden_sizes_model', [(512, 512)])
    vg.add('weight_normalization_model', [False])
    vg.add('retrain_model_when_reward_decreases', [False])
    vg.add('reset_policy_std', [False])
    vg.add('reinit_model_cycle', [0])
    vg.add('num_models', [5])
    vg.add('output_bias_range', [(0, 0.1), (0, 0.5), (0, 1)])
    vg.add('output_noise_std', [0.0, 0.1])
    vg.add('resample_output_bias', [True, False])
    vg.add('exp_prefix', [EXP_PREFIX])

    variants = vg.variants()

    default_dict = dict(exp_prefix=EXP_PREFIX,
                        snapshot_mode="gap",
                        snapshot_gap=10,
                        periodic_sync=True,
                        sync_s3_pkl=True,
                        sync_s3_log=True,
                        python_command="python3",
                        pre_commands=["yes | pip install tensorflow=='1.6.0'",
                                      "pip list",
                                      "yes | pip install --upgrade cloudpickle"],
                        use_cloudpickle=True,
                        variants=variants)

    if args.mode == 'mgpu':
        current_path = os.path.dirname(os.path.abspath(__file__))
        script_path = os.path.join(current_path,
                                   'mgpu_model_ensemble_trpo_train.py')
        n_gpu = args.n_gpu
        if n_gpu == 0:
            # fall back to counting the GPUs exposed by the NVIDIA driver
            n_gpu = len(os.listdir('/proc/driver/nvidia/gpus'))
        run_multi_gpu(script_path, default_dict, n_gpu=n_gpu,
                      ctx_per_gpu=args.ctx)

    else:
        # ----------------------- AWS configuration --------------------------
        if args.mode == 'ec2':
            info = config.INSTANCE_TYPE_INFO[ec2_instance]
            n_parallel = int(info["vCPU"] / 2)
        else:
            # default number of parallel sampling workers when not on ec2
            n_parallel = 6

        if args.mode == 'ec2':
            config.AWS_INSTANCE_TYPE = ec2_instance
            config.AWS_SPOT_PRICE = str(info["price"])
            subnets = cheapest_subnets(ec2_instance,
                                       num_subnets=NUM_EC2_SUBNETS)

            print("\n" + "**********" * 10 +
                  "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
            print('Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

        # ----------------------- TRAINING -----------------------------------
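        # The swept axes above (3 seeds x 3 output_bias_range x 2
        # output_noise_std x 2 resample_output_bias) expand to 36 variants.
        # In ec2 mode, each run lands on a randomly chosen cheap subnet;
        # stripping the trailing availability-zone letter from the subnet name
        # (e.g. 'us-west-1a' -> 'us-west-1') yields the region used to look up
        # the matching key pair, AMI, and security groups.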
        exp_ids = random.sample(range(1, 1000), len(variants))
        for v, exp_id in zip(variants, exp_ids):
            exp_name = "model_trpo_train_env_%s_%i_%i_%i_%i_id_%i" % (
                v['env'], v['path_length'], v['num_gradient_steps_per_iter'],
                v['batch_size_env_samples'], v['seed'], exp_id)
            v = instantiate_class_stings(v)

            if args.mode == 'ec2':
                subnet = random.choice(subnets)
                config.AWS_REGION_NAME = subnet[:-1]
                config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                    config.AWS_REGION_NAME]
                config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                    config.AWS_REGION_NAME]
                config.AWS_SECURITY_GROUP_IDS = \
                    config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                        config.AWS_REGION_NAME]

            run_experiment_lite(
                run_train_task,
                exp_prefix=EXP_PREFIX,
                exp_name=exp_name,
                # Number of parallel workers for sampling
                n_parallel=n_parallel,
                snapshot_mode="gap",
                snapshot_gap=5,
                periodic_sync=True,
                sync_s3_pkl=True,
                sync_s3_log=True,
                # Specifies the seed for the experiment. If this is not
                # provided, a random seed will be used
                seed=v["seed"],
                python_command='python3',
                pre_commands=["yes | pip install tensorflow=='1.6.0'",
                              "pip list",
                              "yes | pip install --upgrade cloudpickle"],
                mode=args.mode,
                use_cloudpickle=True,
                variant=v,
            )
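
# A minimal entry-point sketch, assuming each launcher above lives in its own
# script and is invoked directly from the command line (the __main__ guard is
# an assumption, not shown in the original excerpt):
if __name__ == "__main__":
    import sys
    run_experiment(sys.argv)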