def run_experiment(argv):
    """Launch a hyperparameter sweep of model-based MAML (MB-MPO) training runs.

    Builds the variant grid with ``VariantGenerator`` and dispatches one run per
    variant, either across local GPUs (``--mode mgpu``), on AWS EC2
    (``--mode ec2``), or locally (default) via rllab's ``run_experiment_lite``.

    Args:
        argv: full command-line argument vector; ``argv[0]`` is skipped.

    NOTE(review): relies on module-level names defined elsewhere in this file
    (``EXP_PREFIX``, ``ec2_instance``, ``NUM_EC2_SUBNETS``, ``config``,
    ``run_multi_gpu``, ``run_train_task``, ``instantiate_class_stings``) —
    verify they are in scope.
    """
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument('--n_gpu', type=int, default=0, help='Number of GPUs')
    parser.add_argument('--ctx', type=int, default=4,
                        help='Number of tasks per GPU')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    # Cartesian product over every vg.add(...) list below.
    vg = VariantGenerator()
    vg.add('seed', [22, 23, 24])
    # env spec
    vg.add('env', ['WalkerEnvRandomParams'])
    vg.add('log_scale_limit', [0.0])
    vg.add('target_velocity', [None])
    vg.add('path_length_env', [200])
    # Model-based MAML algo spec
    vg.add('n_itr', [500])
    vg.add('fast_lr', [0.001])
    vg.add('outer_lr', [1e-3])
    vg.add('meta_batch_size', [20])  # must be a multiple of num_models
    vg.add('discount', [0.99])
    vg.add('entropy_bonus', [0])
    vg.add('clip_eps', [0.5, 0.7])
    vg.add('target_inner_step', [3e-3, 1e-2, 3e-2])
    vg.add('init_kl_penalty', [1e-10])
    vg.add('adaptive_kl_penalty', [True])
    vg.add('max_epochs', [8])
    vg.add('num_batches', [1])
    vg.add('batch_size_env_samples', [1])
    vg.add('batch_size_dynamics_samples', [50])
    vg.add('initial_random_samples', [5000])
    vg.add('num_maml_steps_per_iter', [5, 15, 25])
    vg.add('retrain_model_when_reward_decreases', [False])
    vg.add('reset_from_env_traj', [False])
    vg.add('trainable_step_size', [False])
    vg.add('num_models', [5])
    # neural network configuration
    vg.add('hidden_nonlinearity_policy', ['tanh'])
    vg.add('hidden_nonlinearity_model', ['relu'])
    vg.add('hidden_sizes_policy', [(32, 32)])
    vg.add('hidden_sizes_model', [(512, 512, 512)])
    vg.add('weight_normalization_model', [True])
    vg.add('reset_policy_std', [False])
    vg.add('reinit_model_cycle', [0])
    vg.add('optimizer_model', ['adam'])
    vg.add('policy', ['MAMLImprovedGaussianMLPPolicy'])
    vg.add('bias_transform', [False])
    vg.add('param_noise_std', [0.0])
    vg.add('dynamic_model_max_epochs', [(500, 500)])
    vg.add('valid_split_ratio', [0.2])
    # NOTE(review): 'persitency' spelling is part of the variant key consumed
    # downstream — do not rename here.
    vg.add('rolling_average_persitency', [0.95])
    # other stuff
    vg.add('exp_prefix', [EXP_PREFIX])

    variants = vg.variants()

    # Shared launch settings, consumed by run_multi_gpu in mgpu mode.
    default_dict = dict(exp_prefix=EXP_PREFIX,
                        snapshot_mode="gap",
                        snapshot_gap=5,
                        periodic_sync=True,
                        sync_s3_pkl=True,
                        sync_s3_log=True,
                        python_command="python3",
                        pre_commands=[
                            "yes | pip install tensorflow=='1.6.0'",
                            "pip list",
                            "yes | pip install --upgrade cloudpickle"
                        ],
                        use_cloudpickle=True,
                        variants=variants)

    if args.mode == 'mgpu':
        current_path = os.path.dirname(os.path.abspath(__file__))
        script_path = os.path.join(current_path, 'gpu-mb-mpo-train.py')
        n_gpu = args.n_gpu
        if n_gpu == 0:
            # Auto-detect GPU count from the NVIDIA driver's proc entries
            # (Linux-only; raises if the driver is not installed).
            n_gpu = len(os.listdir('/proc/driver/nvidia/gpus'))
        run_multi_gpu(script_path, default_dict, n_gpu=n_gpu,
                      ctx_per_gpu=args.ctx)
    else:
        # ----------------------- AWS configuration ---------------------------------
        if args.mode == 'ec2':
            info = config.INSTANCE_TYPE_INFO[ec2_instance]
            n_parallel = int(info["vCPU"])
        else:
            n_parallel = 12

        if args.mode == 'ec2':
            # Mutates rllab's module-global config in place; order matters.
            config.AWS_INSTANCE_TYPE = ec2_instance
            config.AWS_SPOT_PRICE = str(info["price"])
            subnets = cheapest_subnets(ec2_instance,
                                       num_subnets=NUM_EC2_SUBNETS)
            print("\n" + "**********" * 10 +
                  "\nexp_prefix: {}\nvariants: {}".format('PPO', len(variants)))
            print('Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

        # ----------------------- TRAINING ---------------------------------------
        # Unique random ids keep exp_names distinct across repeated sweeps.
        # NOTE(review): random.sample raises ValueError if len(variants) > 999.
        exp_ids = random.sample(range(1, 1000), len(variants))
        for v, exp_id in zip(variants, exp_ids):
            exp_name = "model_ensemble_maml_train_env_%s_%i_%i_%i_%i_id_%i" % (
                v['env'], v['path_length_env'], v['num_models'],
                v['batch_size_env_samples'], v['seed'], exp_id)

            # Replace class-name strings in the variant with actual classes.
            v = instantiate_class_stings(v)

            if args.mode == 'ec2':
                # Pin region/AMI/keys to the randomly chosen (cheapest) subnet;
                # subnet names end with an availability-zone letter, hence [:-1].
                subnet = random.choice(subnets)
                config.AWS_REGION_NAME = subnet[:-1]
                config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                    config.AWS_REGION_NAME]
                config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                    config.AWS_REGION_NAME]
                config.AWS_SECURITY_GROUP_IDS = \
                    config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                        config.AWS_REGION_NAME]

            run_experiment_lite(
                run_train_task,
                exp_prefix=EXP_PREFIX,
                exp_name=exp_name,
                # Number of parallel workers for sampling
                n_parallel=n_parallel,
                snapshot_mode="gap",
                snapshot_gap=5,
                periodic_sync=True,
                sync_s3_pkl=True,
                sync_s3_log=True,
                # Specifies the seed for the experiment. If this is not provided, a random seed
                # will be used
                seed=v["seed"],
                python_command="python3",
                pre_commands=[
                    "yes | pip install tensorflow=='1.6.0'",
                    "pip list",
                    "yes | pip install --upgrade cloudpickle"
                ],
                mode=args.mode,
                use_cloudpickle=True,
                variant=v,
            )
def run_experiment(argv):
    """Launch a hyperparameter sweep of model-ensemble TRPO training runs.

    Builds the variant grid with ``VariantGenerator`` and dispatches one run per
    variant, either across local GPUs (``--mode mgpu``), on AWS EC2
    (``--mode ec2``), or locally (default) via rllab's ``run_experiment_lite``.

    Args:
        argv: full command-line argument vector; ``argv[0]`` is skipped.

    NOTE(review): relies on module-level names defined elsewhere in this file
    (``EXP_PREFIX``, ``ec2_instance``, ``NUM_EC2_SUBNETS``, ``config``,
    ``run_multi_gpu``, ``run_train_task``, ``instantiate_class_stings``).
    """
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument('--n_gpu', type=int, default=0, help='Number of GPUs')
    parser.add_argument('--ctx', type=int, default=4,
                        help='Number of tasks per GPU')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()
    vg.add('env', ['HalfCheetahEnvRandParams'
                   ])  # HalfCheetahEnvRandParams #TODO ReacherEnvRandParams
    vg.add('n_itr', [40])
    vg.add('log_scale_limit', [0.0])
    vg.add('step_size', [0.01])
    vg.add('seed', [22, 33, 55])  #TODO set back to [1, 11, 21, 31, 41]
    vg.add('discount', [0.99])
    vg.add('path_length', [100])
    vg.add('batch_size_env_samples', [4000])
    vg.add('batch_size_dynamics_samples', [100000])
    vg.add('initial_random_samples', [None])
    vg.add('dynamic_model_epochs', [(1000, 1000)])  #TODO
    vg.add('num_gradient_steps_per_iter', [30])  #TODO
    vg.add('hidden_nonlinearity_policy', ['tanh'])
    vg.add('hidden_nonlinearity_model', ['relu'])
    vg.add('hidden_sizes_policy', [(32, 32)])
    vg.add('hidden_sizes_model', [(512, 512)])
    vg.add('weight_normalization_model', [False])
    vg.add('retrain_model_when_reward_decreases', [False])
    vg.add('reset_policy_std', [False])
    vg.add('reinit_model_cycle', [0])
    vg.add('num_models', [5])
    vg.add('output_bias_range', [(0, 0.1), (0, 0.5), (0, 1)])
    vg.add('output_noise_std', [0.0, 0.1])
    vg.add('resample_output_bias', [True, False])
    vg.add('exp_prefix', [EXP_PREFIX])

    variants = vg.variants()

    # Shared launch settings, consumed by run_multi_gpu in mgpu mode.
    # NOTE(review): snapshot_gap here is 10 but the run_experiment_lite call
    # below uses 5 — confirm which is intended.
    default_dict = dict(exp_prefix=EXP_PREFIX,
                        snapshot_mode="gap",
                        snapshot_gap=10,
                        periodic_sync=True,
                        sync_s3_pkl=True,
                        sync_s3_log=True,
                        python_command="python3",
                        pre_commands=[
                            "yes | pip install tensorflow=='1.6.0'",
                            "pip list",
                            "yes | pip install --upgrade cloudpickle"
                        ],
                        use_cloudpickle=True,
                        variants=variants)

    if args.mode == 'mgpu':
        current_path = os.path.dirname(os.path.abspath(__file__))
        script_path = os.path.join(current_path,
                                   'mgpu_model_ensemble_trpo_train.py')
        n_gpu = args.n_gpu
        if n_gpu == 0:
            # Auto-detect GPU count from the NVIDIA driver's proc entries.
            n_gpu = len(os.listdir('/proc/driver/nvidia/gpus'))
        run_multi_gpu(script_path, default_dict, n_gpu=n_gpu,
                      ctx_per_gpu=args.ctx)
    else:
        # ----------------------- AWS configuration ---------------------------------
        if args.mode == 'ec2':
            info = config.INSTANCE_TYPE_INFO[ec2_instance]
            n_parallel = int(info["vCPU"] / 2)
        # make the default 4 if not using ec2
        else:
            n_parallel = 6

        if args.mode == 'ec2':
            # Mutates rllab's module-global config in place; order matters.
            config.AWS_INSTANCE_TYPE = ec2_instance
            config.AWS_SPOT_PRICE = str(info["price"])
            subnets = cheapest_subnets(ec2_instance,
                                       num_subnets=NUM_EC2_SUBNETS)
            print("\n" + "**********" * 10 +
                  "\nexp_prefix: {}\nvariants: {}".format('TRPO',
                                                          len(variants)))
            print('Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

        # ----------------------- TRAINING ---------------------------------------
        # NOTE(review): random.sample raises ValueError if len(variants) > 999.
        exp_ids = random.sample(range(1, 1000), len(variants))
        for v, exp_id in zip(variants, exp_ids):
            exp_name = "model_trpo_train_env_%s_%i_%i_%i_%i_id_%i" % (
                v['env'], v['path_length'], v['num_gradient_steps_per_iter'],
                v['batch_size_env_samples'], v['seed'], exp_id)

            # Replace class-name strings in the variant with actual classes.
            v = instantiate_class_stings(v)

            if args.mode == 'ec2':
                # Pin region/AMI/keys to the randomly chosen (cheapest) subnet;
                # subnet names end with an availability-zone letter, hence [:-1].
                subnet = random.choice(subnets)
                config.AWS_REGION_NAME = subnet[:-1]
                config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                    config.AWS_REGION_NAME]
                config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                    config.AWS_REGION_NAME]
                config.AWS_SECURITY_GROUP_IDS = \
                    config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                        config.AWS_REGION_NAME]

            run_experiment_lite(
                run_train_task,
                exp_prefix=EXP_PREFIX,
                exp_name=exp_name,
                # Number of parallel workers for sampling
                n_parallel=n_parallel,
                snapshot_mode="gap",
                snapshot_gap=5,
                periodic_sync=True,
                sync_s3_pkl=True,
                sync_s3_log=True,
                # Specifies the seed for the experiment. If this is not provided, a random seed
                # will be used
                seed=v["seed"],
                python_command='python3',
                pre_commands=[
                    "yes | pip install tensorflow=='1.6.0'",
                    "pip list",
                    "yes | pip install --upgrade cloudpickle"
                ],
                mode=args.mode,
                use_cloudpickle=True,
                variant=v,
            )
def run_experiment(argv):
    """Launch a hyperparameter sweep of (model-free) MAML training runs.

    Builds the variant grid with ``VariantGenerator`` and dispatches one run
    per variant, on AWS EC2 (``--mode ec2``) or locally (default), via rllab's
    ``run_experiment_lite``.

    Args:
        argv: full command-line argument vector; ``argv[0]`` is skipped.

    NOTE(review): relies on module-level names defined elsewhere in this file
    (``EXP_PREFIX``, ``ec2_instance``, ``config``, ``run_train_task``,
    ``instantiate_class_stings``).
    """
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()
    vg.add('env', ['WalkerEnvRandomParams', 'HopperEnvRandParams'])
    vg.add('n_itr', [301])
    vg.add('fast_lr', [0.001, 0.01, 0.1])
    vg.add('meta_batch_size', [40])
    vg.add('num_grad_updates', [1])
    vg.add('meta_step_size', [0.01])
    vg.add('fast_batch_size', [20])
    vg.add('seed', [1, 11, 21])
    vg.add('discount', [0.99])
    vg.add('path_length', [100])
    vg.add('hidden_nonlinearity', ['tanh'])
    vg.add('hidden_sizes', [(64, 64)])
    vg.add('trainable_step_size', [False])
    vg.add('bias_transform', [False])
    vg.add('policy', ['MAMLGaussianMLPPolicy'])
    vg.add('parallel_sampler', [True])

    variants = vg.variants()

    # ----------------------- AWS configuration ---------------------------------
    if args.mode == 'ec2':
        # Mutates rllab's module-global config in place; order matters.
        subnets = cheapest_subnets(ec2_instance, num_subnets=3)
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])
        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    if args.mode == 'ec2':
        n_parallel = 1  # for MAML use smaller number of parallel worker since parallelization is also done over the meta batch size
    else:
        n_parallel = 1

    # ----------------------- TRAINING ---------------------------------------
    # NOTE(review): random.sample raises ValueError if len(variants) > 999.
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "%s_%s_%i_%.3f_%i_id_%i" % (
            EXP_PREFIX, v['env'], v['hidden_sizes'][0], v['meta_step_size'],
            v['seed'], exp_id)

        # Replace class-name strings in the variant with actual classes.
        v = instantiate_class_stings(v)

        if args.mode == 'ec2':
            # configure instance
            # Pin region/AMI/keys to the randomly chosen (cheapest) subnet;
            # subnet names end with an availability-zone letter, hence [:-1].
            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last_gap",
            snapshot_gap=50,
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            pre_commands=[
                "yes | pip install tensorflow=='1.6.0'",
                "yes | pip install --upgrade cloudpickle"
            ],
            seed=v["seed"],
            python_command="python3",
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
from sandbox.rocky.tf.algos.trpo import TRPO
from rllab.misc.instrument import run_experiment_lite
from sandbox.ours.envs.mujoco import CassieEnv
from rllab.misc.instrument import VariantGenerator
from rllab import config
from experiments.helpers.ec2_helpers import cheapest_subnets
import tensorflow as tf
import sys
import argparse
import random

# S3/log prefix under which all runs of this sweep are grouped.
EXP_PREFIX = 'cassie-trpo-env-params'

# EC2 instance type used for remote runs.
ec2_instance = 'c4.2xlarge'

# NOTE(review): module-level call — this runs at import time (and presumably
# queries AWS spot pricing); consider moving it behind the ec2 code path.
subnets = cheapest_subnets(ec2_instance, num_subnets=3)


def run_train_task(vv):
    """Build the Cassie environment and policy for a single training run.

    Args:
        vv: variant dict produced by ``VariantGenerator`` — keys used here:
            'fixed_gains', 'stability_cost_coef', 'ctrl_cost_coef',
            'alive_bonus', 'hidden_sizes', 'hidden_nonlinearity'.

    NOTE(review): ``TfEnv``, ``normalize`` and ``GaussianMLPPolicy`` are not in
    the visible import block — confirm they are imported elsewhere in the file.
    NOTE(review): in this view the function ends right after constructing the
    policy; the algorithm setup/training step presumably follows — confirm the
    function is complete.
    """
    env = TfEnv(
        normalize(
            CassieEnv(fixed_gains=vv['fixed_gains'],
                      stability_cost_coef=vv['stability_cost_coef'],
                      ctrl_cost_coef=vv['ctrl_cost_coef'],
                      alive_bonus=vv['alive_bonus'])))

    policy = GaussianMLPPolicy(name="policy",
                               env_spec=env.spec,
                               hidden_sizes=vv['hidden_sizes'],
                               hidden_nonlinearity=vv['hidden_nonlinearity'])
def run_experiment(argv):
    """Launch a hyperparameter sweep of model-based MPC training runs.

    Builds the variant grid with ``VariantGenerator`` and dispatches one run
    per variant, on AWS EC2 (``--mode ec2``) or locally (default), via rllab's
    ``run_experiment_lite``.

    Args:
        argv: full command-line argument vector; ``argv[0]`` is skipped.

    NOTE(review): relies on module-level names defined elsewhere in this file
    (``EXP_PREFIX``, ``ec2_instance``, ``NUM_EC2_SUBNETS``, ``config``,
    ``run_train_task``, ``instantiate_class_stings``).
    """
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument('--n_gpu', type=int, default=0, help='Number of GPUs')
    parser.add_argument('--ctx', type=int, default=4,
                        help='Number of tasks per GPU')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()
    vg.add('seed', [22, 33])
    # env spec
    vg.add('env', ['HalfCheetahEnvRandParams'])  # HalfCheetahEnvRandParams
    vg.add('log_scale_limit', [0.0])
    vg.add('path_length', [200, 500])
    # Model-based MAML algo spec
    vg.add('n_itr', [100])
    vg.add('step_size', [0.01])
    vg.add('discount', [0.99])
    vg.add('batch_size_env_samples', [4000])
    vg.add('initial_random_samples', [4000])
    vg.add('num_models', [5, 10])
    vg.add('n_candidates', [1000])
    vg.add('horizon', [10])
    # neural network configuration
    vg.add('hidden_nonlinearity_model', ['relu'])
    vg.add('hidden_sizes_model', [(512, 512)])
    vg.add('dynamic_model_epochs', [(200, 200)])
    vg.add('weight_normalization_model', [True])
    vg.add('reinit_model_cycle', [0])
    vg.add('valid_split_ratio', [0.2])
    # NOTE(review): 'persitency' spelling is part of the variant key consumed
    # downstream — do not rename here.
    vg.add('rolling_average_persitency', [0.99])
    # other stuff
    vg.add('exp_prefix', [EXP_PREFIX])

    variants = vg.variants()

    # NOTE(review): default_dict is built but never used below — the sibling
    # launcher scripts consume it in an "if args.mode == 'mgpu'" branch that is
    # absent here; confirm whether multi-GPU mode was meant to be supported.
    default_dict = dict(exp_prefix=EXP_PREFIX,
                        snapshot_mode="gap",
                        snapshot_gap=5,
                        periodic_sync=True,
                        sync_s3_pkl=True,
                        sync_s3_log=True,
                        python_command="python3",
                        pre_commands=[
                            "yes | pip install tensorflow=='1.6.0'",
                            "pip list",
                            "yes | pip install --upgrade cloudpickle"
                        ],
                        use_cloudpickle=True,
                        variants=variants)

    # ----------------------- AWS configuration ---------------------------------
    if args.mode == 'ec2':
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        n_parallel = int(info["vCPU"] / 2)
    # make the default 4 if not using ec2
    else:
        n_parallel = 6

    if args.mode == 'ec2':
        # Mutates rllab's module-global config in place; order matters.
        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])
        subnets = cheapest_subnets(ec2_instance, num_subnets=NUM_EC2_SUBNETS)
        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    # ----------------------- TRAINING ---------------------------------------
    # NOTE(review): random.sample raises ValueError if len(variants) > 999.
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "model_based_mpc_train_env_%s_%i_%i_%i_id_%i" % (
            v['env'], v['path_length'], v['batch_size_env_samples'],
            v['seed'], exp_id)

        # Replace class-name strings in the variant with actual classes.
        v = instantiate_class_stings(v)

        if args.mode == 'ec2':
            # Pin region/AMI/keys to the randomly chosen (cheapest) subnet;
            # subnet names end with an availability-zone letter, hence [:-1].
            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            snapshot_mode="gap",
            snapshot_gap=5,
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=v["seed"],
            python_command='python3',
            pre_commands=[
                "yes | pip install tensorflow=='1.6.0'",
                "pip list",
                "yes | pip install --upgrade cloudpickle"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
def run_evaluation(argv):
    """Launch evaluation runs for a previously trained experiment sweep.

    Scans a dump directory of training results (params.pkl / variant.json per
    run), prepares one evaluation run per trained variant, and dispatches each
    via rllab's ``run_experiment_lite`` under the prefix ``<exp_prefix>-eval``.

    Args:
        argv: full command-line argument vector; ``argv[0]`` is skipped.

    NOTE(review): relies on module-level names defined elsewhere in this file
    (``EXP_PREFIX``, ``ec2_instance``, ``config``, ``run_eval_task``, and a
    module named ``eval`` — the latter shadows the builtin; verify the import).
    """
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'exp_prefix_dir', type=str,
        help='path to dump dir which contains folders with '
        'the train results i.e. params.pkl and variant.json file')
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument(
        '--n_parallel', type=int, default=1,
        help=
        'Number of parallel workers to perform rollouts. 0 => don\'t start any workers'
    )
    parser.add_argument('--num_sampled_envs', type=int, default=5,
                        help='number or environments with samples parameters')
    args = parser.parse_args(argv[1:])

    # ----------------------- EVALUATION ---------------------------------------
    exp_prefix = os.path.basename(args.exp_prefix_dir)
    eval_exp_prefix = exp_prefix + '-eval'

    # Yields (eval_exp_name, variant) pairs, one per trained run found on disk.
    evaluation_runs = eval.prepare_evaluation_runs(
        args.exp_prefix_dir, EXP_PREFIX,
        num_sampled_envs=args.num_sampled_envs)

    # ----------------------- AWS configuration ---------------------------------
    if args.mode == 'ec2':
        # Mutates rllab's module-global config in place; order matters.
        subnets = cheapest_subnets(ec2_instance, num_subnets=3)
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])
        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO',
                                                      len(evaluation_runs)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    for eval_exp_name, v in evaluation_runs:
        if args.mode == 'ec2':
            # Pin region/AMI/keys to the randomly chosen (cheapest) subnet;
            # subnet names end with an availability-zone letter, hence [:-1].
            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_eval_task,
            exp_prefix=eval_exp_prefix,
            exp_name=eval_exp_name,
            # Number of parallel workers for sampling
            n_parallel=args.n_parallel,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=v["seed"],
            python_command='python3',
            pre_commands=[
                "yes | pip install --upgrade pip",
                "yes | pip install tensorflow=='1.6.0'",
                "yes | pip install --upgrade cloudpickle"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            periodic_sync=True,
            variant=v,
            # plot=True,
            # terminate_machine=False,
        )