def auto_benchmark_ddpg_garage_tf():
    """Create garage TensorFlow DDPG model and training.

    Training over different environments and seeds.
    """

    @wrap_experiment
    def ddpg_garage_tf(ctxt, env_id, seed):
        """Create garage TensorFlow DDPG model and training.

        Args:
            ctxt (garage.experiment.ExperimentContext): The experiment
                configuration used by LocalRunner to create the
                snapshotter.
            env_id (str): Environment id of the task.
            seed (int): Random positive integer for the trial.
        """
        deterministic.set_seed(seed)

        with LocalTFRunner(ctxt) as runner:
            env = TfEnv(normalize(gym.make(env_id)))

            action_noise = OUStrategy(env.spec,
                                      sigma=hyper_parameters['sigma'])

            policy = ContinuousMLPPolicy(
                env_spec=env.spec,
                hidden_sizes=hyper_parameters['policy_hidden_sizes'],
                hidden_nonlinearity=tf.nn.relu,
                output_nonlinearity=tf.nn.tanh)

            qf = ContinuousMLPQFunction(
                env_spec=env.spec,
                hidden_sizes=hyper_parameters['qf_hidden_sizes'],
                hidden_nonlinearity=tf.nn.relu)

            replay_buffer = SimpleReplayBuffer(
                env_spec=env.spec,
                size_in_transitions=hyper_parameters['replay_buffer_size'],
                time_horizon=hyper_parameters['n_rollout_steps'])

            algo = DDPG(env_spec=env.spec,
                        policy=policy,
                        qf=qf,
                        replay_buffer=replay_buffer,
                        steps_per_epoch=hyper_parameters['steps_per_epoch'],
                        policy_lr=hyper_parameters['policy_lr'],
                        qf_lr=hyper_parameters['qf_lr'],
                        target_update_tau=hyper_parameters['tau'],
                        n_train_steps=hyper_parameters['n_train_steps'],
                        discount=hyper_parameters['discount'],
                        min_buffer_size=int(1e4),
                        exploration_strategy=action_noise,
                        policy_optimizer=tf.compat.v1.train.AdamOptimizer,
                        qf_optimizer=tf.compat.v1.train.AdamOptimizer)

            runner.setup(algo, env)
            runner.train(n_epochs=hyper_parameters['n_epochs'],
                         batch_size=hyper_parameters['n_rollout_steps'])

    for env_id, seed, log_dir in benchmark_helper.iterate_experiments(
            ddpg_garage_tf.__name__, tasks, seeds):
        ddpg_garage_tf(dict(log_dir=log_dir), env_id=env_id, seed=seed)
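# The DDPG benchmark above pulls its settings from a module-level
# `hyper_parameters` dict defined elsewhere in this file. A minimal sketch of
# the keys it reads is given below; the key names come from the function body,
# but the values are illustrative placeholders, not the tuned benchmark
# settings.
ddpg_hyper_parameters_sketch = {
    'policy_hidden_sizes': (64, 64),  # actor hidden layer widths
    'qf_hidden_sizes': (64, 64),  # critic hidden layer widths
    'sigma': 0.2,  # OU exploration noise scale
    'replay_buffer_size': int(1e6),  # transitions kept in the buffer
    'n_rollout_steps': 100,  # rollout length / buffer time horizon
    'steps_per_epoch': 20,  # rollout-train cycles per epoch
    'n_train_steps': 50,  # gradient steps per cycle
    'policy_lr': 1e-4,  # actor learning rate
    'qf_lr': 1e-3,  # critic learning rate
    'tau': 1e-2,  # soft target-update coefficient
    'discount': 0.99,  # reward discount factor
    'n_epochs': 500,  # training epochs per trial
}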
def auto_benchmark_ppo_garage_pytorch():
    """Create garage PyTorch PPO model and training.

    Training over different environments and seeds.
    """

    @wrap_experiment
    def ppo_garage_pytorch(ctxt, env_id, seed):
        """Create garage PyTorch PPO model and training.

        Args:
            ctxt (garage.experiment.ExperimentContext): The experiment
                configuration used by LocalRunner to create the
                snapshotter.
            env_id (str): Environment id of the task.
            seed (int): Random positive integer for the trial.
        """
        deterministic.set_seed(seed)

        runner = LocalRunner(ctxt)

        env = TfEnv(normalize(gym.make(env_id)))

        policy = PyTorch_GMP(env.spec,
                             hidden_sizes=(32, 32),
                             hidden_nonlinearity=torch.tanh,
                             output_nonlinearity=None)

        value_function = LinearFeatureBaseline(env_spec=env.spec)

        algo = PyTorch_PPO(env_spec=env.spec,
                           policy=policy,
                           value_function=value_function,
                           optimizer=torch.optim.Adam,
                           policy_lr=3e-4,
                           max_path_length=hyper_parameters['max_path_length'],
                           discount=0.99,
                           gae_lambda=0.95,
                           center_adv=True,
                           lr_clip_range=0.2,
                           minibatch_size=128,
                           max_optimization_epochs=10)

        runner.setup(algo, env)
        runner.train(n_epochs=hyper_parameters['n_epochs'],
                     batch_size=hyper_parameters['batch_size'])

    for env_id, seed, log_dir in benchmark_helper.iterate_experiments(
            ppo_garage_pytorch,
            tasks,
            seeds,
            use_tf=False,
            xcolumn='TotalEnvSteps',
            xlabel='Total Environment Steps',
            ycolumn='Evaluation/AverageReturn',
            ylabel='Average Return'):
        ppo_garage_pytorch(dict(log_dir=log_dir), env_id=env_id, seed=seed)
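# Every benchmark driver in this file iterates through
# `benchmark_helper.iterate_experiments`, relying only on it yielding one
# (env_id, seed, log_dir) triple per task/seed pair. The sketch below is a
# hypothetical minimal implementation under that assumption; the real helper
# also records the plot settings (`xcolumn`, `ycolumn`, ...) passed above,
# which this sketch ignores.
import os


def _iterate_experiments_sketch(name_or_func, tasks, seeds, **plot_kwargs):
    """Yield an (env_id, seed, log_dir) triple for every task/seed pair."""
    name = getattr(name_or_func, '__name__', name_or_func)
    for task in tasks:
        # Tasks are assumed to be either bare env ids or dicts with an
        # 'env_id' entry.
        env_id = task['env_id'] if isinstance(task, dict) else task
        for seed in seeds:
            # One log directory per (experiment, environment, seed) trial.
            yield env_id, seed, os.path.join('data', 'local', 'benchmarks',
                                             name, env_id, str(seed))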
def auto_benchmark_trpo_baselines():
    """Create TRPO baselines model and training.

    Training over different environments and seeds.
    """

    def trpo_baselines(log_dir, env_id, seed):
        """Create baselines model and training.

        Args:
            log_dir (str): Experiment log directory.
            env_id (str): Environment id of the task.
            seed (int): Random positive integer for the trial.
        """
        # Set up TF session
        ncpu = max(multiprocessing.cpu_count() // 2, 1)
        config = tf.compat.v1.ConfigProto(allow_soft_placement=True,
                                          intra_op_parallelism_threads=ncpu,
                                          inter_op_parallelism_threads=ncpu)
        tf.compat.v1.Session(config=config).__enter__()

        # Set up logger for baselines
        configure(dir=log_dir,
                  format_strs=['stdout', 'log', 'csv', 'tensorboard'])
        baselines_logger.info('rank {}: seed={}, logdir={}'.format(
            0, seed, baselines_logger.get_dir()))

        set_global_seeds(seed)

        env = AutoStopEnv(env_name=env_id, max_path_length=100)

        trpo_mpi.learn(network='mlp',
                       env=env,
                       total_timesteps=hyper_parameters['batch_size'] *
                       hyper_parameters['n_epochs'],
                       timesteps_per_batch=hyper_parameters['batch_size'],
                       gamma=hyper_parameters['discount'],
                       lam=hyper_parameters['gae_lambda'],
                       max_kl=hyper_parameters['max_kl'],
                       cg_iters=10,
                       cg_damping=0.1,
                       vf_iters=5,
                       vf_stepsize=1e-3)

    for env_id, seed, log_dir in benchmark_helper.iterate_experiments(
            trpo_baselines,
            tasks,
            seeds,
            use_tf=True,
            xcolumn='TimestepsSoFar',
            xlabel='Total Environment Steps',
            ycolumn='EpRewMean',
            ylabel='Average Return'):
        trpo_baselines(log_dir=log_dir, env_id=env_id, seed=seed)
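# The TRPO baseline above and the garage TRPO/VPG benchmarks that follow share
# one on-policy `hyper_parameters` dict. A sketch of the keys they read is
# below; again, the values are illustrative assumptions rather than the tuned
# benchmark settings.
on_policy_hyper_parameters_sketch = {
    'hidden_sizes': (32, 32),  # policy hidden layer widths
    'max_path_length': 100,  # rollout horizon
    'batch_size': 1024,  # environment steps gathered per epoch
    'n_epochs': 999,  # training epochs per trial
    'discount': 0.99,  # reward discount factor
    'gae_lambda': 0.97,  # GAE(lambda) for advantage estimation
    'max_kl': 0.01,  # TRPO trust-region step size
    'learning_rate': 1e-2,  # VPG optimizer step size
    'center_adv': True,  # standardize advantages before the policy update
}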
def auto_benchmark_trpo_garage_tf():
    """Create garage TensorFlow TRPO model and training.

    Training over different environments and seeds.
    """

    @wrap_experiment
    def trpo_garage_tf(ctxt, env_id, seed):
        """Create garage TensorFlow TRPO model and training.

        Args:
            ctxt (garage.experiment.ExperimentContext): The experiment
                configuration used by LocalRunner to create the
                snapshotter.
            env_id (str): Environment id of the task.
            seed (int): Random positive integer for the trial.
        """
        deterministic.set_seed(seed)

        with LocalTFRunner(ctxt) as runner:
            env = TfEnv(normalize(gym.make(env_id)))

            policy = GaussianMLPPolicy(
                env_spec=env.spec,
                hidden_sizes=hyper_parameters['hidden_sizes'],
                hidden_nonlinearity=tf.nn.tanh,
                output_nonlinearity=None,
            )

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            algo = TRPO(env_spec=env.spec,
                        policy=policy,
                        baseline=baseline,
                        max_path_length=hyper_parameters['max_path_length'],
                        discount=hyper_parameters['discount'],
                        gae_lambda=hyper_parameters['gae_lambda'],
                        max_kl_step=hyper_parameters['max_kl'])

            runner.setup(algo, env)
            runner.train(n_epochs=hyper_parameters['n_epochs'],
                         batch_size=hyper_parameters['batch_size'])

    for env_id, seed, log_dir in benchmark_helper.iterate_experiments(
            trpo_garage_tf,
            tasks,
            seeds,
            use_tf=True,
            xcolumn='TotalEnvSteps',
            xlabel='Total Environment Steps',
            ycolumn='Evaluation/AverageReturn',
            ylabel='Average Return'):
        trpo_garage_tf(dict(log_dir=log_dir), env_id=env_id, seed=seed)
def auto_benchmark_trpo_garage_pytorch():
    """Create garage PyTorch TRPO model and training.

    Training over different environments and seeds.
    """

    @wrap_experiment
    def trpo_garage_pytorch(ctxt, env_id, seed):
        """Create garage PyTorch TRPO model and training.

        Args:
            ctxt (garage.experiment.ExperimentContext): The experiment
                configuration used by LocalRunner to create the
                snapshotter.
            env_id (str): Environment id of the task.
            seed (int): Random positive integer for the trial.
        """
        deterministic.set_seed(seed)

        runner = LocalRunner(ctxt)

        env = TfEnv(normalize(gym.make(env_id)))

        policy = PyTorch_GMP(env.spec,
                             hidden_sizes=hyper_parameters['hidden_sizes'],
                             hidden_nonlinearity=torch.tanh,
                             output_nonlinearity=None)

        value_function = GaussianMLPValueFunction(
            env_spec=env.spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=torch.tanh,
            output_nonlinearity=None)

        algo = PyTorch_TRPO(
            env_spec=env.spec,
            policy=policy,
            value_function=value_function,
            max_path_length=hyper_parameters['max_path_length'],
            discount=hyper_parameters['discount'],
            gae_lambda=hyper_parameters['gae_lambda'])

        runner.setup(algo, env)
        runner.train(n_epochs=hyper_parameters['n_epochs'],
                     batch_size=hyper_parameters['batch_size'])

    for env_id, seed, log_dir in benchmark_helper.iterate_experiments(
            trpo_garage_pytorch.__name__, tasks, seeds):
        trpo_garage_pytorch(dict(log_dir=log_dir), env_id=env_id, seed=seed)
def auto_benchmark_vpg_garage_tf():
    """Create garage TensorFlow VPG model and training.

    Training over different environments and seeds.
    """

    @wrap_experiment
    def vpg_garage_tf(ctxt, env_id, seed):
        """Create garage TensorFlow VPG model and training.

        Args:
            ctxt (garage.experiment.ExperimentContext): The experiment
                configuration used by LocalRunner to create the
                snapshotter.
            env_id (str): Environment id of the task.
            seed (int): Random positive integer for the trial.
        """
        deterministic.set_seed(seed)

        with LocalTFRunner(ctxt) as runner:
            env = TfEnv(normalize(gym.make(env_id)))

            policy = TF_GMP(
                env_spec=env.spec,
                hidden_sizes=hyper_parameters['hidden_sizes'],
                hidden_nonlinearity=tf.nn.tanh,
                output_nonlinearity=None,
            )

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            algo = TF_VPG(env_spec=env.spec,
                          policy=policy,
                          baseline=baseline,
                          max_path_length=hyper_parameters['max_path_length'],
                          discount=hyper_parameters['discount'],
                          center_adv=hyper_parameters['center_adv'],
                          optimizer_args=dict(
                              tf_optimizer_args=dict(
                                  learning_rate=hyper_parameters[
                                      'learning_rate']),
                              verbose=True))

            runner.setup(algo, env)
            runner.train(n_epochs=hyper_parameters['n_epochs'],
                         batch_size=hyper_parameters['batch_size'])

    for env_id, seed, log_dir in benchmark_helper.iterate_experiments(
            vpg_garage_tf, tasks, seeds):
        vpg_garage_tf(dict(log_dir=log_dir), env_id=env_id, seed=seed)
def auto_benchmark_vpg_garage_pytorch():
    """Create garage PyTorch VPG model and training.

    Training over different environments and seeds.
    """

    @wrap_experiment
    def vpg_garage_pytorch(ctxt, env_id, seed):
        """Create garage PyTorch VPG model and training.

        Args:
            ctxt (garage.experiment.ExperimentContext): The experiment
                configuration used by LocalRunner to create the
                snapshotter.
            env_id (str): Environment id of the task.
            seed (int): Random positive integer for the trial.
        """
        deterministic.set_seed(seed)

        runner = LocalRunner(ctxt)

        env = TfEnv(normalize(gym.make(env_id)))

        policy = PyTorch_GMP(env.spec,
                             hidden_sizes=hyper_parameters['hidden_sizes'],
                             hidden_nonlinearity=torch.tanh,
                             output_nonlinearity=None)

        value_function = LinearFeatureBaseline(env_spec=env.spec)

        algo = PyTorch_VPG(env_spec=env.spec,
                           policy=policy,
                           optimizer=torch.optim.Adam,
                           policy_lr=hyper_parameters['learning_rate'],
                           value_function=value_function,
                           max_path_length=hyper_parameters['max_path_length'],
                           discount=hyper_parameters['discount'],
                           center_adv=hyper_parameters['center_adv'])

        runner.setup(algo, env)
        runner.train(n_epochs=hyper_parameters['n_epochs'],
                     batch_size=hyper_parameters['batch_size'])

    for env_id, seed, log_dir in benchmark_helper.iterate_experiments(
            vpg_garage_pytorch, tasks, seeds):
        vpg_garage_pytorch(dict(log_dir=log_dir), env_id=env_id, seed=seed)
def auto_benchmark_ppo_garage_tf():
    """Create garage TensorFlow PPO model and training.

    Training over different environments and seeds.
    """

    @wrap_experiment
    def ppo_garage_tf(ctxt, env_id, seed):
        """Create garage TensorFlow PPO model and training.

        Args:
            ctxt (garage.experiment.ExperimentContext): The experiment
                configuration used by LocalRunner to create the
                snapshotter.
            env_id (str): Environment id of the task.
            seed (int): Random positive integer for the trial.
        """
        deterministic.set_seed(seed)

        with LocalTFRunner(ctxt) as runner:
            env = TfEnv(normalize(gym.make(env_id)))

            policy = TF_GMP(
                env_spec=env.spec,
                hidden_sizes=(32, 32),
                hidden_nonlinearity=tf.nn.tanh,
                output_nonlinearity=None,
            )

            baseline = TF_GMB(
                env_spec=env.spec,
                regressor_args=dict(
                    hidden_sizes=(32, 32),
                    use_trust_region=False,
                    optimizer=FirstOrderOptimizer,
                    optimizer_args=dict(
                        batch_size=32,
                        max_epochs=10,
                        tf_optimizer_args=dict(learning_rate=3e-4),
                    ),
                ),
            )

            algo = TF_PPO(env_spec=env.spec,
                          policy=policy,
                          baseline=baseline,
                          max_path_length=hyper_parameters['max_path_length'],
                          discount=0.99,
                          gae_lambda=0.95,
                          center_adv=True,
                          lr_clip_range=0.2,
                          optimizer_args=dict(
                              batch_size=32,
                              max_epochs=10,
                              tf_optimizer_args=dict(learning_rate=3e-4),
                              verbose=True))

            runner.setup(algo, env)
            runner.train(n_epochs=hyper_parameters['n_epochs'],
                         batch_size=hyper_parameters['batch_size'])

    for env_id, seed, log_dir in benchmark_helper.iterate_experiments(
            ppo_garage_tf.__name__, tasks, seeds):
        ppo_garage_tf(dict(log_dir=log_dir), env_id=env_id, seed=seed)
def auto_benchmark_ppo_baselines():
    """Create PPO baselines model and training.

    Training over different environments and seeds.
    """

    def ppo_baselines(log_dir, env_id, seed):
        """Create baselines model and training.

        Args:
            log_dir (str): Experiment log directory.
            env_id (str): Environment id of the task.
            seed (int): Random positive integer for the trial.
        """
        # Set up TF session
        ncpu = max(multiprocessing.cpu_count() // 2, 1)
        config = tf.compat.v1.ConfigProto(allow_soft_placement=True,
                                          intra_op_parallelism_threads=ncpu,
                                          inter_op_parallelism_threads=ncpu)
        tf.compat.v1.Session(config=config).__enter__()

        # Set up baselines logger
        configure(dir=log_dir,
                  format_strs=['stdout', 'log', 'csv', 'tensorboard'])
        baselines_logger.info('rank {}: seed={}, logdir={}'.format(
            0, seed, baselines_logger.get_dir()))

        set_global_seeds(seed)

        env = DummyVecEnv([
            lambda: bench.Monitor(gym.make(env_id),
                                  baselines_logger.get_dir(),
                                  allow_early_resets=True)
        ])

        ppo2.learn(network='mlp',
                   env=env,
                   nsteps=hyper_parameters['batch_size'],
                   nminibatches=32,
                   lam=0.95,
                   gamma=0.99,
                   noptepochs=10,
                   log_interval=1,
                   ent_coef=0.0,
                   max_grad_norm=None,
                   lr=3e-4,
                   cliprange=0.2,
                   total_timesteps=(hyper_parameters['batch_size'] *
                                    hyper_parameters['n_epochs']))

    for env_id, seed, log_dir in benchmark_helper.iterate_experiments(
            ppo_baselines,
            tasks,
            seeds,
            use_tf=True,
            xcolumn='misc/total_timesteps',
            xlabel='Total Environment Steps',
            ycolumn='eprewmean',
            ylabel='Average Return'):
        ppo_baselines(log_dir=log_dir, env_id=env_id, seed=seed)
def auto_benchmark_ppo_garage_pytorch():
    """Create garage PyTorch PPO model and training.

    Training over different environments and seeds.
    """

    @wrap_experiment
    def ppo_garage_pytorch(ctxt, env_id, seed):
        """Create garage PyTorch PPO model and training.

        Args:
            ctxt (garage.experiment.ExperimentContext): The experiment
                configuration used by LocalRunner to create the
                snapshotter.
            env_id (str): Environment id of the task.
            seed (int): Random positive integer for the trial.
        """
        deterministic.set_seed(seed)

        runner = LocalRunner(ctxt)

        env = TfEnv(normalize(gym.make(env_id)))

        policy = PyTorch_GMP(env.spec,
                             hidden_sizes=(32, 32),
                             hidden_nonlinearity=torch.tanh,
                             output_nonlinearity=None)

        value_function = GaussianMLPValueFunction(
            env_spec=env.spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=torch.tanh,
            output_nonlinearity=None)

        policy_optimizer = OptimizerWrapper(
            (torch.optim.Adam, dict(lr=2.5e-4)),
            policy,
            max_optimization_epochs=10,
            minibatch_size=64)

        vf_optimizer = OptimizerWrapper((torch.optim.Adam, dict(lr=2.5e-4)),
                                        value_function,
                                        max_optimization_epochs=10,
                                        minibatch_size=64)

        algo = PyTorch_PPO(env_spec=env.spec,
                           policy=policy,
                           value_function=value_function,
                           policy_optimizer=policy_optimizer,
                           vf_optimizer=vf_optimizer,
                           max_path_length=hyper_parameters['max_path_length'],
                           discount=0.99,
                           gae_lambda=0.95,
                           center_adv=True,
                           lr_clip_range=0.2)

        runner.setup(algo, env)
        runner.train(n_epochs=hyper_parameters['n_epochs'],
                     batch_size=hyper_parameters['batch_size'])

    for env_id, seed, log_dir in benchmark_helper.iterate_experiments(
            ppo_garage_pytorch.__name__, tasks, seeds):
        ppo_garage_pytorch(dict(log_dir=log_dir), env_id=env_id, seed=seed)
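# None of the benchmarks above is self-contained: they assume the imports and
# module-level constants that would sit at the top of this file. The block
# below is a sketch of that setup. The garage and baselines paths match the
# 2020-era releases these scripts target but should be treated as assumptions,
# since both libraries reorganized modules across releases; `tasks` and
# `seeds` are illustrative values, not the benchmark suite itself.
import multiprocessing

import gym
import tensorflow as tf
import torch

from baselines import bench
from baselines import logger as baselines_logger
from baselines.common.misc_util import set_global_seeds
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.logger import configure
from baselines.ppo2 import ppo2
from baselines.trpo_mpi import trpo_mpi

from garage import wrap_experiment
from garage.envs import normalize
from garage.experiment import deterministic, LocalRunner, LocalTFRunner
from garage.np.baselines import LinearFeatureBaseline
from garage.np.exploration_strategies import OUStrategy
from garage.replay_buffer import SimpleReplayBuffer
from garage.tf.algos import DDPG, TRPO
from garage.tf.algos import PPO as TF_PPO
from garage.tf.algos import VPG as TF_VPG
from garage.tf.baselines import GaussianMLPBaseline as TF_GMB
from garage.tf.envs import TfEnv
from garage.tf.optimizers import FirstOrderOptimizer
from garage.tf.policies import ContinuousMLPPolicy, GaussianMLPPolicy
from garage.tf.policies import GaussianMLPPolicy as TF_GMP
from garage.tf.q_functions import ContinuousMLPQFunction
from garage.torch.algos import PPO as PyTorch_PPO
from garage.torch.algos import TRPO as PyTorch_TRPO
from garage.torch.algos import VPG as PyTorch_VPG
from garage.torch.optimizers import OptimizerWrapper
from garage.torch.policies import GaussianMLPPolicy as PyTorch_GMP
from garage.torch.value_functions import GaussianMLPValueFunction

# Assumed paths for the garage benchmark fixtures (these moved between
# releases).
from tests import benchmark_helper
from tests.wrappers import AutoStopEnv

# Illustrative task and seed lists; the real benchmarks iterate over the
# MuJoCo task suite with randomly drawn seeds.
tasks = ['HalfCheetah-v2', 'Hopper-v2', 'Walker2d-v2']
seeds = [521, 22, 954]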