def benchmark_ppo_continuous_mlp_baseline():
    """Compare benchmarks between CMB and potentially other baselines."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'ppo_cmb', timestamp)
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        seeds = random.sample(range(100), num_trials)
        task_dir = osp.join(benchmark_dir, env_id)
        cmb_csvs = []
        for trial in range(num_trials):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            cmb_dir = trial_dir + '/continuous_mlp_baseline'
            with tf.Graph().as_default():
                env.reset()
                cmb_csv = ppo_cmb(env, seed, cmb_dir)
            cmb_csvs.append(cmb_csv)
        env.close()
def test_benchmark_ddpg(self):
    """
    Compare benchmarks between garage and baselines.

    :return:
    """
    # Load Mujoco1M tasks, you can check other benchmarks here
    # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'ddpg', timestamp)
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(
            env_name=env_id, max_path_length=params['n_rollout_steps'])
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task['trials']):
            env.reset()
            baseline_env.reset()
            seed = seeds[trial]
            trial_dir = osp.join(
                task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
            garage_dir = osp.join(trial_dir, 'garage')
            baselines_dir = osp.join(trial_dir, 'baselines')
            with tf.Graph().as_default():
                # Run garage algorithms
                garage_csv = run_garage(env, seed, garage_dir)
                # Run baselines algorithms
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        env.close()
        plot(b_csvs=baselines_csvs,
             g_csvs=garage_csvs,
             g_x='Epoch',
             g_y='AverageReturn',
             b_x='total/epochs',
             b_y='rollout/return',
             trials=task['trials'],
             seeds=seeds,
             plt_file=plt_file,
             env_id=env_id)
def test_benchmark_sac(self):
    '''
    Compare benchmarks between metarl and baselines.

    :return:
    '''
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'sac', timestamp)
    mujoco_tasks = ['HalfCheetah-v2']
    for task in mujoco_tasks:
        env = MetaRLEnv(normalize(gym.make(task)))
        seeds = [121, 524, 4]
        task_dir = osp.join(benchmark_dir, task)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(task))
        relplt_file = osp.join(benchmark_dir,
                               '{}_benchmark_mean.png'.format(task))
        metarl_csvs = []
        for trial in range(3):
            env.reset()
            seed = seeds[trial]
            trial_dir = osp.join(
                task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
            metarl_dir = osp.join(trial_dir, 'metarl')
            # Run metarl algorithms
            metarl_csv = run_metarl(env, seed, metarl_dir)
            metarl_csvs.append(metarl_csv)
        env.close()
def benchmark_trpo(self):  # pylint: disable=no-self-use
    """Compare benchmarks between garage and baselines."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/trpo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        seeds = random.sample(range(100), hyper_parameters['n_trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        garage_tf_csvs = []
        garage_pytorch_csvs = []
        for trial in range(hyper_parameters['n_trials']):
            _PLACEHOLDER_CACHE.clear()
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_tf_dir = trial_dir + '/garage_tf'
            garage_pytorch_dir = trial_dir + '/garage_pytorch'

            # Run garage algorithms
            env.reset()
            garage_pytorch_csv = run_garage_pytorch(env, seed,
                                                    garage_pytorch_dir)

            # pylint: disable=not-context-manager
            with tf.Graph().as_default():
                env.reset()
                garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

            garage_tf_csvs.append(garage_tf_csv)
            garage_pytorch_csvs.append(garage_pytorch_csv)

        env.close()

        benchmark_helper.plot_average_over_trials(
            [garage_tf_csvs, garage_pytorch_csvs],
            ['Evaluation/AverageReturn'] * 2,
            plt_file=plt_file,
            env_id=env_id,
            x_label='Iteration',
            y_label='AverageReturn',
            names=['garage-TensorFlow', 'garage-PyTorch'],
        )

        result_json[env_id] = benchmark_helper.create_json(
            [garage_tf_csvs, garage_pytorch_csvs],
            seeds=seeds,
            trials=hyper_parameters['n_trials'],
            xs=['Evaluation/Iteration'] * 2,
            ys=['Evaluation/AverageReturn'] * 2,
            factors=[hyper_parameters['batch_size']] * 2,
            names=['garage-TF', 'garage-PT'])

    Rh.write_file(result_json, 'TRPO')
def test_benchmark_ddpg(self):
    """
    Compare benchmarks between garage and baselines.

    :return:
    """
    # Load Mujoco1M tasks, you can check other benchmarks here
    # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
    mujoco1m = benchmarks.get_benchmark("Mujoco1M")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
    benchmark_dir = "./data/local/benchmarks/ddpg/%s/" % timestamp
    for task in mujoco1m["tasks"]:
        env_id = task["env_id"]
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(
            env_name=env_id, max_path_length=params["n_rollout_steps"])
        seeds = random.sample(range(100), task["trials"])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            "{}_benchmark.png".format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task["trials"]):
            env.reset()
            baseline_env.reset()
            seed = seeds[trial]
            trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
            garage_dir = trial_dir + "/garage"
            baselines_dir = trial_dir + "/baselines"
            with tf.Graph().as_default():
                # Run garage algorithms
                garage_csv = run_garage(env, seed, garage_dir)
                # Run baselines algorithms
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        env.close()
        plot(b_csvs=baselines_csvs,
             g_csvs=garage_csvs,
             g_x="Epoch",
             g_y="AverageReturn",
             b_x="total/epochs",
             b_y="rollout/return",
             trials=task["trials"],
             seeds=seeds,
             plt_file=plt_file,
             env_id=env_id)
def test_benchmark_trpo(self):
    """
    Compare benchmarks between garage and baselines.

    :return:
    """
    mujoco1m = benchmarks.get_benchmark("Mujoco1M")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
    benchmark_dir = "./data/local/benchmarks/trpo/%s/" % timestamp
    for task in mujoco1m["tasks"]:
        env_id = task["env_id"]
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)
        seeds = random.sample(range(100), task["trials"])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            "{}_benchmark.png".format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task["trials"]):
            _PLACEHOLDER_CACHE.clear()
            seed = seeds[trial]
            trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
            garage_dir = trial_dir + "/garage"
            baselines_dir = trial_dir + "/baselines"
            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
                # Run baseline algorithms
                baseline_env.reset()
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        plot(b_csvs=baselines_csvs,
             g_csvs=garage_csvs,
             g_x="Iteration",
             g_y="EpisodeRewardMean",
             b_x="EpThisIter",
             b_y="EpRewMean",
             trials=task["trials"],
             seeds=seeds,
             plt_file=plt_file,
             env_id=env_id)
        env.close()
def test_benchmark_her(self):
    '''
    Compare benchmarks between garage and baselines.

    :return:
    '''
    mujoco1m = benchmarks.get_benchmark('HerDdpg')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'her', timestamp)
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task['trials']):
            seed = seeds[trial]
            trial_dir = osp.join(
                task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
            garage_dir = osp.join(trial_dir, 'garage')
            baselines_dir = osp.join(trial_dir, 'baselines')
            with tf.Graph().as_default():
                garage_csv = run_garage(env, seed, garage_dir)
            CACHED_ENVS.clear()
            baselines_csv = run_baselines(env_id, seed, baselines_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        env.close()
        plot(
            b_csvs=baselines_csvs,
            g_csvs=garage_csvs,
            g_x='Epoch',
            g_y='AverageSuccessRate',
            b_x='epoch',
            b_y='train/success_rate',
            trials=task['trials'],
            seeds=seeds,
            plt_file=plt_file,
            env_id=env_id)
def test_benchmark_ppo(self):
    """
    Compare benchmarks between garage and baselines.

    :return:
    """
    mujoco1m = benchmarks.get_benchmark("Mujoco1M")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
    benchmark_dir = "./benchmark_ppo/%s/" % timestamp
    for task in mujoco1m["tasks"]:
        env_id = task["env_id"]
        env = gym.make(env_id)
        seeds = random.sample(range(100), task["trials"])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            "{}_benchmark.png".format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task["trials"]):
            env.reset()
            seed = seeds[trial]
            trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
            garage_dir = trial_dir + "/garage"
            baselines_dir = trial_dir + "/baselines"
            # Run garage algorithms
            garage_csv = run_garage(env, seed, garage_dir)
            # Run baselines algorithms
            env.reset()
            baselines_csv = run_baselines(env, seed, baselines_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        env.close()
        plot(b_csvs=baselines_csvs,
             g_csvs=garage_csvs,
             g_x="Iteration",
             g_y="AverageReturn",
             b_x="nupdates",
             b_y="eprewmean",
             trails=task["trials"],
             seeds=seeds,
             plt_file=plt_file,
             env_id=env_id)
def test_benchmark_her(self):
    """
    Compare benchmarks between garage and baselines.

    :return:
    """
    mujoco1m = benchmarks.get_benchmark("HerDdpg")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
    benchmark_dir = "./data/local/benchmarks/her/%s/" % timestamp
    for task in mujoco1m["tasks"]:
        env_id = task["env_id"]
        env = gym.make(env_id)
        seeds = random.sample(range(100), task["trials"])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            "{}_benchmark.png".format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task["trials"]):
            seed = seeds[trial]
            trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
            garage_dir = trial_dir + "/garage"
            baselines_dir = trial_dir + "/baselines"
            with tf.Graph().as_default():
                garage_csv = run_garage(env, seed, garage_dir)
            CACHED_ENVS.clear()
            baselines_csv = run_baselines(env_id, seed, baselines_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        env.close()
        plot(
            b_csvs=baselines_csvs,
            g_csvs=garage_csvs,
            g_x="Epoch",
            g_y="AverageSuccessRate",
            b_x="epoch",
            b_y="train/success_rate",
            trials=task["trials"],
            seeds=seeds,
            plt_file=plt_file,
            env_id=env_id)
def benchmark_continuous_mlp_q_function(self):  # pylint: disable=no-self-use
    """Test Continuous MLP QFunction Benchmarking."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'continuous_mlp_q_function', timestamp)
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        seeds = random.sample(range(100), num_of_trials)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(
            benchmark_dir,
            '{}_benchmark_continuous_mlp_q_function.png'.format(env_id))
        garage_csvs = []
        for trial in range(num_of_trials):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_dir = trial_dir + '/garage'
            with tf.Graph().as_default():
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
        env.close()
        Rh.relplot(g_csvs=garage_csvs,
                   b_csvs=[],
                   g_x='Epoch',
                   g_y='Evaluation/AverageReturn',
                   g_z='Garage',
                   b_x=None,
                   b_y=None,
                   b_z=None,
                   trials=num_of_trials,
                   seeds=seeds,
                   plt_file=plt_file,
                   env_id=env_id,
                   x_label='Iteration',
                   y_label='Evaluation/AverageReturn')
def test_benchmark_continuous_mlp_policy(self):
    """Test Continuous MLP Policy benchmarking."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'continuous_mlp_policy', timestamp)
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        seeds = random.sample(range(100), num_of_trials)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(
            benchmark_dir,
            '{}_benchmark_continuous_mlp_policy.png'.format(env_id))
        metarl_csvs = []
        for trial in range(num_of_trials):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_dir = trial_dir + '/metarl'
            with tf.Graph().as_default():
                env.reset()
                metarl_csv = run_metarl(env, seed, metarl_dir)
            metarl_csvs.append(metarl_csv)
        env.close()
        Rh.relplot(g_csvs=metarl_csvs,
                   b_csvs=[],
                   g_x='Epoch',
                   g_y='Evaluation/AverageReturn',
                   g_z='MetaRL',
                   b_x=None,
                   b_y=None,
                   b_z=None,
                   trials=num_of_trials,
                   seeds=seeds,
                   plt_file=plt_file,
                   env_id=env_id,
                   x_label='Iteration',
                   y_label='Evaluation/AverageReturn')
def test_benchmark_ppo(self):
    '''
    Compare benchmarks between garage and baselines.

    :return:
    '''
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task['trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_dir = trial_dir + '/garage'
            baselines_dir = trial_dir + '/baselines'
            with tf.Graph().as_default():
                # Run baselines algorithms
                baseline_env.reset()
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        env.close()
        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x='Iteration',
                g_y='AverageReturn',
                b_x='nupdates',
                b_y='eprewmean',
                trials=task['trials'],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn')
        result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                             g_csvs=garage_csvs,
                                             seeds=seeds,
                                             trails=task['trials'],
                                             g_x='Iteration',
                                             g_y='AverageReturn',
                                             b_x='nupdates',
                                             b_y='eprewmean',
                                             factor_g=2048,
                                             factor_b=2048)
    Rh.write_file(result_json, 'PPO')
def test_benchmark_vpg(self):
    """Compare benchmarks between metarl and baselines.

    Returns:

    """
    # pylint: disable=no-self-use
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/vpg/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        seeds = random.sample(range(100), hyper_parameters['n_trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        metarl_tf_csvs = []
        metarl_pytorch_csvs = []
        for trial in range(hyper_parameters['n_trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_tf_dir = trial_dir + '/metarl/tf'
            metarl_pytorch_dir = trial_dir + '/metarl/pytorch'
            # pylint: disable=not-context-manager
            with tf.Graph().as_default():
                # Run metarl algorithms
                env.reset()
                metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir)
                env.reset()
                metarl_pytorch_csv = run_metarl_pytorch(
                    env, seed, metarl_pytorch_dir)
            metarl_tf_csvs.append(metarl_tf_csv)
            metarl_pytorch_csvs.append(metarl_pytorch_csv)
        env.close()
        benchmark_helper.plot_average_over_trials(
            [metarl_tf_csvs, metarl_pytorch_csvs],
            ['Evaluation/AverageReturn'] * 2,
            plt_file=plt_file,
            env_id=env_id,
            x_label='Iteration',
            y_label='Evaluation/AverageReturn',
            names=['metarl-TensorFlow', 'metarl-PyTorch'])
        result_json[env_id] = benchmark_helper.create_json(
            [metarl_tf_csvs, metarl_pytorch_csvs],
            seeds=seeds,
            trials=hyper_parameters['n_trials'],
            xs=['Iteration'] * 2,
            ys=['Evaluation/AverageReturn'] * 2,
            factors=[hyper_parameters['batch_size']] * 2,
            names=['metarl-tf', 'metarl-pytorch'])
    Rh.write_file(result_json, 'VPG')
from garage.tf.experiment import LocalTFRunner
from garage.tf.optimizers import FirstOrderOptimizer
from garage.tf.policies import GaussianMLPPolicy as TF_GMP
from garage.torch.algos import PPO as PyTorch_PPO
from garage.torch.policies import GaussianMLPPolicy as PyTorch_GMP

from tests import benchmark_helper

hyper_parameters = {
    'n_epochs': 800,
    'max_path_length': 128,
    'batch_size': 1024,
    'n_trials': 4,
}

seeds = random.sample(range(100), hyper_parameters['n_trials'])

tasks = benchmarks.get_benchmark('Mujoco1M')['tasks']


@pytest.mark.benchmark
def auto_benchmark_ppo_garage_tf():
    """Create garage TensorFlow PPO model and training.

    Training over different environments and seeds.

    """

    @wrap_experiment
    def ppo_garage_tf(ctxt, env_id, seed):
        """Create garage TensorFlow PPO model and training.

        Args:
            ctxt (garage.experiment.ExperimentContext): The experiment
def test_benchmark_vpg():
    """Compare benchmarks between garage and baselines."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/vpg/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        garage_tf_csvs = []
        garage_pytorch_csvs = []
        for trial in range(task['trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_tf_dir = trial_dir + '/garage/tf'
            garage_pytorch_dir = trial_dir + '/garage/pytorch'

            env.reset()
            garage_pytorch_csv = run_garage_pytorch(env, seed,
                                                    garage_pytorch_dir)

            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

            garage_tf_csvs.append(garage_tf_csv)
            garage_pytorch_csvs.append(garage_pytorch_csv)

        env.close()

        plot([garage_tf_csvs, garage_pytorch_csvs],
             ['Iteration', 'Iteration'],
             ['AverageReturn', 'AverageReturn'],
             trials=task['trials'],
             seeds=seeds,
             plt_file=plt_file,
             env_id=env_id,
             x_label='Iteration',
             y_label='AverageReturn',
             names=['garage-tf', 'garage-pytorch'],
             smooth=True)

        result_json[env_id] = create_json(
            [garage_tf_csvs, garage_pytorch_csvs],
            seeds=seeds,
            trails=task['trials'],
            xs=['Iteration', 'Iteration'],
            ys=['AverageReturn', 'AverageReturn'],
            factors=[2048, 2047],
            names=['garage-tf', 'garage-pytorch'])

    Rh.write_file(result_json, 'VPG')
def test_benchmark_td3(self):
    """
    Test garage TD3 benchmarks.

    :return:
    """
    # Load Mujoco1M tasks, you can check other benchmarks here
    # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py # noqa: E501
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'td3', timestamp)
    result_json = {}
    # rlkit throws an error for 'Reacher-v2' due to a gym version mismatch
    mujoco1m['tasks'] = [
        task for task in mujoco1m['tasks']
        if task['env_id'] != 'Reacher-v2'
    ]
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        rlkit_env = AutoStopEnv(env_name=env_id,
                                max_path_length=params['n_rollout_steps'])
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        garage_csvs = []
        rlkit_csvs = []
        for trial in range(task['trials']):
            env.reset()
            rlkit_env.reset()
            seed = seeds[trial]
            trial_dir = osp.join(
                task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
            garage_dir = trial_dir + '/garage'
            rlkit_dir = osp.join(trial_dir, 'rlkit')
            with tf.Graph().as_default():
                # Run rlkit algorithms
                rlkit_csv = run_rlkit(rlkit_env, seed, rlkit_dir)
                # Run garage algorithms
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
            rlkit_csvs.append(rlkit_csv)
        Rh.plot(b_csvs=rlkit_csvs,
                g_csvs=garage_csvs,
                g_x='Epoch',
                g_y='Evaluation/AverageReturn',
                g_z='garage',
                b_x='Epoch',
                b_y='evaluation/Average Returns',
                b_z='rlkit',
                trials=task['trials'],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn')
        result_json[env_id] = Rh.create_json(
            b_csvs=rlkit_csvs,
            g_csvs=garage_csvs,
            seeds=seeds,
            trails=task['trials'],
            g_x='Epoch',
            g_y='Evaluation/AverageReturn',
            b_x='Epoch',
            b_y='evaluation/Average Returns',
            factor_g=1,
            factor_b=1)
    Rh.write_file(result_json, 'TD3')
def test_benchmark_ppo(self):
    """Compare benchmarks between garage and baselines.

    Returns:

    """
    # pylint: disable=no-self-use
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(
            env_name=env_id,
            max_path_length=hyper_parameters['max_path_length'])
        seeds = random.sample(range(100), hyper_parameters['n_trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        garage_tf_csvs = []
        garage_pytorch_csvs = []
        for trial in range(hyper_parameters['n_trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_tf_dir = trial_dir + '/garage/tf'
            garage_pytorch_dir = trial_dir + '/garage/pytorch'
            baselines_dir = trial_dir + '/baselines'
            # pylint: disable=not-context-manager
            with tf.Graph().as_default():
                # Run baselines algorithms
                baseline_env.reset()
                baseline_csv = run_baselines(baseline_env, seed,
                                             baselines_dir)
                # Run garage algorithms
                env.reset()
                garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)
                env.reset()
                garage_pytorch_csv = run_garage_pytorch(
                    env, seed, garage_pytorch_dir)
            baselines_csvs.append(baseline_csv)
            garage_tf_csvs.append(garage_tf_csv)
            garage_pytorch_csvs.append(garage_pytorch_csv)
        env.close()
        benchmark_helper.plot_average_over_trials(
            [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
            [
                'eprewmean', 'Evaluation/AverageReturn',
                'Evaluation/AverageReturn'
            ],
            plt_file=plt_file,
            env_id=env_id,
            x_label='Iteration',
            y_label='Evaluation/AverageReturn',
            names=['baseline', 'garage-TensorFlow', 'garage-PyTorch'],
        )
        result_json[env_id] = benchmark_helper.create_json(
            [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
            seeds=seeds,
            trials=hyper_parameters['n_trials'],
            xs=['nupdates', 'Iteration', 'Iteration'],
            ys=[
                'eprewmean', 'Evaluation/AverageReturn',
                'Evaluation/AverageReturn'
            ],
            factors=[hyper_parameters['batch_size']] * 3,
            names=['baseline', 'garage-TF', 'garage-PT'])
    Rh.write_file(result_json, 'PPO')
def test_benchmark_ppo(self):
    """Compare benchmarks between metarl and baselines.

    Returns:

    """
    # pylint: disable=no-self-use
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(
            env_name=env_id,
            max_path_length=hyper_parameters['max_path_length'])
        seeds = random.sample(range(100), hyper_parameters['n_trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        metarl_tf_csvs = []
        metarl_pytorch_csvs = []
        for trial in range(hyper_parameters['n_trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_tf_dir = trial_dir + '/metarl/tf'
            metarl_pytorch_dir = trial_dir + '/metarl/pytorch'
            baselines_dir = trial_dir + '/baselines'
            # pylint: disable=not-context-manager
            with tf.Graph().as_default():
                # Run baselines algorithms
                baseline_env.reset()
                baseline_csv = run_baselines(baseline_env, seed,
                                             baselines_dir)
                # Run metarl algorithms
                env.reset()
                metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir)
                # env.reset()
                # metarl_pytorch_csv = run_metarl_pytorch(
                #     env, seed, metarl_pytorch_dir)
            baselines_csvs.append(baseline_csv)
            metarl_tf_csvs.append(metarl_tf_csv)
            # metarl_pytorch_csvs.append(metarl_pytorch_csv)
        env.close()

        # benchmark_helper.plot_average_over_trials(
        #     [baselines_csvs, metarl_tf_csvs, metarl_pytorch_csvs],
        #     [
        #         'eprewmean', 'Evaluation/AverageReturn',
        #         'Evaluation/AverageReturn'
        #     ],
        #     plt_file=plt_file,
        #     env_id=env_id,
        #     x_label='Iteration',
        #     y_label='Evaluation/AverageReturn',
        #     names=['baseline', 'metarl-TensorFlow', 'metarl-PyTorch'],
        # )

        # result_json[env_id] = benchmark_helper.create_json(
        #     [baselines_csvs, metarl_tf_csvs],
        #     seeds=seeds,
        #     trials=hyper_parameters['n_trials'],
        #     xs=['total_timesteps', 'TotalEnvSteps'],
        #     ys=['eprewmean', 'Evaluation/AverageReturn'],
        #     factors=[hyper_parameters['batch_size']] * 2,
        #     names=['baseline', 'metarl-TF'])

        result_json[env_id] = benchmark_helper.create_json(
            [baselines_csvs, metarl_tf_csvs],
            seeds=seeds,
            trials=hyper_parameters['n_trials'],
            xs=['total_timesteps', 'TotalEnvSteps'],
            ys=['eprewmean', 'Evaluation/AverageReturn'],
            factors=[hyper_parameters['batch_size']] * 2,
            names=['baseline', 'metarl-TF'])

        # Rh.relplot(g_csvs=metarl_tf_csvs,
        #            b_csvs=baselines_csvs,
        #            g_x='TotalEnvSteps',
        #            g_y='Evaluation/AverageReturn',
        #            g_z='MetaRL',
        #            b_x='total_timesteps',
        #            b_y='eprewmean',
        #            b_z='Openai/Baseline',
        #            trials=hyper_parameters['n_trials'],
        #            seeds=seeds,
        #            plt_file=plt_file,
        #            env_id=env_id,
        #            x_label='EnvTimeStep',
        #            y_label='Performance')

        benchmark_helper.plot_average_over_trials_with_x(
            [baselines_csvs, metarl_tf_csvs],
            ['eprewmean', 'Evaluation/AverageReturn'],
            ['total_timesteps', 'TotalEnvSteps'],
            plt_file=plt_file,
            env_id=env_id,
            x_label='EnvTimeStep',
            y_label='Performance',
            names=['baseline', 'metarl-TensorFlow'],
        )

        # Rh.relplot(g_csvs=metarl_tf_csvs,
        #            b_csvs=metarl_pytorch_csvs,
        #            g_x='TotalEnvSteps',
        #            g_y='Evaluation/AverageReturn',
        #            g_z='MetaRL-TF',
        #            b_x='TotalEnvSteps',
        #            b_y='Evaluation/AverageReturn',
        #            b_z='MetaRL-PT',
        #            trials=hyper_parameters['n_trials'],
        #            seeds=seeds,
        #            plt_file=plt_file,
        #            env_id=env_id,
        #            x_label='EnvTimeStep',
        #            y_label='Performance')

    Rh.write_file(result_json, 'PPO')
    'n_trials': 6,
    'steps_per_epoch': 20,
    'n_rollout_steps': 250,
    'n_train_steps': 1,
    'discount': 0.99,
    'tau': 0.005,
    'replay_buffer_size': int(1e6),
    'sigma': 0.1,
    'smooth_return': False,
    'buffer_batch_size': 100,
    'min_buffer_size': int(1e4)
}

seeds = random.sample(range(100), hyper_parameters['n_trials'])

tasks = [
    task for task in benchmarks.get_benchmark('Mujoco1M')['tasks']
    if task['env_id'] != 'Reacher-v2'
]


@pytest.mark.benchmark
def auto_benchmark_td3_garage_tf():
    """Create garage TensorFlow TD3 model and training.

    Training over different environments and seeds.

    """

    @wrap_experiment
    def td3_garage_tf(ctxt, env_id, seed):
        """Create garage TensorFlow TD3 model and training.
def test_benchmark_ppo(self):
    """
    Compare benchmarks between garage and baselines.

    :return:
    """
    mujoco1m = benchmarks.get_benchmark("Mujoco1M")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
    benchmark_dir = "./data/local/benchmarks/ppo/%s/" % timestamp
    result_json = {}
    for task in mujoco1m["tasks"]:
        env_id = task["env_id"]
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)
        seeds = random.sample(range(100), task["trials"])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            "{}_benchmark.png".format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task["trials"]):
            seed = seeds[trial]
            trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
            garage_dir = trial_dir + "/garage"
            baselines_dir = trial_dir + "/baselines"
            with tf.Graph().as_default():
                # Run baselines algorithms
                baseline_env.reset()
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        env.close()
        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x="Iteration",
                g_y="EpisodeRewardMean",
                b_x="nupdates",
                b_y="eprewmean",
                trials=task["trials"],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label="Iteration",
                y_label="AverageReturn")
        result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                             g_csvs=garage_csvs,
                                             seeds=seeds,
                                             trails=task["trials"],
                                             g_x="Iteration",
                                             g_y="EpisodeRewardMean",
                                             b_x="nupdates",
                                             b_y="eprewmean",
                                             factor_g=2048,
                                             factor_b=2048)
    Rh.write_file(result_json, "PPO")
def test_benchmark_ddpg(self):
    '''
    Compare benchmarks between metarl and baselines.

    :return:
    '''
    # Load Mujoco1M tasks, you can check other benchmarks here
    # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'ddpg', timestamp)
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(
            env_name=env_id, max_path_length=params['n_rollout_steps'])
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        relplt_file = osp.join(benchmark_dir,
                               '{}_benchmark_mean.png'.format(env_id))
        baselines_csvs = []
        metarl_csvs = []
        for trial in range(task['trials']):
            env.reset()
            baseline_env.reset()
            seed = seeds[trial]
            trial_dir = osp.join(
                task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
            metarl_dir = osp.join(trial_dir, 'metarl')
            baselines_dir = osp.join(trial_dir, 'baselines')
            with tf.Graph().as_default():
                # Run metarl algorithms
                metarl_csv = run_metarl(env, seed, metarl_dir)
                # Run baselines algorithms
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
            metarl_csvs.append(metarl_csv)
            baselines_csvs.append(baselines_csv)
        env.close()
        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=metarl_csvs,
                g_x='Epoch',
                g_y='Evaluation/AverageReturn',
                g_z='MetaRL',
                b_x='total/epochs',
                b_y='rollout/return',
                b_z='Baseline',
                trials=task['trials'],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Epoch',
                y_label='Evaluation/AverageReturn')
        Rh.relplot(g_csvs=metarl_csvs,
                   b_csvs=baselines_csvs,
                   g_x='Epoch',
                   g_y='Evaluation/AverageReturn',
                   g_z='MetaRL',
                   b_x='total/epochs',
                   b_y='rollout/return',
                   b_z='Baseline',
                   trials=task['trials'],
                   seeds=seeds,
                   plt_file=relplt_file,
                   env_id=env_id,
                   x_label='Epoch',
                   y_label='Evaluation/AverageReturn')
        result_json[env_id] = Rh.create_json(
            b_csvs=baselines_csvs,
            g_csvs=metarl_csvs,
            seeds=seeds,
            trails=task['trials'],
            g_x='Epoch',
            g_y='Evaluation/AverageReturn',
            b_x='total/epochs',
            b_y='rollout/return',
            factor_g=params['steps_per_epoch'] * params['n_rollout_steps'],
            factor_b=1)
    Rh.write_file(result_json, 'DDPG')
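# Note: the factor_g passed to Rh.create_json above appears to rescale the
# metarl x-axis from epochs to environment steps, which is why it is set to
# params['steps_per_epoch'] * params['n_rollout_steps'] while the baselines
# data keeps factor_b=1. A minimal sketch of that conversion, using
# placeholder values rather than the real params of this benchmark:

_sketch_params = {'steps_per_epoch': 20, 'n_rollout_steps': 100}


def _epoch_to_env_steps(epoch, steps_per_epoch, n_rollout_steps):
    """Convert an epoch index into a cumulative environment-step count."""
    return epoch * steps_per_epoch * n_rollout_steps


# After 10 epochs the run has collected 10 * 20 * 100 = 20000 transitions,
# i.e. epoch * factor_g with factor_g = steps_per_epoch * n_rollout_steps.
assert _epoch_to_env_steps(10, **_sketch_params) == 20000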
"""Global parameters for benchmarking.""" from baselines.bench import benchmarks Fetch1M_ENV_SET = [ task['env_id'] for task in benchmarks.get_benchmark('Fetch1M')['tasks'] ] MuJoCo1M_ENV_SET = [ task['env_id'] for task in benchmarks.get_benchmark('Mujoco1M')['tasks'] ] PIXEL_ENV_SET = ['CubeCrash-v0', 'MemorizeDigits-v0'] STATE_ENV_SET = [ 'LunarLander-v2', 'Assault-ramDeterministic-v4', 'Breakout-ramDeterministic-v4', 'ChopperCommand-ramDeterministic-v4', 'Tutankham-ramDeterministic-v4', 'CartPole-v1', ]