def benchmark_trpo(self):  # pylint: disable=no-self-use
    """Compare TRPO benchmarks between garage-TF and garage-PyTorch.

    For every Mujoco1M task, runs both implementations for
    ``hyper_parameters['n_trials']`` trials (same seed per trial for both
    frameworks), then plots the averaged curves and writes a JSON summary.
    """
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/trpo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        # One fresh seed per trial, shared by both framework runs.
        seeds = random.sample(range(100), hyper_parameters['n_trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        garage_tf_csvs = []
        garage_pytorch_csvs = []
        for trial in range(hyper_parameters['n_trials']):
            # Clear cached TF placeholders so trials don't share graph state.
            _PLACEHOLDER_CACHE.clear()
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_tf_dir = trial_dir + '/garage_tf'
            garage_pytorch_dir = trial_dir + '/garage_pytorch'
            # Run garage algorithms
            env.reset()
            garage_pytorch_csv = run_garage_pytorch(env, seed,
                                                    garage_pytorch_dir)
            # Fresh default graph isolates the TF run from prior trials.
            # pylint: disable=not-context-manager
            with tf.Graph().as_default():
                env.reset()
                garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)
            garage_tf_csvs.append(garage_tf_csv)
            garage_pytorch_csvs.append(garage_pytorch_csv)
        env.close()
        benchmark_helper.plot_average_over_trials(
            [garage_tf_csvs, garage_pytorch_csvs],
            ['Evaluation/AverageReturn'] * 2,
            plt_file=plt_file,
            env_id=env_id,
            x_label='Iteration',
            y_label='AverageReturn',
            names=['garage-TensorFlow', 'garage-PyTorch'],
        )
        result_json[env_id] = benchmark_helper.create_json(
            [garage_tf_csvs, garage_pytorch_csvs],
            seeds=seeds,
            trials=hyper_parameters['n_trials'],
            xs=['Evaluation/Iteration'] * 2,
            ys=['Evaluation/AverageReturn'] * 2,
            factors=[hyper_parameters['batch_size']] * 2,
            names=['garage-TF', 'garage-PT'])
    Rh.write_file(result_json, 'TRPO')
def test_benchmark_pearl(self):
    """Benchmark metarl PEARL on the Metaworld ML1 'reach-v1' task.

    Samples train/test task environments, runs PEARL for
    ``params['n_trials']`` seeded trials, then plots success rate and
    writes a JSON summary.
    """
    env_sampler = SetTaskSampler(
        lambda: MetaRLEnv(normalize(ML1.get_train_tasks('reach-v1'))))
    env = env_sampler.sample(params['num_train_tasks'])
    test_env_sampler = SetTaskSampler(
        lambda: MetaRLEnv(normalize(ML1.get_test_tasks('reach-v1'))))
    # NOTE(review): the *test* sampler is sized with 'num_train_tasks';
    # the ML10 PEARL benchmark uses 'num_test_tasks' here — confirm this
    # is intentional.
    test_env = test_env_sampler.sample(params['num_train_tasks'])
    env_id = 'reach-v1'
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'pearl', timestamp)
    result_json = {}
    # One fresh seed per trial.
    seeds = random.sample(range(100), params['n_trials'])
    task_dir = osp.join(benchmark_dir, env_id)
    plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
    metarl_csvs = []
    for trial in range(params['n_trials']):
        seed = seeds[trial]
        trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
        metarl_dir = trial_dir + '/metarl'
        metarl_csv = run_metarl(env, test_env, seed, metarl_dir)
        metarl_csvs.append(metarl_csv)
    env.close()
    benchmark_helper.plot_average_over_trials(
        [metarl_csvs],
        ys=['Test/Average/SuccessRate'],
        plt_file=plt_file,
        env_id=env_id,
        x_label='TotalEnvSteps',
        y_label='Test/Average/SuccessRate',
        names=['metarl_pearl'],
    )
    # Scale x-values by steps collected per iteration.
    factor_val = params['meta_batch_size'] * params['max_path_length']
    result_json[env_id] = benchmark_helper.create_json(
        [metarl_csvs],
        seeds=seeds,
        trials=params['n_trials'],
        xs=['TotalEnvSteps'],
        ys=['Test/Average/SuccessRate'],
        factors=[factor_val],
        names=['metarl_pearl'])
    Rh.write_file(result_json, 'PEARL')
def test_finish_release_merge_conflict_tag(self):
    """Finish + tag a release whose merge back to develop conflicts.

    Verifies that the tag is created even when the develop merge fails,
    and that re-running ``finish`` after resolving the conflict skips the
    already-successful tagging step and deletes the release branch.
    """
    version_filename = 'VERSION'
    new_version = '1.1\n'
    gitflow = GitFlow(self.repo).init()
    fmgr = FeatureBranchManager(gitflow)
    fmgr.finish('even')
    # Create the conflicting change on develop.
    fake_commit(self.repo,
                'Overwrite version',
                filename=version_filename,
                change=new_version)
    # verify that the tag does not yet exist
    # "v" comes from the "versiontag" prefix in the gitflow config for the
    # "release" fixture
    self.assertNotIn('v1.0', self.repo.tags)
    mgr = ReleaseBranchManager(gitflow)
    taginfo = dict(message='Tagging version 1.0', )
    # The merge into develop conflicts, so finish raises.
    self.assertRaises(MergeError, mgr.finish, '1.0', tagging_info=taginfo)
    # verify that the tag exists, even though there was a failed merge
    self.assertIn('v1.0', self.repo.tags)
    # resolve the conflict in favor of the change on develop
    write_file(filename=version_filename, append=False, change=new_version)
    gitflow.git.add(version_filename)
    # Conclude the in-progress merge using git's prepared merge message.
    gitflow.git.commit('-F.git/MERGE_MSG')
    # the release branch is still here
    self.assertIn('rel/1.0', [b.name for b in self.repo.branches])
    # finish the release again; this should skip the tagging, since that
    # part previously succeeded
    mgr.finish('1.0', tagging_info=taginfo)
    # now the release branch is gone
    self.assertNotIn('rel/1.0', [b.name for b in self.repo.branches])
    # verify that the tag still exists
    self.assertIn('v1.0', self.repo.tags)
def files(s3, source_bucket):
    """Write two sample objects into *source_bucket* and return the results.

    Each entry is a (key, body) pair handed to ``write_file``; the list of
    its return values is returned in the same order.
    """
    entries = [
        ('folders/some_folder/file.txt', 'contents1'),
        ('folders/some_folder/file2.txt', 'contents2'),
    ]
    return [write_file(s3, source_bucket, key, body) for key, body in entries]
def test_finish_release_merge_conflict_tag(self):
    """Finishing a release still tags when the develop merge conflicts.

    The tag must exist after the failed merge, and a second ``finish``
    (after resolving the conflict) must skip tagging and remove the
    release branch.
    """
    version_file = 'VERSION'
    bumped_version = '1.1\n'
    flow = GitFlow(self.repo).init()
    feature_mgr = FeatureBranchManager(flow)
    feature_mgr.finish('even')
    # Land a conflicting change on develop.
    fake_commit(self.repo, 'Overwrite version',
                filename=version_file, change=bumped_version)
    # The tag must not exist up front; the "v" prefix comes from the
    # "versiontag" setting in the "release" fixture's gitflow config.
    self.assertNotIn('v1.0', self.repo.tags)
    release_mgr = ReleaseBranchManager(flow)
    tag_info = {'message': 'Tagging version 1.0'}
    self.assertRaises(MergeError, release_mgr.finish, '1.0',
                      tagging_info=tag_info)
    # Tagging happened even though the develop merge failed.
    self.assertIn('v1.0', self.repo.tags)
    # Resolve the conflict in favor of the change on develop.
    write_file(filename=version_file, append=False, change=bumped_version)
    flow.git.add(version_file)
    flow.git.commit('-F.git/MERGE_MSG')
    # The release branch survives the failed finish.
    branch_names = [branch.name for branch in self.repo.branches]
    self.assertIn('rel/1.0', branch_names)
    # Finishing again skips the (already successful) tagging step.
    release_mgr.finish('1.0', tagging_info=tag_info)
    # Now the release branch is gone...
    self.assertNotIn('rel/1.0',
                     [branch.name for branch in self.repo.branches])
    # ...and the tag is still present.
    self.assertIn('v1.0', self.repo.tags)
def test_benchmark_ddpg(self): ''' Compare benchmarks between garage and baselines. :return: ''' # Load Mujoco1M tasks, you can check other benchmarks here # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py mujoco1m = benchmarks.get_benchmark('Mujoco1M') timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f') benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks', 'ddpg', timestamp) result_json = {} for task in mujoco1m['tasks']: env_id = task['env_id'] env = gym.make(env_id) baseline_env = AutoStopEnv( env_name=env_id, max_path_length=params['n_rollout_steps']) seeds = random.sample(range(100), task['trials']) task_dir = osp.join(benchmark_dir, env_id) plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id)) baselines_csvs = [] garage_csvs = [] for trial in range(task['trials']): env.reset() baseline_env.reset() seed = seeds[trial] trial_dir = osp.join( task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed)) garage_dir = osp.join(trial_dir, 'garage') baselines_dir = osp.join(trial_dir, 'baselines') with tf.Graph().as_default(): # Run garage algorithms garage_csv = run_garage(env, seed, garage_dir) # Run baselines algorithms baselines_csv = run_baselines(baseline_env, seed, baselines_dir) garage_csvs.append(garage_csv) baselines_csvs.append(baselines_csv) env.close() Rh.plot(b_csvs=baselines_csvs, g_csvs=garage_csvs, g_x='Epoch', g_y='AverageReturn', b_x='total/epochs', b_y='rollout/return', trials=task['trials'], seeds=seeds, plt_file=plt_file, env_id=env_id, x_label='Epoch', y_label='AverageReturn') result_json[env_id] = Rh.create_json( b_csvs=baselines_csvs, g_csvs=garage_csvs, seeds=seeds, trails=task['trials'], g_x='Epoch', g_y='AverageReturn', b_x='total/epochs', b_y='rollout/return', factor_g=params['n_epoch_cycles'] * params['n_rollout_steps'], factor_b=1) Rh.write_file(result_json, 'DDPG')
def test_benchmark_ppo(self):
    """Compare PPO benchmarks between garage and OpenAI baselines."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        # Caps baselines episodes at 100 steps to match garage's horizon.
        baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)
        # One fresh seed per trial, shared by both runs.
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task['trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_dir = trial_dir + '/garage'
            baselines_dir = trial_dir + '/baselines'
            # Fresh default graph isolates each trial's TF state.
            with tf.Graph().as_default():
                # Run baselines algorithms
                baseline_env.reset()
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        env.close()
        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x='Iteration',
                g_y='AverageReturn',
                b_x='nupdates',
                b_y='eprewmean',
                trials=task['trials'],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn')
        # NOTE: 'trails' is the (misspelled) keyword Rh.create_json accepts.
        result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                             g_csvs=garage_csvs,
                                             seeds=seeds,
                                             trails=task['trials'],
                                             g_x='Iteration',
                                             g_y='AverageReturn',
                                             b_x='nupdates',
                                             b_y='eprewmean',
                                             factor_g=2048,
                                             factor_b=2048)
    Rh.write_file(result_json, 'PPO')
def test_benchmark_vpg(self): """Compare benchmarks between metarl and baselines. Returns: """ # pylint: disable=no-self-use mujoco1m = benchmarks.get_benchmark('Mujoco1M') timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f') benchmark_dir = './data/local/benchmarks/vpg/%s/' % timestamp result_json = {} for task in mujoco1m['tasks']: env_id = task['env_id'] env = gym.make(env_id) seeds = random.sample(range(100), hyper_parameters['n_trials']) task_dir = osp.join(benchmark_dir, env_id) plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id)) metarl_tf_csvs = [] metarl_pytorch_csvs = [] for trial in range(hyper_parameters['n_trials']): seed = seeds[trial] trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed) metarl_tf_dir = trial_dir + '/metarl/tf' metarl_pytorch_dir = trial_dir + '/metarl/pytorch' # pylint: disable=not-context-manager with tf.Graph().as_default(): # Run metarl algorithms env.reset() metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir) env.reset() metarl_pytorch_csv = run_metarl_pytorch( env, seed, metarl_pytorch_dir) metarl_tf_csvs.append(metarl_tf_csv) metarl_pytorch_csvs.append(metarl_pytorch_csv) env.close() benchmark_helper.plot_average_over_trials( [metarl_tf_csvs, metarl_pytorch_csvs], ['Evaluation/AverageReturn'] * 2, plt_file=plt_file, env_id=env_id, x_label='Iteration', y_label='Evaluation/AverageReturn', names=['metarl-TensorFlow', 'metarl-PyTorch']) result_json[env_id] = benchmark_helper.create_json( [metarl_tf_csvs, metarl_pytorch_csvs], seeds=seeds, trials=hyper_parameters['n_trials'], xs=['Iteration'] * 2, ys=['Evaluation/AverageReturn'] * 2, factors=[hyper_parameters['batch_size']] * 2, names=['metarl-tf', 'metarl-pytorch']) Rh.write_file(result_json, 'VPG')
def test_benchmark_td3(self):
    """Compare TD3 benchmarks between garage and rlkit.

    Runs both implementations on each Mujoco1M task (minus Reacher-v2)
    for ``task['trials']`` seeded trials, then plots the curves and
    writes a JSON summary.
    """
    # Load Mujoco1M tasks; you can check other benchmarks here:
    # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py # noqa: E501
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'td3', timestamp)
    result_json = {}
    # rlkit throws an error for 'Reacher-v2' due to a gym version mismatch.
    mujoco1m['tasks'] = [
        task for task in mujoco1m['tasks'] if task['env_id'] != 'Reacher-v2'
    ]
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        # Wrapper that caps rlkit episodes at the same horizon garage uses.
        rlkit_env = AutoStopEnv(env_name=env_id,
                                max_path_length=params['n_rollout_steps'])
        # One fresh seed per trial, shared by both runs.
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        garage_csvs = []
        rlkit_csvs = []
        for trial in range(task['trials']):
            env.reset()
            rlkit_env.reset()
            seed = seeds[trial]
            trial_dir = osp.join(task_dir,
                                 'trial_{}_seed_{}'.format(trial + 1, seed))
            # Consistency fix: build with osp.join like rlkit_dir (was
            # string concatenation).
            garage_dir = osp.join(trial_dir, 'garage')
            rlkit_dir = osp.join(trial_dir, 'rlkit')
            # Fresh default graph isolates each trial's TF state.
            with tf.Graph().as_default():
                # Run rlkit algorithms
                rlkit_csv = run_rlkit(rlkit_env, seed, rlkit_dir)
                # Run garage algorithms
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
            rlkit_csvs.append(rlkit_csv)
        # Fix: the original never closed the env, leaking a simulator
        # handle per task — every sibling benchmark closes it here.
        env.close()
        Rh.plot(b_csvs=rlkit_csvs,
                g_csvs=garage_csvs,
                g_x='Epoch',
                g_y='Evaluation/AverageReturn',
                g_z='garage',
                b_x='Epoch',
                b_y='evaluation/Average Returns',
                b_z='rlkit',
                trials=task['trials'],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn')
        # NOTE: 'trails' is the (misspelled) keyword Rh.create_json accepts.
        result_json[env_id] = Rh.create_json(
            b_csvs=rlkit_csvs,
            g_csvs=garage_csvs,
            seeds=seeds,
            trails=task['trials'],
            g_x='Epoch',
            g_y='Evaluation/AverageReturn',
            b_x='Epoch',
            b_y='evaluation/Average Returns',
            factor_g=1,
            factor_b=1)
    Rh.write_file(result_json, 'TD3')
def test_benchmark_ppo(self):
    """Compare PPO benchmarks across baselines, garage-TF and garage-PyTorch."""
    # pylint: disable=no-self-use
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        # Caps baselines episodes at the same horizon garage uses.
        baseline_env = AutoStopEnv(
            env_name=env_id,
            max_path_length=hyper_parameters['max_path_length'])
        # One fresh seed per trial, shared by all three runs.
        seeds = random.sample(range(100), hyper_parameters['n_trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        garage_tf_csvs = []
        garage_pytorch_csvs = []
        for trial in range(hyper_parameters['n_trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_tf_dir = trial_dir + '/garage/tf'
            garage_pytorch_dir = trial_dir + '/garage/pytorch'
            baselines_dir = trial_dir + '/baselines'
            # Fresh default graph isolates each trial's TF state.
            # pylint: disable=not-context-manager
            with tf.Graph().as_default():
                # Run baselines algorithms
                baseline_env.reset()
                baseline_csv = run_baselines(baseline_env, seed,
                                             baselines_dir)
                # Run garage algorithms
                env.reset()
                garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)
                env.reset()
                garage_pytorch_csv = run_garage_pytorch(
                    env, seed, garage_pytorch_dir)
            baselines_csvs.append(baseline_csv)
            garage_tf_csvs.append(garage_tf_csv)
            garage_pytorch_csvs.append(garage_pytorch_csv)
        env.close()
        benchmark_helper.plot_average_over_trials(
            [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
            [
                'eprewmean', 'Evaluation/AverageReturn',
                'Evaluation/AverageReturn'
            ],
            plt_file=plt_file,
            env_id=env_id,
            x_label='Iteration',
            y_label='Evaluation/AverageReturn',
            names=['baseline', 'garage-TensorFlow', 'garage-PyTorch'],
        )
        result_json[env_id] = benchmark_helper.create_json(
            [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
            seeds=seeds,
            trials=hyper_parameters['n_trials'],
            xs=['nupdates', 'Iteration', 'Iteration'],
            ys=[
                'eprewmean', 'Evaluation/AverageReturn',
                'Evaluation/AverageReturn'
            ],
            factors=[hyper_parameters['batch_size']] * 3,
            names=['baseline', 'garage-TF', 'garage-PT'])
    Rh.write_file(result_json, 'PPO')
def test_benchmark_categorical_gru_policy():
    """Benchmark categorical GRU policy on discrete-action tasks."""
    categorical_tasks = [
        'LunarLander-v2',
        'Assault-ramDeterministic-v4',
        'Breakout-ramDeterministic-v4',
        'ChopperCommand-ramDeterministic-v4',
        'Tutankham-ramDeterministic-v4',
        'CartPole-v1',
    ]
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo_categ_gru/%s/' % timestamp
    result_json = {}
    for task in categorical_tasks:
        env_id = task
        env = gym.make(env_id)
        # baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)
        # One fresh seed per trial (3 trials).
        seeds = random.sample(range(100), 3)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(3):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_dir = trial_dir + '/garage'
            # Fresh default graph isolates each trial's TF state.
            with tf.Graph().as_default():
                # Run baselines algorithms (currently disabled):
                # baseline_env.reset()
                # baselines_csv = run_baselines(baseline_env, seed,
                #                               baselines_dir)
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
        env.close()
        # NOTE(review): baselines_csvs is always empty because the baselines
        # run above is commented out — confirm Rh.plot/Rh.create_json
        # tolerate an empty b_csvs list.
        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x='Iteration',
                g_y='AverageReturn',
                g_z='garage',
                b_x='Iteration',
                b_y='AverageReturn',
                b_z='baselines',
                trials=3,
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn')
        # NOTE: 'trails' is the (misspelled) keyword Rh.create_json accepts.
        result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                             g_csvs=garage_csvs,
                                             seeds=seeds,
                                             trails=3,
                                             g_x='Iteration',
                                             g_y='AverageReturn',
                                             b_x='Iteration',
                                             b_y='AverageReturn',
                                             factor_g=2048,
                                             factor_b=2048)
    Rh.write_file(result_json, 'PPO')
def test_benchmark_pearl(self):
    """Run benchmarks for metarl PEARL on the Metaworld ML10 suite."""
    # Wrap each ML10 train task so it carries a task id/name and never
    # terminates early.
    ML_train_envs = [
        TaskIdWrapper(MetaRLEnv(
            IgnoreDoneWrapper(
                normalize(
                    env(*ML10_ARGS['train'][task]['args'],
                        **ML10_ARGS['train'][task]['kwargs'])))),
                      task_id=task_id,
                      task_name=task)
        for (task_id, (task, env)) in enumerate(ML10_ENVS['train'].items())
    ]
    # Same wrapping for the held-out test tasks.
    ML_test_envs = [
        TaskIdWrapper(MetaRLEnv(
            IgnoreDoneWrapper(
                normalize(
                    env(*ML10_ARGS['test'][task]['args'],
                        **ML10_ARGS['test'][task]['kwargs'])))),
                      task_id=task_id,
                      task_name=task)
        for (task_id, (task, env)) in enumerate(ML10_ENVS['test'].items())
    ]
    env_sampler = EnvPoolSampler(ML_train_envs)
    env = env_sampler.sample(params['num_train_tasks'])
    test_env_sampler = EnvPoolSampler(ML_test_envs)
    test_env = test_env_sampler.sample(params['num_test_tasks'])
    env_id = 'ML10'
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'pearl', timestamp)
    result_json = {}
    # One fresh seed per trial.
    seeds = random.sample(range(100), params['n_trials'])
    task_dir = osp.join(benchmark_dir, env_id)
    plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
    metarl_csvs = []
    for trial in range(params['n_trials']):
        seed = seeds[trial]
        trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
        metarl_dir = trial_dir + '/metarl'
        metarl_csv = run_metarl(env, test_env, seed, metarl_dir)
        metarl_csvs.append(metarl_csv)
    env.close()
    benchmark_helper.plot_average_over_trials(
        [metarl_csvs],
        ys=['Test/Average/SuccessRate'],
        plt_file=plt_file,
        env_id=env_id,
        x_label='TotalEnvSteps',
        y_label='Test/Average/SuccessRate',
        names=['metarl_pearl'],
    )
    # Scale x-values by steps collected per iteration.
    factor_val = params['meta_batch_size'] * params['max_path_length']
    result_json[env_id] = benchmark_helper.create_json(
        [metarl_csvs],
        seeds=seeds,
        trials=params['n_trials'],
        xs=['TotalEnvSteps'],
        ys=['Test/Average/SuccessRate'],
        factors=[factor_val],
        names=['metarl_pearl'])
    Rh.write_file(result_json, 'PEARL')
def test_benchmark_ppo(self): """Compare benchmarks between metarl and baselines. Returns: """ # pylint: disable=no-self-use mujoco1m = benchmarks.get_benchmark('Mujoco1M') timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f') benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp result_json = {} for task in mujoco1m['tasks']: env_id = task['env_id'] env = gym.make(env_id) baseline_env = AutoStopEnv( env_name=env_id, max_path_length=hyper_parameters['max_path_length']) seeds = random.sample(range(100), hyper_parameters['n_trials']) task_dir = osp.join(benchmark_dir, env_id) plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id)) baselines_csvs = [] metarl_tf_csvs = [] metarl_pytorch_csvs = [] for trial in range(hyper_parameters['n_trials']): seed = seeds[trial] trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed) metarl_tf_dir = trial_dir + '/metarl/tf' metarl_pytorch_dir = trial_dir + '/metarl/pytorch' baselines_dir = trial_dir + '/baselines' # pylint: disable=not-context-manager with tf.Graph().as_default(): # Run baselines algorithms baseline_env.reset() baseline_csv = run_baselines(baseline_env, seed, baselines_dir) # Run metarl algorithms env.reset() metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir) # env.reset() # metarl_pytorch_csv = run_metarl_pytorch( # env, seed, metarl_pytorch_dir) baselines_csvs.append(baseline_csv) metarl_tf_csvs.append(metarl_tf_csv) # metarl_pytorch_csvs.append(metarl_pytorch_csv) env.close() # benchmark_helper.plot_average_over_trials( # [baselines_csvs, metarl_tf_csvs, metarl_pytorch_csvs], # [ # 'eprewmean', 'Evaluation/AverageReturn', # 'Evaluation/AverageReturn' # ], # plt_file=plt_file, # env_id=env_id, # x_label='Iteration', # y_label='Evaluation/AverageReturn', # names=['baseline', 'metarl-TensorFlow', 'metarl-PyTorch'], # ) # result_json[env_id] = benchmark_helper.create_json( # [baselines_csvs, metarl_tf_csvs], # seeds=seeds, # trials=hyper_parameters['n_trials'], # 
xs=['total_timesteps', 'TotalEnvSteps'], # ys=[ # 'eprewmean', 'Evaluation/AverageReturn' # ], # factors=[hyper_parameters['batch_size']] * 2, # names=['baseline', 'metarl-TF']) result_json[env_id] = benchmark_helper.create_json( [baselines_csvs, metarl_tf_csvs], seeds=seeds, trials=hyper_parameters['n_trials'], xs=['total_timesteps', 'TotalEnvSteps'], ys=['eprewmean', 'Evaluation/AverageReturn'], factors=[hyper_parameters['batch_size']] * 2, names=['baseline', 'metarl-TF']) # Rh.relplot(g_csvs=metarl_tf_csvs, # b_csvs=baselines_csvs, # g_x='TotalEnvSteps', # g_y='Evaluation/AverageReturn', # g_z='MetaRL', # b_x='total_timesteps', # b_y='eprewmean', # b_z='Openai/Baseline', # trials=hyper_parameters['n_trials'], # seeds=seeds, # plt_file=plt_file, # env_id=env_id, # x_label='EnvTimeStep', # y_label='Performance') benchmark_helper.plot_average_over_trials_with_x( [baselines_csvs, metarl_tf_csvs], ['eprewmean', 'Evaluation/AverageReturn'], ['total_timesteps', 'TotalEnvSteps'], plt_file=plt_file, env_id=env_id, x_label='EnvTimeStep', y_label='Performance', names=['baseline', 'metarl-TensorFlow'], ) # Rh.relplot(g_csvs=metarl_tf_csvs, # b_csvs=metarl_pytorch_csvs, # g_x='TotalEnvSteps', # g_y='Evaluation/AverageReturn', # g_z='MetaRL-TF', # b_x='TotalEnvSteps', # b_y='Evaluation/AverageReturn', # b_z='MetaRL-PT', # trials=hyper_parameters['n_trials'], # seeds=seeds, # plt_file=plt_file, # env_id=env_id, # x_label='EnvTimeStep', # y_label='Performance') Rh.write_file(result_json, 'PPO')
def test_benchmark_categorical_mlp_policy(self):
    """Benchmark the categorical MLP policy on discrete-action tasks.

    Only garage is run; the same CSVs are passed as both b_csvs and g_csvs
    so the plotting helpers can be reused without a baselines run.
    """
    categorical_tasks = [
        'LunarLander-v2', 'CartPole-v1', 'Assault-ramDeterministic-v4',
        'Breakout-ramDeterministic-v4', 'ChopperCommand-ramDeterministic-v4',
        'Tutankham-ramDeterministic-v4'
    ]
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/categorical_mlp_policy/{0}/'
    benchmark_dir = benchmark_dir.format(timestamp)
    result_json = {}
    for task in categorical_tasks:
        env_id = task
        env = gym.make(env_id)
        trials = 3
        # One fresh seed per trial.
        seeds = random.sample(range(100), trials)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        relplt_file = osp.join(benchmark_dir,
                               '{}_benchmark_mean.png'.format(env_id))
        garage_csvs = []
        for trial in range(trials):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_dir = trial_dir + '/garage'
            # Fresh default graph isolates each trial's TF state.
            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
        env.close()
        Rh.plot(b_csvs=garage_csvs,
                g_csvs=garage_csvs,
                g_x='Iteration',
                g_y='AverageReturn',
                g_z='Garage',
                b_x='Iteration',
                b_y='AverageReturn',
                b_z='Garage',
                trials=trials,
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn')
        Rh.relplot(b_csvs=garage_csvs,
                   g_csvs=garage_csvs,
                   g_x='Iteration',
                   g_y='AverageReturn',
                   g_z='Garage',
                   b_x='Iteration',
                   b_y='AverageReturn',
                   b_z='Garage',
                   trials=trials,
                   seeds=seeds,
                   plt_file=relplt_file,
                   env_id=env_id,
                   x_label='Iteration',
                   y_label='AverageReturn')
        # NOTE: 'trails' is the (misspelled) keyword Rh.create_json accepts.
        result_json[env_id] = Rh.create_json(b_csvs=garage_csvs,
                                             g_csvs=garage_csvs,
                                             seeds=seeds,
                                             trails=trials,
                                             g_x='Iteration',
                                             g_y='AverageReturn',
                                             b_x='Iteration',
                                             b_y='AverageReturn',
                                             factor_g=2048,
                                             factor_b=2048)
    Rh.write_file(result_json, 'PPO')
def test_benchmark_vpg():
    """Compare VPG benchmarks between garage-TF and garage-PyTorch.

    For every Mujoco1M task, runs both implementations for
    ``task['trials']`` trials (same seed per trial), plots the smoothed
    curves and writes a JSON summary.
    """
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/vpg/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        # One fresh seed per trial, shared by both framework runs.
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        garage_tf_csvs = []
        garage_pytorch_csvs = []
        for trial in range(task['trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_tf_dir = trial_dir + '/garage/tf'
            garage_pytorch_dir = trial_dir + '/garage/pytorch'
            env.reset()
            garage_pytorch_csv = run_garage_pytorch(env, seed,
                                                    garage_pytorch_dir)
            # Fresh default graph isolates the TF run from prior trials.
            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)
            garage_tf_csvs.append(garage_tf_csv)
            garage_pytorch_csvs.append(garage_pytorch_csv)
        env.close()
        plot([garage_tf_csvs, garage_pytorch_csvs],
             ['Iteration', 'Iteration'],
             ['AverageReturn', 'AverageReturn'],
             trials=task['trials'],
             seeds=seeds,
             plt_file=plt_file,
             env_id=env_id,
             x_label='Iteration',
             y_label='AverageReturn',
             names=['garage-tf', 'garage-pytorch'],
             smooth=True)
        # NOTE: 'trails' is the (misspelled) keyword create_json accepts.
        result_json[env_id] = create_json(
            [garage_tf_csvs, garage_pytorch_csvs],
            seeds=seeds,
            trails=task['trials'],
            xs=['Iteration', 'Iteration'],
            ys=['AverageReturn', 'AverageReturn'],
            # Fix: both runs use the same 2048-step batch scaling; the
            # original '2047' for the PyTorch entry was a typo that skewed
            # its x-axis normalization.
            factors=[2048, 2048],
            names=['garage-tf', 'garage-pytorch'])
    Rh.write_file(result_json, 'VPG')
def test_benchmark_trpo(self):
    """Compare TRPO benchmarks between garage and OpenAI baselines."""
    mujoco1m = benchmarks.get_benchmark("Mujoco1M")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
    benchmark_dir = "./data/local/benchmarks/trpo/%s/" % timestamp
    result_json = {}
    for task in mujoco1m["tasks"]:
        env_id = task["env_id"]
        env = gym.make(env_id)
        # Caps baselines episodes at 100 steps to match garage's horizon.
        baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)
        # One fresh seed per trial, shared by both runs.
        seeds = random.sample(range(100), task["trials"])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, "{}_benchmark.png".format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task["trials"]):
            # Clear cached TF placeholders so trials don't share graph state.
            _PLACEHOLDER_CACHE.clear()
            seed = seeds[trial]
            trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
            garage_dir = trial_dir + "/garage"
            baselines_dir = trial_dir + "/baselines"
            # Fresh default graph isolates each trial's TF state.
            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
                # Run baseline algorithms
                baseline_env.reset()
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x="Iteration",
                g_y="AverageReturn",
                b_x="EpThisIter",
                b_y="EpRewMean",
                trials=task["trials"],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label="Iteration",
                y_label="AverageReturn")
        # NOTE: 'trails' is the (misspelled) keyword Rh.create_json accepts.
        result_json[env_id] = Rh.create_json(
            b_csvs=baselines_csvs,
            g_csvs=garage_csvs,
            seeds=seeds,
            trails=task["trials"],
            g_x="Iteration",
            g_y="AverageReturn",
            b_x="TimestepsSoFar",
            b_y="EpRewMean",
            factor_g=1024,
            factor_b=1)
        env.close()
    Rh.write_file(result_json, "TRPO")