def benchmark_trpo(self):  # pylint: disable=no-self-use
    """Compare benchmarks between garage and baselines."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/trpo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)

        seeds = random.sample(range(100), hyper_parameters['n_trials'])

        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))

        garage_tf_csvs = []
        garage_pytorch_csvs = []

        for trial in range(hyper_parameters['n_trials']):
            _PLACEHOLDER_CACHE.clear()
            seed = seeds[trial]

            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_tf_dir = trial_dir + '/garage_tf'
            garage_pytorch_dir = trial_dir + '/garage_pytorch'

            # Run garage algorithms
            env.reset()
            garage_pytorch_csv = run_garage_pytorch(env, seed,
                                                    garage_pytorch_dir)

            # pylint: disable=not-context-manager
            with tf.Graph().as_default():
                env.reset()
                garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

            garage_tf_csvs.append(garage_tf_csv)
            garage_pytorch_csvs.append(garage_pytorch_csv)

        env.close()

        benchmark_helper.plot_average_over_trials(
            [garage_tf_csvs, garage_pytorch_csvs],
            ['Evaluation/AverageReturn'] * 2,
            plt_file=plt_file,
            env_id=env_id,
            x_label='Iteration',
            y_label='AverageReturn',
            names=['garage-TensorFlow', 'garage-PyTorch'],
        )

        result_json[env_id] = benchmark_helper.create_json(
            [garage_tf_csvs, garage_pytorch_csvs],
            seeds=seeds,
            trials=hyper_parameters['n_trials'],
            xs=['Evaluation/Iteration'] * 2,
            ys=['Evaluation/AverageReturn'] * 2,
            factors=[hyper_parameters['batch_size']] * 2,
            names=['garage-TF', 'garage-PT'])

    Rh.write_file(result_json, 'TRPO')
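# The benchmark above reads its settings from a module-level
# ``hyper_parameters`` dict that is not shown in this listing. Below is a
# minimal sketch of such a dict: the key names (``n_trials``,
# ``batch_size``) come from the code above, but every value is an
# illustrative assumption, not the project's actual configuration.
hyper_parameters = {
    'n_trials': 3,       # trials (seeds) per environment; assumed value
    'batch_size': 1024,  # samples per policy update; assumed value
}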
def test_benchmark_pearl(self):
    """Run metarl PEARL benchmarks on the ML1 reach-v1 tasks."""
    env_sampler = SetTaskSampler(
        lambda: MetaRLEnv(normalize(ML1.get_train_tasks('reach-v1'))))
    env = env_sampler.sample(params['num_train_tasks'])
    test_env_sampler = SetTaskSampler(
        lambda: MetaRLEnv(normalize(ML1.get_test_tasks('reach-v1'))))
    test_env = test_env_sampler.sample(params['num_train_tasks'])

    env_id = 'reach-v1'
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'pearl', timestamp)
    result_json = {}
    seeds = random.sample(range(100), params['n_trials'])
    task_dir = osp.join(benchmark_dir, env_id)
    plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
    metarl_csvs = []

    for trial in range(params['n_trials']):
        seed = seeds[trial]
        trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
        metarl_dir = trial_dir + '/metarl'

        metarl_csv = run_metarl(env, test_env, seed, metarl_dir)
        metarl_csvs.append(metarl_csv)

    env.close()

    benchmark_helper.plot_average_over_trials(
        [metarl_csvs],
        ys=['Test/Average/SuccessRate'],
        plt_file=plt_file,
        env_id=env_id,
        x_label='TotalEnvSteps',
        y_label='Test/Average/SuccessRate',
        names=['metarl_pearl'],
    )

    factor_val = params['meta_batch_size'] * params['max_path_length']

    result_json[env_id] = benchmark_helper.create_json(
        [metarl_csvs],
        seeds=seeds,
        trials=params['n_trials'],
        xs=['TotalEnvSteps'],
        ys=['Test/Average/SuccessRate'],
        factors=[factor_val],
        names=['metarl_pearl'])

    Rh.write_file(result_json, 'PEARL')
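# The PEARL benchmarks rely on a module-level ``params`` dict that is not
# shown in this listing. The sketch below only illustrates its shape: the
# key names (``num_train_tasks``, ``num_test_tasks``, ``n_trials``,
# ``meta_batch_size``, ``max_path_length``) are taken from the code in
# this file, while the numbers are placeholder assumptions.
params = {
    'num_train_tasks': 50,   # assumed
    'num_test_tasks': 10,    # assumed
    'n_trials': 3,           # assumed
    'meta_batch_size': 16,   # assumed
    'max_path_length': 150,  # assumed
}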
def test_benchmark_vpg(self):
    """Compare VPG benchmarks between metarl TensorFlow and PyTorch."""
    # pylint: disable=no-self-use
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/vpg/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)

        seeds = random.sample(range(100), hyper_parameters['n_trials'])

        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))

        metarl_tf_csvs = []
        metarl_pytorch_csvs = []

        for trial in range(hyper_parameters['n_trials']):
            seed = seeds[trial]

            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_tf_dir = trial_dir + '/metarl/tf'
            metarl_pytorch_dir = trial_dir + '/metarl/pytorch'

            # pylint: disable=not-context-manager
            with tf.Graph().as_default():
                # Run metarl algorithms
                env.reset()
                metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir)

            env.reset()
            metarl_pytorch_csv = run_metarl_pytorch(env, seed,
                                                    metarl_pytorch_dir)

            metarl_tf_csvs.append(metarl_tf_csv)
            metarl_pytorch_csvs.append(metarl_pytorch_csv)

        env.close()

        benchmark_helper.plot_average_over_trials(
            [metarl_tf_csvs, metarl_pytorch_csvs],
            ['Evaluation/AverageReturn'] * 2,
            plt_file=plt_file,
            env_id=env_id,
            x_label='Iteration',
            y_label='Evaluation/AverageReturn',
            names=['metarl-TensorFlow', 'metarl-PyTorch'])

        result_json[env_id] = benchmark_helper.create_json(
            [metarl_tf_csvs, metarl_pytorch_csvs],
            seeds=seeds,
            trials=hyper_parameters['n_trials'],
            xs=['Iteration'] * 2,
            ys=['Evaluation/AverageReturn'] * 2,
            factors=[hyper_parameters['batch_size']] * 2,
            names=['metarl-tf', 'metarl-pytorch'])

    Rh.write_file(result_json, 'VPG')
def test_benchmark_ppo(self):
    """Compare PPO benchmarks between garage and baselines."""
    # pylint: disable=no-self-use
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(
            env_name=env_id,
            max_path_length=hyper_parameters['max_path_length'])

        seeds = random.sample(range(100), hyper_parameters['n_trials'])

        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))

        baselines_csvs = []
        garage_tf_csvs = []
        garage_pytorch_csvs = []

        for trial in range(hyper_parameters['n_trials']):
            seed = seeds[trial]

            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_tf_dir = trial_dir + '/garage/tf'
            garage_pytorch_dir = trial_dir + '/garage/pytorch'
            baselines_dir = trial_dir + '/baselines'

            # pylint: disable=not-context-manager
            with tf.Graph().as_default():
                # Run baselines algorithms
                baseline_env.reset()
                baseline_csv = run_baselines(baseline_env, seed,
                                             baselines_dir)

                # Run garage algorithms
                env.reset()
                garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

            env.reset()
            garage_pytorch_csv = run_garage_pytorch(env, seed,
                                                    garage_pytorch_dir)

            baselines_csvs.append(baseline_csv)
            garage_tf_csvs.append(garage_tf_csv)
            garage_pytorch_csvs.append(garage_pytorch_csv)

        env.close()

        benchmark_helper.plot_average_over_trials(
            [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
            [
                'eprewmean', 'Evaluation/AverageReturn',
                'Evaluation/AverageReturn'
            ],
            plt_file=plt_file,
            env_id=env_id,
            x_label='Iteration',
            y_label='Evaluation/AverageReturn',
            names=['baseline', 'garage-TensorFlow', 'garage-PyTorch'],
        )

        result_json[env_id] = benchmark_helper.create_json(
            [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
            seeds=seeds,
            trials=hyper_parameters['n_trials'],
            xs=['nupdates', 'Iteration', 'Iteration'],
            ys=[
                'eprewmean', 'Evaluation/AverageReturn',
                'Evaluation/AverageReturn'
            ],
            factors=[hyper_parameters['batch_size']] * 3,
            names=['baseline', 'garage-TF', 'garage-PT'])

    Rh.write_file(result_json, 'PPO')
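# The ``run_*`` helpers used by these tests (``run_baselines``,
# ``run_garage_tf``, ``run_garage_pytorch``, ``run_metarl_tf``, and so on)
# are defined elsewhere in each benchmark script. The tests only rely on a
# simple contract: each helper trains one algorithm with one seed, logs
# progress under ``log_dir``, and returns the path of the resulting
# progress CSV, which ``benchmark_helper`` then reads. The stub below is a
# hypothetical illustration of that contract, not the project's code.
def run_hypothetical_algo(env, seed, log_dir):
    """Train one algorithm for one seed and return its progress CSV path."""
    # ... set the global seed, build and train the algorithm,
    # writing progress rows under ``log_dir`` ...
    return osp.join(log_dir, 'progress.csv')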
def test_benchmark_pearl(self):
    """Run benchmarks for metarl PEARL."""
    ML_train_envs = [
        TaskIdWrapper(MetaRLEnv(
            IgnoreDoneWrapper(
                normalize(
                    env(*ML10_ARGS['train'][task]['args'],
                        **ML10_ARGS['train'][task]['kwargs'])))),
                      task_id=task_id,
                      task_name=task)
        for (task_id, (task, env)) in enumerate(ML10_ENVS['train'].items())
    ]
    ML_test_envs = [
        TaskIdWrapper(MetaRLEnv(
            IgnoreDoneWrapper(
                normalize(
                    env(*ML10_ARGS['test'][task]['args'],
                        **ML10_ARGS['test'][task]['kwargs'])))),
                      task_id=task_id,
                      task_name=task)
        for (task_id, (task, env)) in enumerate(ML10_ENVS['test'].items())
    ]

    env_sampler = EnvPoolSampler(ML_train_envs)
    env = env_sampler.sample(params['num_train_tasks'])
    test_env_sampler = EnvPoolSampler(ML_test_envs)
    test_env = test_env_sampler.sample(params['num_test_tasks'])

    env_id = 'ML10'
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'pearl', timestamp)
    result_json = {}
    seeds = random.sample(range(100), params['n_trials'])
    task_dir = osp.join(benchmark_dir, env_id)
    plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
    metarl_csvs = []

    for trial in range(params['n_trials']):
        seed = seeds[trial]
        trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
        metarl_dir = trial_dir + '/metarl'

        metarl_csv = run_metarl(env, test_env, seed, metarl_dir)
        metarl_csvs.append(metarl_csv)

    env.close()

    benchmark_helper.plot_average_over_trials(
        [metarl_csvs],
        ys=['Test/Average/SuccessRate'],
        plt_file=plt_file,
        env_id=env_id,
        x_label='TotalEnvSteps',
        y_label='Test/Average/SuccessRate',
        names=['metarl_pearl'],
    )

    factor_val = params['meta_batch_size'] * params['max_path_length']

    result_json[env_id] = benchmark_helper.create_json(
        [metarl_csvs],
        seeds=seeds,
        trials=params['n_trials'],
        xs=['TotalEnvSteps'],
        ys=['Test/Average/SuccessRate'],
        factors=[factor_val],
        names=['metarl_pearl'])

    Rh.write_file(result_json, 'PEARL')
def test_benchmark_ppo(self):
    """Compare PPO benchmarks between metarl and baselines."""
    # pylint: disable=no-self-use
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(
            env_name=env_id,
            max_path_length=hyper_parameters['max_path_length'])

        seeds = random.sample(range(100), hyper_parameters['n_trials'])

        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))

        baselines_csvs = []
        metarl_tf_csvs = []
        metarl_pytorch_csvs = []

        for trial in range(hyper_parameters['n_trials']):
            seed = seeds[trial]

            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_tf_dir = trial_dir + '/metarl/tf'
            metarl_pytorch_dir = trial_dir + '/metarl/pytorch'
            baselines_dir = trial_dir + '/baselines'

            # pylint: disable=not-context-manager
            with tf.Graph().as_default():
                # Run baselines algorithms
                baseline_env.reset()
                baseline_csv = run_baselines(baseline_env, seed,
                                             baselines_dir)

                # Run metarl algorithms
                env.reset()
                metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir)

            # env.reset()
            # metarl_pytorch_csv = run_metarl_pytorch(
            #     env, seed, metarl_pytorch_dir)

            baselines_csvs.append(baseline_csv)
            metarl_tf_csvs.append(metarl_tf_csv)
            # metarl_pytorch_csvs.append(metarl_pytorch_csv)

        env.close()

        # benchmark_helper.plot_average_over_trials(
        #     [baselines_csvs, metarl_tf_csvs, metarl_pytorch_csvs],
        #     [
        #         'eprewmean', 'Evaluation/AverageReturn',
        #         'Evaluation/AverageReturn'
        #     ],
        #     plt_file=plt_file,
        #     env_id=env_id,
        #     x_label='Iteration',
        #     y_label='Evaluation/AverageReturn',
        #     names=['baseline', 'metarl-TensorFlow', 'metarl-PyTorch'],
        # )

        result_json[env_id] = benchmark_helper.create_json(
            [baselines_csvs, metarl_tf_csvs],
            seeds=seeds,
            trials=hyper_parameters['n_trials'],
            xs=['total_timesteps', 'TotalEnvSteps'],
            ys=['eprewmean', 'Evaluation/AverageReturn'],
            factors=[hyper_parameters['batch_size']] * 2,
            names=['baseline', 'metarl-TF'])

        # Rh.relplot(g_csvs=metarl_tf_csvs,
        #            b_csvs=baselines_csvs,
        #            g_x='TotalEnvSteps',
        #            g_y='Evaluation/AverageReturn',
        #            g_z='MetaRL',
        #            b_x='total_timesteps',
        #            b_y='eprewmean',
        #            b_z='Openai/Baseline',
        #            trials=hyper_parameters['n_trials'],
        #            seeds=seeds,
        #            plt_file=plt_file,
        #            env_id=env_id,
        #            x_label='EnvTimeStep',
        #            y_label='Performance')

        benchmark_helper.plot_average_over_trials_with_x(
            [baselines_csvs, metarl_tf_csvs],
            ['eprewmean', 'Evaluation/AverageReturn'],
            ['total_timesteps', 'TotalEnvSteps'],
            plt_file=plt_file,
            env_id=env_id,
            x_label='EnvTimeStep',
            y_label='Performance',
            names=['baseline', 'metarl-TensorFlow'],
        )

        # Rh.relplot(g_csvs=metarl_tf_csvs,
        #            b_csvs=metarl_pytorch_csvs,
        #            g_x='TotalEnvSteps',
        #            g_y='Evaluation/AverageReturn',
        #            g_z='MetaRL-TF',
        #            b_x='TotalEnvSteps',
        #            b_y='Evaluation/AverageReturn',
        #            b_z='MetaRL-PT',
        #            trials=hyper_parameters['n_trials'],
        #            seeds=seeds,
        #            plt_file=plt_file,
        #            env_id=env_id,
        #            x_label='EnvTimeStep',
        #            y_label='Performance')

    Rh.write_file(result_json, 'PPO')
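# A quick way to sanity-check the per-trial results outside of
# ``benchmark_helper`` is to load each progress CSV with pandas and average
# the return column across trials, which is essentially what the plotting
# helpers above do. This is only an illustrative sketch: the column name
# and the list of CSV paths come from the PPO benchmark above, while
# pandas itself is an extra dependency assumed to be available.
import pandas as pd


def average_return_over_trials(csv_paths,
                               column='Evaluation/AverageReturn'):
    """Return the per-iteration mean of ``column`` across trial CSVs."""
    frames = [pd.read_csv(path)[column] for path in csv_paths]
    return pd.concat(frames, axis=1).mean(axis=1)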