def test_benchmark_categorical_cnn_policy(self):
    """Benchmark PPO with a categorical CNN policy on pixel environments."""
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
    for env_id in ['CubeCrash-v0', 'MemorizeDigits-v0']:
        env = gym.make(env_id)
        seeds = random.sample(range(100), num_of_trials)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark_ppo.png'.format(env_id))
        relplt_file = osp.join(benchmark_dir,
                               '{}_benchmark_ppo_mean.png'.format(env_id))
        metarl_csvs = []
        # No model-based runs are launched in this test, so this list stays
        # empty and the 'MetaRLWithModel' series is blank in the plots.
        metarl_model_csvs = []
        for trial in range(num_of_trials):
            seed = seeds[trial]
            trial_dir = osp.join(task_dir,
                                 'trial_{}_seed_{}'.format(trial + 1, seed))
            metarl_dir = osp.join(trial_dir, 'metarl')
            with tf.Graph().as_default():
                # Run metarl algorithms
                env.reset()
                metarl_csv = run_metarl(env, seed, metarl_dir)
            metarl_csvs.append(metarl_csv)
        env.close()
        Rh.relplot(g_csvs=metarl_csvs,
                   b_csvs=metarl_model_csvs,
                   g_x='Iteration',
                   g_y='Evaluation/AverageReturn',
                   g_z='MetaRL',
                   b_x='Iteration',
                   b_y='Evaluation/AverageReturn',
                   b_z='MetaRLWithModel',
                   trials=num_of_trials,
                   seeds=seeds,
                   plt_file=relplt_file,
                   env_id=env_id,
                   x_label='Iteration',
                   y_label='Evaluation/AverageReturn')
        Rh.plot(g_csvs=metarl_csvs,
                b_csvs=metarl_model_csvs,
                g_x='Iteration',
                g_y='Evaluation/AverageReturn',
                g_z='MetaRL',
                b_x='Iteration',
                b_y='Evaluation/AverageReturn',
                b_z='MetaRLWithModel',
                trials=num_of_trials,
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn')
def test_benchmark_ddpg(self):
    """Compare DDPG benchmarks between garage and baselines."""
    # Load Mujoco1M tasks. Other benchmarks are listed at
    # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'ddpg', timestamp)
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(
            env_name=env_id, max_path_length=params['n_rollout_steps'])
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task['trials']):
            env.reset()
            baseline_env.reset()
            seed = seeds[trial]
            trial_dir = osp.join(task_dir,
                                 'trial_{}_seed_{}'.format(trial + 1, seed))
            garage_dir = osp.join(trial_dir, 'garage')
            baselines_dir = osp.join(trial_dir, 'baselines')
            with tf.Graph().as_default():
                # Run garage algorithms
                garage_csv = run_garage(env, seed, garage_dir)
                # Run baselines algorithms
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        env.close()
        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x='Epoch',
                g_y='AverageReturn',
                b_x='total/epochs',
                b_y='rollout/return',
                trials=task['trials'],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Epoch',
                y_label='AverageReturn')
        result_json[env_id] = Rh.create_json(
            b_csvs=baselines_csvs,
            g_csvs=garage_csvs,
            seeds=seeds,
            trails=task['trials'],
            g_x='Epoch',
            g_y='AverageReturn',
            b_x='total/epochs',
            b_y='rollout/return',
            factor_g=params['n_epoch_cycles'] * params['n_rollout_steps'],
            factor_b=1)
    Rh.write_file(result_json, 'DDPG')
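# NOTE (illustrative, not from the source): test_benchmark_ddpg above and
# test_benchmark_td3 below read hyperparameters from a module-level `params`
# dict. A minimal sketch of its assumed shape -- the key names come from the
# tests themselves, but the values here are placeholders only:
#
# params = {
#     'n_rollout_steps': 100,  # rollout horizon; also multiplies into factor_g
#     'n_epoch_cycles': 20,    # factor_g = n_epoch_cycles * n_rollout_steps
# }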
def test_benchmark_ppo(self):
    """Compare PPO benchmarks between garage and baselines."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task['trials']):
            seed = seeds[trial]
            trial_dir = osp.join(task_dir,
                                 'trial_{}_seed_{}'.format(trial + 1, seed))
            garage_dir = osp.join(trial_dir, 'garage')
            baselines_dir = osp.join(trial_dir, 'baselines')
            with tf.Graph().as_default():
                # Run baselines algorithms
                baseline_env.reset()
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        env.close()
        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x='Iteration',
                g_y='AverageReturn',
                b_x='nupdates',
                b_y='eprewmean',
                trials=task['trials'],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn')
        result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                             g_csvs=garage_csvs,
                                             seeds=seeds,
                                             trails=task['trials'],
                                             g_x='Iteration',
                                             g_y='AverageReturn',
                                             b_x='nupdates',
                                             b_y='eprewmean',
                                             factor_g=2048,
                                             factor_b=2048)
    Rh.write_file(result_json, 'PPO')
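# Illustrative sketch (not the real implementation): AutoStopEnv, imported by
# these tests, appears to wrap a gym environment so that baselines/rlkit
# rollouts stop at a fixed horizon. The class below only mirrors the
# constructor arguments used in the tests; it is named _AutoStopEnvSketch to
# avoid shadowing the real import, and its body is an assumption.
class _AutoStopEnvSketch(gym.Wrapper):

    def __init__(self, env=None, env_name='', max_path_length=100):
        # Accept either a ready-made env or an env_id, matching the
        # AutoStopEnv(env_name=..., max_path_length=...) call sites above.
        super().__init__(env if env is not None else gym.make(env_name))
        self._max_path_length = max_path_length
        self._step_count = 0

    def reset(self, **kwargs):
        self._step_count = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self._step_count += 1
        # Force episode termination once the horizon is reached.
        if self._step_count >= self._max_path_length:
            done = True
        return obs, reward, done, info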
def test_benchmark_td3(self):
    """Test garage TD3 benchmarks."""
    # Load Mujoco1M tasks. Other benchmarks are listed at
    # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py  # noqa: E501
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'td3', timestamp)
    result_json = {}
    # rlkit throws an error for 'Reacher-v2' due to a gym version mismatch,
    # so skip that task.
    mujoco1m['tasks'] = [
        task for task in mujoco1m['tasks'] if task['env_id'] != 'Reacher-v2'
    ]
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        rlkit_env = AutoStopEnv(env_name=env_id,
                                max_path_length=params['n_rollout_steps'])
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        garage_csvs = []
        rlkit_csvs = []
        for trial in range(task['trials']):
            env.reset()
            rlkit_env.reset()
            seed = seeds[trial]
            trial_dir = osp.join(task_dir,
                                 'trial_{}_seed_{}'.format(trial + 1, seed))
            garage_dir = osp.join(trial_dir, 'garage')
            rlkit_dir = osp.join(trial_dir, 'rlkit')
            with tf.Graph().as_default():
                # Run rlkit algorithms
                rlkit_csv = run_rlkit(rlkit_env, seed, rlkit_dir)
                # Run garage algorithms
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
            rlkit_csvs.append(rlkit_csv)
        Rh.plot(b_csvs=rlkit_csvs,
                g_csvs=garage_csvs,
                g_x='Epoch',
                g_y='Evaluation/AverageReturn',
                g_z='garage',
                b_x='Epoch',
                b_y='evaluation/Average Returns',
                b_z='rlkit',
                trials=task['trials'],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn')
        result_json[env_id] = Rh.create_json(
            b_csvs=rlkit_csvs,
            g_csvs=garage_csvs,
            seeds=seeds,
            trails=task['trials'],
            g_x='Epoch',
            g_y='Evaluation/AverageReturn',
            b_x='Epoch',
            b_y='evaluation/Average Returns',
            factor_g=1,
            factor_b=1)
    Rh.write_file(result_json, 'TD3')
def test_benchmark_categorical_gru_policy():
    """Benchmark PPO with a categorical GRU policy."""
    categorical_tasks = [
        'LunarLander-v2',
        'Assault-ramDeterministic-v4',
        'Breakout-ramDeterministic-v4',
        'ChopperCommand-ramDeterministic-v4',
        'Tutankham-ramDeterministic-v4',
        'CartPole-v1',
    ]
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo_categ_gru/%s/' % timestamp
    result_json = {}
    for task in categorical_tasks:
        env_id = task
        env = gym.make(env_id)
        # baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)
        seeds = random.sample(range(100), 3)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        # The baselines runs below are disabled, so baselines_csvs stays
        # empty and only the garage series is populated.
        baselines_csvs = []
        garage_csvs = []
        for trial in range(3):
            seed = seeds[trial]
            trial_dir = osp.join(task_dir,
                                 'trial_{}_seed_{}'.format(trial + 1, seed))
            garage_dir = osp.join(trial_dir, 'garage')
            with tf.Graph().as_default():
                # Run baselines algorithms
                # baseline_env.reset()
                # baselines_csv = run_baselines(baseline_env, seed,
                #                               baselines_dir)
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
        env.close()
        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x='Iteration',
                g_y='AverageReturn',
                g_z='garage',
                b_x='Iteration',
                b_y='AverageReturn',
                b_z='baselines',
                trials=3,
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn')
        result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                             g_csvs=garage_csvs,
                                             seeds=seeds,
                                             trails=3,
                                             g_x='Iteration',
                                             g_y='AverageReturn',
                                             b_x='Iteration',
                                             b_y='AverageReturn',
                                             factor_g=2048,
                                             factor_b=2048)
    Rh.write_file(result_json, 'PPO')
def test_benchmark_categorical_mlp_policy(self):
    """Benchmark PPO with a categorical MLP policy.

    Only garage is run here; the garage results are plotted against
    themselves, so both series in the output are identical.
    """
    categorical_tasks = [
        'LunarLander-v2', 'CartPole-v1', 'Assault-ramDeterministic-v4',
        'Breakout-ramDeterministic-v4',
        'ChopperCommand-ramDeterministic-v4', 'Tutankham-ramDeterministic-v4'
    ]
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/categorical_mlp_policy/{0}/'
    benchmark_dir = benchmark_dir.format(timestamp)
    result_json = {}
    for task in categorical_tasks:
        env_id = task
        env = gym.make(env_id)
        trials = 3
        seeds = random.sample(range(100), trials)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        relplt_file = osp.join(benchmark_dir,
                               '{}_benchmark_mean.png'.format(env_id))
        garage_csvs = []
        for trial in range(trials):
            seed = seeds[trial]
            trial_dir = osp.join(task_dir,
                                 'trial_{}_seed_{}'.format(trial + 1, seed))
            garage_dir = osp.join(trial_dir, 'garage')
            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
        env.close()
        # garage is passed as both series since there is no baseline run.
        Rh.plot(b_csvs=garage_csvs,
                g_csvs=garage_csvs,
                g_x='Iteration',
                g_y='AverageReturn',
                g_z='Garage',
                b_x='Iteration',
                b_y='AverageReturn',
                b_z='Garage',
                trials=trials,
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn')
        Rh.relplot(b_csvs=garage_csvs,
                   g_csvs=garage_csvs,
                   g_x='Iteration',
                   g_y='AverageReturn',
                   g_z='Garage',
                   b_x='Iteration',
                   b_y='AverageReturn',
                   b_z='Garage',
                   trials=trials,
                   seeds=seeds,
                   plt_file=relplt_file,
                   env_id=env_id,
                   x_label='Iteration',
                   y_label='AverageReturn')
        result_json[env_id] = Rh.create_json(b_csvs=garage_csvs,
                                             g_csvs=garage_csvs,
                                             seeds=seeds,
                                             trails=trials,
                                             g_x='Iteration',
                                             g_y='AverageReturn',
                                             b_x='Iteration',
                                             b_y='AverageReturn',
                                             factor_g=2048,
                                             factor_b=2048)
    Rh.write_file(result_json, 'PPO')
def test_benchmark_trpo(self):
    """Compare TRPO benchmarks between garage and baselines."""
    mujoco1m = benchmarks.get_benchmark("Mujoco1M")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
    benchmark_dir = "./data/local/benchmarks/trpo/%s/" % timestamp
    result_json = {}
    for task in mujoco1m["tasks"]:
        env_id = task["env_id"]
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)
        seeds = random.sample(range(100), task["trials"])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            "{}_benchmark.png".format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task["trials"]):
            # Clear baselines' cached TF placeholders so each trial starts
            # from a fresh graph.
            _PLACEHOLDER_CACHE.clear()
            seed = seeds[trial]
            trial_dir = osp.join(task_dir,
                                 "trial_{}_seed_{}".format(trial + 1, seed))
            garage_dir = osp.join(trial_dir, "garage")
            baselines_dir = osp.join(trial_dir, "baselines")
            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
                # Run baseline algorithms
                baseline_env.reset()
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x="Iteration",
                g_y="AverageReturn",
                b_x="EpThisIter",
                b_y="EpRewMean",
                trials=task["trials"],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label="Iteration",
                y_label="AverageReturn")
        result_json[env_id] = Rh.create_json(
            b_csvs=baselines_csvs,
            g_csvs=garage_csvs,
            seeds=seeds,
            trails=task["trials"],
            g_x="Iteration",
            g_y="AverageReturn",
            b_x="TimestepsSoFar",
            b_y="EpRewMean",
            factor_g=1024,
            factor_b=1)
        env.close()
    Rh.write_file(result_json, "TRPO")
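# Hypothetical refactoring sketch: every test above repeats the same
# per-trial scaffolding (one seed per trial, a trial_*_seed_* directory
# layout, and a fresh TF graph per run). A helper like the one below could
# consolidate that pattern. The names `run_fn` and `algo` are illustrative,
# not part of the suite; `run_fn` stands in for run_garage/run_baselines/
# run_rlkit/run_metarl and `algo` for the per-algorithm subdirectory name.
def _run_trials(env, seeds, task_dir, run_fn, algo):
    csvs = []
    for trial, seed in enumerate(seeds, start=1):
        trial_dir = osp.join(task_dir, 'trial_{}_seed_{}'.format(trial, seed))
        with tf.Graph().as_default():
            env.reset()
            csvs.append(run_fn(env, seed, osp.join(trial_dir, algo)))
    return csvs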