    def test_benchmark_categorical_cnn_policy(self):
        """Benchmark Categorical CNN Policy with PPO on pixel environments."""
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        for env_id in ['CubeCrash-v0', 'MemorizeDigits-v0']:
            env = gym.make(env_id)

            seeds = random.sample(range(100), num_of_trials)

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark_ppo.png'.format(env_id))
            relplt_file = osp.join(benchmark_dir,
                                   '{}_benchmark_ppo_mean.png'.format(env_id))
            metarl_csvs = []
            metarl_model_csvs = []

            for trial in range(num_of_trials):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                metarl_dir = trial_dir + '/metarl'

                with tf.Graph().as_default():
                    # Run metarl algorithms
                    env.reset()
                    metarl_csv = run_metarl(env, seed, metarl_dir)

                metarl_csvs.append(metarl_csv)

            env.close()

            Rh.relplot(g_csvs=metarl_csvs,
                       b_csvs=metarl_model_csvs,
                       g_x='Iteration',
                       g_y='Evaluation/AverageReturn',
                       g_z='MetaRL',
                       b_x='Iteration',
                       b_y='Evaluation/AverageReturn',
                       b_z='MetaRLWithModel',
                       trials=num_of_trials,
                       seeds=seeds,
                       plt_file=relplt_file,
                       env_id=env_id,
                       x_label='Iteration',
                       y_label='Evaluation/AverageReturn')

            Rh.plot(g_csvs=metarl_csvs,
                    b_csvs=metarl_model_csvs,
                    g_x='Iteration',
                    g_y='Evaluation/AverageReturn',
                    g_z='MetaRL',
                    b_x='Iteration',
                    b_y='Evaluation/AverageReturn',
                    b_z='MetaRLWithModel',
                    trials=num_of_trials,
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='Evaluation/AverageReturn')
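# Hedged sketch (assumption, not part of the original files): the benchmark
# snippets in this listing rely on module-level imports and a trial count
# roughly like the following. The Rh helper path and the num_of_trials value
# are assumptions; run_garage/run_metarl, the baselines `benchmarks` module,
# and the metaworld env tables are defined elsewhere in the original files
# and are omitted here.
import copy
import datetime
import json
import os
import os.path as osp
import random

import gym
import tensorflow as tf

import tests.helpers as Rh  # assumed path; used as Rh.plot / Rh.relplot / Rh.create_json

num_of_trials = 3  # assumed number of seeded trials per environment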
Example 2
    def benchmark_continuous_mlp_q_function(self):
        # pylint: disable=no-self-use
        """Test Continuous MLP QFunction Benchmarking."""
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'continuous_mlp_q_function', timestamp)
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)

            seeds = random.sample(range(100), num_of_trials)

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(
                benchmark_dir,
                '{}_benchmark_continuous_mlp_q_function.png'.format(env_id))
            garage_csvs = []

            for trial in range(num_of_trials):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_dir = trial_dir + '/garage'

                with tf.Graph().as_default():
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)
                garage_csvs.append(garage_csv)

            env.close()

            Rh.relplot(g_csvs=garage_csvs,
                       b_csvs=[],
                       g_x='Epoch',
                       g_y='Evaluation/AverageReturn',
                       g_z='Garage',
                       b_x=None,
                       b_y=None,
                       b_z=None,
                       trials=num_of_trials,
                       seeds=seeds,
                       plt_file=plt_file,
                       env_id=env_id,
                       x_label='Iteration',
                       y_label='Evaluation/AverageReturn')
Example 3
    def test_benchmark_continuous_mlp_policy(self):
        """Benchmark Continuous MLP Policy on Mujoco1M tasks."""
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'continuous_mlp_policy', timestamp)
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)

            seeds = random.sample(range(100), num_of_trials)

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(
                benchmark_dir,
                '{}_benchmark_continuous_mlp_policy.png'.format(env_id))
            metarl_csvs = []

            for trial in range(num_of_trials):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                metarl_dir = trial_dir + '/metarl'

                with tf.Graph().as_default():
                    env.reset()
                    metarl_csv = run_metarl(env, seed, metarl_dir)
                metarl_csvs.append(metarl_csv)

            env.close()

            Rh.relplot(g_csvs=metarl_csvs,
                       b_csvs=[],
                       g_x='Epoch',
                       g_y='Evaluation/AverageReturn',
                       g_z='MetaRL',
                       b_x=None,
                       b_y=None,
                       b_z=None,
                       trials=num_of_trials,
                       seeds=seeds,
                       plt_file=plt_file,
                       env_id=env_id,
                       x_label='Iteration',
                       y_label='Evaluation/AverageReturn')
Example 4
    def test_benchmark_gaussian_cnn_baseline(self):
        """Benchmark Gaussian CNN Baseline with PPO on pixel environments."""
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        for env_id in ['CubeCrash-v0', 'MemorizeDigits-v0']:
            env = gym.make(env_id)
            seeds = random.sample(range(100), num_of_trials)

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(
                benchmark_dir,
                '{}_benchmark_gaussian_cnn_baseline.png'.format(env_id))
            garage_csvs = []

            for trial in range(num_of_trials):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_dir = trial_dir + '/garage'

                with tf.Graph().as_default():
                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)
                garage_csvs.append(garage_csv)

            env.close()

            Rh.relplot(g_csvs=garage_csvs,
                       b_csvs=[],
                       g_x='Iteration',
                       g_y='Evaluation/AverageReturn',
                       g_z='Garage',
                       b_x='Iteration',
                       b_y='Evaluation/AverageReturn',
                       b_z='GarageWithModel',
                       trials=num_of_trials,
                       seeds=seeds,
                       plt_file=plt_file,
                       env_id=env_id,
                       x_label='Iteration',
                       y_label='Evaluation/AverageReturn')
Example 5
    def test_benchmark_rl2(self):  # pylint: disable=no-self-use
        """Compare benchmarks between metarl and baselines."""
        # test set has a higher max_obs_dim
        env_obs_dim = [
            env().observation_space.shape[0]
            for (_, env) in ML45_ENVS['test'].items()
        ]
        max_obs_dim = max(env_obs_dim)
        env_id = 'ML45'
        ML_train_envs = [
            TaskIdWrapper(NormalizedRewardEnv(
                RL2Env(
                    env(*ML45_ARGS['train'][task]['args'],
                        **ML45_ARGS['train'][task]['kwargs']), max_obs_dim)),
                          task_id=task_id,
                          task_name=task)
            for (task_id, (task, env)) in enumerate(ML45_ENVS['train'].items())
        ]
        tasks = task_sampler.EnvPoolSampler(ML_train_envs)
        tasks.grow_pool(hyper_parameters['meta_batch_size'])
        envs = tasks.sample(hyper_parameters['meta_batch_size'])
        env = envs[0]()

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/rl2/%s/' % timestamp
        result_json = {}

        # Start main loop
        seeds = random.sample(range(100), hyper_parameters['n_trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        metarl_tf_csvs = []

        for trial in range(hyper_parameters['n_trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_tf_dir = trial_dir + '/metarl'

            with tf.Graph().as_default():
                env.reset()
                metarl_tf_csv = run_metarl(env, envs, tasks, seed, metarl_tf_dir)

            metarl_tf_csvs.append(metarl_tf_csv)

        with open(osp.join(metarl_tf_dir, 'parameters.txt'), 'w') as outfile:
            hyper_parameters_copy = copy.deepcopy(hyper_parameters)
            hyper_parameters_copy['sampler_cls'] = str(
                hyper_parameters_copy['sampler_cls'])
            json.dump(hyper_parameters_copy, outfile)

        g_x = 'TotalEnvSteps'
        g_ys = [
            'Evaluation/AverageReturn',
            'Evaluation/SuccessRate',
        ]

        for g_y in g_ys:
            plt_file = osp.join(
                benchmark_dir,
                '{}_benchmark_{}.png'.format(env_id, g_y.replace('/', '-')))
            Rh.relplot(g_csvs=metarl_tf_csvs,
                       b_csvs=None,
                       g_x=g_x,
                       g_y=g_y,
                       g_z='MetaRL',
                       b_x=None,
                       b_y=None,
                       b_z='ProMP',
                       trials=hyper_parameters['n_trials'],
                       seeds=seeds,
                       plt_file=plt_file,
                       env_id=env_id,
                       x_label=g_x,
                       y_label=g_y)
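# Hedged sketch (assumption): the RL2 benchmarks above and below read a
# module-level hyper_parameters dict. Only the key names come from the
# snippets; the values and the sampler class below are placeholders.
hyper_parameters = {
    'meta_batch_size': 50,  # meta-tasks sampled per batch (assumed value)
    'n_trials': 3,          # seeded trials per benchmark (assumed value)
    'sampler_cls': object,  # placeholder; the original stores a sampler class and serializes it via str()
}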
Example 6
    def benchmark_categorical_lstm_policy(self):
        """Benchmark Categorical LSTM Policy on discrete-action tasks."""
        categorical_tasks = [
            'LunarLander-v2', 'Assault-ramDeterministic-v4',
            'Breakout-ramDeterministic-v4',
            'ChopperCommand-ramDeterministic-v4',
            'Tutankham-ramDeterministic-v4'
        ]
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo_categ/%s/' % timestamp
        result_json = {}
        for task in categorical_tasks:
            env_id = task
            env = gym.make(env_id)
            # baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

            seeds = random.sample(range(100), 3)

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            mean_plt_file = osp.join(benchmark_dir,
                                     '{}_benchmark_mean.png'.format(env_id))

            garage_models_csvs = []
            garage_csvs = []

            for trial in range(3):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_dir = trial_dir + '/garage'

                with tf.Graph().as_default():
                    # Run baselines algorithms
                    # baseline_env.reset()
                    # baselines_csv = run_baselines(baseline_env, seed,
                    #                               baselines_dir)

                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)
                    env.reset()

                garage_csvs.append(garage_csv)

            env.close()

            Rh.plot(b_csvs=garage_models_csvs,
                    g_csvs=garage_csvs,
                    g_x='Iteration',
                    g_y='Evaluation/AverageReturn',
                    g_z='garage',
                    b_x='Iteration',
                    b_y='Evaluation/AverageReturn',
                    b_z='garage_model',
                    trials=3,
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='Evaluation/AverageReturn')

            Rh.relplot(b_csvs=garage_models_csvs,
                       g_csvs=garage_csvs,
                       g_x='Iteration',
                       g_y='Evaluation/AverageReturn',
                       g_z='garage',
                       b_x='Iteration',
                       b_y='Evaluation/AverageReturn',
                       b_z='garage_model',
                       trials=3,
                       seeds=seeds,
                       plt_file=mean_plt_file,
                       env_id=env_id,
                       x_label='Iteration',
                       y_label='Evaluation/AverageReturn')

            result_json[env_id] = Rh.create_json(
                b_csvs=garage_models_csvs,
                g_csvs=garage_csvs,
                seeds=seeds,
                trails=3,
                g_x='Iteration',
                g_y='Evaluation/AverageReturn',
                b_x='Iteration',
                b_y='Evaluation/AverageReturn',
                factor_g=2048,
                factor_b=2048)

        Rh.write_file(result_json, 'PPO')
Example 7
    def test_benchmark_ddpg(self):
        """Compare benchmarks between metarl and baselines."""
        # Load the Mujoco1M tasks; other benchmark suites are listed at
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'ddpg', timestamp)
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id, max_path_length=params['n_rollout_steps'])
            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            relplt_file = osp.join(benchmark_dir,
                                   '{}_benchmark_mean.png'.format(env_id))
            baselines_csvs = []
            metarl_csvs = []

            for trial in range(task['trials']):
                env.reset()
                baseline_env.reset()
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                metarl_dir = osp.join(trial_dir, 'metarl')
                baselines_dir = osp.join(trial_dir, 'baselines')

                with tf.Graph().as_default():
                    # Run metarl algorithms
                    metarl_csv = run_metarl(env, seed, metarl_dir)

                    # Run baselines algorithms
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                metarl_csvs.append(metarl_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            Rh.plot(b_csvs=baselines_csvs,
                    g_csvs=metarl_csvs,
                    g_x='Epoch',
                    g_y='Evaluation/AverageReturn',
                    g_z='MetaRL',
                    b_x='total/epochs',
                    b_y='rollout/return',
                    b_z='Baseline',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Epoch',
                    y_label='Evaluation/AverageReturn')

            Rh.relplot(g_csvs=metarl_csvs,
                       b_csvs=baselines_csvs,
                       g_x='Epoch',
                       g_y='Evaluation/AverageReturn',
                       g_z='MetaRL',
                       b_x='total/epochs',
                       b_y='rollout/return',
                       b_z='Baseline',
                       trials=task['trials'],
                       seeds=seeds,
                       plt_file=relplt_file,
                       env_id=env_id,
                       x_label='Epoch',
                       y_label='Evaluation/AverageReturn')

            result_json[env_id] = Rh.create_json(
                b_csvs=baselines_csvs,
                g_csvs=metarl_csvs,
                seeds=seeds,
                trails=task['trials'],
                g_x='Epoch',
                g_y='Evaluation/AverageReturn',
                b_x='total/epochs',
                b_y='rollout/return',
                factor_g=params['steps_per_epoch'] * params['n_rollout_steps'],
                factor_b=1)

        Rh.write_file(result_json, 'DDPG')
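# Hedged sketch (assumption): the DDPG benchmark above reads a module-level
# params dict. The key names come from the snippet; the values are placeholders.
params = {
    'n_rollout_steps': 100,  # also used as max_path_length for AutoStopEnv (assumed value)
    'steps_per_epoch': 20,   # multiplied by n_rollout_steps to form factor_g (assumed value)
}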
Example 8
    def test_benchmark_rl2(self):  # pylint: disable=no-self-use
        """Compare benchmarks between metarl and baselines."""
        if ML:
            if env_ind == 2:
                envs = [ML1.get_train_tasks('push-v1')]
                env_ids = ['ML1-push-v1']
            elif env_ind == 3:
                envs = [ML1.get_train_tasks('reach-v1')]
                env_ids = ['ML1-reach-v1']
            elif env_ind == 4:
                envs = [ML1.get_train_tasks('pick-place-v1')]
                env_ids = ['ML1-pick-place-v1']
            else:
                raise ValueError("Env index is wrong")
        else:
            if env_ind == 0:
                envs = [HalfCheetahVelEnv]
                env_ids = ['HalfCheetahVelEnv']
            elif env_ind == 1:
                envs = [HalfCheetahDirEnv]
                env_ids = ['HalfCheetahDirEnv']
            else:
                raise ValueError("Env index is wrong")
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/rl2/%s/' % timestamp
        result_json = {}
        for i, env in enumerate(envs):
            seeds = random.sample(range(100), hyper_parameters['n_trials'])
            task_dir = osp.join(benchmark_dir, env_ids[i])
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_ids[i]))
            metarl_tf_csvs = []
            promp_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]
                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                promp_dir = trial_dir + '/promp'

                with tf.Graph().as_default():
                    if isinstance(env, gym.Env):
                        env.reset()
                        promp_csv = run_promp(env, seed, promp_dir)
                    else:
                        promp_csv = run_promp(env(), seed, promp_dir)

                promp_csvs.append(promp_csv)

            with open(osp.join(promp_dir, 'parameters.txt'), 'w') as outfile:
                json.dump(hyper_parameters, outfile)

            if isinstance(env, gym.Env):
                env.close()

            p_x = 'n_timesteps'

            if ML:
                p_ys = ['train-AverageReturn', 'train-SuccessRate']
            else:
                p_ys = ['train-AverageReturn']

            for p_y in p_ys:
                plt_file = osp.join(
                    benchmark_dir,
                    '{}_benchmark_promp_{}.png'.format(env_ids[i],
                                                       p_y.replace('/', '-')))
                Rh.relplot(g_csvs=promp_csvs,
                           b_csvs=None,
                           g_x=p_x,
                           g_y=p_y,
                           g_z='ProMP',
                           b_x=None,
                           b_y=None,
                           b_z='None',
                           trials=hyper_parameters['n_trials'],
                           seeds=seeds,
                           plt_file=plt_file,
                           env_id=env_ids[i])
Example 9
    def test_benchmark_rl2(self):  # pylint: disable=no-self-use
        """Compare benchmarks between metarl and baselines."""
        if ML:
            if env_ind == 2:
                envs = [ML1.get_train_tasks('push-v1')]
                env_ids = ['ML1-push-v1']
            elif env_ind == 3:
                envs = [ML1.get_train_tasks('reach-v1')]
                env_ids = ['ML1-reach-v1']
            elif env_ind == 4:
                envs = [ML1.get_train_tasks('pick-place-v1')]
                env_ids = ['ML1-pick-place-v1']
            else:
                raise ValueError("Env index is wrong")
        else:
            if env_ind == 0:
                envs = [HalfCheetahVelEnv]
                env_ids = ['HalfCheetahVelEnv']
            elif env_ind == 1:
                envs = [HalfCheetahDirEnv]
                env_ids = ['HalfCheetahDirEnv']
            else:
                raise ValueError("Env index is wrong")

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/rl2/%s/' % timestamp
        result_json = {}
        for i, env in enumerate(envs):
            seeds = random.sample(range(100), hyper_parameters['n_trials'])
            task_dir = osp.join(benchmark_dir, env_ids[i])
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_ids[i]))
            metarl_tf_csvs = []
            promp_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]
                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                metarl_tf_dir = trial_dir + '/metarl'
                promp_dir = trial_dir + '/promp'

                with tf.Graph().as_default():
                    metarl_tf_csv = run_metarl(env, seed, metarl_tf_dir)

                metarl_tf_csvs.append(metarl_tf_csv)

            with open(osp.join(metarl_tf_dir, 'parameters.txt'),
                      'w') as outfile:
                hyper_parameters_copy = copy.deepcopy(hyper_parameters)
                hyper_parameters_copy['sampler_cls'] = str(
                    hyper_parameters_copy['sampler_cls'])
                json.dump(hyper_parameters_copy, outfile)

            g_x = 'TotalEnvSteps'

            if ML:
                g_ys = [
                    'Evaluation/AverageReturn',
                    'Evaluation/SuccessRate',
                ]
            else:
                g_ys = [
                    'Evaluation/AverageReturn',
                ]

            for g_y in g_ys:
                plt_file = osp.join(
                    benchmark_dir,
                    '{}_benchmark_rl2_{}.png'.format(env_ids[i],
                                                     g_y.replace('/', '-')))
                Rh.relplot(g_csvs=metarl_tf_csvs,
                           b_csvs=None,
                           g_x=g_x,
                           g_y=g_y,
                           g_z='MetaRL',
                           b_x=None,
                           b_y=None,
                           b_z=None,
                           trials=hyper_parameters['n_trials'],
                           seeds=seeds,
                           plt_file=plt_file,
                           env_id=env_ids[i],
                           x_label=g_x,
                           y_label=g_y)
    def test_benchmark_categorical_mlp_policy(self):
        """Compare benchmarks between garage and baselines."""
        categorical_tasks = [
            'LunarLander-v2', 'CartPole-v1', 'Assault-ramDeterministic-v4',
            'Breakout-ramDeterministic-v4',
            'ChopperCommand-ramDeterministic-v4',
            'Tutankham-ramDeterministic-v4'
        ]
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/categorical_mlp_policy/{0}/'
        benchmark_dir = benchmark_dir.format(timestamp)
        result_json = {}
        for task in categorical_tasks:
            env_id = task
            env = gym.make(env_id)
            trials = 3
            seeds = random.sample(range(100), trials)

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            relplt_file = osp.join(benchmark_dir,
                                   '{}_benchmark_mean.png'.format(env_id))
            garage_csvs = []

            for trial in range(trials):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_dir = trial_dir + '/garage'

                with tf.Graph().as_default():
                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)
                garage_csvs.append(garage_csv)

            env.close()

            Rh.plot(b_csvs=garage_csvs,
                    g_csvs=garage_csvs,
                    g_x='Iteration',
                    g_y='AverageReturn',
                    g_z='Garage',
                    b_x='Iteration',
                    b_y='AverageReturn',
                    b_z='Garage',
                    trials=trials,
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='AverageReturn')

            Rh.relplot(b_csvs=garage_csvs,
                       g_csvs=garage_csvs,
                       g_x='Iteration',
                       g_y='AverageReturn',
                       g_z='Garage',
                       b_x='Iteration',
                       b_y='AverageReturn',
                       b_z='Garage',
                       trials=trials,
                       seeds=seeds,
                       plt_file=relplt_file,
                       env_id=env_id,
                       x_label='Iteration',
                       y_label='AverageReturn')

            result_json[env_id] = Rh.create_json(b_csvs=garage_csvs,
                                                 g_csvs=garage_csvs,
                                                 seeds=seeds,
                                                 trails=trials,
                                                 g_x='Iteration',
                                                 g_y='AverageReturn',
                                                 b_x='Iteration',
                                                 b_y='AverageReturn',
                                                 factor_g=2048,
                                                 factor_b=2048)

        Rh.write_file(result_json, 'PPO')