Example #1
    def test_benchmark_ddpg(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """
        # Load Mujoco1M tasks; you can check other benchmarks here:
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'ddpg', timestamp)

        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id, max_path_length=params['n_rollout_steps'])
            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task['trials']):
                env.reset()
                baseline_env.reset()
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                garage_dir = osp.join(trial_dir, 'garage')
                baselines_dir = osp.join(trial_dir, 'baselines')

                with tf.Graph().as_default():
                    # Run garage algorithms
                    garage_csv = run_garage(env, seed, garage_dir)

                    # Run baselines algorithms
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            plot(b_csvs=baselines_csvs,
                 g_csvs=garage_csvs,
                 g_x='Epoch',
                 g_y='AverageReturn',
                 b_x='total/epochs',
                 b_y='rollout/return',
                 trials=task['trials'],
                 seeds=seeds,
                 plt_file=plt_file,
                 env_id=env_id)
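All of these examples follow the same harness contract: run_garage(env, seed, log_dir) and run_baselines(env, seed, log_dir) each train one implementation and return the path of a progress CSV, which plot later overlays per task. Below is a minimal sketch of that contract with a hypothetical stand-in body, not the real training code; run_baselines is assumed to have the same shape and to write the columns ('total/epochs', 'rollout/return') that plot reads.

import csv
import os
import os.path as osp

def run_garage(env, seed, log_dir):
    """Hypothetical stand-in: train one garage algorithm on env with the
    given seed and return the path of the progress CSV under log_dir."""
    # env and seed would drive the actual training; they are unused in this stub.
    os.makedirs(log_dir, exist_ok=True)
    progress = osp.join(log_dir, 'progress.csv')
    with open(progress, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['Epoch', 'AverageReturn'])
        writer.writeheader()
        # A real implementation would log one row per training epoch here.
    return progress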
Example #2
    def test_benchmark_ddpg(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """
        # Load Mujoco1M tasks; you can check other benchmarks here:
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
        mujoco1m = benchmarks.get_benchmark("Mujoco1M")

        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
        benchmark_dir = "./data/local/benchmarks/ddpg/%s/" % timestamp

        for task in mujoco1m["tasks"]:
            env_id = task["env_id"]
            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id, max_path_length=params["n_rollout_steps"])
            seeds = random.sample(range(100), task["trials"])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                "{}_benchmark.png".format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task["trials"]):
                env.reset()
                baseline_env.reset()
                seed = seeds[trial]

                trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
                garage_dir = trial_dir + "/garage"
                baselines_dir = trial_dir + "/baselines"

                with tf.Graph().as_default():
                    # Run garage algorithms
                    garage_csv = run_garage(env, seed, garage_dir)

                    # Run baselines algorithms
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            plot(b_csvs=baselines_csvs,
                 g_csvs=garage_csvs,
                 g_x="Epoch",
                 g_y="AverageReturn",
                 b_x="total/epochs",
                 b_y="rollout/return",
                 trials=task["trials"],
                 seeds=seeds,
                 plt_file=plt_file,
                 env_id=env_id)
Example #3
    def test_benchmark_trpo(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """

        mujoco1m = benchmarks.get_benchmark("Mujoco1M")

        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
        benchmark_dir = "./data/local/benchmarks/trpo/%s/" % timestamp
        for task in mujoco1m["tasks"]:
            env_id = task["env_id"]
            env = gym.make(env_id)
            baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

            seeds = random.sample(range(100), task["trials"])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                "{}_benchmark.png".format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task["trials"]):
                _PLACEHOLDER_CACHE.clear()
                seed = seeds[trial]

                trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
                garage_dir = trial_dir + "/garage"
                baselines_dir = trial_dir + "/baselines"

                with tf.Graph().as_default():
                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)

                    # Run baseline algorithms
                    baseline_env.reset()
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            plot(b_csvs=baselines_csvs,
                 g_csvs=garage_csvs,
                 g_x="Iteration",
                 g_y="EpisodeRewardMean",
                 b_x="EpThisIter",
                 b_y="EpRewMean",
                 trials=task["trials"],
                 seeds=seeds,
                 plt_file=plt_file,
                 env_id=env_id)

            env.close()
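AutoStopEnv caps the baseline-side rollouts at a fixed horizon so both implementations see comparably long episodes. A rough sketch of such a wrapper follows, assuming the pre-0.26 gym step API that these snippets use; the real helper may differ in details such as time-limit bookkeeping.

import gym

class AutoStopEnvSketch(gym.Wrapper):
    """Hypothetical wrapper: force done=True after max_path_length steps."""

    def __init__(self, env_name, max_path_length):
        super().__init__(gym.make(env_name))
        self._max_path_length = max_path_length
        self._step_count = 0

    def reset(self, **kwargs):
        self._step_count = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self._step_count += 1
        if self._step_count >= self._max_path_length:
            done = True  # cut the episode at the configured horizon
        return obs, reward, done, info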
Example #4
    def test_benchmark_ppo(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task['trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_dir = trial_dir + '/garage'
                baselines_dir = trial_dir + '/baselines'

                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            Rh.plot(b_csvs=baselines_csvs,
                    g_csvs=garage_csvs,
                    g_x='Iteration',
                    g_y='AverageReturn',
                    b_x='nupdates',
                    b_y='eprewmean',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='AverageReturn')

            result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                                 g_csvs=garage_csvs,
                                                 seeds=seeds,
                                                 trails=task['trials'],
                                                 g_x='Iteration',
                                                 g_y='AverageReturn',
                                                 b_x='nupdates',
                                                 b_y='eprewmean',
                                                 factor_g=2048,
                                                 factor_b=2048)

        Rh.write_file(result_json, 'PPO')
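Rh.create_json collapses the per-trial CSVs into one entry per environment, and Rh.write_file persists the combined report. A hedged guess at what the write step amounts to, assuming it simply serializes the dict to a JSON file; the actual helper may choose the output path differently.

import json
import os

def write_file_sketch(result_json, algo_name, out_dir='./data/local/benchmarks'):
    """Hypothetical version of Rh.write_file: dump one JSON report per algorithm."""
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, '{}_benchmark.json'.format(algo_name))
    with open(out_path, 'w') as f:
        json.dump(result_json, f, indent=2)
    return out_path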
Example #5
    def test_benchmark_td3(self):
        """
        Test garage TD3 benchmarks.

        :return:
        """
        # Load Mujoco1M tasks; you can check other benchmarks here:
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py # noqa: E501
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'td3', timestamp)
        result_json = {}

        # rlkit throws an error for 'Reacher-v2' due to a gym version mismatch
        mujoco1m['tasks'] = [
            task for task in mujoco1m['tasks']
            if task['env_id'] != 'Reacher-v2'
        ]

        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            rlkit_env = AutoStopEnv(env_name=env_id,
                                    max_path_length=params['n_rollout_steps'])
            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            garage_csvs = []
            rlkit_csvs = []

            for trial in range(task['trials']):
                env.reset()
                rlkit_env.reset()
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                garage_dir = osp.join(trial_dir, 'garage')
                rlkit_dir = osp.join(trial_dir, 'rlkit')

                with tf.Graph().as_default():
                    # Run rlkit algorithms
                    rlkit_csv = run_rlkit(rlkit_env, seed, rlkit_dir)

                    # Run garage algorithms
                    garage_csv = run_garage(env, seed, garage_dir)

                garage_csvs.append(garage_csv)
                rlkit_csvs.append(rlkit_csv)

            Rh.plot(b_csvs=rlkit_csvs,
                    g_csvs=garage_csvs,
                    g_x='Epoch',
                    g_y='Evaluation/AverageReturn',
                    g_z='garage',
                    b_x='Epoch',
                    b_y='evaluation/Average Returns',
                    b_z='rlkit',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='Evaluation/AverageReturn')

            result_json[env_id] = Rh.create_json(
                b_csvs=rlkit_csvs,
                g_csvs=garage_csvs,
                seeds=seeds,
                trails=task['trials'],
                g_x='Epoch',
                g_y='Evaluation/AverageReturn',
                b_x='Epoch',
                b_y='evaluation/Average Returns',
                factor_g=1,
                factor_b=1)

        Rh.write_file(result_json, 'TD3')
Example #6
    def test_benchmark_ppo(self):
        """Compare benchmarks between garage and baselines.

        Returns:

        """
        # pylint: disable=no-self-use
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id,
                max_path_length=hyper_parameters['max_path_length'])

            seeds = random.sample(range(100), hyper_parameters['n_trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))

            baselines_csvs = []
            garage_tf_csvs = []
            garage_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_tf_dir = trial_dir + '/garage/tf'
                garage_pytorch_dir = trial_dir + '/garage/pytorch'
                baselines_dir = trial_dir + '/baselines'

                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baseline_csv = run_baselines(baseline_env, seed,
                                                 baselines_dir)

                    # Run garage algorithms
                    env.reset()
                    garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

                env.reset()
                garage_pytorch_csv = run_garage_pytorch(
                    env, seed, garage_pytorch_dir)

                baselines_csvs.append(baseline_csv)
                garage_tf_csvs.append(garage_tf_csv)
                garage_pytorch_csvs.append(garage_pytorch_csv)

            env.close()

            benchmark_helper.plot_average_over_trials(
                [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
                [
                    'eprewmean', 'Evaluation/AverageReturn',
                    'Evaluation/AverageReturn'
                ],
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn',
                names=['baseline', 'garage-TensorFlow', 'garage-PyTorch'],
            )

            result_json[env_id] = benchmark_helper.create_json(
                [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['nupdates', 'Iteration', 'Iteration'],
                ys=[
                    'eprewmean', 'Evaluation/AverageReturn',
                    'Evaluation/AverageReturn'
                ],
                factors=[hyper_parameters['batch_size']] * 3,
                names=['baseline', 'garage-TF', 'garage-PT'])

        Rh.write_file(result_json, 'PPO')
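Note how the three runs are arranged: the baselines and garage TensorFlow runs sit inside with tf.Graph().as_default() so each trial starts from a clean TF graph, while the PyTorch run happens outside that block, presumably because it never touches the graph. The plotting helper then averages each framework's metric over its trial CSVs; here is a rough re-implementation of that averaging step, assuming pandas-readable CSVs and the column names used above, not the confirmed helper code.

import matplotlib.pyplot as plt
import pandas as pd

def plot_average_over_trials_sketch(csv_lists, y_cols, plt_file, env_id,
                                    x_label, y_label, names):
    """Hypothetical averaging: one curve per framework, meaned across trials."""
    for csvs, y_col, name in zip(csv_lists, y_cols, names):
        trials = [pd.read_csv(path)[y_col] for path in csvs]
        mean_curve = pd.concat(trials, axis=1).mean(axis=1)
        plt.plot(mean_curve.index, mean_curve.values, label=name)
    plt.title(env_id)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend()
    plt.savefig(plt_file)
    plt.close()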
Example #7
    def test_benchmark_ppo(self):
        """Compare benchmarks between metarl and baselines.

        Returns:

        """
        # pylint: disable=no-self-use
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id,
                max_path_length=hyper_parameters['max_path_length'])

            seeds = random.sample(range(100), hyper_parameters['n_trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))

            baselines_csvs = []
            metarl_tf_csvs = []
            metarl_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                metarl_tf_dir = trial_dir + '/metarl/tf'
                metarl_pytorch_dir = trial_dir + '/metarl/pytorch'
                baselines_dir = trial_dir + '/baselines'

                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baseline_csv = run_baselines(baseline_env, seed,
                                                 baselines_dir)

                    # Run metarl algorithms
                    env.reset()
                    metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir)

                # env.reset()
                # metarl_pytorch_csv = run_metarl_pytorch(
                #     env, seed, metarl_pytorch_dir)

                baselines_csvs.append(baseline_csv)
                metarl_tf_csvs.append(metarl_tf_csv)
                # metarl_pytorch_csvs.append(metarl_pytorch_csv)

            env.close()

            # benchmark_helper.plot_average_over_trials(
            #     [baselines_csvs, metarl_tf_csvs, metarl_pytorch_csvs],
            #     [
            #         'eprewmean', 'Evaluation/AverageReturn',
            #         'Evaluation/AverageReturn'
            #     ],
            #     plt_file=plt_file,
            #     env_id=env_id,
            #     x_label='Iteration',
            #     y_label='Evaluation/AverageReturn',
            #     names=['baseline', 'metarl-TensorFlow', 'metarl-PyTorch'],
            # )

            result_json[env_id] = benchmark_helper.create_json(
                [baselines_csvs, metarl_tf_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['total_timesteps', 'TotalEnvSteps'],
                ys=['eprewmean', 'Evaluation/AverageReturn'],
                factors=[hyper_parameters['batch_size']] * 2,
                names=['baseline', 'metarl-TF'])

            # Rh.relplot(g_csvs=metarl_tf_csvs,
            #            b_csvs=baselines_csvs,
            #            g_x='TotalEnvSteps',
            #            g_y='Evaluation/AverageReturn',
            #            g_z='MetaRL',
            #            b_x='total_timesteps',
            #            b_y='eprewmean',
            #            b_z='Openai/Baseline',
            #            trials=hyper_parameters['n_trials'],
            #            seeds=seeds,
            #            plt_file=plt_file,
            #            env_id=env_id,
            #            x_label='EnvTimeStep',
            #            y_label='Performance')

            benchmark_helper.plot_average_over_trials_with_x(
                [baselines_csvs, metarl_tf_csvs],
                ['eprewmean', 'Evaluation/AverageReturn'],
                ['total_timesteps', 'TotalEnvSteps'],
                plt_file=plt_file,
                env_id=env_id,
                x_label='EnvTimeStep',
                y_label='Performance',
                names=['baseline', 'metarl-TensorFlow'],
            )

        # Rh.relplot(g_csvs=metarl_tf_csvs,
        #            b_csvs=metarl_pytorch_csvs,
        #            g_x='TotalEnvSteps',
        #            g_y='Evaluation/AverageReturn',
        #            g_z='MetaRL-TF',
        #            b_x='TotalEnvSteps',
        #            b_y='Evaluation/AverageReturn',
        #            b_z='MetaRL-PT',
        #            trials=hyper_parameters['n_trials'],
        #            seeds=seeds,
        #            plt_file=plt_file,
        #            env_id=env_id,
        #            x_label='EnvTimeStep',
        #            y_label='Performance')

        Rh.write_file(result_json, 'PPO')
Example #8
    def test_benchmark_ddpg(self):
        """
        Compare benchmarks between metarl and baselines.

        :return:
        """
        # Load Mujoco1M tasks; you can check other benchmarks here:
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'ddpg', timestamp)
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id, max_path_length=params['n_rollout_steps'])
            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            relplt_file = osp.join(benchmark_dir,
                                   '{}_benchmark_mean.png'.format(env_id))
            baselines_csvs = []
            metarl_csvs = []

            for trial in range(task['trials']):
                env.reset()
                baseline_env.reset()
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                metarl_dir = osp.join(trial_dir, 'metarl')
                baselines_dir = osp.join(trial_dir, 'baselines')

                with tf.Graph().as_default():
                    # Run metarl algorithms
                    metarl_csv = run_metarl(env, seed, metarl_dir)

                    # Run baselines algorithms
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                metarl_csvs.append(metarl_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            Rh.plot(b_csvs=baselines_csvs,
                    g_csvs=metarl_csvs,
                    g_x='Epoch',
                    g_y='Evaluation/AverageReturn',
                    g_z='MetaRL',
                    b_x='total/epochs',
                    b_y='rollout/return',
                    b_z='Baseline',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Epoch',
                    y_label='Evaluation/AverageReturn')

            Rh.relplot(g_csvs=metarl_csvs,
                       b_csvs=baselines_csvs,
                       g_x='Epoch',
                       g_y='Evaluation/AverageReturn',
                       g_z='MetaRL',
                       b_x='total/epochs',
                       b_y='rollout/return',
                       b_z='Baseline',
                       trials=task['trials'],
                       seeds=seeds,
                       plt_file=relplt_file,
                       env_id=env_id,
                       x_label='Epoch',
                       y_label='Evaluation/AverageReturn')

            result_json[env_id] = Rh.create_json(
                b_csvs=baselines_csvs,
                g_csvs=metarl_csvs,
                seeds=seeds,
                trails=task['trials'],
                g_x='Epoch',
                g_y='Evaluation/AverageReturn',
                b_x='total/epochs',
                b_y='rollout/return',
                factor_g=params['steps_per_epoch'] * params['n_rollout_steps'],
                factor_b=1)

        Rh.write_file(result_json, 'DDPG')
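The factor_g and factor_b arguments appear to rescale each logger's x-column onto a shared axis: garage logs by 'Epoch', so multiplying by steps_per_epoch * n_rollout_steps converts epochs into environment steps, while factor_b=1 leaves the baselines column untouched. The snippet below illustrates that rescaling; it is an assumption about how create_json uses the factors rather than its confirmed behavior.

def rescale_x(x_values, factor):
    """Hypothetical helper: map logged x-values (e.g. epochs) onto a common
    unit such as environment steps by multiplying by factor."""
    return [x * factor for x in x_values]

# With steps_per_epoch=20 and n_rollout_steps=100, epoch 5 lands at
# 5 * 20 * 100 = 10000 environment steps on the shared x-axis.
print(rescale_x([1, 2, 5], 20 * 100))  # [2000, 4000, 10000]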
Example #9
    def test_benchmark_ppo(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """
        mujoco1m = benchmarks.get_benchmark("Mujoco1M")
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
        benchmark_dir = "./data/local/benchmarks/ppo/%s/" % timestamp
        result_json = {}
        for task in mujoco1m["tasks"]:
            env_id = task["env_id"]

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

            seeds = random.sample(range(100), task["trials"])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                "{}_benchmark.png".format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task["trials"]):
                seed = seeds[trial]

                trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
                garage_dir = trial_dir + "/garage"
                baselines_dir = trial_dir + "/baselines"

                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            Rh.plot(b_csvs=baselines_csvs,
                    g_csvs=garage_csvs,
                    g_x="Iteration",
                    g_y="EpisodeRewardMean",
                    b_x="nupdates",
                    b_y="eprewmean",
                    trials=task["trials"],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label="Iteration",
                    y_label="AverageReturn")

            result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                                 g_csvs=garage_csvs,
                                                 seeds=seeds,
                                                 trails=task["trials"],
                                                 g_x="Iteration",
                                                 g_y="EpisodeRewardMean",
                                                 b_x="nupdates",
                                                 b_y="eprewmean",
                                                 factor_g=2048,
                                                 factor_b=2048)

        Rh.write_file(result_json, "PPO")