Example #1
    def benchmark_trpo(self):  # pylint: disable=no-self-use
        """Compare benchmarks between garage and baselines."""
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/trpo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            seeds = random.sample(range(100), hyper_parameters['n_trials'])
            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            garage_tf_csvs = []
            garage_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                _PLACEHOLDER_CACHE.clear()
                seed = seeds[trial]
                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_tf_dir = trial_dir + '/garage_tf'
                garage_pytorch_dir = trial_dir + '/garage_pytorch'

                # Run garage algorithms
                env.reset()
                garage_pytorch_csv = run_garage_pytorch(
                    env, seed, garage_pytorch_dir)

                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    env.reset()
                    garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

                garage_tf_csvs.append(garage_tf_csv)
                garage_pytorch_csvs.append(garage_pytorch_csv)

            env.close()

            benchmark_helper.plot_average_over_trials(
                [garage_tf_csvs, garage_pytorch_csvs],
                ['Evaluation/AverageReturn'] * 2,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn',
                names=['garage-TensorFlow', 'garage-PyTorch'],
            )

            result_json[env_id] = benchmark_helper.create_json(
                [garage_tf_csvs, garage_pytorch_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['Evaluation/Iteration'] * 2,
                ys=['Evaluation/AverageReturn'] * 2,
                factors=[hyper_parameters['batch_size']] * 2,
                names=['garage-TF', 'garage-PT'])

        Rh.write_file(result_json, 'TRPO')
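The aggregation above goes through benchmark_helper.plot_average_over_trials, whose implementation is not part of the example. As a rough, hypothetical sketch of what such a helper might do (the signature and column handling below are assumptions, not garage's actual API), one could average the chosen metric across the per-trial CSVs with pandas and plot one curve per framework:

# Hypothetical sketch of a plot-average-over-trials helper; not garage's
# actual implementation. Assumes each CSV has one row per iteration and a
# shared metric column such as 'Evaluation/AverageReturn'.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def plot_average_over_trials_sketch(csv_lists, y_column, plt_file, names):
    """Average a metric over trials for each set of runs and plot the curves."""
    for csvs, name in zip(csv_lists, names):
        # Stack the metric column from every trial, truncated to the shortest run.
        runs = [pd.read_csv(path)[y_column].to_numpy() for path in csvs]
        shortest = min(len(run) for run in runs)
        stacked = np.stack([run[:shortest] for run in runs])
        plt.plot(np.arange(shortest), stacked.mean(axis=0), label=name)
    plt.xlabel('Iteration')
    plt.ylabel(y_column)
    plt.legend()
    os.makedirs(os.path.dirname(plt_file) or '.', exist_ok=True)
    plt.savefig(plt_file)
    plt.close()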
Example #2
    def test_benchmark_pearl(self):
        """Run metarl PEARL benchmarks on the ML1 reach-v1 task."""
        env_sampler = SetTaskSampler(
            lambda: MetaRLEnv(normalize(ML1.get_train_tasks('reach-v1'))))
        env = env_sampler.sample(params['num_train_tasks'])
        test_env_sampler = SetTaskSampler(
            lambda: MetaRLEnv(normalize(ML1.get_test_tasks('reach-v1'))))
        test_env = test_env_sampler.sample(params['num_train_tasks'])
        env_id = 'reach-v1'
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'pearl', timestamp)
        result_json = {}
        seeds = random.sample(range(100), params['n_trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        metarl_csvs = []

        for trial in range(params['n_trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_dir = trial_dir + '/metarl'

            metarl_csv = run_metarl(env, test_env, seed, metarl_dir)
            metarl_csvs.append(metarl_csv)

        env.close()

        benchmark_helper.plot_average_over_trials(
            [metarl_csvs],
            ys=['Test/Average/SuccessRate'],
            plt_file=plt_file,
            env_id=env_id,
            x_label='TotalEnvSteps',
            y_label='Test/Average/SuccessRate',
            names=['metarl_pearl'],
        )

        factor_val = params['meta_batch_size'] * params['max_path_length']
        result_json[env_id] = benchmark_helper.create_json(
            [metarl_csvs],
            seeds=seeds,
            trials=params['n_trials'],
            xs=['TotalEnvSteps'],
            ys=['Test/Average/SuccessRate'],
            factors=[factor_val],
            names=['metarl_pearl'])

        Rh.write_file(result_json, 'PEARL')
Example #3
    def test_finish_release_merge_conflict_tag(self):
        """
        finish + tag with merge-conflicts on develop
        """
        version_filename = 'VERSION'
        new_version = '1.1\n'

        gitflow = GitFlow(self.repo).init()
        fmgr = FeatureBranchManager(gitflow)
        fmgr.finish('even')
        fake_commit(self.repo, 'Overwrite version',
                    filename=version_filename,
                    change=new_version)

        # verify that the tag does not yet exist
        # "v" comes form "versiontag" prefix in the gitflow config for the "release" fixture
        self.assertNotIn('v1.0', self.repo.tags)

        mgr = ReleaseBranchManager(gitflow)
        taginfo = dict(
            message='Tagging version 1.0',
        )
        self.assertRaises(MergeError,
                          mgr.finish, '1.0', tagging_info=taginfo)

        # verify that the tag exists, even though there was a failed merge
        self.assertIn('v1.0', self.repo.tags)

        # resolve the conflict
        # this is in favor of the change on develop
        write_file(filename=version_filename,
                   append=False,
                   change=new_version)
        gitflow.git.add(version_filename)
        gitflow.git.commit('-F.git/MERGE_MSG')
        # the release branch is still here
        self.assertIn('rel/1.0',
                      [b.name for b in self.repo.branches])
        # finish the release again
        # this should skip the tagging, since that part previously succeeded
        mgr.finish('1.0', tagging_info=taginfo)
        # now the release branch is gone
        self.assertNotIn('rel/1.0',
                         [b.name for b in self.repo.branches])

        # verify that the tag still exists
        self.assertIn('v1.0', self.repo.tags)
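The test above leans on two helpers, fake_commit and write_file, that live elsewhere in the test suite. A minimal sketch of what they might look like, assuming GitPython's Repo API (the exact signatures and defaults here are guesses for illustration, not gitflow's real helpers):

# Hypothetical sketches of the helpers used by the test above; the real test
# suite may define them differently. `repo` is assumed to be a GitPython Repo.
import os


def write_file(filename, append=False, change=''):
    """Write (or append) `change` to `filename` in the current worktree."""
    mode = 'a' if append else 'w'
    with open(filename, mode) as f:
        f.write(change)


def fake_commit(repo, message, filename='dummy.txt', change='dummy\n'):
    """Overwrite a file in the worktree and commit it as a throwaway commit."""
    path = os.path.join(repo.working_dir, filename)
    with open(path, 'w') as f:
        f.write(change)
    repo.index.add([filename])
    repo.index.commit(message)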
Example #4
def files(s3, source_bucket):
    files = [
        ['folders/some_folder/file.txt', 'contents1'],
        ['folders/some_folder/file2.txt', 'contents2'],
    ]
    return [
        write_file(s3, source_bucket, entry[0], entry[1]) for entry in files
    ]
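This fixture depends on an S3 write_file helper that is not shown. A plausible sketch using boto3, assuming s3 is a boto3 S3 client (for example a moto-mocked one in tests) and that the helper returns the key it wrote so the fixture yields a list of keys; both points are assumptions:

# Hypothetical sketch of the S3 write_file helper used by the fixture above;
# the real helper may differ. Assumes `s3` is a boto3 S3 client and `bucket`
# already exists.
def write_file(s3, bucket, key, contents):
    """Upload `contents` under `key` and return the key for later assertions."""
    s3.put_object(Bucket=bucket, Key=key, Body=contents.encode('utf-8'))
    return key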
Example #5
    def test_finish_release_merge_conflict_tag(self):
        """
        finish + tag with merge-conflicts on develop
        """
        version_filename = 'VERSION'
        new_version = '1.1\n'

        gitflow = GitFlow(self.repo).init()
        fmgr = FeatureBranchManager(gitflow)
        fmgr.finish('even')
        fake_commit(self.repo,
                    'Overwrite version',
                    filename=version_filename,
                    change=new_version)

        # verify that the tag does not yet exist
        # "v" comes form "versiontag" prefix in the gitflow config for the "release" fixture
        self.assertNotIn('v1.0', self.repo.tags)

        mgr = ReleaseBranchManager(gitflow)
        taginfo = dict(message='Tagging version 1.0', )
        self.assertRaises(MergeError, mgr.finish, '1.0', tagging_info=taginfo)

        # verify that the tag exists, even though there was a failed merge
        self.assertIn('v1.0', self.repo.tags)

        # resolve the conflict
        # this is in favor of the change on develop
        write_file(filename=version_filename, append=False, change=new_version)
        gitflow.git.add(version_filename)
        gitflow.git.commit('-F.git/MERGE_MSG')
        # the release branch is still here
        self.assertIn('rel/1.0', [b.name for b in self.repo.branches])
        # finish the release again
        # this should skip the tagging, since that part previously succeeded
        mgr.finish('1.0', tagging_info=taginfo)
        # now the release branch is gone
        self.assertNotIn('rel/1.0', [b.name for b in self.repo.branches])

        # verify that the tag still exists
        self.assertIn('v1.0', self.repo.tags)
Example #6
    def test_benchmark_ddpg(self):
        """Compare benchmarks between garage and baselines."""
        # Load Mujoco1M tasks; other available benchmarks are listed at
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'ddpg', timestamp)
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id, max_path_length=params['n_rollout_steps'])
            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task['trials']):
                env.reset()
                baseline_env.reset()
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                garage_dir = osp.join(trial_dir, 'garage')
                baselines_dir = osp.join(trial_dir, 'baselines')

                with tf.Graph().as_default():
                    # Run garage algorithms
                    garage_csv = run_garage(env, seed, garage_dir)

                    # Run baselines algorithms
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            Rh.plot(b_csvs=baselines_csvs,
                    g_csvs=garage_csvs,
                    g_x='Epoch',
                    g_y='AverageReturn',
                    b_x='total/epochs',
                    b_y='rollout/return',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Epoch',
                    y_label='AverageReturn')

            result_json[env_id] = Rh.create_json(
                b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                seeds=seeds,
                trails=task['trials'],
                g_x='Epoch',
                g_y='AverageReturn',
                b_x='total/epochs',
                b_y='rollout/return',
                factor_g=params['n_epoch_cycles'] * params['n_rollout_steps'],
                factor_b=1)

        Rh.write_file(result_json, 'DDPG')
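AutoStopEnv is used here to cap episode length for the baselines run, but its definition is not included in the example. A rough sketch of such a wrapper on the classic 4-tuple gym.Wrapper API (the constructor arguments mirror the call above; everything else is an assumption about how it might work):

# Hypothetical sketch of an AutoStopEnv-style wrapper; not the benchmark
# suite's actual class. Forces `done=True` once max_path_length steps have
# elapsed, using the old gym 4-tuple step API that these benchmarks target.
import gym


class AutoStopEnvSketch(gym.Wrapper):
    def __init__(self, env=None, env_name='', max_path_length=100):
        if env is None:
            env = gym.make(env_name)
        super().__init__(env)
        self._max_path_length = max_path_length
        self._step_count = 0

    def reset(self, **kwargs):
        self._step_count = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self._step_count += 1
        if self._step_count >= self._max_path_length:
            done = True
        return obs, reward, done, info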
Example #7
    def test_benchmark_ppo(self):
        """Compare benchmarks between garage and baselines."""
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task['trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_dir = trial_dir + '/garage'
                baselines_dir = trial_dir + '/baselines'

                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            Rh.plot(b_csvs=baselines_csvs,
                    g_csvs=garage_csvs,
                    g_x='Iteration',
                    g_y='AverageReturn',
                    b_x='nupdates',
                    b_y='eprewmean',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='AverageReturn')

            result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                                 g_csvs=garage_csvs,
                                                 seeds=seeds,
                                                 trails=task['trials'],
                                                 g_x='Iteration',
                                                 g_y='AverageReturn',
                                                 b_x='nupdates',
                                                 b_y='eprewmean',
                                                 factor_g=2048,
                                                 factor_b=2048)

        Rh.write_file(result_json, 'PPO')
Example #8
    def test_benchmark_vpg(self):
        """Compare benchmarks between metarl and baselines.

        Returns:

        """
        # pylint: disable=no-self-use
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/vpg/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)

            seeds = random.sample(range(100), hyper_parameters['n_trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))

            metarl_tf_csvs = []
            metarl_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                metarl_tf_dir = trial_dir + '/metarl/tf'
                metarl_pytorch_dir = trial_dir + '/metarl/pytorch'

                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    # Run metarl algorithms
                    env.reset()
                    metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir)

                env.reset()
                metarl_pytorch_csv = run_metarl_pytorch(
                    env, seed, metarl_pytorch_dir)

                metarl_tf_csvs.append(metarl_tf_csv)
                metarl_pytorch_csvs.append(metarl_pytorch_csv)

            env.close()

            benchmark_helper.plot_average_over_trials(
                [metarl_tf_csvs, metarl_pytorch_csvs],
                ['Evaluation/AverageReturn'] * 2,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn',
                names=['metarl-TensorFlow', 'metarl-PyTorch'])

            result_json[env_id] = benchmark_helper.create_json(
                [metarl_tf_csvs, metarl_pytorch_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['Iteration'] * 2,
                ys=['Evaluation/AverageReturn'] * 2,
                factors=[hyper_parameters['batch_size']] * 2,
                names=['metarl-tf', 'metarl-pytorch'])

        Rh.write_file(result_json, 'VPG')
Example #9
    def test_benchmark_td3(self):
        """
        Test garage TD3 benchmarks.

        :return:
        """
        # Load Mujoco1M tasks; other available benchmarks are listed at
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py # noqa: E501
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'td3', timestamp)
        result_json = {}

        # rlkit throws an error for 'Reacher-v2' due to a gym version mismatch
        mujoco1m['tasks'] = [
            task for task in mujoco1m['tasks']
            if task['env_id'] != 'Reacher-v2'
        ]

        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            rlkit_env = AutoStopEnv(env_name=env_id,
                                    max_path_length=params['n_rollout_steps'])
            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            garage_csvs = []
            rlkit_csvs = []

            for trial in range(task['trials']):
                env.reset()
                rlkit_env.reset()
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                garage_dir = trial_dir + '/garage'
                rlkit_dir = osp.join(trial_dir, 'rlkit')

                with tf.Graph().as_default():
                    # Run rlkit algorithms
                    rlkit_csv = run_rlkit(rlkit_env, seed, rlkit_dir)

                    # Run garage algorithms
                    garage_csv = run_garage(env, seed, garage_dir)

                garage_csvs.append(garage_csv)
                rlkit_csvs.append(rlkit_csv)

            Rh.plot(b_csvs=rlkit_csvs,
                    g_csvs=garage_csvs,
                    g_x='Epoch',
                    g_y='Evaluation/AverageReturn',
                    g_z='garage',
                    b_x='Epoch',
                    b_y='evaluation/Average Returns',
                    b_z='rlkit',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='Evaluation/AverageReturn')

            result_json[env_id] = Rh.create_json(
                b_csvs=rlkit_csvs,
                g_csvs=garage_csvs,
                seeds=seeds,
                trails=task['trials'],
                g_x='Epoch',
                g_y='Evaluation/AverageReturn',
                b_x='Epoch',
                b_y='evaluation/Average Returns',
                factor_g=1,
                factor_b=1)

        Rh.write_file(result_json, 'TD3')
Example #10
    def test_benchmark_ppo(self):
        """Compare benchmarks between garage and baselines.

        Returns:

        """
        # pylint: disable=no-self-use
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id,
                max_path_length=hyper_parameters['max_path_length'])

            seeds = random.sample(range(100), hyper_parameters['n_trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))

            baselines_csvs = []
            garage_tf_csvs = []
            garage_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_tf_dir = trial_dir + '/garage/tf'
                garage_pytorch_dir = trial_dir + '/garage/pytorch'
                baselines_dir = trial_dir + '/baselines'

                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baseline_csv = run_baselines(baseline_env, seed,
                                                 baselines_dir)

                    # Run garage algorithms
                    env.reset()
                    garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

                env.reset()
                garage_pytorch_csv = run_garage_pytorch(
                    env, seed, garage_pytorch_dir)

                baselines_csvs.append(baseline_csv)
                garage_tf_csvs.append(garage_tf_csv)
                garage_pytorch_csvs.append(garage_pytorch_csv)

            env.close()

            benchmark_helper.plot_average_over_trials(
                [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
                [
                    'eprewmean', 'Evaluation/AverageReturn',
                    'Evaluation/AverageReturn'
                ],
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn',
                names=['baseline', 'garage-TensorFlow', 'garage-PyTorch'],
            )

            result_json[env_id] = benchmark_helper.create_json(
                [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['nupdates', 'Iteration', 'Iteration'],
                ys=[
                    'eprewmean', 'Evaluation/AverageReturn',
                    'Evaluation/AverageReturn'
                ],
                factors=[hyper_parameters['batch_size']] * 3,
                names=['baseline', 'garage-TF', 'garage-PT'])

        Rh.write_file(result_json, 'PPO')
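The factors passed to benchmark_helper.create_json presumably rescale each framework's x column (update counts or iterations) onto a common environment-step axis so the curves are comparable; the helper's real behavior is not shown here. A tiny illustration of that conversion, under that assumption:

# Illustration only: one plausible reading of create_json's `factors`
# argument, mapping an iteration-style x column onto environment steps.
import pandas as pd


def to_env_steps(csv_path, x_column, factor):
    """Return the x column of `csv_path` rescaled to environment steps."""
    frame = pd.read_csv(csv_path)
    return frame[x_column] * factor

Under that reading, passing [hyper_parameters['batch_size']] * 3 simply states that every update of every run consumed one batch of environment steps.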
Example #11
def test_benchmark_categorical_gru_policy():
    """Benchmark categorical gru policy."""
    categorical_tasks = [
        'LunarLander-v2',
        'Assault-ramDeterministic-v4',
        'Breakout-ramDeterministic-v4',
        'ChopperCommand-ramDeterministic-v4',
        'Tutankham-ramDeterministic-v4',
        'CartPole-v1',
    ]
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo_categ_gru/%s/' % timestamp
    result_json = {}
    for task in categorical_tasks:
        env_id = task
        env = gym.make(env_id)
        # baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

        seeds = random.sample(range(100), 3)

        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        garage_csvs = []

        for trial in range(3):
            seed = seeds[trial]

            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_dir = trial_dir + '/garage'

            with tf.Graph().as_default():
                # Run baselines algorithms
                # baseline_env.reset()
                # baselines_csv = run_baselines(baseline_env, seed,
                #                               baselines_dir)

                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)

            garage_csvs.append(garage_csv)

        env.close()

        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x='Iteration',
                g_y='AverageReturn',
                g_z='garage',
                b_x='Iteration',
                b_y='AverageReturn',
                b_z='baselines',
                trials=3,
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn')

        result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                             g_csvs=garage_csvs,
                                             seeds=seeds,
                                             trails=3,
                                             g_x='Iteration',
                                             g_y='AverageReturn',
                                             b_x='Iteration',
                                             b_y='AverageReturn',
                                             factor_g=2048,
                                             factor_b=2048)

    Rh.write_file(result_json, 'PPO')
Example #12
    def test_benchmark_pearl(self):
        """Run benchmarks for metarl PEARL."""

        ML_train_envs = [
            TaskIdWrapper(MetaRLEnv(
                IgnoreDoneWrapper(
                    normalize(
                        env(*ML10_ARGS['train'][task]['args'],
                            **ML10_ARGS['train'][task]['kwargs'])))),
                          task_id=task_id,
                          task_name=task)
            for (task_id, (task, env)) in enumerate(ML10_ENVS['train'].items())
        ]
        ML_test_envs = [
            TaskIdWrapper(MetaRLEnv(
                IgnoreDoneWrapper(
                    normalize(
                        env(*ML10_ARGS['test'][task]['args'],
                            **ML10_ARGS['test'][task]['kwargs'])))),
                          task_id=task_id,
                          task_name=task)
            for (task_id, (task, env)) in enumerate(ML10_ENVS['test'].items())
        ]

        env_sampler = EnvPoolSampler(ML_train_envs)
        env = env_sampler.sample(params['num_train_tasks'])
        test_env_sampler = EnvPoolSampler(ML_test_envs)
        test_env = test_env_sampler.sample(params['num_test_tasks'])

        env_id = 'ML10'
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'pearl', timestamp)
        result_json = {}
        seeds = random.sample(range(100), params['n_trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        metarl_csvs = []

        for trial in range(params['n_trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_dir = trial_dir + '/metarl'

            metarl_csv = run_metarl(env, test_env, seed, metarl_dir)
            metarl_csvs.append(metarl_csv)

        env.close()

        benchmark_helper.plot_average_over_trials(
            [metarl_csvs],
            ys=['Test/Average/SuccessRate'],
            plt_file=plt_file,
            env_id=env_id,
            x_label='TotalEnvSteps',
            y_label='Test/Average/SuccessRate',
            names=['metarl_pearl'],
        )

        factor_val = params['meta_batch_size'] * params['max_path_length']
        result_json[env_id] = benchmark_helper.create_json(
            [metarl_csvs],
            seeds=seeds,
            trials=params['n_trials'],
            xs=['TotalEnvSteps'],
            ys=['Test/Average/SuccessRate'],
            factors=[factor_val],
            names=['metarl_pearl'])

        Rh.write_file(result_json, 'PEARL')
Example #13
    def test_benchmark_ppo(self):
        """Compare benchmarks between metarl and baselines.

        Returns:

        """
        # pylint: disable=no-self-use
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id,
                max_path_length=hyper_parameters['max_path_length'])

            seeds = random.sample(range(100), hyper_parameters['n_trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))

            baselines_csvs = []
            metarl_tf_csvs = []
            metarl_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                metarl_tf_dir = trial_dir + '/metarl/tf'
                metarl_pytorch_dir = trial_dir + '/metarl/pytorch'
                baselines_dir = trial_dir + '/baselines'

                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baseline_csv = run_baselines(baseline_env, seed,
                                                 baselines_dir)

                    # Run metarl algorithms
                    env.reset()
                    metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir)

                # env.reset()
                # metarl_pytorch_csv = run_metarl_pytorch(
                #     env, seed, metarl_pytorch_dir)

                baselines_csvs.append(baseline_csv)
                metarl_tf_csvs.append(metarl_tf_csv)
                # metarl_pytorch_csvs.append(metarl_pytorch_csv)

            env.close()

            # benchmark_helper.plot_average_over_trials(
            #     [baselines_csvs, metarl_tf_csvs, metarl_pytorch_csvs],
            #     [
            #         'eprewmean', 'Evaluation/AverageReturn',
            #         'Evaluation/AverageReturn'
            #     ],
            #     plt_file=plt_file,
            #     env_id=env_id,
            #     x_label='Iteration',
            #     y_label='Evaluation/AverageReturn',
            #     names=['baseline', 'metarl-TensorFlow', 'metarl-PyTorch'],
            # )

            # result_json[env_id] = benchmark_helper.create_json(
            #     [baselines_csvs, metarl_tf_csvs],
            #     seeds=seeds,
            #     trials=hyper_parameters['n_trials'],
            #     xs=['total_timesteps', 'TotalEnvSteps'],
            #     ys=[
            #         'eprewmean', 'Evaluation/AverageReturn'
            #     ],
            #     factors=[hyper_parameters['batch_size']] * 2,
            #     names=['baseline', 'metarl-TF'])

            result_json[env_id] = benchmark_helper.create_json(
                [baselines_csvs, metarl_tf_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['total_timesteps', 'TotalEnvSteps'],
                ys=['eprewmean', 'Evaluation/AverageReturn'],
                factors=[hyper_parameters['batch_size']] * 2,
                names=['baseline', 'metarl-TF'])

            # Rh.relplot(g_csvs=metarl_tf_csvs,
            #            b_csvs=baselines_csvs,
            #            g_x='TotalEnvSteps',
            #            g_y='Evaluation/AverageReturn',
            #            g_z='MetaRL',
            #            b_x='total_timesteps',
            #            b_y='eprewmean',
            #            b_z='Openai/Baseline',
            #            trials=hyper_parameters['n_trials'],
            #            seeds=seeds,
            #            plt_file=plt_file,
            #            env_id=env_id,
            #            x_label='EnvTimeStep',
            #            y_label='Performance')

            benchmark_helper.plot_average_over_trials_with_x(
                [baselines_csvs, metarl_tf_csvs],
                ['eprewmean', 'Evaluation/AverageReturn'],
                ['total_timesteps', 'TotalEnvSteps'],
                plt_file=plt_file,
                env_id=env_id,
                x_label='EnvTimeStep',
                y_label='Performance',
                names=['baseline', 'metarl-TensorFlow'],
            )

        # Rh.relplot(g_csvs=metarl_tf_csvs,
        #            b_csvs=metarl_pytorch_csvs,
        #            g_x='TotalEnvSteps',
        #            g_y='Evaluation/AverageReturn',
        #            g_z='MetaRL-TF',
        #            b_x='TotalEnvSteps',
        #            b_y='Evaluation/AverageReturn',
        #            b_z='MetaRL-PT',
        #            trials=hyper_parameters['n_trials'],
        #            seeds=seeds,
        #            plt_file=plt_file,
        #            env_id=env_id,
        #            x_label='EnvTimeStep',
        #            y_label='Performance')

        Rh.write_file(result_json, 'PPO')
Example #14
    def test_benchmark_categorical_mlp_policy(self):
        """Benchmark garage's categorical MLP policy with PPO."""
        categorical_tasks = [
            'LunarLander-v2', 'CartPole-v1', 'Assault-ramDeterministic-v4',
            'Breakout-ramDeterministic-v4',
            'ChopperCommand-ramDeterministic-v4',
            'Tutankham-ramDeterministic-v4'
        ]
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/categorical_mlp_policy/{0}/'
        benchmark_dir = benchmark_dir.format(timestamp)
        result_json = {}
        for task in categorical_tasks:
            env_id = task
            env = gym.make(env_id)
            trials = 3
            seeds = random.sample(range(100), trials)

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            relplt_file = osp.join(benchmark_dir,
                                   '{}_benchmark_mean.png'.format(env_id))
            garage_csvs = []

            for trial in range(trials):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_dir = trial_dir + '/garage'

                with tf.Graph().as_default():
                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)
                garage_csvs.append(garage_csv)

            env.close()

            Rh.plot(b_csvs=garage_csvs,
                    g_csvs=garage_csvs,
                    g_x='Iteration',
                    g_y='AverageReturn',
                    g_z='Garage',
                    b_x='Iteration',
                    b_y='AverageReturn',
                    b_z='Garage',
                    trials=trials,
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='AverageReturn')

            Rh.relplot(b_csvs=garage_csvs,
                       g_csvs=garage_csvs,
                       g_x='Iteration',
                       g_y='AverageReturn',
                       g_z='Garage',
                       b_x='Iteration',
                       b_y='AverageReturn',
                       b_z='Garage',
                       trials=trials,
                       seeds=seeds,
                       plt_file=relplt_file,
                       env_id=env_id,
                       x_label='Iteration',
                       y_label='AverageReturn')

            result_json[env_id] = Rh.create_json(b_csvs=garage_csvs,
                                                 g_csvs=garage_csvs,
                                                 seeds=seeds,
                                                 trails=trials,
                                                 g_x='Iteration',
                                                 g_y='AverageReturn',
                                                 b_x='Iteration',
                                                 b_y='AverageReturn',
                                                 factor_g=2048,
                                                 factor_b=2048)

        Rh.write_file(result_json, 'PPO')
Example #15
def test_benchmark_vpg():
    """Compare benchmarks between garage and baselines."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/vpg/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']

        env = gym.make(env_id)

        seeds = random.sample(range(100), task['trials'])

        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))

        garage_tf_csvs = []
        garage_pytorch_csvs = []

        for trial in range(task['trials']):
            seed = seeds[trial]

            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_tf_dir = trial_dir + '/garage/tf'
            garage_pytorch_dir = trial_dir + '/garage/pytorch'

            env.reset()
            garage_pytorch_csv = run_garage_pytorch(env, seed,
                                                    garage_pytorch_dir)

            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

            garage_tf_csvs.append(garage_tf_csv)
            garage_pytorch_csvs.append(garage_pytorch_csv)

        env.close()

        plot([garage_tf_csvs, garage_pytorch_csvs], ['Iteration', 'Iteration'],
             ['AverageReturn', 'AverageReturn'],
             trials=task['trials'],
             seeds=seeds,
             plt_file=plt_file,
             env_id=env_id,
             x_label='Iteration',
             y_label='AverageReturn',
             names=['garage-tf', 'garage-pytorch'],
             smooth=True)

        result_json[env_id] = create_json(
            [garage_tf_csvs, garage_pytorch_csvs],
            seeds=seeds,
            trails=task['trials'],
            xs=['Iteration', 'Iteration'],
            ys=['AverageReturn', 'AverageReturn'],
            factors=[2048, 2047],
            names=['garage-tf', 'garage-pytorch'])

    Rh.write_file(result_json, 'VPG')
Example #16
    def test_benchmark_trpo(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """
        mujoco1m = benchmarks.get_benchmark("Mujoco1M")

        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
        benchmark_dir = "./data/local/benchmarks/trpo/%s/" % timestamp
        result_json = {}
        for task in mujoco1m["tasks"]:
            env_id = task["env_id"]
            env = gym.make(env_id)
            baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

            seeds = random.sample(range(100), task["trials"])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                "{}_benchmark.png".format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task["trials"]):
                _PLACEHOLDER_CACHE.clear()
                seed = seeds[trial]

                trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
                garage_dir = trial_dir + "/garage"
                baselines_dir = trial_dir + "/baselines"

                with tf.Graph().as_default():
                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)

                    # Run baseline algorithms
                    baseline_env.reset()
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            Rh.plot(
                b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x="Iteration",
                g_y="AverageReturn",
                b_x="EpThisIter",
                b_y="EpRewMean",
                trials=task["trials"],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label="Iteration",
                y_label="AverageReturn")

            result_json[env_id] = Rh.create_json(
                b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                seeds=seeds,
                trails=task["trials"],
                g_x="Iteration",
                g_y="AverageReturn",
                b_x="TimestepsSoFar",
                b_y="EpRewMean",
                factor_g=1024,
                factor_b=1)
            env.close()

        Rh.write_file(result_json, "TRPO")