Example 1
    def benchmark_trpo(self):  # pylint: disable=no-self-use
        """Compare benchmarks between garage and baselines."""
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/trpo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            seeds = random.sample(range(100), hyper_parameters['n_trials'])
            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            garage_tf_csvs = []
            garage_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                _PLACEHOLDER_CACHE.clear()
                seed = seeds[trial]
                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_tf_dir = trial_dir + '/garage_tf'
                garage_pytorch_dir = trial_dir + '/garage_pytorch'

                # Run garage algorithms
                env.reset()
                garage_pytorch_csv = run_garage_pytorch(
                    env, seed, garage_pytorch_dir)

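                # Each trial builds the TF run in its own graph so that state
                # does not carry over between trials.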
                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    env.reset()
                    garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

                garage_tf_csvs.append(garage_tf_csv)
                garage_pytorch_csvs.append(garage_pytorch_csv)

            env.close()

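            # Average the per-trial learning curves and save a TF-vs-PyTorch
            # comparison plot for this environment.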
            benchmark_helper.plot_average_over_trials(
                [garage_tf_csvs, garage_pytorch_csvs],
                ['Evaluation/AverageReturn'] * 2,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn',
                names=['garage-TensorFlow', 'garage-PyTorch'],
            )

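            # Summarize this environment's trials in the shared results dict
            # written out at the end.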
            result_json[env_id] = benchmark_helper.create_json(
                [garage_tf_csvs, garage_pytorch_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['Evaluation/Iteration'] * 2,
                ys=['Evaluation/AverageReturn'] * 2,
                factors=[hyper_parameters['batch_size']] * 2,
                names=['garage-TF', 'garage-PT'])

        Rh.write_file(result_json, 'TRPO')
Example 2
    def test_benchmark_pearl(self):
        """Run the metarl PEARL benchmark on the ML1 reach-v1 task."""
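        # Sample ML1 reach-v1 task environments for meta-training and
        # meta-testing.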
        env_sampler = SetTaskSampler(
            lambda: MetaRLEnv(normalize(ML1.get_train_tasks('reach-v1'))))
        env = env_sampler.sample(params['num_train_tasks'])
        test_env_sampler = SetTaskSampler(
            lambda: MetaRLEnv(normalize(ML1.get_test_tasks('reach-v1'))))
        test_env = test_env_sampler.sample(params['num_train_tasks'])
        env_id = 'reach-v1'
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'pearl', timestamp)
        result_json = {}
        seeds = random.sample(range(100), params['n_trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        metarl_csvs = []

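        # One PEARL run per trial, each with its own seed and log directory.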
        for trial in range(params['n_trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_dir = trial_dir + '/metarl'

            metarl_csv = run_metarl(env, test_env, seed, metarl_dir)
            metarl_csvs.append(metarl_csv)

        env.close()

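        # Plot the meta-test success rate averaged over trials.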
        benchmark_helper.plot_average_over_trials(
            [metarl_csvs],
            ys=['Test/Average/SuccessRate'],
            plt_file=plt_file,
            env_id=env_id,
            x_label='TotalEnvSteps',
            y_label='Test/Average/SuccessRate',
            names=['metarl_pearl'],
        )

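        # Scale factor (tasks per meta-batch times max path length) recorded
        # alongside the results.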
        factor_val = params['meta_batch_size'] * params['max_path_length']
        result_json[env_id] = benchmark_helper.create_json(
            [metarl_csvs],
            seeds=seeds,
            trials=params['n_trials'],
            xs=['TotalEnvSteps'],
            ys=['Test/Average/SuccessRate'],
            factors=[factor_val],
            names=['metarl_pearl'])

        Rh.write_file(result_json, 'PEARL')
Example 3
    def test_benchmark_vpg(self):
        """Compare benchmarks between metarl and baselines.

        Returns:

        """
        # pylint: disable=no-self-use
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/vpg/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)

            seeds = random.sample(range(100), hyper_parameters['n_trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))

            metarl_tf_csvs = []
            metarl_pytorch_csvs = []

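            # Each trial runs both implementations with the same seed.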
            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                metarl_tf_dir = trial_dir + '/metarl/tf'
                metarl_pytorch_dir = trial_dir + '/metarl/pytorch'

                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    # Run metarl algorithms
                    env.reset()
                    metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir)

                env.reset()
                metarl_pytorch_csv = run_metarl_pytorch(
                    env, seed, metarl_pytorch_dir)

                metarl_tf_csvs.append(metarl_tf_csv)
                metarl_pytorch_csvs.append(metarl_pytorch_csv)

            env.close()

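            # Compare the averaged learning curves of the two implementations.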
            benchmark_helper.plot_average_over_trials(
                [metarl_tf_csvs, metarl_pytorch_csvs],
                ['Evaluation/AverageReturn'] * 2,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn',
                names=['metarl-TensorFlow', 'metarl-PyTorch'])

            result_json[env_id] = benchmark_helper.create_json(
                [metarl_tf_csvs, metarl_pytorch_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['Iteration'] * 2,
                ys=['Evaluation/AverageReturn'] * 2,
                factors=[hyper_parameters['batch_size']] * 2,
                names=['metarl-tf', 'metarl-pytorch'])

        Rh.write_file(result_json, 'VPG')
Example 4
    def test_benchmark_ppo(self):
        """Compare benchmarks between garage and baselines.

        Returns:

        """
        # pylint: disable=no-self-use
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)
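            # Baselines runs in a wrapped copy of the env that stops rollouts
            # at max_path_length.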
            baseline_env = AutoStopEnv(
                env_name=env_id,
                max_path_length=hyper_parameters['max_path_length'])

            seeds = random.sample(range(100), hyper_parameters['n_trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))

            baselines_csvs = []
            garage_tf_csvs = []
            garage_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_tf_dir = trial_dir + '/garage/tf'
                garage_pytorch_dir = trial_dir + '/garage/pytorch'
                baselines_dir = trial_dir + '/baselines'

                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baseline_csv = run_baselines(baseline_env, seed,
                                                 baselines_dir)

                    # Run garage algorithms
                    env.reset()
                    garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

                env.reset()
                garage_pytorch_csv = run_garage_pytorch(
                    env, seed, garage_pytorch_dir)

                baselines_csvs.append(baseline_csv)
                garage_tf_csvs.append(garage_tf_csv)
                garage_pytorch_csvs.append(garage_pytorch_csv)

            env.close()

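            # Average per-trial curves for baselines and both garage
            # implementations into one comparison plot.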
            benchmark_helper.plot_average_over_trials(
                [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
                [
                    'eprewmean', 'Evaluation/AverageReturn',
                    'Evaluation/AverageReturn'
                ],
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn',
                names=['baseline', 'garage-TensorFlow', 'garage-PyTorch'],
            )

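            # The baselines CSV logs 'nupdates'/'eprewmean', while garage logs
            # 'Iteration'/'Evaluation/AverageReturn'.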
            result_json[env_id] = benchmark_helper.create_json(
                [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['nupdates', 'Iteration', 'Iteration'],
                ys=[
                    'eprewmean', 'Evaluation/AverageReturn',
                    'Evaluation/AverageReturn'
                ],
                factors=[hyper_parameters['batch_size']] * 3,
                names=['baseline', 'garage-TF', 'garage-PT'])

        Rh.write_file(result_json, 'PPO')
Example 5
    def test_benchmark_pearl(self):
        """Run benchmarks for metarl PEARL."""

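        # Build the ML10 train and test task environments, tagging each with
        # its task id and name.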
        ML_train_envs = [
            TaskIdWrapper(MetaRLEnv(
                IgnoreDoneWrapper(
                    normalize(
                        env(*ML10_ARGS['train'][task]['args'],
                            **ML10_ARGS['train'][task]['kwargs'])))),
                          task_id=task_id,
                          task_name=task)
            for (task_id, (task, env)) in enumerate(ML10_ENVS['train'].items())
        ]
        ML_test_envs = [
            TaskIdWrapper(MetaRLEnv(
                IgnoreDoneWrapper(
                    normalize(
                        env(*ML10_ARGS['test'][task]['args'],
                            **ML10_ARGS['test'][task]['kwargs'])))),
                          task_id=task_id,
                          task_name=task)
            for (task_id, (task, env)) in enumerate(ML10_ENVS['test'].items())
        ]

        env_sampler = EnvPoolSampler(ML_train_envs)
        env = env_sampler.sample(params['num_train_tasks'])
        test_env_sampler = EnvPoolSampler(ML_test_envs)
        test_env = test_env_sampler.sample(params['num_test_tasks'])

        env_id = 'ML10'
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'pearl', timestamp)
        result_json = {}
        seeds = random.sample(range(100), params['n_trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        metarl_csvs = []

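        # One seeded PEARL run per trial on the sampled ML10 tasks.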
        for trial in range(params['n_trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_dir = trial_dir + '/metarl'

            metarl_csv = run_metarl(env, test_env, seed, metarl_dir)
            metarl_csvs.append(metarl_csv)

        env.close()

        benchmark_helper.plot_average_over_trials(
            [metarl_csvs],
            ys=['Test/Average/SuccessRate'],
            plt_file=plt_file,
            env_id=env_id,
            x_label='TotalEnvSteps',
            y_label='Test/Average/SuccessRate',
            names=['metarl_pearl'],
        )

        factor_val = params['meta_batch_size'] * params['max_path_length']
        result_json[env_id] = benchmark_helper.create_json(
            [metarl_csvs],
            seeds=seeds,
            trials=params['n_trials'],
            xs=['TotalEnvSteps'],
            ys=['Test/Average/SuccessRate'],
            factors=[factor_val],
            names=['metarl_pearl'])

        Rh.write_file(result_json, 'PEARL')
Example 6
    def test_benchmark_ppo(self):
        """Compare benchmarks between metarl and baselines.

        Returns:

        """
        # pylint: disable=no-self-use
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id,
                max_path_length=hyper_parameters['max_path_length'])

            seeds = random.sample(range(100), hyper_parameters['n_trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))

            baselines_csvs = []
            metarl_tf_csvs = []
            metarl_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                metarl_tf_dir = trial_dir + '/metarl/tf'
                metarl_pytorch_dir = trial_dir + '/metarl/pytorch'
                baselines_dir = trial_dir + '/baselines'

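                # Baselines and the metarl TF run share one fresh TF graph per
                # trial.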
                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baseline_csv = run_baselines(baseline_env, seed,
                                                 baselines_dir)

                    # Run metarl algorithms
                    env.reset()
                    metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir)

                # env.reset()
                # metarl_pytorch_csv = run_metarl_pytorch(
                #     env, seed, metarl_pytorch_dir)

                baselines_csvs.append(baseline_csv)
                metarl_tf_csvs.append(metarl_tf_csv)
                # metarl_pytorch_csvs.append(metarl_pytorch_csv)

            env.close()

            # benchmark_helper.plot_average_over_trials(
            #     [baselines_csvs, metarl_tf_csvs, metarl_pytorch_csvs],
            #     [
            #         'eprewmean', 'Evaluation/AverageReturn',
            #         'Evaluation/AverageReturn'
            #     ],
            #     plt_file=plt_file,
            #     env_id=env_id,
            #     x_label='Iteration',
            #     y_label='Evaluation/AverageReturn',
            #     names=['baseline', 'metarl-TensorFlow', 'metarl-PyTorch'],
            # )

            result_json[env_id] = benchmark_helper.create_json(
                [baselines_csvs, metarl_tf_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['total_timesteps', 'TotalEnvSteps'],
                ys=['eprewmean', 'Evaluation/AverageReturn'],
                factors=[hyper_parameters['batch_size']] * 2,
                names=['baseline', 'metarl-TF'])

            # Rh.relplot(g_csvs=metarl_tf_csvs,
            #            b_csvs=baselines_csvs,
            #            g_x='TotalEnvSteps',
            #            g_y='Evaluation/AverageReturn',
            #            g_z='MetaRL',
            #            b_x='total_timesteps',
            #            b_y='eprewmean',
            #            b_z='Openai/Baseline',
            #            trials=hyper_parameters['n_trials'],
            #            seeds=seeds,
            #            plt_file=plt_file,
            #            env_id=env_id,
            #            x_label='EnvTimeStep',
            #            y_label='Performance')

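            # Plot baselines and metarl-TF against their own step columns
            # on a shared env-step x-axis.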
            benchmark_helper.plot_average_over_trials_with_x(
                [baselines_csvs, metarl_tf_csvs],
                ['eprewmean', 'Evaluation/AverageReturn'],
                ['total_timesteps', 'TotalEnvSteps'],
                plt_file=plt_file,
                env_id=env_id,
                x_label='EnvTimeStep',
                y_label='Performance',
                names=['baseline', 'metarl-TensorFlow'],
            )

        # Rh.relplot(g_csvs=metarl_tf_csvs,
        #            b_csvs=metarl_pytorch_csvs,
        #            g_x='TotalEnvSteps',
        #            g_y='Evaluation/AverageReturn',
        #            g_z='MetaRL-TF',
        #            b_x='TotalEnvSteps',
        #            b_y='Evaluation/AverageReturn',
        #            b_z='MetaRL-PT',
        #            trials=hyper_parameters['n_trials'],
        #            seeds=seeds,
        #            plt_file=plt_file,
        #            env_id=env_id,
        #            x_label='EnvTimeStep',
        #            y_label='Performance')

        Rh.write_file(result_json, 'PPO')