def benchmark_ppo_continuous_mlp_baseline():
    """ Compare benchmarks between CMB and potentially other baselines."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'ppo_cmb', timestamp)
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)

        seeds = random.sample(range(100), num_trials)

        task_dir = osp.join(benchmark_dir, env_id)
        cmb_csvs = []
        for trial in range(num_trials):
            seed = seeds[trial]

            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            cmb_dir = trial_dir + '/continuous_mlp_baseline'

            with tf.Graph().as_default():
                env.reset()
                cmb_csv = ppo_cmb(env, seed, cmb_dir)
            cmb_csvs.append(cmb_csv)

        env.close()
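This excerpt assumes a module-level num_trials constant and a ppo_cmb helper that trains PPO with a ContinuousMLPBaseline and returns the path of the progress CSV it writes; neither is shown above. A minimal interface sketch, with assumed names and values rather than the original definitions:

# Hypothetical module-level pieces assumed by the excerpt above.
num_trials = 5  # number of seeded trials per task (assumed value)


def ppo_cmb(env, seed, log_dir):
    """Train PPO with a ContinuousMLPBaseline and return the progress CSV path.

    Interface sketch only; the real helper is defined elsewhere in the
    benchmark script.
    """
    raise NotImplementedError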
Example #2
    def test_benchmark_ddpg(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """
        # Load Mujoco1M tasks, you can check other benchmarks here
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
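        # Each entry of mujoco1m['tasks'] is a dict with 'env_id', 'trials'
        # and 'num_timesteps' keys (see the benchmarks.py file linked above),
        # which is what task['env_id'] and task['trials'] below rely on.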

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'ddpg', timestamp)

        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id, max_path_length=params['n_rollout_steps'])
            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task['trials']):
                env.reset()
                baseline_env.reset()
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                garage_dir = osp.join(trial_dir, 'garage')
                baselines_dir = osp.join(trial_dir, 'baselines')

                with tf.Graph().as_default():
                    # Run garage algorithms
                    garage_csv = run_garage(env, seed, garage_dir)

                    # Run baselines algorithms
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            plot(b_csvs=baselines_csvs,
                 g_csvs=garage_csvs,
                 g_x='Epoch',
                 g_y='AverageReturn',
                 b_x='total/epochs',
                 b_y='rollout/return',
                 trials=task['trials'],
                 seeds=seeds,
                 plt_file=plt_file,
                 env_id=env_id)
Example #3
    def test_benchmark_sac(self):
        '''
        Compare benchmarks between metarl and baselines.
        :return:
        '''
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'sac', timestamp)
        mujoco_tasks = ['HalfCheetah-v2']
        for task in mujoco_tasks:
            env = MetaRLEnv(normalize(gym.make(task)))

            seeds = [121, 524, 4]

            task_dir = osp.join(benchmark_dir, task)
            plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(task))
            relplt_file = osp.join(benchmark_dir,
                                   '{}_benchmark_mean.png'.format(task))
            metarl_csvs = []

            for trial in range(3):
                env.reset()
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                metarl_dir = osp.join(trial_dir, 'metarl')
                # Run metarl algorithms
                metarl_csv = run_metarl(env, seed, metarl_dir)
                metarl_csvs.append(metarl_csv)

            env.close()
Example #4
    def benchmark_trpo(self):  # pylint: disable=no-self-use
        """Compare benchmarks between garage and baselines."""
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/trpo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            seeds = random.sample(range(100), hyper_parameters['n_trials'])
            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            garage_tf_csvs = []
            garage_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                _PLACEHOLDER_CACHE.clear()
                seed = seeds[trial]
                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_tf_dir = trial_dir + '/garage_tf'
                garage_pytorch_dir = trial_dir + '/garage_pytorch'

                # Run garage algorithms
                env.reset()
                garage_pytorch_csv = run_garage_pytorch(
                    env, seed, garage_pytorch_dir)

                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    env.reset()
                    garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

                garage_tf_csvs.append(garage_tf_csv)
                garage_pytorch_csvs.append(garage_pytorch_csv)

            env.close()

            benchmark_helper.plot_average_over_trials(
                [garage_tf_csvs, garage_pytorch_csvs],
                ['Evaluation/AverageReturn'] * 2,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn',
                names=['garage-TensorFlow', 'garage-PyTorch'],
            )

            result_json[env_id] = benchmark_helper.create_json(
                [garage_tf_csvs, garage_pytorch_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['Evaluation/Iteration'] * 2,
                ys=['Evaluation/AverageReturn'] * 2,
                factors=[hyper_parameters['batch_size']] * 2,
                names=['garage-TF', 'garage-PT'])

        Rh.write_file(result_json, 'TRPO')
Example #5
    def test_benchmark_ddpg(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """
        # Load Mujoco1M tasks, you can check other benchmarks here
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
        mujoco1m = benchmarks.get_benchmark("Mujoco1M")

        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
        benchmark_dir = "./data/local/benchmarks/ddpg/%s/" % timestamp

        for task in mujoco1m["tasks"]:
            env_id = task["env_id"]
            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id, max_path_length=params["n_rollout_steps"])
            seeds = random.sample(range(100), task["trials"])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                "{}_benchmark.png".format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task["trials"]):
                env.reset()
                baseline_env.reset()
                seed = seeds[trial]

                trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
                garage_dir = trial_dir + "/garage"
                baselines_dir = trial_dir + "/baselines"

                with tf.Graph().as_default():
                    # Run garage algorithms
                    garage_csv = run_garage(env, seed, garage_dir)

                    # Run baselines algorithms
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            plot(b_csvs=baselines_csvs,
                 g_csvs=garage_csvs,
                 g_x="Epoch",
                 g_y="AverageReturn",
                 b_x="total/epochs",
                 b_y="rollout/return",
                 trials=task["trials"],
                 seeds=seeds,
                 plt_file=plt_file,
                 env_id=env_id)
Example #6
    def test_benchmark_trpo(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """

        mujoco1m = benchmarks.get_benchmark("Mujoco1M")

        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
        benchmark_dir = "./data/local/benchmarks/trpo/%s/" % timestamp
        for task in mujoco1m["tasks"]:
            env_id = task["env_id"]
            env = gym.make(env_id)
            baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

            seeds = random.sample(range(100), task["trials"])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                "{}_benchmark.png".format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task["trials"]):
                _PLACEHOLDER_CACHE.clear()
                seed = seeds[trial]

                trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
                garage_dir = trial_dir + "/garage"
                baselines_dir = trial_dir + "/baselines"

                with tf.Graph().as_default():
                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)

                    # Run baseline algorithms
                    baseline_env.reset()
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            plot(b_csvs=baselines_csvs,
                 g_csvs=garage_csvs,
                 g_x="Iteration",
                 g_y="EpisodeRewardMean",
                 b_x="EpThisIter",
                 b_y="EpRewMean",
                 trials=task["trials"],
                 seeds=seeds,
                 plt_file=plt_file,
                 env_id=env_id)

            env.close()
Example #7
    def test_benchmark_her(self):
        '''
        Compare benchmarks between garage and baselines.

        :return:
        '''
        mujoco1m = benchmarks.get_benchmark('HerDdpg')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'her', timestamp)
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task['trials']):
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                garage_dir = osp.join(trial_dir, 'garage')
                baselines_dir = osp.join(trial_dir, 'baselines')

                with tf.Graph().as_default():
                    garage_csv = run_garage(env, seed, garage_dir)

                    CACHED_ENVS.clear()
                    baselines_csv = run_baselines(env_id, seed, baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            plot(
                b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x='Epoch',
                g_y='AverageSuccessRate',
                b_x='epoch',
                b_y='train/success_rate',
                trials=task['trials'],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id)
Example #8
    def test_benchmark_ppo(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """
        mujoco1m = benchmarks.get_benchmark("Mujoco1M")

        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
        benchmark_dir = "./benchmark_ppo/%s/" % timestamp
        for task in mujoco1m["tasks"]:
            env_id = task["env_id"]
            env = gym.make(env_id)
            seeds = random.sample(range(100), task["trials"])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                "{}_benchmark.png".format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trail in range(task["trials"]):
                env.reset()
                seed = seeds[trail]

                trail_dir = task_dir + "/trail_%d_seed_%d" % (trail + 1, seed)
                garage_dir = trail_dir + "/garage"
                baselines_dir = trail_dir + "/baselines"

                # Run garage algorithms
                garage_csv = run_garage(env, seed, garage_dir)

                # Run baselines algorithms
                env.reset()
                baselines_csv = run_baselines(env, seed, baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            plot(b_csvs=baselines_csvs,
                 g_csvs=garage_csvs,
                 g_x="Iteration",
                 g_y="AverageReturn",
                 b_x="nupdates",
                 b_y="eprewmean",
                 trails=task["trials"],
                 seeds=seeds,
                 plt_file=plt_file,
                 env_id=env_id)
Example #9
    def test_benchmark_her(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """
        mujoco1m = benchmarks.get_benchmark("HerDdpg")

        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
        benchmark_dir = "./data/local/benchmarks/her/%s/" % timestamp
        for task in mujoco1m["tasks"]:
            env_id = task["env_id"]
            env = gym.make(env_id)
            seeds = random.sample(range(100), task["trials"])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                "{}_benchmark.png".format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task["trials"]):
                seed = seeds[trial]

                trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
                garage_dir = trial_dir + "/garage"
                baselines_dir = trial_dir + "/baselines"

                with tf.Graph().as_default():
                    garage_csv = run_garage(env, seed, garage_dir)

                    CACHED_ENVS.clear()
                    baselines_csv = run_baselines(env_id, seed, baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            plot(
                b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x="Epoch",
                g_y="AverageSuccessRate",
                b_x="epoch",
                b_y="train/success_rate",
                trials=task["trials"],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id)
Example #10
    def benchmark_continuous_mlp_q_function(self):
        # pylint: disable=no-self-use
        """Test Continuous MLP QFunction Benchmarking."""
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'continuous_mlp_q_function', timestamp)
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)

            seeds = random.sample(range(100), num_of_trials)

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(
                benchmark_dir,
                '{}_benchmark_continuous_mlp_q_function.png'.format(env_id))
            garage_csvs = []

            for trial in range(num_of_trials):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_dir = trial_dir + '/garage'

                with tf.Graph().as_default():
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)
                garage_csvs.append(garage_csv)

            env.close()

            Rh.relplot(g_csvs=garage_csvs,
                       b_csvs=[],
                       g_x='Epoch',
                       g_y='Evaluation/AverageReturn',
                       g_z='Garage',
                       b_x=None,
                       b_y=None,
                       b_z=None,
                       trials=num_of_trials,
                       seeds=seeds,
                       plt_file=plt_file,
                       env_id=env_id,
                       x_label='Iteration',
                       y_label='Evaluation/AverageReturn')
Example #11
    def test_benchmark_continuous_mlp_policy(self):
        """Benchmark ContinuousMLPPolicy on the Mujoco1M tasks."""
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'continuous_mlp_policy', timestamp)
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)

            seeds = random.sample(range(100), num_of_trials)

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(
                benchmark_dir,
                '{}_benchmark_continuous_mlp_policy.png'.format(env_id))
            metarl_csvs = []

            for trial in range(num_of_trials):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                metarl_dir = trial_dir + '/metarl'

                with tf.Graph().as_default():
                    env.reset()
                    metarl_csv = run_metarl(env, seed, metarl_dir)
                metarl_csvs.append(metarl_csv)

            env.close()

            Rh.relplot(g_csvs=metarl_csvs,
                       b_csvs=[],
                       g_x='Epoch',
                       g_y='Evaluation/AverageReturn',
                       g_z='MetaRL',
                       b_x=None,
                       b_y=None,
                       b_z=None,
                       trials=num_of_trials,
                       seeds=seeds,
                       plt_file=plt_file,
                       env_id=env_id,
                       x_label='Iteration',
                       y_label='Evaluation/AverageReturn')
Example #12
    def test_benchmark_ppo(self):
        '''
        Compare benchmarks between garage and baselines.

        :return:
        '''
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task['trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_dir = trial_dir + '/garage'
                baselines_dir = trial_dir + '/baselines'

                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            Rh.plot(b_csvs=baselines_csvs,
                    g_csvs=garage_csvs,
                    g_x='Iteration',
                    g_y='AverageReturn',
                    b_x='nupdates',
                    b_y='eprewmean',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='AverageReturn')

            result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                                 g_csvs=garage_csvs,
                                                 seeds=seeds,
                                                 trails=task['trials'],
                                                 g_x='Iteration',
                                                 g_y='AverageReturn',
                                                 b_x='nupdates',
                                                 b_y='eprewmean',
                                                 factor_g=2048,
                                                 factor_b=2048)

        Rh.write_file(result_json, 'PPO')
Example #13
    def test_benchmark_vpg(self):
        """Compare benchmarks between metarl and baselines.

        Returns:

        """
        # pylint: disable=no-self-use
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/vpg/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)

            seeds = random.sample(range(100), hyper_parameters['n_trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))

            metarl_tf_csvs = []
            metarl_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                metarl_tf_dir = trial_dir + '/metarl/tf'
                metarl_pytorch_dir = trial_dir + '/metarl/pytorch'

                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    # Run metarl algorithms
                    env.reset()
                    metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir)

                env.reset()
                metarl_pytorch_csv = run_metarl_pytorch(
                    env, seed, metarl_pytorch_dir)

                metarl_tf_csvs.append(metarl_tf_csv)
                metarl_pytorch_csvs.append(metarl_pytorch_csv)

            env.close()

            benchmark_helper.plot_average_over_trials(
                [metarl_tf_csvs, metarl_pytorch_csvs],
                ['Evaluation/AverageReturn'] * 2,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn',
                names=['metarl-TensorFlow', 'metarl-PyTorch'])

            result_json[env_id] = benchmark_helper.create_json(
                [metarl_tf_csvs, metarl_pytorch_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['Iteration'] * 2,
                ys=['Evaluation/AverageReturn'] * 2,
                factors=[hyper_parameters['batch_size']] * 2,
                names=['metarl-tf', 'metarl-pytorch'])

        Rh.write_file(result_json, 'VPG')
Example #14
import random

from baselines.bench import benchmarks
import pytest

from garage import wrap_experiment
from garage.tf.experiment import LocalTFRunner
from garage.tf.optimizers import FirstOrderOptimizer
from garage.tf.policies import GaussianMLPPolicy as TF_GMP
from garage.torch.algos import PPO as PyTorch_PPO
from garage.torch.policies import GaussianMLPPolicy as PyTorch_GMP
from tests import benchmark_helper

hyper_parameters = {
    'n_epochs': 800,
    'max_path_length': 128,
    'batch_size': 1024,
    'n_trials': 4,
}
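# With these settings each trial collects roughly n_epochs * batch_size
# = 800 * 1024 ≈ 0.8M environment steps, close to the 1M-step budget the
# Mujoco1M benchmark is named after.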

seeds = random.sample(range(100), hyper_parameters['n_trials'])
tasks = benchmarks.get_benchmark('Mujoco1M')['tasks']


@pytest.mark.benchmark
def auto_benchmark_ppo_garage_tf():
    """Create garage TensorFlow PPO model and training.

    Training over different environments and seeds.

    """
    @wrap_experiment
    def ppo_garage_tf(ctxt, env_id, seed):
        """Create garage TensorFlow PPO model and training.

        Args:
            ctxt (garage.experiment.ExperimentContext): The experiment
Example #15
def test_benchmark_vpg():
    """Compare benchmarks between garage and baselines."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/vpg/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']

        env = gym.make(env_id)

        seeds = random.sample(range(100), task['trials'])

        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))

        garage_tf_csvs = []
        garage_pytorch_csvs = []

        for trial in range(task['trials']):
            seed = seeds[trial]

            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_tf_dir = trial_dir + '/garage/tf'
            garage_pytorch_dir = trial_dir + '/garage/pytorch'

            env.reset()
            garage_pytorch_csv = run_garage_pytorch(env, seed,
                                                    garage_pytorch_dir)

            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

            garage_tf_csvs.append(garage_tf_csv)
            garage_pytorch_csvs.append(garage_pytorch_csv)

        env.close()

        plot([garage_tf_csvs, garage_pytorch_csvs], ['Iteration', 'Iteration'],
             ['AverageReturn', 'AverageReturn'],
             trials=task['trials'],
             seeds=seeds,
             plt_file=plt_file,
             env_id=env_id,
             x_label='Iteration',
             y_label='AverageReturn',
             names=['garage-tf', 'garage-pytorch'],
             smooth=True)

        result_json[env_id] = create_json(
            [garage_tf_csvs, garage_pytorch_csvs],
            seeds=seeds,
            trails=task['trials'],
            xs=['Iteration', 'Iteration'],
            ys=['AverageReturn', 'AverageReturn'],
            factors=[2048, 2048],
            names=['garage-tf', 'garage-pytorch'])

    Rh.write_file(result_json, 'VPG')
Example #16
    def test_benchmark_td3(self):
        """
        Test garage TD3 benchmarks.

        :return:
        """
        # Load Mujoco1M tasks, you can check other benchmarks here
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py # noqa: E501
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'td3', timestamp)
        result_json = {}

        # rlkit throws an error for 'Reacher-v2' due to a gym version mismatch
        mujoco1m['tasks'] = [
            task for task in mujoco1m['tasks']
            if task['env_id'] != 'Reacher-v2'
        ]

        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            rlkit_env = AutoStopEnv(env_name=env_id,
                                    max_path_length=params['n_rollout_steps'])
            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            garage_csvs = []
            rlkit_csvs = []

            for trial in range(task['trials']):
                env.reset()
                rlkit_env.reset()
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                garage_dir = trial_dir + '/garage'
                rlkit_dir = osp.join(trial_dir, 'rlkit')

                with tf.Graph().as_default():
                    # Run rlkit algorithms
                    rlkit_csv = run_rlkit(rlkit_env, seed, rlkit_dir)

                    # Run garage algorithms
                    garage_csv = run_garage(env, seed, garage_dir)

                garage_csvs.append(garage_csv)
                rlkit_csvs.append(rlkit_csv)

            Rh.plot(b_csvs=rlkit_csvs,
                    g_csvs=garage_csvs,
                    g_x='Epoch',
                    g_y='Evaluation/AverageReturn',
                    g_z='garage',
                    b_x='Epoch',
                    b_y='evaluation/Average Returns',
                    b_z='rlkit',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='Evaluation/AverageReturn')

            result_json[env_id] = Rh.create_json(
                b_csvs=rlkit_csvs,
                g_csvs=garage_csvs,
                seeds=seeds,
                trails=task['trials'],
                g_x='Epoch',
                g_y='Evaluation/AverageReturn',
                b_x='Epoch',
                b_y='evaluation/Average Returns',
                factor_g=1,
                factor_b=1)

        Rh.write_file(result_json, 'TD3')
Example #17
    def test_benchmark_ppo(self):
        """Compare benchmarks between garage and baselines.

        Returns:

        """
        # pylint: disable=no-self-use
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id,
                max_path_length=hyper_parameters['max_path_length'])

            seeds = random.sample(range(100), hyper_parameters['n_trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))

            baselines_csvs = []
            garage_tf_csvs = []
            garage_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_tf_dir = trial_dir + '/garage/tf'
                garage_pytorch_dir = trial_dir + '/garage/pytorch'
                baselines_dir = trial_dir + '/baselines'

                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baseline_csv = run_baselines(baseline_env, seed,
                                                 baselines_dir)

                    # Run garage algorithms
                    env.reset()
                    garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)

                env.reset()
                garage_pytorch_csv = run_garage_pytorch(
                    env, seed, garage_pytorch_dir)

                baselines_csvs.append(baseline_csv)
                garage_tf_csvs.append(garage_tf_csv)
                garage_pytorch_csvs.append(garage_pytorch_csv)

            env.close()

            benchmark_helper.plot_average_over_trials(
                [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
                [
                    'eprewmean', 'Evaluation/AverageReturn',
                    'Evaluation/AverageReturn'
                ],
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn',
                names=['baseline', 'garage-TensorFlow', 'garage-PyTorch'],
            )

            result_json[env_id] = benchmark_helper.create_json(
                [baselines_csvs, garage_tf_csvs, garage_pytorch_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['nupdates', 'Iteration', 'Iteration'],
                ys=[
                    'eprewmean', 'Evaluation/AverageReturn',
                    'Evaluation/AverageReturn'
                ],
                factors=[hyper_parameters['batch_size']] * 3,
                names=['baseline', 'garage-TF', 'garage-PT'])

        Rh.write_file(result_json, 'PPO')
Example #18
    def test_benchmark_ppo(self):
        """Compare benchmarks between metarl and baselines.

        Returns:

        """
        # pylint: disable=no-self-use
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id,
                max_path_length=hyper_parameters['max_path_length'])

            seeds = random.sample(range(100), hyper_parameters['n_trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))

            baselines_csvs = []
            metarl_tf_csvs = []
            metarl_pytorch_csvs = []

            for trial in range(hyper_parameters['n_trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                metarl_tf_dir = trial_dir + '/metarl/tf'
                metarl_pytorch_dir = trial_dir + '/metarl/pytorch'
                baselines_dir = trial_dir + '/baselines'

                # pylint: disable=not-context-manager
                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baseline_csv = run_baselines(baseline_env, seed,
                                                 baselines_dir)

                    # Run metarl algorithms
                    env.reset()
                    metarl_tf_csv = run_metarl_tf(env, seed, metarl_tf_dir)

                # env.reset()
                # metarl_pytorch_csv = run_metarl_pytorch(
                #     env, seed, metarl_pytorch_dir)

                baselines_csvs.append(baseline_csv)
                metarl_tf_csvs.append(metarl_tf_csv)
                # metarl_pytorch_csvs.append(metarl_pytorch_csv)

            env.close()

            # benchmark_helper.plot_average_over_trials(
            #     [baselines_csvs, metarl_tf_csvs, metarl_pytorch_csvs],
            #     [
            #         'eprewmean', 'Evaluation/AverageReturn',
            #         'Evaluation/AverageReturn'
            #     ],
            #     plt_file=plt_file,
            #     env_id=env_id,
            #     x_label='Iteration',
            #     y_label='Evaluation/AverageReturn',
            #     names=['baseline', 'metarl-TensorFlow', 'metarl-PyTorch'],
            # )

            # result_json[env_id] = benchmark_helper.create_json(
            #     [baselines_csvs, metarl_tf_csvs],
            #     seeds=seeds,
            #     trials=hyper_parameters['n_trials'],
            #     xs=['total_timesteps', 'TotalEnvSteps'],
            #     ys=[
            #         'eprewmean', 'Evaluation/AverageReturn'
            #     ],
            #     factors=[hyper_parameters['batch_size']] * 2,
            #     names=['baseline', 'metarl-TF'])

            result_json[env_id] = benchmark_helper.create_json(
                [baselines_csvs, metarl_tf_csvs],
                seeds=seeds,
                trials=hyper_parameters['n_trials'],
                xs=['total_timesteps', 'TotalEnvSteps'],
                ys=['eprewmean', 'Evaluation/AverageReturn'],
                factors=[hyper_parameters['batch_size']] * 2,
                names=['baseline', 'metarl-TF'])

            # Rh.relplot(g_csvs=metarl_tf_csvs,
            #            b_csvs=baselines_csvs,
            #            g_x='TotalEnvSteps',
            #            g_y='Evaluation/AverageReturn',
            #            g_z='MetaRL',
            #            b_x='total_timesteps',
            #            b_y='eprewmean',
            #            b_z='Openai/Baseline',
            #            trials=hyper_parameters['n_trials'],
            #            seeds=seeds,
            #            plt_file=plt_file,
            #            env_id=env_id,
            #            x_label='EnvTimeStep',
            #            y_label='Performance')

            benchmark_helper.plot_average_over_trials_with_x(
                [baselines_csvs, metarl_tf_csvs],
                ['eprewmean', 'Evaluation/AverageReturn'],
                ['total_timesteps', 'TotalEnvSteps'],
                plt_file=plt_file,
                env_id=env_id,
                x_label='EnvTimeStep',
                y_label='Performance',
                names=['baseline', 'metarl-TensorFlow'],
            )

        # Rh.relplot(g_csvs=metarl_tf_csvs,
        #            b_csvs=metarl_pytorch_csvs,
        #            g_x='TotalEnvSteps',
        #            g_y='Evaluation/AverageReturn',
        #            g_z='MetaRL-TF',
        #            b_x='TotalEnvSteps',
        #            b_y='Evaluation/AverageReturn',
        #            b_z='MetaRL-PT',
        #            trials=hyper_parameters['n_trials'],
        #            seeds=seeds,
        #            plt_file=plt_file,
        #            env_id=env_id,
        #            x_label='EnvTimeStep',
        #            y_label='Performance')

        Rh.write_file(result_json, 'PPO')
Example #19
import random

from baselines.bench import benchmarks
import pytest

from garage import wrap_experiment

hyper_parameters = {
    'n_trials': 6,
    'steps_per_epoch': 20,
    'n_rollout_steps': 250,
    'n_train_steps': 1,
    'discount': 0.99,
    'tau': 0.005,
    'replay_buffer_size': int(1e6),
    'sigma': 0.1,
    'smooth_return': False,
    'buffer_batch_size': 100,
    'min_buffer_size': int(1e4)
}
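# TD3/DDPG-style settings: 'tau' is the soft target-network update rate,
# 'sigma' the std of the Gaussian exploration noise, 'discount' the return
# discount factor, and 'n_rollout_steps' the maximum path length per rollout.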

seeds = random.sample(range(100), hyper_parameters['n_trials'])
tasks = [
    task for task in benchmarks.get_benchmark('Mujoco1M')['tasks']
    if task['env_id'] != 'Reacher-v2'
]
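# Reacher-v2 is excluded here, mirroring the TD3 comparison above, where
# rlkit errors on it due to a gym version mismatch.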


@pytest.mark.benchmark
def auto_benchmark_td3_garage_tf():
    """Create garage TensorFlow TD3 model and training.

    Training over different environments and seeds.

    """
    @wrap_experiment
    def td3_garage_tf(ctxt, env_id, seed):
        """Create garage TensorFlow TD3 model and training.
Example #20
    def test_benchmark_ppo(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """
        mujoco1m = benchmarks.get_benchmark("Mujoco1M")
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
        benchmark_dir = "./data/local/benchmarks/ppo/%s/" % timestamp
        result_json = {}
        for task in mujoco1m["tasks"]:
            env_id = task["env_id"]

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

            seeds = random.sample(range(100), task["trials"])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                "{}_benchmark.png".format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task['trials']):
                seed = seeds[trial]

                trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
                garage_dir = trial_dir + "/garage"
                baselines_dir = trial_dir + "/baselines"

                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            Rh.plot(b_csvs=baselines_csvs,
                    g_csvs=garage_csvs,
                    g_x="Iteration",
                    g_y="EpisodeRewardMean",
                    b_x="nupdates",
                    b_y="eprewmean",
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label="Iteration",
                    y_label="AverageReturn")

            result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                                 g_csvs=garage_csvs,
                                                 seeds=seeds,
                                                 trails=task["trials"],
                                                 g_x="Iteration",
                                                 g_y="EpisodeRewardMean",
                                                 b_x="nupdates",
                                                 b_y="eprewmean",
                                                 factor_g=2048,
                                                 factor_b=2048)

        Rh.write_file(result_json, "PPO")
Example #21
    def test_benchmark_ddpg(self):
        '''
        Compare benchmarks between metarl and baselines.
        :return:
        '''
        # Load Mujoco1M tasks, you can check other benchmarks here
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'ddpg', timestamp)
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            baseline_env = AutoStopEnv(
                env_name=env_id, max_path_length=params['n_rollout_steps'])
            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            relplt_file = osp.join(benchmark_dir,
                                   '{}_benchmark_mean.png'.format(env_id))
            baselines_csvs = []
            metarl_csvs = []

            for trial in range(task['trials']):
                env.reset()
                baseline_env.reset()
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                metarl_dir = osp.join(trial_dir, 'metarl')
                baselines_dir = osp.join(trial_dir, 'baselines')

                with tf.Graph().as_default():
                    # Run metarl algorithms
                    metarl_csv = run_metarl(env, seed, metarl_dir)

                    # Run baselines algorithms
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                metarl_csvs.append(metarl_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            Rh.plot(b_csvs=baselines_csvs,
                    g_csvs=metarl_csvs,
                    g_x='Epoch',
                    g_y='Evaluation/AverageReturn',
                    g_z='MetaRL',
                    b_x='total/epochs',
                    b_y='rollout/return',
                    b_z='Baseline',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Epoch',
                    y_label='Evaluation/AverageReturn')

            Rh.relplot(g_csvs=metarl_csvs,
                       b_csvs=baselines_csvs,
                       g_x='Epoch',
                       g_y='Evaluation/AverageReturn',
                       g_z='MetaRL',
                       b_x='total/epochs',
                       b_y='rollout/return',
                       b_z='Baseline',
                       trials=task['trials'],
                       seeds=seeds,
                       plt_file=relplt_file,
                       env_id=env_id,
                       x_label='Epoch',
                       y_label='Evaluation/AverageReturn')

            result_json[env_id] = Rh.create_json(
                b_csvs=baselines_csvs,
                g_csvs=metarl_csvs,
                seeds=seeds,
                trails=task['trials'],
                g_x='Epoch',
                g_y='Evaluation/AverageReturn',
                b_x='total/epochs',
                b_y='rollout/return',
                factor_g=params['steps_per_epoch'] * params['n_rollout_steps'],
                factor_b=1)

        Rh.write_file(result_json, 'DDPG')
Example #22
"""Global parameters for benchmarking."""
from baselines.bench import benchmarks

Fetch1M_ENV_SET = [
    task['env_id'] for task in benchmarks.get_benchmark('Fetch1M')['tasks']
]

MuJoCo1M_ENV_SET = [
    task['env_id'] for task in benchmarks.get_benchmark('Mujoco1M')['tasks']
]

PIXEL_ENV_SET = ['CubeCrash-v0', 'MemorizeDigits-v0']

STATE_ENV_SET = [
    'LunarLander-v2',
    'Assault-ramDeterministic-v4',
    'Breakout-ramDeterministic-v4',
    'ChopperCommand-ramDeterministic-v4',
    'Tutankham-ramDeterministic-v4',
    'CartPole-v1',
]
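
These environment-id sets are meant to be consumed by parametrized benchmark tests. A hypothetical usage sketch (the test name and body are assumptions, and MuJoCo1M_ENV_SET is assumed to be in scope from this module):

import pytest


# Hypothetical consumer: run one benchmark test per MuJoCo-1M environment id.
@pytest.mark.parametrize('env_id', MuJoCo1M_ENV_SET)
def test_benchmark_runs_on_mujoco1m(env_id):
    """Placeholder showing how the environment sets parametrize a benchmark."""
    # A real benchmark would build gym.make(env_id) and train an algorithm here.
    assert isinstance(env_id, str)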