    def test_benchmark_gaussian_gru_policy(self):
        """Benchmark the Gaussian GRU policy on a set of MuJoCo tasks."""
        bench_envs = [
            'HalfCheetah-v2', 'Reacher-v2', 'Walker2d-v2', 'Hopper-v2',
            'Swimmer-v2', 'InvertedPendulum-v2', 'InvertedDoublePendulum-v2'
        ]

        # Draw three random seeds per environment, so every task is
        # benchmarked over multiple independent runs.
        seeds = np.random.choice(100, size=(len(bench_envs), 3))

        for env_num in range(len(bench_envs)):
            self._env = bench_envs[env_num]
            for seed in seeds[env_num]:
                self._seed = seed
                deterministic.set_seed(self._seed)
                name = '{}_seed_{}_metarl'.format(self._env, self._seed)
                run_experiment(self.run_task,
                               snapshot_mode='last',
                               seed=self._seed,
                               n_parallel=12,
                               exp_name=name)
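The loop above hands run_experiment a run_task method defined on the same test
class, which is not part of this snippet. Below is a minimal sketch of what such
a task could look like, modeled on the TRPO examples further down; the
GaussianGRUPolicy choice, the import paths, and the hyperparameters are
assumptions rather than the original benchmark code.

    # Sketch only. Assumed imports, following the metarl (garage fork) layout:
    #   from metarl.np.baselines import LinearFeatureBaseline
    #   from metarl.tf.algos import TRPO
    #   from metarl.tf.envs import TfEnv
    #   from metarl.tf.experiment import LocalTFRunner
    #   from metarl.tf.policies import GaussianGRUPolicy
    def run_task(self, snapshot_config, *_):
        """Train TRPO with a Gaussian GRU policy on the current benchmark env."""
        with LocalTFRunner(snapshot_config=snapshot_config) as runner:
            env = TfEnv(env_name=self._env)
            policy = GaussianGRUPolicy(name='policy', env_spec=env.spec)
            baseline = LinearFeatureBaseline(env_spec=env.spec)
            algo = TRPO(env_spec=env.spec,
                        policy=policy,
                        baseline=baseline,
                        max_path_length=100,
                        discount=0.99)
            runner.setup(algo, env)
            runner.train(n_epochs=100, batch_size=4000)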
Example #2

# The imports and the two constants below are not in the original fragment; the
# module paths assume the metarl (garage fork) layout used in the other examples.
from metarl.experiment import run_experiment
from metarl.np.baselines import LinearFeatureBaseline
from metarl.tf.algos import TRPO
from metarl.tf.envs import TfEnv
from metarl.tf.experiment import LocalTFRunner
from metarl.tf.policies import CategoricalMLPPolicy
from metarl.tf.samplers import BatchSampler

n_envs = 8  # assumed value; number of parallel sampling environments
max_path_length = 100  # assumed value; maximum episode length


def run_task(snapshot_config, *_):
    """Run task."""
    with LocalTFRunner(snapshot_config=snapshot_config,
                       max_cpus=n_envs) as runner:
        env = TfEnv(env_name='CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=max_path_length,
                    discount=0.99,
                    max_kl_step=0.01)

        runner.setup(algo=algo,
                     env=env,
                     sampler_cls=BatchSampler,
                     sampler_args={'n_envs': n_envs})

        runner.train(n_epochs=100, batch_size=4000, plot=False)


run_experiment(run_task, snapshot_mode='last', seed=1)
Example #3
        runner.setup(algo,
                     env,
                     sampler_cls=LocalSampler,
                     sampler_args=None,
                     worker_class=TaskEmbeddingWorker)
        runner.train(n_epochs=600, batch_size=v.batch_size, plot=False)


config = dict(
    tasks=TASKS,
    latent_length=1,
    inference_window=2,
    batch_size=1024 * len(TASKS),
    policy_ent_coeff=2e-2,  # 2e-2
    embedding_ent_coeff=2.2e-3,  # 1e-2
    inference_ce_coeff=5e-2,  # 1e-2
    max_path_length=100,
    embedding_init_std=1.0,
    embedding_max_std=2.0,
    embedding_min_std=0.38,
    policy_init_std=1.0,
    policy_max_std=None,
    policy_min_std=None,
)

run_experiment(run_task,
               snapshot_mode='last',
               seed=1,
               variant=config,
               plot=False)
Example #4
#!/usr/bin/env python3
"""This is an example to resume training programmatically."""
from metarl.experiment import run_experiment
from metarl.tf.experiment import LocalTFRunner


def run_task(snapshot_config, *_):
    """Run task."""
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        runner.restore(from_dir='dir/', from_epoch=2)
        runner.resume()


run_experiment(
    run_task,
    log_dir='new_dir/',
    snapshot_mode='last',
    seed=1,
)
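For runner.restore(from_dir='dir/', from_epoch=2) to find an epoch-2 snapshot,
the original run must have saved per-epoch snapshots into dir/. A minimal sketch
of such a launch (log_dir and snapshot_mode='all' are assumptions here, and
run_task stands for whatever task produced the snapshots being resumed):

run_experiment(
    run_task,             # the task that originally wrote the snapshots
    log_dir='dir/',       # assumed: same directory later passed to runner.restore()
    snapshot_mode='all',  # assumed: keep every epoch so from_epoch=2 exists
    seed=1,
)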
Example #5

# The following task comes from a test; DummyPolicy and InstrumentedNOP are test
# fixtures whose imports are not shown in the original fragment.
def run_task(*_):
    env = normalize(gym.make('Pendulum-v0'))

    policy = DummyPolicy(env_spec=env)

    baseline = LinearFeatureBaseline(env_spec=env)
    algo = InstrumentedNOP(env=env,
                           policy=policy,
                           baseline=baseline,
                           batch_size=4000,
                           max_path_length=100,
                           n_itr=4,
                           discount=0.99,
                           step_size=0.01,
                           plot=True)
    algo.train()
    env.close()


run_experiment(
    run_task,
    # Number of parallel workers for sampling
    n_parallel=6,
    # Only keep the snapshot parameters for the last iteration
    snapshot_mode='last',
    # Specifies the seed for the experiment. If this is not provided, a random
    # seed will be used
    seed=1,
    plot=True,
)
Example #6
                   target_network_update_freq=2,
                   buffer_batch_size=32)

        runner.setup(algo, env)
        runner.train(n_epochs=n_epochs, batch_size=sampler_batch_size)


@click.command()
@click.option('--buffer_size', type=int, default=int(5e4))
def _args(buffer_size):
    """A click command to parse arguments for automated testing purposes.

    Args:
        buffer_size (int): Size of replay buffer.

    Returns:
        int: The input argument as-is.

    """
    return buffer_size


# With standalone_mode=False, click returns the command's return value instead
# of calling sys.exit(), so the parsed buffer size can be used directly below.
replay_buffer_size = _args.main(standalone_mode=False)
run_experiment(
    run_task,
    snapshot_mode='last',
    seed=1,
    plot=False,
    variant={'buffer_size': replay_buffer_size},
)
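The run_task consuming this variant is truncated above; only the tail of its
algorithm construction is visible. A minimal sketch of how the buffer size would
typically reach it (the signature mirrors the variant_data pattern used in the
next example; the body is an assumption):

def run_task(snapshot_config, variant_data, *_):
    """Set up and train, sizing the replay buffer from the variant dict."""
    # Value forwarded by run_experiment(..., variant={'buffer_size': ...}).
    buffer_size = variant_data['buffer_size']
    # ... build the env, Q-function, a replay buffer holding `buffer_size`
    # transitions, and the algorithm whose final arguments appear in the
    # fragment above ...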
Example #7
                    discount=0.99,
                    max_kl_step=0.01,
                    flatten_input=False)

        runner.setup(algo, env)
        runner.train(n_epochs=100, batch_size=variant_data['batch_size'])


@click.command()
@click.option('--batch_size', '_batch_size', type=int, default=4000)
def _args(_batch_size):
    """A click command to parse arguments for automated testing purposes.

    Args:
        _batch_size (int): Number of environment steps in one batch.

    Returns:
        int: The input argument as-is.

    """
    return _batch_size


batch_size = _args.main(standalone_mode=False)
run_experiment(
    run_task,
    snapshot_mode='last',
    seed=1,
    variant={'batch_size': batch_size},
)