Ejemplo n.º 1
0
    def test_cma_es_cartpole(self):
        """Test CMAES with Cartpole-v1 environment."""
        with LocalTFRunner() as runner:
            env = TfEnv(env_name='CartPole-v1')

            policy = CategoricalMLPPolicy(
                name='policy', env_spec=env.spec, hidden_sizes=(32, 32))

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            runner.initialize_tf_vars()

            n_samples = 20

            algo = CMAES(
                env_spec=env.spec,
                policy=policy,
                baseline=baseline,
                max_path_length=100,
                n_samples=n_samples)

            runner.setup(algo, env, sampler_cls=OnPolicyVectorizedSampler)
            runner.train(n_epochs=1, batch_size=1000, n_epoch_cycles=n_samples)
            # No assertion on return because CMAES is not stable.

            env.close()
Ejemplo n.º 2
0
def cma_es_cartpole(ctxt=None, seed=1):
    """Train CMA_ES with Cartpole-v1 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.

    """
    set_seed(seed)
    with LocalTFRunner(ctxt) as runner:
        env = TfEnv(env_name='CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))
        baseline = LinearFeatureBaseline(env_spec=env.spec)

        n_samples = 20

        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     max_path_length=100,
                     n_samples=n_samples)

        runner.setup(algo, env, sampler_cls=OnPolicyVectorizedSampler)
        runner.train(n_epochs=100, batch_size=1000)
Ejemplo n.º 3
0
def cma_es_cartpole(ctxt=None, seed=1):
    """Train CMA_ES with Cartpole-v1 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by Trainer to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.

    """
    set_seed(seed)
    with TFTrainer(ctxt) as trainer:
        env = GymEnv('CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))

        n_samples = 20

        sampler = LocalSampler(agents=policy,
                               envs=env,
                               max_episode_length=env.spec.max_episode_length,
                               is_tf_worker=True)

        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     sampler=sampler,
                     n_samples=n_samples)

        trainer.setup(algo, env)
        trainer.train(n_epochs=100, batch_size=1000)
Ejemplo n.º 4
0
def run_task(snapshot_config, *_):
    """Train CMA_ES with Cartpole-v1 environment.

    Args:
        snapshot_config (garage.experiment.SnapshotConfig): The snapshot
            configuration used by LocalRunner to create the snapshotter.
        *_ (object): Ignored by this function.

    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(env_name='CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        n_samples = 20

        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     max_path_length=100,
                     n_samples=n_samples)

        runner.setup(algo, env, sampler_cls=OnPolicyVectorizedSampler)
        runner.train(n_epochs=100, batch_size=1000)
Ejemplo n.º 5
0
    def test_cma_es_cartpole(self):
        """Test CMAES with Cartpole-v1 environment."""
        with TFTrainer(snapshot_config) as trainer:
            env = GymEnv('CartPole-v1')

            policy = CategoricalMLPPolicy(name='policy',
                                          env_spec=env.spec,
                                          hidden_sizes=(32, 32))

            n_samples = 20

            sampler = LocalSampler(
                agents=policy,
                envs=env,
                max_episode_length=env.spec.max_episode_length,
                is_tf_worker=True)

            algo = CMAES(env_spec=env.spec,
                         policy=policy,
                         sampler=sampler,
                         n_samples=n_samples)

            trainer.setup(algo, env)
            trainer.train(n_epochs=1, batch_size=1000)
            # No assertion on return because CMAES is not stable.

            env.close()
Ejemplo n.º 6
0
def run_task(snapshot_config, *_):
    """Run task."""
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        # env = TfEnv(normalize(MassSpringEnv_OptK_HwAsAction(params), normalize_action=False, normalize_obs=False, normalize_reward=True, reward_alpha=0.1))
        env = TfEnv(MassSpringEnv_OptK_HwAsAction(params))

        zip_project(log_dir=runner._snapshotter._snapshot_dir)

        comp_policy_model = MLPModel(
            output_dim=1,
            hidden_sizes=params.comp_policy_network_size,
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=tf.nn.tanh,
        )

        mech_policy_model = MechPolicyModel_OptK_HwAsAction(params)

        policy = CompMechPolicy_OptK_HwAsAction(
            name='comp_mech_policy',
            env_spec=env.spec,
            comp_policy_model=comp_policy_model,
            mech_policy_model=mech_policy_model)

        # baseline = GaussianMLPBaseline(
        #     env_spec=env.spec,
        #     regressor_args=dict(
        #         hidden_sizes=params.baseline_network_size,
        #         hidden_nonlinearity=tf.nn.tanh,
        #         use_trust_region=True,
        #     ),
        # )

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     **params.cmaes_algo_kwargs)

        runner.setup(algo, env)

        runner.train(**params.cmaes_train_kwargs)
Ejemplo n.º 7
0
def run_task(snapshot_config, *_):
    """Train CMA_ES with Cartpole-v1 environment."""
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(env_name='CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        n_samples = 20

        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     max_path_length=100,
                     n_samples=n_samples)

        runner.setup(algo, env, sampler_cls=OnPolicyVectorizedSampler)
        # NOTE: make sure that n_epoch_cycles == n_samples !
        runner.train(n_epochs=100, batch_size=1000, n_epoch_cycles=n_samples)
Ejemplo n.º 8
0
    def test_cma_es_cartpole(self):
        """Test CMAES with Cartpole-v1 environment."""
        with TFTrainer(snapshot_config) as trainer:
            env = GymEnv('CartPole-v1')

            policy = CategoricalMLPPolicy(name='policy',
                                          env_spec=env.spec,
                                          hidden_sizes=(32, 32))
            baseline = LinearFeatureBaseline(env_spec=env.spec)

            n_samples = 20

            algo = CMAES(env_spec=env.spec,
                         policy=policy,
                         baseline=baseline,
                         n_samples=n_samples)

            trainer.setup(algo, env, sampler_cls=LocalSampler)
            trainer.train(n_epochs=1, batch_size=1000)
            # No assertion on return because CMAES is not stable.

            env.close()