def setup_method(self):
    super().setup_method()
    self.env = GarageEnv(normalize(gym.make('InvertedDoublePendulum-v2')))
    self.policy = GaussianMLPPolicy(
        env_spec=self.env.spec,
        hidden_sizes=(64, 64),
        hidden_nonlinearity=tf.nn.tanh,
        output_nonlinearity=None,
    )
    self.baseline = GaussianMLPBaseline(
        env_spec=self.env.spec,
        hidden_sizes=(32, 32),
    )
def ppo_pendulum(ctxt=None, seed=1):
    """Train PPO with InvertedDoublePendulum-v2 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.

    """
    set_seed(seed)
    with LocalTFRunner(snapshot_config=ctxt) as runner:
        env = GarageEnv(normalize(gym.make('InvertedDoublePendulum-v2')))

        policy = GaussianMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=(64, 64),
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=None,
        )

        baseline = GaussianMLPBaseline(
            env_spec=env.spec,
            hidden_sizes=(32, 32),
            use_trust_region=True,
        )

        # NOTE: make sure when setting entropy_method to 'max', set
        # center_adv to False and turn off policy gradient. See
        # tf.algos.NPO for detailed documentation.
        algo = PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=100,
            discount=0.99,
            gae_lambda=0.95,
            lr_clip_range=0.2,
            optimizer_args=dict(
                batch_size=32,
                max_epochs=10,
            ),
            stop_entropy_gradient=True,
            entropy_method='max',
            policy_ent_coeff=0.02,
            center_adv=False,
        )

        runner.setup(algo, env)
        runner.train(n_epochs=120, batch_size=2048, plot=False)
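# A minimal launcher sketch for ppo_pendulum above, assuming garage's
# standard `wrap_experiment` entry point (which constructs the
# ExperimentContext passed in as `ctxt`). The call below is illustrative,
# not part of the original example script.
from garage import wrap_experiment

if __name__ == '__main__':
    ppo_pendulum_experiment = wrap_experiment(ppo_pendulum)
    ppo_pendulum_experiment(seed=1)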
def ppo_garage_tf(ctxt, env_id, seed):
    """Create garage TensorFlow PPO model and training.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        env_id (str): Environment id of the task.
        seed (int): Random positive integer for the trial.

    """
    deterministic.set_seed(seed)

    with LocalTFRunner(ctxt) as runner:
        env = GarageEnv(normalize(gym.make(env_id)))

        policy = TF_GMP(
            env_spec=env.spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=None,
        )

        baseline = TF_GMB(
            env_spec=env.spec,
            hidden_sizes=(32, 32),
            use_trust_region=False,
            optimizer=FirstOrderOptimizer,
            optimizer_args=dict(
                batch_size=32,
                max_epochs=10,
                learning_rate=3e-4,
            ),
        )

        algo = TF_PPO(env_spec=env.spec,
                      policy=policy,
                      baseline=baseline,
                      max_path_length=hyper_parameters['max_path_length'],
                      discount=0.99,
                      gae_lambda=0.95,
                      center_adv=True,
                      lr_clip_range=0.2,
                      optimizer_args=dict(batch_size=32,
                                          max_epochs=10,
                                          learning_rate=3e-4,
                                          verbose=True))

        runner.setup(algo, env)
        runner.train(n_epochs=hyper_parameters['n_epochs'],
                     batch_size=hyper_parameters['batch_size'])
def ppo_garage_pytorch(ctxt, env_id, seed):
    """Create garage PyTorch PPO model and training.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        env_id (str): Environment id of the task.
        seed (int): Random positive integer for the trial.

    """
    deterministic.set_seed(seed)

    runner = LocalRunner(ctxt)

    env = GarageEnv(normalize(gym.make(env_id)))

    policy = PyTorch_GMP(env.spec,
                         hidden_sizes=(32, 32),
                         hidden_nonlinearity=torch.tanh,
                         output_nonlinearity=None)

    value_function = GaussianMLPValueFunction(env_spec=env.spec,
                                              hidden_sizes=(32, 32),
                                              hidden_nonlinearity=torch.tanh,
                                              output_nonlinearity=None)

    policy_optimizer = OptimizerWrapper((torch.optim.Adam, dict(lr=2.5e-4)),
                                        policy,
                                        max_optimization_epochs=10,
                                        minibatch_size=64)

    vf_optimizer = OptimizerWrapper((torch.optim.Adam, dict(lr=2.5e-4)),
                                    value_function,
                                    max_optimization_epochs=10,
                                    minibatch_size=64)

    algo = PyTorch_PPO(env_spec=env.spec,
                       policy=policy,
                       value_function=value_function,
                       policy_optimizer=policy_optimizer,
                       vf_optimizer=vf_optimizer,
                       max_path_length=hyper_parameters['max_path_length'],
                       discount=0.99,
                       gae_lambda=0.95,
                       center_adv=True,
                       lr_clip_range=0.2)

    runner.setup(algo, env)
    runner.train(n_epochs=hyper_parameters['n_epochs'],
                 batch_size=hyper_parameters['batch_size'])
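# ppo_garage_tf and ppo_garage_pytorch both read a module-level
# `hyper_parameters` dict defined in their benchmark script. A plausible
# sketch of the keys they use; the values below are illustrative
# assumptions, not the original benchmark configuration.
hyper_parameters = {
    'n_epochs': 500,          # passed to runner.train()
    'max_path_length': 100,   # passed to the PPO constructor
    'batch_size': 1024,       # environment steps gathered per epoch
}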
def test_sac_inverted_double_pendulum():
    """Test SAC performance on InvertedDoublePendulum-v2."""
    # pylint: disable=unexpected-keyword-arg
    env = GarageEnv(normalize(gym.make('InvertedDoublePendulum-v2')))
    deterministic.set_seed(0)
    policy = TanhGaussianMLPPolicy(
        env_spec=env.spec,
        hidden_sizes=[32, 32],
        hidden_nonlinearity=torch.nn.ReLU,
        output_nonlinearity=None,
        min_std=np.exp(-20.),
        max_std=np.exp(2.),
    )

    qf1 = ContinuousMLPQFunction(env_spec=env.spec,
                                 hidden_sizes=[32, 32],
                                 hidden_nonlinearity=F.relu)

    qf2 = ContinuousMLPQFunction(env_spec=env.spec,
                                 hidden_sizes=[32, 32],
                                 hidden_nonlinearity=F.relu)
    replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))
    runner = LocalRunner(snapshot_config=snapshot_config)
    sac = SAC(env_spec=env.spec,
              policy=policy,
              qf1=qf1,
              qf2=qf2,
              gradient_steps_per_itr=100,
              max_path_length=100,
              replay_buffer=replay_buffer,
              min_buffer_size=1e3,
              target_update_tau=5e-3,
              discount=0.99,
              buffer_batch_size=64,
              reward_scale=1.,
              steps_per_epoch=2)
    runner.setup(sac, env, sampler_cls=LocalSampler)
    if torch.cuda.is_available():
        tu.set_gpu_mode(True)
    else:
        tu.set_gpu_mode(False)
    sac.to()
    ret = runner.train(n_epochs=12, batch_size=200, plot=False)
    # check that automatic entropy tuning is used
    assert sac._use_automatic_entropy_tuning
    # assert that there was a gradient properly connected to alpha
    # this doesn't verify that the path from the temperature objective is
    # correct.
    assert not torch.allclose(torch.Tensor([1.]), sac._log_alpha.to('cpu'))
    # check that the policy is learning beyond a predefined threshold
    assert ret > 85
def ddpg_garage_tf(ctxt, env_id, seed):
    """Create garage TensorFlow DDPG model and training.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        env_id (str): Environment id of the task.
        seed (int): Random positive integer for the trial.

    """
    deterministic.set_seed(seed)

    with LocalTFRunner(ctxt) as runner:
        env = GarageEnv(normalize(gym.make(env_id)))

        policy = ContinuousMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=hyper_parameters['policy_hidden_sizes'],
            hidden_nonlinearity=tf.nn.relu,
            output_nonlinearity=tf.nn.tanh)

        exploration_policy = AddOrnsteinUhlenbeckNoise(
            env.spec, policy, sigma=hyper_parameters['sigma'])

        qf = ContinuousMLPQFunction(
            env_spec=env.spec,
            hidden_sizes=hyper_parameters['qf_hidden_sizes'],
            hidden_nonlinearity=tf.nn.relu)

        replay_buffer = PathBuffer(
            capacity_in_transitions=hyper_parameters['replay_buffer_size'])

        algo = DDPG(env_spec=env.spec,
                    policy=policy,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    steps_per_epoch=hyper_parameters['steps_per_epoch'],
                    policy_lr=hyper_parameters['policy_lr'],
                    qf_lr=hyper_parameters['qf_lr'],
                    target_update_tau=hyper_parameters['tau'],
                    n_train_steps=hyper_parameters['n_train_steps'],
                    discount=hyper_parameters['discount'],
                    min_buffer_size=int(1e4),
                    exploration_policy=exploration_policy,
                    policy_optimizer=tf.compat.v1.train.AdamOptimizer,
                    qf_optimizer=tf.compat.v1.train.AdamOptimizer)

        runner.setup(algo, env)
        runner.train(n_epochs=hyper_parameters['n_epochs'],
                     batch_size=hyper_parameters['n_rollout_steps'])
def test_obs_not_image(self):
    env = GarageEnv(DummyDiscretePixelEnv())
    with mock.patch(('tests.fixtures.models.SimpleCNNModel._build'),
                    autospec=True,
                    side_effect=SimpleCNNModel._build) as build:
        with mock.patch(('garage.tf.models.'
                         'cnn_mlp_merge_model.CNNModel'),
                        new=SimpleCNNModel):
            with mock.patch(('garage.tf.models.'
                             'cnn_mlp_merge_model.MLPMergeModel'),
                            new=SimpleMLPMergeModel):
                qf = ContinuousCNNQFunction(env_spec=env.spec,
                                            filters=((5, (3, 3)), ),
                                            strides=(1, ))

                # ensure non-image observations are not normalized
                # in _initialize() and get_qval()
                normalized_obs = build.call_args_list[0][0][1]
                assert normalized_obs == qf.inputs[0]

                fake_obs = [
                    np.full(env.spec.observation_space.shape, 255.)
                ]
                assert (self.sess.run(
                    normalized_obs,
                    feed_dict={qf.inputs[0]: fake_obs}) == 255.).all()

                # ensure non-image observations are not normalized
                # in get_qval_sym()
                obs_dim = env.spec.observation_space.shape
                state_input = tf.compat.v1.placeholder(tf.float32,
                                                       shape=(None, ) +
                                                       obs_dim)
                act_dim = env.spec.observation_space.shape
                action_input = tf.compat.v1.placeholder(tf.float32,
                                                        shape=(None, ) +
                                                        act_dim)

                qf.get_qval_sym(state_input, action_input, name='another')
                normalized_obs = build.call_args_list[1][0][1]

                assert (self.sess.run(
                    normalized_obs,
                    feed_dict={state_input: fake_obs}) == 255.).all()
def test_output_shape(self, obs_dim, action_dim):
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.q_functions.'
                     'continuous_mlp_q_function.MLPMergeModel'),
                    new=SimpleMLPMergeModel):
        qf = ContinuousMLPQFunction(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    obs = obs.flatten()
    act = np.full(action_dim, 0.5).flatten()

    outputs = qf.get_qval([obs], [act])

    assert outputs.shape == (1, 1)
def test_polopt_algo(self, algo_cls, env_cls, policy_cls, baseline_cls):
    logger.log('Testing {}, {}, {}'.format(algo_cls.__name__,
                                           env_cls.__name__,
                                           policy_cls.__name__))
    env = GarageEnv(env_cls())
    policy = policy_cls(env_spec=env.spec)
    baseline = baseline_cls(env_spec=env.spec)
    algo = algo_cls(env=env,
                    policy=policy,
                    baseline=baseline,
                    **(algo_args.get(algo_cls, dict())))
    algo.train()
    assert not np.any(np.isnan(policy.get_param_values()))
    env.close()
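# test_polopt_algo looks up per-algorithm keyword overrides in a
# module-level `algo_args` mapping. A minimal hypothetical sketch of that
# fixture; the TRPO entry and its value are illustrative assumptions, not
# the original test's configuration.
from garage.tf.algos import TRPO

algo_args = {
    TRPO: dict(max_kl_step=0.01),  # e.g. tighten TRPO's KL constraint
}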
def gaussian_cnn_baseline(ctxt, env_id, seed):
    """Create Gaussian CNN Baseline on TF-PPO.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        env_id (str): Environment id of the task.
        seed (int): Random positive integer for the trial.

    """
    deterministic.set_seed(seed)

    with LocalTFRunner(ctxt, max_cpus=12) as runner:
        env = GarageEnv(normalize(gym.make(env_id)))

        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=params['conv_filters'],
                                      strides=params['conv_strides'],
                                      padding=params['conv_pad'],
                                      hidden_sizes=params['hidden_sizes'])

        baseline = GaussianCNNBaseline(
            env_spec=env.spec,
            regressor_args=dict(
                filters=params['conv_filters'],
                strides=params['conv_strides'],
                padding=params['conv_pad'],
                hidden_sizes=params['hidden_sizes'],
                use_trust_region=params['use_trust_region']))

        algo = PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=100,
            discount=0.99,
            gae_lambda=0.95,
            lr_clip_range=0.2,
            policy_ent_coeff=0.0,
            flatten_input=False,
            optimizer_args=dict(
                batch_size=32,
                max_epochs=10,
                learning_rate=1e-3,
            ),
        )

        runner.setup(algo, env)
        runner.train(n_epochs=params['n_epochs'],
                     batch_size=params['batch_size'])
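# gaussian_cnn_baseline reads a module-level `params` dict. A plausible
# sketch of its contents for a pixel task; every value below is an
# illustrative assumption, not the original benchmark configuration.
params = {
    'conv_filters': ((32, (5, 5)), (64, (3, 3))),  # (n_filters, (h, w))
    'conv_strides': (4, 2),
    'conv_pad': 'VALID',
    'hidden_sizes': (256, ),
    'use_trust_region': True,
    'n_epochs': 1000,
    'batch_size': 2048,
}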
def test_get_action(self, obs_dim, action_dim):
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    policy = GaussianMLPPolicy(env_spec=env.spec)

    env.reset()
    obs, _, _, _ = env.step(1)

    action, _ = policy.get_action(obs.flatten())
    assert env.action_space.contains(action)

    actions, _ = policy.get_actions(
        [obs.flatten(), obs.flatten(), obs.flatten()])
    for action in actions:
        assert env.action_space.contains(action)
def test_get_action(self, obs_dim, action_dim):
    env = GarageEnv(
        DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    policy = CategoricalMLPPolicy(env_spec=env.spec)

    obs = env.reset()

    action, _ = policy.get_action(obs.flatten())
    assert env.action_space.contains(action)

    actions, _ = policy.get_actions(
        [obs.flatten(), obs.flatten(), obs.flatten()])
    for action in actions:
        assert env.action_space.contains(action)
def _init_multi_env_wrapper(self,
                            env_names,
                            sample_strategy=uniform_random_strategy):
    """Helper function to initialize a MultiEnvWrapper.

    Args:
        env_names (list(str)): List of gym.Env names.
        sample_strategy (func): A sampling strategy.

    Returns:
        garage.envs.multi_env_wrapper.MultiEnvWrapper: The wrapped envs.

    """
    task_envs = [GarageEnv(env_name=name) for name in env_names]
    return MultiEnvWrapper(task_envs, sample_strategy=sample_strategy)
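# A usage sketch for the helper above: a hypothetical test method (meant
# to live in the same test class) that swaps in round_robin_strategy,
# also exported by garage.envs.multi_env_wrapper, instead of the
# uniform-random default.
from garage.envs.multi_env_wrapper import round_robin_strategy

def test_round_robin_strategy(self):
    mt_env = self._init_multi_env_wrapper(
        ['CartPole-v0', 'CartPole-v1'],
        sample_strategy=round_robin_strategy)
    obs = mt_env.reset()
    assert mt_env.observation_space.contains(obs)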
def test_get_action(self, obs_dim, action_dim, hidden_dim):
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    policy = GaussianGRUPolicy(env_spec=env.spec,
                               hidden_dim=hidden_dim,
                               state_include_action=False)
    policy.reset(do_resets=None)
    obs = env.reset()

    action, _ = policy.get_action(obs.flatten())
    assert env.action_space.contains(action)

    actions, _ = policy.get_actions([obs.flatten()])
    for action in actions:
        assert env.action_space.contains(action)
def test_mtsac_get_log_alpha(monkeypatch):
    """Check that the private function _get_log_alpha functions correctly.

    MTSAC uses disentangled alphas, meaning that each task gets its own
    entropy temperature, so _get_log_alpha should return the log-alpha
    matching each sample's one-hot task id.
    """
    env_names = ['CartPole-v0', 'CartPole-v1']
    task_envs = [GarageEnv(env_name=name) for name in env_names]
    env = MultiEnvWrapper(task_envs, sample_strategy=round_robin_strategy)
    deterministic.set_seed(0)
    policy = TanhGaussianMLPPolicy(
        env_spec=env.spec,
        hidden_sizes=[1, 1],
        hidden_nonlinearity=torch.nn.ReLU,
        output_nonlinearity=None,
        min_std=np.exp(-20.),
        max_std=np.exp(2.),
    )
    qf1 = ContinuousMLPQFunction(env_spec=env.spec,
                                 hidden_sizes=[1, 1],
                                 hidden_nonlinearity=F.relu)
    qf2 = ContinuousMLPQFunction(env_spec=env.spec,
                                 hidden_sizes=[1, 1],
                                 hidden_nonlinearity=F.relu)
    replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))

    num_tasks = 2
    buffer_batch_size = 2
    mtsac = MTSAC(policy=policy,
                  qf1=qf1,
                  qf2=qf2,
                  gradient_steps_per_itr=150,
                  max_path_length=150,
                  eval_env=env,
                  env_spec=env.spec,
                  num_tasks=num_tasks,
                  steps_per_epoch=5,
                  replay_buffer=replay_buffer,
                  min_buffer_size=1e3,
                  target_update_tau=5e-3,
                  discount=0.99,
                  buffer_batch_size=buffer_batch_size)
    monkeypatch.setattr(mtsac, '_log_alpha', torch.Tensor([1., 2.]))

    for i, _ in enumerate(env_names):
        obs = torch.Tensor([env.reset()] * buffer_batch_size)
        log_alpha = mtsac._get_log_alpha(dict(observation=obs))
        assert (log_alpha == torch.Tensor([i + 1, i + 1])).all().item()
        assert log_alpha.size() == torch.Size([mtsac._buffer_batch_size])
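# The assertions above boil down to one-hot indexing: the last `num_tasks`
# entries of each observation are a one-hot task id, and the matching
# per-task log-alpha is gathered for every sample in the batch. A
# standalone sketch of that selection (assumed-equivalent logic, not the
# garage implementation itself):
import torch

def select_log_alpha(log_alpha, obs, num_tasks):
    """Gather each sample's per-task log-alpha from a batch of obs."""
    one_hots = obs[:, -num_tasks:]      # (batch, num_tasks) one-hot ids
    task_ids = one_hots.argmax(dim=-1)  # (batch,) integer task indices
    return log_alpha[task_ids]          # (batch,) selected log-alphas

# select_log_alpha(torch.Tensor([1., 2.]),
#                  torch.Tensor([[0.5, 0., 1.], [0.3, 1., 0.]]),
#                  num_tasks=2)  # -> tensor([2., 1.])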
def continuous_mlp_baseline(ctxt, env_id, seed):
    """Create Continuous MLP Baseline on TF-PPO.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        env_id (str): Environment id of the task.
        seed (int): Random positive integer for the trial.

    """
    deterministic.set_seed(seed)

    with LocalTFRunner(ctxt) as runner:
        env = GarageEnv(normalize(gym.make(env_id)))

        policy = GaussianLSTMPolicy(
            env_spec=env.spec,
            hidden_dim=hyper_params['policy_hidden_sizes'],
            hidden_nonlinearity=hyper_params['hidden_nonlinearity'],
        )

        baseline = ContinuousMLPBaseline(
            env_spec=env.spec,
            hidden_sizes=(64, 64),
        )

        algo = PPO(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=hyper_params['max_path_length'],
                   discount=hyper_params['discount'],
                   gae_lambda=hyper_params['gae_lambda'],
                   lr_clip_range=hyper_params['lr_clip_range'],
                   entropy_method=hyper_params['entropy_method'],
                   policy_ent_coeff=hyper_params['policy_ent_coeff'],
                   optimizer_args=dict(
                       batch_size=32,
                       max_epochs=10,
                       learning_rate=1e-3,
                   ),
                   center_adv=hyper_params['center_adv'],
                   stop_entropy_gradient=True)

        runner.setup(algo,
                     env,
                     sampler_args=dict(n_envs=hyper_params['n_envs']))
        runner.train(n_epochs=hyper_params['n_epochs'],
                     batch_size=hyper_params['n_rollout_steps'])
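# continuous_mlp_baseline reads a module-level `hyper_params` dict. A
# plausible sketch of the keys it uses; all values are illustrative
# assumptions (note that entropy_method='max' pairs with center_adv=False
# and stop_entropy_gradient=True, as in the PPO example earlier).
hyper_params = {
    'policy_hidden_sizes': 32,           # GaussianLSTMPolicy hidden_dim
    'hidden_nonlinearity': tf.nn.tanh,
    'max_path_length': 100,
    'discount': 0.99,
    'gae_lambda': 0.95,
    'lr_clip_range': 0.2,
    'entropy_method': 'max',
    'policy_ent_coeff': 0.02,
    'center_adv': False,
    'n_envs': 8,
    'n_epochs': 100,
    'n_rollout_steps': 2048,
}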
def test_output_shape(self, batch_size, hidden_sizes):
    env_spec = GarageEnv(DummyBoxEnv())
    obs_dim = env_spec.observation_space.flat_dim
    act_dim = env_spec.action_space.flat_dim
    obs = torch.ones(batch_size, obs_dim, dtype=torch.float32)
    act = torch.ones(batch_size, act_dim, dtype=torch.float32)
    qf = ContinuousMLPQFunction(env_spec=env_spec,
                                hidden_nonlinearity=None,
                                hidden_sizes=hidden_sizes,
                                hidden_w_init=nn.init.ones_,
                                output_w_init=nn.init.ones_)
    output = qf(obs, act)

    assert output.shape == (batch_size, 1)
def test_get_action_img_obs(self, hidden_channels, kernel_sizes, strides,
                            hidden_sizes):
    """Test get_action function with akro.Image observation space."""
    env = GarageEnv(DummyDiscretePixelEnv(), is_image=True)
    env = self._initialize_obs_env(env)
    policy = CategoricalCNNPolicy(env=env,
                                  kernel_sizes=kernel_sizes,
                                  hidden_channels=hidden_channels,
                                  strides=strides,
                                  hidden_sizes=hidden_sizes)
    env.reset()
    obs, _, _, _ = env.step(1)

    action, _ = policy.get_action(obs)
    assert env.action_space.contains(action)
def test_build(self, obs_dim, action_dim):
    env = GarageEnv(
        DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    qf = DiscreteMLPQFunction(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)

    output1 = self.sess.run(qf.q_vals, feed_dict={qf.input: [obs]})

    input_var = tf.compat.v1.placeholder(tf.float32,
                                         shape=(None, ) + obs_dim)
    q_vals = qf.build(input_var, 'another')
    output2 = self.sess.run(q_vals, feed_dict={input_var: [obs]})

    assert np.array_equal(output1, output2)
def stest_cma_es_cartpole():
    """Test CMA-ES with the CartPole-v1 environment."""
    with LocalTFRunner(snapshot_config) as runner:
        with TensorBoardPytorchWriter(PROJECT_APP_PATH.user_log /
                                      "CMA") as writer:
            env = GarageEnv(env_name="CartPole-v1")

            algo = CovarianceMatrixAdaptationEvolutionStrategyAgent(
                env_spec=env.spec, max_rollout_length=100)
            algo.build()

            runner.setup(algo, env, sampler_cls=LocalSampler)
            runner.train(n_epochs=1, batch_size=1000)
            env.close()
def tutorial_cem(ctxt=None):
    """Train CEM with the CartPole-v1 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.

    """
    set_seed(100)
    with LocalTFRunner(ctxt) as runner:
        env = GarageEnv(env_name='CartPole-v1')
        policy = CategoricalMLPPolicy(env.spec)
        algo = SimpleCEM(env.spec, policy)
        runner.setup(algo, env)
        runner.train(n_epochs=100, batch_size=1000)
def tutorial_vpg(ctxt=None):
    """Train VPG with the PointEnv environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.

    """
    set_seed(100)
    runner = LocalRunner(ctxt)
    env = GarageEnv(PointEnv())
    policy = GaussianMLPPolicy(env.spec)
    algo = SimpleVPG(env.spec, policy)
    runner.setup(algo, env)
    runner.train(n_epochs=200, batch_size=4000)
def test_get_embedding(self, obs_dim, embedding_dim):
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
    embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                               output_space=env.spec.action_space)
    embedding = GaussianMLPEncoder(embedding_spec)
    task_input = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, None,
                                                 embedding.input_dim))
    embedding.build(task_input)

    env.reset()
    obs, _, _, _ = env.step(1)

    latent, _ = embedding.forward(obs)
    assert env.action_space.contains(latent)
def test_one_hot_observation_space(self):
    """Test the one-hot representation of the observation space."""
    envs = ['CartPole-v0', 'CartPole-v1']
    mt_env = self._init_multi_env_wrapper(envs)
    cartpole = GarageEnv(env_name='CartPole-v0')
    cartpole_lb, cartpole_ub = cartpole.observation_space.bounds
    obs_space = akro.Box(np.concatenate([cartpole_lb, np.zeros(2)]),
                         np.concatenate([cartpole_ub, np.ones(2)]))
    assert mt_env.observation_space.shape == obs_space.shape
    assert (
        mt_env.observation_space.bounds[0] == obs_space.bounds[0]).all()
    assert (
        mt_env.observation_space.bounds[1] == obs_space.bounds[1]).all()
def test_build(self, obs_dim, action_dim):
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    policy = GaussianMLPPolicy(env_spec=env.spec)
    obs = env.reset()

    state_input = tf.compat.v1.placeholder(tf.float32,
                                           shape=(None, None,
                                                  policy.input_dim))
    dist_sym = policy.build(state_input, name='dist_sym').dist
    dist_sym2 = policy.build(state_input, name='dist_sym2').dist
    output1 = self.sess.run([dist_sym.loc],
                            feed_dict={state_input: [[obs.flatten()]]})
    output2 = self.sess.run([dist_sym2.loc],
                            feed_dict={state_input: [[obs.flatten()]]})
    assert np.array_equal(output1, output2)
def test_no_seed():
    max_path_length = 16
    env = GarageEnv(PointEnv())
    policy = FixedPolicy(env.spec,
                         scripted_actions=[
                             env.action_space.sample()
                             for _ in range(max_path_length)
                         ])
    n_workers = 8
    workers = WorkerFactory(seed=None,
                            max_path_length=max_path_length,
                            n_workers=n_workers)
    sampler = LocalSampler.from_worker_factory(workers, policy, env)
    rollouts = sampler.obtain_samples(0, 160, policy)
    assert sum(rollouts.lengths) >= 160
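# For contrast with test_no_seed: a sketch of the seeded setup, where
# giving WorkerFactory an integer seed makes sampling reproducible. Same
# garage API as above; the helper name and seed value are assumptions.
def make_seeded_sampler(env, policy, seed=42, max_path_length=16):
    """Build a LocalSampler whose workers are deterministically seeded."""
    workers = WorkerFactory(seed=seed,
                            max_path_length=max_path_length,
                            n_workers=8)
    return LocalSampler.from_worker_factory(workers, policy, env)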
def test_meta_evaluator():
    set_seed(100)
    tasks = SetTaskSampler(lambda: GarageEnv(PointEnv()))
    max_episode_length = 200
    with tempfile.TemporaryDirectory() as log_dir_name:
        runner = LocalRunner(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        env = GarageEnv(PointEnv())
        algo = OptimalActionInference(env=env,
                                      max_episode_length=max_episode_length)
        runner.setup(algo, env)
        meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                  max_episode_length=max_episode_length,
                                  n_test_tasks=10)
        log_file = tempfile.NamedTemporaryFile()
        csv_output = CsvOutput(log_file.name)
        logger.add_output(csv_output)
        meta_eval.evaluate(algo)
        logger.log(tabular)
        meta_eval.evaluate(algo)
        logger.log(tabular)
        logger.dump_output_type(CsvOutput)
        logger.remove_output_type(CsvOutput)
        with open(log_file.name, 'r') as file:
            rows = list(csv.DictReader(file))
        assert len(rows) == 2
        assert float(
            rows[0]['MetaTest/__unnamed_task__/TerminationRate']) < 1.0
        assert float(rows[0]['MetaTest/__unnamed_task__/Iteration']) == 0
        assert (float(rows[0]['MetaTest/__unnamed_task__/MaxReturn']) >=
                float(rows[0]['MetaTest/__unnamed_task__/AverageReturn']))
        assert (float(rows[0]['MetaTest/__unnamed_task__/AverageReturn']) >=
                float(rows[0]['MetaTest/__unnamed_task__/MinReturn']))
        assert float(rows[1]['MetaTest/__unnamed_task__/Iteration']) == 1
def ddpg_pendulum(ctxt=None, seed=1, lr=1e-4):
    """Train DDPG with InvertedDoublePendulum-v2 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.
        lr (float): Learning rate for policy optimization.

    """
    set_seed(seed)
    runner = LocalRunner(ctxt)
    env = GarageEnv(normalize(gym.make('InvertedDoublePendulum-v2')))

    policy = DeterministicMLPPolicy(env_spec=env.spec,
                                    hidden_sizes=[64, 64],
                                    hidden_nonlinearity=F.relu,
                                    output_nonlinearity=torch.tanh)

    exploration_policy = AddOrnsteinUhlenbeckNoise(env.spec,
                                                   policy,
                                                   sigma=0.2)

    qf = ContinuousMLPQFunction(env_spec=env.spec,
                                hidden_sizes=[64, 64],
                                hidden_nonlinearity=F.relu)

    replay_buffer = SimpleReplayBuffer(env_spec=env.spec,
                                       size_in_transitions=int(1e6),
                                       time_horizon=100)

    policy_optimizer = (torch.optim.Adagrad, {'lr': lr, 'lr_decay': 0.99})

    ddpg = DDPG(env_spec=env.spec,
                policy=policy,
                qf=qf,
                replay_buffer=replay_buffer,
                steps_per_epoch=20,
                n_train_steps=50,
                min_buffer_size=int(1e4),
                exploration_policy=exploration_policy,
                target_update_tau=1e-2,
                discount=0.9,
                policy_optimizer=policy_optimizer,
                qf_optimizer=torch.optim.Adam)

    runner.setup(algo=ddpg, env=env)
    runner.train(n_epochs=500, batch_size=100)
def ddpg_pendulum(ctxt=None, seed=1):
    """Train DDPG with InvertedDoublePendulum-v2 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.

    """
    set_seed(seed)
    with LocalTFRunner(snapshot_config=ctxt) as runner:
        env = GarageEnv(gym.make('InvertedDoublePendulum-v2'))

        policy = ContinuousMLPPolicy(env_spec=env.spec,
                                     hidden_sizes=[64, 64],
                                     hidden_nonlinearity=tf.nn.relu,
                                     output_nonlinearity=tf.nn.tanh)

        exploration_policy = AddOrnsteinUhlenbeckNoise(env.spec,
                                                       policy,
                                                       sigma=0.2)

        qf = ContinuousMLPQFunction(env_spec=env.spec,
                                    hidden_sizes=[64, 64],
                                    hidden_nonlinearity=tf.nn.relu)

        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))

        ddpg = DDPG(env_spec=env.spec,
                    policy=policy,
                    policy_lr=1e-4,
                    qf_lr=1e-3,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    max_path_length=100,
                    steps_per_epoch=20,
                    target_update_tau=1e-2,
                    n_train_steps=50,
                    discount=0.9,
                    min_buffer_size=int(1e4),
                    exploration_policy=exploration_policy,
                    policy_optimizer=tf.compat.v1.train.AdamOptimizer,
                    qf_optimizer=tf.compat.v1.train.AdamOptimizer)

        runner.setup(algo=ddpg, env=env)
        runner.train(n_epochs=500, batch_size=100)
def test_get_action(self, obs_dim, action_dim, hidden_dim):
    env = GarageEnv(
        DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    policy = CategoricalLSTMPolicy(env_spec=env.spec,
                                   hidden_dim=hidden_dim,
                                   state_include_action=False)
    policy.reset()
    obs = env.reset()

    action, _ = policy.get_action(obs.flatten())
    assert env.action_space.contains(action)

    actions, _ = policy.get_actions([obs.flatten()])
    for action in actions:
        assert env.action_space.contains(action)