Example 1
    def test_erwr_cartpole(self):
        """Test ERWR with Cartpole-v1 environment."""
        with TFTrainer(snapshot_config, sess=self.sess) as trainer:
            deterministic.set_seed(1)
            env = GymEnv('CartPole-v1')

            policy = CategoricalMLPPolicy(name='policy',
                                          env_spec=env.spec,
                                          hidden_sizes=(32, 32))

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            sampler = LocalSampler(
                agents=policy,
                envs=env,
                max_episode_length=env.spec.max_episode_length,
                is_tf_worker=True)

            algo = ERWR(env_spec=env.spec,
                        policy=policy,
                        baseline=baseline,
                        sampler=sampler,
                        discount=0.99)

            trainer.setup(algo, env)

            last_avg_ret = trainer.train(n_epochs=10, batch_size=10000)
            assert last_avg_ret > 60

            env.close()
Example 2
    def test_cma_es_cartpole(self):
        """Test CMAES with Cartpole-v1 environment."""
        with TFTrainer(snapshot_config) as trainer:
            env = GymEnv('CartPole-v1')

            policy = CategoricalMLPPolicy(name='policy',
                                          env_spec=env.spec,
                                          hidden_sizes=(32, 32))

            n_samples = 20

            sampler = LocalSampler(
                agents=policy,
                envs=env,
                max_episode_length=env.spec.max_episode_length,
                is_tf_worker=True)

            algo = CMAES(env_spec=env.spec,
                         policy=policy,
                         sampler=sampler,
                         n_samples=n_samples)

            trainer.setup(algo, env)
            trainer.train(n_epochs=1, batch_size=1000)
            # No assertion on return because CMAES is not stable.

            env.close()
Example 3
    def test_reps_cartpole(self):
        """Test REPS with gym Cartpole environment."""
        with TFTrainer(snapshot_config, sess=self.sess) as trainer:
            env = GymEnv('CartPole-v0')

            policy = CategoricalMLPPolicy(env_spec=env.spec,
                                          hidden_sizes=[32, 32])

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            sampler = LocalSampler(
                agents=policy,
                envs=env,
                max_episode_length=env.spec.max_episode_length,
                is_tf_worker=True)

            algo = REPS(env_spec=env.spec,
                        policy=policy,
                        baseline=baseline,
                        sampler=sampler,
                        discount=0.99)

            trainer.setup(algo, env)

            last_avg_ret = trainer.train(n_epochs=10, batch_size=4000)
            assert last_avg_ret > 5

            env.close()
Example 4
    def test_ddpg_double_pendulum(self):
        """Test DDPG with Pendulum environment."""
        with LocalTFRunner(snapshot_config, sess=self.sess) as runner:
            env = GymEnv('InvertedDoublePendulum-v2')
            policy = ContinuousMLPPolicy(env_spec=env.spec,
                                         hidden_sizes=[64, 64],
                                         hidden_nonlinearity=tf.nn.relu,
                                         output_nonlinearity=tf.nn.tanh)
            exploration_policy = AddOrnsteinUhlenbeckNoise(env.spec,
                                                           policy,
                                                           sigma=0.2)
            qf = ContinuousMLPQFunction(env_spec=env.spec,
                                        hidden_sizes=[64, 64],
                                        hidden_nonlinearity=tf.nn.relu)
            replay_buffer = PathBuffer(capacity_in_transitions=int(1e5))
            algo = DDPG(
                env_spec=env.spec,
                policy=policy,
                policy_lr=1e-4,
                qf_lr=1e-3,
                qf=qf,
                replay_buffer=replay_buffer,
                max_episode_length=100,
                steps_per_epoch=20,
                target_update_tau=1e-2,
                n_train_steps=50,
                discount=0.9,
                min_buffer_size=int(5e3),
                exploration_policy=exploration_policy,
            )
            runner.setup(algo, env)
            last_avg_ret = runner.train(n_epochs=10, batch_size=100)
            assert last_avg_ret > 60

            env.close()
Example 5
class TestDiscretePolicies(TfGraphTestCase):
    def setup_method(self):
        super().setup_method()
        self.env = GymEnv(DummyDiscreteEnv())

    def teardown_method(self):
        self.env.close()
        super().teardown_method()

    def test_categorical_gru_policy(self):
        categorical_gru_policy = CategoricalGRUPolicy(
            env_spec=self.env, hidden_dim=1, state_include_action=False)
        categorical_gru_policy.reset()

        obs = self.env.observation_space.high
        assert categorical_gru_policy.get_action(obs)

    def test_categorical_lstm_policy(self):
        categorical_lstm_policy = CategoricalLSTMPolicy(
            env_spec=self.env, hidden_dim=1, state_include_action=False)
        categorical_lstm_policy.reset()

        obs = self.env.observation_space.high
        assert categorical_lstm_policy.get_action(obs)

    def test_categorical_mlp_policy(self):
        categorical_mlp_policy = CategoricalMLPPolicy(env_spec=self.env,
                                                      hidden_sizes=(1, ))
        obs = self.env.observation_space.high
        assert categorical_mlp_policy.get_action(obs)
Example 6
    def test_cem_cartpole(self):
        """Test CEM with Cartpole-v1 environment."""
        with TFTrainer(snapshot_config) as trainer:
            env = GymEnv('CartPole-v1')

            policy = CategoricalMLPPolicy(name='policy',
                                          env_spec=env.spec,
                                          hidden_sizes=(32, 32))

            n_samples = 10

            sampler = LocalSampler(
                agents=policy,
                envs=env,
                max_episode_length=env.spec.max_episode_length,
                is_tf_worker=True)

            algo = CEM(env_spec=env.spec,
                       policy=policy,
                       sampler=sampler,
                       best_frac=0.1,
                       n_samples=n_samples)

            trainer.setup(algo, env)
            rtn = trainer.train(n_epochs=10, batch_size=2048)
            assert rtn > 40

            env.close()
Example 7
    def test_dqn_cartpole_pickle(self):
        """Test DQN with CartPole environment."""
        deterministic.set_seed(100)
        with TFTrainer(snapshot_config, sess=self.sess) as trainer:
            n_epochs = 10
            steps_per_epoch = 10
            sampler_batch_size = 500
            num_timesteps = n_epochs * steps_per_epoch * sampler_batch_size
            env = GymEnv('CartPole-v0')
            replay_buffer = PathBuffer(capacity_in_transitions=int(1e4))
            qf = DiscreteMLPQFunction(env_spec=env.spec, hidden_sizes=(64, 64))
            policy = DiscreteQFArgmaxPolicy(env_spec=env.spec, qf=qf)
            epsilon_greedy_policy = EpsilonGreedyPolicy(
                env_spec=env.spec,
                policy=policy,
                total_timesteps=num_timesteps,
                max_epsilon=1.0,
                min_epsilon=0.02,
                decay_ratio=0.1)
            sampler = LocalSampler(
                agents=epsilon_greedy_policy,
                envs=env,
                max_episode_length=env.spec.max_episode_length,
                is_tf_worker=True,
                worker_class=FragmentWorker)
            algo = DQN(env_spec=env.spec,
                       policy=policy,
                       qf=qf,
                       exploration_policy=epsilon_greedy_policy,
                       replay_buffer=replay_buffer,
                       sampler=sampler,
                       qf_lr=1e-4,
                       discount=1.0,
                       min_buffer_size=int(1e3),
                       double_q=False,
                       n_train_steps=500,
                       grad_norm_clipping=5.0,
                       steps_per_epoch=steps_per_epoch,
                       target_network_update_freq=1,
                       buffer_batch_size=32)
            trainer.setup(algo, env)
            with tf.compat.v1.variable_scope(
                    'DiscreteMLPQFunction/mlp/hidden_0', reuse=True):
                bias = tf.compat.v1.get_variable('bias')
                # assign it to all one
                old_bias = tf.ones_like(bias).eval()
                bias.load(old_bias)
                h = pickle.dumps(algo)

            with tf.compat.v1.Session(graph=tf.Graph()):
                pickle.loads(h)
                with tf.compat.v1.variable_scope(
                        'DiscreteMLPQFunction/mlp/hidden_0', reuse=True):
                    new_bias = tf.compat.v1.get_variable('bias')
                    new_bias = new_bias.eval()
                    assert np.array_equal(old_bias, new_bias)

            env.close()
Example 8
    def test_baseline(self):
        """Test the baseline initialization."""
        box_env = GymEnv(DummyBoxEnv())
        deterministic_mlp_baseline = ContinuousMLPBaseline(env_spec=box_env)
        gaussian_mlp_baseline = GaussianMLPBaseline(env_spec=box_env)

        self.sess.run(tf.compat.v1.global_variables_initializer())
        deterministic_mlp_baseline.get_param_values()
        gaussian_mlp_baseline.get_param_values()

        box_env.close()
Example 9
def test_process_env_argument():
    env = GymEnv(env=gym.make('MountainCar-v0'))
    env.close()
    env = GymEnv(env='MountainCar-v0')
    env.close()
    env = GymEnv(gym.make('MountainCar-v0'))
    env.close()
    env = GymEnv('MountainCar-v0')
    env.close()
    with pytest.raises(ValueError, match='GymEnv can take env'):
        env = GymEnv(1)
    env.close()
Example 10
def dqn_cartpole(ctxt=None, seed=24):
    """Train DQN with CartPole-v0 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by Trainer to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.
    """
    set_seed(seed)
    runner = Trainer(ctxt)

    n_epochs = 100
    steps_per_epoch = 10
    sampler_batch_size = 512
    num_timesteps = n_epochs * steps_per_epoch * sampler_batch_size
    env = GymEnv('CartPole-v0')
    replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))
    qf = DiscreteMLPQFunction(env_spec=env.spec, hidden_sizes=(8, 5))
    policy = DiscreteQFArgmaxPolicy(env_spec=env.spec, qf=qf)
    exploration_policy = EpsilonGreedyPolicy(env_spec=env.spec,
                                             policy=policy,
                                             total_timesteps=num_timesteps,
                                             max_epsilon=1.0,
                                             min_epsilon=0.01,
                                             decay_ratio=0.4)
    sampler = LocalSampler(agents=exploration_policy,
                           envs=env,
                           max_episode_length=env.spec.max_episode_length,
                           worker_class=FragmentWorker)
    algo = DQN(env_spec=env.spec,
               policy=policy,
               qf=qf,
               exploration_policy=exploration_policy,
               replay_buffer=replay_buffer,
               sampler=sampler,
               steps_per_epoch=steps_per_epoch,
               qf_lr=5e-5,
               discount=0.9,
               min_buffer_size=int(1e4),
               n_train_steps=500,
               target_update_freq=30,
               buffer_batch_size=64)

    runner.setup(algo, env)
    runner.train(n_epochs=n_epochs, batch_size=sampler_batch_size)

    env.close()
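
The launcher above is normally started through garage's wrap_experiment decorator, which builds the experiment context passed in as ctxt. A minimal launch sketch, assuming only that wrap_experiment is importable from the top-level garage package (the call below is illustrative and not part of the original example):

from garage import wrap_experiment

# wrap_experiment supplies the ctxt argument (snapshot configuration, log
# directory, etc.) when the wrapped function is called, so only the
# remaining keyword arguments need to be passed here.
wrap_experiment(dqn_cartpole)(seed=24)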
Example 11
    def test_dqn_cartpole_grad_clip(self):
        """Test DQN with CartPole environment."""
        deterministic.set_seed(100)
        with TFTrainer(snapshot_config, sess=self.sess) as trainer:
            n_epochs = 10
            steps_per_epoch = 10
            sampler_batch_size = 500
            num_timesteps = n_epochs * steps_per_epoch * sampler_batch_size
            env = GymEnv('CartPole-v0')
            replay_buffer = PathBuffer(capacity_in_transitions=int(1e4))
            qf = DiscreteMLPQFunction(env_spec=env.spec, hidden_sizes=(64, 64))
            policy = DiscreteQFArgmaxPolicy(env_spec=env.spec, qf=qf)
            epsilon_greedy_policy = EpsilonGreedyPolicy(
                env_spec=env.spec,
                policy=policy,
                total_timesteps=num_timesteps,
                max_epsilon=1.0,
                min_epsilon=0.02,
                decay_ratio=0.1)
            sampler = LocalSampler(
                agents=epsilon_greedy_policy,
                envs=env,
                max_episode_length=env.spec.max_episode_length,
                is_tf_worker=True,
                worker_class=FragmentWorker)
            algo = DQN(env_spec=env.spec,
                       policy=policy,
                       qf=qf,
                       exploration_policy=epsilon_greedy_policy,
                       replay_buffer=replay_buffer,
                       sampler=sampler,
                       qf_lr=1e-4,
                       discount=1.0,
                       min_buffer_size=int(1e3),
                       double_q=False,
                       n_train_steps=500,
                       grad_norm_clipping=5.0,
                       steps_per_epoch=steps_per_epoch,
                       target_network_update_freq=1,
                       buffer_batch_size=32)

            trainer.setup(algo, env)
            last_avg_ret = trainer.train(n_epochs=n_epochs,
                                         batch_size=sampler_batch_size)
            assert last_avg_ret > 8.8

            env.close()
Example 12
    def test_ddpg_double_pendulum(self):
        """Test DDPG with Pendulum environment."""
        deterministic.set_seed(0)
        trainer = Trainer(snapshot_config)
        env = GymEnv('InvertedDoublePendulum-v2', max_episode_length=100)
        policy = DeterministicMLPPolicy(env_spec=env.spec,
                                        hidden_sizes=[64, 64],
                                        hidden_nonlinearity=F.relu,
                                        output_nonlinearity=torch.tanh)

        exploration_policy = AddOrnsteinUhlenbeckNoise(env.spec,
                                                       policy,
                                                       sigma=0.2)

        qf = ContinuousMLPQFunction(env_spec=env.spec,
                                    hidden_sizes=[64, 64],
                                    hidden_nonlinearity=F.relu)

        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))

        sampler = LocalSampler(agents=exploration_policy,
                               envs=env,
                               max_episode_length=env.spec.max_episode_length,
                               worker_class=FragmentWorker)

        algo = DDPG(env_spec=env.spec,
                    policy=policy,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    sampler=sampler,
                    steps_per_epoch=20,
                    n_train_steps=50,
                    min_buffer_size=int(1e4),
                    exploration_policy=exploration_policy,
                    target_update_tau=1e-2,
                    discount=0.9)

        trainer.setup(algo, env)
        last_avg_ret = trainer.train(n_epochs=10, batch_size=100)
        assert last_avg_ret > 45

        env.close()
Example 13
    def test_cem_cartpole(self):
        """Test CEM with Cartpole-v1 environment."""
        with TFTrainer(snapshot_config) as trainer:
            env = GymEnv('CartPole-v1')

            policy = CategoricalMLPPolicy(name='policy',
                                          env_spec=env.spec,
                                          hidden_sizes=(32, 32))

            n_samples = 10

            algo = CEM(env_spec=env.spec,
                       policy=policy,
                       best_frac=0.1,
                       n_samples=n_samples)

            trainer.setup(algo, env, sampler_cls=LocalSampler)
            rtn = trainer.train(n_epochs=10, batch_size=2048)
            assert rtn > 40

            env.close()
Example 14
    def test_reps_cartpole(self):
        """Test REPS with gym Cartpole environment."""
        with LocalTFRunner(snapshot_config, sess=self.sess) as runner:
            env = GymEnv('CartPole-v0')

            policy = CategoricalMLPPolicy(env_spec=env.spec,
                                          hidden_sizes=[32, 32])

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            algo = REPS(env_spec=env.spec,
                        policy=policy,
                        baseline=baseline,
                        discount=0.99)

            runner.setup(algo, env, sampler_cls=LocalSampler)

            last_avg_ret = runner.train(n_epochs=10, batch_size=4000)
            assert last_avg_ret > 5

            env.close()
Example 15
    def test_cma_es_cartpole(self):
        """Test CMAES with Cartpole-v1 environment."""
        with TFTrainer(snapshot_config) as trainer:
            env = GymEnv('CartPole-v1')

            policy = CategoricalMLPPolicy(name='policy',
                                          env_spec=env.spec,
                                          hidden_sizes=(32, 32))
            baseline = LinearFeatureBaseline(env_spec=env.spec)

            n_samples = 20

            algo = CMAES(env_spec=env.spec,
                         policy=policy,
                         baseline=baseline,
                         n_samples=n_samples)

            trainer.setup(algo, env, sampler_cls=LocalSampler)
            trainer.train(n_epochs=1, batch_size=1000)
            # No assertion on return because CMAES is not stable.

            env.close()
Example 16
    def test_dqn_cartpole_double_q(self):
        """Test DQN with CartPole environment."""
        deterministic.set_seed(100)
        with LocalTFRunner(snapshot_config, sess=self.sess) as runner:
            n_epochs = 10
            steps_per_epoch = 10
            sampler_batch_size = 500
            num_timesteps = n_epochs * steps_per_epoch * sampler_batch_size
            env = GymEnv('CartPole-v0')
            replay_buffer = PathBuffer(capacity_in_transitions=int(1e4))
            qf = DiscreteMLPQFunction(env_spec=env.spec, hidden_sizes=(64, 64))
            policy = DiscreteQfDerivedPolicy(env_spec=env.spec, qf=qf)
            epsilon_greedy_policy = EpsilonGreedyPolicy(
                env_spec=env.spec,
                policy=policy,
                total_timesteps=num_timesteps,
                max_epsilon=1.0,
                min_epsilon=0.02,
                decay_ratio=0.1)
            algo = DQN(env_spec=env.spec,
                       policy=policy,
                       qf=qf,
                       exploration_policy=epsilon_greedy_policy,
                       replay_buffer=replay_buffer,
                       qf_lr=1e-4,
                       discount=1.0,
                       min_buffer_size=int(1e3),
                       double_q=True,
                       n_train_steps=500,
                       steps_per_epoch=steps_per_epoch,
                       target_network_update_freq=1,
                       buffer_batch_size=32)

            runner.setup(algo, env)
            last_avg_ret = runner.train(n_epochs=n_epochs,
                                        batch_size=sampler_batch_size)
            assert last_avg_ret > 8.8

            env.close()
Example 17
class TestContinuousPolicies(TfGraphTestCase):
    def setup_method(self):
        super().setup_method()
        self.env = GymEnv(DummyBoxEnv())

    def teardown_method(self):
        self.env.close()
        super().teardown_method()

    def test_continuous_mlp_policy(self):
        continuous_mlp_policy = ContinuousMLPPolicy(env_spec=self.env,
                                                    hidden_sizes=(1, ))
        obs = self.env.observation_space.high
        assert continuous_mlp_policy.get_action(obs)

    def test_gaussian_gru_policy(self):
        gaussian_gru_policy = GaussianGRUPolicy(env_spec=self.env,
                                                hidden_dim=1,
                                                state_include_action=False)
        gaussian_gru_policy.reset()

        obs = self.env.observation_space.high
        assert gaussian_gru_policy.get_action(obs)

    def test_gaussian_lstm_policy(self):
        gaussian_lstm_policy = GaussianLSTMPolicy(env_spec=self.env,
                                                  hidden_dim=1,
                                                  state_include_action=False)
        gaussian_lstm_policy.reset()

        obs = self.env.observation_space.high
        assert gaussian_lstm_policy.get_action(obs)

    def test_gaussian_mlp_policy(self):
        gaussian_mlp_policy = GaussianMLPPolicy(env_spec=self.env,
                                                hidden_sizes=(1, ))
        obs = self.env.observation_space.high
        assert gaussian_mlp_policy.get_action(obs)
Example 18
    def test_vpg_cartpole(self):
        """Test VPG with CartPole-v1 environment."""
        with LocalTFRunner(snapshot_config, sess=self.sess) as runner:
            env = GymEnv('CartPole-v1')

            policy = CategoricalMLPPolicy(name='policy',
                                          env_spec=env.spec,
                                          hidden_sizes=(32, 32))

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            algo = VPG(env_spec=env.spec,
                       policy=policy,
                       baseline=baseline,
                       discount=0.99,
                       optimizer_args=dict(learning_rate=0.01, ))

            runner.setup(algo, env, sampler_cls=LocalSampler)

            last_avg_ret = runner.train(n_epochs=10, batch_size=10000)
            assert last_avg_ret > 90

            env.close()
Example 19
    def test_erwr_cartpole(self):
        """Test ERWR with Cartpole-v1 environment."""
        with LocalTFRunner(snapshot_config, sess=self.sess) as runner:
            deterministic.set_seed(1)
            env = GymEnv('CartPole-v1')

            policy = CategoricalMLPPolicy(name='policy',
                                          env_spec=env.spec,
                                          hidden_sizes=(32, 32))

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            algo = ERWR(env_spec=env.spec,
                        policy=policy,
                        baseline=baseline,
                        discount=0.99)

            runner.setup(algo, env, sampler_cls=LocalSampler)

            last_avg_ret = runner.train(n_epochs=10, batch_size=10000)
            assert last_avg_ret > 60

            env.close()
Example 20
    def test_cem_cartpole(self):
        """Test CEM with Cartpole-v1 environment."""
        with LocalTFRunner(snapshot_config) as runner:
            env = GymEnv('CartPole-v1')

            policy = CategoricalMLPPolicy(name='policy',
                                          env_spec=env.spec,
                                          hidden_sizes=(32, 32))
            baseline = LinearFeatureBaseline(env_spec=env.spec)

            n_samples = 10

            algo = CEM(env_spec=env.spec,
                       policy=policy,
                       baseline=baseline,
                       best_frac=0.1,
                       max_episode_length=100,
                       n_samples=n_samples)

            runner.setup(algo, env, sampler_cls=LocalSampler)
            rtn = runner.train(n_epochs=10, batch_size=2048)
            assert rtn > 40

            env.close()
Example 21
class TestVPG:
    """Test class for VPG."""
    @classmethod
    def setup_class(cls):
        """Setup method which is called once before all tests in this class."""
        deterministic.set_seed(0)

    def setup_method(self):
        """Setup method which is called before every test."""
        self._env = GymEnv('InvertedDoublePendulum-v2')
        self._runner = LocalRunner(snapshot_config)

        self._policy = GaussianMLPPolicy(env_spec=self._env.spec,
                                         hidden_sizes=[64, 64],
                                         hidden_nonlinearity=torch.tanh,
                                         output_nonlinearity=None)
        self._params = {
            'env_spec': self._env.spec,
            'policy': self._policy,
            'value_function':
            GaussianMLPValueFunction(env_spec=self._env.spec),
            'max_episode_length': 100,
            'discount': 0.99,
        }

    def teardown_method(self):
        """Teardown method which is called after every test."""
        self._env.close()

    @pytest.mark.mujoco
    def test_vpg_no_entropy(self):
        """Test VPG with no_entropy."""
        self._params['positive_adv'] = True
        self._params['use_softplus_entropy'] = True

        algo = VPG(**self._params)
        self._runner.setup(algo, self._env, sampler_cls=LocalSampler)
        last_avg_ret = self._runner.train(n_epochs=10, batch_size=100)
        assert last_avg_ret > 0

    @pytest.mark.mujoco
    def test_vpg_max(self):
        """Test VPG with maximum entropy."""
        self._params['center_adv'] = False
        self._params['stop_entropy_gradient'] = True
        self._params['entropy_method'] = 'max'

        algo = VPG(**self._params)
        self._runner.setup(algo, self._env, sampler_cls=LocalSampler)
        last_avg_ret = self._runner.train(n_epochs=10, batch_size=100)
        assert last_avg_ret > 0

    @pytest.mark.mujoco
    def test_vpg_regularized(self):
        """Test VPG with entropy_regularized."""
        self._params['entropy_method'] = 'regularized'

        algo = VPG(**self._params)
        self._runner.setup(algo, self._env, sampler_cls=LocalSampler)
        last_avg_ret = self._runner.train(n_epochs=10, batch_size=100)
        assert last_avg_ret > 0

    @pytest.mark.mujoco
    @pytest.mark.parametrize('algo_param, error, msg', INVALID_ENTROPY_CONFIG)
    def test_invalid_entropy_config(self, algo_param, error, msg):
        """Test VPG with invalid entropy config."""
        self._params.update(algo_param)
        with pytest.raises(error, match=msg):
            VPG(**self._params)
Example 22
#!/usr/bin/env python3
"""Example of how to load, step, and visualize an environment."""
import argparse

from garage.envs import GymEnv

parser = argparse.ArgumentParser()
parser.add_argument('--n_steps',
                    type=int,
                    default=1000,
                    help='Number of steps to run')
args = parser.parse_args()

# Construct the environment
env = GymEnv('MountainCar-v0')

# Reset the environment and launch the viewer
env.reset()
env.visualize()

step_count = 0
es = env.step(env.action_space.sample())

while not es.last and step_count < args.n_steps:
    es = env.step(env.action_space.sample())
    step_count += 1

env.close()
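
Saved as a standalone script (step_env.py is only a placeholder name), it can be run with a custom step budget, for example: python step_env.py --n_steps 500.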
Example 23
def test_get_time_limit_max_ep_len_not_equal_timeout():
    env = GymEnv('CartPoleBulletEnv-v1', max_episode_length=1)
    assert env.spec.max_episode_length == 1
    env.close()
Example 24
def test_closes_mujoco():
    garage_env = GymEnv('Ant-v2')
    garage_env.visualize()
    assert garage_env._env.viewer is not None
    garage_env.close()
    assert garage_env._env.viewer is None
Example 25
def test_closes_box2d():
    garage_env = GymEnv('CarRacing-v0')
    garage_env.visualize()
    assert garage_env._env.viewer is not None
    garage_env.close()
    assert garage_env._env.viewer is None
Example 26
def test_get_time_limit_detects_inconsistency():
    with pytest.raises(RuntimeError):
        env = GymEnv('CartPoleBulletEnv-v1', max_episode_length=math.inf)
        env.close()