Ejemplo n.º 1
0
 def _make_parallel_environment(self,
                                constructor=None,
                                num_envs=2,
                                flatten=True,
                                start_serially=True,
                                blocking=True):
     """Build a ``ParallelAlfEnvironment`` from one repeated constructor.

     Args:
         constructor (Callable): callable used to create each environment.
             If it is falsy (e.g. ``None``), a partial of
             ``RandomAlfEnvironment`` bound to ``self.observation_spec`` and
             ``self.action_spec`` is used instead.
         num_envs (int): how many times the constructor is repeated.
         flatten (bool): forwarded to ``ParallelAlfEnvironment``.
         start_serially (bool): forwarded to ``ParallelAlfEnvironment``.
         blocking (bool): forwarded to ``ParallelAlfEnvironment``.

     Returns:
         The newly created ``ParallelAlfEnvironment``.
     """
     # The default specs must be set before they are read below.
     self._set_default_specs()

     if not constructor:
         constructor = functools.partial(RandomAlfEnvironment,
                                         self.observation_spec,
                                         self.action_spec)

     # The very same constructor object is shared by all slots.
     env_constructors = [constructor] * num_envs
     return parallel_environment.ParallelAlfEnvironment(
         env_constructors=env_constructors,
         flatten=flatten,
         start_serially=start_serially,
         blocking=blocking)
Ejemplo n.º 2
0
    def test_dmlab_env_run(self, scene):
        """Create five parallel DMLab envs for ``scene`` and step them.

        Checks that the observation spec shape is ``(3, 84, 84)`` and then
        performs ten steps with freshly sampled actions.

        Args:
            scene (str): scene name passed on to ``suite_dmlab.load``.
        """
        num_envs = 5

        def load_env(scene, env_id=None):
            # ``env_id`` is accepted but not used when loading the env.
            return suite_dmlab.load(
                scene=scene,
                gym_env_wrappers=[gym_wrappers.FrameResize],
                wrap_with_process=False)

        env_ctors = [functools.partial(load_env, scene)] * num_envs
        self._env = parallel_environment.ParallelAlfEnvironment(env_ctors)

        observation_shape = self._env.observation_spec().shape
        self.assertEqual((3, 84, 84), observation_shape)

        for _ in range(10):
            # A new batch of actions is sampled for every step.
            batch_actions = self._env.action_spec().sample(
                outer_dims=(num_envs, ))
            self._env.step(batch_actions)
Ejemplo n.º 3
0
    def test_parallel_env(self):
        """Run five 'highway-v0' envs in parallel and step them ten times.

        Verifies that the resulting environment is batched, that its batch
        size equals the number of envs and that observations are float32.
        """
        num_envs = 5

        def load_env(env_id=None):
            # ``env_id`` is accepted but not used when loading the env.
            return suite_highway.load(environment_name='highway-v0')

        env_ctors = [functools.partial(load_env)] * num_envs
        self._env = parallel_environment.ParallelAlfEnvironment(env_ctors)

        self.assertTrue(self._env.batched)
        self.assertEqual(self._env.batch_size, num_envs)
        observation_dtype = self._env.observation_spec().dtype
        self.assertEqual(torch.float32, observation_dtype)

        # The same sampled batch of actions is reused for every step.
        batch_actions = self._env.action_spec().sample(
            outer_dims=(num_envs, ))
        for _ in range(10):
            self._env.step(batch_actions)
Ejemplo n.º 4
0
    def test_parallel_envs(self):
        """Run five 'SocialBot-CartPole-v0' envs in parallel for ten steps.

        The parallel environment is created with ``start_serially=False``.
        Verifies that it is batched and that its batch size equals the
        number of envs.
        """
        num_envs = 5
        task_name = 'SocialBot-CartPole-v0'

        def load_env(env_name, env_id=None):
            # ``env_id`` is accepted but not used when loading the env.
            return suite_socialbot.load(environment_name=env_name,
                                        wrap_with_process=False)

        env_ctors = [functools.partial(load_env, task_name)] * num_envs
        self._env = parallel_environment.ParallelAlfEnvironment(
            env_ctors, start_serially=False)

        self.assertTrue(self._env.batched)
        self.assertEqual(self._env.batch_size, num_envs)

        # The same sampled batch of actions is reused for every step.
        batch_actions = self._env.action_spec().sample(
            outer_dims=(num_envs, ))
        for _ in range(10):
            self._env.step(batch_actions)
Ejemplo n.º 5
0
    def test_parallel_env(self):
        """Run eight 'Safexp-PointGoal1-v0' envs in parallel for ten steps.

        Verifies that the environment is batched with the expected batch
        size, that observations are float32 and that the reward spec shape
        is ``(suite_safety_gym.VectorReward.REWARD_DIMENSION, )``.
        """
        num_envs = 8

        def load_env(env_id=None):
            # ``env_id`` is accepted but not used when loading the env.
            return suite_safety_gym.load(
                environment_name='Safexp-PointGoal1-v0')

        env_ctors = [functools.partial(load_env)] * num_envs
        self._env = parallel_environment.ParallelAlfEnvironment(env_ctors)

        self.assertTrue(self._env.batched)
        self.assertEqual(self._env.batch_size, num_envs)
        observation_dtype = self._env.observation_spec().dtype
        self.assertEqual(torch.float32, observation_dtype)
        expected_reward_shape = (
            suite_safety_gym.VectorReward.REWARD_DIMENSION, )
        self.assertEqual(expected_reward_shape,
                         self._env.reward_spec().shape)

        # The same sampled batch of actions is reused for every step.
        batch_actions = self._env.action_spec().sample(
            outer_dims=(num_envs, ))
        for _ in range(10):
            self._env.step(batch_actions)
Ejemplo n.º 6
0
    def test_parallel_env(self):
        """Create eight parallel DMLab 'lt_chasm' envs and check their specs.

        Each env is loaded with the gray-scale, resize and frame-stack gym
        wrappers. Verifies that the environment is batched with the expected
        batch size and that the observation spec shape is ``(4, 84, 84)``.
        """
        scene_name = 'lt_chasm'
        num_envs = 8

        def load_env(scene, env_id=None):
            # ``env_id`` is accepted but not used when loading the env.
            wrappers = [
                gym_wrappers.FrameGrayScale, gym_wrappers.FrameResize,
                gym_wrappers.FrameStack
            ]
            return suite_dmlab.load(
                scene=scene,
                gym_env_wrappers=wrappers,
                wrap_with_process=False)

        env_ctors = [functools.partial(load_env, scene_name)] * num_envs
        self._env = parallel_environment.ParallelAlfEnvironment(env_ctors)

        self.assertTrue(self._env.batched)
        self.assertEqual(self._env.batch_size, num_envs)
        observation_shape = self._env.observation_spec().shape
        self.assertEqual((4, 84, 84), observation_shape)
Ejemplo n.º 7
0
    def test_parallel_env(self):
        """Run eight 'SuperMarioBros-Nes' envs in parallel for ten steps.

        Each env is loaded at state 'Level1-1'. Verifies that the
        environment is batched with the expected batch size and that
        observations are uint8 with shape ``(4, 84, 84)``.
        """
        game_name = 'SuperMarioBros-Nes'
        num_envs = 8

        def load_env(game, env_id=None):
            # ``env_id`` is accepted but not used when loading the env.
            return suite_mario.load(game=game,
                                    state='Level1-1',
                                    wrap_with_process=False)

        env_ctors = [functools.partial(load_env, game_name)] * num_envs
        self._env = parallel_environment.ParallelAlfEnvironment(env_ctors)

        self.assertTrue(self._env.batched)
        self.assertEqual(self._env.batch_size, num_envs)
        self.assertEqual(torch.uint8, self._env.observation_spec().dtype)
        self.assertEqual((4, 84, 84), self._env.observation_spec().shape)

        # The same sampled batch of actions is reused for every step.
        batch_actions = self._env.action_spec().sample(
            outer_dims=(num_envs, ))
        for _ in range(10):
            self._env.step(batch_actions)
Ejemplo n.º 8
0
Archivo: utils.py Proyecto: zhuboli/alf
def create_environment(env_name='CartPole-v0',
                       env_load_fn=suite_gym.load,
                       num_parallel_environments=30,
                       nonparallel=False,
                       seed=None):
    """Create a batched environment.

    Args:
        env_name (str): env name
        env_load_fn (Callable): callable that creates an environment.
            If ``env_load_fn`` has an attribute ``batched`` and it is True,
            ``env_load_fn(env_name, batch_size=...)`` is used to create the
            batched environment directly (with a batch size of 1 when
            ``nonparallel`` is True, else ``num_parallel_environments``).
            Otherwise a ``ThreadEnvironment`` (``nonparallel=True``) or a
            ``ParallelAlfEnvironment`` is created.
        num_parallel_environments (int): num of parallel environments
        nonparallel (bool): force to create a single env in the current
            process. Used for correctly exposing game gin confs to tensorboard.
        seed (None|int): random seed for the created environment. If None,
            each env is seeded with a number drawn from ``np.random``.
            Otherwise the single env gets ``seed`` and the i-th parallel env
            gets ``seed + i``. Not used when ``env_load_fn`` is batched.

    Returns:
        AlfEnvironment:
    """

    def _draw_seed():
        # Random seed in [0, int32 max), drawn from numpy's global RNG.
        return np.random.randint(0, np.iinfo(np.int32).max)

    # A loader that is already batched builds the whole batch by itself.
    if getattr(env_load_fn, 'batched', False):
        batch_size = 1 if nonparallel else num_parallel_environments
        return env_load_fn(env_name, batch_size=batch_size)

    if nonparallel:
        # Each time we can only create one unwrapped env at most.
        #
        # Create and step the env in a separate thread. env `step` and
        # `reset` must run in the same thread which the env is created in
        # for some simulation environments such as social_bot(gazebo).
        alf_env = thread_environment.ThreadEnvironment(
            lambda: env_load_fn(env_name))
        alf_env.seed(_draw_seed() if seed is None else seed)
        return alf_env

    # flatten=True will use flattened action and time_step in process
    # environments to reduce communication overhead.
    env_ctors = ([functools.partial(env_load_fn, env_name)] *
                 num_parallel_environments)
    alf_env = parallel_environment.ParallelAlfEnvironment(
        env_ctors, flatten=True)

    if seed is None:
        env_seeds = [_draw_seed() for _ in range(num_parallel_environments)]
    else:
        # We want deterministic behaviors for each environment, but
        # different behaviors among different individual environments (to
        # increase the diversity of environment data)!
        env_seeds = [
            seed + offset for offset in range(num_parallel_environments)
        ]
    alf_env.seed(env_seeds)

    return alf_env