Beispiel #1
0
def test_vectorize_gym():
    """Smoke-test gym3.vectorize_gym: build two Pendulum envs and step once."""
    kwargs = {"id": "Pendulum-v0"}
    env = gym3.vectorize_gym(num=2, env_fn=gym.make, env_kwargs=kwargs)
    env.observe()
    # step every sub-env with an all-zero action
    noop_action = zeros(env.ac_space, bshape=(env.num,))
    env.act(noop_action)
    env.observe()
Beispiel #2
0
 def _new_trajectory_dict(self):
     """Build an empty trajectory buffer with correctly-typed zero-length arrays.

     The `ob`/`act` entries are created via the space definitions and then cast
     to the dtypes recorded from the first act() call, so later appends match.
     """
     assert self._ob_actual_dtype is not None, (
         "Not supposed to happen; self._ob_actual_dtype should have been set"
         " in the first act() call before _new_trajectory_dict is called")

     def cast_to(arrs, dtypes):
         # apply astype leaf-by-leaf across the (possibly nested) value tree
         return multimap(lambda arr, my_dtype: arr.astype(my_dtype), arrs, dtypes)

     return dict(
         reward=[],
         ob=cast_to(zeros(self.env.ob_space, (0, )), self._ob_actual_dtype),
         info=[],
         act=cast_to(zeros(self.env.ac_space, (0, )), self._ac_actual_dtype),
     )
def test_speed(benchmark, make_env):
    """Benchmark environment stepping throughput with a fixed zero action."""
    env = make_env()
    zero_action = types_np.zeros(env.ac_space, bshape=(env.num,))

    def run_steps():
        # 1000 act() calls per benchmark round
        for _ in range(1000):
            env.act(zero_action)

    benchmark(run_steps)
Beispiel #4
0
 def __init__(
     self,
     ob_space: types.ValType = types.TensorType(eltype=types.Discrete(
         256, dtype_name="uint8"),
                                                shape=(64, 64, 3)),
     ac_space: types.ValType = types.discrete_scalar(2),
     num: int = 1,
     episode_len: int = 1000,
     delay_seconds: float = 0.0,
 ) -> None:
     """Initialize a dummy env with fixed zero observations/rewards.

     Args:
         ob_space: observation space (default: 64x64x3 uint8 tensor).
         ac_space: action space (default: scalar with 2 discrete values).
         num: number of parallel sub-environments.
         episode_len: steps per episode before `first` flips (presumably;
             the stepping logic lives outside this block).
         delay_seconds: artificial delay stored for later use.
     """
     super().__init__(ob_space=ob_space, ac_space=ac_space, num=num)
     self._delay_seconds = delay_seconds
     self._episode_len = episode_len
     self._ob = types_np.zeros(self.ob_space, bshape=(self.num, ))
     self._rews = np.zeros((self.num, ), dtype=np.float32)
     self._steps = 0
     # BUGFIX: `np.bool` was deprecated in NumPy 1.20 and removed in 1.24;
     # `np.bool_` is the supported boolean scalar type and behaves identically.
     self._none_first = np.zeros((self.num, ), dtype=np.bool_)
     self._all_first = np.ones((self.num, ), dtype=np.bool_)
     self._infos = [{} for _ in range(self.num)]
Beispiel #5
0
def test_recorder():
    """Record two steps of an identity env and check the lossless video frame."""
    with tempfile.TemporaryDirectory() as tmpdir:
        space = types.TensorType(eltype=types.Discrete(256), shape=(64, 64, 3))
        # lossless encoding so decoded frames can be compared exactly-ish
        env = VideoRecorderWrapper(
            env=IdentityEnv(space=space),
            directory=tmpdir,
            env_index=0,
            writer_kwargs={
                "codec": "libx264rgb",
                "pixelformat": "bgr24",
                "output_params": ["-crf", "0"],
            },
        )
        _, obs, _ = env.observe()
        zero_action = types_np.zeros(env.ac_space, bshape=(env.num,))
        for _ in range(2):
            env.act(zero_action)
        video_files = sorted(glob(os.path.join(tmpdir, "*.mp4")))
        assert len(video_files) > 0
        with imageio.get_reader(video_files[0]) as reader:
            # only the first frame needs to match the first observation
            for frame in reader:
                assert np.allclose(frame, obs[0])
                break
Beispiel #6
0
def test_recorder():
    """Record two concatenated identity envs and verify the pickled trajectories."""
    with tempfile.TemporaryDirectory() as tmpdir:
        ep_len1 = 3
        ep_len2 = 4
        env1 = IdentityEnv(
            space=types.TensorType(eltype=types.Discrete(256),
                                   shape=(3, 3, 3)),
            episode_len=ep_len1,
        )
        env2 = IdentityEnv(
            space=types.TensorType(eltype=types.Discrete(256),
                                   shape=(3, 3, 3)),
            episode_len=ep_len2,
            seed=1,
        )
        env = TrajectoryRecorderWrapper(env=ConcatEnv([env1, env2]),
                                        directory=tmpdir)
        _, obs, _ = env.observe()
        action = types_np.zeros(env.ac_space, bshape=(env.num, ))
        action[1] = 1
        num_acs = 10
        for _ in range(num_acs):
            env.act(action)
        files = sorted(glob(os.path.join(tmpdir, "*.pickle")))
        print(files)
        # each sub-env completes num_acs // episode_len full episodes
        assert len(files) == (num_acs // ep_len1) + (num_acs // ep_len2)

        def load_traj(path):
            with open(path, "rb") as f:
                return pickle.load(f)

        # first file per sub-env: check length and the initial ob/act entries
        for env_idx, ep_len in ((0, ep_len1), (1, ep_len2)):
            traj = load_traj(files[env_idx])
            assert len(traj["ob"]) == ep_len
            assert np.allclose(traj["ob"][0], obs[env_idx])
            assert np.allclose(traj["act"][0], action[env_idx])
Beispiel #7
0
    def _update(self, dt, keys_clicked, keys_pressed):
        """Advance the interactive session by one render tick.

        Args:
            dt: wall-clock seconds since the previous update (presumably;
                it is compared against `_sec_per_timestep`-derived values).
            keys_clicked: keys newly pressed this tick; used in synchronous
                mode so each click produces exactly one env step.
            keys_pressed: keys currently held; used in async mode so the
                action repeats while the key stays down.
        """
        # if we're displaying done info, don't advance the simulation
        if self._display_info_seconds_remaining > 0:
            self._display_info_seconds_remaining -= dt
            return

        first = False

        if self._synchronous:
            # synchronous mode: step only when the user issued a new key click
            keys = keys_clicked
            act = self._keys_to_act(keys)

            if act is not None:
                first = self._act(act)
                print(
                    "first={} steps={} episode_steps={} rew={} episode_return={}"
                    .format(
                        int(first),  # shorter than printing True/False
                        self._steps,
                        self._episode_steps,
                        self._last_rew,
                        self._episode_return,
                    ))
        else:
            # cap the number of frames rendered so we don't just spend forever trying to catch up on frames
            # if rendering is slow
            max_dt = self._max_sim_frames_per_update * self._sec_per_timestep
            if dt > max_dt:
                dt = max_dt

            # catch up the simulation to the current time
            self._current_time += dt
            # NOTE(review): `self._synchronous` is always False on this branch,
            # so the `or self._synchronous` clause below can never fire.
            while self._sim_time < self._current_time or self._synchronous:
                self._sim_time += self._sec_per_timestep

                # assume that for async environments, we just want to repeat keys for as long as they are held
                keys = keys_pressed

                act = self._keys_to_act(keys)
                if act is None:
                    # no key mapped to an action: substitute an all-zero action
                    act = types_np.zeros(self._env.ac_space,
                                         bshape=(self._env.num, ))

                first = self._act(act)
                # log every `_tps` steps (presumably timesteps-per-second,
                # i.e. roughly once per simulated second) or on episode start
                if self._steps % self._tps == 0 or first:
                    episode_return_delta = (self._episode_return -
                                            self._prev_episode_return)
                    self._prev_episode_return = self._episode_return
                    print(
                        "first={} steps={} episode_steps={} episode_return_delta={} episode_return={}"
                        .format(
                            int(first),
                            self._steps,
                            self._episode_steps,
                            episode_return_delta,
                            self._episode_return,
                        ))
                if first:
                    # stop catching up at an episode boundary so the done info
                    # display below happens before further steps
                    break

        if first:
            # new episode began: show final info, reset per-episode counters,
            # and pause the simulation while the info stays on screen
            print(f"final info={self._last_info}")
            self._episode_steps = 0
            self._episode_return = 0
            self._prev_episode_return = 0
            self._display_info_seconds_remaining = SECONDS_TO_DISPLAY_DONE_INFO