def test_vectorize_gym():
    env = gym3.vectorize_gym(num=2, env_fn=gym.make, env_kwargs={"id": "Pendulum-v0"})
    env.observe()
    env.act(zeros(env.ac_space, bshape=(env.num,)))
    env.observe()
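# Usage sketch for gym3.vectorize_gym (assumes gym3 and gym are installed and
# "Pendulum-v0" is registered). observe() returns a (reward, ob, first) tuple
# batched over all `num` sub-environments; types_np.sample draws a random
# batched action from the action space.
import gym
import gym3
from gym3 import types_np

def run_random_pendulum(steps=5):
    env = gym3.vectorize_gym(num=2, env_fn=gym.make, env_kwargs={"id": "Pendulum-v0"})
    for _ in range(steps):
        env.act(types_np.sample(env.ac_space, bshape=(env.num,)))
        rew, ob, first = env.observe()
        print(f"rew={rew} first={first}")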
def _new_trajectory_dict(self):
    assert self._ob_actual_dtype is not None, (
        "Not supposed to happen; self._ob_actual_dtype should have been set"
        " in the first act() call before _new_trajectory_dict is called"
    )
    traj_dict = dict(
        reward=list(),
        ob=zeros(self.env.ob_space, (0,)),
        info=list(),
        act=zeros(self.env.ac_space, (0,)),
    )
    # cast the empty observation/action buffers to the dtypes actually
    # produced by the wrapped environment
    traj_dict["ob"] = multimap(
        lambda arr, my_dtype: arr.astype(my_dtype),
        traj_dict["ob"],
        self._ob_actual_dtype,
    )
    traj_dict["act"] = multimap(
        lambda arr, my_dtype: arr.astype(my_dtype),
        traj_dict["act"],
        self._ac_actual_dtype,
    )
    return traj_dict
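# Illustration of the leaf-wise cast performed in _new_trajectory_dict above.
# `multimap` (as used in gym3) applies a function across the matching leaves of
# nested dict structures; `simple_multimap` below is a simplified stand-in for
# illustration, not gym3's implementation.
import numpy as np

def simple_multimap(fn, *structures):
    if isinstance(structures[0], dict):
        return {
            key: simple_multimap(fn, *[s[key] for s in structures])
            for key in structures[0]
        }
    return fn(*structures)

obs_buffers = {"rgb": np.zeros((0, 64, 64, 3), dtype=np.float32)}
actual_dtypes = {"rgb": np.dtype(np.uint8)}
cast = simple_multimap(lambda arr, dt: arr.astype(dt), obs_buffers, actual_dtypes)
assert cast["rgb"].dtype == np.uint8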
def test_speed(benchmark, make_env):
    """
    Test the speed of different environments
    """
    env = make_env()
    ac = types_np.zeros(env.ac_space, bshape=(env.num,))

    def loop():
        for _ in range(1000):
            env.act(ac)

    benchmark(loop)
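# The `benchmark` fixture above is provided by the pytest-benchmark plugin: it
# calls `loop` repeatedly and reports timing statistics per round. Assuming
# this test lives in test_speed.py and `make_env` is a parametrized fixture
# defined there, it can be run with:
#
#   pytest test_speed.py --benchmark-only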
def __init__(
    self,
    ob_space: types.ValType = types.TensorType(
        eltype=types.Discrete(256, dtype_name="uint8"), shape=(64, 64, 3)
    ),
    ac_space: types.ValType = types.discrete_scalar(2),
    num: int = 1,
    episode_len: int = 1000,
    delay_seconds: float = 0.0,
) -> None:
    super().__init__(ob_space=ob_space, ac_space=ac_space, num=num)
    self._delay_seconds = delay_seconds
    self._episode_len = episode_len
    self._ob = types_np.zeros(self.ob_space, bshape=(self.num,))
    self._rews = np.zeros((self.num,), dtype=np.float32)
    self._steps = 0
    # np.bool was removed in NumPy 1.24; use the builtin bool dtype instead
    self._none_first = np.zeros((self.num,), dtype=bool)
    self._all_first = np.ones((self.num,), dtype=bool)
    self._infos = [{} for _ in range(self.num)]
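# Usage sketch for the environment constructed above (the enclosing class is
# not shown here; `SimpleEnv` is a hypothetical stand-in for its name). By
# gym3 convention, observe() returns (reward, ob, first), where `first` marks
# the first step of a new episode.
#
#   env = SimpleEnv(num=4, episode_len=100)
#   rew, ob, first = env.observe()
#   env.act(types_np.zeros(env.ac_space, bshape=(env.num,)))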
def test_recorder():
    with tempfile.TemporaryDirectory() as tmpdir:
        env = IdentityEnv(
            space=types.TensorType(eltype=types.Discrete(256), shape=(64, 64, 3))
        )
        writer_kwargs = {
            "codec": "libx264rgb",
            "pixelformat": "bgr24",
            "output_params": ["-crf", "0"],
        }
        env = VideoRecorderWrapper(
            env=env, directory=tmpdir, env_index=0, writer_kwargs=writer_kwargs
        )
        _, obs, _ = env.observe()
        for _ in range(2):
            env.act(types_np.zeros(env.ac_space, bshape=(env.num,)))
        video_files = sorted(glob(os.path.join(tmpdir, "*.mp4")))
        assert len(video_files) > 0
        with imageio.get_reader(video_files[0]) as r:
            for im in r:
                assert np.allclose(im, obs[0])
                break
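# Note on writer_kwargs above: libx264rgb with "-crf 0" is lossless RGB
# encoding, which is what makes the exact frame comparison against obs[0]
# valid; a lossy codec would perturb pixel values and break np.allclose.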
def test_recorder():
    with tempfile.TemporaryDirectory() as tmpdir:
        ep_len1 = 3
        ep_len2 = 4
        env1 = IdentityEnv(
            space=types.TensorType(eltype=types.Discrete(256), shape=(3, 3, 3)),
            episode_len=ep_len1,
        )
        env2 = IdentityEnv(
            space=types.TensorType(eltype=types.Discrete(256), shape=(3, 3, 3)),
            episode_len=ep_len2,
            seed=1,
        )
        env = ConcatEnv([env1, env2])
        env = TrajectoryRecorderWrapper(env=env, directory=tmpdir)
        _, obs, _ = env.observe()
        action = types_np.zeros(env.ac_space, bshape=(env.num,))
        action[1] = 1
        num_acs = 10
        for _ in range(num_acs):
            env.act(action)
        files = sorted(glob(os.path.join(tmpdir, "*.pickle")))
        print(files)
        assert len(files) == (num_acs // ep_len1) + (num_acs // ep_len2)
        with open(files[0], "rb") as f:
            loaded_traj = pickle.load(f)
        assert len(loaded_traj["ob"]) == ep_len1
        assert np.allclose(loaded_traj["ob"][0], obs[0])
        assert np.allclose(loaded_traj["act"][0], action[0])
        with open(files[1], "rb") as f:
            loaded_traj = pickle.load(f)
        assert len(loaded_traj["ob"]) == ep_len2
        assert np.allclose(loaded_traj["ob"][0], obs[1])
        assert np.allclose(loaded_traj["act"][0], action[1])
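# Why the file-count assertion above holds: over num_acs = 10 actions, env1
# (episode_len=3) completes 10 // 3 = 3 episodes and env2 (episode_len=4)
# completes 10 // 4 = 2, so the recorder writes 3 + 2 = 5 pickle files, one
# per completed episode.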
def _update(self, dt, keys_clicked, keys_pressed):
    # if we're displaying done info, don't advance the simulation
    if self._display_info_seconds_remaining > 0:
        self._display_info_seconds_remaining -= dt
        return

    first = False
    if self._synchronous:
        keys = keys_clicked
        act = self._keys_to_act(keys)
        if act is not None:
            first = self._act(act)
            print(
                "first={} steps={} episode_steps={} rew={} episode_return={}".format(
                    int(first),  # shorter than printing True/False
                    self._steps,
                    self._episode_steps,
                    self._last_rew,
                    self._episode_return,
                )
            )
    else:
        # cap the number of frames rendered so we don't just spend forever
        # trying to catch up on frames if rendering is slow
        max_dt = self._max_sim_frames_per_update * self._sec_per_timestep
        if dt > max_dt:
            dt = max_dt
        # catch up the simulation to the current time
        self._current_time += dt
        while self._sim_time < self._current_time or self._synchronous:
            self._sim_time += self._sec_per_timestep
            # assume that for async environments, we just want to repeat keys
            # for as long as they are held
            keys = keys_pressed
            act = self._keys_to_act(keys)
            if act is None:
                act = types_np.zeros(self._env.ac_space, bshape=(self._env.num,))
            first = self._act(act)
            if self._steps % self._tps == 0 or first:
                episode_return_delta = self._episode_return - self._prev_episode_return
                self._prev_episode_return = self._episode_return
                print(
                    "first={} steps={} episode_steps={} episode_return_delta={} episode_return={}".format(
                        int(first),
                        self._steps,
                        self._episode_steps,
                        episode_return_delta,
                        self._episode_return,
                    )
                )
            if first:
                break

    if first:
        print(f"final info={self._last_info}")
        self._episode_steps = 0
        self._episode_return = 0
        self._prev_episode_return = 0
        self._display_info_seconds_remaining = SECONDS_TO_DISPLAY_DONE_INFO