def act(self, ac: Any) -> None:
    """
    Score the guess `ac` against the leftmost queued sample and advance one step.

    The leftmost entry of `self._q` is popped and compared leaf-by-leaf with
    `ac` via `types.multimap` over `self.ac_space`:

    * Discrete leaves score 1 when every element matches, otherwise 0.
    * Real leaves score -0.5 * dot(diff, diff) where diff = guess - target.

    The mean of the leaf scores is written to `self._rews[0]`, except that 0 is
    written while `self._step < self._delay_steps`.  A freshly sampled value is
    then appended to the queue, the step counter is incremented, and
    `self._reset()` is called once the counter reaches `self._episode_len`.

    Raises:
        Exception: if a leaf's eltype is neither `types.Discrete` nor `types.Real`.
    """
    self._firsts[0] = False
    target = self._q.popleft()

    leaf_rewards = []

    def score_leaf(space, expected, guess):
        eltype = space.eltype
        if isinstance(eltype, types.Discrete):
            # all-or-nothing: every element has to match
            leaf_rewards.append(1 if (expected == guess).all() else 0)
            return
        if isinstance(eltype, types.Real):
            # negative half of the squared error
            err = (guess - expected)[:]
            leaf_rewards.append(-0.5 * np.dot(err, err))
            return
        raise Exception(
            f"unrecognized action space eltype {space.eltype}")

    types.multimap(score_leaf, self.ac_space, target, ac)
    mean_reward = sum(leaf_rewards) / len(leaf_rewards)

    # don't give any reward for guessing un-observed states
    within_delay = self._step < self._delay_steps
    self._rews[0] = 0 if within_delay else mean_reward

    next_sample = types_np.sample(
        self.ac_space, bshape=(self.num, ), rng=self._rng)
    self._q.append(next_sample)

    self._step += 1
    if self._step >= self._episode_len:
        self._reset()
def _reset(self) -> None:
    """
    Begin a new episode.

    Empties `self._q`, refills it with `self._delay_steps + 1` values sampled
    from `self.ac_space` using `self._rng`, rewinds `self._step` to 0 and sets
    `self._firsts[0]` to True.
    """
    self._q.clear()
    queue_depth = self._delay_steps + 1
    # the generator is consumed in order, so samples are drawn one at a time
    self._q.extend(
        types_np.sample(self.ac_space, bshape=(self.num, ), rng=self._rng)
        for _ in range(queue_depth)
    )
    self._step = 0
    self._firsts[0] = True
def main():
    """
    Step a coinrun procgen environment with sampled actions and print throughput.

    Creates a single `ProcgenGym3Env` with `render_human=True`, wraps it in
    `ViewerWrapper` using the "rgb" info key, then performs 10000 steps.  After
    each step it prints the step index and the index divided by the seconds
    elapsed since the loop started.
    """
    import procgen

    env = procgen.ProcgenGym3Env(num=1, env_name="coinrun", render_human=True)
    env = ViewerWrapper(env=env, info_key="rgb")

    # perf_counter is monotonic and high resolution; the wall clock can be
    # adjusted mid-run and may be too coarse to separate consecutive steps.
    start = time.perf_counter()
    for i in range(10000):
        env.act(types_np.sample(env.ac_space, bshape=(env.num, )))
        elapsed = time.perf_counter() - start
        # A zero interval would make the rate undefined; report 0.0 instead of
        # raising ZeroDivisionError.
        rate = i / elapsed if elapsed > 0 else 0.0
        print("step", i, rate)
def test_works(make_env):
    """
    Smoke test: build three environments with `make_env` and step each one
    ten times with sampled actions as soon as it is created.
    """
    num_copies = 3
    steps_per_copy = 10

    created = []
    for _ in range(num_copies):
        current = make_env()
        # earlier environments stay referenced while later ones are stepped
        created.append(current)
        for _ in range(steps_per_copy):
            current.act(
                types_np.sample(current.ac_space, bshape=(current.num,)))
def test_fast_env():
    """
    Check that `TimingEnv` steps quickly and reports episode starts on schedule.

    Runs 10000 steps on 2 environments with `episode_len=100`, counting every
    truthy entry of `first` returned by `observe()`.  Asserts that:

    * just before the final step the count is one short per environment,
    * the average time per step (construction included) is under 1 ms,
    * the final count equals `num_env * num_steps / episode_len`.
    """
    num_env = 2
    num_steps = 10000
    episode_len = 100

    # perf_counter is monotonic, so a wall-clock adjustment during the run
    # cannot distort the timing assertion below.
    start = time.perf_counter()
    env = TimingEnv(num=num_env, episode_len=episode_len)

    episode_count = 0
    # exact for these constants; keeps the comparison in integers
    expected_episode_count = num_env * num_steps // episode_len

    for i in range(num_steps):
        env.act(types_np.sample(env.ac_space, bshape=(env.num, )))
        _rew, _obs, first = env.observe()
        episode_count += sum(1 for f in first if f)
        if i == num_steps - 2:
            # one step before the end, each env is still missing its last start
            assert episode_count == expected_episode_count - num_env

    elapsed = time.perf_counter() - start
    assert elapsed / num_steps < 1e-3
    assert episode_count == expected_episode_count
def gym3_rollout(e):
    """
    Run ten observe/act cycles on `e`.

    Each cycle prints the shapes of the observation leaves (via `multimap`),
    the reward array and the third array returned by `observe()`, then acts
    with a value sampled from `e.ac_space` for `e.num` environments.
    """
    num_cycles = 10
    for _ in range(num_cycles):
        reward, observation, first = e.observe()
        leaf_shapes = multimap(lambda leaf: leaf.shape, observation)
        print(leaf_shapes, reward.shape, first.shape)
        action = sample(e.ac_space, (e.num, ))
        e.act(action)
""" Example random agent script using the gym3 API to demonstrate that procgen works """ from gym3 import types_np from procgen import ProcgenGym3Env env = ProcgenGym3Env(num=1, env_name="coinrun") step = 0 while True: env.act(types_np.sample(env.ac_space, bshape=(env.num, ))) rew, obs, first = env.observe() print(f"step {step} reward {rew} first {first}") if step > 0 and first: break step += 1