Example #1
0
    def act(self, ac: Any) -> None:
        """Score the given action against the oldest queued state, then advance.

        The reward is the mean over subspaces: exact-match (0/1) for discrete
        parts, negative half squared distance for real-valued parts. No reward
        is given while the target state has not yet been observable.
        """
        self._firsts[0] = False
        target = self._q.popleft()

        partial_rewards = []

        def score(space, expected, given):
            eltype = space.eltype
            if isinstance(eltype, types.Discrete):
                # discrete: all components must match exactly
                partial_rewards.append(1 if (expected == given).all() else 0)
            elif isinstance(eltype, types.Real):
                # real: negative half squared Euclidean distance
                delta = given - expected
                delta = delta[:]
                partial_rewards.append(-0.5 * np.dot(delta, delta))
            else:
                raise Exception(
                    f"unrecognized action space eltype {space.eltype}")

        types.multimap(score, self.ac_space, target, ac)
        mean_reward = sum(partial_rewards) / len(partial_rewards)

        if self._step < self._delay_steps:
            # don't give any reward for guessing un-observed states
            mean_reward = 0
        self._rews[0] = mean_reward
        self._q.append(
            types_np.sample(self.ac_space, bshape=(self.num, ), rng=self._rng))
        self._step += 1
        if self._step >= self._episode_len:
            self._reset()
Example #2
0
 def _reset(self) -> None:
     """Restart an episode: refill the state queue with fresh random samples."""
     self._q.clear()
     # one sample per delay slot, plus the state currently being guessed
     fresh = [
         types_np.sample(self.ac_space, bshape=(self.num, ), rng=self._rng)
         for _ in range(self._delay_steps + 1)
     ]
     self._q.extend(fresh)
     self._step = 0
     self._firsts[0] = True
Example #3
0
def main():
    """Run a random agent in a human-rendered coinrun env, printing the step rate."""
    import procgen

    env = ViewerWrapper(
        env=procgen.ProcgenGym3Env(num=1, env_name="coinrun", render_human=True),
        info_key="rgb",
    )
    t0 = time.time()
    for step_idx in range(10000):
        env.act(types_np.sample(env.ac_space, bshape=(env.num, )))
        # steps-per-second so far (0.0 on the very first report)
        print("step", step_idx, step_idx / (time.time() - t0))
def test_works(make_env):
    """
    Smoke test: several env copies can coexist and each accepts random actions.
    """
    created = []
    for _ in range(3):
        e = make_env()
        # keep every instance alive so they genuinely coexist
        created.append(e)
        for _ in range(10):
            e.act(types_np.sample(e.ac_space, bshape=(e.num,)))
Example #5
0
def test_fast_env():
    """Check TimingEnv resets episodes on schedule and steps fast enough."""
    num_env = 2
    num_steps = 10000
    episode_len = 100
    start = time.time()
    env = TimingEnv(num=num_env, episode_len=episode_len)
    episode_count = 0
    expected_episode_count = num_env * num_steps / episode_len
    for i in range(num_steps):
        env.act(types_np.sample(env.ac_space, bshape=(env.num, )))
        _rew, _obs, first = env.observe()
        # each truthy entry in `first` marks the start of a fresh episode
        episode_count += sum(1 for flag in first if flag)

        if i == num_steps - 2:
            # one step before the end, the final batch of resets is still pending
            assert episode_count == expected_episode_count - num_env
    elapsed = time.time() - start
    # average step must stay under a millisecond
    assert elapsed / num_steps < 1e-3
    assert episode_count == expected_episode_count
Example #6
0
def gym3_rollout(e):
    """Take 10 random steps in env `e`, printing obs/reward/first shapes."""
    for _ in range(10):
        reward, obs, first = e.observe()
        obs_shapes = multimap(lambda arr: arr.shape, obs)
        print(obs_shapes, reward.shape, first.shape)
        e.act(sample(e.ac_space, (e.num, )))
Example #7
0
"""
Example random agent script using the gym3 API to demonstrate that procgen works
"""

from gym3 import types_np
from procgen import ProcgenGym3Env

env = ProcgenGym3Env(num=1, env_name="coinrun")
step = 0
while True:
    env.act(types_np.sample(env.ac_space, bshape=(env.num, )))
    rew, obs, first = env.observe()
    print(f"step {step} reward {rew} first {first}")
    if step > 0 and first:
        break
    step += 1