Esempio n. 1
0
 def __init__(self,
              n_actions: int = 10,
              episode_len: int = 100,
              num: int = 1) -> None:
     """
     Set up the spaces, counters and a fixed pseudo-random integer sequence.

     :param n_actions: size of the discrete action space; every entry of
         ``self.sequence`` is drawn from ``[0, n_actions)``
     :param episode_len: length of ``self.sequence``, also kept as
         ``self.episode_len``
     :param num: forwarded to the base class; also the length of
         ``self.actions``
     """
     super().__init__(
         ac_space=types.discrete_scalar(n_actions),
         ob_space=types.discrete_scalar(1),
         num=num,
     )
     self.episode_len = episode_len
     self.time = 0
     self.actions = np.zeros(num, dtype="i")
     # The seed is hard-coded to 0, so the sequence is deterministic for a
     # given (n_actions, episode_len) pair.
     self.sequence = np.random.RandomState(0).randint(
         0, n_actions, size=episode_len)
Esempio n. 2
0
def test_concat(space_type):
    """
    Exercise ConcatEnv over two wrapped IdentityEnv instances.

    The two sub-environments share the same space but differ in seed
    (0 and 1), episode length (1 and 2), attached ``f`` callable and the
    ``id`` passed to AddInfo. The test checks the concatenated
    observe/act/get_info/callmethod results.

    :param space_type: one of ``"binary"``, ``"dict"`` or ``"real_dict"``;
        any other value raises ``Exception``
    """
    if space_type == "binary":
        space = types.discrete_scalar(2)
    elif space_type == "dict":
        space = types.DictType(degrees=types.discrete_scalar(360))
    elif space_type == "real_dict":
        real_scalar = types.TensorType(shape=(), eltype=types.Real())
        space = types.DictType(degrees=real_scalar)
    else:
        raise Exception(f"invalid space_type {space_type}")
    uses_dict = isinstance(space, types.DictType)

    first_base = IdentityEnv(space=space, episode_len=1, seed=0)
    first_base.f = lambda x: [x[0]**2]
    second_base = IdentityEnv(space=space, episode_len=2, seed=1)
    second_base.f = lambda x: [2 * x[0]]

    sub_envs = [
        AddInfo(env=AssertSpacesWrapper(first_base), id=1),
        AddInfo(env=AssertSpacesWrapper(second_base), id=2),
    ]
    env = AssertSpacesWrapper(ConcatEnv(sub_envs))

    # Before any action: zero rewards, both sub-envs at an episode start.
    rewards, obs, firsts = env.observe()
    assert np.array_equal(rewards, np.array([0, 0]))
    if uses_dict:
        obs = obs["degrees"]
    assert obs.shape == (2, )
    # The differently seeded sub-envs are expected to start on different
    # observations; the reward checks below rely on that.
    assert obs[0] != obs[1]
    assert np.array_equal(firsts, np.array([1, 1]))

    # Send the first sub-env's observation to both sub-envs.
    action = np.array([obs[0], obs[0]])
    if uses_dict:
        action = {"degrees": action}
    env.act(action)

    rewards, _obs, firsts = env.observe()
    if space_type == "real_dict":
        assert rewards[0] == 0
        assert rewards[1] < 0
    else:
        assert np.array_equal(rewards, np.array([1, 0]))
    # Only the sub-env with episode_len=1 is expected to have restarted.
    assert np.array_equal(firsts, np.array([1, 0]))
    assert env.get_info() == [{"id": 1}, {"id": 2}]

    # Three arguments for two sub-envs must be rejected.
    with pytest.raises(AssertionError):
        env.callmethod("f", [2, 3, 4])

    assert env.callmethod("f", [2, 3]) == [2**2, 2 * 3]
Esempio n. 3
0
def test_identity_env():
    """
    Step a two-step IdentityEnv by echoing each observation back as the action.

    Checks the ``first`` flag and reward at the start, after one step, and
    after the second step (when ``first`` is expected to be set again).
    """
    env = IdentityEnv(space=types.discrete_scalar(1024), episode_len=2)

    reward, observation, is_first = env.observe()
    # NOTE(review): self-comparison is trivially true for non-NaN values, so
    # this only verifies that the observation can be compared and truth-tested.
    assert observation == observation
    assert is_first
    assert reward == 0

    env.act(observation)
    reward, observation, is_first = env.observe()
    assert not is_first
    assert reward == 1

    env.act(observation)
    reward, observation, is_first = env.observe()
    assert is_first
    assert reward == 1
Esempio n. 4
0
 def __init__(
     self,
     ob_space: types.ValType = types.TensorType(
         eltype=types.Discrete(256, dtype_name="uint8"),
         shape=(64, 64, 3)),
     ac_space: types.ValType = types.discrete_scalar(2),
     num: int = 1,
     episode_len: int = 1000,
     delay_seconds: float = 0.0,
 ) -> None:
     """
     Initialise the spaces and pre-allocate the per-environment buffers.

     :param ob_space: observation space, forwarded to the base class
     :param ac_space: action space, forwarded to the base class
     :param num: forwarded to the base class; the buffers below are sized
         from ``self.num``
     :param episode_len: stored as ``self._episode_len``
     :param delay_seconds: stored as ``self._delay_seconds``
     """
     super().__init__(ob_space=ob_space, ac_space=ac_space, num=num)
     self._delay_seconds = delay_seconds
     self._episode_len = episode_len
     self._ob = types_np.zeros(self.ob_space, bshape=(self.num, ))
     self._rews = np.zeros((self.num, ), dtype=np.float32)
     self._steps = 0
     # Use the builtin ``bool`` as the dtype: the ``np.bool`` alias was
     # deprecated in NumPy 1.20 and removed in 1.24, where it raises
     # AttributeError. ``bool`` yields the same boolean dtype on all versions.
     self._none_first = np.zeros((self.num, ), dtype=bool)
     self._all_first = np.ones((self.num, ), dtype=bool)
     # One independent dict per environment (not a shared instance).
     self._infos = [{} for _ in range(self.num)]
Esempio n. 5
0
def test_identity_env_delay():
    """
    Check IdentityEnv rewards when the correct action is delayed.

    With ``delay_steps=delay`` the test first takes ``delay`` arbitrary
    actions while queueing every observation. It then replays the queued
    observations in order and expects reward 1 on each of those steps,
    with ``first`` set after the final one.
    """
    delay = 2
    for space in [types.discrete_scalar(1024)]:
        env = IdentityEnv(space=space,
                          episode_len=delay * 2,
                          delay_steps=delay)
        _, ob, _ = env.observe()
        pending = [ob]

        # Warm-up: these actions are arbitrary and their rewards are not
        # checked; the observations are only collected for later replay.
        for _ in range(delay):
            env.act(0)
            _, ob, _ = env.observe()
            pending.append(ob)

        first = False
        # Replay the oldest queued observation as the action on each step.
        for _ in range(delay):
            env.act(pending.pop(0))
            rew, ob, first = env.observe()
            pending.append(ob)
            assert rew == 1
        # episode_len == 2 * delay, so the episode should have just ended.
        assert first
Esempio n. 6
0
class IdentityEnv(Env):
    """
    An environment for testing where the observation at each step is the correct action
    to take on that step.

    :param space: observation/action space for the environment
    :param episode_len: steps per episode
    :param delay_steps: delay the correct action by this many steps
    :param seed: random seed used to determine observations
    """

    DEFAULT_TYPE = types.discrete_scalar(2)

    def __init__(
        self,
        space: types.ValType = DEFAULT_TYPE,
        episode_len: int = 1,
        delay_steps: int = 0,
        seed: int = 0,
    ) -> None:
        # The same space is used for observations and actions, and the
        # environment is always a single (num=1) instance.
        super().__init__(ob_space=space, ac_space=space, num=1)
        self._seed = seed
        self._episode_len = episode_len
        self._step = None
        self._rews = np.zeros((1, ), dtype=np.float32)
        # Builtin ``bool`` rather than ``np.bool``: that alias was deprecated
        # in NumPy 1.20 and removed in 1.24 (AttributeError). The resulting
        # dtype is the same.
        self._firsts = np.zeros((1, ), dtype=bool)
        self._delay_steps = delay_steps
        # Sliding window of sampled states: the leftmost entry is the state
        # the next action is scored against, the rightmost is what observe()
        # returns.
        self._q = deque(maxlen=delay_steps + 1)
        self._rng = np.random.RandomState(seed)
        self._reset()

    def _reset(self) -> None:
        """Start a new episode: refill the state queue and reset the step counter."""
        self._q.clear()
        for _ in range(self._delay_steps + 1):
            self._q.append(
                types_np.sample(self.ac_space,
                                bshape=(self.num, ),
                                rng=self._rng))
        self._step = 0
        self._firsts[0] = True

    def observe(self) -> Tuple[np.ndarray, Any, np.ndarray]:
        """
        Return ``(rewards, observation, firsts)``.

        The reward and first arrays are the internal buffers (not copies), so
        they are updated in place by later calls to :meth:`act`.
        """
        return self._rews, self._q[-1], self._firsts

    def act(self, ac: Any) -> None:
        """
        Score ``ac`` against the oldest queued state and advance one step.

        For Discrete element types the reward is 1 on an exact match and 0
        otherwise; for Real element types it is ``-0.5 * ||ac - state||^2``.
        The rewards collected by ``types.multimap`` are averaged. Steps taken
        before ``delay_steps`` have elapsed always get reward 0.

        :param ac: action to score; expected to match ``self.ac_space``
        :raises Exception: if a sub-space has an element type other than
            Discrete or Real
        """
        self._firsts[0] = False
        state = self._q.popleft()

        rews = []

        def add_reward(subspace, substate, subval):
            if isinstance(subspace.eltype, types.Discrete):
                r = 1 if (substate == subval).all() else 0
            elif isinstance(subspace.eltype, types.Real):
                # Flatten so the dot product is a scalar sum of squares for
                # any tensor shape (the previous ``diff[:]`` slice was a
                # no-op). The result is unchanged for 1-D differences.
                diff = np.ravel(subval - substate)
                r = -0.5 * np.dot(diff, diff)
            else:
                raise Exception(
                    f"unrecognized action space eltype {subspace.eltype}")
            rews.append(r)

        types.multimap(add_reward, self.ac_space, state, ac)
        rew = sum(rews) / len(rews)

        if self._step < self._delay_steps:
            # don't give any reward for guessing un-observed states
            rew = 0
        self._rews[0] = rew
        # Push the next state; it becomes the new observation.
        self._q.append(
            types_np.sample(self.ac_space, bshape=(self.num, ), rng=self._rng))
        self._step += 1
        if self._step >= self._episode_len:
            self._reset()