Ejemplo n.º 1
0
    def act(self, ac: Any) -> None:
        """
        Score the guess `ac` against the state at the front of the queue, then advance one step.

        Each leaf of the action space contributes one score:

        * Discrete leaves score 1 when every element matches the queued state, otherwise 0
        * Real leaves score -0.5 times the squared distance between guess and queued state

        The stored reward is the mean of the leaf scores, except that it is forced to 0 while
        fewer than `self._delay_steps` steps have been taken.  A freshly sampled state is then
        queued, the step counter is incremented, and the episode is reset once the counter
        reaches `self._episode_len`.

        :param ac: tree of values with the same structure as `self.ac_space`
        """
        self._firsts[0] = False
        target = self._q.popleft()

        scores = []

        def score_leaf(subspace, expected, guess):
            # Called once per leaf; the result is collected in `scores` rather than returned.
            eltype = subspace.eltype
            if isinstance(eltype, types.Discrete):
                scores.append(1 if (expected == guess).all() else 0)
                return
            if isinstance(eltype, types.Real):
                err = (guess - expected)[:]
                scores.append(-0.5 * np.dot(err, err))
                return
            raise Exception(
                f"unrecognized action space eltype {subspace.eltype}")

        types.multimap(score_leaf, self.ac_space, target, ac)
        mean_score = sum(scores) / len(scores)

        # don't give any reward for guessing un-observed states
        still_delayed = self._step < self._delay_steps
        self._rews[0] = 0 if still_delayed else mean_score

        fresh_state = types_np.sample(
            self.ac_space, bshape=(self.num, ), rng=self._rng)
        self._q.append(fresh_state)

        self._step += 1
        if self._step >= self._episode_len:
            self._reset()
Ejemplo n.º 2
0
    def act(self, ac: Any) -> None:
        """
        Record the current step for every sub-environment, then forward the action.

        Before acting, the current observation, the action and the per-environment info are
        appended to each trajectory.  After `super().act(ac)` the resulting reward is appended
        as well, and every trajectory whose `first` flag is set is written out and reset.

        :param ac: batched tree of actions, forwarded unchanged to `super().act`
        """
        _, ob, _ = self.observe()
        info = self.get_info()

        # The trajectory buffers are created lazily on the first act() call: the environment
        # sometimes returns observations whose dtypes do not match self.env.ob_space, so the
        # actual dtypes have to be captured from real data first.
        if self._trajectories is None:
            self._ob_actual_dtype = multimap(lambda leaf: leaf.dtype, ob)
            self._ac_actual_dtype = multimap(lambda leaf: leaf.dtype, ac)
            self._trajectories = [
                self._new_trajectory_dict() for _ in range(self.env.num)
            ]

        def batch_row(tree, idx):
            # For a plain array space `tree` is a single [batch, shape...] array; for a dict
            # space it is a nested dict such as
            # {
            #     'obs_key1': [batch, obs1_shape...],
            #     'obs_key2': [batch, obs2_shape...]
            # }
            # Either way, slicing every leaf with idx:idx + 1 selects one environment's entry
            # and keeps the leading axis, so it can be concatenated onto that environment's
            # trajectory leaf by leaf.
            return multimap(lambda leaf: leaf[idx:idx + 1], tree)

        for i in range(self.env.num):
            traj = self._trajectories[i]
            traj["ob"] = concat([traj["ob"], batch_row(ob, i)], axis=0)
            traj["act"] = concat([traj["act"], batch_row(ac, i)], axis=0)
            traj["info"].append(info[i])

        super().act(ac)

        reward, _, first = self.observe()
        for i in range(self.env.num):
            self._trajectories[i]["reward"].append(reward[i])

        # For each completed trajectory, write it out
        for i in range(self.env.num):
            if not first[i]:
                continue
            self._write_and_reset_trajectory(i)
Ejemplo n.º 3
0
 def step(self, ac):
     """
     Apply a single (unbatched) action and return a gym-style 4-tuple.

     The action is wrapped in a length-1 array before being passed to the wrapped
     environment, and index 0 is taken from every returned value.  When the step
     starts a new episode (`first[0]` is set) the observation seen *before* acting
     is returned instead of the new one.

     :param ac: action for the single wrapped environment

     :returns: tuple of (observation, reward, first flag, info)
     """
     _, ob_before, _ = self.env.observe()
     self.env.act(np.array([ac]))
     rew, ob_after, first = self.env.observe()

     episode_ended = first[0]
     shown_ob = ob_before if episode_ended else ob_after

     unbatched_ob = multimap(lambda leaf: leaf[0], shown_ob)
     reward = rew[0]
     info = self.env.get_info()[0]
     return unbatched_ob, reward, episode_ended, info
Ejemplo n.º 4
0
def _vt2space(vt: ValType):
    """
    Build the gym space corresponding to a ValType tree

    Leaves are converted as follows:

    * Discrete eltype with ndim 0 becomes `spaces.Discrete(n)`
    * Discrete eltype with ndim > 0 becomes a `spaces.Box` bounded by [0, n - 1]
    * Real eltype becomes an unbounded `spaces.Box`

    Any dict produced while mapping over the tree is wrapped in `spaces.Dict`.

    :param vt: ValType to convert

    :returns: gym space (or `spaces.Dict` of spaces) matching vt
    :raises NotImplementedError: if a leaf has an eltype other than Discrete or Real
    """
    from gym import spaces

    def leaf_to_space(tt: TensorType):
        eltype = tt.eltype
        if isinstance(eltype, Discrete):
            if tt.ndim == 0:
                return spaces.Discrete(eltype.n)
            return spaces.Box(
                low=0,
                high=eltype.n - 1,
                shape=tt.shape,
                dtype=types_np.dtype(tt),
            )
        if isinstance(eltype, Real):
            return spaces.Box(
                shape=tt.shape,
                dtype=types_np.dtype(tt),
                low=float("-inf"),
                high=float("inf"),
            )
        raise NotImplementedError

    def wrap_dicts(node):
        # Recursively turn plain dicts into spaces.Dict; everything else passes through.
        if not isinstance(node, dict):
            return node
        return spaces.Dict(
            {key: wrap_dicts(child) for key, child in node.items()})

    return wrap_dicts(multimap(leaf_to_space, vt))
Ejemplo n.º 5
0
 def observe(self) -> Tuple[Any, Any, Any]:
     """
     Return the most recent reward, observation and first flag, each with a leading axis of size 1.

     :returns: tuple of (float32 reward array, observation tree, bool first array)
     """

     def add_batch_axis(val):
         return np.expand_dims(np.array(val), axis=0)

     rew = np.array([self.last_rew], "f")
     ob = multimap(add_batch_axis, self.last_ob)
     first = np.array([self.last_first], bool)
     return rew, ob, first
Ejemplo n.º 6
0
def concat(xs: Sequence[Any], axis: int = 0) -> Any:
    """
    Join several trees leaf by leaf using np.concatenate

    :param xs: list of trees with the same shape, where the leaf values are numpy arrays
    :param axis: axis to concatenate along

    :returns: a single tree whose leaves are the concatenated leaves of the input trees
    """

    def join_leaves(*leaves):
        return np.concatenate(leaves, axis=axis)

    return multimap(join_leaves, *xs)
Ejemplo n.º 7
0
def stack(xs: Sequence[Any], axis: int = 0) -> Any:
    """
    Combine several trees leaf by leaf using np.stack

    :param xs: list of trees with the same shape, where the leaf values are numpy arrays
    :param axis: axis to stack along

    :returns: a single tree whose leaves are the stacked leaves of the input trees
    """

    def stack_leaves(*leaves):
        return np.stack(leaves, axis=axis)

    return multimap(stack_leaves, *xs)
Ejemplo n.º 8
0
def stack(xs: Sequence[Any], dim: int = 0) -> Any:
    """
    Combine several trees leaf by leaf using th.stack

    :param xs: list of trees with the same shape, where the leaf values are torch tensors
    :param dim: dimension to stack along

    :returns: a single tree whose leaves are the stacked leaves of the input trees
    """

    def stack_leaves(*leaves):
        return th.stack(leaves, dim=dim)

    return multimap(stack_leaves, *xs)
Ejemplo n.º 9
0
def sample(vt: ValType, bshape: Tuple) -> Any:
    """
    Create a sample for every leaf of vt by calling _sample_tensor on it

    :param vt: ValType to create sample for
    :param bshape: batch shape to prepend to the shape of each torch tensor created by this function

    :returns: tree of torch tensors matching vt
    """

    def sample_leaf(tt):
        return _sample_tensor(tt, bshape=bshape)

    return multimap(sample_leaf, vt)
Ejemplo n.º 10
0
def zeros(vt: ValType, bshape: Tuple) -> Any:
    """
    Create a zero-filled tensor for every leaf of vt

    :param vt: ValType to create zeros for
    :param bshape: batch shape to prepend to the shape of each tensor created by this function

    :returns: tree of torch tensors matching vt
    """

    def zero_leaf(subdt):
        full_shape = bshape + subdt.shape
        return th.zeros(full_shape, dtype=dtype(subdt))

    return multimap(zero_leaf, vt)
Ejemplo n.º 11
0
def sample(
    vt: ValType, bshape: Tuple, rng: Optional[np.random.RandomState] = None
) -> Any:
    """
    Create a sample for every leaf of vt by calling _sample_tensor on it

    :param vt: ValType to create sample for
    :param bshape: batch shape to prepend to the shape of each numpy array created by this function
    :param rng: np.random.RandomState to use for sampling

    :returns: tree of numpy arrays matching vt
    """

    def sample_leaf(tt):
        return _sample_tensor(tt, bshape=bshape, rng=rng)

    return multimap(sample_leaf, vt)
Ejemplo n.º 12
0
 def act(self, ac: Any) -> None:
     """
     Step the wrapped gym environment with the single action contained in `ac`.

     The step result is stored on `last_ob`, `last_rew`, `last_first` and `info`.  Depending on
     `render_mode` the environment is also rendered; in "rgb_array" mode the rendered frame is
     stored under the "rgb" key of the info.  If the step reports the end of an episode, the
     environment is reset and `last_ob` is replaced by the observation returned from the reset.

     :param ac: batched tree of actions; index 0 of every leaf is passed to the gym environment
     """
     # Check we got an action consistent with num_envs=1
     _assert_num_envs_1(ac)
     single_ac = multimap(lambda leaf: leaf[0], ac)

     step_result = self.gym_env.step(single_ac)
     self.last_ob, self.last_rew, self.last_first, self.info = step_result

     mode = self.render_mode
     if mode == "rgb_array":
         self.info["rgb"] = self.gym_env.render(mode="rgb_array")
     elif mode is not None:
         self.gym_env.render(mode=mode)

     if self.last_first:
         self.last_ob = self.gym_env.reset()
Ejemplo n.º 13
0
 def _new_trajectory_dict(self):
     """
     Create an empty trajectory record.

     The "ob" and "act" entries start as zero-length trees built from the environment's
     observation and action spaces, cast to the dtypes stored in `self._ob_actual_dtype` and
     `self._ac_actual_dtype`.  The "reward" and "info" entries start as empty lists.

     :returns: dict with the keys "reward", "ob", "info" and "act"
     """
     assert self._ob_actual_dtype is not None, (
         "Not supposed to happen; self._ob_actual_dtype should have been set"
         " in the first act() call before _new_trajectory_dict is called")

     def cast_tree(tree, dtypes):
         # Cast each leaf array to the matching leaf of the dtype tree.
         return multimap(lambda arr, my_dtype: arr.astype(my_dtype), tree,
                         dtypes)

     empty_ob = zeros(self.env.ob_space, (0, ))
     empty_act = zeros(self.env.ac_space, (0, ))
     typed_ob = cast_tree(empty_ob, self._ob_actual_dtype)
     typed_act = cast_tree(empty_act, self._ac_actual_dtype)

     return {
         "reward": [],
         "ob": typed_ob,
         "info": [],
         "act": typed_act,
     }
Ejemplo n.º 14
0
def split(x: Any, sections: Sequence[int]) -> Sequence[Any]:
    """
    Cut every (leaf) array of the tree x into consecutive slices

    Each entry of `sections` is the end index of one slice; a slice starts where the
    previous one ended, and the first slice starts at index 0.

    Examples:

        split([1,2,3,4], [1,2,3,4]) => [[1], [2], [3], [4]]
        split([1,2,3,4], [1,3,4]) => [[1], [2, 3], [4]]

    :param x: a tree where the leaf values are numpy arrays
    :param sections: list of indices to split at (not sizes of each split)

    :returns: list of trees with length `len(sections)` with the same shape as x
            where each leaf is the corresponding section of the leaf in x
    """

    def take(lower, upper):
        return multimap(lambda arr: arr[lower:upper], x)

    pieces = []
    previous_end = 0
    for section_end in sections:
        pieces.append(take(previous_end, section_end))
        previous_end = section_end
    return pieces
Ejemplo n.º 15
0
 def reset(self):
     """
     Return the current observation of the single wrapped environment.

     No reset is actually triggered here: if the environment is not at the first step of an
     episode, a warning is printed and the current observation is returned anyway.

     :returns: observation tree with index 0 taken from every leaf
     """
     _rew, ob, first = self.env.observe()
     at_episode_start = first[0]
     if not at_episode_start:
         print("Warning: early reset ignored")
     unbatched_ob = multimap(lambda leaf: leaf[0], ob)
     return unbatched_ob
Ejemplo n.º 16
0
 def act(self, ac: Any) -> None:
     """
     Check every leaf of `ac` against the action space, then forward it to the wrapped env.

     :param ac: tree of actions with the same structure as `self.ac_space`
     """
     check_leaf = self._assert_val_matches_space
     types.multimap(check_leaf, self.ac_space, ac)
     self.env.act(ac=ac)
Ejemplo n.º 17
0
 def observe(self) -> Tuple[np.ndarray, Any, np.ndarray]:
     """
     Observe the wrapped environment and check the result before returning it.

     Every leaf of the observation is checked against `self.ob_space`; the reward must be a
     float32 array and the first flag a bool array, both of shape `(self.num, )`.

     :returns: the unchanged tuple of (reward, observation, first) from the wrapped environment
     :raises AssertionError: if the reward or first array has the wrong dtype or shape
     """
     rew, ob, first = self.env.observe()
     types.multimap(self._assert_val_matches_space, self.ob_space, ob)
     # The `np.bool` alias was deprecated in NumPy 1.20 and removed in 1.24, so referencing
     # it raises AttributeError there; `np.bool_` is available in every release.  Dtypes are
     # compared with `==` because equal dtypes are not guaranteed to be the same object.
     assert rew.dtype == np.dtype(np.float32) and rew.shape == (self.num, )
     assert first.dtype == np.dtype(np.bool_) and first.shape == (self.num, )
     return rew, ob, first