Beispiel #1
0
 def skill_stopping_func(self, path):
     """Decide whether the running skill should stop, given the rollout so far.

     The skill stops when either:

     * the first two entries of the newest observation equal those of the
       previous observation (nothing moved, so further steps are wasted), or
     * adding the displacement selected by the latest action to those two
       entries gives ``self.target``.

     Args:
         path: mapping holding ``'observations'`` and ``"actions"`` sequences.
             Presumably the first two observation entries are grid
             coordinates and each action is a one-hot vector over the four
             displacements below -- TODO confirm against the environment.

     Returns:
         bool: True if the skill should stop.
     """
     observations = path['observations']
     current_xy = observations[-1][:2]

     # No movement since the previous step -> stop early (shorter training).
     has_previous = len(observations) > 1
     if has_previous and np.array_equal(current_xy, observations[-2][:2]):
         return True

     # One displacement row per action index.
     displacements = np.array([[-1, 0], [0, 1], [1, 0], [0, -1]])
     action_index = special.from_onehot(path["actions"][-1])
     projected_xy = current_xy + displacements[action_index]
     return np.array_equal(projected_xy, self.target)
Beispiel #2
0
def unflatten(space, obs):
    """Turn a flat observation back into the structure described by ``space``.

    Dispatch is by space type:

    * ``gym.spaces.Box``: ``obs`` is reshaped to ``space.shape``.
    * ``gym.spaces.Discrete``: ``obs`` is passed to ``special.from_onehot``.
    * ``gym.spaces.Tuple``: ``obs`` is cut into consecutive pieces whose
      lengths come from ``flat_dim`` of each sub-space, and every piece is
      unflattened recursively; the results are returned as a tuple.

    Args:
        space: the space describing the target structure.
        obs: the flattened observation.

    Returns:
        The unflattened observation; its type depends on ``space``.

    Raises:
        NotImplementedError: if ``space`` is none of the types listed above.
    """
    if isinstance(space, gym.spaces.Box):
        return np.asarray(obs).reshape(space.shape)

    if isinstance(space, gym.spaces.Discrete):
        return special.from_onehot(obs)

    if not isinstance(space, gym.spaces.Tuple):
        raise NotImplementedError

    subspaces = space.spaces
    widths = [flat_dim(sub) for sub in subspaces]
    # Split points are the running totals, excluding the final one (== len(obs)).
    split_points = np.cumsum(widths)[:-1]
    pieces = np.split(obs, split_points)
    return tuple(unflatten(sub, piece) for sub, piece in zip(subspaces, pieces))
Beispiel #3
0
 def unflatten(self, x):
     """Undo flattening of ``x`` by delegating to ``special.from_onehot``.

     Args:
         x: the flattened value; presumably a one-hot vector -- TODO confirm
             against ``special.from_onehot``.

     Returns:
         Whatever ``special.from_onehot`` returns for ``x``.
     """
     decoded = special.from_onehot(x)
     return decoded