class MyEnv(Env):
    """Toy 2-D goal-reaching environment.

    The state is a 2-vector that starts at zero on every reset and is
    shifted by each (scaled) action. A goal is drawn from ``goal_space`` on
    reset, and the observation is the state followed by the goal.
    """

    def __init__(self) -> None:
        super().__init__()
        # ``state`` and ``current_goal`` stay None until reset() is called.
        self.state = None
        self.current_goal = None
        self.iter = 0  # number of steps taken since the last reset
        self._action_space = FloatBox(low=-0.01, high=0.01, shape=2)
        # Four entries: current state followed by the goal.
        self._observation_space = FloatBox(low=-1., high=1., shape=4)
        self.goal_space = FloatBox(low=-1., high=1., shape=2)

    def get_obs(self):
        """Return state and goal joined into a single float32 array."""
        stacked = np.concatenate((self.state, self.current_goal))
        return stacked.astype(np.float32)

    def step(self, action):
        """Advance one step.

        The action is multiplied by the first entry of the action space's
        upper bound and added to the state in place. The reward is
        ``exp(-0.5 * distance_to_goal) / horizon`` and the episode is flagged
        done when the step counter equals the horizon.
        """
        self.iter += 1

        scale = self._action_space.high[0]
        self.state += action * scale

        goal_offset = self.state - self.current_goal
        dist = np.linalg.norm(goal_offset)
        rew = np.exp(-0.5 * dist) / self.horizon

        observation = self.get_obs()
        done = self.iter == self.horizon
        return EnvStep(observation, rew, done, EnvInfo())

    def reset(self):
        """Zero the state, sample a new goal, clear the step counter.

        Returns the first observation of the new episode.
        """
        self.state = np.zeros(2)
        self.current_goal = self.goal_space.sample()
        self.iter = 0
        return self.get_obs()

    @property
    def horizon(self):
        # Resolves to a module-level ``horizon`` name that is defined outside
        # this class.
        return horizon
Esempio n. 2
0
class Box(Space):
    """A box in R^n, with specifiable bound and dtype.

    Thin wrapper that builds an ``IntBox`` for signed/unsigned integer dtypes
    or a ``FloatBox`` for floating dtypes, stores it in ``self.box``, and
    forwards ``sample``, ``null_value``, ``shape`` and ``bounds`` to it.
    """

    def __init__(self,
                 low,
                 high,
                 shape=None,
                 dtype="float32",
                 null_value=None):
        """
        low and high are scalars, applied across all dimensions of shape.

        Args:
            low: lower bound handed to the underlying box.
            high: upper bound handed to the underlying box.
            shape: shape handed to the underlying box.
            dtype: anything ``np.dtype`` accepts; its kind selects the
                underlying box class.
            null_value: forwarded to the underlying box (``None`` leaves the
                choice to that box).

        Raises:
            NotImplementedError: if the dtype is neither integer nor float.
        """
        dtype = np.dtype(dtype)
        if dtype.kind in ('i', 'u'):
            box_cls = IntBox
        elif dtype.kind == 'f':
            box_cls = FloatBox
        else:
            raise NotImplementedError(dtype)
        # Forward the caller's null_value; it used to be dropped by passing a
        # hard-coded None here.
        self.box = box_cls(low,
                           high,
                           shape=shape,
                           dtype=dtype,
                           null_value=null_value)

    def sample(self):
        """Return a sample drawn by the underlying box."""
        return self.box.sample()

    def null_value(self):
        """Return the underlying box's null value."""
        return self.box.null_value()

    def __repr__(self):
        box = self.box
        # Only integer boxes get the ``high - 1`` adjustment; for a float box
        # it would misreport the upper bound.
        # NOTE(review): assumes IntBox's ``high`` is exclusive -- confirm.
        upper = box.high - 1 if isinstance(box, IntBox) else box.high
        return f"Box({box.low}-{upper} shape={box.shape} dtype={box.dtype})"

    @property
    def shape(self):
        """Shape of the underlying box."""
        return self.box.shape

    @property
    def bounds(self):
        """Bounds as reported by the underlying box."""
        return self.box.bounds
Esempio n. 3
0
class MyEnv(Env):
    """Batched 2-D goal-reaching environment with discrete actions.

    Keeps ``batch_B`` independent 2-D states, each with its own goal, and
    runs for ``batch_T`` steps per episode. Actions are indices into
    ``action_discrete_mapping``, whose rows are the 2-D displacements.
    """

    def __init__(self, batch_T, batch_B) -> None:
        super().__init__()
        self.batch_T = batch_T
        self.batch_B = batch_B
        # ``state`` and ``current_goal`` stay None until reset() is called.
        self.state = None
        self.current_goal = None
        self.iter = 0  # number of steps taken since the last reset
        # Row i is the displacement applied by action i (row 0 is a no-op).
        self.action_discrete_mapping = np.array([
            [0.0, 0.0],
            [-0.1, 0.0],
            [0.0, -0.1],
            [0.1, 0.0],
            [0.0, 0.1],
        ])
        num_actions = len(self.action_discrete_mapping)
        self._action_space = IntBox(low=0, high=num_actions)
        # Four entries: current state followed by the goal.
        self._observation_space = FloatBox(low=-1., high=1., shape=4)
        self.goal_space = FloatBox(low=-1., high=1., shape=(self.batch_B, 2))

    def get_obs(self):
        """Return state and goal joined along the last axis, as float32."""
        stacked = np.concatenate((self.state, self.current_goal), axis=-1)
        return stacked.astype(np.float32)

    def step(self, action):
        """Advance one step.

        ``action`` indexes ``action_discrete_mapping``; the selected
        displacement is added to the state in place. The reward is
        ``exp(-0.5 * distance_to_goal) / horizon`` with the distance taken
        over the last axis, and the episode is flagged done when the step
        counter equals the horizon.
        """
        self.iter += 1

        displacement = self.action_discrete_mapping[action]
        self.state += displacement

        goal_offset = self.state - self.current_goal
        dist = np.linalg.norm(goal_offset, axis=-1)
        rew = np.exp(-0.5 * dist) / self.horizon

        observation = self.get_obs()
        done = self.iter == self.horizon
        return EnvStep(observation, rew, done, EnvInfo())

    def reset(self):
        """Zero every state, sample new goals, clear the step counter.

        Returns the first observation of the new episode.
        """
        self.state = np.zeros((self.batch_B, 2))
        self.current_goal = self.goal_space.sample()
        self.iter = 0
        return self.get_obs()

    @property
    def horizon(self):
        # Episode length is the ``batch_T`` given at construction.
        return self.batch_T
Esempio n. 4
0
 def action_space(self):
     """Build a [0, 1] FloatBox with one entry per wrapped-env action.

     The length comes from ``self.env.action_space.n`` and the dtype from
     ``self._dtype``. The box's ``sample`` attribute is replaced with
     ``self._sample_action`` before it is returned.
     """
     num_actions = self.env.action_space.n
     box = FloatBox(low=0, high=1, shape=(num_actions, ), dtype=self._dtype)
     # Override the box's sampler with this object's own sampling routine.
     box.sample = self._sample_action
     return box