import numpy as np
from collections import namedtuple

from rlpyt.envs.base import Env, EnvStep
from rlpyt.spaces.float_box import FloatBox

# Empty per-step info struct (assumed: no extra diagnostics are returned).
EnvInfo = namedtuple("EnvInfo", [])


class MyEnv(Env):
    """Goal-reaching point environment: a 2D state is nudged toward a randomly sampled goal."""

    def __init__(self, horizon=100) -> None:  # horizon: assumed episode length in steps
        super().__init__()
        self.state = None
        self.current_goal = None
        self.iter = 0
        self._horizon = horizon
        self._action_space = FloatBox(low=-0.01, high=0.01, shape=2)
        self._observation_space = FloatBox(low=-1., high=1., shape=4)  # current state and goal
        self.goal_space = FloatBox(low=-1., high=1., shape=2)

    def get_obs(self):
        return np.concatenate([self.state, self.current_goal]).astype(np.float32)

    def step(self, action):
        self.iter += 1
        self.state += action * self._action_space.high[0]
        # Reward peaks at 1 / horizon when the state sits exactly on the goal.
        dist = np.linalg.norm(self.state - self.current_goal)
        rew = np.exp(-0.5 * dist) / self.horizon
        return EnvStep(self.get_obs(), rew, self.iter == self.horizon, EnvInfo())

    def reset(self):
        self.state = np.zeros(2)
        self.current_goal = self.goal_space.sample()
        self.iter = 0
        return self.get_obs()

    @property
    def horizon(self):
        return self._horizon
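# Usage sketch (an assumed example, not part of the original code): roll out one
# episode of MyEnv with uniformly sampled actions and report the return.
env = MyEnv()
obs = env.reset()
episode_return, done = 0.0, False
while not done:
    # EnvStep unpacks as (observation, reward, done, env_info).
    obs, reward, done, info = env.step(env.action_space.sample())
    episode_return += reward
print(f"return over {env.horizon} steps: {episode_return:.4f}")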
import numpy as np

from rlpyt.spaces.base import Space
from rlpyt.spaces.int_box import IntBox
from rlpyt.spaces.float_box import FloatBox


class Box(Space):
    """A box in R^n, with specifiable bounds and dtype."""

    def __init__(self, low, high, shape=None, dtype="float32", null_value=None):
        """
        low and high are scalars, applied across all dimensions of shape.
        """
        dtype = np.dtype(dtype)
        if dtype.kind == 'i' or dtype.kind == 'u':
            self.box = IntBox(low, high, shape=shape, dtype=dtype,
                              null_value=null_value)
        elif dtype.kind == 'f':
            self.box = FloatBox(low, high, shape=shape, dtype=dtype,
                                null_value=null_value)
        else:
            raise NotImplementedError(dtype)

    def sample(self):
        return self.box.sample()

    def null_value(self):
        return self.box.null_value()

    def __repr__(self):
        return (f"Box({self.box.low}-{self.box.high - 1} "
                f"shape={self.box.shape} dtype={self.box.dtype})")

    @property
    def shape(self):
        return self.box.shape

    @property
    def bounds(self):
        return self.box.bounds
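# Usage sketch (assumed example): the dtype kind decides which concrete space
# backs the Box, so float dtypes dispatch to a FloatBox and (un)signed integer
# dtypes to an IntBox.
continuous = Box(low=-1.0, high=1.0, shape=(3,), dtype="float32")  # backed by FloatBox
discrete = Box(low=0, high=5, shape=(4,), dtype="int64")           # backed by IntBox
print(continuous.sample().dtype, discrete.sample().dtype)  # dtypes follow the requested kinds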
class MyEnv(Env):
    """Batched goal-reaching environment: batch_B independent copies driven by a discrete set of 2D moves."""

    def __init__(self, batch_T, batch_B) -> None:
        super().__init__()
        self.batch_T = batch_T
        self.batch_B = batch_B
        self.state = None
        self.current_goal = None
        self.iter = 0
        # Discrete actions index into fixed 2D displacements: stay, left, down, right, up.
        self.action_discrete_mapping = np.array([
            [0.0, 0.0],
            [-0.1, 0.0],
            [0.0, -0.1],
            [0.1, 0.0],
            [0.0, 0.1],
        ])
        self._action_space = IntBox(low=0, high=len(self.action_discrete_mapping))
        self._observation_space = FloatBox(low=-1., high=1., shape=4)  # current state and goal
        self.goal_space = FloatBox(low=-1., high=1., shape=(self.batch_B, 2))

    def get_obs(self):
        return np.concatenate([self.state, self.current_goal], axis=-1).astype(np.float32)

    def step(self, action):
        self.iter += 1
        self.state += self.action_discrete_mapping[action]
        dist = np.linalg.norm(self.state - self.current_goal, axis=-1)
        rew = np.exp(-0.5 * dist) / self.horizon
        return EnvStep(self.get_obs(), rew, self.iter == self.horizon, EnvInfo())

    def reset(self):
        self.state = np.zeros((self.batch_B, 2))
        self.current_goal = self.goal_space.sample()
        self.iter = 0
        return self.get_obs()

    @property
    def horizon(self):
        return self.batch_T
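# Usage sketch (assumed example): the batched variant steps all batch_B copies
# at once, so actions are an integer array with one entry per environment copy.
env = MyEnv(batch_T=50, batch_B=8)
obs = env.reset()                                # obs has shape (8, 4)
returns = np.zeros(env.batch_B)
for _ in range(env.horizon):
    actions = np.random.randint(len(env.action_discrete_mapping), size=env.batch_B)
    obs, reward, done, info = env.step(actions)  # reward has shape (8,)
    returns += reward
print(returns)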
def action_space(self):
    # Present the wrapped env's discrete actions as a one-hot FloatBox;
    # sampling is delegated to the wrapper's own helper.
    shape = (self.env.action_space.n,)
    space = FloatBox(low=0, high=1, shape=shape, dtype=self._dtype)
    space.sample = self._sample_action
    return space
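# Sketch of the sampling override (assumption: _sample_action is not shown
# above, and this is one plausible implementation): draw a random discrete
# action from the wrapped env and return it as a one-hot float vector matching
# the FloatBox constructed in action_space.
def _sample_action(self):
    n = self.env.action_space.n
    one_hot = np.zeros(n, dtype=self._dtype)
    one_hot[np.random.randint(n)] = 1
    return one_hot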