def __init__(self, max_episode_length=500, random_stable_position=False):
    """Set up the environment on top of the base ``CartPoleEnv`` state.

    The action space is replaced by a one-dimensional ``spaces.Box``
    bounded by ``[-force_mag, force_mag]``.

    Args:
        max_episode_length: Value stored in ``_max_episode_length``.
        random_stable_position: When true, a stable cart position is drawn
            uniformly at random and the observation space gains one extra
            dimension bounded by the sampling limit.
    """
    CartPoleEnv.__init__(self)

    # Bookkeeping that does not depend on the base-class state.
    self._time_step = 0
    self._max_episode_length = max_episode_length
    self._stable_x = None

    # Symmetric action bounds built from the force magnitude.
    self.action_high = np.asarray([self.force_mag])
    self.action_space = spaces.Box(-self.action_high, self.action_high)

    # NOTE(review): the original indentation was lost. The observation-space
    # extension is assumed to belong to this branch because `_rand_pos_max`
    # is only assigned here -- confirm against the upstream file.
    if random_stable_position:
        # Sampling limit sits 0.4 inside the position threshold.
        self._rand_pos_max = self.x_threshold - 0.4
        self._stable_x = np.random.uniform(
            -self._rand_pos_max, self._rand_pos_max
        )

        # Append the sampling limit as one extra observation bound.
        extra_bound = np.asarray([self._rand_pos_max])
        extended_high = np.hstack((self.observation_space.high, extra_bound))
        self.observation_space = spaces.Box(-extended_high, extended_high)

    log.debug("Action Space {}".format(self.action_space))
    log.debug("Observations Space {}".format(self.observation_space))
def __init__(self):
    """Run the base ``CartPoleEnv`` constructor and zero the step counters."""
    CartPoleEnv.__init__(self)

    # Both counters start at zero; they are updated outside this block.
    self.steps_beyond_done = 0
    self.success_steps = 0