def step(self, action):
    # Clip the commanded turn rate to the admissible range.
    r = np.maximum(-self.omega_max, np.minimum(self.omega_max, action[0]))

    # Unicycle kinematics with a first-order lag (time constant T) on the
    # turn rate.
    new_state = np.empty(4)
    new_state[0] = self._state[0] + self._v * np.sin(self._state[2]) * self._dt
    new_state[1] = self._state[1] + self._v * np.cos(self._state[2]) * self._dt
    new_state[2] = normalize_angle(self._state[2] + self._state[3] * self._dt)
    new_state[3] = self._state[3] + (r - self._state[3]) * self._dt / self._T

    if new_state[0] > self.field_size or new_state[1] > self.field_size \
            or new_state[0] < 0 or new_state[1] < 0:
        # Leaving the field is penalized and terminal.
        reward = -100
        absorbing = True
    elif self._through_gate(self._state[:2], new_state[:2]):
        # Passing through the gate ends the episode with zero reward.
        reward = 0
        absorbing = True
    else:
        reward = -1
        absorbing = False

    self._state = new_state

    return self._state, reward, absorbing, {}
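All of these listings call a normalize_angle helper that is not shown. A minimal sketch, assuming it simply wraps angles into [-pi, pi):

import numpy as np

def normalize_angle(angle):
    # Wrap an angle into the interval [-pi, pi).
    return (angle + np.pi) % (2 * np.pi) - np.pi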
def step(self, action):
    # Clip the torque command to the admissible range.
    u = np.maximum(-self._max_u, np.minimum(self._max_u, action[0]))

    # Integrate the continuous-time dynamics over one control step.
    new_state = odeint(self._dynamics, self._state, [0, self._dt], (u,))

    self._state = np.array(new_state[-1])
    self._state[0] = normalize_angle(self._state[0])
    self._state[1] = np.maximum(-self._max_omega,
                                np.minimum(self._state[1], self._max_omega))

    # Cosine reward: maximal when theta = 0.
    reward = np.cos(self._state[0])

    return self._state, reward, False, {}
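Since odeint calls its callback as func(y, t, *args), self._dynamics must accept the state, the time, and the torque, in that order. A sketch of a compatible dynamics method, assuming a standard damped pendulum with hypothetical mass, length, friction, and gravity attributes self._m, self._l, self._mu, self._g (the model actually used may differ):

def _dynamics(self, state, t, u):
    # state = [theta, omega]; returns d(state)/dt for odeint.
    theta, omega = state
    d_theta = omega
    # Hypothetical damped-pendulum dynamics with applied torque u.
    d_omega = (-self._mu * omega
               + self._m * self._g * self._l * np.sin(theta)
               + u) / (self._m * self._l ** 2)
    return d_theta, d_omega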
def reset(self, state=None):
    if state is None:
        # Draw a random initial angle or use the fixed default.
        if self._random:
            angle = np.random.uniform(-np.pi, np.pi)
        else:
            angle = np.pi / 2

        self._state = np.array([angle, 0.])
    else:
        # Sanitize a user-provided state.
        self._state = state
        self._state[0] = normalize_angle(self._state[0])
        self._state[1] = np.maximum(-self._max_omega,
                                    np.minimum(self._state[1],
                                               self._max_omega))

    return self._state
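Together, reset and step support the usual interaction loop. A minimal usage sketch, assuming a hypothetical Pendulum class exposing the two methods above:

env = Pendulum()  # hypothetical class name
state = env.reset()
for _ in range(1000):
    action = np.array([0.])  # e.g., a zero-torque policy
    state, reward, absorbing, info = env.step(action)
    if absorbing:
        state = env.reset()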
def get_state(self):
    # Query Gazebo for the robot pose, retrying until the call succeeds.
    ok = False
    while not ok:
        res = self._model_state_service('turtlebot3_burger', '')
        ok = res.success

    x = res.pose.position.x
    y = res.pose.position.y

    # Extract the yaw angle from the orientation quaternion.
    quaternion = (res.pose.orientation.x,
                  res.pose.orientation.y,
                  res.pose.orientation.z,
                  res.pose.orientation.w)
    euler = tf.transformations.euler_from_quaternion(quaternion)
    yaw = normalize_angle(euler[2])

    return np.array([x, y, yaw]), False
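Here self._model_state_service is assumed to be a ROS service proxy for Gazebo's /gazebo/get_model_state service, whose response carries the pose and a success flag used above. A minimal setup sketch, typically placed in the constructor:

import rospy
from gazebo_msgs.srv import GetModelState

# Usually done once, e.g. in __init__:
rospy.wait_for_service('/gazebo/get_model_state')
self._model_state_service = rospy.ServiceProxy('/gazebo/get_model_state',
                                               GetModelState)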
def step(self, action):
    # Clip the commanded turn rate to the admissible range.
    r = np.maximum(-self.omega_max, np.minimum(self.omega_max, action[0]))

    new_state = self._state
    for _ in range(self.n_steps_action):
        state = new_state
        new_state = np.empty(4)

        # Unicycle kinematics with a first-order lag (time constant T) on
        # the turn rate.
        new_state[0] = state[0] + self._v * np.cos(state[2]) * self._dt
        new_state[1] = state[1] + self._v * np.sin(state[2]) * self._dt
        new_state[2] = normalize_angle(state[2] + state[3] * self._dt)
        new_state[3] = state[3] + (r - state[3]) * self._dt / self._T

        if new_state[0] > self.field_size \
                or new_state[1] > self.field_size \
                or new_state[0] < 0 or new_state[1] < 0:
            # Clip the position back into the field and terminate.
            new_state[0] = np.maximum(
                0, np.minimum(new_state[0], self.field_size))
            new_state[1] = np.maximum(
                0, np.minimum(new_state[1], self.field_size))

            reward = self._out_reward
            absorbing = True
            break
        elif self._through_gate(self._state[:2], new_state[:2]):
            # Check whether the path from the pre-action position crosses
            # the gate.
            reward = self._success_reward
            absorbing = True
            break
        else:
            reward = -1
            absorbing = False

    self._state = new_state

    return self._state, reward, absorbing, {}
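The _through_gate predicate is not shown. A minimal sketch, assuming the gate is a line segment stored in hypothetical numpy-array attributes self._gate_s and self._gate_e, and that the test is a standard segment-segment intersection between the gate and the path travelled during the step:

def _through_gate(self, start, end):
    # True if the segment start->end crosses the gate segment
    # self._gate_s -> self._gate_e (hypothetical attributes).
    d = self._gate_e - self._gate_s
    a = end - start
    denom = d[0] * a[1] - d[1] * a[0]
    if denom == 0:
        return False  # parallel segments: treated as no crossing

    diff = start - self._gate_s
    t = (diff[0] * a[1] - diff[1] * a[0]) / denom  # position along the gate
    s = (diff[0] * d[1] - diff[1] * d[0]) / denom  # position along the path

    return 0 <= t <= 1 and 0 <= s <= 1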