Example 1

A step() method for a gate-navigation task: a vehicle moving at constant speed self._v steers inside a square field of side self.field_size and must pass through a gate. All snippets on this page assume numpy is imported as np and that a normalize_angle helper is available (a sketch of the latter follows this example).
    def step(self, action):
        # Clip the commanded turn rate to the admissible range.
        r = np.clip(action[0], -self.omega_max, self.omega_max)

        # One Euler step of the vehicle dynamics: constant forward speed,
        # with x driven by sin and y by cos (the heading is measured from
        # the y-axis), and a first-order lag of time constant self._T on
        # the turn rate.
        new_state = np.empty(4)
        new_state[0] = self._state[0] + self._v * np.sin(self._state[2]) * self._dt
        new_state[1] = self._state[1] + self._v * np.cos(self._state[2]) * self._dt
        new_state[2] = normalize_angle(self._state[2] + self._state[3] * self._dt)
        new_state[3] = self._state[3] + (r - self._state[3]) * self._dt / self._T

        if new_state[0] > self.field_size or new_state[1] > self.field_size \
                or new_state[0] < 0 or new_state[1] < 0:
            # Leaving the field ends the episode with a large penalty.
            reward = -100
            absorbing = True
        elif self._through_gate(self._state[:2], new_state[:2]):
            # Crossing the gate ends the episode without penalty.
            reward = 0
            absorbing = True
        else:
            # A constant step cost encourages reaching the gate quickly.
            reward = -1
            absorbing = False

        self._state = new_state

        return self._state, reward, absorbing, {}
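
All four examples rely on a normalize_angle helper that is not shown. A minimal sketch, assuming it simply wraps an angle into [-pi, pi):

    import numpy as np

    def normalize_angle(angle):
        # Wrap an angle in radians into the interval [-pi, pi).
        return (angle + np.pi) % (2 * np.pi) - np.pi
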
Example 2

A step() method for a pendulum-style environment: the torque command is clipped, the continuous dynamics are integrated with scipy's odeint over one control interval, and the reward cos(theta) is highest when the angle is zero.
    def step(self, action):
        # Clip the torque command to the admissible range.
        u = np.clip(action[0], -self._max_u, self._max_u)

        # Integrate the continuous-time dynamics over one control step;
        # odeint (from scipy.integrate) returns the trajectory, whose
        # last row is the state after self._dt seconds.
        new_state = odeint(self._dynamics, self._state, [0, self._dt], (u,))

        self._state = np.array(new_state[-1])
        # Keep the angle in [-pi, pi] and bound the angular velocity.
        self._state[0] = normalize_angle(self._state[0])
        self._state[1] = np.clip(self._state[1], -self._max_omega, self._max_omega)

        reward = np.cos(self._state[0])

        # The episode never terminates on its own: absorbing is always False.
        return self._state, reward, False, {}
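
The _dynamics callable passed to odeint is not part of the snippet. A minimal sketch under the assumption of a torque-driven pendulum with the angle measured from the upright position (the parameter names self._g, self._l and self._m are hypothetical):

    def _dynamics(self, state, t, u):
        # state = [theta, omega]; returns their time derivatives, as
        # expected by scipy.integrate.odeint.
        theta, omega = state
        d_theta = omega
        d_omega = self._g / self._l * np.sin(theta) + u / (self._m * self._l ** 2)
        return d_theta, d_omega
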
Example 3

The matching reset() method: it either draws (or fixes) an initial angle with zero angular velocity, or sanitizes a caller-supplied state with the same bounds used in step().
    def reset(self, state=None):
        if state is None:
            # Start either at a random angle or at the fixed angle pi/2,
            # always with zero angular velocity.
            if self._random:
                angle = np.random.uniform(-np.pi, np.pi)
            else:
                angle = np.pi / 2

            self._state = np.array([angle, 0.])
        else:
            # Copy the supplied state so the caller's array is not
            # mutated in place, then enforce the usual bounds.
            self._state = np.array(state, dtype=float)
            self._state[0] = normalize_angle(self._state[0])
            self._state[1] = np.clip(self._state[1], -self._max_omega, self._max_omega)

        return self._state
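
Put together, reset() and step() support the usual interaction loop. A sketch (the Pendulum constructor, the horizon of 500 steps and the zero-torque policy are all hypothetical):

    env = Pendulum()
    state = env.reset()
    total_reward = 0.
    for _ in range(500):   # fixed horizon, since this step() never terminates
        action = np.array([0.])   # placeholder zero-torque policy
        state, reward, absorbing, _ = env.step(action)
        total_reward += reward
        if absorbing:
            break
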
Example 4

Two methods from a ROS/Gazebo environment for a turtlebot3_burger robot: get_state() polls Gazebo for the robot pose and converts it to (x, y, yaw), and step() applies one clipped turn-rate command over n_steps_action Euler sub-steps of a gate-navigation task similar to Example 1.
    def get_state(self):
        # Poll the Gazebo model-state service until it reports success.
        ok = False
        while not ok:
            res = self._model_state_service('turtlebot3_burger', '')
            ok = res.success

        x = res.pose.position.x
        y = res.pose.position.y

        # Convert the orientation quaternion to Euler angles and keep
        # only the yaw, wrapped into [-pi, pi].
        quaternion = (
            res.pose.orientation.x,
            res.pose.orientation.y,
            res.pose.orientation.z,
            res.pose.orientation.w)
        euler = tf.transformations.euler_from_quaternion(quaternion)

        yaw = normalize_angle(euler[2])

        # The second return value is an absorbing flag, always False here.
        return np.array([x, y, yaw]), False
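
Here _model_state_service is presumably a rospy service proxy for Gazebo's model-state query, created once (e.g. in the environment's __init__) with the standard ROS/Gazebo API:

    import rospy
    from gazebo_msgs.srv import GetModelState

    rospy.wait_for_service('/gazebo/get_model_state')
    self._model_state_service = rospy.ServiceProxy('/gazebo/get_model_state',
                                                   GetModelState)
    # Called as self._model_state_service(model_name, relative_entity_name);
    # the response carries pose, twist, a success flag and a status message.
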
    def step(self, action):
        # Clip the commanded turn rate to the admissible range.
        r = np.clip(action[0], -self.omega_max, self.omega_max)

        new_state = self._state

        # Hold the command for n_steps_action Euler sub-steps.
        for _ in range(self.n_steps_action):
            state = new_state
            new_state = np.empty(4)
            new_state[0] = state[0] + self._v * np.cos(state[2]) * self._dt
            new_state[1] = state[1] + self._v * np.sin(state[2]) * self._dt
            new_state[2] = normalize_angle(state[2] + state[3] * self._dt)
            new_state[3] = state[3] + (r - state[3]) * self._dt / self._T

            if new_state[0] > self.field_size \
               or new_state[1] > self.field_size \
               or new_state[0] < 0 or new_state[1] < 0:
                # Out of the field: clamp the position onto the boundary
                # and terminate with the out-of-field reward.
                new_state[0] = np.clip(new_state[0], 0, self.field_size)
                new_state[1] = np.clip(new_state[1], 0, self.field_size)
                reward = self._out_reward
                absorbing = True
                break
            elif self._through_gate(state[:2], new_state[:2]):
                # Test the segment travelled in this sub-step; using
                # self._state here (as the original snippet did) would
                # compare against the position before the whole macro-action
                # and misjudge crossings when n_steps_action > 1.
                reward = self._success_reward
                absorbing = True
                break
            else:
                reward = -1
                absorbing = False

        self._state = new_state

        return self._state, reward, absorbing, {}
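
The _through_gate predicate is not shown. A plausible sketch, assuming the gate is stored as a segment between two endpoints self._gate_s and self._gate_e (hypothetical names) and using a standard 2D segment-intersection test:

    def _through_gate(self, start, end):
        # True if the segment start -> end properly crosses the gate.
        def ccw(a, b, c):
            # Twice the signed area of triangle (a, b, c); positive if
            # the points are counter-clockwise.
            return (b[0] - a[0]) * (c[1] - a[1]) - (b[1] - a[1]) * (c[0] - a[0])

        g1, g2 = self._gate_s, self._gate_e
        # The two segments intersect iff the endpoints of each lie on
        # opposite sides of the other.
        return (ccw(g1, g2, start) * ccw(g1, g2, end) < 0
                and ccw(start, end, g1) * ccw(start, end, g2) < 0)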