Example #1
    def step(self, action):
        # Map the discrete action to a torque of -max_u, 0 or +max_u.
        if action == 0:
            u = -self._max_u
        elif action == 1:
            u = 0.
        else:
            u = self._max_u

        self._last_u = u

        # Apply uniform actuation noise and integrate the dynamics over one
        # control interval of length dt.
        u += np.random.uniform(-self._noise_u, self._noise_u)
        new_state = odeint(self._dynamics, self._state, [0, self._dt],
                           (u,))

        self._state = np.array(new_state[-1])
        self._state[0] = normalize_angle(self._state[0])

        # Falling beyond +/- pi/2 terminates the episode with a penalty.
        if np.abs(self._state[0]) > np.pi * .5:
            reward = -1.
            absorbing = True
        else:
            reward = 0.
            absorbing = False

        return self._state, reward, absorbing, {}
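All of the snippets on this page call a normalize_angle helper to keep the angle component of the state bounded. A minimal sketch of such a helper, assuming it simply wraps an angle to the interval [-pi, pi) (the original utility may differ in detail):

import numpy as np

def normalize_angle(angle):
    # Wrap an arbitrary angle (in radians) into [-pi, pi).
    return (angle + np.pi) % (2. * np.pi) - np.pi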
Example #2
    def reset(self, state=None):
        if state is None:
            # Sample an initial angle close to the upright position.
            angle = np.random.uniform(-np.pi / 8., np.pi / 8.)

            self._state = np.array([angle, 0.])
        else:
            self._state = state
            self._state[0] = normalize_angle(self._state[0])

        self._last_u = 0
        return self._state
    def step(self, action):
        # Clip the continuous action to the admissible torque range,
        # then integrate the dynamics over one control interval.
        u = self._bound(action[0], -self._max_u, self._max_u)
        new_state = odeint(self._dynamics, self._state, [0, self._dt],
                           (u,))

        # Wrap the angle and clip the angular velocity to its valid range.
        self._state = np.array(new_state[-1])
        self._state[0] = normalize_angle(self._state[0])
        self._state[1] = self._bound(self._state[1], -self._max_omega,
                                     self._max_omega)

        # Reward is the cosine of the angle: maximal when the pendulum is upright.
        reward = np.cos(self._state[0])

        self._last_u = u.item()

        return self._state, reward, False, {}
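The continuous-action examples clip the action and the angular velocity with a _bound helper. A minimal sketch, assuming _bound is a plain clip to a closed interval (the method in the original environment class may be implemented differently):

import numpy as np

def _bound(x, min_value, max_value):
    # Clip a scalar or array to the closed interval [min_value, max_value].
    return np.clip(x, min_value, max_value)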
Example #4
    def get_state(self):
        # Poll the Gazebo model state service until the call succeeds.
        ok = False
        while not ok:
            res = self._model_state_service('turtlebot3_burger', '')
            ok = res.success

        x = res.pose.position.x
        y = res.pose.position.y

        # Convert the orientation quaternion to Euler angles and keep the yaw.
        quaternion = (res.pose.orientation.x, res.pose.orientation.y,
                      res.pose.orientation.z, res.pose.orientation.w)
        euler = tf.transformations.euler_from_quaternion(quaternion)

        yaw = normalize_angle(euler[2])

        return np.array([x, y, yaw]), False
    def reset(self, state=None):
        if state is None:
            # Either sample a random initial angle or start from pi / 2.
            if self._random:
                angle = np.random.uniform(-np.pi, np.pi)
            else:
                angle = np.pi / 2

            self._state = np.array([angle, 0.])
        else:
            self._state = state
            self._state[0] = normalize_angle(self._state[0])
            self._state[1] = self._bound(self._state[1], -self._max_omega,
                                         self._max_omega)

        self._last_u = 0.0
        return self._state
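get_state above converts the orientation quaternion reported by the Gazebo service into a yaw angle with tf.transformations.euler_from_quaternion. The same yaw can also be computed directly from the quaternion components; the helper below is a hypothetical stand-alone sketch and is not part of the original class:

import numpy as np

def yaw_from_quaternion(x, y, z, w):
    # Standard closed form for the yaw (rotation about z) of a unit quaternion.
    return np.arctan2(2.0 * (w * z + x * y), 1.0 - 2.0 * (y * y + z * z))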
Example #6
    def step(self, action):
        u = self._bound(action[0], -self._max_u, self._max_u)
        new_state = odeint(self._dynamics, self._state, [0, self._dt], (u, ))

        self._state = np.array(new_state[-1])
        self._state[0] = normalize_angle(self._state[0])

        if abs(self._state[0]) > np.pi / 2:
            # Falling beyond +/- pi/2 ends the episode with a large penalty.
            absorbing = True
            reward = -10000
        else:
            # Otherwise the reward is a negative quadratic cost on the state.
            absorbing = False
            Q = np.diag([3.0, 0.1, 0.1])

            x = self._state

            J = x.dot(Q).dot(x)

            reward = -J

        return self._state, reward, absorbing, {}
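The non-terminal reward is a negative quadratic cost J = x.dot(Q).dot(x) on the state. A quick worked check with a hypothetical state (values chosen purely for illustration):

import numpy as np

Q = np.diag([3.0, 0.1, 0.1])
x = np.array([0.1, 0.2, 0.3])   # hypothetical state

J = x.dot(Q).dot(x)             # 3.0*0.01 + 0.1*0.04 + 0.1*0.09 = 0.043
reward = -J                     # -0.043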
Example #7
    def step(self, action):
        # Clip the commanded turning rate to the admissible range.
        r = self._bound(action[0], -self.omega_max, self.omega_max)

        new_state = self._state

        # Integrate the vehicle kinematics for n_steps_action sub-steps.
        for _ in range(self.n_steps_action):
            state = new_state
            new_state = np.empty(4)
            new_state[0] = state[0] + self._v * np.cos(state[2]) * self._dt
            new_state[1] = state[1] + self._v * np.sin(state[2]) * self._dt
            new_state[2] = normalize_angle(state[2] + state[3] * self._dt)
            new_state[3] = state[3] + (r - state[3]) * self._dt / self._T

            # Leaving the field ends the episode with a penalty.
            if new_state[0] > self.field_size \
               or new_state[1] > self.field_size \
               or new_state[0] < 0 or new_state[1] < 0:

                new_state[0] = self._bound(new_state[0], 0, self.field_size)
                new_state[1] = self._bound(new_state[1], 0, self.field_size)

                reward = self._out_reward
                absorbing = True
                break

            # Passing through the gate ends the episode with a success reward.
            elif self._through_gate(state[:2], new_state[:2]):
                reward = self._success_reward
                absorbing = True
                break
            else:
                # Otherwise a constant per-step penalty is given.
                reward = -1
                absorbing = False

        self._state = new_state

        return self._state, reward, absorbing, {}
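Each step method above returns a (state, reward, absorbing, info) tuple, so the environments can be driven by a generic rollout loop. A minimal sketch, assuming an env object exposing the reset()/step() interface shown in these examples and a policy callable mapping states to actions (both names are placeholders):

def rollout(env, policy, horizon=500):
    # Run one episode of at most `horizon` steps and sum up the reward.
    state = env.reset()
    total_reward = 0.
    for _ in range(horizon):
        action = policy(state)
        state, reward, absorbing, _ = env.step(action)
        total_reward += reward
        if absorbing:
            break
    return total_reward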