Code Example #1
    def step(self, action):
        # Clip the commanded turning rate to the admissible range.
        r = np.maximum(-self.omega_max, np.minimum(self.omega_max, action[0]))

        # Euler step of the ship kinematics. The heading new_state[2] is
        # measured from the y-axis, hence sin for x and cos for y; the
        # turning rate tracks the command with time constant self._T.
        new_state = np.empty(4)
        new_state[0] = self._state[0] + self._v * np.sin(self._state[2]) * \
            self._dt
        new_state[1] = self._state[1] + self._v * np.cos(self._state[2]) * \
            self._dt
        new_state[2] = normalize_angle(self._state[2] +
                                       self._state[3] * self._dt)
        new_state[3] = self._state[3] + (r - self._state[3]) * self._dt / \
            self._T

        # Terminal conditions: leaving the field or reaching the goal.
        pos = np.array([new_state[0], new_state[1]])
        if new_state[0] > self.field_size or new_state[1] > self.field_size \
                or new_state[0] < 0 or new_state[1] < 0:
            reward = self._out_reward
            absorbing = True
        elif np.linalg.norm(pos - self.goal_pos) <= 10:
            reward = 100
            absorbing = True
        else:
            reward = -1
            absorbing = False

        # Shaping term: convert the heading to the atan2 convention and
        # compare it with the bearing between goal and ship; the bonus
        # np.expm1(power) lies in (-1, 0] and vanishes when they match.
        theta_ref = normalize_angle(np.arctan2(pos[1] - self.goal_pos[1],
                                               pos[0] - self.goal_pos[0]))
        theta = normalize_angle(np.pi / 2 - new_state[2])
        del_theta = shortest_angular_distance(from_angle=theta,
                                              to_angle=theta_ref)
        power = -del_theta ** 2 / (np.pi / 6) ** 2
        reward += np.expm1(power)

        self._state = new_state

        return self._state, reward, absorbing, {}
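Note: all of these snippets rely on two angle helpers, normalize_angle and
shortest_angular_distance, whose definitions are not shown on this page. A
minimal sketch of the conventional implementations is given below for
reference; it is an assumption about their behavior, not the library's
actual code.

import numpy as np


def normalize_angle(angle):
    # Wrap an angle to the interval [-pi, pi).
    return (angle + np.pi) % (2 * np.pi) - np.pi


def shortest_angular_distance(from_angle, to_angle):
    # Signed smallest rotation taking from_angle onto to_angle.
    return normalize_angle(to_angle - from_angle)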
Code Example #2
    def reset(self, state=None):
        if state is None:
            angle = np.random.uniform(-np.pi / 8., np.pi / 8.)

            self._state = np.array([angle, 0.])
        else:
            self._state = state
            self._state[0] = normalize_angle(self._state[0])

        return self._state
Code Example #3
def rototranslate(inputs):
    # Rotate and translate a pose into the frame attached to the active
    # direction: inputs = [active_direction, (x, y, theta, theta_dot),
    # (x0, y0)], with (x0, y0) the origin of the active frame.
    new_states = np.zeros(4)
    active_direction = inputs[0]
    x = inputs[1][0]
    y = inputs[1][1]
    theta = inputs[1][2]
    theta_dot = inputs[1][3]
    x0 = inputs[2][0]
    y0 = inputs[2][1]

    # Cardinal directions (0-3) use different offsets on the two axes;
    # diagonal directions (4-7) use the small offset on both.
    if active_direction < 4:
        small_offset = 40
        large_offset = 75
    else:
        small_offset = 40
        large_offset = 40

    if active_direction == 0:  # R (right)
        new_states[0] = x - x0 + small_offset
        new_states[1] = y - y0 + large_offset
        new_states[2] = normalize_angle(theta)
    elif active_direction == 1:  # D (down)
        new_states[0] = y0 - y + small_offset
        new_states[1] = x - x0 + large_offset
        new_states[2] = normalize_angle(theta + np.pi / 2)
    elif active_direction == 2:  # L (left)
        new_states[0] = x0 - x + small_offset
        new_states[1] = y0 - y + large_offset
        new_states[2] = normalize_angle(theta + np.pi)
    elif active_direction == 3:  # U (up)
        new_states[0] = y - y0 + small_offset
        new_states[1] = x0 - x + large_offset
        new_states[2] = normalize_angle(theta + 1.5 * np.pi)
    elif active_direction == 4:  # UR (up-right)
        new_states[0] = x - x0 + small_offset
        new_states[1] = y - y0 + small_offset
        new_states[2] = normalize_angle(theta)
    elif active_direction == 5:  # DR (down-right)
        new_states[0] = y0 - y + small_offset
        new_states[1] = x - x0 + small_offset
        new_states[2] = normalize_angle(theta + np.pi / 2)
    elif active_direction == 6:  # DL (down-left)
        new_states[0] = x0 - x + small_offset
        new_states[1] = y0 - y + small_offset
        new_states[2] = normalize_angle(theta + np.pi)
    else:  # UL (up-left)
        new_states[0] = y - y0 + small_offset
        new_states[1] = x0 - x + small_offset
        new_states[2] = normalize_angle(theta + np.pi * 1.5)

    new_states[3] = theta_dot

    return new_states
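A quick sanity check of the convention above, with made-up numbers: a pose
(110, 60, 0.2, 0) in the frame of an active direction 1 ("down") segment
with origin (100, 50).

inputs = [1, np.array([110., 60., 0.2, 0.]), np.array([100., 50.])]
print(rototranslate(inputs))
# -> approximately [30., 85., 1.7708, 0.]: x maps to y0 - y + 40,
#    y to x - x0 + 75, and the heading is rotated by pi / 2.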
Code Example #4
def angle_ref_angle_difference(ins):
    # ins = [theta_ref, pos_ref, (x, y, theta, ...)]; returns the signed
    # heading error and the Euclidean distance to the reference position.
    theta_ref = normalize_angle(ins[0])
    pos_ref = ins[1]
    x = ins[2][0]
    y = ins[2][1]
    theta = ins[2][2]

    del_theta = shortest_angular_distance(from_angle=theta, to_angle=theta_ref)
    pos = np.array([x, y])
    del_pos = np.linalg.norm(pos - pos_ref)

    return np.array([del_theta, del_pos])
Code Example #5
    def reset(self, state=None):
        if state is None:
            if self._random:
                angle = np.random.uniform(-np.pi / 4, np.pi / 4)
            else:
                angle = -np.pi / 8

            self._state = np.array([-self._goal_distance, angle, 0., 0.])
        else:
            self._state = state
            self._state[1] = normalize_angle(self._state[1])

        return self._state
Code Example #6
    def step(self, action):
        # Clip the torque, then integrate the continuous dynamics over
        # one control interval.
        u = self._bound(action[0], -self._max_u, self._max_u)
        new_state = odeint(self._dynamics, self._state, [0, self._dt], (u,))

        # Keep the final integration point; wrap the angle and clip the
        # angular velocity.
        self._state = np.array(new_state[-1])
        self._state[0] = normalize_angle(self._state[0])
        self._state[1] = self._bound(self._state[1], -self._max_omega,
                                     self._max_omega)

        # The reward is maximal (1) when the angle is zero.
        reward = np.cos(self._state[0])

        self._last_u = u

        return self._state, reward, False, {}
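Note: the odeint-based examples call two members defined elsewhere in the
environment classes: _dynamics, the time derivative of the state, and
_bound, a clipping helper. Below is a minimal sketch of compatible
definitions for a torque-driven pendulum; the constants and the dynamics
model are placeholders, not the library's actual values.

import numpy as np
from scipy.integrate import odeint


class PendulumSketch:
    _g, _m, _l, _dt = 9.81, 1.0, 1.0, 0.01   # placeholder constants

    def _dynamics(self, state, t, u):
        # state = [theta, omega]; returns d(state)/dt under torque u,
        # matching the odeint callback signature func(y, t, *args).
        theta, omega = state
        d_theta = omega
        d_omega = (u - self._m * self._g * self._l * np.sin(theta)) / \
            (self._m * self._l ** 2)
        return [d_theta, d_omega]

    @staticmethod
    def _bound(x, min_value, max_value):
        # Clip a scalar (or array) into [min_value, max_value].
        return np.maximum(min_value, np.minimum(max_value, x))


# e.g.: odeint(PendulumSketch()._dynamics, [0.1, 0.], [0, 0.01], (0.5,))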
Code Example #7
File: segway.py Project: ronald-xie/mushroom
    def reset(self, state=None):
        if state is None:
            if self._random:
                angle = np.random.uniform(-np.pi / 2, np.pi / 2)
            else:
                angle = -np.pi / 8

            self._state = np.array([angle, 0., 0.])
        else:
            self._state = state
            self._state[0] = normalize_angle(self._state[0])

        self._last_x = 0

        return self._state
Code Example #8
    def reset(self, state=None):
        if state is None:
            if self._random:
                angle = np.random.uniform(-np.pi, np.pi)
            else:
                angle = np.pi / 2

            self._state = np.array([angle, 0.])
        else:
            self._state = state
            self._state[0] = normalize_angle(self._state[0])
            self._state[1] = self._bound(self._state[1], -self._max_omega,
                                         self._max_omega)

        return self._state
Code Example #9
def pos_ref_angle_difference(ins):
    # ins = [(x_ref, y_ref), (x, y, theta, ...)]; returns the signed heading
    # error towards the reference point and the distance to it.
    x_ref = ins[0][0]
    y_ref = ins[0][1]
    x = ins[1][0]
    y = ins[1][1]
    theta = ins[1][2]
    del_x = x_ref - x
    del_y = y_ref - y
    theta_ref = normalize_angle(np.arctan2(del_y, del_x))
    del_theta = shortest_angular_distance(from_angle=theta, to_angle=theta_ref)

    goal_pos = np.array([x_ref, y_ref])
    pos = np.array([x, y])
    del_pos = np.linalg.norm(pos - goal_pos)

    return np.array([del_theta, del_pos])
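For instance, with the reference at the origin and the agent at (1, 1)
heading along the x-axis (made-up numbers):

ins = [np.array([0., 0.]), np.array([1., 1., 0., 0.])]
print(pos_ref_angle_difference(ins))
# -> approximately [-2.3562, 1.4142]: turn about 135 degrees clockwise
#    and travel about 1.41 units to reach the reference.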
Code Example #10
    def step(self, action):
        # Clip the control and integrate the dynamics for one time step.
        u = np.maximum(-self.max_u, np.minimum(self.max_u, action[0]))
        new_state = odeint(self._dynamics, self._state, [0, self.dt], (u,))
        self._state = np.array(new_state[-1])
        self._state[1] = normalize_angle(self._state[1])

        # Fail when the angle exceeds pi/2 or the position leaves the
        # allowed range; otherwise pay a quadratic state cost x^T Q x.
        if abs(self._state[1]) > np.pi / 2 \
                or abs(self._state[0]) > 2 * self._goal_distance:
            absorbing = True
            reward = -10000
        else:
            absorbing = False
            Q = np.diag([10.0, 3.0, 0.1, 0.1])

            x = self._state
            J = x.dot(Q).dot(x)
            reward = -J

        return self._state, reward, absorbing, {}
Code Example #11
    def step(self, action):
        # Clip the commanded turning rate.
        r = np.maximum(-self.omega_max, np.minimum(self.omega_max, action[0]))
        new_state = self._state

        # Integrate the kinematics for n_steps_action sub-steps per action;
        # entries 4: of the state count how often each gate was passed.
        for _ in range(self.n_steps_action):
            state = new_state
            new_state = np.empty(8)

            new_state[0] = state[0] + self._v * np.cos(state[2]) * self._dt
            new_state[1] = state[1] + self._v * np.sin(state[2]) * self._dt
            new_state[2] = normalize_angle(state[2] + state[3] * self._dt)
            new_state[3] = state[3] + (r - state[3]) * self._dt / self._T
            new_state[4:] = state[4:]

            absorbing = False
            reward = 0

            if new_state[0] > self.field_size \
               or new_state[1] > self.field_size \
               or new_state[0] < 0 or new_state[1] < 0:
                reward = self._out_reward
                absorbing = True
                break
            else:
                # Count the gate crossing; reward only the first pass
                # through each gate.
                for i, gate in enumerate(self._gate_list):
                    if self._through_gate(state[:2], new_state[:2], gate):
                        new_state[4 + i] += 1
                        if new_state[4 + i] == 1:
                            reward = 10

            # Terminate once every gate has been passed at least once.
            if np.all(new_state[4:] > 0):
                absorbing = True
                break

        self._state = new_state

        return self._state, reward, absorbing, {}
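Note: self._through_gate is not shown in these examples; it has to decide
whether the segment travelled during a sub-step crosses a gate. A standard
segment-intersection test would do, sketched below under the assumption
that a gate is given by its two endpoints; this is an illustration, not
the library's implementation.

import numpy as np


def cross_2d(v, w):
    # z-component of the 2-D cross product.
    return v[0] * w[1] - v[1] * w[0]


def through_gate(start, end, gate):
    # True if the segment start -> end properly crosses the segment
    # gate[0] -> gate[1]; grazes at the endpoints are ignored.
    r = np.asarray(end, dtype=float) - np.asarray(start, dtype=float)
    s = np.asarray(gate[1], dtype=float) - np.asarray(gate[0], dtype=float)
    denom = cross_2d(r, s)
    if denom == 0.:
        return False          # parallel or collinear: no proper crossing
    d = np.asarray(gate[0], dtype=float) - np.asarray(start, dtype=float)
    t = cross_2d(d, s) / denom
    u = cross_2d(d, r) / denom
    return 0. < t < 1. and 0. < u < 1.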
Code Example #12
File: segway.py Project: ronald-xie/mushroom
    def step(self, action):
        # Clip the torque and integrate the segway dynamics for one step.
        u = self._bound(action[0], -self._max_u, self._max_u)
        new_state = odeint(self._dynamics, self._state, [0, self._dt],
                           (u,))

        self._state = np.array(new_state[-1])
        self._state[0] = normalize_angle(self._state[0])

        # Falling over ends the episode with a large penalty; otherwise
        # pay a quadratic state cost x^T Q x.
        if abs(self._state[0]) > np.pi / 2:
            absorbing = True
            reward = -10000
        else:
            absorbing = False
            Q = np.diag([3.0, 0.1, 0.1])

            x = self._state
            J = x.dot(Q).dot(x)
            reward = -J

        return self._state, reward, absorbing, {}
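A note on the reward used here and in Code Example #10: it is the negative
quadratic form J = x^T Q x, so states near the equilibrium cost almost
nothing while large angles and velocities are penalized quadratically. For
example:

Q = np.diag([3.0, 0.1, 0.1])
x = np.array([0.1, 0.5, 0.2])   # small tilt, moderate velocities
print(-x.dot(Q).dot(x))         # -> approximately -0.059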
コード例 #16
0
    def step(self, action):
        if action == 0:
            u = -self._max_u
        elif action == 1:
            u = 0.
        else:
            u = self._max_u
        u += np.random.uniform(-self._noise_u, self._noise_u)
        new_state = odeint(self._dynamics, self._state, [0, self._dt], (u, ))

        self._state = np.array(new_state[-1])
        self._state[0] = normalize_angle(self._state[0])

        if np.abs(self._state[0]) > np.pi * .5:
            reward = -1.
            absorbing = True
        else:
            reward = 0.
            absorbing = False

        self._last_u = u

        return self._state, reward, absorbing, {}
Code Example #14
    def step(self, action):
        # Clip the commanded turning rate.
        r = self._bound(action[0], -self.omega_max, self.omega_max)

        new_state = self._state

        # Integrate the kinematics for n_steps_action sub-steps per action.
        for _ in range(self.n_steps_action):
            state = new_state
            new_state = np.empty(4)
            new_state[0] = state[0] + self._v * np.cos(state[2]) * self._dt
            new_state[1] = state[1] + self._v * np.sin(state[2]) * self._dt
            new_state[2] = normalize_angle(state[2] + state[3] * self._dt)
            new_state[3] = state[3] + (r - state[3]) * self._dt / self._T

            # Leaving the field ends the episode with a penalty; the
            # position is clipped back onto the field for the final state.
            if new_state[0] > self.field_size \
               or new_state[1] > self.field_size \
               or new_state[0] < 0 or new_state[1] < 0:
                new_state[0] = self._bound(new_state[0], 0, self.field_size)
                new_state[1] = self._bound(new_state[1], 0, self.field_size)

                reward = self._out_reward
                absorbing = True
                break
            elif self._through_gate(state[:2], new_state[:2]):
                # Passing through the gate is the success condition.
                reward = self._success_reward
                absorbing = True
                break
            else:
                reward = -1
                absorbing = False

        self._state = new_state

        return self._state, reward, absorbing, {}