def draw_action(self, state):
        # Scale the reference angle (first state entry) into a setpoint.
        angle_setpoint = state[0] * self._weights[0]

        # Note: slicing returns a view, so the assignment below also writes
        # back into `state`.
        new_state = state[1:]

        # Replace the measured angle with its signed error to the setpoint.
        new_state[0] = shortest_angular_distance(angle_setpoint, new_state[0])

        # The action is a weighted sum of the error-augmented state.
        return np.atleast_1d(np.abs(self._weights[1:]).dot(new_state))
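All of these examples call shortest_angular_distance and normalize_angle without showing them. A minimal sketch of the two helpers, assuming the usual ROS angles-style semantics (wrap into (-pi, pi], signed shortest rotation), would be:

import numpy as np


def normalize_angle(angle):
    # Wrap an angle into (-pi, pi].
    return np.arctan2(np.sin(angle), np.cos(angle))


def shortest_angular_distance(from_angle, to_angle):
    # Signed shortest rotation taking from_angle onto to_angle.
    return normalize_angle(to_angle - from_angle)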
Example #2
def angle_to_angle_diff_complete_state(inputs):
    # Unpack the angle setpoint and the raw state.
    alpha_ref = inputs[0]
    states = inputs[1]
    alpha = states[1]
    alpha_dot = states[2]
    beta_dot = states[3]
    # Replace the absolute angle with its signed error to the setpoint.
    delta_alpha = shortest_angular_distance(alpha_ref[0], alpha)

    return np.array([delta_alpha, alpha_dot, beta_dot])
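A hypothetical call, just to illustrate the input layout this function expects (the setpoint is array-like and indexed with [0]; the raw state carries alpha, alpha_dot, beta_dot at indices 1-3):

import numpy as np

alpha_ref = np.array([0.2])                  # angle setpoint
states = np.array([0.0, -0.1, 0.5, -0.4])    # [_, alpha, alpha_dot, beta_dot]
print(angle_to_angle_diff_complete_state([alpha_ref, states]))
# -> [-0.3  0.5 -0.4]   (shortest_angular_distance(0.2, -0.1) == -0.3)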
Example #3
def angle_ref_angle_difference(ins):
    # ins[0]: reference heading, ins[1]: reference position, ins[2]: [x, y, theta].
    theta_ref = normalize_angle(ins[0])
    theta = ins[2][2]
    pos_ref = ins[1]
    # Signed heading error and Euclidean distance to the reference position.
    del_theta = shortest_angular_distance(from_angle=theta, to_angle=theta_ref)
    x = ins[2][0]
    y = ins[2][1]
    pos = np.array([x, y])
    del_pos = np.linalg.norm(pos - pos_ref)

    return np.array([del_theta, del_pos])
Example #4
    def step_low_level(self, action):

        # Clamp the commanded turn rate to the actuator limits.
        r = np.clip(action[0], -self.omega_max, self.omega_max)

        # Integrate the vehicle kinematics over one time step: position,
        # heading, and a first-order lag (time constant self._T) on the
        # turn rate tracking the command r.
        new_state = np.empty(4)
        new_state[0] = self._state[0] + self._v * np.sin(self._state[2]) * \
                       self._dt
        new_state[1] = self._state[1] + self._v * np.cos(self._state[2]) * \
                       self._dt
        new_state[2] = normalize_angle(self._state[2] +
                                       self._state[3] * self._dt)
        new_state[3] = self._state[3] + (r - self._state[3]) * self._dt / \
                       self._T

        pos = np.array([new_state[0], new_state[1]])

        # Terminal penalty for leaving the field, terminal bonus for reaching
        # the goal (within a 10-unit radius), otherwise a -1 step cost.
        if new_state[0] > self.field_size or new_state[1] > self.field_size \
                or new_state[0] < 0 or new_state[1] < 0:
            reward = self._out_reward
            absorbing = True
        elif np.linalg.norm(pos - self.goal_pos) <= 10.0:
            self.success_count += 1
            reward = self._success_reward
            absorbing = True
        else:
            reward = -1
            absorbing = False

        if absorbing:
            self.ep_count += 1

        # Log the success count every 1000 completed episodes.
        if self.ep_count == 1000:
            self.success_per_thousand_ep.append(self.success_count)
            self.success_count = 0
            self.ep_count = 0

        # Reward shaping: bearing from the new position to the goal ...
        theta_ref = normalize_angle(
            np.arctan2(self.goal_pos[1] - pos[1], self.goal_pos[0] - pos[0]))
        # ... compared with the heading, converted to the atan2 convention.
        theta = new_state[2]
        theta = normalize_angle(np.pi / 2 - theta)
        del_theta = shortest_angular_distance(from_angle=theta,
                                              to_angle=theta_ref)
        # Quadratic penalty on the heading error, squashed into (-1, 0].
        power = -del_theta ** 2 / ((np.pi / 6) * (np.pi / 6))

        reward += np.expm1(power)

        self._state = new_state

        return self._state, reward, absorbing, {}
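The shaping term above is zero when the heading points straight at the goal and saturates near -1 once the error exceeds a few multiples of pi/6. A quick numeric check (not part of the original example):

import numpy as np

for del_theta in (0.0, np.pi / 6, np.pi / 2):
    power = -del_theta ** 2 / ((np.pi / 6) * (np.pi / 6))
    print(f"del_theta={del_theta:.3f}  shaping={np.expm1(power):+.3f}")
# del_theta=0.000  shaping=+0.000
# del_theta=0.524  shaping=-0.632
# del_theta=1.571  shaping=-1.000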
Example #5
def pos_ref_angle_difference(ins):
    # ins[0]: reference (goal) position, ins[1]: current pose [x, y, theta].
    x_ref = ins[0][0]
    y_ref = ins[0][1]
    x = ins[1][0]
    y = ins[1][1]
    theta = ins[1][2]
    # Bearing from the current position to the reference position.
    del_x = x_ref - x
    del_y = y_ref - y
    theta_ref = normalize_angle(np.arctan2(del_y, del_x))
    del_theta = shortest_angular_distance(from_angle=theta, to_angle=theta_ref)

    # Euclidean distance to the reference position.
    goal_pos = np.array([x_ref, y_ref])
    pos = np.array([x, y])
    del_pos = np.linalg.norm(pos - goal_pos)

    return np.array([del_theta, del_pos])
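A hypothetical call showing the expected input layout (ins[0] is the goal position, ins[1] the (x, y, theta) pose); the values below are made up for illustration:

import numpy as np

goal = np.array([5.0, 5.0])
pose = np.array([1.0, 1.0, 0.0])   # x, y, theta
print(pos_ref_angle_difference([goal, pose]))
# ~ [0.785, 5.657]   (heading error ~pi/4, distance sqrt(32))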