def step(self, action):
    """Advance the vehicle one time step and score progress toward the goal.

    Args:
        action: 1-element array; commanded turn rate, saturated to
            [-omega_max, omega_max].

    Returns:
        (state, reward, absorbing, info) where state is
        [x, y, heading, turn_rate].
    """
    # Saturate the commanded turn rate at the actuator limit.
    r = np.maximum(-self.omega_max, np.minimum(self.omega_max, action[0]))
    new_state = np.empty(4)
    # Kinematics: x advances with sin(heading) and y with cos(heading),
    # so the heading appears to be measured from the +y axis —
    # TODO(review): confirm against the rest of this environment.
    new_state[0] = self._state[0] + self._v * np.sin(self._state[2]) * \
        self._dt
    new_state[1] = self._state[1] + self._v * np.cos(self._state[2]) * \
        self._dt
    new_state[2] = normalize_angle(self._state[2] + self._state[3] * self._dt)
    # First-order lag of the actual turn rate toward the command r.
    new_state[3] = self._state[3] + (r - self._state[3]) * self._dt / \
        self._T

    pos = np.array([new_state[0], new_state[1]])

    if new_state[0] > self.field_size or new_state[1] > self.field_size \
            or new_state[0] < 0 or new_state[1] < 0:
        # Left the square field: terminal penalty.
        reward = self._out_reward
        absorbing = True
    elif np.linalg.norm(pos - self.goal_pos) <= 10:
        # Within 10 units of the goal: terminal success reward.
        reward = 100
        absorbing = True
    else:
        reward = -1
        absorbing = False

    # Shaping term, added on top of EVERY branch above (including the
    # absorbing ones): compares (pi/2 - heading) against the bearing
    # from the goal to the new position. expm1 of a non-positive
    # quadratic yields a value in (-1, 0].
    theta_ref = normalize_angle(np.arctan2(pos[1] - self.goal_pos[1],
                                           pos[0] - self.goal_pos[0]))
    theta = new_state[2]
    theta = normalize_angle(np.pi / 2 - theta)
    del_theta = shortest_angular_distance(from_angle=theta,
                                          to_angle=theta_ref)
    power = -del_theta ** 2 / ((np.pi / 6) * (np.pi / 6))
    reward = reward + np.expm1(power)

    self._state = new_state

    return self._state, reward, absorbing, {}
def reset(self, state=None):
    """Reset the environment state.

    Args:
        state: optional initial state; when None, the angle is drawn
            uniformly from [-pi/8, pi/8] with zero second component.

    Returns:
        The stored initial state, with its angle normalized.
    """
    if state is not None:
        self._state = state
    else:
        initial_angle = np.random.uniform(-np.pi / 8., np.pi / 8.)
        self._state = np.array([initial_angle, 0.])

    # Keep the stored angle inside the canonical range.
    self._state[0] = normalize_angle(self._state[0])

    return self._state
def rototranslate(inputs):
    """Express a pose in the frame attached to the active direction.

    Args:
        inputs: [active_direction, (x, y, theta, theta_dot), (x0, y0)]
            where active_direction selects one of 8 headings
            (0:R 1:D 2:L 3:U 4:UR 5:DR 6:DL otherwise:UL).

    Returns:
        np.ndarray of 4 values: offset/rotated x, y, heading, theta_dot.
    """
    direction = inputs[0]
    x, y = inputs[1][0], inputs[1][1]
    theta, theta_dot = inputs[1][2], inputs[1][3]
    x0, y0 = inputs[2][0], inputs[2][1]

    # Cardinal directions (0-3) use a deeper second offset than diagonals.
    near = 40
    far = 75 if direction < 4 else 40

    dx = x - x0
    dy = y - y0

    if direction == 0:    # R
        out = [dx + near, dy + far, normalize_angle(theta)]
    elif direction == 1:  # D
        out = [-dy + near, dx + far, normalize_angle(theta + np.pi / 2)]
    elif direction == 2:  # L
        out = [-dx + near, -dy + far, normalize_angle(theta + np.pi)]
    elif direction == 3:  # U
        out = [dy + near, -dx + far, normalize_angle(theta + 1.5 * np.pi)]
    elif direction == 4:  # UR
        out = [dx + near, dy + near, normalize_angle(theta)]
    elif direction == 5:  # DR
        out = [-dy + near, dx + near, normalize_angle(theta + np.pi / 2)]
    elif direction == 6:  # DL
        out = [-dx + near, -dy + near, normalize_angle(theta + np.pi)]
    else:                 # UL (fallback for any other value)
        out = [dy + near, -dx + near, normalize_angle(theta + np.pi * 1.5)]

    return np.array(out + [theta_dot], dtype=float)
def angle_ref_angle_difference(ins):
    """Return [heading error, position error] w.r.t. a reference.

    Args:
        ins: [theta_ref, pos_ref, state] where state holds
            (x, y, theta, ...).

    Returns:
        np.ndarray [shortest angular distance to theta_ref,
        Euclidean distance to pos_ref].
    """
    reference_angle = normalize_angle(ins[0])
    reference_pos = ins[1]
    state = ins[2]

    heading_error = shortest_angular_distance(from_angle=state[2],
                                              to_angle=reference_angle)
    position_error = np.linalg.norm(np.array([state[0], state[1]])
                                    - reference_pos)

    return np.array([heading_error, position_error])
def reset(self, state=None):
    """Reset to a given state or to the default start configuration.

    Args:
        state: optional initial state; when None, the start is at
            -goal_distance with zero rates and a tilt that is random in
            [-pi/4, pi/4] if self._random, else -pi/8.

    Returns:
        The stored initial state, with its tilt angle normalized.
    """
    if state is not None:
        self._state = state
    else:
        tilt = (np.random.uniform(-np.pi / 4, np.pi / 4) if self._random
                else -np.pi / 8)
        self._state = np.array([-self._goal_distance, tilt, 0., 0.])

    # Canonicalize the tilt angle.
    self._state[1] = normalize_angle(self._state[1])

    return self._state
def step(self, action):
    """Integrate the dynamics for one control interval.

    Args:
        action: 1-element array; control, bounded to [-max_u, max_u].

    Returns:
        (state, reward, absorbing, info); reward is cos(angle) and the
        episode never terminates here (absorbing is always False).
    """
    torque = self._bound(action[0], -self._max_u, self._max_u)

    # Integrate over [0, dt] and keep only the final sample.
    trajectory = odeint(self._dynamics, self._state, [0, self._dt],
                        (torque,))
    self._state = np.array(trajectory[-1])

    # Wrap the angle and clip the angular velocity to its bound.
    self._state[0] = normalize_angle(self._state[0])
    self._state[1] = self._bound(self._state[1], -self._max_omega,
                                 self._max_omega)

    self._last_u = torque

    return self._state, np.cos(self._state[0]), False, {}
def step(self, action):
    """One environment step: apply bounded control, integrate, reward.

    Args:
        action: 1-element array; control, bounded to [-max_u, max_u].

    Returns:
        (state, reward, absorbing, info); reward is cos(angle), the
        episode is never absorbing from this method.
    """
    u = self._bound(action[0], -self._max_u, self._max_u)

    # Take the end point of the ODE solution over [0, dt].
    self._state = np.array(
        odeint(self._dynamics, self._state, [0, self._dt], (u,))[-1])

    # Normalize the angle, then saturate the angular velocity.
    self._state[0] = normalize_angle(self._state[0])
    self._state[1] = self._bound(self._state[1], -self._max_omega,
                                 self._max_omega)

    reward = np.cos(self._state[0])
    self._last_u = u

    return self._state, reward, False, {}
def reset(self, state=None):
    """Reset to a given state or sample a fresh starting angle.

    Args:
        state: optional initial state; when None, the angle is random in
            [-pi/2, pi/2] if self._random, else fixed at -pi/8, with the
            two remaining components zeroed.

    Returns:
        The stored initial state, with its angle normalized.
    """
    if state is not None:
        self._state = state
    else:
        angle = (np.random.uniform(-np.pi / 2, np.pi / 2)
                 if self._random else -np.pi / 8)
        self._state = np.array([angle, 0., 0.])

    self._state[0] = normalize_angle(self._state[0])
    # Also clear the auxiliary position tracker.
    self._last_x = 0

    return self._state
def reset(self, state=None):
    """Initialize the episode state.

    Args:
        state: optional explicit initial state. When omitted, the start
            angle is uniform in [-pi/2, pi/2] when self._random is set,
            otherwise -pi/8; the other two components start at zero.

    Returns:
        The stored initial state, angle normalized.
    """
    if state is None:
        if self._random:
            start_angle = np.random.uniform(-np.pi / 2, np.pi / 2)
        else:
            start_angle = -np.pi / 8
        self._state = np.array([start_angle, 0., 0.])
    else:
        self._state = state

    self._state[0] = normalize_angle(self._state[0])
    self._last_x = 0  # reset the auxiliary position tracker too

    return self._state
def reset(self, state=None):
    """Reset the environment to a start state.

    Args:
        state: optional initial state; when None, the angle is random in
            [-pi, pi] if self._random, else pi/2, with zero velocity.

    Returns:
        The stored initial state with angle normalized and angular
        velocity clipped to [-max_omega, max_omega].
    """
    if state is not None:
        self._state = state
    else:
        theta0 = (np.random.uniform(-np.pi, np.pi) if self._random
                  else np.pi / 2)
        self._state = np.array([theta0, 0.])

    self._state[0] = normalize_angle(self._state[0])
    self._state[1] = self._bound(self._state[1], -self._max_omega,
                                 self._max_omega)

    return self._state
def pos_ref_angle_difference(ins):
    """Return [heading error toward a target point, distance to it].

    Args:
        ins: [(x_ref, y_ref), state] where state holds (x, y, theta, ...).

    Returns:
        np.ndarray [shortest angular distance from theta to the bearing
        of the target, Euclidean distance to the target].
    """
    target = np.array([ins[0][0], ins[0][1]])
    position = np.array([ins[1][0], ins[1][1]])
    heading = ins[1][2]

    # Bearing of the target as seen from the current position.
    offset = target - position
    bearing = normalize_angle(np.arctan2(offset[1], offset[0]))

    heading_error = shortest_angular_distance(from_angle=heading,
                                              to_angle=bearing)
    distance = np.linalg.norm(position - target)

    return np.array([heading_error, distance])
def step(self, action):
    """One integration step with a quadratic state cost.

    Args:
        action: 1-element array; control, clipped to [-max_u, max_u].

    Returns:
        (state, reward, absorbing, info). The episode ends with reward
        -10000 when the angle leaves [-pi/2, pi/2] or the position
        exceeds twice the goal distance; otherwise reward is -x^T Q x.
    """
    u = np.maximum(-self.max_u, np.minimum(self.max_u, action[0]))

    # Integrate the dynamics over [0, dt]; keep the final sample.
    states = odeint(self._dynamics, self._state, [0, self.dt], (u,))
    self._state = np.array(states[-1])
    self._state[1] = normalize_angle(self._state[1])

    out_of_bounds = (abs(self._state[1]) > np.pi / 2
                     or abs(self._state[0]) > 2 * self._goal_distance)
    if out_of_bounds:
        return self._state, -10000, True, {}

    # Quadratic cost x^T Q x on the 4-dimensional state.
    Q = np.diag([10.0, 3.0, 0.1, 0.1])
    cost = self._state.dot(Q).dot(self._state)

    return self._state, -cost, False, {}
def step(self, action):
    """One step of the dynamics with quadratic cost; absorbing past pi/2.

    Args:
        action: 1-element array; control, bounded to [-max_u, max_u].

    Returns:
        (state, reward, absorbing, info). reward is -10000 when the
        angle exceeds +-pi/2 (absorbing), else -x^T Q x.
    """
    u = self._bound(action[0], -self._max_u, self._max_u)

    # End point of the ODE solution over [0, dt].
    self._state = np.array(
        odeint(self._dynamics, self._state, [0, self._dt], (u,))[-1])
    self._state[0] = normalize_angle(self._state[0])

    if abs(self._state[0]) > np.pi / 2:
        return self._state, -10000, True, {}

    # Quadratic penalty on (angle, rate, aux) components.
    weights = np.diag([3.0, 0.1, 0.1])

    return self._state, -self._state.dot(weights).dot(self._state), False, {}
def step(self, action):
    """Simulate n_steps_action sub-steps; reward the first pass of a gate.

    Args:
        action: 1-element array; commanded turn rate, saturated to
            [-omega_max, omega_max].

    Returns:
        (state, reward, absorbing, info). state is [x, y, heading,
        turn_rate, per-gate pass counters...] (8 entries total).
    """
    # Saturate the commanded turn rate.
    r = np.maximum(-self.omega_max, np.minimum(self.omega_max, action[0]))
    new_state = self._state
    for _ in range(self.n_steps_action):
        state = new_state
        new_state = np.empty(8)
        # Car kinematics with a first-order lag on the turn rate.
        new_state[0] = state[0] + self._v * np.cos(state[2]) * self._dt
        new_state[1] = state[1] + self._v * np.sin(state[2]) * self._dt
        new_state[2] = normalize_angle(state[2] + state[3] * self._dt)
        new_state[3] = state[3] + (r - state[3]) * self._dt / self._T
        # The trailing slots (indices 4..7) carry per-gate pass counters
        # and are copied forward unchanged.
        new_state[4:] = state[4:]

        absorbing = False
        reward = 0

        if new_state[0] > self.field_size \
                or new_state[1] > self.field_size \
                or new_state[0] < 0 or new_state[1] < 0:
            # Left the field: terminal penalty, stop sub-stepping.
            reward = self._out_reward
            absorbing = True
            break
        else:
            for i, gate in enumerate(self._gate_list):
                if self._through_gate(state[:2], new_state[:2], gate):
                    new_state[4 + i] += 1
                    # Only the FIRST crossing of each gate is rewarded.
                    if new_state[4 + i] == 1:
                        reward = 10

            # NOTE(review): this checks counters from index 5 onward,
            # while counters are written starting at index 4 — the first
            # gate's counter is ignored by the termination test. Possible
            # off-by-one (new_state[4:]?); confirm intended semantics.
            if np.all(new_state[5:] > 0):
                absorbing = True
                break

    self._state = new_state

    return self._state, reward, absorbing, {}
def step(self, action):
    """Advance one step; large terminal penalty once the angle passes pi/2.

    Args:
        action: 1-element array; control, bounded to [-max_u, max_u].

    Returns:
        (state, reward, absorbing, info). reward is -10000 on the
        absorbing fall, otherwise the negative quadratic form
        -x^T diag(3.0, 0.1, 0.1) x.
    """
    torque = self._bound(action[0], -self._max_u, self._max_u)

    # Integrate over [0, dt] and retain only the final state.
    solution = odeint(self._dynamics, self._state, [0, self._dt], (torque,))
    self._state = np.array(solution[-1])
    self._state[0] = normalize_angle(self._state[0])

    fallen = bool(abs(self._state[0]) > np.pi / 2)
    if fallen:
        reward = -10000
    else:
        Q = np.diag([3.0, 0.1, 0.1])
        reward = -self._state.dot(Q).dot(self._state)

    return self._state, reward, fallen, {}
def step(self, action):
    """Apply a discrete push (0: negative, 1: none, other: positive).

    Args:
        action: discrete action selecting the nominal force, which is
            then perturbed by uniform noise in [-noise_u, noise_u].

    Returns:
        (state, reward, absorbing, info). reward is -1 (absorbing) once
        the angle leaves [-pi/2, pi/2], else 0.
    """
    # Map the discrete action to a nominal force, then add noise.
    u = -self._max_u if action == 0 else (0. if action == 1 else self._max_u)
    u += np.random.uniform(-self._noise_u, self._noise_u)

    # Integrate over [0, dt]; keep the final sample.
    integrated = odeint(self._dynamics, self._state, [0, self._dt], (u,))
    self._state = np.array(integrated[-1])
    self._state[0] = normalize_angle(self._state[0])

    fallen = bool(np.abs(self._state[0]) > np.pi * .5)
    reward = -1. if fallen else 0.

    self._last_u = u

    return self._state, reward, fallen, {}
def step(self, action):
    """Simulate n_steps_action sub-steps of the car dynamics.

    Sub-stepping stops early when the car exits the field (position is
    clipped back onto it, out-reward, absorbing) or passes through the
    gate (success reward, absorbing); otherwise the step costs -1.

    Args:
        action: 1-element array; commanded turn rate, bounded to
            [-omega_max, omega_max].

    Returns:
        (state, reward, absorbing, info). state is
        [x, y, heading, turn_rate].
    """
    turn_rate = self._bound(action[0], -self.omega_max, self.omega_max)

    current = self._state
    for _ in range(self.n_steps_action):
        previous = current
        current = np.empty(4)
        # Car kinematics plus a first-order lag of the actual turn rate
        # toward the commanded one.
        current[0] = previous[0] + self._v * np.cos(previous[2]) * self._dt
        current[1] = previous[1] + self._v * np.sin(previous[2]) * self._dt
        current[2] = normalize_angle(previous[2] + previous[3] * self._dt)
        current[3] = previous[3] + (turn_rate - previous[3]) * self._dt \
            / self._T

        outside = (current[0] > self.field_size
                   or current[1] > self.field_size
                   or current[0] < 0 or current[1] < 0)
        if outside:
            # Clip the position back onto the field before terminating.
            current[0] = self._bound(current[0], 0, self.field_size)
            current[1] = self._bound(current[1], 0, self.field_size)
            reward = self._out_reward
            absorbing = True
            break
        if self._through_gate(previous[:2], current[:2]):
            reward = self._success_reward
            absorbing = True
            break
        reward = -1
        absorbing = False

    self._state = current

    return self._state, reward, absorbing, {}
def step(self, action):
    """Take one step under a discrete, noisy force.

    Args:
        action: discrete action; 0 applies -max_u, 1 applies no force,
            any other value applies +max_u. Uniform noise in
            [-noise_u, noise_u] is added on top.

    Returns:
        (state, reward, absorbing, info). The episode ends with reward
        -1 when the angle leaves [-pi/2, pi/2]; otherwise reward is 0.
    """
    if action == 0:
        force = -self._max_u
    elif action == 1:
        force = 0.
    else:
        force = self._max_u
    # Actuation noise on the nominal force.
    force += np.random.uniform(-self._noise_u, self._noise_u)

    self._state = np.array(
        odeint(self._dynamics, self._state, [0, self._dt], (force,))[-1])
    self._state[0] = normalize_angle(self._state[0])

    if np.abs(self._state[0]) > np.pi * .5:
        reward, absorbing = -1., True
    else:
        reward, absorbing = 0., False

    self._last_u = force

    return self._state, reward, absorbing, {}