def step_reward(self, action, obs, ds): done = False x, y = self.ship.position step_reward = 0 # Living penalty # step_reward -= 0.001 # TODO Increase living penalty if not done and self.reward < -50: done = True if not done and abs(self.s - self.path.length) < 1: done = True for o in self.static_obstacles + self.dynamic_obstacles: if not done and distance(self.ship.position, o.position) < self.ship.radius + o.radius: done = True step_reward -= OBST_PENALTY break if not done and abs(x) > PLAYFIELD or abs(y) > PLAYFIELD: done = True step_reward -= 50 if not done and distance(self.ship.position, self.path.get_endpoint()) < 20: done = True # step_reward += 50 if not done: # Reward progress along path, penalise backwards progress step_reward += ds / 4 if not done: # Penalise cross track error if too far away from path # state_error = obs[:6] # step_reward += (0.2 - np.clip(np.linalg.norm(state_error), 0, 0.4))/100 # heading_err = state_error[2] # surge_err = state_error[3] # TODO Punish for facing wrong way / Reward for advancing along path surge_error = obs[NR + 0] cross_track_error = obs[NR + 2] # step_reward -= abs(cross_track_error)*0.1 # step_reward -= max(0, -surge_error)*0.1 step_reward -= abs(cross_track_error) * 0.5 + max( 0, -surge_error) * 0.5 # step_reward -= (max(0.1, -obs[0]) - 0.1)*0.3 # dist_from_path = np.sqrt(x_err ** 2 + y_err ** 2) # path_angle = self.path.get_angle(self.s) # If the reference is pointing towards the path, don't penalise # if dist_from_path > 0.25 and sign(float(Angle(path_angle - self.ship.ref[1]))) == sign(y_err): # step_reward -= 0.1*(dist_from_path - 0.25) return done, step_reward
def step(self): self.state = self.model(self.ref) self.position = tuple(self.state[0:2, :].flatten()) self.angle = float(self.state[2]) self.linearVelocity = tuple(self.state[3:5].flatten()) self.angularVelocity = float(self.state[5]) if self.path_taken == [] or distance(self.position, self.path_taken[-1]) > 3: self.path_taken.append(self.position)
def update_closest_obstacles(self): # Deallocate static obstacles that are out of range for i, slot in self.active_static.copy().items(): if distance(self.ship.position, self.static_obstacles[i].position) > OBST_RANGE * 1.05: self.active_static.pop(i) # Deallocate dynamic obstacles that are out of range for i, slot in self.active_dynamic.copy().items(): if distance( self.ship.position, self.dynamic_obstacles[i].position) > OBST_RANGE * 1.05: self.active_dynamic.pop(i) if STATIC_OBST_SLOTS > 0: # Allocate static obstacles distances = { i: distance(self.ship.position, obst.position) for i, obst in enumerate(self.static_obstacles) } for obsti, obst in enumerate(self.static_obstacles): dist = distances[obsti] if dist < OBST_RANGE and obsti not in self.active_static: available_slots = tuple( set(range(STATIC_OBST_SLOTS)) - set(self.active_static.values())) if len(available_slots) > 0: slot = np.random.choice(available_slots) self.active_static[obsti] = slot else: active_distances = { k: distances[k] for k in self.active_static.keys() } i_max = max(active_distances, key=active_distances.get) if dist < active_distances[i_max]: slot_max = self.active_static[i_max] self.active_static.pop(i_max) self.active_static[obsti] = slot_max if DYNAMIC_OBST_SLOTS > 0: # Allocate dynamic obstacles distances = { i: distance(self.ship.position, obst.position) for i, obst in enumerate(self.dynamic_obstacles) } for obsti, obst in enumerate(self.dynamic_obstacles): dist = distances[obsti] if dist < OBST_RANGE and obsti not in self.active_dynamic: available_slots = tuple( set(range(DYNAMIC_OBST_SLOTS)) - set(self.active_dynamic.values())) if len(available_slots) > 0: slot = np.random.choice(available_slots) self.active_dynamic[obsti] = slot else: active_distances = { k: distances[k] for k in self.active_dynamic.keys() } i_max = max(active_distances, key=active_distances.get) if dist < active_distances[i_max]: slot_max = self.active_dynamic[i_max] self.active_dynamic.pop(i_max) self.active_dynamic[obsti] = slot_max
def navigate(self, state=None): if state is None: state = self.ship.state.flatten() self.update_closest_obstacles() # TODO Try lookahead vector instead of closest point closest_point = self.path(self.s).flatten() closest_angle = self.path.get_angle(self.s) target = self.path(self.s + LOS_DISTANCE).flatten() target_angle = self.path.get_angle(self.s + LOS_DISTANCE) # Ref error surge_ref_error = self.speed - self.ship.ref[0] heading_ref_error = float(Angle(target_angle - self.ship.ref[1] * pi)) # State and path errors surge_error = self.speed - state[3] heading_error = float(Angle(target_angle - state[2])) cross_track_error = rotate(closest_point - self.ship.position, -closest_angle)[1] target_dist = distance(self.ship.position, target) # Construct observation vector obs = np.zeros( (NR + NS + 2 * STATIC_OBST_SLOTS + 4 * DYNAMIC_OBST_SLOTS, )) if NR == 2: # obs[0] = np.clip(surge_ref_error / MAX_SURGE, -1, 1) obs[1] = np.clip(heading_ref_error / pi, -1, 1) obs[NR + 0] = np.clip(surge_error / MAX_SURGE, -1, 1) obs[NR + 1] = np.clip(heading_error / pi, -1, 1) obs[NR + 2] = np.clip(cross_track_error / OBST_RANGE, -1, 1) obs[NR + 3] = np.clip(target_dist / OBST_RANGE, 0, 1) if False: state_error = np.array([ target[0], target[1], target_angle, self.speed, 0, 0 ]) - state # plus obstacles state_error[0:2] = rotate(state_error[0:2], self.ship.angle) # Rotate to body frame state_error[2] = float(Angle( state_error[2])) # Bring back into -pi, pi range # Reference error obs[0] = np.clip(ref_error[0] / MAX_SURGE, -1, 1) obs[1] = np.clip(ref_error[0] / pi, -1, 1) # Path errors obs[2] = np.clip(state_error[0] / LOS_DISTANCE, -1, 1) obs[3] = np.clip(state_error[1] / LOS_DISTANCE, -1, 1) obs[4] = np.clip((((state_error[2] + pi) % (2 * pi)) - pi) / pi, -1, 1) obs[5] = np.clip(state_error[3] / MAX_SURGE, -1, 1) obs[6] = np.clip(state_error[4] / MAX_SURGE, -1, 1) obs[7] = np.clip(state_error[5] / MAX_SURGE, -1, 1) # Write static obstacle data into observation for i, slot in self.active_static.items(): obst = self.static_obstacles[i] vec = obst.position - self.ship.position obs[NS + NR + 2 * slot] = float( Angle(arctan2(vec[1], vec[0]) - self.ship.angle)) / pi obs[NS + NR + 2 * slot + 1] = 1 - np.clip( (np.linalg.norm(vec) - self.ship.radius - obst.radius) / LOS_DISTANCE, 0, 1) # Write dynamic obstacle data into observation for i, slot in self.active_dynamic.items(): obst = self.dynamic_obstacles[i] vec = obst.position - self.ship.position obs[NS + NR + STATIC_OBST_SLOTS * 2 + 4 * slot] = float( Angle(arctan2(vec[1], vec[0]) - self.ship.angle)) / pi obs[NS + NR + STATIC_OBST_SLOTS * 2 + 4 * slot + 1] = 1 - np.clip( (np.linalg.norm(vec) - self.ship.radius - obst.radius) / LOS_DISTANCE, 0, 1) vel = rotate(obst.linearVelocity, self.ship.angle) obs[NS + NR + STATIC_OBST_SLOTS * 2 + 4 * slot + 2] = vel[0] obs[NS + NR + STATIC_OBST_SLOTS * 2 + 4 * slot + 3] = vel[1] return obs