Beispiel #1
0
    def step_reward(self, action, obs, ds):
        done = False
        x, y = self.ship.position
        step_reward = 0

        # Living penalty
        # step_reward -= 0.001  # TODO Increase living penalty

        if not done and self.reward < -50:
            done = True

        if not done and abs(self.s - self.path.length) < 1:
            done = True

        for o in self.static_obstacles + self.dynamic_obstacles:
            if not done and distance(self.ship.position,
                                     o.position) < self.ship.radius + o.radius:
                done = True
                step_reward -= OBST_PENALTY
                break

        if not done and abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
            done = True
            step_reward -= 50

        if not done and distance(self.ship.position,
                                 self.path.get_endpoint()) < 20:
            done = True
            # step_reward += 50

        if not done:  # Reward progress along path, penalise backwards progress
            step_reward += ds / 4

        if not done:  # Penalise cross track error if too far away from path
            # state_error = obs[:6]
            # step_reward += (0.2 - np.clip(np.linalg.norm(state_error), 0, 0.4))/100
            # heading_err = state_error[2]
            # surge_err = state_error[3]
            # TODO Punish for facing wrong way / Reward for advancing along path

            surge_error = obs[NR + 0]
            cross_track_error = obs[NR + 2]

            # step_reward -= abs(cross_track_error)*0.1
            # step_reward -= max(0, -surge_error)*0.1

            step_reward -= abs(cross_track_error) * 0.5 + max(
                0, -surge_error) * 0.5

            # step_reward -= (max(0.1, -obs[0]) - 0.1)*0.3
            # dist_from_path = np.sqrt(x_err ** 2 + y_err ** 2)
            # path_angle = self.path.get_angle(self.s)
            # If the reference is pointing towards the path, don't penalise
            # if dist_from_path > 0.25 and sign(float(Angle(path_angle - self.ship.ref[1]))) == sign(y_err):
            #     step_reward -= 0.1*(dist_from_path - 0.25)

        return done, step_reward
Beispiel #2
0
    def step(self):
        self.state = self.model(self.ref)
        self.position = tuple(self.state[0:2, :].flatten())
        self.angle = float(self.state[2])
        self.linearVelocity = tuple(self.state[3:5].flatten())
        self.angularVelocity = float(self.state[5])

        if self.path_taken == [] or distance(self.position,
                                             self.path_taken[-1]) > 3:
            self.path_taken.append(self.position)
Beispiel #3
0
    def update_closest_obstacles(self):
        # Deallocate static obstacles that are out of range
        for i, slot in self.active_static.copy().items():
            if distance(self.ship.position,
                        self.static_obstacles[i].position) > OBST_RANGE * 1.05:
                self.active_static.pop(i)

        # Deallocate dynamic obstacles that are out of range
        for i, slot in self.active_dynamic.copy().items():
            if distance(
                    self.ship.position,
                    self.dynamic_obstacles[i].position) > OBST_RANGE * 1.05:
                self.active_dynamic.pop(i)

        if STATIC_OBST_SLOTS > 0:
            # Allocate static obstacles
            distances = {
                i: distance(self.ship.position, obst.position)
                for i, obst in enumerate(self.static_obstacles)
            }
            for obsti, obst in enumerate(self.static_obstacles):
                dist = distances[obsti]
                if dist < OBST_RANGE and obsti not in self.active_static:
                    available_slots = tuple(
                        set(range(STATIC_OBST_SLOTS)) -
                        set(self.active_static.values()))
                    if len(available_slots) > 0:
                        slot = np.random.choice(available_slots)
                        self.active_static[obsti] = slot
                    else:
                        active_distances = {
                            k: distances[k]
                            for k in self.active_static.keys()
                        }
                        i_max = max(active_distances, key=active_distances.get)
                        if dist < active_distances[i_max]:
                            slot_max = self.active_static[i_max]
                            self.active_static.pop(i_max)
                            self.active_static[obsti] = slot_max

        if DYNAMIC_OBST_SLOTS > 0:
            # Allocate dynamic obstacles
            distances = {
                i: distance(self.ship.position, obst.position)
                for i, obst in enumerate(self.dynamic_obstacles)
            }
            for obsti, obst in enumerate(self.dynamic_obstacles):
                dist = distances[obsti]
                if dist < OBST_RANGE and obsti not in self.active_dynamic:
                    available_slots = tuple(
                        set(range(DYNAMIC_OBST_SLOTS)) -
                        set(self.active_dynamic.values()))
                    if len(available_slots) > 0:
                        slot = np.random.choice(available_slots)
                        self.active_dynamic[obsti] = slot
                    else:
                        active_distances = {
                            k: distances[k]
                            for k in self.active_dynamic.keys()
                        }
                        i_max = max(active_distances, key=active_distances.get)
                        if dist < active_distances[i_max]:
                            slot_max = self.active_dynamic[i_max]
                            self.active_dynamic.pop(i_max)
                            self.active_dynamic[obsti] = slot_max
Beispiel #4
0
    def navigate(self, state=None):
        if state is None:
            state = self.ship.state.flatten()

        self.update_closest_obstacles()

        # TODO Try lookahead vector instead of closest point
        closest_point = self.path(self.s).flatten()
        closest_angle = self.path.get_angle(self.s)
        target = self.path(self.s + LOS_DISTANCE).flatten()
        target_angle = self.path.get_angle(self.s + LOS_DISTANCE)

        # Ref error
        surge_ref_error = self.speed - self.ship.ref[0]
        heading_ref_error = float(Angle(target_angle - self.ship.ref[1] * pi))

        # State and path errors
        surge_error = self.speed - state[3]
        heading_error = float(Angle(target_angle - state[2]))
        cross_track_error = rotate(closest_point - self.ship.position,
                                   -closest_angle)[1]
        target_dist = distance(self.ship.position, target)

        # Construct observation vector
        obs = np.zeros(
            (NR + NS + 2 * STATIC_OBST_SLOTS + 4 * DYNAMIC_OBST_SLOTS, ))

        if NR == 2:
            # obs[0] = np.clip(surge_ref_error / MAX_SURGE, -1, 1)
            obs[1] = np.clip(heading_ref_error / pi, -1, 1)

        obs[NR + 0] = np.clip(surge_error / MAX_SURGE, -1, 1)
        obs[NR + 1] = np.clip(heading_error / pi, -1, 1)
        obs[NR + 2] = np.clip(cross_track_error / OBST_RANGE, -1, 1)
        obs[NR + 3] = np.clip(target_dist / OBST_RANGE, 0, 1)

        if False:
            state_error = np.array([
                target[0], target[1], target_angle, self.speed, 0, 0
            ]) - state  # plus obstacles
            state_error[0:2] = rotate(state_error[0:2],
                                      self.ship.angle)  # Rotate to body frame
            state_error[2] = float(Angle(
                state_error[2]))  # Bring back into -pi, pi range

            # Reference error
            obs[0] = np.clip(ref_error[0] / MAX_SURGE, -1, 1)
            obs[1] = np.clip(ref_error[0] / pi, -1, 1)

            # Path errors
            obs[2] = np.clip(state_error[0] / LOS_DISTANCE, -1, 1)
            obs[3] = np.clip(state_error[1] / LOS_DISTANCE, -1, 1)
            obs[4] = np.clip((((state_error[2] + pi) % (2 * pi)) - pi) / pi,
                             -1, 1)
            obs[5] = np.clip(state_error[3] / MAX_SURGE, -1, 1)
            obs[6] = np.clip(state_error[4] / MAX_SURGE, -1, 1)
            obs[7] = np.clip(state_error[5] / MAX_SURGE, -1, 1)

        # Write static obstacle data into observation
        for i, slot in self.active_static.items():
            obst = self.static_obstacles[i]
            vec = obst.position - self.ship.position
            obs[NS + NR + 2 * slot] = float(
                Angle(arctan2(vec[1], vec[0]) - self.ship.angle)) / pi
            obs[NS + NR + 2 * slot + 1] = 1 - np.clip(
                (np.linalg.norm(vec) - self.ship.radius - obst.radius) /
                LOS_DISTANCE, 0, 1)

        # Write dynamic obstacle data into observation
        for i, slot in self.active_dynamic.items():
            obst = self.dynamic_obstacles[i]
            vec = obst.position - self.ship.position
            obs[NS + NR + STATIC_OBST_SLOTS * 2 + 4 * slot] = float(
                Angle(arctan2(vec[1], vec[0]) - self.ship.angle)) / pi
            obs[NS + NR + STATIC_OBST_SLOTS * 2 + 4 * slot + 1] = 1 - np.clip(
                (np.linalg.norm(vec) - self.ship.radius - obst.radius) /
                LOS_DISTANCE, 0, 1)
            vel = rotate(obst.linearVelocity, self.ship.angle)
            obs[NS + NR + STATIC_OBST_SLOTS * 2 + 4 * slot + 2] = vel[0]
            obs[NS + NR + STATIC_OBST_SLOTS * 2 + 4 * slot + 3] = vel[1]

        return obs