Example #1
def do_dqn_control(state: prediction.HighwayState, dqn, epsilon):
    # Epsilon-greedy action selection: explore with probability epsilon,
    # otherwise take the action with the highest predicted Q-value.
    if random.random() < epsilon:
        current_action = random.randrange(dqn.num_outputs)
    else:
        with torch.no_grad():
            state_vector = get_state_vector_from_base_state(state)
            qs = dqn.get_q_values(state_vector)
            current_action = torch.argmax(qs).item()
    # Apply the jerk value associated with the chosen discrete action.
    control.set_ego_jerk(dqn.get_jerk(current_action))
    return current_action
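
A typical caller would invoke do_dqn_control once per simulation tick and decay epsilon between episodes. The sketch below illustrates this; the env and dqn objects, the reset/step interface, and the epsilon schedule constants are illustrative assumptions, not part of the original code.

# Hypothetical training-loop sketch (assumed env/dqn objects and epsilon schedule).
EPSILON_START, EPSILON_MIN, EPSILON_DECAY = 1.0, 0.05, 0.995

epsilon = EPSILON_START
for episode in range(500):
    state = env.reset()                       # assumed to return a HighwayState
    done = False
    while not done:
        action = do_dqn_control(state, dqn, epsilon)
        state, done = env.step()              # assumed to return (next_state, done)
    epsilon = max(EPSILON_MIN, epsilon * EPSILON_DECAY)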
Example #2
def get_control(next_state, Q, visits=None, epsilon=0.0):
    # Epsilon-greedy over the tabular Q-function: explore with a random jerk
    # action, otherwise take the greedy action for the given state.
    if random.random() < epsilon:
        next_action = random.choice(list(control.Settings.JERK_VALUES.keys()))
    else:
        if visits is not None and control.Settings.AVOID_UNVISITED_STATES:
            # Restrict the greedy choice to actions that have been visited.
            next_action = np.argmax(get_visited_q_values(
                Q, visits, next_state))
        else:
            next_action = np.argmax(get_q_values(Q, next_state))
    jerk = get_jerk(next_action)
    control.set_ego_jerk(jerk)
    return next_action
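
The helpers get_q_values and get_visited_q_values are not shown in this example. A minimal sketch of what such lookups could look like for a dictionary-backed Q-table is given below; the (state, action) keying, the zero default, and the action indexing are assumptions, not the project's actual implementation.

import numpy as np

# Hypothetical dictionary-backed lookups (sketch only): Q is assumed to map
# (state, action) keys to floats, with unseen pairs defaulting to 0.
NUM_ACTIONS = len(control.Settings.JERK_VALUES)  # assumed action count

def get_q_values(Q, state):
    return np.array([Q.get((state, a), 0.0) for a in range(NUM_ACTIONS)])

def get_visited_q_values(Q, visits, state):
    # Push unvisited (state, action) pairs to -inf so argmax ignores them.
    qs = get_q_values(Q, state)
    visited = np.array([visits.get((state, a), 0) > 0 for a in range(NUM_ACTIONS)])
    return np.where(visited, qs, -np.inf)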
Example #3
    def _handle_jerk(self, selected_jerk):
        # Project acceleration and speed one tick ahead under the selected jerk.
        projected_acceleration = self.previous_acceleration + selected_jerk * Settings.TICK_LENGTH
        projected_speed = self.previous_state.ego_speed + projected_acceleration * Settings.TICK_LENGTH
        if projected_acceleration > Settings.MAX_POSITIVE_ACCELERATION or projected_acceleration < Settings.MAX_NEGATIVE_ACCELERATION:
            # Acceleration limit exceeded: penalize and clip to the allowed range.
            self.invalid_action_reward = self.penalty_for_invalid_action * Settings.TICK_LENGTH
            projected_acceleration = np.clip(projected_acceleration, Settings.MAX_NEGATIVE_ACCELERATION, Settings.MAX_POSITIVE_ACCELERATION)
        elif projected_speed > Settings.MAX_SPEED or projected_speed < 0:
            # Speed out of range: penalize, clip the speed, and back out the acceleration it implies.
            self.invalid_action_reward = self.penalty_for_invalid_action * Settings.TICK_LENGTH
            projected_speed = np.clip(projected_speed, 0, Settings.MAX_SPEED)
            projected_acceleration = (projected_speed - self.previous_state.ego_speed) / Settings.TICK_LENGTH
        else:
            self.invalid_action_reward = 0
        # Jerk actually realized after clipping; the raw selected jerk is still applied.
        self.projected_jerk = (projected_acceleration - self.previous_acceleration) / Settings.TICK_LENGTH
        control.set_ego_jerk(selected_jerk)
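
As a quick numeric check of the one-tick projection above (the tick length, previous acceleration, previous speed, and selected jerk below are illustrative values, not the ones configured in Settings):

# Illustrative numbers only; the real values come from Settings and the simulator state.
TICK_LENGTH = 0.25            # s (assumed)
previous_acceleration = 1.0   # m/s^2 (assumed)
previous_speed = 20.0         # m/s (assumed)
selected_jerk = 4.0           # m/s^3 (assumed)

projected_acceleration = previous_acceleration + selected_jerk * TICK_LENGTH  # 2.0 m/s^2
projected_speed = previous_speed + projected_acceleration * TICK_LENGTH       # 20.5 m/s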
Example #4
    def _do_action(self, action):
        # Map the discrete DQN action to a target acceleration, then check the
        # resulting jerk and speed against the configured limits.
        projected_acceleration = Settings.ACCELERATION_VALUES_DQN[action]
        projected_speed = self.previous_state.ego_speed + projected_acceleration * Settings.TICK_LENGTH
        self.projected_jerk = (projected_acceleration - self.previous_acceleration) / Settings.TICK_LENGTH
        if self.projected_jerk > Settings.MAXIMUM_POSITIVE_JERK:
            # Jerk too large: penalize and saturate at the positive jerk limit.
            self.invalid_action_reward = self.penalty_for_invalid_action * Settings.TICK_LENGTH
            self.projected_jerk = Settings.MAXIMUM_POSITIVE_JERK
            control.set_ego_jerk(Settings.MAXIMUM_POSITIVE_JERK)
        elif self.projected_jerk < Settings.MINIMUM_NEGATIVE_JERK:
            # Jerk too negative: penalize and saturate at the negative jerk limit.
            self.invalid_action_reward = self.penalty_for_invalid_action * Settings.TICK_LENGTH
            self.projected_jerk = Settings.MINIMUM_NEGATIVE_JERK
            control.set_ego_jerk(Settings.MINIMUM_NEGATIVE_JERK)
        elif projected_speed > Settings.MAX_SPEED or projected_speed < 0:
            # Speed out of range: penalize, clip the speed, and recompute the
            # acceleration and jerk that the clipped speed implies.
            self.invalid_action_reward = self.penalty_for_invalid_action * Settings.TICK_LENGTH
            projected_speed = np.clip(projected_speed, 0, Settings.MAX_SPEED)
            projected_acceleration = (projected_speed - self.previous_state.ego_speed) / Settings.TICK_LENGTH
            self.projected_jerk = (projected_acceleration - self.previous_acceleration) / Settings.TICK_LENGTH
            control.set_ego_speed(projected_speed)
        else:
            self.invalid_action_reward = 0
            control.set_ego_speed(projected_speed)
Example #5
    def do_control(self, state: prediction.HighwayState) -> float:
        # Query the learned policy for a jerk action and apply it to the ego vehicle.
        return control.set_ego_jerk(self.get_control(state))
Example #6
    def do_combined_control(self, state: prediction.HighwayState) -> float:
        # Roll out the RL policy for up to ROLLOUT_LENGTH ticks and fall back
        # to the ST (space-time) planner if the rollout predicts a crash,
        # exceeds the desired speed, or is judged worse than the ST path.
        start_state = state
        first_action = self.get_control(state)
        future_action = first_action
        rollout_s_history = [control.get_ego_s(start_state.ego_position)]
        crash_predicted = False
        test_state = None
        selected_speed = 0
        last_choice_rl = True
        if len(self.takeover_history) > 0 and self.takeover_history[-1]:
            last_choice_rl = False
        i = 0
        while not (crash_predicted or i >= max(Settings.ROLLOUT_LENGTH, 1)):
            i += 1
            if i != 1:
                future_action = self.get_control(state)
            current_speed = state.ego_speed
            current_acceleration = state.ego_acceleration
            selected_speed = control.get_ego_speed_from_jerk(
                current_speed, current_acceleration, future_action)
            state, crash_predicted = state.predict_step_with_ego(
                selected_speed, Settings.TICK_LENGTH,
                Settings.COMBINATION_MIN_DISTANCE)
            if i == Settings.ST_TEST_ROLLOUTS:
                test_state = state
            rollout_s_history.append(control.get_ego_s(state.ego_position))
            if state.ego_position[0] > Settings.STOP_X:
                break
        if test_state is None:
            test_state = state
        if Settings.CHECK_ROLLOUT_CRASH and crash_predicted:
            print("Crash predicted")
            self.takeover_history.append(True)
            return st.do_st_control(start_state)
        elif Settings.LIMIT_DQN_SPEED and selected_speed > Settings.DESIRED_SPEED:
            print("DDPG going too fast")
            self.takeover_history.append(True)
            return st.do_st_control(start_state)
        elif Settings.TEST_ROLLOUT_STATE and st.test_guaranteed_crash_from_state(
                test_state):
            print("ST solver not happy with rollout state")
            self.takeover_history.append(True)
            return st.do_st_control(start_state)
        elif Settings.TEST_ST_STRICTLY_BETTER:
            # Compare the ST planner's path against the RL rollout on mean
            # absolute jerk and distance travelled over the shared horizon.
            s_sequence, obstacles, s_values, t_values, distances = st.get_appropriate_base_st_path_and_obstacles(
                start_state)
            end_point = len(s_sequence)
            # Drop trailing zero entries from the planned s-sequence.
            while s_sequence[end_point - 1] == 0:
                end_point -= 1
            s_sequence = s_sequence[:end_point]
            if Settings.TICK_LENGTH < Settings.T_DISCRETIZATION:
                s_sequence = st.finer_fit(s_sequence, Settings.TICK_LENGTH,
                                          Settings.T_DISCRETIZATION,
                                          start_state.ego_speed,
                                          start_state.ego_acceleration)

            # We are essentially on top of another car, so there's not much we can do anyway
            if len(s_sequence) <= 1:
                self.takeover_history.append(False)
                return control.set_ego_jerk(first_action)

            min_planning_length = min(len(s_sequence), len(rollout_s_history))
            st_jerk = st.get_path_mean_abs_jerk(
                s_sequence[:min_planning_length], start_state.ego_speed,
                start_state.ego_acceleration, Settings.TICK_LENGTH)
            rl_jerk = st.get_path_mean_abs_jerk(
                rollout_s_history[:min_planning_length], start_state.ego_speed,
                start_state.ego_acceleration, Settings.TICK_LENGTH)
            st_distance = s_sequence[min_planning_length - 1] - s_sequence[0]
            rl_distance = rollout_s_history[min_planning_length - 1] - rollout_s_history[0]
            if last_choice_rl or not Settings.REMEMBER_LAST_CHOICE_FOR_SWITCHING_COMBINED:
                if (st_jerk < rl_jerk
                        and st_distance > rl_distance) or rl_distance == 0:
                    print("ST Path deemed better")
                    self.takeover_history.append(True)
                    planned_distance_first_step = s_sequence[1] - s_sequence[0]
                    end_speed_first_step = planned_distance_first_step / (
                        Settings.TICK_LENGTH)
                    control.set_ego_speed(end_speed_first_step)
                    return end_speed_first_step
                else:
                    self.takeover_history.append(False)
                    return control.set_ego_jerk(first_action)
            else:
                if rl_jerk < st_jerk and rl_distance > st_distance:
                    print("RL path deemed better")
                    self.takeover_history.append(False)
                    return control.set_ego_jerk(first_action)
                else:
                    self.takeover_history.append(True)
                    planned_distance_first_step = s_sequence[1] - s_sequence[0]
                    end_speed_first_step = planned_distance_first_step / (
                        Settings.TICK_LENGTH)
                    control.set_ego_speed(end_speed_first_step)
                    return end_speed_first_step
        else:
            self.takeover_history.append(False)
            return control.set_ego_jerk(first_action)
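
The rollout comparison in Example #6 relies on st.get_path_mean_abs_jerk, which is not shown in these examples. A plausible finite-difference version, given only an s-sequence sampled once per tick plus the initial speed and acceleration, might look like the sketch below; this is an assumption about the helper, not its actual source.

import numpy as np

# Hypothetical sketch of a mean-absolute-jerk helper: differentiate the
# s-sequence three times with finite differences, seeding the first speed
# and acceleration samples with the known initial values.
def get_path_mean_abs_jerk(s_sequence, initial_speed, initial_acceleration, dt):
    s = np.asarray(s_sequence, dtype=float)
    speeds = np.concatenate(([initial_speed], np.diff(s) / dt))
    accelerations = np.concatenate(([initial_acceleration], np.diff(speeds) / dt))
    jerks = np.diff(accelerations) / dt
    if len(jerks) == 0:
        return 0.0
    return float(np.mean(np.abs(jerks)))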