import random

import numpy as np
import torch

# Project-local modules referenced throughout these snippets; the exact
# import locations (in particular `Settings`) are assumed from usage.
import control
import prediction
import st
from control import Settings


def do_dqn_control(state: prediction.HighwayState, dqn, epsilon):
    # Epsilon-greedy action selection: explore with probability epsilon,
    # otherwise act greedily on the network's Q-values.
    if random.random() < epsilon:
        current_action = random.randrange(dqn.num_outputs)
    else:
        with torch.no_grad():
            state_vector = get_state_vector_from_base_state(state)
            qs = dqn.get_q_values(state_vector)
            current_action = torch.argmax(qs).item()
    control.set_ego_jerk(dqn.get_jerk(current_action))
    return current_action
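# The same selection rule, shown standalone for clarity. This is a generic
# epsilon-greedy sketch, not project code; the function name and the plain
# array-of-floats Q representation are hypothetical.
def epsilon_greedy_sketch(q_values, epsilon):
    if random.random() < epsilon:
        return random.randrange(len(q_values))  # explore: uniform over actions
    return int(np.argmax(q_values))             # exploit: greedy on Q-values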
def get_control(next_state, Q, visits=None, epsilon=0.0):
    # Tabular epsilon-greedy: optionally restrict the greedy choice to
    # actions whose state-action pairs were visited during training.
    if random.random() < epsilon:
        next_action = random.choice(list(control.Settings.JERK_VALUES.keys()))
    else:
        if visits is not None and control.Settings.AVOID_UNVISITED_STATES:
            next_action = np.argmax(get_visited_q_values(Q, visits, next_state))
        else:
            next_action = np.argmax(get_q_values(Q, next_state))
    jerk = get_jerk(next_action)
    control.set_ego_jerk(jerk)
    return next_action
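# get_visited_q_values is defined elsewhere in the project; the sketch below
# is a hypothetical reconstruction of the idea, assuming Q and visits are
# dicts keyed by (state, action) pairs. Unvisited actions are masked to -inf
# so np.argmax never prefers an action that was never tried, as long as at
# least one visited action exists for the state.
def get_visited_q_values_sketch(Q, visits, state):
    actions = sorted(control.Settings.JERK_VALUES.keys())
    qs = np.full(len(actions), -np.inf)
    for i, action in enumerate(actions):
        if visits.get((state, action), 0) > 0:  # keep only visited pairs
            qs[i] = Q.get((state, action), 0.0)
    return qs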
def _handle_jerk(self, selected_jerk):
    # Integrate the selected jerk forward one tick and check the result
    # against the acceleration and speed envelopes.
    projected_acceleration = self.previous_acceleration + selected_jerk * Settings.TICK_LENGTH
    projected_speed = self.previous_state.ego_speed + projected_acceleration * Settings.TICK_LENGTH
    if (projected_acceleration > Settings.MAX_POSITIVE_ACCELERATION
            or projected_acceleration < Settings.MAX_NEGATIVE_ACCELERATION):
        # Acceleration limit violated: penalize the action and clip.
        self.invalid_action_reward = self.penalty_for_invalid_action * Settings.TICK_LENGTH
        projected_acceleration = np.clip(projected_acceleration,
                                         Settings.MAX_NEGATIVE_ACCELERATION,
                                         Settings.MAX_POSITIVE_ACCELERATION)
    elif projected_speed > Settings.MAX_SPEED or projected_speed < 0:
        # Speed limit violated: penalize, clip the speed, and back out the
        # acceleration that produces the clipped speed.
        self.invalid_action_reward = self.penalty_for_invalid_action * Settings.TICK_LENGTH
        projected_speed = np.clip(projected_speed, 0, Settings.MAX_SPEED)
        projected_acceleration = (projected_speed - self.previous_state.ego_speed) / Settings.TICK_LENGTH
    else:
        self.invalid_action_reward = 0
    self.projected_jerk = (projected_acceleration - self.previous_acceleration) / Settings.TICK_LENGTH
    control.set_ego_jerk(selected_jerk)
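# Worked example of the projection arithmetic above (illustrative numbers,
# not taken from the project's settings): with TICK_LENGTH = 0.1 s, a
# previous acceleration of 1.0 m/s^2, a previous speed of 20.0 m/s, and a
# selected jerk of 5.0 m/s^3:
#   projected_acceleration = 1.0 + 5.0 * 0.1 = 1.5 m/s^2
#   projected_speed        = 20.0 + 1.5 * 0.1 = 20.15 m/s
# If MAX_POSITIVE_ACCELERATION were 1.2 m/s^2, the acceleration would be
# clipped to 1.2 m/s^2 and the step would incur invalid_action_reward; the
# stored projected_jerk then becomes (1.2 - 1.0) / 0.1 = 2.0 m/s^3 rather
# than the requested 5.0 m/s^3.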
def _do_action(self, action):
    projected_acceleration = Settings.ACCELERATION_VALUES_DQN[action]
    projected_speed = self.previous_state.ego_speed + projected_acceleration * Settings.TICK_LENGTH
    self.projected_jerk = (projected_acceleration - self.previous_acceleration) / Settings.TICK_LENGTH
    if self.projected_jerk > Settings.MAXIMUM_POSITIVE_JERK:
        self.invalid_action_reward = self.penalty_for_invalid_action * Settings.TICK_LENGTH
        self.projected_jerk = Settings.MAXIMUM_POSITIVE_JERK
        control.set_ego_jerk(Settings.MAXIMUM_POSITIVE_JERK)
    elif self.projected_jerk < Settings.MINIMUM_NEGATIVE_JERK:
        self.invalid_action_reward = self.penalty_for_invalid_action * Settings.TICK_LENGTH
        self.projected_jerk = Settings.MINIMUM_NEGATIVE_JERK
        control.set_ego_jerk(Settings.MINIMUM_NEGATIVE_JERK)
    elif projected_speed > Settings.MAX_SPEED or projected_speed < 0:
        self.invalid_action_reward = self.penalty_for_invalid_action * Settings.TICK_LENGTH
        projected_speed = np.clip(projected_speed, 0, Settings.MAX_SPEED)
        projected_acceleration = (projected_speed - self.previous_state.ego_speed) / Settings.TICK_LENGTH
        self.projected_jerk = (projected_acceleration - self.previous_acceleration) / Settings.TICK_LENGTH
        control.set_ego_speed(projected_speed)
    else:
        self.invalid_action_reward = 0
        control.set_ego_speed(projected_speed)
def do_control(self, state: prediction.HighwayState) -> float:
    return control.set_ego_jerk(self.get_control(state))
def do_combined_control(self, state: prediction.HighwayState) -> float:
    start_state = state
    first_action = self.get_control(state)
    future_action = first_action
    rollout_s_history = [control.get_ego_s(start_state.ego_position)]
    crash_predicted = False
    test_state = None
    selected_speed = 0
    last_choice_rl = True
    if len(self.takeover_history) > 0 and self.takeover_history[-1]:
        last_choice_rl = False
    # Roll the RL policy forward through the prediction model to see where
    # it would take us over the next ROLLOUT_LENGTH ticks.
    i = 0
    while not (crash_predicted or i >= max(Settings.ROLLOUT_LENGTH, 1)):
        i += 1
        if i != 1:
            future_action = self.get_control(state)
        current_speed = state.ego_speed
        current_acceleration = state.ego_acceleration
        selected_speed = control.get_ego_speed_from_jerk(
            current_speed, current_acceleration, future_action)
        state, crash_predicted = state.predict_step_with_ego(
            selected_speed, Settings.TICK_LENGTH, Settings.COMBINATION_MIN_DISTANCE)
        if i == Settings.ST_TEST_ROLLOUTS:
            test_state = state
        rollout_s_history.append(control.get_ego_s(state.ego_position))
        if state.ego_position[0] > Settings.STOP_X:
            break
    if test_state is None:
        test_state = state
    # Arbitration: hand control to the ST solver when the rollout looks
    # unsafe, and otherwise optionally compare the two plans directly.
    if Settings.CHECK_ROLLOUT_CRASH and crash_predicted:
        print("Crash predicted")
        self.takeover_history.append(True)
        return st.do_st_control(start_state)
    elif Settings.LIMIT_DQN_SPEED and selected_speed > Settings.DESIRED_SPEED:
        print("DDPG going too fast")
        self.takeover_history.append(True)
        return st.do_st_control(start_state)
    elif Settings.TEST_ROLLOUT_STATE and st.test_guaranteed_crash_from_state(test_state):
        print("ST solver not happy with rollout state")
        self.takeover_history.append(True)
        return st.do_st_control(start_state)
    elif Settings.TEST_ST_STRICTLY_BETTER:
        s_sequence, obstacles, s_values, t_values, distances = \
            st.get_appropriate_base_st_path_and_obstacles(start_state)
        end_point = len(s_sequence)
        while s_sequence[end_point - 1] == 0:
            end_point -= 1
        s_sequence = s_sequence[:end_point]
        if Settings.TICK_LENGTH < Settings.T_DISCRETIZATION:
            s_sequence = st.finer_fit(s_sequence, Settings.TICK_LENGTH,
                                      Settings.T_DISCRETIZATION,
                                      start_state.ego_speed,
                                      start_state.ego_acceleration)
        # We are essentially on top of another car, so there's not much we can do anyway
        if len(s_sequence) <= 1:
            self.takeover_history.append(False)
            return control.set_ego_jerk(first_action)
        min_planning_length = min(len(s_sequence), len(rollout_s_history))
        st_jerk = st.get_path_mean_abs_jerk(
            s_sequence[:min_planning_length], start_state.ego_speed,
            start_state.ego_acceleration, Settings.TICK_LENGTH)
        rl_jerk = st.get_path_mean_abs_jerk(
            rollout_s_history[:min_planning_length], start_state.ego_speed,
            start_state.ego_acceleration, Settings.TICK_LENGTH)
        st_distance = s_sequence[min_planning_length - 1] - s_sequence[0]
        rl_distance = rollout_s_history[min_planning_length - 1] - rollout_s_history[0]
        if last_choice_rl or not Settings.REMEMBER_LAST_CHOICE_FOR_SWITCHING_COMBINED:
            if (st_jerk < rl_jerk and st_distance > rl_distance) or rl_distance == 0:
                print("ST Path deemed better")
                self.takeover_history.append(True)
                planned_distance_first_step = s_sequence[1] - s_sequence[0]
                end_speed_first_step = planned_distance_first_step / Settings.TICK_LENGTH
                control.set_ego_speed(end_speed_first_step)
                return end_speed_first_step
            else:
                self.takeover_history.append(False)
                return control.set_ego_jerk(first_action)
        else:
            if rl_jerk < st_jerk and rl_distance > st_distance:
                print("RL path deemed better")
                self.takeover_history.append(False)
                return control.set_ego_jerk(first_action)
            else:
                self.takeover_history.append(True)
                planned_distance_first_step = s_sequence[1] - s_sequence[0]
                end_speed_first_step = planned_distance_first_step / Settings.TICK_LENGTH
                control.set_ego_speed(end_speed_first_step)
                return end_speed_first_step
    else:
        self.takeover_history.append(False)
        return control.set_ego_jerk(first_action)
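# Illustrative restatement of the switching criterion above (the helper name
# is hypothetical): the ST plan takes over only when it strictly dominates
# the RL rollout on both comfort (lower mean absolute jerk) and progress
# (greater distance covered), or when the RL rollout makes no progress at all.
def st_strictly_better_sketch(st_jerk, rl_jerk, st_distance, rl_distance):
    return (st_jerk < rl_jerk and st_distance > rl_distance) or rl_distance == 0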