def generate_logs(self, observation, highwayenv_score):
    ego_state = observation.ego_vehicle_state
    start = observation.ego_vehicle_state.mission.start
    goal = observation.ego_vehicle_state.mission.goal
    path = get_path_to_goal(
        goal=goal, paths=observation.waypoint_paths, start=start
    )
    closest_wp, _ = get_closest_waypoint(
        num_lookahead=100,
        goal_path=path,
        ego_position=ego_state.position,
        ego_heading=ego_state.heading,
    )
    signed_dist_from_center = closest_wp.signed_lateral_error(ego_state.position)
    lane_width = closest_wp.lane_width * 0.5
    ego_dist_center = signed_dist_from_center / lane_width

    linear_jerk = np.linalg.norm(ego_state.linear_jerk)
    angular_jerk = np.linalg.norm(ego_state.angular_jerk)

    # Distance to goal
    ego_2d_position = ego_state.position[0:2]
    goal_dist = distance.euclidean(ego_2d_position, goal.position)

    angle_error = closest_wp.relative_heading(
        ego_state.heading
    )  # relative heading radians [-pi, pi]

    # Number of safety violations
    (
        ego_num_violations,
        social_num_violations,
    ) = ego_social_safety(
        observation,
        d_min_ego=1.0,
        t_c_ego=1.0,
        d_min_social=1.0,
        t_c_social=1.0,
        ignore_vehicle_behind=True,
    )

    info = dict(
        position=ego_state.position,
        speed=ego_state.speed,
        steering=ego_state.steering,
        heading=ego_state.heading,
        dist_center=abs(ego_dist_center),
        start=start,
        goal=goal,
        closest_wp=closest_wp,
        events=observation.events,
        ego_num_violations=ego_num_violations,
        social_num_violations=social_num_violations,
        goal_dist=goal_dist,
        linear_jerk=linear_jerk,
        angular_jerk=angular_jerk,
        env_score=self.ultra_scores.reward_adapter(observation, highwayenv_score),
    )
    return info
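# The adapter snippets in this section share a common set of dependencies.
# The import block below is a sketch: numpy, scipy, and math follow directly
# from the calls used here, while the module paths for Observation and the
# ULTRA helper utilities (get_path_to_goal, get_closest_waypoint,
# ego_social_safety) are assumptions and may differ in your checkout.
import math
from typing import Any, Dict

import numpy as np
from scipy.spatial import distance

from smarts.core.sensors import Observation  # assumed module path
from ultra.utils.common import (  # assumed module path
    ego_social_safety,
    get_closest_waypoint,
    get_path_to_goal,
)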
def reward_adapter(observation, reward):
    env_reward = reward
    ego_events = observation.events
    ego_observation = observation.ego_vehicle_state
    start = observation.ego_vehicle_state.mission.start
    goal = observation.ego_vehicle_state.mission.goal
    path = get_path_to_goal(
        goal=goal, paths=observation.waypoint_paths, start=start
    )

    linear_jerk = np.linalg.norm(ego_observation.linear_jerk)
    angular_jerk = np.linalg.norm(ego_observation.angular_jerk)

    # Distance to goal
    ego_2d_position = ego_observation.position[0:2]
    goal_dist = distance.euclidean(ego_2d_position, goal.position)

    closest_wp, _ = get_closest_waypoint(
        num_lookahead=num_lookahead,
        goal_path=path,
        ego_position=ego_observation.position,
        ego_heading=ego_observation.heading,
    )
    angle_error = closest_wp.relative_heading(
        ego_observation.heading
    )  # relative heading radians [-pi, pi]

    # Distance from lane center, normalized by half the lane width
    signed_dist_from_center = closest_wp.signed_lateral_error(
        observation.ego_vehicle_state.position
    )
    lane_width = closest_wp.lane_width * 0.5
    ego_dist_center = signed_dist_from_center / lane_width

    # Number of safety violations
    (
        ego_num_violations,
        social_num_violations,
    ) = ego_social_safety(
        observation,
        d_min_ego=1.0,
        t_c_ego=1.0,
        d_min_social=1.0,
        t_c_social=1.0,
        ignore_vehicle_behind=True,
    )

    speed_fraction = max(0, ego_observation.speed / closest_wp.speed_limit)
    ego_step_reward = 0.02 * min(speed_fraction, 1) * np.cos(angle_error)
    ego_speed_reward = min(
        0, (closest_wp.speed_limit - ego_observation.speed) * 0.01
    )  # m/s
    ego_collision = len(ego_events.collisions) > 0
    ego_collision_reward = -1.0 if ego_collision else 0.0
    ego_off_road_reward = -1.0 if ego_events.off_road else 0.0
    ego_off_route_reward = -1.0 if ego_events.off_route else 0.0
    ego_wrong_way = -0.02 if ego_events.wrong_way else 0.0
    ego_goal_reward = 0.0
    ego_time_out = 0.0
    ego_dist_center_reward = -0.002 * min(1, abs(ego_dist_center))
    ego_angle_error_reward = -0.005 * max(0, np.cos(angle_error))
    ego_reached_goal = 1.0 if ego_events.reached_goal else 0.0
    ego_safety_reward = -0.02 if ego_num_violations > 0 else 0
    social_safety_reward = -0.02 if social_num_violations > 0 else 0
    ego_lat_speed = 0.0  # -0.1 * abs(long_lat_speed[1])
    ego_linear_jerk = -0.0001 * linear_jerk
    ego_angular_jerk = -0.0001 * angular_jerk * math.cos(angle_error)
    env_reward /= 100

    # DG: Different speed reward (overrides the speed-limit penalty above)
    ego_speed_reward = -0.1 if speed_fraction >= 1 else 0.0
    ego_speed_reward += -0.01 if speed_fraction < 0.01 else 0.0

    rewards = sum(
        [
            ego_goal_reward,
            ego_collision_reward,
            ego_off_road_reward,
            ego_off_route_reward,
            ego_wrong_way,
            ego_speed_reward,
            # ego_time_out,
            ego_dist_center_reward,
            ego_angle_error_reward,
            ego_reached_goal,
            ego_step_reward,
            env_reward,
            # ego_linear_jerk,
            # ego_angular_jerk,
            # ego_lat_speed,
            # ego_safety_reward,
            # social_safety_reward,
        ]
    )
    return rewards
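# A minimal numeric sketch of the dense shaping terms above, using made-up
# values for speed, speed limit, heading error, and lane offset. It only
# re-applies the same formulas and is not part of the adapter itself.
def example_shaping_terms():
    speed, speed_limit = 12.0, 15.0  # hypothetical: 12 m/s in a 15 m/s zone
    angle_error = 0.1                # hypothetical heading error, radians
    ego_dist_center = 0.3            # hypothetical offset, in half lane-widths

    speed_fraction = max(0, speed / speed_limit)                           # 0.8
    ego_step_reward = 0.02 * min(speed_fraction, 1) * np.cos(angle_error)  # ~+0.0159
    ego_dist_center_reward = -0.002 * min(1, abs(ego_dist_center))         # -0.0006
    ego_angle_error_reward = -0.005 * max(0, np.cos(angle_error))          # ~-0.0050
    ego_speed_reward = -0.1 if speed_fraction >= 1 else 0.0                # 0.0 (not speeding)
    ego_speed_reward += -0.01 if speed_fraction < 0.01 else 0.0            # 0.0 (not stopped)

    # ~0.0103 in total: the dense terms are small compared to the sparse
    # goal (+1.0) and collision/off-road (-1.0) terms.
    return (
        ego_step_reward
        + ego_dist_center_reward
        + ego_angle_error_reward
        + ego_speed_reward
    )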
def adapt(
    observation: Observation, reward: float, info: Dict[str, Any]
) -> Dict[str, Any]:
    """Adapts a raw environment observation, an environment reward, and info about
    the agent from the environment into custom information about the agent.

    The raw observation from the environment must include the ego vehicle's state,
    events, waypoint paths, and neighborhood vehicles. See smarts.core.sensors for
    more information on the Observation type.

    Args:
        observation (Observation): The raw environment observation received from
            SMARTS.
        reward (float): The environment reward received from SMARTS.
        info (dict): Information about the agent received from SMARTS.

    Returns:
        dict: The adapted information. A dictionary containing the same information
            as the original info argument, but also including a "logs" key containing
            more information about the agent.
    """
    ego_state = observation.ego_vehicle_state
    start = observation.ego_vehicle_state.mission.start
    goal = observation.ego_vehicle_state.mission.goal
    path = get_path_to_goal(
        goal=goal, paths=observation.waypoint_paths, start=start
    )
    closest_wp, _ = get_closest_waypoint(
        num_lookahead=100,
        goal_path=path,
        ego_position=ego_state.position,
        ego_heading=ego_state.heading,
    )
    signed_dist_from_center = closest_wp.signed_lateral_error(ego_state.position)
    lane_width = closest_wp.lane_width * 0.5
    ego_dist_center = signed_dist_from_center / lane_width

    linear_jerk = np.linalg.norm(ego_state.linear_jerk)
    angular_jerk = np.linalg.norm(ego_state.angular_jerk)

    # Distance to goal
    ego_2d_position = ego_state.position[0:2]
    goal_dist = distance.euclidean(ego_2d_position, goal.position)

    angle_error = closest_wp.relative_heading(
        ego_state.heading
    )  # relative heading radians [-pi, pi]

    # Number of safety violations
    (
        ego_num_violations,
        social_num_violations,
    ) = ego_social_safety(
        observation,
        d_min_ego=1.0,
        t_c_ego=1.0,
        d_min_social=1.0,
        t_c_social=1.0,
        ignore_vehicle_behind=True,
    )

    info["logs"] = dict(
        position=ego_state.position,
        speed=ego_state.speed,
        steering=ego_state.steering,
        heading=ego_state.heading,
        dist_center=abs(ego_dist_center),
        start=start,
        goal=goal,
        closest_wp=closest_wp,
        events=observation.events,
        ego_num_violations=ego_num_violations,
        social_num_violations=social_num_violations,
        goal_dist=goal_dist,
        linear_jerk=linear_jerk,
        angular_jerk=angular_jerk,
        env_score=default_reward_adapter.adapt(observation, reward),
    )
    return info
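# Sketch: the "logs" dict attached by adapt() is convenient to aggregate over
# an episode. The helper and the sample entries below are hypothetical, use
# only a subset of the logged keys, and are not part of the SMARTS/ULTRA API.
def summarize_episode(step_logs):
    """Reduce a list of per-step "logs" dicts into episode-level statistics."""
    return {
        "mean_speed": float(np.mean([log["speed"] for log in step_logs])),
        "final_goal_dist": step_logs[-1]["goal_dist"],
        "ego_violations": sum(log["ego_num_violations"] for log in step_logs),
        "total_env_score": sum(log["env_score"] for log in step_logs),
    }

# Example with two made-up steps:
# summarize_episode([
#     {"speed": 10.2, "goal_dist": 80.0, "ego_num_violations": 0, "env_score": 0.01},
#     {"speed": 11.5, "goal_dist": 72.5, "ego_num_violations": 1, "env_score": -0.02},
# ])
# -> {"mean_speed": 10.85, "final_goal_dist": 72.5,
#     "ego_violations": 1, "total_env_score": -0.01}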