def parse_vehicles(self, vehicles, ego_vehicle_id):
    # vehicles is a dictionary that maps each vehicle's id to a dictionary
    # of information about that vehicle. Each such dictionary contains four
    # items: the vehicle's id, position, orientation, and bounding_box
    # (represented as four points in GPS coordinates).
    vehicles_list = []
    for veh_dict in vehicles.values():
        vehicle_id = veh_dict['id']
        location = pylot.utils.Location.from_gps(*veh_dict['position'])
        roll, pitch, yaw = veh_dict['orientation']
        rotation = pylot.utils.Rotation(pitch, yaw, roll)
        if vehicle_id == ego_vehicle_id:
            # Can compare against the canbus output to check that the
            # transformations are working.
            self._logger.debug(
                'Ego vehicle location with ground_obstacles: {}'.format(
                    location))
        else:
            vehicles_list.append(
                Obstacle(
                    None,  # We currently don't use the bounding box.
                    1.0,  # confidence
                    'vehicle',
                    vehicle_id,
                    pylot.utils.Transform(location, rotation)))
    return vehicles_list

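# A minimal sketch (hypothetical values) of the input that parse_vehicles
# expects: a dict keyed by vehicle id, with 'position' holding a GPS
# (latitude, longitude, altitude) triple and 'orientation' holding
# (roll, pitch, yaw) angles.
_EXAMPLE_VEHICLES = {
    12: {
        'id': 12,
        'position': (0.001, 0.002, 0.5),
        'orientation': (0.0, 0.0, 90.0),
        'bounding_box': None,  # four GPS points in the real messages
    },
}
# obstacles = self.parse_vehicles(_EXAMPLE_VEHICLES, ego_vehicle_id=7)
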
def extract_data_in_pylot_format(actor_list):
    """Extracts actor information in pylot format from an actor list.

    Args:
        actor_list (carla.ActorList): An actor list object with all the
            simulation actors.

    Returns:
        A tuple that contains objects for all the different types of actors.
    """
    # Note: the output will include the ego vehicle as well.
    vec_actors = actor_list.filter('vehicle.*')
    vehicles = [
        Obstacle.from_simulator_actor(vec_actor) for vec_actor in vec_actors
    ]

    person_actors = actor_list.filter('walker.pedestrian.*')
    people = [
        Obstacle.from_simulator_actor(ped_actor)
        for ped_actor in person_actors
    ]

    tl_actors = actor_list.filter('traffic.traffic_light*')
    traffic_lights = [
        TrafficLight.from_simulator_actor(tl_actor) for tl_actor in tl_actors
    ]

    speed_limit_actors = actor_list.filter('traffic.speed_limit*')
    speed_limits = [
        SpeedLimitSign.from_simulator_actor(ts_actor)
        for ts_actor in speed_limit_actors
    ]

    traffic_stop_actors = actor_list.filter('traffic.stop')
    traffic_stops = [
        StopSign.from_simulator_actor(ts_actor)
        for ts_actor in traffic_stop_actors
    ]

    return (vehicles, people, traffic_lights, speed_limits, traffic_stops)

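# A minimal usage sketch, assuming a CARLA server is running on the default
# host/port (both the host/port and the helper name are assumptions, not
# part of the source above):
def _ground_truth_snapshot_example():
    import carla
    client = carla.Client('localhost', 2000)
    client.set_timeout(10.0)
    world = client.get_world()
    # Returns (vehicles, people, traffic_lights, speed_limits,
    # traffic_stops), each a list of pylot perception objects.
    return extract_data_in_pylot_format(world.get_actors())
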
def parse_static_obstacles(self, static_obstacles):
    # Each static obstacle has an id and a position.
    static_obstacles_list = []
    for static_obstacle_dict in static_obstacles.values():
        static_obstacle_id = static_obstacle_dict['id']
        location = pylot.utils.Location.from_gps(
            *static_obstacle_dict['position'])
        static_obstacles_list.append(
            Obstacle(
                None,  # bounding box
                1.0,  # confidence
                'static_obstacle',
                static_obstacle_id,
                pylot.utils.Transform(location, pylot.utils.Rotation())))
    return static_obstacles_list

def track(self, frame):
    """Tracks obstacles in a frame.

    Args:
        frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`):
            Frame to track in.
    """
    self._tracker = SiamRPN_track(self._tracker, frame.frame)
    target_pos = self._tracker['target_pos']
    target_sz = self._tracker['target_sz']
    # Convert the (center, size) prediction to corner coordinates in the
    # (xmin, xmax, ymin, ymax) order that BoundingBox2D expects.
    self.obstacle.bounding_box = BoundingBox2D(
        int(target_pos[0] - target_sz[0] / 2.0),
        int(target_pos[0] + target_sz[0] / 2.0),
        int(target_pos[1] - target_sz[1] / 2.0),
        int(target_pos[1] + target_sz[1] / 2.0))
    return Obstacle(self.obstacle.bounding_box, self.obstacle.confidence,
                    self.obstacle.label, self.obstacle.id)

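# Worked example of the (center, size) -> corner conversion above, with
# made-up values: target_pos = (320, 240) and target_sz = (100, 60) yield
# BoundingBox2D(270, 370, 210, 270), i.e. xmin=270, xmax=370, ymin=210,
# ymax=270.
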
def parse_people(self, people):
    # Similar to vehicles, each entry of people is a dictionary that
    # contains four items: the person's id, position, orientation, and
    # bounding box.
    people_list = []
    for person_dict in people.values():
        person_id = person_dict['id']
        location = pylot.utils.Location.from_gps(*person_dict['position'])
        roll, pitch, yaw = person_dict['orientation']
        rotation = pylot.utils.Rotation(pitch, yaw, roll)
        people_list.append(
            Obstacle(
                None,  # bounding box
                1.0,  # confidence
                'person',
                person_id,
                pylot.utils.Transform(location, rotation)))
    return people_list

def track(self, frame, obstacles=None):
    """Tracks obstacles in a frame.

    Args:
        frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`):
            Frame to track in.
        obstacles: Detected obstacles used to update the tracker; if empty
            or None, confirmed tracks are only stepped forward.
    """
    # If there are obstacles, run deep sort to update the tracker with the
    # detections. Otherwise, step each confirmed track one step forward.
    if obstacles:
        bboxes, labels, confidence_scores, ids = [], [], [], []
        for obstacle in obstacles:
            bboxes.append(obstacle.bounding_box.as_width_height_bbox())
            labels.append(obstacle.label)
            confidence_scores.append(obstacle.confidence)
            ids.append(obstacle.id)
        self._deepsort.run_deep_sort(frame.frame, confidence_scores, bboxes,
                                     labels, ids)
    else:
        for track in self._deepsort.tracker.tracks:
            if track.is_confirmed():
                track.predict(self._deepsort.tracker.kf)
    tracked_obstacles = []
    for track in self._deepsort.tracker.tracks:
        if track.is_confirmed():
            # Converts the x, y, w, h bbox to a tlbr bbox (top-left and
            # bottom-right coordinates).
            bbox = track.to_tlbr()
            # Converts to the xmin, xmax, ymin, ymax format.
            xmin = int(bbox[0])
            xmax = int(bbox[2])
            ymin = int(bbox[1])
            ymax = int(bbox[3])
            if xmin < xmax and ymin < ymax:
                bbox = BoundingBox2D(xmin, xmax, ymin, ymax)
                tracked_obstacles.append(
                    Obstacle(bbox, 0, track.label, track.track_id))
            else:
                self._logger.error(
                    "Tracker found invalid bounding box {} {} {} {}".format(
                        xmin, xmax, ymin, ymax))
    return True, tracked_obstacles

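# Sketch of the two call modes above (the `tracker` instance and the
# pipeline inputs are hypothetical):
#   ok, tracked = tracker.track(frame, obstacles=detections)  # update step
#   ok, tracked = tracker.track(frame)  # prediction-only step
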
def on_frame_msg(self, msg, obstacle_tracking_stream):
    """Invoked when a FrameMessage is received on the camera stream."""
    self._logger.debug('@{}: {} received frame'.format(
        msg.timestamp, self.config.name))
    assert msg.frame.encoding == 'BGR', 'Expects BGR frames'
    image_np = msg.frame.as_bgr_numpy_array()
    results = self.run_model(image_np)
    obstacles = []
    for res in results:
        track_id = res['tracking_id']
        bbox = res['bbox']
        score = res['score']
        label_id = res['class'] - 1
        if label_id > 80:
            continue
        label = self.trained_dataset.class_name[label_id]
        if label in ['Pedestrian', 'pedestrian']:
            label = 'person'
        elif label == 'Car':
            label = 'car'
        elif label == 'Cyclist':
            label = 'bicycle'
        if label in OBSTACLE_LABELS:
            bounding_box_2D = BoundingBox2D(bbox[0], bbox[2], bbox[1],
                                            bbox[3])
            bounding_box_3D = None
            if 'dim' in res and 'loc' in res and 'rot_y' in res:
                bounding_box_3D = BoundingBox3D.from_dimensions(
                    res['dim'], res['loc'], res['rot_y'])
            obstacles.append(
                Obstacle(bounding_box_3D,
                         score,
                         label,
                         track_id,
                         bounding_box_2D=bounding_box_2D))
    obstacle_tracking_stream.send(
        ObstaclesMessage(msg.timestamp, obstacles, 0))

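# Hypothetical shape of one result entry consumed above: 'bbox' is
# [xmin, ymin, xmax, ymax] in pixels (as implied by the BoundingBox2D call),
# 'class' is a 1-indexed label id into trained_dataset.class_name, and
# 'dim'/'loc'/'rot_y' are optional keys that enable the 3D bounding box.
# All values are made up for illustration.
_EXAMPLE_RESULT = {
    'tracking_id': 3,
    'bbox': [100, 50, 180, 220],
    'score': 0.87,
    'class': 1,
    'dim': [1.5, 1.7, 4.2],
    'loc': [2.0, 1.0, 10.0],
    'rot_y': 0.1,
}
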
def track(self, frame):
    """Tracks obstacles in a frame.

    Args:
        frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`):
            Frame to track in.
    """
    # Each track in trackers predicts a bbox in the
    # [xmin, ymin, xmax, ymax] format.
    obstacles = []
    for track in self.tracker.trackers:
        coords = track.predict()[0].tolist()
        # Change to the xmin, xmax, ymin, ymax format.
        xmin = int(coords[0])
        xmax = int(coords[2])
        ymin = int(coords[1])
        ymax = int(coords[3])
        if xmin < xmax and ymin < ymax:
            bbox = BoundingBox2D(xmin, xmax, ymin, ymax)
            obstacles.append(Obstacle(bbox, 0, track.label, track.id))
        else:
            self._logger.error(
                "Tracker found invalid bounding box {} {} {} {}".format(
                    xmin, xmax, ymin, ymax))
    return True, obstacles

def on_msg_camera_stream(self, msg, obstacles_stream):
    """Invoked whenever a frame message is received on the stream.

    Args:
        msg (:py:class:`~pylot.perception.messages.FrameMessage`): Message
            received.
        obstacles_stream (:py:class:`erdos.WriteStream`): Stream on which
            the operator sends
            :py:class:`~pylot.perception.messages.ObstaclesMessage`
            messages.
    """
    self._logger.debug('@{}: {} received message'.format(
        msg.timestamp, self.config.name))
    start_time = time.time()
    # The models expect BGR images.
    assert msg.frame.encoding == 'BGR', 'Expects BGR frames'
    num_detections, res_boxes, res_scores, res_classes = self.__run_model(
        msg.frame.frame)
    obstacles = []
    for i in range(0, num_detections):
        if res_classes[i] in self._coco_labels:
            if (res_scores[i] >=
                    self._flags.obstacle_detection_min_score_threshold):
                if self._coco_labels[res_classes[i]] in OBSTACLE_LABELS:
                    obstacles.append(
                        Obstacle(BoundingBox2D(
                            int(res_boxes[i][1] *
                                msg.frame.camera_setup.width),
                            int(res_boxes[i][3] *
                                msg.frame.camera_setup.width),
                            int(res_boxes[i][0] *
                                msg.frame.camera_setup.height),
                            int(res_boxes[i][2] *
                                msg.frame.camera_setup.height)),
                                 res_scores[i],
                                 self._coco_labels[res_classes[i]],
                                 id=self._unique_id))
                    self._unique_id += 1
                else:
                    self._logger.warning(
                        'Ignoring non essential detection {}'.format(
                            self._coco_labels[res_classes[i]]))
        else:
            self._logger.warning('Filtering unknown class: {}'.format(
                res_classes[i]))
    self._logger.debug('@{}: {} obstacles: {}'.format(
        msg.timestamp, self.config.name, obstacles))
    # Get the runtime in ms.
    runtime = (time.time() - start_time) * 1000
    # Send out the obstacles.
    obstacles_stream.send(
        ObstaclesMessage(msg.timestamp, obstacles, runtime))
    obstacles_stream.send(erdos.WatermarkMessage(msg.timestamp))
    if self._flags.log_detector_output:
        msg.frame.annotate_with_bounding_boxes(msg.timestamp, obstacles,
                                               None, self._bbox_colors)
        msg.frame.save(msg.timestamp.coordinates[0], self._flags.data_path,
                       'detector-{}'.format(self.config.name))

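# Hypothetical helper (not part of the operator) that makes the box
# conversion above explicit; it assumes the model emits boxes in the
# TensorFlow detection API's normalized [ymin, xmin, ymax, xmax] order.
def _normalized_box_to_pixels(box, width, height):
    ymin, xmin, ymax, xmax = box
    return (int(xmin * width), int(xmax * width),
            int(ymin * height), int(ymax * height))

# Example: _normalized_box_to_pixels([0.25, 0.1, 0.75, 0.5], 800, 600)
# returns (80, 400, 150, 450) in (xmin, xmax, ymin, ymax) order.
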
def on_watermark(self, timestamp, obstacles_stream):
    """Invoked whenever a watermark is received on the stream.

    Args:
        timestamp (:py:class:`erdos.timestamp.Timestamp`): Timestamp of
            the watermark.
        obstacles_stream (:py:class:`erdos.WriteStream`): Stream on which
            the operator sends
            :py:class:`~pylot.perception.messages.ObstaclesMessage`
            messages.
    """
    if timestamp.is_top:
        return
    start_time = time.time()
    ttd_msg = self._ttd_msgs.popleft()
    frame_msg = self._frame_msgs.popleft()
    ttd = ttd_msg.data
    self.update_model_choice(ttd)
    frame = frame_msg.frame
    inputs = frame.as_rgb_numpy_array()
    detector_start_time = time.time()
    outputs_np = self._tf_session.run(
        self._signitures['prediction'],
        feed_dict={self._signitures['image_arrays']: [inputs]})[0]
    detector_end_time = time.time()
    self._logger.debug("@{}: detector runtime {}".format(
        timestamp, (detector_end_time - detector_start_time) * 1000))
    obstacles = []
    camera_setup = frame.camera_setup
    for _, ymin, xmin, ymax, xmax, score, _class in outputs_np:
        xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
        if _class in self._coco_labels:
            if (score >= self._flags.obstacle_detection_min_score_threshold
                    and self._coco_labels[_class] in OBSTACLE_LABELS):
                # Clamp the box to the frame's dimensions.
                xmin, xmax = max(0, xmin), min(xmax, camera_setup.width)
                ymin, ymax = max(0, ymin), min(ymax, camera_setup.height)
                if xmin < xmax and ymin < ymax:
                    obstacles.append(
                        Obstacle(BoundingBox2D(xmin, xmax, ymin, ymax),
                                 score,
                                 self._coco_labels[_class],
                                 id=self._unique_id))
                    self._unique_id += 1
                    self._csv_logger.info("{},{},detection,{},{:4f}".format(
                        pylot.utils.time_epoch_ms(),
                        timestamp.coordinates[0],
                        self._coco_labels[_class], score))
        else:
            self._logger.debug(
                'Filtering unknown class: {}'.format(_class))
    if self._flags.log_detector_output:
        frame.annotate_with_bounding_boxes(timestamp, obstacles, None,
                                           self._bbox_colors)
        frame.save(timestamp.coordinates[0], self._flags.data_path,
                   'detector-{}'.format(self.config.name))
    obstacles_stream.send(ObstaclesMessage(timestamp, obstacles, 0))
    operator_time_total_end = time.time()
    self._logger.debug("@{}: total time spent: {}".format(
        timestamp, (operator_time_total_end - start_time) * 1000))

def process_depth_images(msg,
                         depth_camera_setup,
                         ego_vehicle,
                         speed,
                         csv,
                         surface,
                         visualize=False):
    print("Received a message for the time: {}".format(msg.timestamp))

    # If we are within distance of the destination, stop and exit with
    # success.
    if ego_vehicle.get_location().distance(VEHICLE_DESTINATION) <= 5:
        ego_vehicle.set_velocity(carla.Vector3D())
        CLEANUP_FUNCTION()
        sys.exit(0)

    # Get the RGB image corresponding to the given depth image timestamp.
    rgb_image = retrieve_rgb_image(msg.timestamp)

    vehicle_transform = Transform.from_carla_transform(
        ego_vehicle.get_transform())

    # Build the depth frame before the semantic frame, since the latter
    # reuses its camera setup.
    depth_frame = DepthFrame.from_carla_frame(msg, depth_camera_setup)
    # Transform the static camera setup with respect to the location of the
    # vehicle in the world.
    depth_frame.camera_setup.set_transform(
        vehicle_transform * depth_frame.camera_setup.transform)

    # Get the semantic image corresponding to the given depth image
    # timestamp.
    semantic_image = retrieve_semantic_image(msg.timestamp)
    semantic_frame = SegmentedFrame.from_carla_image(
        semantic_image, depth_frame.camera_setup)

    # Visualize the image and the bounding boxes if needed.
    if visualize:
        draw_image(rgb_image, surface)

    # Transform the people into obstacles relative to the current frame.
    bb_surface = None
    resolution = (depth_camera_setup.width, depth_camera_setup.height)
    if visualize:
        bb_surface = pygame.Surface(resolution)
        bb_surface.set_colorkey((0, 0, 0))

    detected_people = []
    for person in ego_vehicle.get_world().get_actors().filter('walker.*'):
        obstacle = Obstacle.from_carla_actor(person)
        if obstacle.distance(vehicle_transform) > 125:
            bbox = None
        else:
            bbox = obstacle.to_camera_view(depth_frame,
                                           semantic_frame.frame)
        if bbox is not None:
            detected_people.append(bbox)
            if visualize:
                draw_bounding_box(bbox, bb_surface)

    # We have drawn all the bounding boxes on the bb_surface; now put it on
    # the RGB image surface.
    if visualize:
        surface.blit(bb_surface, (0, 0))
        pygame.display.flip()

    # Compute the mAP.
    print("We detected a total of {} people.".format(len(detected_people)))
    compute_and_log_map(detected_people, msg.timestamp, csv)

    # Move the ego_vehicle according to the given speed.
    ego_vehicle.set_velocity(carla.Vector3D(x=-speed))

    ego_vehicle.get_world().tick()