def track(self, frame, obstacles=None): """ Tracks obstacles in a frame. Args: frame: perception.camera_frame.CameraFrame to track in. """ if obstacles: bboxes = [ obstacle.bounding_box.as_width_height_bbox() for obstacle in obstacles ] confidence_scores = [obstacle.confidence for obstacle in obstacles] self.tracker, detections_class = self._deepsort.run_deep_sort( frame.frame, confidence_scores, bboxes) if self.tracker: obstacles = [] for track in self.tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue # Converts x, y, w, h bbox to tlbr bbox (top left and bottom # right coords). bbox = track.to_tlbr() # Converts to xmin, xmax, ymin, ymax format. obstacles.append( DetectedObstacle( BoundingBox2D(int(bbox[0]), int(bbox[2]), int(bbox[1]), int(bbox[3])), 0, "", track.track_id)) return True, obstacles return False, []
def __get_obstacles(self, obstacles, vehicle_transform, depth_frame, segmented_frame): """ Transforms obstacles into detected obstacles. Args: obstacles: List of pylot.perception.detection.obstacle.Obstacle. vehicle_transform: The transform of the ego vehicle. depth_frame: perception.depth_frame.DepthFrame taken at the time when obstacles were collected. segmented_frame: perception.segmentation.segmented_frame.SegmentedFrame taken at the time when the obstacles were collected. """ det_obstacles = [] for obstacle in obstacles: # Calculate the distance of the obstacle from the vehicle, and # convert to camera view if it is less than 125 metres away. if obstacle.distance(vehicle_transform) > 125: bbox = None else: bbox = obstacle.to_camera_view(depth_frame, segmented_frame) if bbox: det_obstacles.append( DetectedObstacle(bbox, 1.0, obstacle.label, obstacle.id)) return det_obstacles
def track(self, frame, obstacles=[]): """ Tracks obstacles in a frame. Args: frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`): Frame to track in. """ # If obstacles, run deep sort to update tracker with detections. # Otherwise, step each confirmed track one step forward. if obstacles: bboxes, labels, confidence_scores, ids = [], [], [], [] for obstacle in obstacles: bboxes.append(obstacle.bounding_box.as_width_height_bbox()) labels.append(obstacle.label) confidence_scores.append(obstacle.confidence) ids.append(obstacle.id) self._deepsort.run_deep_sort(frame.frame, confidence_scores, bboxes, labels, ids) else: for track in self._deepsort.tracker.tracks: if track.is_confirmed(): track.predict(self._deepsort.tracker.kf) tracked_obstacles = [] for track in self._deepsort.tracker.tracks: if track.is_confirmed(): # Converts x, y, w, h bbox to tlbr bbox (top left and bottom # right coords). bbox = track.to_tlbr() # Converts to xmin, xmax, ymin, ymax format. bbox_2d = BoundingBox2D(int(bbox[0]), int(bbox[2]), int(bbox[1]), int(bbox[3])) tracked_obstacles.append( DetectedObstacle(bbox_2d, 0, track.label, track.track_id)) return True, tracked_obstacles
def parse_vehicles(self, vehicles, ego_vehicle_id): # vehicles is a dictionary that maps each vehicle's id to # a dictionary of information about that vehicle. Each such dictionary # contains four items: the vehicle's id, position, orientation, and # bounding_box (represented as four points in GPS coordinates). # # Positions are originally represented as (latitude, longitude, altitude) # before they are converted using _gps_to_location. vehicles_list = [] for veh_dict in vehicles.values(): vehicle_id = veh_dict['id'] location = _gps_to_location(*veh_dict['position']) roll, pitch, yaw = veh_dict['orientation'] rotation = pylot.utils.Rotation(pitch, yaw, roll) if vehicle_id == ego_vehicle_id: # Can compare against canbus output to check that # transformations are working. self._logger.debug( 'Ego vehicle location with ground_obstacles: {}'.format( location)) else: vehicles_list.append( DetectedObstacle( None, # We currently don't use bounding box 1.0, # confidence 'vehicle', vehicle_id, pylot.utils.Transform(location, rotation))) return vehicles_list
def __convert_to_detected_tl(self, boxes, scores, labels, height, width): traffic_lights = [] for index in range(len(scores)): if scores[ index] > self._flags.traffic_light_det_min_score_threshold: bbox = BoundingBox2D( int(boxes[index][1] * width), # x_min int(boxes[index][3] * width), # x_max int(boxes[index][0] * height), # y_min int(boxes[index][2] * height) # y_max ) traffic_lights.append( DetectedObstacle(bbox, scores[index], labels[index])) return traffic_lights
def parse_static_obstacles(self, static_obstacles): # Each static obstacle has an id and position. static_obstacles_list = [] for static_obstacle_dict in static_obstacles.values(): static_obstacle_id = static_obstacle_dict['id'] location = _gps_to_location(*static_obstacle_dict['position']) static_obstacles_list.append( DetectedObstacle( None, # bounding box 1.0, # confidence 'static_obstacle', static_obstacle_id, pylot.utils.Transform(location, pylot.utils.Rotation()))) return static_obstacles_list
def track(self, frame): """ Tracks obstacles in a frame. Args: frame: perception.camera_frame.CameraFrame to track in. """ ok, bboxes = self._tracker.update(frame.frame) if not ok: return False, [] obstacles = [] for (xmin, ymin, w, h) in bboxes: obstacles.append( DetectedObstacle(BoundingBox2D(xmin, xmin + w, ymin, ymin + h), "", 0)) return True, obstacles
def track(self, frame): """ Tracks obstacles in a frame. Args: frame: perception.camera_frame.CameraFrame to track in. """ # each track in tracks has format ([xmin, ymin, xmax, ymax], id) obstacles = [] for track in self.tracker.trackers: coords = track.predict()[0].tolist() # changing to xmin, xmax, ymin, ymax format bbox = BoundingBox2D(int(coords[0]), int(coords[2]), int(coords[1]), int(coords[3])) obstacles.append(DetectedObstacle(bbox, 0, "", track.id)) return True, obstacles
def get_all_detected_traffic_light_boxes(self, town_name, depth_frame, segmented_image): """ Returns DetectedObstacles for all boxes of a traffic light. Args: town_name: The name of the town in which the traffic light is. depth_frame: A pylot.perception.depth_frame.DepthFrame segmented_image: A segmented image np array used to refine the bboxes. Returns: A list of DetectedObstacles. """ detected = [] bboxes = self._get_bboxes(town_name) # Convert the returned bounding boxes to 2D and check if the # light is occluded. If not, add it to the detected obstacle list. for bbox in bboxes: bounding_box = [ loc.to_camera_view( depth_frame.camera_setup.get_extrinsic_matrix(), depth_frame.camera_setup.get_intrinsic_matrix()) for loc in bbox ] bbox_2d = get_bounding_box_in_camera_view( bounding_box, depth_frame.camera_setup.width, depth_frame.camera_setup.height) if not bbox_2d: continue # Crop the segmented and depth image to the given bounding box. cropped_image = segmented_image[bbox_2d.y_min:bbox_2d.y_max, bbox_2d.x_min:bbox_2d.x_max] cropped_depth = depth_frame.frame[bbox_2d.y_min:bbox_2d.y_max, bbox_2d.x_min:bbox_2d.x_max] if cropped_image.size > 0: masked_image = np.zeros_like(cropped_image) masked_image[np.where(cropped_image == 12)] = 1 if np.sum(masked_image) >= 0.20 * masked_image.size: masked_depth = cropped_depth[np.where(masked_image == 1)] mean_depth = np.mean(masked_depth) * 1000 if abs(mean_depth - bounding_box[0].z) <= 2 and mean_depth < 150: detected.append( DetectedObstacle(bbox_2d, 1.0, self.state.get_label())) return detected
def track(self, frame): """ Tracks obstacles in a frame. Args: frame: perception.camera_frame.CameraFrame to track in. """ self._tracker = SiamRPN_track(self._tracker, frame.frame) target_pos = self._tracker['target_pos'] target_sz = self._tracker['target_sz'] self.obstacle.bounding_box = BoundingBox2D( int(target_pos[0] - target_sz[0] / 2.0), int(target_pos[0] + target_sz[0] / 2.0), int(target_pos[1] - target_sz[1] / 2.0), int(target_pos[1] + target_sz[1] / 2.0)) return DetectedObstacle(self.obstacle.bounding_box, self.obstacle.confidence, self.obstacle.label, self.obstacle.id)
def parse_people(self, people): # Similar to vehicles, each entry of people is a dictionary that # contains four items, the person's id, position, orientation, # and bounding box. people_list = [] for person_dict in people.values(): person_id = person_dict['id'] location = _gps_to_location(*person_dict['position']) roll, pitch, yaw = person_dict['orientation'] rotation = pylot.utils.Rotation(pitch, yaw, roll) people_list.append( DetectedObstacle( None, # bounding box 1.0, # confidence 'person', person_id, pylot.utils.Transform(location, rotation))) return people_list
def __get_obstacles(self, obstacles, vehicle_transform, depth_frame, segmented_frame): """Transforms obstacles into detected obstacles. Args: obstacles (list(:py:class:`~pylot.perception.detection.obstacle.Obstacle`)): List of obstacles. vehicle_transform (:py:class:`~pylot.utils.Transform`): Transform of the ego vehicle. depth_frame (:py:class:`~pylot.perception.depth_frame.DepthFrame`): Depth frame taken at the time when obstacles were collected. segmented_frame (:py:class:`~pylot.perception.segmentation.segmented_frame.SegmentedFrame`): Segmented frame taken at the time when the obstacles were collected. Returns: list(:py:class:`~pylot.perception.detection.utils.DetectedObstacle`): List of detected obstacles. """ det_obstacles = [] for obstacle in obstacles: # Calculate the distance of the obstacle from the vehicle, and # convert to camera view if it is less than # perfect_detection_max_distance metres away. if obstacle.distance( vehicle_transform ) > self._flags.perfect_detection_max_distance: bbox = None else: bbox = obstacle.to_camera_view(depth_frame, segmented_frame) if bbox: det_obstacles.append( DetectedObstacle(bbox, 1.0, obstacle.label, obstacle.id, obstacle.transform, obstacle.detailed_label)) return det_obstacles
def on_msg_camera_stream(self, msg, obstacles_stream): """ Invoked when the operator receives a message on the data stream.""" self._logger.debug('@{}: {} received message'.format( msg.timestamp, self._name)) start_time = time.time() # The models expect BGR images. assert msg.frame.encoding == 'BGR', 'Expects BGR frames' # Expand dimensions since the model expects images to have # shape: [1, None, None, 3] image_np_expanded = np.expand_dims(msg.frame.frame, axis=0) (boxes, scores, classes, num_detections) = self._tf_session.run( [ self._detection_boxes, self._detection_scores, self._detection_classes, self._num_detections ], feed_dict={self._image_tensor: image_np_expanded}) num_detections = int(num_detections[0]) res_classes = classes[0][:num_detections] res_boxes = boxes[0][:num_detections] res_scores = scores[0][:num_detections] obstacles = [] for i in range(0, num_detections): if (res_classes[i] in self._coco_labels and res_scores[i] >= self._flags.obstacle_detection_min_score_threshold): obstacles.append( DetectedObstacle( BoundingBox2D( int(res_boxes[i][1] * msg.frame.camera_setup.width), int(res_boxes[i][3] * msg.frame.camera_setup.width), int(res_boxes[i][0] * msg.frame.camera_setup.height), int(res_boxes[i][2] * msg.frame.camera_setup.height)), res_scores[i], self._coco_labels[res_classes[i]])) else: self._logger.warning('Filtering unknown class: {}'.format( res_classes[i])) self._logger.debug('@{}: {} obstacles: {}'.format( msg.timestamp, self._name, obstacles)) if (self._flags.visualize_detected_obstacles or self._flags.log_detector_output): msg.frame.annotate_with_bounding_boxes(msg.timestamp, obstacles, self._bbox_colors) if self._flags.visualize_detected_obstacles: msg.frame.visualize(self._name) if self._flags.log_detector_output: msg.frame.save(msg.timestamp.coordinates[0], self._flags.data_path, 'detector-{}'.format(self._name)) # Get runtime in ms. runtime = (time.time() - start_time) * 1000 self._csv_logger.info('{},{},"{}",{}'.format(time_epoch_ms(), self._name, msg.timestamp, runtime)) # Send out obstacles. obstacles_stream.send( ObstaclesMessage(obstacles, msg.timestamp, runtime))
def on_watermark(self, timestamp, obstacles_stream): """Invoked whenever a frame message is received on the stream. Args: msg (:py:class:`~pylot.perception.messages.FrameMessage`): Message received. obstacles_stream (:py:class:`erdos.WriteStream`): Stream on which the operator sends :py:class:`~pylot.perception.messages.ObstaclesMessage` messages. """ start_time = time.time() #ttd_msg = self._ttd_msgs.popleft() frame_msg = self._frame_msgs.popleft() #ttd, detection_deadline = ttd_msg.data #self.update_model_choice(detection_deadline) frame = frame_msg.frame inputs = frame.as_rgb_numpy_array() detector_start_time = time.time() outputs_np = self._driver.serve_images([inputs])[0] detector_end_time = time.time() self._logger.debug("@{}: detector runtime {}".format( timestamp, (detector_end_time - detector_start_time) * 1000)) obstacles = [] camera_setup = frame.camera_setup for _, y, x, height, width, score, _class in outputs_np: xmin = int(x) ymin = int(y) xmax = int(x + width) ymax = int(y + height) if _class in self._coco_labels: if (score >= self._flags.obstacle_detection_min_score_threshold and self._coco_labels[_class] in self._important_labels): xmin, xmax = max(0, xmin), min(xmax, camera_setup.width) ymin, ymax = max(0, ymin), min(ymax, camera_setup.height) if xmin < xmax and ymin < ymax: obstacles.append( DetectedObstacle(BoundingBox2D( xmin, xmax, ymin, ymax), score, self._coco_labels[_class], id=self._unique_id)) self._unique_id += 1 self._csv_logger.info( "{},{},detection,{},{:4f}".format( pylot.utils.time_epoch_ms(), timestamp.coordinates[0], self._coco_labels[_class], score)) else: self._logger.debug( 'Filtering unknown class: {}'.format(_class)) if (self._flags.visualize_detected_obstacles or self._flags.log_detector_output): frame.annotate_with_bounding_boxes(timestamp, obstacles, None, self._bbox_colors) if self._flags.visualize_detected_obstacles: frame.visualize(self.config.name, pygame_display=pylot.utils.PYGAME_DISPLAY) if self._flags.log_detector_output: frame.save(timestamp.coordinates[0], self._flags.data_path, 'detector-{}'.format(self.config.name)) end_time = time.time() obstacles_stream.send(ObstaclesMessage(timestamp, obstacles, 0)) obstacles_stream.send(erdos.WatermarkMessage(timestamp)) operator_time_total_end = time.time() self._logger.debug("@{}: total time spent: {}".format( timestamp, (operator_time_total_end - start_time) * 1000))
def get_detected_traffic_stops(traffic_stops, depth_frame): """ Get traffic stop lane markings that are withing the camera frame. Args: traffic_stops: List of traffic stop actors in the world. depth_frame: A pylot.perception.depth_frame.DepthFrame, with a camera_setup relative to the world. Returns: List of DetectedObstacles. """ def get_stop_markings_bbox(bbox3d, depth_frame): """ Gets a 2D stop marking bounding box from a 3D bounding box.""" # Move trigger_volume by -0.85 so that the top plane is on the ground. ext_z_value = bbox3d.extent.z - 0.85 ext = [ pylot.utils.Location(x=+bbox3d.extent.x, y=+bbox3d.extent.y, z=ext_z_value), pylot.utils.Location(x=+bbox3d.extent.x, y=-bbox3d.extent.y, z=ext_z_value), pylot.utils.Location(x=-bbox3d.extent.x, y=+bbox3d.extent.y, z=ext_z_value), pylot.utils.Location(x=-bbox3d.extent.x, y=-bbox3d.extent.y, z=ext_z_value), ] bbox = bbox3d.transform.transform_points(ext) camera_transform = depth_frame.camera_setup.get_transform() coords = [] for loc in bbox: loc_view = loc.to_camera_view( camera_transform.matrix, depth_frame.camera_setup.get_intrinsic_matrix()) if (loc_view.z >= 0 and loc_view.x >= 0 and loc_view.y >= 0 and loc_view.x < depth_frame.camera_setup.width and loc_view.y < depth_frame.camera_setup.height): coords.append(loc_view) if len(coords) == 4: xmin = min(coords[0].x, coords[1].x, coords[2].x, coords[3].x) xmax = max(coords[0].x, coords[1].x, coords[2].x, coords[3].x) ymin = min(coords[0].y, coords[1].y, coords[2].y, coords[3].y) ymax = max(coords[0].y, coords[1].y, coords[2].y, coords[3].y) # Check if the bbox is not obstructed and if it's sufficiently # big for the text to be readable. if (ymax - ymin > 15 and depth_frame.pixel_has_same_depth( int(coords[0].x), int(coords[0].y), coords[0].z, 0.4)): return BoundingBox2D(int(xmin), int(xmax), int(ymin), int(ymax)) return None if not isinstance(depth_frame, DepthFrame): raise ValueError( 'depth_frame should be of type perception.depth_frame.DepthFrame') det_obstacles = [] for transform, bbox in traffic_stops: bbox_2d = get_stop_markings_bbox(bbox, depth_frame) if bbox_2d is not None: det_obstacles.append(DetectedObstacle(bbox_2d, 1.0, 'stop marking')) return det_obstacles
def on_msg_camera_stream(self, msg, obstacles_stream): """Invoked whenever a frame message is received on the stream. Args: msg (:py:class:`~pylot.perception.messages.FrameMessage`): Message received. obstacles_stream (:py:class:`erdos.WriteStream`): Stream on which the operator sends :py:class:`~pylot.perception.messages.ObstaclesMessage` messages. """ operator_time_total_start = time.time() start_time = time.time() self._logger.debug('@{}: {} received message'.format( msg.timestamp, self.config.name)) inputs = msg.frame.as_rgb_numpy_array() results = [] start_time = time.time() if MODIFIED_AUTOML: (boxes_np, scores_np, classes_np, num_detections_np) = self._tf_session.run( self._detections_batch, feed_dict={self._image_placeholder: inputs}) num_detections = num_detections_np[0] boxes = boxes_np[0][:num_detections] scores = scores_np[0][:num_detections] classes = classes_np[0][:num_detections] results = zip(boxes, scores, classes) else: outputs_np = self._tf_session.run( self._detections_batch, feed_dict={self._image_placeholder: inputs})[0] for _, x, y, width, height, score, _class in outputs_np: results.append(((y, x, y + height, x + width), score, _class)) obstacles = [] for (ymin, xmin, ymax, xmax), score, _class in results: if np.isclose(ymin, ymax) or np.isclose(xmin, xmax): continue if MODIFIED_AUTOML: # The alternate NMS implementation screws up the class labels. _class = int(_class) + 1 if _class in self._coco_labels: if (score >= self._flags.obstacle_detection_min_score_threshold and self._coco_labels[_class] in self._important_labels): camera_setup = msg.frame.camera_setup width, height = camera_setup.width, camera_setup.height xmin, xmax = max(0, int(xmin)), min(int(xmax), width) ymin, ymax = max(0, int(ymin)), min(int(ymax), height) if xmin < xmax and ymin < ymax: obstacles.append( DetectedObstacle(BoundingBox2D( xmin, xmax, ymin, ymax), score, self._coco_labels[_class], id=self._unique_id)) self._unique_id += 1 self._csv_logger.info( "{},{},detection,{},{:4f}".format( pylot.utils.time_epoch_ms(), msg.timestamp, self._coco_labels[_class], score)) else: self._logger.debug( 'Filtering unknown class: {}'.format(_class)) if (self._flags.visualize_detected_obstacles or self._flags.log_detector_output): msg.frame.annotate_with_bounding_boxes(msg.timestamp, obstacles, None, self._bbox_colors) if self._flags.visualize_detected_obstacles: msg.frame.visualize(self.config.name, pygame_display=pylot.utils.PYGAME_DISPLAY) if self._flags.log_detector_output: msg.frame.save(msg.timestamp.coordinates[0], self._flags.data_path, 'detector-{}'.format(self.config.name)) end_time = time.time() obstacles_stream.send(ObstaclesMessage(msg.timestamp, obstacles, 0)) obstacles_stream.send(erdos.WatermarkMessage(msg.timestamp)) operator_time_total_end = time.time() self._logger.debug("@{}: runtime of the detector: {}".format( msg.timestamp, (end_time - start_time) * 1000)) self._logger.debug("@{}: total time spent: {}".format( msg.timestamp, (operator_time_total_end - operator_time_total_start) * 1000))
def on_msg_camera_stream(self, msg, obstacles_stream): """Invoked whenever a frame message is received on the stream. Args: msg (:py:class:`~pylot.perception.messages.FrameMessage`): Message received. obstacles_stream (:py:class:`erdos.WriteStream`): Stream on which the operator sends :py:class:`~pylot.perception.messages.ObstaclesMessage` messages. """ self._logger.debug('@{}: {} received message'.format( msg.timestamp, self.config.name)) start_time = time.time() # The models expect BGR images. assert msg.frame.encoding == 'BGR', 'Expects BGR frames' # Expand dimensions since the model expects images to have # shape: [1, None, None, 3] image_np_expanded = np.expand_dims(msg.frame.frame, axis=0) (boxes, scores, classes, num_detections) = self._tf_session.run( [ self._detection_boxes, self._detection_scores, self._detection_classes, self._num_detections ], feed_dict={self._image_tensor: image_np_expanded}) num_detections = int(num_detections[0]) res_classes = [int(cls) for cls in classes[0][:num_detections]] res_boxes = boxes[0][:num_detections] res_scores = scores[0][:num_detections] obstacles = [] for i in range(0, num_detections): if res_classes[i] in self._coco_labels: if (res_scores[i] >= self._flags.obstacle_detection_min_score_threshold and self._coco_labels[ res_classes[i]] in self._important_labels): obstacles.append( DetectedObstacle(BoundingBox2D( int(res_boxes[i][1] * msg.frame.camera_setup.width), int(res_boxes[i][3] * msg.frame.camera_setup.width), int(res_boxes[i][0] * msg.frame.camera_setup.height), int(res_boxes[i][2] * msg.frame.camera_setup.height)), res_scores[i], self._coco_labels[res_classes[i]], id=self._unique_id)) self._unique_id += 1 else: self._logger.warning('Filtering unknown class: {}'.format( res_classes[i])) self._logger.debug('@{}: {} obstacles: {}'.format( msg.timestamp, self.config.name, obstacles)) if (self._flags.visualize_detected_obstacles or self._flags.log_detector_output): msg.frame.annotate_with_bounding_boxes(msg.timestamp, obstacles, None, self._bbox_colors) if self._flags.visualize_detected_obstacles: msg.frame.visualize(self.config.name) if self._flags.log_detector_output: msg.frame.save(msg.timestamp.coordinates[0], self._flags.data_path, 'detector-{}'.format(self.config.name)) # Get runtime in ms. runtime = (time.time() - start_time) * 1000 # Send out obstacles. obstacles_stream.send( ObstaclesMessage(msg.timestamp, obstacles, runtime))