Example #1
def get_traffic_stop_det_objs(
        traffic_stops,
        camera_transform,
        depth_frame,
        frame_width,
        frame_height,
        fov):
    """ Get traffic stop lane markings that are withing the camera frame.

    Args:
        traffic_stops: List of traffic stop actors in the world.
        camera_transform: Camera transform in world coordinates.
        fov: Camera field of view.

    Returns:
        List of DetectedObjects.
    """
    det_objs = []
    bgr_intrinsic = create_intrinsic_matrix(frame_width, frame_height, fov)
    for transform, bbox in traffic_stops:
        bbox2d = _get_stop_markings_bbox(
            bbox, depth_frame, camera_transform, bgr_intrinsic,
            frame_width, frame_height)
        if bbox2d:
            det_objs.append(DetectedObject(bbox2d, 1.0, 'stop marking'))
    return det_objs
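
Every example on this page constructs a DetectedObject, but the class itself is never shown. A minimal sketch of what the call sites imply, assuming a (xmin, xmax, ymin, ymax) corner tuple followed by a confidence score, a label, and an optional object id (the field names here are hypothetical):

from dataclasses import dataclass
from typing import Tuple, Union

@dataclass
class DetectedObject:
    # Corners in (xmin, xmax, ymin, ymax) order, as every example passes.
    corners: Tuple[int, int, int, int]
    # Detectors pass a float score; trackers pass "" when no score exists.
    confidence: Union[float, str]
    # Detectors pass a string label; trackers pass 0 as a placeholder.
    label: Union[str, int]
    # Tracker identity, when available.
    obj_id: int = -1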
Example #2
 def track(self, frame):
     # Each track predicts a [xmin, ymin, xmax, ymax] box and carries an id.
     tracked_objects = []
     for track in self.tracker.trackers:
         coords = track.predict()[0].tolist()
         # Reorder to (xmin, xmax, ymin, ymax) format.
         coords = (coords[0], coords[2], coords[1], coords[3])
         tracked_objects.append(DetectedObject(coords, "", 0, track.id))
     return True, tracked_objects
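
The tuple shuffle above turns a [xmin, ymin, xmax, ymax] box into the (xmin, xmax, ymin, ymax) order that DetectedObject expects; pulled out as a tiny standalone helper (hypothetical name, same arithmetic):

def reorder_corners(box):
    """Reorder [xmin, ymin, xmax, ymax] into (xmin, xmax, ymin, ymax)."""
    return (box[0], box[2], box[1], box[3])

# reorder_corners([10, 20, 30, 40]) returns (10, 30, 20, 40)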
Example #3
 def track(self, frame):
     # Run one step of the SiamRPN tracker to update the target estimate.
     self._tracker = SiamRPN_track(self._tracker, frame)
     target_pos = self._tracker['target_pos']
     target_sz = self._tracker['target_sz']
     # Convert the (center, size) estimate to (xmin, xmax, ymin, ymax).
     bbox = (int(target_pos[0] - target_sz[0] / 2.0),
             int(target_pos[0] + target_sz[0] / 2.0),
             int(target_pos[1] - target_sz[1] / 2.0),
             int(target_pos[1] + target_sz[1] / 2.0))
     return DetectedObject(bbox, "", 0)
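
The same conversion, from a (center, size) pair to (xmin, xmax, ymin, ymax) corners, as a standalone sketch (hypothetical helper name; pos and size are any indexable pairs, like the SiamRPN state above):

def center_size_to_corners(pos, size):
    """Convert a (cx, cy) center and (w, h) size to (xmin, xmax, ymin, ymax)."""
    return (int(pos[0] - size[0] / 2.0), int(pos[0] + size[0] / 2.0),
            int(pos[1] - size[1] / 2.0), int(pos[1] + size[1] / 2.0))

# center_size_to_corners((100, 80), (40, 20)) returns (80, 120, 70, 90)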
Example #4
def get_traffic_light_det_objs_legacy(traffic_lights, vehicle_transform,
                                      camera_transform, depth_frame,
                                      frame_width, frame_height, fov,
                                      segmented_frame):
    """ Get the traffic lights that are withing the camera frame.
    Note: this method works with Carla 0.8.4.

    Args:
        traffic_lights: List of traffic lights in the world.
        vehicle_transform: Ego-vehicle transform in world coordinates.
        camera_transform: Camera transform in world coordinates.
        fov: Camera field of view.
        segmented_frame: Segmented frame.
    """
    # Get 3d world positions for all traffic signs (some of which are
    # traffic lights).
    traffic_signs_frame = get_traffic_sign_pixels(segmented_frame)
    bboxes = get_bounding_boxes_from_segmented(traffic_signs_frame)

    # Get the positions of the bounding box centers.
    x_mids = [(bbox[0] + bbox[1]) / 2 for bbox in bboxes]
    y_mids = [(bbox[2] + bbox[3]) / 2 for bbox in bboxes]
    pos_3d = batch_get_3d_world_position_with_depth_map(
        x_mids, y_mids, depth_frame, frame_width, frame_height, fov,
        camera_transform)
    pos_and_bboxes = zip(pos_3d, bboxes)

    # Map traffic lights to bounding boxes based on 3d world position.
    tl_bboxes = match_bboxes_with_traffic_lights(vehicle_transform,
                                                 pos_and_bboxes,
                                                 traffic_lights)
    det_objs = []

    for bbox, color in tl_bboxes:
        if color == TrafficLightColor.GREEN:
            det_objs.append(DetectedObject(bbox, 1.0, 'green traffic light'))
        elif color == TrafficLightColor.YELLOW:
            det_objs.append(DetectedObject(bbox, 1.0, 'yellow traffic light'))
        elif color == TrafficLightColor.RED:
            det_objs.append(DetectedObject(bbox, 1.0, 'red traffic light'))
        else:
            det_objs.append(DetectedObject(bbox, 1.0, 'off traffic light'))
    return det_objs
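
The if/elif chain mapping TrafficLightColor to a label string recurs verbatim in Example #11 below; a table-driven sketch of the same mapping (TrafficLightColor as in the example above):

TL_LABELS = {
    TrafficLightColor.GREEN: 'green traffic light',
    TrafficLightColor.YELLOW: 'yellow traffic light',
    TrafficLightColor.RED: 'red traffic light',
}

def label_for(color):
    # Any unmapped state falls back to 'off traffic light'.
    return TL_LABELS.get(color, 'off traffic light')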
Example #5
def get_traffic_light_det_objs(traffic_lights,
                               camera_transform,
                               depth_array,
                               segmented_image,
                               frame_width,
                               frame_height,
                               town_name,
                               fov=90):
    """ Get the traffic lights that are within the camera frame.
    Note: This method should be used with Carla 0.9.*
    """
    # Create the extrinsic and intrinsic matrices for the given camera.
    extrinsic_matrix = camera_transform.matrix
    intrinsic_matrix = create_intrinsic_matrix(frame_width, frame_height, fov)

    # Iterate over all the traffic lights, and figure out which ones are
    # facing us and are visible in the camera view.
    detected = []
    for light in traffic_lights:

        if not is_traffic_light_visible(camera_transform, light, town_name):
            continue

        bboxes = get_traffic_lights_bbox_state(camera_transform, [light],
                                               town_name)

        # Convert the returned bounding boxes to 2D and check if the
        # light is occluded. If not, add it to the detected object list.
        for box, color in bboxes:
            bounding_box = locations_3d_to_view(box, extrinsic_matrix,
                                                intrinsic_matrix)
            bounding_box = [(bb.x, bb.y, bb.z) for bb in bounding_box]
            thresholded_coordinates = get_bounding_box_in_camera_view(
                bounding_box, frame_width, frame_height)
            if not thresholded_coordinates:
                continue

            xmin, xmax, ymin, ymax = thresholded_coordinates

            # Crop the segmented and depth image to the given bounding box.
            cropped_image = segmented_image[ymin:ymax, xmin:xmax]
            cropped_depth = depth_array[ymin:ymax, xmin:xmax]

            if cropped_image.size > 0:
                # Class 12 is the traffic-sign tag in CARLA's segmentation
                # palette; require at least 20% of the cropped pixels to
                # carry it.
                masked_image = np.zeros_like(cropped_image)
                masked_image[np.where(cropped_image == 12)] = 1
                if np.sum(masked_image) >= 0.20 * masked_image.size:
                    masked_depth = cropped_depth[np.where(masked_image == 1)]
                    # Scale the normalized depth to meters, and reject
                    # lights that are occluded (the measured depth disagrees
                    # with the projected bbox depth) or over 150 m away.
                    mean_depth = np.mean(masked_depth) * 1000
                    if (abs(mean_depth - bounding_box[0][-1]) <= 2
                            and mean_depth < 150):
                        detected.append(
                            DetectedObject((xmin, xmax, ymin, ymax), 1.0,
                                           color.get_label()))
    return detected
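
The occlusion test above hinges on the fraction of cropped pixels that carry the traffic-sign class; isolated as a sketch (hypothetical helper name; class id 12 is CARLA's traffic-sign tag, as the example assumes):

import numpy as np

def class_fraction(cropped_image, class_id=12):
    """Fraction of pixels in a segmentation crop that belong to class_id."""
    if cropped_image.size == 0:
        return 0.0
    return float(np.mean(cropped_image == class_id))

# The check above is then: class_fraction(cropped_image) >= 0.20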
Example #6
 def __get_output_bboxes(self, results):
     """ Transform from the CenterNet output format to our format."""
     output = []
     for category in range(1, self._opt.num_classes + 1):
         for bbox in results[category]:
             confidence = bbox[4]
             if confidence >= self._flags.detector_min_score_threshold:
                 # Reorder from [xmin, ymin, xmax, ymax] to
                 # (xmin, xmax, ymin, ymax).
                 corners = (int(bbox[0]), int(bbox[2]), int(bbox[1]),
                            int(bbox[3]))
                 output.append(
                     DetectedObject(corners, confidence,
                                    self._coco_labels[category]))
     return output
Example #7
 def __convert_to_detected_objs(self, boxes, scores, labels, height, width):
     index = 0
     detected_objects = []
     while index < len(boxes) and index < len(scores):
         if scores[index] >= self._flags.detector_min_score_threshold:
             # Boxes are normalized [ymin, xmin, ymax, xmax]; scale them
             # to pixel coordinates and reorder to (xmin, xmax, ymin, ymax).
             ymin = int(boxes[index][0] * height)
             xmin = int(boxes[index][1] * width)
             ymax = int(boxes[index][2] * height)
             xmax = int(boxes[index][3] * width)
             corners = (xmin, xmax, ymin, ymax)
             detected_objects.append(
                 DetectedObject(corners, scores[index], labels[index]))
         index += 1
     return detected_objects
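
The scaling step above assumes the usual TensorFlow detection output order, normalized [ymin, xmin, ymax, xmax]; as a standalone sketch (hypothetical helper name):

def normalized_box_to_corners(box, height, width):
    """Scale a normalized [ymin, xmin, ymax, xmax] box to pixel
    (xmin, xmax, ymin, ymax) corners."""
    ymin, xmin, ymax, xmax = box
    return (int(xmin * width), int(xmax * width),
            int(ymin * height), int(ymax * height))

# normalized_box_to_corners([0.1, 0.2, 0.5, 0.8], 100, 200)
# returns (40, 160, 10, 50)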
Example #8
 def track(self, frame, confidence_scores=None, bboxes=None):
     if bboxes:
         detections = self.convert_detections_for_deepsort_alg(bboxes)
         self.tracker, detections_class = self._deepsort.run_deep_sort(
             frame, confidence_scores, detections)
     if self.tracker:
         tracked_objects = []
         for track in self.tracker.tracks:
             if not track.is_confirmed() or track.time_since_update > 1:
                 continue
          # to_tlbr() converts the tracker's (x, y, w, h) bbox into
          # top-left/bottom-right coordinates.
          bbox = track.to_tlbr()
          # Reorder to (xmin, xmax, ymin, ymax) format.
          corners = (bbox[0], bbox[2], bbox[1], bbox[3])
             tracked_objects.append(
                 DetectedObject(corners, "", 0, track.track_id))
         return True, tracked_objects
     return False, []
Example #9
 def __get_vehicles(self, vehicles, vehicle_transform, depth_array):
     """ Transforms vehicles into detected objects.
     Args:
         vehicles: List of Vehicle objects.
         vehicle_transform: Ego-vehicle transform.
          depth_array: Depth frame taken at the time when the vehicles
                       were collected.
     """
     det_objs = []
     for vehicle in vehicles:
         bbox = get_2d_bbox_from_3d_box(depth_array, vehicle_transform,
                                        vehicle.transform,
                                        vehicle.bounding_box,
                                        self._bgr_transform,
                                        self._bgr_intrinsic,
                                        self._bgr_img_size, 3.0, 3.0)
         if bbox is not None:
             det_objs.append(DetectedObject(bbox, 1.0, 'vehicle'))
     return det_objs
Example #10
 def __get_pedestrians(self, pedestrians, vehicle_transform, depth_array,
                       segmented_image):
     """ Transforms pedestrians into detected objects.
     Args:
         pedestrians: List of Pedestrian objects.
         vehicle_transform: Ego-vehicle transform.
         depth_array: Depth frame taken at the time when pedestrians were
                      collected.
         segmented_image: The segmentation frame taken at the time when
             the pedestrians were collected.
     """
     det_objs = []
     for pedestrian in pedestrians:
         bbox = get_2d_bbox_from_3d_box(
             vehicle_transform, pedestrian.transform,
             pedestrian.bounding_box, self._bgr_transform,
             self._bgr_intrinsic, self._bgr_img_size, depth_array,
             segmented_image, 4)
         if bbox is not None:
             det_objs.append(DetectedObject(bbox, 1.0, 'pedestrian', pedestrian.id))
     return det_objs
Example #11
def get_traffic_light_det_objs(traffic_lights,
                               camera_transform,
                               depth_array,
                               frame_width,
                               frame_height,
                               town_name,
                               fov=90):
    """ Get the traffic lights that are within the camera frame.
    Note: This method should be used with Carla 0.9.*
    """
    # Get the location of the bounding boxes for these lights.
    bbox_state = get_traffic_lights_bbox_state(camera_transform,
                                               traffic_lights, town_name)

    # Convert the bounding boxes to a camera view.
    extrinsic_matrix = camera_transform.matrix
    intrinsic_matrix = create_intrinsic_matrix(frame_width, frame_height, fov)
    det_objs = []
    for box, color in bbox_state:
        bounding_box = []
        for location in box:
            bounding_box.append(
                location_3d_to_view(location, extrinsic_matrix,
                                    intrinsic_matrix))

        # Check that at least one corner is in front of the camera.
        z_values = [loc.z > 0 for loc in bounding_box]
        if not any(z_values):
            continue

        # The corners are in front of the camera; now check whether they
        # fall within the camera view.
        x_min = x_max = int(bounding_box[0].x)
        y_min = y_max = int(bounding_box[0].y)
        for i in range(1, 4):
            x_min = min(x_min, int(bounding_box[i].x))
            x_max = max(x_max, int(bounding_box[i].x))
            y_min = min(y_min, int(bounding_box[i].y))
            y_max = max(y_max, int(bounding_box[i].y))
        x_bounds = (x_min >= 0 and x_min < frame_width and x_max >= 0
                    and x_max < frame_width)
        y_bounds = (y_min >= 0 and y_min < frame_height and y_max >= 0
                    and y_max < frame_height)
        if (x_bounds and y_bounds and x_max - x_min >= 3
                and y_max - y_min > 6):
            middle_x = (x_min + x_max) // 2
            middle_y = (y_min + y_max) // 2
            # Scale the normalized depth to meters.
            depth = depth_array[middle_y][middle_x] * 1000
            # Ignore tl if it is occluded or far away.
            if abs(depth - bounding_box[0].z) > 2 or depth > 150:
                continue
            label = ''
            if color == TrafficLightColor.GREEN:
                label = 'green traffic light'
            elif color == TrafficLightColor.YELLOW:
                label = 'yellow traffic light'
            elif color == TrafficLightColor.RED:
                label = 'red traffic light'
            else:
                label = 'off traffic light'
            det_objs.append(
                DetectedObject((x_min, x_max, y_min, y_max), 1.0, label))
    return det_objs
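
The in-frame and minimum-size test above can be packaged as a predicate; a sketch with the same thresholds (hypothetical name; a light narrower than 3 px or shorter than 7 px is discarded, matching x_max - x_min >= 3 and y_max - y_min > 6):

def bbox_visible_in_frame(x_min, x_max, y_min, y_max, frame_width,
                          frame_height):
    """True when the bbox lies fully inside the frame and is large enough
    to be a plausible traffic light."""
    return (0 <= x_min < frame_width and 0 <= x_max < frame_width
            and 0 <= y_min < frame_height and 0 <= y_max < frame_height
            and x_max - x_min >= 3 and y_max - y_min > 6)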