Ejemplo n.º 1
0
    def track(self, frame, obstacles=None):
        """ Tracks obstacles in a frame.

        Args:
            frame: perception.camera_frame.CameraFrame to track in.
        """
        if obstacles:
            bboxes = [
                obstacle.bounding_box.as_width_height_bbox()
                for obstacle in obstacles
            ]
            confidence_scores = [obstacle.confidence for obstacle in obstacles]
            self.tracker, detections_class = self._deepsort.run_deep_sort(
                frame.frame, confidence_scores, bboxes)
        if self.tracker:
            obstacles = []
            for track in self.tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                # Converts x, y, w, h bbox to tlbr bbox (top left and bottom
                # right coords).
                bbox = track.to_tlbr()
                # Converts to xmin, xmax, ymin, ymax format.
                obstacles.append(
                    DetectedObstacle(
                        BoundingBox2D(int(bbox[0]), int(bbox[2]), int(bbox[1]),
                                      int(bbox[3])), 0, "", track.track_id))
            return True, obstacles
        return False, []
Ejemplo n.º 2
0
    def __get_obstacles(self, obstacles, vehicle_transform, depth_frame,
                        segmented_frame):
        """ Transforms obstacles into detected obstacles.

        Args:
            obstacles: List of pylot.perception.detection.obstacle.Obstacle.
            vehicle_transform: The transform of the ego vehicle.
            depth_frame: perception.depth_frame.DepthFrame taken at the
                time when obstacles were collected.
            segmented_frame: perception.segmentation.segmented_frame.SegmentedFrame
                taken at the time when the obstacles were collected.
        """
        det_obstacles = []
        for obstacle in obstacles:
            # Calculate the distance of the obstacle from the vehicle, and
            # convert to camera view if it is less than 125 metres away.
            if obstacle.distance(vehicle_transform) > 125:
                bbox = None
            else:
                bbox = obstacle.to_camera_view(depth_frame, segmented_frame)
                if bbox:
                    det_obstacles.append(
                        DetectedObstacle(bbox, 1.0, obstacle.label,
                                         obstacle.id))
        return det_obstacles
Ejemplo n.º 3
0
    def track(self, frame, obstacles=[]):
        """ Tracks obstacles in a frame.

        Args:
            frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`):
                Frame to track in.
        """
        # If obstacles, run deep sort to update tracker with detections.
        # Otherwise, step each confirmed track one step forward.
        if obstacles:
            bboxes, labels, confidence_scores, ids = [], [], [], []
            for obstacle in obstacles:
                bboxes.append(obstacle.bounding_box.as_width_height_bbox())
                labels.append(obstacle.label)
                confidence_scores.append(obstacle.confidence)
                ids.append(obstacle.id)
            self._deepsort.run_deep_sort(frame.frame, confidence_scores,
                                         bboxes, labels, ids)
        else:
            for track in self._deepsort.tracker.tracks:
                if track.is_confirmed():
                    track.predict(self._deepsort.tracker.kf)
        tracked_obstacles = []
        for track in self._deepsort.tracker.tracks:
            if track.is_confirmed():
                # Converts x, y, w, h bbox to tlbr bbox (top left and bottom
                # right coords).
                bbox = track.to_tlbr()
                # Converts to xmin, xmax, ymin, ymax format.
                bbox_2d = BoundingBox2D(int(bbox[0]), int(bbox[2]),
                                        int(bbox[1]), int(bbox[3]))
                tracked_obstacles.append(
                    DetectedObstacle(bbox_2d, 0, track.label, track.track_id))
        return True, tracked_obstacles
Ejemplo n.º 4
0
 def parse_vehicles(self, vehicles, ego_vehicle_id):
     # vehicles is a dictionary that maps each vehicle's id to
     # a dictionary of information about that vehicle. Each such dictionary
     # contains four items: the vehicle's id, position, orientation, and
     # bounding_box (represented as four points in GPS coordinates).
     #
     # Positions are originally represented as (latitude, longitude, altitude)
     # before they are converted using _gps_to_location.
     vehicles_list = []
     for veh_dict in vehicles.values():
         vehicle_id = veh_dict['id']
         location = _gps_to_location(*veh_dict['position'])
         roll, pitch, yaw = veh_dict['orientation']
         rotation = pylot.utils.Rotation(pitch, yaw, roll)
         if vehicle_id == ego_vehicle_id:
             # Can compare against canbus output to check that
             # transformations are working.
             self._logger.debug(
                 'Ego vehicle location with ground_obstacles: {}'.format(
                     location))
         else:
             vehicles_list.append(
                 DetectedObstacle(
                     None,  # We currently don't use bounding box
                     1.0,  # confidence
                     'vehicle',
                     vehicle_id,
                     pylot.utils.Transform(location, rotation)))
     return vehicles_list
Ejemplo n.º 5
0
 def __convert_to_detected_tl(self, boxes, scores, labels, height, width):
     traffic_lights = []
     for index in range(len(scores)):
         if scores[
                 index] > self._flags.traffic_light_det_min_score_threshold:
             bbox = BoundingBox2D(
                 int(boxes[index][1] * width),  # x_min
                 int(boxes[index][3] * width),  # x_max
                 int(boxes[index][0] * height),  # y_min
                 int(boxes[index][2] * height)  # y_max
             )
             traffic_lights.append(
                 DetectedObstacle(bbox, scores[index], labels[index]))
     return traffic_lights
Ejemplo n.º 6
0
 def parse_static_obstacles(self, static_obstacles):
     # Each static obstacle has an id and position.
     static_obstacles_list = []
     for static_obstacle_dict in static_obstacles.values():
         static_obstacle_id = static_obstacle_dict['id']
         location = _gps_to_location(*static_obstacle_dict['position'])
         static_obstacles_list.append(
             DetectedObstacle(
                 None,  # bounding box
                 1.0,  # confidence
                 'static_obstacle',
                 static_obstacle_id,
                 pylot.utils.Transform(location, pylot.utils.Rotation())))
     return static_obstacles_list
Ejemplo n.º 7
0
    def track(self, frame):
        """ Tracks obstacles in a frame.

        Args:
            frame: perception.camera_frame.CameraFrame to track in.
        """
        ok, bboxes = self._tracker.update(frame.frame)
        if not ok:
            return False, []
        obstacles = []
        for (xmin, ymin, w, h) in bboxes:
            obstacles.append(
                DetectedObstacle(BoundingBox2D(xmin, xmin + w, ymin, ymin + h),
                                 "", 0))
        return True, obstacles
Ejemplo n.º 8
0
    def track(self, frame):
        """ Tracks obstacles in a frame.

        Args:
            frame: perception.camera_frame.CameraFrame to track in.
        """
        # each track in tracks has format ([xmin, ymin, xmax, ymax], id)
        obstacles = []
        for track in self.tracker.trackers:
            coords = track.predict()[0].tolist()
            # changing to xmin, xmax, ymin, ymax format
            bbox = BoundingBox2D(int(coords[0]), int(coords[2]),
                                 int(coords[1]), int(coords[3]))
            obstacles.append(DetectedObstacle(bbox, 0, "", track.id))
        return True, obstacles
Ejemplo n.º 9
0
    def get_all_detected_traffic_light_boxes(self, town_name, depth_frame,
                                             segmented_image):
        """ Returns DetectedObstacles for all boxes of a traffic light.

        Args:
            town_name: The name of the town in which the traffic light is.
            depth_frame: A pylot.perception.depth_frame.DepthFrame
            segmented_image: A segmented image np array used to refine the bboxes.
        Returns:
            A list of DetectedObstacles.
        """
        detected = []
        bboxes = self._get_bboxes(town_name)
        # Convert the returned bounding boxes to 2D and check if the
        # light is occluded. If not, add it to the detected obstacle list.
        for bbox in bboxes:
            bounding_box = [
                loc.to_camera_view(
                    depth_frame.camera_setup.get_extrinsic_matrix(),
                    depth_frame.camera_setup.get_intrinsic_matrix())
                for loc in bbox
            ]
            bbox_2d = get_bounding_box_in_camera_view(
                bounding_box, depth_frame.camera_setup.width,
                depth_frame.camera_setup.height)
            if not bbox_2d:
                continue

            # Crop the segmented and depth image to the given bounding box.
            cropped_image = segmented_image[bbox_2d.y_min:bbox_2d.y_max,
                                            bbox_2d.x_min:bbox_2d.x_max]
            cropped_depth = depth_frame.frame[bbox_2d.y_min:bbox_2d.y_max,
                                              bbox_2d.x_min:bbox_2d.x_max]

            if cropped_image.size > 0:
                masked_image = np.zeros_like(cropped_image)
                masked_image[np.where(cropped_image == 12)] = 1
                if np.sum(masked_image) >= 0.20 * masked_image.size:
                    masked_depth = cropped_depth[np.where(masked_image == 1)]
                    mean_depth = np.mean(masked_depth) * 1000
                    if abs(mean_depth -
                           bounding_box[0].z) <= 2 and mean_depth < 150:
                        detected.append(
                            DetectedObstacle(bbox_2d, 1.0,
                                             self.state.get_label()))
        return detected
Ejemplo n.º 10
0
    def track(self, frame):
        """ Tracks obstacles in a frame.

        Args:
            frame: perception.camera_frame.CameraFrame to track in.
        """
        self._tracker = SiamRPN_track(self._tracker, frame.frame)
        target_pos = self._tracker['target_pos']
        target_sz = self._tracker['target_sz']
        self.obstacle.bounding_box = BoundingBox2D(
            int(target_pos[0] - target_sz[0] / 2.0),
            int(target_pos[0] + target_sz[0] / 2.0),
            int(target_pos[1] - target_sz[1] / 2.0),
            int(target_pos[1] + target_sz[1] / 2.0))
        return DetectedObstacle(self.obstacle.bounding_box,
                                self.obstacle.confidence, self.obstacle.label,
                                self.obstacle.id)
Ejemplo n.º 11
0
 def parse_people(self, people):
     # Similar to vehicles, each entry of people is a dictionary that
     # contains four items, the person's id, position, orientation,
     # and bounding box.
     people_list = []
     for person_dict in people.values():
         person_id = person_dict['id']
         location = _gps_to_location(*person_dict['position'])
         roll, pitch, yaw = person_dict['orientation']
         rotation = pylot.utils.Rotation(pitch, yaw, roll)
         people_list.append(
             DetectedObstacle(
                 None,  # bounding box
                 1.0,  # confidence
                 'person',
                 person_id,
                 pylot.utils.Transform(location, rotation)))
     return people_list
Ejemplo n.º 12
0
    def __get_obstacles(self, obstacles, vehicle_transform, depth_frame,
                        segmented_frame):
        """Transforms obstacles into detected obstacles.

        Args:
            obstacles (list(:py:class:`~pylot.perception.detection.obstacle.Obstacle`)):
                List of obstacles.
            vehicle_transform (:py:class:`~pylot.utils.Transform`): Transform of
                the ego vehicle.
            depth_frame (:py:class:`~pylot.perception.depth_frame.DepthFrame`):
                Depth frame taken at the time when obstacles were collected.
            segmented_frame (:py:class:`~pylot.perception.segmentation.segmented_frame.SegmentedFrame`):
                Segmented frame taken at the time when the obstacles were
                collected.

        Returns:
            list(:py:class:`~pylot.perception.detection.utils.DetectedObstacle`):
            List of detected obstacles.
        """
        det_obstacles = []
        for obstacle in obstacles:
            # Calculate the distance of the obstacle from the vehicle, and
            # convert to camera view if it is less than
            # perfect_detection_max_distance metres away.
            if obstacle.distance(
                    vehicle_transform
            ) > self._flags.perfect_detection_max_distance:
                bbox = None
            else:
                bbox = obstacle.to_camera_view(depth_frame, segmented_frame)
                if bbox:
                    det_obstacles.append(
                        DetectedObstacle(bbox, 1.0, obstacle.label,
                                         obstacle.id, obstacle.transform,
                                         obstacle.detailed_label))
        return det_obstacles
Ejemplo n.º 13
0
    def on_msg_camera_stream(self, msg, obstacles_stream):
        """ Invoked when the operator receives a message on the data stream."""
        self._logger.debug('@{}: {} received message'.format(
            msg.timestamp, self._name))
        start_time = time.time()
        # The models expect BGR images.
        assert msg.frame.encoding == 'BGR', 'Expects BGR frames'
        # Expand dimensions since the model expects images to have
        # shape: [1, None, None, 3]
        image_np_expanded = np.expand_dims(msg.frame.frame, axis=0)
        (boxes, scores, classes, num_detections) = self._tf_session.run(
            [
                self._detection_boxes, self._detection_scores,
                self._detection_classes, self._num_detections
            ],
            feed_dict={self._image_tensor: image_np_expanded})

        num_detections = int(num_detections[0])
        res_classes = classes[0][:num_detections]
        res_boxes = boxes[0][:num_detections]
        res_scores = scores[0][:num_detections]

        obstacles = []
        for i in range(0, num_detections):
            if (res_classes[i] in self._coco_labels and res_scores[i] >=
                    self._flags.obstacle_detection_min_score_threshold):
                obstacles.append(
                    DetectedObstacle(
                        BoundingBox2D(
                            int(res_boxes[i][1] *
                                msg.frame.camera_setup.width),
                            int(res_boxes[i][3] *
                                msg.frame.camera_setup.width),
                            int(res_boxes[i][0] *
                                msg.frame.camera_setup.height),
                            int(res_boxes[i][2] *
                                msg.frame.camera_setup.height)), res_scores[i],
                        self._coco_labels[res_classes[i]]))
            else:
                self._logger.warning('Filtering unknown class: {}'.format(
                    res_classes[i]))

        self._logger.debug('@{}: {} obstacles: {}'.format(
            msg.timestamp, self._name, obstacles))

        if (self._flags.visualize_detected_obstacles
                or self._flags.log_detector_output):
            msg.frame.annotate_with_bounding_boxes(msg.timestamp, obstacles,
                                                   self._bbox_colors)
            if self._flags.visualize_detected_obstacles:
                msg.frame.visualize(self._name)
            if self._flags.log_detector_output:
                msg.frame.save(msg.timestamp.coordinates[0],
                               self._flags.data_path,
                               'detector-{}'.format(self._name))

        # Get runtime in ms.
        runtime = (time.time() - start_time) * 1000
        self._csv_logger.info('{},{},"{}",{}'.format(time_epoch_ms(),
                                                     self._name, msg.timestamp,
                                                     runtime))
        # Send out obstacles.
        obstacles_stream.send(
            ObstaclesMessage(obstacles, msg.timestamp, runtime))
Ejemplo n.º 14
0
    def on_watermark(self, timestamp, obstacles_stream):
        """Invoked whenever a frame message is received on the stream.

        Args:
            msg (:py:class:`~pylot.perception.messages.FrameMessage`): Message
                received.
            obstacles_stream (:py:class:`erdos.WriteStream`): Stream on which
                the operator sends
                :py:class:`~pylot.perception.messages.ObstaclesMessage`
                messages.
        """
        start_time = time.time()
        #ttd_msg = self._ttd_msgs.popleft()
        frame_msg = self._frame_msgs.popleft()
        #ttd, detection_deadline = ttd_msg.data
        #self.update_model_choice(detection_deadline)
        frame = frame_msg.frame
        inputs = frame.as_rgb_numpy_array()
        detector_start_time = time.time()
        outputs_np = self._driver.serve_images([inputs])[0]
        detector_end_time = time.time()
        self._logger.debug("@{}: detector runtime {}".format(
            timestamp, (detector_end_time - detector_start_time) * 1000))
        obstacles = []
        camera_setup = frame.camera_setup
        for _, y, x, height, width, score, _class in outputs_np:
            xmin = int(x)
            ymin = int(y)
            xmax = int(x + width)
            ymax = int(y + height)
            if _class in self._coco_labels:
                if (score >= self._flags.obstacle_detection_min_score_threshold
                        and self._coco_labels[_class]
                        in self._important_labels):
                    xmin, xmax = max(0, xmin), min(xmax, camera_setup.width)
                    ymin, ymax = max(0, ymin), min(ymax, camera_setup.height)
                    if xmin < xmax and ymin < ymax:
                        obstacles.append(
                            DetectedObstacle(BoundingBox2D(
                                xmin, xmax, ymin, ymax),
                                             score,
                                             self._coco_labels[_class],
                                             id=self._unique_id))
                        self._unique_id += 1
                        self._csv_logger.info(
                            "{},{},detection,{},{:4f}".format(
                                pylot.utils.time_epoch_ms(),
                                timestamp.coordinates[0],
                                self._coco_labels[_class], score))
            else:
                self._logger.debug(
                    'Filtering unknown class: {}'.format(_class))

        if (self._flags.visualize_detected_obstacles
                or self._flags.log_detector_output):
            frame.annotate_with_bounding_boxes(timestamp, obstacles, None,
                                               self._bbox_colors)
            if self._flags.visualize_detected_obstacles:
                frame.visualize(self.config.name,
                                pygame_display=pylot.utils.PYGAME_DISPLAY)
            if self._flags.log_detector_output:
                frame.save(timestamp.coordinates[0], self._flags.data_path,
                           'detector-{}'.format(self.config.name))
        end_time = time.time()
        obstacles_stream.send(ObstaclesMessage(timestamp, obstacles, 0))
        obstacles_stream.send(erdos.WatermarkMessage(timestamp))
        operator_time_total_end = time.time()
        self._logger.debug("@{}: total time spent: {}".format(
            timestamp, (operator_time_total_end - start_time) * 1000))
Ejemplo n.º 15
0
def get_detected_traffic_stops(traffic_stops, depth_frame):
    """ Get traffic stop lane markings that are withing the camera frame.

    Args:
        traffic_stops: List of traffic stop actors in the world.
        depth_frame: A pylot.perception.depth_frame.DepthFrame, with a
            camera_setup relative to the world.

    Returns:
        List of DetectedObstacles.
    """
    def get_stop_markings_bbox(bbox3d, depth_frame):
        """ Gets a 2D stop marking bounding box from a 3D bounding box."""
        # Move trigger_volume by -0.85 so that the top plane is on the ground.
        ext_z_value = bbox3d.extent.z - 0.85
        ext = [
            pylot.utils.Location(x=+bbox3d.extent.x,
                                 y=+bbox3d.extent.y,
                                 z=ext_z_value),
            pylot.utils.Location(x=+bbox3d.extent.x,
                                 y=-bbox3d.extent.y,
                                 z=ext_z_value),
            pylot.utils.Location(x=-bbox3d.extent.x,
                                 y=+bbox3d.extent.y,
                                 z=ext_z_value),
            pylot.utils.Location(x=-bbox3d.extent.x,
                                 y=-bbox3d.extent.y,
                                 z=ext_z_value),
        ]
        bbox = bbox3d.transform.transform_points(ext)
        camera_transform = depth_frame.camera_setup.get_transform()
        coords = []
        for loc in bbox:
            loc_view = loc.to_camera_view(
                camera_transform.matrix,
                depth_frame.camera_setup.get_intrinsic_matrix())
            if (loc_view.z >= 0 and loc_view.x >= 0 and loc_view.y >= 0
                    and loc_view.x < depth_frame.camera_setup.width
                    and loc_view.y < depth_frame.camera_setup.height):
                coords.append(loc_view)
        if len(coords) == 4:
            xmin = min(coords[0].x, coords[1].x, coords[2].x, coords[3].x)
            xmax = max(coords[0].x, coords[1].x, coords[2].x, coords[3].x)
            ymin = min(coords[0].y, coords[1].y, coords[2].y, coords[3].y)
            ymax = max(coords[0].y, coords[1].y, coords[2].y, coords[3].y)
            # Check if the bbox is not obstructed and if it's sufficiently
            # big for the text to be readable.
            if (ymax - ymin > 15 and depth_frame.pixel_has_same_depth(
                    int(coords[0].x), int(coords[0].y), coords[0].z, 0.4)):
                return BoundingBox2D(int(xmin), int(xmax), int(ymin),
                                     int(ymax))
        return None

    if not isinstance(depth_frame, DepthFrame):
        raise ValueError(
            'depth_frame should be of type perception.depth_frame.DepthFrame')
    det_obstacles = []
    for transform, bbox in traffic_stops:
        bbox_2d = get_stop_markings_bbox(bbox, depth_frame)
        if bbox_2d is not None:
            det_obstacles.append(DetectedObstacle(bbox_2d, 1.0,
                                                  'stop marking'))
    return det_obstacles
Ejemplo n.º 16
0
    def on_msg_camera_stream(self, msg, obstacles_stream):
        """Invoked whenever a frame message is received on the stream.

        Args:
            msg (:py:class:`~pylot.perception.messages.FrameMessage`): Message
                received.
            obstacles_stream (:py:class:`erdos.WriteStream`): Stream on which
                the operator sends
                :py:class:`~pylot.perception.messages.ObstaclesMessage`
                messages.
        """
        operator_time_total_start = time.time()
        start_time = time.time()
        self._logger.debug('@{}: {} received message'.format(
            msg.timestamp, self.config.name))
        inputs = msg.frame.as_rgb_numpy_array()

        results = []
        start_time = time.time()
        if MODIFIED_AUTOML:
            (boxes_np, scores_np, classes_np,
             num_detections_np) = self._tf_session.run(
                 self._detections_batch,
                 feed_dict={self._image_placeholder: inputs})
            num_detections = num_detections_np[0]
            boxes = boxes_np[0][:num_detections]
            scores = scores_np[0][:num_detections]
            classes = classes_np[0][:num_detections]
            results = zip(boxes, scores, classes)
        else:
            outputs_np = self._tf_session.run(
                self._detections_batch,
                feed_dict={self._image_placeholder: inputs})[0]
            for _, x, y, width, height, score, _class in outputs_np:
                results.append(((y, x, y + height, x + width), score, _class))
        obstacles = []
        for (ymin, xmin, ymax, xmax), score, _class in results:
            if np.isclose(ymin, ymax) or np.isclose(xmin, xmax):
                continue
            if MODIFIED_AUTOML:
                # The alternate NMS implementation screws up the class labels.
                _class = int(_class) + 1
            if _class in self._coco_labels:
                if (score >= self._flags.obstacle_detection_min_score_threshold
                        and self._coco_labels[_class]
                        in self._important_labels):
                    camera_setup = msg.frame.camera_setup
                    width, height = camera_setup.width, camera_setup.height
                    xmin, xmax = max(0, int(xmin)), min(int(xmax), width)
                    ymin, ymax = max(0, int(ymin)), min(int(ymax), height)
                    if xmin < xmax and ymin < ymax:
                        obstacles.append(
                            DetectedObstacle(BoundingBox2D(
                                xmin, xmax, ymin, ymax),
                                             score,
                                             self._coco_labels[_class],
                                             id=self._unique_id))
                        self._unique_id += 1
                        self._csv_logger.info(
                            "{},{},detection,{},{:4f}".format(
                                pylot.utils.time_epoch_ms(), msg.timestamp,
                                self._coco_labels[_class], score))
            else:
                self._logger.debug(
                    'Filtering unknown class: {}'.format(_class))

        if (self._flags.visualize_detected_obstacles
                or self._flags.log_detector_output):
            msg.frame.annotate_with_bounding_boxes(msg.timestamp, obstacles,
                                                   None, self._bbox_colors)
            if self._flags.visualize_detected_obstacles:
                msg.frame.visualize(self.config.name,
                                    pygame_display=pylot.utils.PYGAME_DISPLAY)
            if self._flags.log_detector_output:
                msg.frame.save(msg.timestamp.coordinates[0],
                               self._flags.data_path,
                               'detector-{}'.format(self.config.name))
        end_time = time.time()
        obstacles_stream.send(ObstaclesMessage(msg.timestamp, obstacles, 0))
        obstacles_stream.send(erdos.WatermarkMessage(msg.timestamp))
        operator_time_total_end = time.time()
        self._logger.debug("@{}: runtime of the detector: {}".format(
            msg.timestamp, (end_time - start_time) * 1000))
        self._logger.debug("@{}: total time spent: {}".format(
            msg.timestamp,
            (operator_time_total_end - operator_time_total_start) * 1000))
Ejemplo n.º 17
0
    def on_msg_camera_stream(self, msg, obstacles_stream):
        """Invoked whenever a frame message is received on the stream.

        Args:
            msg (:py:class:`~pylot.perception.messages.FrameMessage`): Message
                received.
            obstacles_stream (:py:class:`erdos.WriteStream`): Stream on which
                the operator sends
                :py:class:`~pylot.perception.messages.ObstaclesMessage`
                messages.
        """
        self._logger.debug('@{}: {} received message'.format(
            msg.timestamp, self.config.name))
        start_time = time.time()
        # The models expect BGR images.
        assert msg.frame.encoding == 'BGR', 'Expects BGR frames'
        # Expand dimensions since the model expects images to have
        # shape: [1, None, None, 3]
        image_np_expanded = np.expand_dims(msg.frame.frame, axis=0)
        (boxes, scores, classes, num_detections) = self._tf_session.run(
            [
                self._detection_boxes, self._detection_scores,
                self._detection_classes, self._num_detections
            ],
            feed_dict={self._image_tensor: image_np_expanded})

        num_detections = int(num_detections[0])
        res_classes = [int(cls) for cls in classes[0][:num_detections]]
        res_boxes = boxes[0][:num_detections]
        res_scores = scores[0][:num_detections]

        obstacles = []
        for i in range(0, num_detections):
            if res_classes[i] in self._coco_labels:
                if (res_scores[i] >=
                        self._flags.obstacle_detection_min_score_threshold
                        and self._coco_labels[
                            res_classes[i]] in self._important_labels):
                    obstacles.append(
                        DetectedObstacle(BoundingBox2D(
                            int(res_boxes[i][1] *
                                msg.frame.camera_setup.width),
                            int(res_boxes[i][3] *
                                msg.frame.camera_setup.width),
                            int(res_boxes[i][0] *
                                msg.frame.camera_setup.height),
                            int(res_boxes[i][2] *
                                msg.frame.camera_setup.height)),
                                         res_scores[i],
                                         self._coco_labels[res_classes[i]],
                                         id=self._unique_id))
                    self._unique_id += 1
            else:
                self._logger.warning('Filtering unknown class: {}'.format(
                    res_classes[i]))

        self._logger.debug('@{}: {} obstacles: {}'.format(
            msg.timestamp, self.config.name, obstacles))

        if (self._flags.visualize_detected_obstacles
                or self._flags.log_detector_output):
            msg.frame.annotate_with_bounding_boxes(msg.timestamp, obstacles,
                                                   None, self._bbox_colors)
            if self._flags.visualize_detected_obstacles:
                msg.frame.visualize(self.config.name)
            if self._flags.log_detector_output:
                msg.frame.save(msg.timestamp.coordinates[0],
                               self._flags.data_path,
                               'detector-{}'.format(self.config.name))

        # Get runtime in ms.
        runtime = (time.time() - start_time) * 1000
        # Send out obstacles.
        obstacles_stream.send(
            ObstaclesMessage(msg.timestamp, obstacles, runtime))