Example #1
def _classification_to_kwiver_detections(classification, w, h):
    """
    Convert kwarray classifications to kwiver detected object sets

    Args:
        classification (bioharn.clf_predict.Classification)
        w (int): width of image
        h (int): height of image

    Returns:
        kwiver.vital.types.DetectedObjectSet
    """
    detected_objects = DetectedObjectSet()

    if classification.data.get('prob', None) is not None:
        # If we have a probability for each class, use that
        class_names = list(classification.classes)
        class_prob = classification.prob
        detected_object_type = DetectedObjectType(class_names, class_prob)
    else:
        # Otherwise we only have the score for the predicted class
        class_name = classification.classes[classification.cidx]
        class_score = classification.conf
        detected_object_type = DetectedObjectType(class_name, class_score)

    bounding_box = BoundingBoxD(0, 0, w, h)
    detected_object = DetectedObject(bounding_box, classification.conf,
                                     detected_object_type)
    detected_objects.add(detected_object)
    return detected_objects
Example #2
def _kwimage_to_kwiver_detections(detections):
    """
    Convert kwimage detections to kwiver detected object sets

    Args:
        detections (kwimage.Detections)

    Returns:
        kwiver.vital.types.DetectedObjectSet
    """

    # convert segmentation masks
    if 'segmentations' in detections.data:
        print("Warning: segmentations not implemented")

    boxes = detections.boxes.to_tlbr()
    scores = detections.scores
    class_idxs = detections.class_idxs

    # convert to kwiver format, apply threshold
    detected_objects = DetectedObjectSet()

    for tlbr, score, cidx in zip(boxes.data, scores, class_idxs):
        class_name = detections.classes[cidx]

        bbox_int = np.round(tlbr).astype(np.int32)
        bounding_box = BoundingBox(bbox_int[0], bbox_int[1], bbox_int[2],
                                   bbox_int[3])

        detected_object_type = DetectedObjectType(class_name, score)
        detected_object = DetectedObject(bounding_box, score,
                                         detected_object_type)
        detected_objects.add(detected_object)
    return detected_objects
Example #3
    def filter_truth(self, init_truth, categories):
        filtered_truth = DetectedObjectSet()
        use_frame = True
        max_length = int(self._max_scale_wrt_chip * float(self._chip_width))
        for i, item in enumerate(init_truth):
            if item.type is None:
                continue
            class_lbl = item.type.get_most_likely_class()
            if categories is not None and not categories.has_class_name(
                    class_lbl):
                if self._mode == "detection_refiner":
                    class_lbl = self._negative_category
                else:
                    continue
            if categories is not None:
                class_lbl = categories.get_class_name(class_lbl)
            elif class_lbl not in self._categories:
                self._categories.append(class_lbl)

            item.type = DetectedObjectType(class_lbl, 1.0)

            if self._mode == "detector" and \
               ( item.bounding_box.width() > max_length or \
                 item.bounding_box.height() > max_length ):
                use_frame = False
                break

            filtered_truth.add(item)

        if self._gt_frames_only and len(init_truth) == 0:
            use_frame = False

        return filtered_truth, use_frame
Example #4
    def _step(self):
        try:
            # Grab image container from port using traits
            in_img_c = self.grab_input_using_trait('image')
            timestamp = self.grab_input_using_trait('timestamp')
            dos_ptr = self.grab_input_using_trait('detected_object_set')
            print('timestamp = {!r}'.format(timestamp))

            # Get current frame and give it to app feature extractor
            im = get_pil_image(in_img_c.image())
            self._app_feature_extractor.frame = im

            bbox_num = 0

            # Get detection bbox
            dos = dos_ptr.select(self._select_threshold)
            bbox_num = dos.size()

            det_obj_set = DetectedObjectSet()

            if bbox_num == 0:
                print(
                    '!!! No bbox is provided on this frame; skipping this frame !!!'
                )
            else:
                # appearance features (format: pytorch tensor)
                app_f_begin = timer()
                pt_app_features = self._app_feature_extractor(dos, False)
                app_f_end = timer()
                print('%%%app feature elapsed time: {}'.format(app_f_end -
                                                                app_f_begin))

                # get new track state from new frame and detections
                for idx, item in enumerate(dos):
                    bbox = item.bounding_box()
                    fid = timestamp.get_frame()
                    ts = timestamp.get_time_usec()
                    d_obj = item

                    # store app feature in the detected object
                    app_f = new_descriptor(pt_app_features[idx].numpy().size)
                    app_f[:] = pt_app_features[idx].numpy()
                    # print( pt_app_features[idx].numpy() )
                    d_obj.set_descriptor(app_f)
                    det_obj_set.add(d_obj)

            # push track set to output port
            self.push_to_port_using_trait('detected_object_set', det_obj_set)
            self._base_step()

        except BaseException as e:
            print(repr(e))
            import traceback
            print(traceback.format_exc())
            sys.stdout.flush()
            raise
Example #5
    def detect(self, in_img_c):

        import tensorflow as tf
        import humanfriendly

        image_height = in_img_c.height()
        image_width = in_img_c.width()

        if (self.norm_image_type and self.norm_image_type != "none"):
            print("Normalizing input image")

            in_img = in_img_c.image().asarray().astype("uint16")

            bottom, top = self.get_scaling_values(self.norm_image_type, in_img,
                                                  image_height)
            in_img = self.lin_normalize_image(in_img, bottom, top)

            in_img = np.tile(in_img, (1, 1, 3))
        else:
            in_img = np.array(get_pil_image(in_img_c.image()).convert("RGB"))

        start_time = time.time()
        boxes, scores, classes = self.generate_detection(
            self.detection_graph, in_img)
        elapsed = time.time() - start_time
        print("Done running detector in {}".format(
            humanfriendly.format_timespan(elapsed)))

        good_boxes = []
        detections = DetectedObjectSet()

        for i in range(0, len(scores)):
            if (scores[i] >= self.confidence_thresh):
                bbox = boxes[i]
                good_boxes.append(bbox)

                top_rel = bbox[0]
                left_rel = bbox[1]
                bottom_rel = bbox[2]
                right_rel = bbox[3]

                xmin = left_rel * image_width
                ymin = top_rel * image_height
                xmax = right_rel * image_width
                ymax = bottom_rel * image_height

                dot = DetectedObjectType(self.category_name, scores[i])
                obj = DetectedObject(BoundingBoxD(xmin, ymin, xmax, ymax),
                                     scores[i], dot)
                detections.add(obj)

        print("Detected {}".format(len(good_boxes)))
        return detections
Example #6
 def _step(self):
     image_container = self.grab_input_using_trait("image")
     timestamp = self.grab_input_using_trait("timestamp")
     file_name = self.grab_input_using_trait("file_name")
     image = image_container.asarray()
     h, w, _ = image.shape
     bbox_x = w//2
     bbox_y = h//2
     bbox = BoundingBox( bbox_x - int(self.config_value("bbox_width"))//2,
                         bbox_y - int(self.config_value("bbox_height"))//2,
                         bbox_x + int(self.config_value("bbox_width"))//2,
                         bbox_y + int(self.config_value("bbox_height"))//2 )
     dot = DetectedObjectType("Test", 1.0)
     do = DetectedObject(bbox, 1.0, dot)
     dos = DetectedObjectSet()
     dos.add(do)
     self.push_to_port_using_trait("detected_object_set", dos)
Example #7
    def _dowork(self, img_container):
        """
        Helper to decouple the algorithm and pipeline logic

        CommandLine:
            xdoctest viame.processes.camtrawl.processes CamtrawlDetectFishProcess._dowork

        Example:
            >>> from viame.processes.camtrawl.processes import *
            >>> from kwiver.vital.types import ImageContainer
            >>> import kwiver.sprokit.pipeline.config
            >>> # construct dummy process instance
            >>> conf = kwiver.sprokit.pipeline.config.empty_config()
            >>> self = CamtrawlDetectFishProcess(conf)
            >>> self._configure()
            >>> # construct test data
            >>> from vital.util import VitalPIL
            >>> from PIL import Image as PILImage
            >>> pil_img = PILImage.open(ub.grabdata('https://i.imgur.com/Jno2da3.png'))
            >>> pil_img = PILImage.fromarray(np.zeros((512, 512, 3), dtype=np.uint8))
            >>> img_container = ImageContainer(VitalPIL.from_pil(pil_img))
            >>> # Initialize the background detector by sending 10 black frames
            >>> for i in range(10):
            >>>     empty_set = self._dowork(img_container)
            >>> # now add a white box that should be detected
            >>> np_img = np.zeros((512, 512, 3), dtype=np.uint8)
            >>> np_img[300:340, 220:380] = 255
            >>> img_container = ImageContainer.fromarray(np_img)
            >>> detection_set = self._dowork(img_container)
            >>> assert len(detection_set) == 1
            >>> obj = detection_set[0]
        """
        # This should be read as np.uint8
        np_img = img_container.asarray()

        detection_set = DetectedObjectSet()
        ct_detections = self.detector.detect(np_img)

        for detection in ct_detections:
            bbox = BoundingBoxD(*detection.bbox.coords)
            mask = detection.mask.astype(np.uint8)
            vital_mask = ImageContainer.fromarray(mask)
            dot = DetectedObjectType("Motion", 1.0)
            obj = DetectedObject(bbox, 1.0, dot, mask=vital_mask)
            detection_set.add(obj)
        return detection_set
Example #8
    def merge(self, det_sets):

        # Gather high-level detection info into a list
        pred_sets = []
        for det_set in det_sets:
            pred_set = []
            for det in det_set:
                # Extract box info for this det
                bbox = det.bounding_box

                bbox_min_x = int(bbox.min_x())
                bbox_max_x = int(bbox.max_x())
                bbox_min_y = int(bbox.min_y())
                bbox_max_y = int(bbox.max_y())

                # Extract type info for this det
                if det.type is None:
                    continue

                #class_names = list( det.type.class_names() )
                #class_scores = [ det.type.score( n ) for n in class_names ]
                class_name = det.type.get_most_likely_class()
                class_score = det.type.score(class_name)

                pred_set.append([
                    bbox_min_x, bbox_min_y, bbox_max_x, bbox_max_y, class_name,
                    class_score
                ])
            pred_sets.append(pred_set)

        # Run merging algorithm
        #ensemble_preds = ensemble_box( preds_set, self._fusion_weights,
        #  self._iou_thr, self._skip_box_thr, self._sigma, self._fusion_type )
        ensemble_preds = []
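        # NOTE: with the ensemble_box() call commented out above, ensemble_preds
        # stays empty, so this merge currently returns an empty DetectedObjectSet.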

        # Compile output detections
        output = DetectedObjectSet()

        for pred in ensemble_preds:
            score = pred[5]
            bbox = BoundingBoxD(pred[0], pred[1], pred[2], pred[3])
            dot = DetectedObjectType(pred[4], score)
            det = DetectedObject(bbox, score, dot)
            output.add(det)

        return output
Example #9
def _kwimage_to_kwiver_detections(detections):
    """
    Convert kwimage detections to kwiver detected object sets

    Args:
        detections (kwimage.Detections)

    Returns:
        kwiver.vital.types.DetectedObjectSet
    """
    from kwiver.vital.types.types import ImageContainer, Image

    segmentations = None
    # convert segmentation masks
    if 'segmentations' in detections.data:
        segmentations = detections.data['segmentations']

    boxes = detections.boxes.to_tlbr()
    scores = detections.scores
    class_idxs = detections.class_idxs

    if not segmentations:
        # Placeholders
        segmentations = (None, ) * len(boxes)

    # convert to kwiver format, apply threshold
    detected_objects = DetectedObjectSet()

    for tlbr, score, cidx, seg in zip(boxes.data, scores, class_idxs,
                                      segmentations):
        class_name = detections.classes[cidx]

        bbox_int = np.round(tlbr).astype(np.int32)
        bounding_box = BoundingBoxD(bbox_int[0], bbox_int[1], bbox_int[2],
                                    bbox_int[3])

        detected_object_type = DetectedObjectType(class_name, score)
        detected_object = DetectedObject(bounding_box, score,
                                         detected_object_type)
        if seg:
            mask = seg.to_relative_mask().numpy().data
            detected_object.mask = ImageContainer(Image(mask))

        detected_objects.add(detected_object)
    return detected_objects
Example #10
 def detect(self, image_data):
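     # Synthetic detector: emits one detection whose box is centered on
     # (m_center_x, m_center_y) and drifts by (m_dx, m_dy) each frame.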
     dot = DetectedObjectSet([
         DetectedObject(
             BoundingBox(
                 self.m_center_x + self.frame_ct * self.m_dx - self.m_width / 2.0,
                 self.m_center_y + self.frame_ct * self.m_dy - self.m_height / 2.0,
                 self.m_center_x + self.frame_ct * self.m_dx + self.m_width / 2.0,
                 self.m_center_y + self.frame_ct * self.m_dy + self.m_height / 2.0))
     ])
     self.frame_ct += 1
     return dot
Example #11
    def _step(self):
        # grab image container from port using traits
        in_img_c = self.grab_input_using_trait('image')

        # Get python image from container (just for show)
        in_img = in_img_c.image()

        # Print out text to screen
        print("Text: " + str(self.text))

        # push dummy detections object to output port
        detections = DetectedObjectSet()
        self.push_to_port_using_trait('detected_object_set', detections)

        self._base_step()
Example #12
def _create_detected_object_set():
    from kwiver.vital.types import DetectedObject, DetectedObjectSet, BoundingBox

    dos = DetectedObjectSet()
    bbox = BoundingBox(0, 10, 100, 50)
    dos.add(DetectedObject(bbox, 0.2))
    dos.add(DetectedObject(bbox, 0.5))
    dos.add(DetectedObject(bbox, 0.4))

    return dos
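
A minimal usage sketch for consuming a set like the one above, relying only on calls already exercised in these examples (DetectedObjectSet.add(), select(), size(), iteration, and DetectedObject.confidence()); the helper name and the 0.4 threshold are illustrative assumptions, not part of the KWIVER API:

def _print_high_confidence_detections(dos, threshold=0.4):
    # Keep only detections at or above the confidence threshold
    high_confidence = dos.select(threshold)
    print("kept {} of {} detections".format(high_confidence.size(), dos.size()))

    # A DetectedObjectSet is iterable; each element is a DetectedObject
    for det in high_confidence:
        print(det.confidence())

# e.g. _print_high_confidence_detections(_create_detected_object_set())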
Example #13
    def detect(self, image_data):
        input_image = image_data.asarray().astype('uint8')
        if self._rgb_to_bgr:
            input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

        from mmdet.apis import inference_detector
        detections = inference_detector(self._model, input_image)

        if isinstance(detections, tuple):
            bbox_result, segm_result = detections
        else:
            bbox_result, segm_result = detections, None

        if np.size(bbox_result) > 0:
            bboxes = np.vstack(bbox_result)
        else:
            bboxes = []

        # convert segmentation masks
        masks = []
        if segm_result is not None:
            segms = mmcv.concat_list(segm_result)
            inds = np.where(bboxes[:, -1] > self._thresh)[0]
            for i in inds:
                masks.append(maskUtils.decode(segms[i]).astype(bool))

        # collect labels
        labels = [
            np.full(bbox.shape[0], i, dtype=np.int32)
            for i, bbox in enumerate(bbox_result)
        ]

        if np.size(labels) > 0:
            labels = np.concatenate(labels)
        else:
            labels = []

        # convert to kwiver format, apply threshold
        output = DetectedObjectSet()

        for bbox, label in zip(bboxes, labels):
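            # each bbox row from the detector output is [x1, y1, x2, y2, score]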
            class_confidence = float(bbox[-1])
            if class_confidence < self._thresh:
                continue

            bbox_int = bbox.astype(np.int32)
            bounding_box = BoundingBoxD(bbox_int[0], bbox_int[1], bbox_int[2],
                                        bbox_int[3])

            class_name = self._labels[label]
            detected_object_type = DetectedObjectType(class_name,
                                                      class_confidence)

            detected_object = DetectedObject(bounding_box,
                                             np.max(class_confidence),
                                             detected_object_type)
            output.add(detected_object)

        if np.size(labels) > 0 and self._display_detections:
            mmcv.imshow_det_bboxes(input_image,
                                   bboxes,
                                   labels,
                                   class_names=self._labels,
                                   score_thr=self._thresh,
                                   show=True)

        return output
Example #14
 def __init__(self, do_):
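     # 'dos' is presumably an alias/import of DetectedObjectSet; initialize the
     # base set before storing the supplied detected objects on the instance.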
     dos.__init__(self)
     self.det_objs = do_
Example #15
    def extract_chips_for_dets(self, image_files, truth_sets):
        import cv2
        output_files = []
        output_dets = []

        for i in range(len(image_files)):
            filename = image_files[i]
            groundtruth = truth_sets[i]
            detections = []
            scale = 1.0

            if self._target_type_scales:
                scale = self.compute_scale_factor(groundtruth)

            if len(groundtruth) > 0:
                img = cv2.imread(filename)

                if len(np.shape(img)) < 2:
                    continue

                img_max_x = np.shape(img)[1]
                img_max_y = np.shape(img)[0]

                # Optionally scale image
                if scale != 1.0:
                    img_max_x = int(scale * img_max_x)
                    img_max_y = int(scale * img_max_y)
                    img = cv2.resize(img, (img_max_x, img_max_y))

                # Run optional background detector on data
                if self._detector_model:
                    kw_image = Image(img)
                    kw_image_container = ImageContainer(kw_image)
                    detections = self._detector.detect(kw_image_container)

            if len(groundtruth) == 0 and len(detections) == 0:
                continue

            overlaps = np.zeros((len(detections), len(groundtruth)))
            det_boxes = []

            for det in detections:
                bbox = det.bounding_box
                det_boxes.append((int(bbox.min_x()), int(bbox.min_y()),
                                  int(bbox.width()), int(bbox.height())))

            for i, gt in enumerate(groundtruth):
                # Extract chip for this detection
                bbox = gt.bounding_box

                bbox_min_x = int(bbox.min_x() * scale)
                bbox_max_x = int(bbox.max_x() * scale)
                bbox_min_y = int(bbox.min_y() * scale)
                bbox_max_y = int(bbox.max_y() * scale)

                bbox_width = bbox_max_x - bbox_min_x
                bbox_height = bbox_max_y - bbox_min_y

                max_overlap = 0.0

                for j, det in enumerate(det_boxes):

                    # Compute overlap between detection and truth
                    (det_min_x, det_min_y, det_width, det_height) = det

                    # Get the overlap rectangle
                    overlap_x0 = max(bbox_min_x, det_min_x)
                    overlap_y0 = max(bbox_min_y, det_min_y)
                    overlap_x1 = min(bbox_max_x, det_min_x + det_width)
                    overlap_y1 = min(bbox_max_y, det_min_y + det_height)

                    # Check if there is an overlap
                    if overlap_x1 - overlap_x0 <= 0 or overlap_y1 - overlap_y0 <= 0:
                        continue

                    # If yes, calculate the ratio of the overlap
                    det_area = float(det_width * det_height)
                    gt_area = float(bbox_width * bbox_height)
                    int_area = float(
                        (overlap_x1 - overlap_x0) * (overlap_y1 - overlap_y0))
                    overlap = min(int_area / det_area, int_area / gt_area)
                    overlaps[j, i] = overlap

                    if overlap >= self._min_overlap_for_association and overlap > max_overlap:
                        max_overlap = overlap

                        bbox_min_x = det_min_x
                        bbox_min_y = det_min_y
                        bbox_max_x = det_min_x + det_width
                        bbox_max_y = det_min_y + det_height

                        bbox_width = det_width
                        bbox_height = det_height

                if self._chip_method == "fixed_width":
                    chip_width = int(self._chip_width)
                    half_width = int(chip_width / 2)

                    bbox_min_x = int(
                        (bbox_min_x + bbox_max_x) / 2) - half_width
                    bbox_min_y = int(
                        (bbox_min_y + bbox_max_y) / 2) - half_width
                    bbox_max_x = bbox_min_x + chip_width
                    bbox_max_y = bbox_min_y + chip_width

                    bbox_width = chip_width
                    bbox_height = chip_width

                bbox_area = bbox_width * bbox_height

                if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
                    continue
                if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
                    continue

                if self._reduce_category and gt.type() and \
                  gt.type().get_most_likely_class() == self._reduce_category and \
                  random.uniform( 0, 1 ) < 0.90:
                    continue

                if self._border_exclude > 0:
                    if bbox_min_x <= self._border_exclude:
                        continue
                    if bbox_min_y <= self._border_exclude:
                        continue
                    if bbox_max_x >= img_max_x - self._border_exclude:
                        continue
                    if bbox_max_y >= img_max_y - self._border_exclude:
                        continue

                crop = img[bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x]
                self._sample_count = self._sample_count + 1
                crop_str = ('%09d' % self._sample_count) + ".png"
                new_file = os.path.join(self._chip_directory, crop_str)
                cv2.imwrite(new_file, crop)

                # Set new box size for this detection
                gt.bounding_box = BoundingBoxD(0, 0,
                                               np.shape(crop)[1],
                                               np.shape(crop)[0])
                new_set = DetectedObjectSet()
                new_set.add(gt)

                output_files.append(new_file)
                output_dets.append(new_set)

            neg_count = 0

            for j, det in enumerate(detections):

                if max(overlaps[j]) >= self._max_overlap_for_negative:
                    continue

                bbox = det.bounding_box

                bbox_min_x = int(bbox.min_x())
                bbox_max_x = int(bbox.max_x())
                bbox_min_y = int(bbox.min_y())
                bbox_max_y = int(bbox.max_y())

                bbox_width = bbox_max_x - bbox_min_x
                bbox_height = bbox_max_y - bbox_min_y

                bbox_area = bbox_width * bbox_height

                if self._chip_method == "fixed_width":
                    chip_width = int(self._chip_width)
                    half_width = int(chip_width / 2)

                    bbox_min_x = int(
                        (bbox_min_x + bbox_max_x) / 2) - half_width
                    bbox_min_y = int(
                        (bbox_min_y + bbox_max_y) / 2) - half_width
                    bbox_max_x = bbox_min_x + chip_width
                    bbox_max_y = bbox_min_y + chip_width

                    bbox_width = chip_width
                    bbox_height = chip_width

                if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
                    continue
                if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
                    continue

                if self._border_exclude > 0:
                    if bbox_min_x <= self._border_exclude:
                        continue
                    if bbox_min_y <= self._border_exclude:
                        continue
                    if bbox_max_x >= img_max_x - self._border_exclude:
                        continue
                    if bbox_max_y >= img_max_y - self._border_exclude:
                        continue

                # Handle random factor
                if self._max_neg_per_frame < 1.0 and random.uniform(
                        0, 1) > self._max_neg_per_frame:
                    break

                crop = img[bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x]
                self._sample_count = self._sample_count + 1
                crop_str = ('%09d' % self._sample_count) + ".png"
                new_file = os.path.join(self._chip_directory, crop_str)
                cv2.imwrite(new_file, crop)

                # Set new box size for this detection
                det.bounding_box = BoundingBoxD(0, 0,
                                                np.shape(crop)[1],
                                                np.shape(crop)[0])
                det.type = DetectedObjectType(self._negative_category, 1.0)
                new_set = DetectedObjectSet()
                new_set.add(det)

                output_files.append(new_file)
                output_dets.append(new_set)

                # Check maximum negative count
                neg_count = neg_count + 1
                if neg_count > self._max_neg_per_frame:
                    break

        return [output_files, output_dets]
Example #16
    def _step(self):

        # Get all inputs even ones we don't use
        in_img_c = self.grab_input_using_trait('image')
        timestamp = self.grab_input_using_trait('timestamp')

        if not timestamp.has_valid_frame():
            raise RuntimeError("Frame timestamps must contain frame IDs")

        frame_id = timestamp.get_frame()

        if self.has_input_port_edge_using_trait('detected_object_set'):
            detections = self.grab_input_using_trait('detected_object_set')
        else:
            detections = DetectedObjectSet()
        if self.has_input_port_edge_using_trait('initializations'):
            initializations = self.grab_input_using_trait('initializations')
        else:
            initializations = ObjectTrackSet()
        if self.has_input_port_edge_using_trait('recommendations'):
            recommendations = self.grab_input_using_trait('recommendations')
        else:
            recommendations = ObjectTrackSet()
        if self.has_input_port_edge_using_trait('evaluation_requests'):
            requests = self.grab_input_using_trait('evaluation_requests')
        else:
            requests = DetectedObjectSet()

        print('mdnet tracker timestamp = {!r}'.format(timestamp))

        # Handle new track external initialization
        init_track_pool = initializations.tracks()
        recc_track_pool = recommendations.tracks()
        init_track_ids = []
        img_used = False

        if len(init_track_pool) != 0 or len(self._trackers) != 0:
            img_npy = self.format_image(in_img_c)
            img_used = True

        for trk in init_track_pool:
            # Special case, initialize a track on a previous frame
            if trk[trk.last_frame].frame_id == self._last_frame_id and \
              ( not trk.id in self._track_init_frames or \
              self._track_init_frames[ trk.id ] < self._last_frame_id ):
                tid = trk.id
                cbox = trk[trk.last_frame].detection().bounding_box()
                bbox = [
                    cbox.min_x(),
                    cbox.min_y(),
                    cbox.width(),
                    cbox.height()
                ]
                self._last_frame = self.format_image(self._last_frame)
                self._trackers[tid] = mdnet.MDNetTracker(
                    self._last_frame, bbox)
                self._tracks[tid] = [ObjectTrackState(timestamp, cbox, 1.0)]
                self._track_init_frames[tid] = self._last_frame_id
            # This track has an initialization signal for the current frame
            elif trk[trk.last_frame].frame_id == frame_id:
                tid = trk.id
                cbox = trk[trk.last_frame].detection().bounding_box()
                bbox = [
                    cbox.min_x(),
                    cbox.min_y(),
                    cbox.width(),
                    cbox.height()
                ]
                self._trackers[tid] = mdnet.MDNetTracker(img_npy, bbox)
                self._tracks[tid] = [ObjectTrackState(timestamp, cbox, 1.0)]
                init_track_ids.append(tid)
                self._track_init_frames[tid] = frame_id

        # Update existing tracks
        for tid in self._trackers.keys():
            if tid in init_track_ids:
                continue  # Already processed (initialized) on frame
            # Check if there's a recommendation for the update
            recc_bbox = []
            for trk in recc_track_pool:
                if trk.id == tid and trk[trk.last_frame].frame_id == frame_id:
                    cbox = trk[trk.last_frame].detection().bounding_box()
                    recc_bbox = [
                        cbox.min_x(),
                        cbox.min_y(),
                        cbox.width(),
                        cbox.height()
                    ]
                    break
            bbox, score = self._trackers[tid].update(img_npy,
                                                     likely_bbox=recc_bbox)
            if score > mdnet.opts['success_thr']:
                cbox = BoundingBoxD(bbox[0], bbox[1], bbox[0] + bbox[2],
                                    bbox[1] + bbox[3])
                new_state = ObjectTrackState(timestamp, cbox, score)
                self._tracks[tid].append(new_state)

        # Handle track termination
        # TODO: Remove old or dead tracks

        # Classify requested evaluations
        # TODO: Evaluate input detections
        output_evaluations = DetectedObjectSet()

        # Output results
        output_tracks = ObjectTrackSet(
            [Track(tid, trk) for tid, trk in self._tracks.items()])

        self.push_to_port_using_trait('timestamp', timestamp)
        self.push_to_port_using_trait('object_track_set', output_tracks)
        self.push_to_port_using_trait('evaluations', output_evaluations)

        self._last_frame_id = timestamp.get_frame()
        if img_used:
            self._last_frame = img_npy
        else:
            self._last_frame = in_img_c
        self._base_step()
Example #17
    def refine(self, image_data, detections):

        if len(detections) == 0:
            return detections

        img = image_data.asarray().astype('uint8')
        predictor = self.predictor
        scale = 1.0

        img_max_x = np.shape(img)[1]
        img_max_y = np.shape(img)[0]

        if self._target_type_scales:
            scale = self.compute_scale_factor(detections)
            if scale != 1.0:
                img_max_x = int(img_max_x * scale)
                img_max_y = int(img_max_y * scale)
                img = cv2.resize(img, (img_max_x, img_max_y))

        # Extract patches for ROIs
        image_chips = []
        detection_ids = []

        for i, det in enumerate(detections):
            # Extract chip for this detection
            bbox = det.bounding_box

            bbox_min_x = int(bbox.min_x() * scale)
            bbox_max_x = int(bbox.max_x() * scale)
            bbox_min_y = int(bbox.min_y() * scale)
            bbox_max_y = int(bbox.max_y() * scale)

            if self._kwiver_config['chip_method'] == "fixed_width":
                chip_width = int(self._kwiver_config['chip_width'])
                half_width = int(chip_width / 2)

                bbox_min_x = int((bbox_min_x + bbox_max_x) / 2) - half_width
                bbox_min_y = int((bbox_min_y + bbox_max_y) / 2) - half_width
                bbox_max_x = bbox_min_x + chip_width
                bbox_max_y = bbox_min_y + chip_width

            if self._border_exclude > 0:
                if bbox_min_x <= self._border_exclude:
                    continue
                if bbox_min_y <= self._border_exclude:
                    continue
                if bbox_max_x >= img_max_x - self._border_exclude:
                    continue
                if bbox_max_y >= img_max_y - self._border_exclude:
                    continue
            else:
                if bbox_min_x < 0:
                    bbox_min_x = 0
                if bbox_min_y < 0:
                    bbox_min_y = 0
                if bbox_max_x > img_max_x:
                    bbox_max_x = img_max_x
                if bbox_max_y > img_max_y:
                    bbox_max_y = img_max_y

            bbox_area = (bbox_max_x - bbox_min_x) * (bbox_max_y - bbox_min_y)

            if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
                continue
            if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
                continue

            crop = img[bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x]
            image_chips.append(crop)
            detection_ids.append(i)

        # Run classifier on ROIs
        classifications = list(predictor.predict(image_chips))

        # Put classifications back into detections
        output = DetectedObjectSet()

        for i, det in enumerate(detections):
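            # Detections that were skipped during chip extraction (not present in
            # detection_ids) are passed through with their original type.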
            if len(detection_ids) == 0 or i != detection_ids[0]:
                output.add(det)
                continue

            new_class = classifications[0]

            if new_class.data.get('prob', None) is not None:
                # If we have a probability for each class, use that
                class_names = list(new_class.classes)
                class_scores = list(new_class.prob)
            else:
                # Otherwise we only have the score for the predicted class
                class_names = [new_class.classes[new_class.cidx]]
                class_scores = [new_class.conf]

            if self._average_prior and det.type is not None:
                priors = det.type
                prior_names = priors.class_names()
                for name in prior_names:
                    if name in class_names:
                        class_scores[class_names.index(name)] += priors.score(
                            name)
                    else:
                        class_names.append(name)
                        class_scores.append(priors.score(name))
                for i in range(len(class_scores)):
                    class_scores[i] = class_scores[i] * 0.5

            detected_object_type = DetectedObjectType(class_names,
                                                      class_scores)
            det.type = detected_object_type

            output.add(det)
            detection_ids.pop(0)
            classifications.pop(0)

        return output
Example #18
def create_detected_object_set():
    dos = DetectedObjectSet()
    dos.add(DetectedObject(create_bounding_box()))
    return dos
Example #19
    def _step(self):
        logger.debug(' ----- ' + self.__class__.__name__ + ' step')
        self.prog.step()

        if self.cal is None:
            self.cal = True
            logger.debug(' ----- ' + self.__class__.__name__ + ' grab cam1')
            # grab camera only if we don't have one yet
            camera1 = self.grab_input_using_trait('camera' + '1')
            logger.debug(' ----- ' + self.__class__.__name__ + ' grab cam2')
            camera2 = self.grab_input_using_trait('camera' + '2')

            def _cal_from_vital(vital_camera):
                vci = vital_camera.intrinsics
                cam_dict = {
                    'extrinsic': {
                        'om': vital_camera.rotation.rodrigues().ravel(),
                        'T': vital_camera.translation.ravel(),
                    },
                    'intrinsic': {
                        'cc': vci.principle_point.ravel(),
                        'fc': [
                            vci.focal_length,
                            vci.focal_length / vci.aspect_ratio
                        ],
                        'alpha_c': vci.skew,
                        'kc': vci.dist_coeffs.ravel(),
                    }
                }
                return cam_dict

            logger.debug(' ----- ' + self.__class__.__name__ +
                         ' parse cameras')
            self.cal = ctalgo.StereoCalibration({
                'left': _cal_from_vital(camera1),
                'right': _cal_from_vital(camera2),
            })
            logger.debug(' ----- ' + self.__class__.__name__ +
                         ' no more need for cameras')

        detection_set1 = self.grab_input_using_trait('detected_object_set' +
                                                     '1')
        detection_set2 = self.grab_input_using_trait('detected_object_set' +
                                                     '2')

        # Convert back to the format the algorithm understands
        def _detections_from_vital(detection_set):
            for vital_det in detection_set:
                bbox = vital_det.bounding_box
                coords = [
                    bbox.min_x(),
                    bbox.min_y(),
                    bbox.max_x(),
                    bbox.max_y()
                ]
                mask = vital_det.mask.asarray()
                ct_bbox = ctalgo.BoundingBox(coords)

                # TODO: to measure distances between special keypoint
                # detections add an additional argument to DetectedObject
                # >>> special_keypoints = {
                # ...    'head': [x1, y1],
                # ...    'tail': [x2, y2],
                # ... }
                # >>> ct_det = ctalgo.DetectedObject(
                # ...     ct_bbox, mask, special_keypoints=special_keypoints)
                ct_det = ctalgo.DetectedObject(ct_bbox, mask)
                yield ct_det

        detections1 = list(_detections_from_vital(detection_set1))
        detections2 = list(_detections_from_vital(detection_set2))

        assignment, assign_data, cand_errors = self.triangulator.find_matches(
            self.cal, detections1, detections2)

        logger.debug(' ----- ' + self.__class__.__name__ +
                     ' found {} matches'.format(len(assign_data)))

        # Append assignments to the measurements
        if self.measurement_file:

            def csv_repr(d):
                if isinstance(d, np.ndarray):
                    d = d.tolist()
                s = repr(d)
                return s.replace('\n', '').replace(',', ';').replace(' ', '')

            for data in assign_data:
                data['current_frame'] = self.frame_id
                self.frame_id = self.frame_id + 1
                line = ','.join(
                    [csv_repr(d) for d in ub.take(data, self.headers)])
                self.output_file.write(line + '\n')

            if assign_data:
                self.output_file.flush()

        # Create output detection vectors
        output1 = [d for d in detection_set1]
        output2 = [d for d in detection_set2]

        # Assign all points to detections for now
        for match in assign_data:
            i1 = match["ij"][0]
            i2 = match["ij"][1]
            output1[i1].set_length(match["fishlen"])
            output2[i2].set_length(match["fishlen"])
            head, tail = detections1[i1].center_keypoints()
            output1[i1].add_keypoint('head', Point2d(head))
            output1[i1].add_keypoint('tail', Point2d(tail))
            head, tail = detections2[i2].center_keypoints()
            output2[i2].add_keypoint('head', Point2d(head))
            output2[i2].add_keypoint('tail', Point2d(tail))

        output1 = DetectedObjectSet(output1)
        output2 = DetectedObjectSet(output2)

        # Push output detections to port
        self.push_to_port_using_trait('detected_object_set1', output1)
        self.push_to_port_using_trait('detected_object_set2', output2)
        self._base_step()
Example #20
    def _step(self):
        try:
            def timing(desc, f):
                """Return f(), printing a message about how long it took"""
                start = timer()
                result = f()
                end = timer()
                print('%%%', desc, ' elapsed time: ', end - start, sep='')
                return result

            print('step', self._step_id)

            # grab image container from port using traits
            in_img_c = self.grab_input_using_trait('image')
            timestamp = self.grab_input_using_trait('timestamp')
            dos_ptr = self.grab_input_using_trait('detected_object_set')
            print('timestamp =', repr(timestamp))

            # Get current frame
            im = get_pil_image(in_img_c.image()).convert('RGB')

            # Get detection bbox
            if self._gtbbox_flag:
                dos = self._m_bbox[self._step_id]
                bbox_num = len(dos)
            else:
                dos = dos_ptr.select(self._select_threshold)
                bbox_num = dos.size()
            #print('bbox list len is', dos.size())

            det_obj_set = DetectedObjectSet()
            if bbox_num == 0:
                print('!!! No bbox is provided on this frame.  Skipping this frame !!!')
            else:
                # interaction features
                grid_feature_list = timing('grid feature', lambda:
                                           self._grid(im.size, dos, self._gtbbox_flag))

                # appearance features (format: pytorch tensor)
                pt_app_features = timing('app feature', lambda:
                                         self._app_feature_extractor(im, dos, self._gtbbox_flag))

                track_state_list = []
                next_track_id = int(self._track_set.get_max_track_id()) + 1

                # get new track state from new frame and detections
                for idx, item in enumerate(dos):
                    if self._gtbbox_flag:
                        bbox = item
                        fid = self._step_id
                        ts = self._step_id
                        d_obj = DetectedObject(bbox=item, confidence=1.0)
                    else:
                        bbox = item.bounding_box()
                        fid = timestamp.get_frame()
                        ts = timestamp.get_time_usec()
                        d_obj = item

                    if self._add_features_to_detections:
                        # store app feature to detected_object
                        app_f = new_descriptor(g_config.A_F_num)
                        app_f[:] = pt_app_features[idx].numpy()
                        d_obj.set_descriptor(app_f)
                    det_obj_set.add(d_obj)

                    # build track state for current bbox for matching
                    cur_ts = track_state(frame_id=self._step_id,
                                        bbox_center=bbox.center(),
                                        interaction_feature=grid_feature_list[idx],
                                        app_feature=pt_app_features[idx],
                                        bbox=[int(bbox.min_x()), int(bbox.min_y()),
                                              int(bbox.width()), int(bbox.height())],
                                        detected_object=d_obj,
                                        sys_frame_id=fid, sys_frame_time=ts)
                    track_state_list.append(cur_ts)

                # if there are no tracks, generate new tracks from the track_state_list
                if not self._track_flag:
                    next_track_id = self._track_set.add_new_track_state_list(next_track_id,
                                    track_state_list, self._track_initialization_threshold)
                    self._track_flag = True
                else:
                    # check whether we need to terminate a track
                    for track in list(self._track_set.iter_active()):
                        # terminating a track based on readin_frame_id or original_frame_id gap
                        if (self._step_id - track[-1].frame_id > self._terminate_track_threshold
                            or fid - track[-1].sys_frame_id > self._sys_terminate_track_threshold):
                            self._track_set.deactivate_track(track)


                    # call IOU tracker
                    if self._IOU_flag:
                        self._track_set, track_state_list = timing('IOU tracking', lambda: (
                            self._iou_tracker(self._track_set, track_state_list)
                        ))

                    #print('***track_set len', len(self._track_set))
                    #print('***track_state_list len', len(track_state_list))

                    # estimate similarity matrix
                    similarity_mat, track_idx_list = timing('SRNN association', lambda: (
                        self._srnn_matching(self._track_set, track_state_list, self._ts_threshold)
                    ))

                    # reset update_flag
                    self._track_set.reset_updated_flag()

                    # Hungarian algorithm
                    row_idx_list, col_idx_list = timing('Hungarian algorithm', lambda: (
                        sp.optimize.linear_sum_assignment(similarity_mat)
                    ))

                    for i in range(len(row_idx_list)):
                        r = row_idx_list[i]
                        c = col_idx_list[i]

                        if -similarity_mat[r, c] < self._similarity_threshold:
                            # initialize a new track
                            if (track_state_list[c].detected_object.confidence()
                                   >= self._track_initialization_threshold):
                                self._track_set.add_new_track_state(next_track_id,
                                        track_state_list[c])
                                next_track_id += 1
                        else:
                            # add to existing track
                            self._track_set.update_track(track_idx_list[r], track_state_list[c])

                    # for the remaining unmatched track states, we initialize new tracks
                    if len(track_state_list) - len(col_idx_list) > 0:
                        for i in range(len(track_state_list)):
                            if (i not in col_idx_list
                                and (track_state_list[i].detected_object.confidence()
                                     >= self._track_initialization_threshold)):
                                self._track_set.add_new_track_state(next_track_id,
                                        track_state_list[i])
                                next_track_id += 1

                print('total tracks', len(self._track_set))

            # push track set to output port
            ot_list = ts2ot_list(self._track_set)
            ots = ObjectTrackSet(ot_list)

            self.push_to_port_using_trait('object_track_set', ots)
            self.push_to_port_using_trait('detected_object_set', det_obj_set)

            self._step_id += 1

            self._base_step()

        except BaseException as e:
            print( repr( e ) )
            import traceback
            print( traceback.format_exc() )
            sys.stdout.flush()
            raise