Esempio n. 1
0
def draw_final_outputs(img, results):
    """
    Args:
        results: [DetectionResult]
    """
    if len(results) == 0:
        return img

    # Display in largest to smallest order to reduce occlusion
    boxes = np.asarray([r.box for r in results])
    areas = np_area(boxes)
    sorted_inds = np.argsort(-areas)

    ret = img
    tags = []

    for result_id in sorted_inds:
        r = results[result_id]
        if r.mask is not None:
            ret = draw_mask(ret, r.mask)

    for r in results:
        tags.append("{},{:.2f}".format(cfg.DATA.CLASS_NAMES[r.class_id],
                                       r.score))
    ret = viz.draw_boxes(ret, boxes, tags)
    return ret
Esempio n. 2
0
 def training_roidbs(self):
     lines = self.read_lines()
     roidbs = []
     for line in lines:
         file_name = self.read_file_name(line["md5"])
         if os.path.isfile(file_name):
             boxes, classes = self.read_labels(line["labels"])
             # Remove boxes with empty area
             if boxes.size:
                 non_zero_area = np_area(boxes) > 0
                 boxes = boxes[non_zero_area, :]
                 classes = classes[non_zero_area]
             boxes = np.float32(boxes)
             roidb = {
                 "file_name": file_name,
                 "boxes": boxes,
                 "class": classes,
                 "is_crowd": np.zeros((classes.shape[0]))
             }
             roidbs.append(roidb)
     return roidbs
Esempio n. 3
0
    def __call__(self, roidb):
        fname, boxes, klass, is_crowd = roidb["file_name"], roidb[
            "boxes"], roidb["class"], roidb["is_crowd"]
        assert boxes.ndim == 2 and boxes.shape[1] == 4, boxes.shape
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype("float32")
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return float32 boxes!"

        if not self.cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        tfms = self.aug.get_transform(im)
        im = tfms.apply_image(im)
        points = box_to_point8(boxes)
        points = tfms.apply_coords(points)
        boxes = point8_to_box(points)
        if len(boxes):
            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {"image": im}
        # Add rpn data to dataflow:
        try:
            if self.cfg.MODE_FPN:
                multilevel_anchor_inputs = self.get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret["anchor_labels_lvl{}".format(i + 2)] = anchor_labels
                    ret["anchor_boxes_lvl{}".format(i + 2)] = anchor_boxes
            else:
                ret["anchor_labels"], ret[
                    "anchor_boxes"] = self.get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret["gt_boxes"] = boxes
            ret["gt_labels"] = klass
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                "warn")
            return None

        if self.cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb["segmentation"])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            gt_mask_width = int(np.ceil(im.shape[1] / 8.0) *
                                8)  # pad to 8 in order to pack mask into bits

            for polys in segmentation:
                if not self.cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [tfms.apply_coords(p) for p in polys]
                masks.append(
                    polygons_to_mask(polys, im.shape[0], gt_mask_width))

            if len(masks):
                masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
                masks = np.packbits(masks, axis=-1)
            else:  # no gt on the image
                masks = np.zeros((0, im.shape[0], gt_mask_width // 8),
                                 dtype='uint8')

            ret['gt_masks_packed'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret