Example #1
0
    def check(self, mask, expected):
        """Assert that ``mask_to_bbox(mask)`` reproduces ``expected``.

        The result must be an instance of ``type(expected)`` and, after both
        arrays are passed through ``cuda.to_cpu``, equal to ``expected``.
        """
        actual = mask_to_bbox(mask)
        expected_type = type(expected)
        self.assertIsInstance(actual, expected_type)

        actual_cpu = cuda.to_cpu(actual)
        expected_cpu = cuda.to_cpu(expected)
        np.testing.assert_equal(actual_cpu, expected_cpu)
Example #2
0
    def check_proposal_target_creator(self, roi, mask, label,
                                      proposal_target_creator):
        """Validate the types, shapes and labels of the sampled targets.

        ``proposal_target_creator`` is called with ``roi``, ``mask``,
        ``label``, the boxes derived from ``mask`` and ``self.mask_size``.
        Its four outputs are then checked against ``self.n_sample``,
        ``self.mask_size`` and ``self.pos_ratio``.
        """
        xp = cuda.get_array_module(roi)
        gt_bbox = mask_to_bbox(mask)
        outputs = proposal_target_creator(
            roi, mask, label, gt_bbox, mask_size=self.mask_size)
        sample_roi, gt_roi_mask, gt_roi_label, gt_roi_loc = outputs

        # Each output must be an ndarray of the array module ``roi`` uses.
        for out in (sample_roi, gt_roi_loc, gt_roi_mask, gt_roi_label):
            self.assertIsInstance(out, xp.ndarray)

        # Bring everything to the CPU before inspecting shapes and values.
        sample_roi, gt_roi_loc, gt_roi_mask, gt_roi_label = [
            cuda.to_cpu(out)
            for out in (sample_roi, gt_roi_loc, gt_roi_mask, gt_roi_label)]

        n_sample = self.n_sample
        mask_shape = (n_sample, self.mask_size, self.mask_size)
        self.assertEqual(sample_roi.shape, (n_sample, 4))
        self.assertEqual(gt_roi_loc.shape, (n_sample, 4))
        self.assertEqual(gt_roi_mask.shape, mask_shape)
        self.assertEqual(gt_roi_label.shape, (n_sample, ))

        # Labels: none may be negative; label 0 is counted as negative and
        # labels >= 1 as positive, with positives capped by ``pos_ratio``.
        n_non_negative = np.sum(gt_roi_label >= 0)
        np.testing.assert_equal(n_non_negative, n_sample)
        n_pos = np.sum(gt_roi_label >= 1)
        n_neg = np.sum(gt_roi_label == 0)
        self.assertLessEqual(n_pos, n_sample * self.pos_ratio)
        self.assertLessEqual(n_neg, n_sample - n_pos)
Example #3
0
def main():
    """Segment one image with FCISPSROIAlignResNet101 and plot the result.

    Command-line arguments:
        --gpu: GPU id; a negative value (the default, -1) keeps the model
            on the CPU.
        --pretrained-model: value forwarded to the model constructor
            (default ``'sbd'``).
        image: path of the image to read.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--gpu', type=int, default=-1)
    arg_parser.add_argument('--pretrained-model', default='sbd')
    arg_parser.add_argument('image')
    opts = arg_parser.parse_args()

    net = FCISPSROIAlignResNet101(
        n_fg_class=20, pretrained_model=opts.pretrained_model)

    use_gpu = opts.gpu >= 0
    if use_gpu:
        chainer.cuda.get_device_from_id(opts.gpu).use()
        net.to_gpu()

    image = read_image(opts.image, color=True)

    # predict() takes a batch; only the single entry we passed is used.
    batch_masks, batch_labels, batch_scores = net.predict([image])
    inst_mask = batch_masks[0]
    inst_label = batch_labels[0]
    inst_score = batch_scores[0]

    inst_bbox = mask_to_bbox(inst_mask)
    n_inst = len(inst_mask)
    palette = voc_colormap(list(range(1, n_inst + 1)))

    # Draw the boxes first, then overlay the masks on the same axes.
    axes = vis_bbox(
        image, inst_bbox, instance_colors=palette, alpha=0.5, linewidth=1.5)
    vis_instance_segmentation(
        None, inst_mask, inst_label, inst_score,
        label_names=sbd_instance_segmentation_label_names,
        instance_colors=palette, alpha=0.7, ax=axes)
    plt.show()
Example #4
0
def refine_bbox(row):
    """Recompute the bounding box of ``row`` from its segmentation.

    ``row`` must provide the keys ``'segmentation'``, ``'width'`` and
    ``'height'``. The segmentation is converted to a mask, the box
    enclosing that mask is computed, and the box is returned after being
    passed through ``reform_bbox``.
    """
    segmentation = row['segmentation']
    width, height = row['width'], row['height']
    mask = segm_to_mask(segmentation, width, height)

    # mask_to_bbox is given a batch of one mask; take its only box back out.
    single_batch = mask[None, :, :]
    tight_bbox = mask_to_bbox(single_batch)[0]
    return reform_bbox(tight_bbox)
Example #5
0
 def predict(self, img):
     """Run ``self.model`` on one image and return per-instance results.

     The last axis of ``img`` is reversed and moved to the front before the
     image is wrapped in a batch of one (presumably HWC/BGR -> CHW/RGB;
     confirm against the caller).

     Returns:
         Tuple ``(roi_mask, bbox, label, score)`` where ``bbox`` is rounded
         and cast to int32, and ``roi_mask`` is built by
         ``mask_to_roi_mask`` from the masks scaled by 255 as uint8.
     """
     channel_first = img[:, :, ::-1].transpose((2, 0, 1))
     batch = channel_first[None]
     masks, labels, scores = self.model.predict(batch)

     # Only one image was submitted, so keep the first entry of each output.
     mask = masks[0]
     label = labels[0]
     score = scores[0]

     bbox = np.round(mask_to_bbox(mask)).astype(np.int32)
     mask_u8 = (mask * 255).astype(np.uint8)
     return mask_to_roi_mask(mask_u8, bbox), bbox, label, score
Example #6
0
    def check(self, mask, expected):
        """Assert that ``scale_mask`` returns ``expected`` for ``mask``.

        The masks are scaled inside their own bounding boxes to a size of 4.
        The output must have the same array type as the input, a boolean
        dtype, and be equal to ``expected`` after both are passed through
        ``cuda.to_cpu``.
        """
        in_type = type(mask)
        bbox = mask_to_bbox(mask)
        size = 4
        out_mask = scale_mask(mask, bbox, size)

        self.assertIsInstance(out_mask, in_type)
        # ``np.bool`` was a deprecated alias of the builtin ``bool`` and was
        # removed in NumPy 1.24; ``np.bool_`` is the actual scalar type.
        self.assertEqual(out_mask.dtype, np.bool_)

        np.testing.assert_equal(cuda.to_cpu(out_mask), cuda.to_cpu(expected))
Example #7
0
 def rebase_sst(self, s_in, s_st, bboxes):
     """Reorder every straddling set by how well it completes its box.

     For each ``(sin, sst, bbox)`` triple, every mask in ``sst`` is OR-ed
     with ``sin``, the bounding box of that union is compared with ``bbox``
     via ``bbox_iou``, and ``sst`` is re-indexed in descending order of the
     resulting score.

     Returns:
         A list with one reordered ``sst`` array per input triple.
     """
     reordered = []
     for sin, sst, bbox in zip(s_in, s_st, bboxes):
         n, h, w = sst.shape
         union_masks = np.empty((n, h, w), dtype=np.float32)
         # Fill each slot in place with the union of sin and one mask.
         for slot, s_mask in zip(union_masks, sst):
             slot[...] = np.bitwise_or(sin, s_mask)

         target = np.array([bbox])
         scores = bbox_iou(mask_to_bbox(union_masks), target)
         iou = np.squeeze(scores)
         # argsort is ascending; reverse it to put the best match first.
         ascending = np.argsort(iou, axis=0)
         reordered.append(sst[ascending[::-1]])
     return reordered
Example #8
0
    def box_alignment(self, img, bboxes, masks, boxes):
        """Greedily grow each initial mask so its bounding box fits ``bbox``.

        The initial sets come from ``self.get_initial_sets`` and the
        straddling sets are reordered by ``self.rebase_sst``. For each box,
        straddling masks are OR-ed into the current mask one at a time, and
        the growth stops as soon as the previous IoU (between the mask's
        bounding box and ``bbox``) is strictly greater than the new one.

        Returns:
            ``(final_boxes, final_masks, added_superpixel_masks)``. The
            first two are numpy arrays; the third is a list of int32 arrays.
            Three empty lists are returned when either initial set is empty.

        Raises:
            AssertionError: if a straddling set is empty for a box whose
                initial mask is not 0-dimensional.
        """
        s_in, s_st = self.get_initial_sets(img, bboxes, masks, boxes)

        if len(s_in) == 0 or len(s_st) == 0:
            return [], [], []

        s_st = self.rebase_sst(s_in, s_st, bboxes)
        final_boxes = []
        final_masks = []
        added_superpixel_masks = []
        for bbox, sin, sst in zip(bboxes, s_in, s_st):
            s = sin
            # A 0-d entry is skipped entirely, so the outputs can be shorter
            # than ``bboxes`` (presumably a placeholder for "no initial
            # mask" -- confirm against get_initial_sets).
            if s.ndim == 0:
                continue
            assert len(sst) >= 1, "No straddling boxes are found"

            proc = 0
            new_superpixels = np.zeros_like(s)
            # Candidate mask: current mask plus the first straddling mask.
            new_s = np.bitwise_or(s, sst[0])
            iou_old = bbox_iou(mask_to_bbox(np.array([s])),
                               np.array([bbox]))[0][0]
            iou_new = bbox_iou(mask_to_bbox(np.array([new_s])),
                               np.array([bbox]))[0][0]
            for sk in sst[1:]:
                # Stop once the pending candidate is worse than the current
                # mask; the candidate is then discarded.
                if iou_old > iou_new:
                    break
                # Accept the pending candidate and build the next one.
                iou_old = iou_new
                s = new_s
                new_s = np.bitwise_or(s, sk)
                iou_new = bbox_iou(mask_to_bbox(np.array([new_s])),
                                   np.array([bbox]))[0][0]
                proc += 1
                # NOTE(review): this records ``sk`` (part of the pending
                # candidate), not the mask that was just accepted into
                # ``s``; ``sst[0]`` is never recorded. Confirm the intent.
                new_superpixels = np.bitwise_or(new_superpixels, sk)
            # NOTE(review): if the loop ends without ``break``, the last
            # candidate ``new_s`` is never accepted even when its IoU did
            # not decrease. Confirm whether that is intended.
            final_masks.append(s)
            # A batch of one mask yields one box, so [-1] selects that box.
            final_boxes.append(mask_to_bbox(np.array([s]))[-1])
            added_superpixel_masks.append(new_superpixels.astype(np.int32))
            if self.verbosity:
                print('No. of superpixels added: {:2d}'.format(proc))
        final_masks, final_boxes = np.array(final_masks), np.array(final_boxes)
        return final_boxes, final_masks, added_superpixel_masks
Example #9
0
def main():
    """Segment one image with FCISResNet101 and plot the result.

    Command-line arguments:
        --gpu: GPU id; a negative value (the default, -1) keeps the model
            on the CPU.
        --pretrained-model: value forwarded to the model constructor; when
            omitted it defaults to the name of the selected dataset.
        --dataset: ``'sbd'`` (default) or ``'coco'``; selects the label
            names and the model configuration.
        image: path of the image to read.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--pretrained-model', default=None)
    parser.add_argument('--dataset', choices=('sbd', 'coco'), default='sbd')
    parser.add_argument('image')
    args = parser.parse_args()

    if args.dataset == 'sbd':
        if args.pretrained_model is None:
            args.pretrained_model = 'sbd'
        label_names = sbd_instance_segmentation_label_names
        model = FCISResNet101(n_fg_class=len(label_names),
                              pretrained_model=args.pretrained_model)
    elif args.dataset == 'coco':
        if args.pretrained_model is None:
            args.pretrained_model = 'coco'
        label_names = coco_instance_segmentation_label_names
        # Work on a copy: the attribute is shared at class level, so
        # overriding 'min_size' in place would silently change the defaults
        # of every FCISResNet101 created afterwards in this process.
        proposal_creator_params = dict(FCISResNet101.proposal_creator_params)
        proposal_creator_params['min_size'] = 2
        model = FCISResNet101(n_fg_class=len(label_names),
                              anchor_scales=(4, 8, 16, 32),
                              pretrained_model=args.pretrained_model,
                              proposal_creator_params=proposal_creator_params)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    img = read_image(args.image, color=True)

    # predict() takes a batch; only the single entry we passed is used.
    masks, labels, scores = model.predict([img])
    mask, label, score = masks[0], labels[0], scores[0]
    bbox = mask_to_bbox(mask)
    colors = voc_colormap(list(range(1, len(mask) + 1)))
    # Draw the boxes first, then overlay the masks on the same axes.
    ax = vis_bbox(img, bbox, instance_colors=colors, alpha=0.5, linewidth=1.5)
    vis_instance_segmentation(None,
                              mask,
                              label,
                              score,
                              label_names=label_names,
                              instance_colors=colors,
                              alpha=0.7,
                              ax=ax)
    plt.show()
Example #10
0
    def _check_mask_head_loss_pre(self, xp):
        """Check the structure of what ``mask_head_loss_pre`` returns.

        Three lists of RoIs and RoI indices, two batches of random masks
        with their boxes, and three label arrays are built with the array
        module ``xp``. Each of the four outputs must be a list of three
        ``xp.ndarray`` objects that agree on their first dimension and have
        the expected trailing shapes and dtypes.
        """
        n_inst = 12
        segm_size = 28

        def as_f32(rows):
            return xp.array(rows, dtype=np.float32)

        def as_i32(values):
            return xp.array(values, dtype=np.int32)

        rois = [
            as_f32(((4, 1, 6, 3), )),
            as_f32(((0, 1, 2, 3), (5, 4, 10, 6))),
            as_f32(((10, 4, 12, 10), )),
        ]
        roi_indices = [as_i32((0, )), as_i32((1, 0)), as_i32((1, ))]
        masks = [_random_array(xp, (n_inst, 60, 70)) for _ in range(2)]
        bboxes = [mask_to_bbox(mask) for mask in masks]
        labels = [as_i32((1, )), as_i32((10, 4)), as_i32((3, ))]

        rois, roi_indices, gt_segms, gt_mask_labels = mask_head_loss_pre(
            rois, roi_indices, masks, bboxes, labels, segm_size)

        # One entry per input list for every output.
        for output in (rois, roi_indices, gt_segms, gt_mask_labels):
            self.assertEqual(len(output), 3)

        for level in range(3):
            roi = rois[level]
            roi_index = roi_indices[level]
            gt_segm = gt_segms[level]
            gt_mask_label = gt_mask_labels[level]

            for array in (roi, roi_index, gt_segm, gt_mask_label):
                self.assertIsInstance(array, xp.ndarray)

            # All four outputs describe the same number of RoIs.
            n_roi = roi.shape[0]
            for array in (roi_index, gt_segm, gt_mask_label):
                self.assertEqual(n_roi, array.shape[0])

            self.assertEqual(roi.shape[1:], (4, ))
            self.assertEqual(roi_index.shape[1:], ())
            self.assertEqual(gt_segm.shape[1:], (segm_size, segm_size))
            self.assertEqual(gt_mask_label.shape[1:], ())
            self.assertEqual(gt_segm.dtype, np.float32)
            self.assertEqual(gt_mask_label.dtype, np.int32)
Example #11
0
def main():
    """Segment one image with a COCO FCISPSROIAlignResNet101 and plot it.

    Command-line arguments:
        --gpu: GPU id; a negative value (the default, -1) keeps the model
            on the CPU.
        --pretrained-model: value forwarded to the model constructor
            (default ``'coco'``).
        image: path of the image to read.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--gpu', type=int, default=-1)
    arg_parser.add_argument('--pretrained-model', default='coco')
    arg_parser.add_argument('image')
    opts = arg_parser.parse_args()

    label_names = coco_instance_segmentation_label_names
    proposal_creator_params = dict(
        nms_thresh=0.7,
        n_train_pre_nms=12000,
        n_train_post_nms=2000,
        n_test_pre_nms=6000,
        n_test_post_nms=1000,
        force_cpu_nms=False,
        min_size=0,
    )

    net = FCISPSROIAlignResNet101(
        n_fg_class=len(label_names),
        min_size=800, max_size=1333,
        anchor_scales=(2, 4, 8, 16, 32),
        pretrained_model=opts.pretrained_model,
        proposal_creator_params=proposal_creator_params)

    use_gpu = opts.gpu >= 0
    if use_gpu:
        chainer.cuda.get_device_from_id(opts.gpu).use()
        net.to_gpu()

    image = read_image(opts.image, color=True)

    # predict() takes a batch; only the single entry we passed is used.
    batch_masks, batch_labels, batch_scores = net.predict([image])
    inst_mask = batch_masks[0]
    inst_label = batch_labels[0]
    inst_score = batch_scores[0]

    inst_bbox = mask_to_bbox(inst_mask)
    n_inst = len(inst_mask)
    palette = voc_colormap(list(range(1, n_inst + 1)))

    # Draw the boxes first, then overlay the masks on the same axes.
    axes = vis_bbox(
        image, inst_bbox, instance_colors=palette, alpha=0.5, linewidth=1.5)
    vis_instance_segmentation(
        None, inst_mask, inst_label, inst_score,
        label_names=label_names,
        instance_colors=palette, alpha=0.7, ax=axes)
    plt.show()
Example #12
0
    def test(self):
        """Compare ``scale_mask`` with a nearest-neighbour ``resize``.

        Ten random masks are painted inside random boxes on an 80x90 canvas.
        ``scale_mask`` is called with the boxes recomputed from those masks
        and a size of ``2 * H``, and its output must equal the whole mask
        stack resized to ``(2 * H, 2 * W)`` with nearest interpolation.
        """
        H = 80
        W = 90
        n_inst = 10

        # ``np.bool`` was removed in NumPy 1.24; the builtin ``bool`` is the
        # equivalent dtype on every NumPy version.
        mask = np.zeros((n_inst, H, W), dtype=bool)
        bbox = generate_random_bbox(n_inst, (H, W), 10, 30).astype(np.int32)
        for i, bb in enumerate(bbox):
            y_min, x_min, y_max, x_max = bb
            m = np.random.randint(0, 2, size=(y_max - y_min, x_max - x_min))
            m[5, 5] = 1  # At least one element is one
            mask[i, y_min:y_max, x_min:x_max] = m
        # Replace the sampled boxes by the boxes computed from the masks.
        bbox = mask_to_bbox(mask)
        size = H * 2
        out_H = size
        out_W = W * 2
        out_mask = scale_mask(mask, bbox, size)

        expected = resize(mask.astype(np.float32), (out_H, out_W),
                          interpolation=PIL.Image.NEAREST).astype(bool)
        np.testing.assert_equal(out_mask, expected)
Example #13
0
 def check_call(self, model, imgs, masks, labels, scale):
     """Call ``model`` and assert that the returned loss has shape ``()``.

     The boxes passed to the model are computed from ``masks[0]`` and given
     a leading axis of length one.
     """
     first_bbox = mask_to_bbox(masks[0])
     batched_bbox = first_bbox[None]
     result = model(imgs, masks, labels, batched_bbox, scale)
     self.assertEqual(result.shape, ())
    def callback(self, imgmsg):
        """Run instance segmentation on ``imgmsg`` and publish the results.

        The image is decoded as RGB, passed to ``self.model`` and the
        predictions are published as point-index clusters, labels, class and
        instance label images, rectangles and a classification result. A
        visualization image is published only while ``self.pub_viz`` has
        connections. Every outgoing message reuses ``imgmsg.header``.
        """
        bridge = cv_bridge.CvBridge()
        img = bridge.imgmsg_to_cv2(imgmsg, desired_encoding='rgb8')
        img_chw = img.transpose((2, 0, 1))  # C, H, W

        if self.gpu >= 0:
            chainer.cuda.get_device_from_id(self.gpu).use()
        if self.model_name == 'mask_rcnn_resnet50':
            # This model returns the boxes itself.
            bboxes, masks, labels, scores = self.model.predict([img_chw])
            bboxes = bboxes[0]
            masks = masks[0]
            labels = labels[0]
            scores = scores[0]
        else:
            # Other models return no boxes; derive them from the masks.
            img_chw = img_chw.astype(np.float32)
            masks, labels, scores = self.model.predict([img_chw])
            masks = masks[0]
            labels = labels[0]
            scores = scores[0]
            bboxes = mask_to_bbox(masks)

        msg_indices = ClusterPointIndices(header=imgmsg.header)
        msg_labels = LabelArray(header=imgmsg.header)
        # -1: label for background
        lbl_cls = -np.ones(img.shape[:2], dtype=np.int32)
        lbl_ins = -np.ones(img.shape[:2], dtype=np.int32)
        for ins_id, (mask, label) in enumerate(zip(masks, labels)):
            # Indices into the flattened image of the pixels in this mask.
            indices = np.where(mask.flatten())[0]
            indices_msg = PointIndices(header=imgmsg.header, indices=indices)
            msg_indices.cluster_indices.append(indices_msg)
            class_name = self.fg_class_names[label]
            msg_labels.labels.append(Label(id=label, name=class_name))
            # Later instances overwrite earlier ones where masks overlap.
            lbl_cls[mask] = label
            lbl_ins[mask] = ins_id  # instance_id
        self.pub_indices.publish(msg_indices)
        self.pub_labels.publish(msg_labels)

        msg_lbl_cls = bridge.cv2_to_imgmsg(lbl_cls)
        msg_lbl_ins = bridge.cv2_to_imgmsg(lbl_ins)
        msg_lbl_cls.header = msg_lbl_ins.header = imgmsg.header
        self.pub_lbl_cls.publish(msg_lbl_cls)
        self.pub_lbl_ins.publish(msg_lbl_ins)

        cls_msg = ClassificationResult(
            header=imgmsg.header,
            classifier=self.classifier_name,
            target_names=self.fg_class_names,
            labels=labels,
            label_names=[self.fg_class_names[l] for l in labels],
            label_proba=scores,
        )

        rects_msg = RectArray(header=imgmsg.header)
        for bbox in bboxes:
            # bbox is read as (y_min, x_min, y_max, x_max).
            rect = Rect(x=bbox[1],
                        y=bbox[0],
                        width=bbox[3] - bbox[1],
                        height=bbox[2] - bbox[0])
            rects_msg.rects.append(rect)
        self.pub_rects.publish(rects_msg)
        self.pub_class.publish(cls_msg)

        # Skip rendering entirely when nobody is subscribed.
        if self.pub_viz.get_num_connections() > 0:
            n_fg_class = len(self.fg_class_names)
            captions = [
                '{:d}: {:s}'.format(l, self.fg_class_names[l]) for l in labels
            ]
            # Labels are shifted by one and n_class is one larger than the
            # number of foreground classes (presumably index 0 is the
            # background for draw_instance_bboxes -- confirm).
            viz = chainer_mask_rcnn.utils.draw_instance_bboxes(
                img,
                bboxes,
                labels + 1,
                n_class=n_fg_class + 1,
                masks=masks,
                captions=captions)
            msg_viz = bridge.cv2_to_imgmsg(viz, encoding='rgb8')
            msg_viz.header = imgmsg.header
            self.pub_viz.publish(msg_viz)
Example #15
0
def main():
    """Run FCIS instance segmentation on a video stream and display it.

    Command-line arguments:
        --gpu: GPU id; a negative value (the default, -1) keeps the model
            on the CPU.
        --pretrained-model: value forwarded to ``FCISResNet101``
            (default ``'sbd'``).
        video: path of a video file, or ``"0"`` for capture device 0.

    For every frame the model is run, a text label is drawn at the centre
    of each detection, the FPS and frame counters are drawn, and the
    instance masks are blended over the frame, which is shown in the
    "Fcis Result" window. Pressing ``q`` stops the loop. The capture device
    and all windows are released on exit.

    Raises:
        ImportError: if the video source cannot be opened.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--pretrained-model', default='sbd')
    parser.add_argument('video')
    args = parser.parse_args()

    model = FCISResNet101(
        n_fg_class=20, pretrained_model=args.pretrained_model)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # "0" selects capture device 0; anything else is used as a path.
    source = 0 if args.video == "0" else args.video
    vid = cv2.VideoCapture(source)
    if not vid.isOpened():
        # NOTE(review): ImportError is a misleading type for an I/O failure;
        # it is kept only so existing callers see the same exception.
        raise ImportError("Couldn't open video file or webcam.")

    try:
        # Report the frame size of the source.
        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        print(vidw)
        print(vidh)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()

        frame_count = 1
        while True:
            ret, frame = vid.read()
            if not ret:
                print("Done!")
                return

            # BGR -> RGB
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Result image (stays in BGR for drawing and display)
            result = frame.copy()

            # (H, W, C) -> (C, H, W)
            img = np.asarray(rgb, dtype=np.float32).transpose((2, 0, 1))

            # Object Detection
            masks, labels, scores = model.predict([img])
            mask, label, score = masks[0], labels[0], scores[0]
            bbox = mask_to_bbox(mask)

            # One colour per detected instance
            instance_colors = np.array(
                voc_colormap(list(range(1, len(bbox) + 1))))

            # 4-channel overlay that collects all instance masks, plus an
            # all-zero alpha plane to extend the frame to 4 channels.
            _, H, W = mask.shape
            canvas_img = np.zeros((H, W, 4), dtype=np.uint8)
            alf_img = np.zeros((H, W, 1), dtype=np.uint8)

            for i, (bb, msk) in enumerate(zip(bbox, mask)):
                ymin, xmin, ymax, xmax = (int(v) for v in bb[:4])
                class_num = int(label[i])
                conf = float(score[i])

                text = sbd_instance_segmentation_label_names[
                           class_num] + " " + ('%.2f' % conf)
                print(text)

                # Anchor the label near the centre of the box. The original
                # expression ``xmax - xmin / 2`` divided only ``xmin``
                # because of operator precedence.
                text_x = int(round((xmin + xmax) / 2.0)) - 30
                text_y = int(round((ymin + ymax) / 2.0)) - 30
                text_top = (text_x, text_y - 10)
                text_bot = (text_x + 80, text_y + 5)
                text_pos = (text_x + 5, text_y)

                # Draw label
                cv2.rectangle(result, text_top, text_bot, (255, 255, 255), -1)
                cv2.putText(result, text, text_pos,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

                # Paint the mask onto the overlay (degenerate boxes skipped)
                if ymax > ymin and xmax > xmin:
                    color = instance_colors[i % len(instance_colors)]
                    canvas_img[msk] = np.append(color, 0.7 * 255)  # alpha=0.7

            # Calculate FPS
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS:" + str(curr_fps)
                curr_fps = 0

            # Draw FPS in top right corner (relative to the frame width, so
            # it is identical to the old fixed position on 640 px frames)
            frame_w = result.shape[1]
            cv2.rectangle(result, (frame_w - 50, 0), (frame_w, 17),
                          (0, 0, 0), -1)
            cv2.putText(result, fps, (frame_w - 45, 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.35, (255, 255, 255), 1)

            # Draw Frame Number
            cv2.rectangle(result, (0, 0), (50, 17), (0, 0, 0), -1)
            cv2.putText(result, str(frame_count), (0, 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.35, (255, 255, 255), 1)

            # Compose the displayed image once per frame, after all drawing,
            # so it always exists (even without detections) and includes the
            # FPS / frame-number overlays. np.dstack avoids concatenating
            # the result of cv2.split, which is a tuple on some OpenCV
            # versions, with a list.
            mask_img = canvas_img
            mask_result = cv2.addWeighted(
                np.dstack((result, alf_img)), 1, mask_img, 0.5, 0)

            # Output Result
            cv2.imshow("Fcis Result", mask_result)

            # For Debug
            print("===== BBOX Result =====")
            print(type(result))
            print(result.shape)
            print(type(result.shape))

            print("===== Mask img =====")
            print(type(mask_img))
            print(mask_img.shape)
            print(type(mask_img.shape))

            # Stop Processing
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            frame_count += 1
    finally:
        # Always free the capture device and close the display window.
        vid.release()
        cv2.destroyAllWindows()
Example #16
0
    def multi_thresholding_superpixel_merging(self,
                                              img,
                                              initial_boxes,
                                              aligned_boxes,
                                              aligned_masks,
                                              s_masks,
                                              s_boxes,
                                              threshold=None):
        """ 1. performs multi-thresholding step for different thresholds
            2. incorporate some randomness by scoring these randomly
            3. remove redundant boxes using non-maximum suppression

        args:
            img          : image; only its last two dimensions (h, w) are used
            initial_boxes: bboxes predicted from detector (not read here)
            aligned_boxes: bboxes after bbox-alignment
            aligned_masks: masks  after bbox-alignment
                           `aligned_boxes` are generated by enclosing these masks
            s_masks      : masks corresponding to superpixels
            s_boxes      : bounding boxes for the corresponding superpixels
                           (not read here)
            threshold    : straddling expansion threshold; currently unused,
                           the fixed list 0.1 ... 0.5 is always applied

        returns:
            dict mapping the index of each aligned box to
            `[segment, bbox, score]`, the first candidate kept by `nms`
        """
        _, h, w = img.shape

        def get_thresholded_spixels(threshold, s_masks, a_bbox):
            """ generates the set of superpixels which have greater than `threshold`
                overlap with the `a_bbox`
            """
            req_masks = []
            box_mask = self.box_to_mask(a_bbox, (h, w))
            for mask in s_masks:
                # fraction of the superpixel that lies inside the box
                intersect = np.bitwise_and(box_mask, mask).sum()
                ratio = intersect / np.count_nonzero(mask)
                if ratio >= threshold:
                    req_masks.append(mask)
            # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the
            # equivalent dtype on every NumPy version.
            return np.array(req_masks).astype(bool)

        # generate sets for different thresholds
        thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]
        final_set_ = {}
        for idx, (a_mask,
                  a_bbox) in enumerate(zip(aligned_masks, aligned_boxes)):
            box_set = {}
            # `thresh` rather than `threshold`, so the parameter of the same
            # name is no longer silently overwritten
            for thresh in thresholds:
                req_superpixels = get_thresholded_spixels(
                    thresh, s_masks, a_bbox)
                super_segment = np.sum(req_superpixels, axis=0)
                final_segment = np.bitwise_or(super_segment, a_mask)
                final_bbox = mask_to_bbox(np.array([final_segment]))[0]
                box_set[thresh] = [final_segment, final_bbox]
            final_set_[idx] = box_set

        # score the boxes: a random value scaled by the position of the
        # threshold, so later thresholds tend to score higher
        for box_set in final_set_.values():
            for idx, (thresh, seg_box) in enumerate(box_set.items()):
                r = np.random.rand()
                score = r * (idx + 1)
                # only the value of an existing key changes, which is safe
                # while iterating over the dict
                box_set[thresh] = seg_box + [score]

        # nms
        for key, box_set in final_set_.items():
            segments, bboxes, scores = zip(*box_set.values())
            segments, bboxes, scores = np.array(segments), np.array(
                bboxes), np.array(scores)
            idxs = nms(bboxes, thresh=0.9, score=scores)
            # keep only the first candidate returned by nms
            final_picks = [segments[idxs][0], bboxes[idxs][0], scores[idxs][0]]
            final_set_[key] = final_picks
        return final_set_