def check(self, mask, expected):
    """Assert that ``mask_to_bbox(mask)`` reproduces ``expected``.

    The computed boxes must be an instance of the same array type as
    ``expected`` and must be element-wise equal to it once both arrays
    have been transferred to the CPU.
    """
    actual = mask_to_bbox(mask)
    self.assertIsInstance(actual, type(expected))

    actual_cpu = cuda.to_cpu(actual)
    expected_cpu = cuda.to_cpu(expected)
    np.testing.assert_equal(actual_cpu, expected_cpu)
def check_proposal_target_creator(self, roi, mask, label,
                                  proposal_target_creator):
    """Validate types, shapes and label counts of the creator's outputs.

    Boxes are derived from ``mask`` with ``mask_to_bbox`` and passed to
    ``proposal_target_creator`` together with ``roi``, ``mask`` and
    ``label``. The expected sizes come from ``self.n_sample``,
    ``self.mask_size`` and ``self.pos_ratio``.
    """
    xp = cuda.get_array_module(roi)
    outputs = proposal_target_creator(
        roi, mask, label, mask_to_bbox(mask), mask_size=self.mask_size)
    sample_roi, gt_roi_mask, gt_roi_label, gt_roi_loc = outputs

    # Every output must live on the same device as the input ROIs.
    for out in (sample_roi, gt_roi_loc, gt_roi_mask, gt_roi_label):
        self.assertIsInstance(out, xp.ndarray)

    sample_roi = cuda.to_cpu(sample_roi)
    gt_roi_loc = cuda.to_cpu(gt_roi_loc)
    gt_roi_mask = cuda.to_cpu(gt_roi_mask)
    gt_roi_label = cuda.to_cpu(gt_roi_label)

    # Shapes are fully determined by the sampling configuration.
    n_sample = self.n_sample
    mask_shape = (n_sample, self.mask_size, self.mask_size)
    self.assertEqual(sample_roi.shape, (n_sample, 4))
    self.assertEqual(gt_roi_loc.shape, (n_sample, 4))
    self.assertEqual(gt_roi_mask.shape, mask_shape)
    self.assertEqual(gt_roi_label.shape, (n_sample, ))

    # All labels are non-negative; label 0 is counted as background and
    # labels >= 1 as foreground.
    np.testing.assert_equal(np.sum(gt_roi_label >= 0), n_sample)
    n_pos = np.sum(gt_roi_label >= 1)
    n_neg = np.sum(gt_roi_label == 0)
    self.assertLessEqual(n_pos, n_sample * self.pos_ratio)
    self.assertLessEqual(n_neg, n_sample - n_pos)
def main():
    """Run FCISPSROIAlignResNet101 on one image and display the result.

    Command-line arguments: ``--gpu`` (device id, negative for CPU),
    ``--pretrained-model`` (defaults to ``'sbd'``) and the positional
    ``image`` path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--pretrained-model', default='sbd')
    parser.add_argument('image')
    args = parser.parse_args()

    model = FCISPSROIAlignResNet101(
        n_fg_class=20, pretrained_model=args.pretrained_model)
    use_gpu = args.gpu >= 0
    if use_gpu:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    img = read_image(args.image, color=True)

    # predict() works on a batch; only the first (single) result is used.
    masks, labels, scores = model.predict([img])
    mask = masks[0]
    label = labels[0]
    score = scores[0]
    bbox = mask_to_bbox(mask)

    # One colour per detected instance, indexed from 1.
    n_inst = len(mask)
    colors = voc_colormap(list(range(1, n_inst + 1)))

    ax = vis_bbox(
        img, bbox, instance_colors=colors, alpha=0.5, linewidth=1.5)
    vis_instance_segmentation(
        None, mask, label, score,
        label_names=sbd_instance_segmentation_label_names,
        instance_colors=colors, alpha=0.7, ax=ax)
    plt.show()
def refine_bbox(row):
    """Recompute a bounding box from the segmentation stored in ``row``.

    ``row`` is indexed by the keys ``'segmentation'``, ``'width'`` and
    ``'height'``. The segmentation is rasterised with ``segm_to_mask``,
    the box enclosing that single mask is taken from ``mask_to_bbox`` and
    the box is then passed through ``reform_bbox`` before being returned.
    """
    mask = segm_to_mask(row['segmentation'], row['width'], row['height'])
    # mask_to_bbox receives a leading instance axis of length one.
    single_instance = mask[None, :, :]
    enclosing_box = mask_to_bbox(single_instance)[0]
    return reform_bbox(enclosing_box)
def predict(self, img):
    """Run ``self.model`` on one image and return ROI masks and boxes.

    The last axis of ``img`` is reversed and moved to the front before
    prediction (presumably BGR ``(H, W, C)`` to RGB ``(C, H, W)`` --
    confirm against the caller).

    Returns:
        tuple: ``(roi_mask, bbox, label, score)`` where ``bbox`` is rounded
        to ``int32`` and ``roi_mask`` is built by ``mask_to_roi_mask`` from
        the predicted masks scaled to ``uint8`` (0 or 255 for binary masks).
    """
    channel_reversed = img[:, :, ::-1]
    chw = channel_reversed.transpose((2, 0, 1))
    batch = chw[None]

    masks, labels, scores = self.model.predict(batch)
    mask = masks[0]
    label = labels[0]
    score = scores[0]

    bbox = np.round(mask_to_bbox(mask)).astype(np.int32)
    mask_u8 = (mask * 255).astype(np.uint8)
    roi_mask = mask_to_roi_mask(mask_u8, bbox)
    return roi_mask, bbox, label, score
def check(self, mask, expected):
    """Assert that ``scale_mask`` on ``mask`` yields ``expected``.

    Boxes are derived from ``mask`` with ``mask_to_bbox`` and the masks are
    rescaled with a fixed ``size`` of 4. The output must keep the input's
    array type, have boolean dtype and equal ``expected`` on the CPU.
    """
    in_type = type(mask)
    bbox = mask_to_bbox(mask)
    size = 4
    out_mask = scale_mask(mask, bbox, size)

    self.assertIsInstance(out_mask, in_type)
    # ``np.bool`` was deprecated in NumPy 1.20 and removed in 1.24;
    # ``np.bool_`` names the boolean scalar type on every NumPy version.
    self.assertEqual(out_mask.dtype, np.bool_)
    np.testing.assert_equal(cuda.to_cpu(out_mask), cuda.to_cpu(expected))
def rebase_sst(self, s_in, s_st, bboxes):
    """Reorder each straddling set by IoU with its target box.

    For every ``(s_in, s_st, bbox)`` triple, each mask of the straddling
    set is OR-ed with the inside mask, the box enclosing that union is
    computed, and the straddling masks are sorted by descending IoU between
    that enclosing box and ``bbox``.

    Returns:
        list: the reordered straddling sets, one per input triple.
    """
    reordered = []
    for inside, straddling, target_box in zip(s_in, s_st, bboxes):
        n_masks, height, width = straddling.shape

        # Union of the inside mask with each straddling mask, as float32.
        unions = np.empty((n_masks, height, width), dtype=np.float32)
        for pos, candidate in enumerate(straddling):
            unions[pos] = np.bitwise_or(inside, candidate)

        union_boxes = mask_to_bbox(unions)
        target = np.array([target_box])
        iou = np.squeeze(bbox_iou(union_boxes, target))

        # Highest IoU first.
        descending = np.argsort(iou, axis=0)[::-1]
        reordered.append(straddling[descending])
    return reordered
def box_alignment(self, img, bboxes, masks, boxes):
    """Greedily grow each inside mask with straddling superpixels.

    The inside and straddling sets come from ``self.get_initial_sets`` and
    the straddling sets are reordered by ``self.rebase_sst``. For every
    box, straddling masks are merged one at a time for as long as the IoU
    between the enclosing box of the merged mask and the target box does
    not decrease.

    Returns:
        tuple: ``(final_boxes, final_masks, added_superpixel_masks)``.
        Three empty lists are returned when either initial set is empty.
    """
    def _enclosing_iou(region, target_box):
        # IoU between the box enclosing ``region`` and ``target_box``.
        enclosing = mask_to_bbox(np.array([region]))
        return bbox_iou(enclosing, np.array([target_box]))[0][0]

    s_in, s_st = self.get_initial_sets(img, bboxes, masks, boxes)
    if len(s_in) == 0 or len(s_st) == 0:
        return [], [], []
    s_st = self.rebase_sst(s_in, s_st, bboxes)

    final_boxes = []
    final_masks = []
    added_superpixel_masks = []
    for target_box, inside, straddling in zip(bboxes, s_in, s_st):
        # Zero-dimensional inside sets are skipped entirely.
        if inside.ndim == 0:
            continue
        assert len(straddling) >= 1, "No straddling boxes are found"

        accepted = inside
        tracked = np.zeros_like(accepted)
        candidate = np.bitwise_or(accepted, straddling[0])
        prev_iou = _enclosing_iou(accepted, target_box)
        cand_iou = _enclosing_iou(candidate, target_box)

        n_steps = 0
        for extra in straddling[1:]:
            # Stop as soon as the pending candidate made the IoU worse.
            if prev_iou > cand_iou:
                break
            accepted, prev_iou = candidate, cand_iou
            candidate = np.bitwise_or(accepted, extra)
            cand_iou = _enclosing_iou(candidate, target_box)
            n_steps += 1
            # Accumulates every mask from straddling[1:] that was merged
            # into a candidate; straddling[0] is never included here.
            tracked = np.bitwise_or(tracked, extra)

        final_masks.append(accepted)
        final_boxes.append(mask_to_bbox(np.array([accepted]))[-1])
        added_superpixel_masks.append(tracked.astype(np.int32))
        if self.verbosity:
            print('No. of superpixels added: {:2d}'.format(n_steps))

    return (np.array(final_boxes), np.array(final_masks),
            added_superpixel_masks)
def main():
    """Run FCISResNet101 (SBD or COCO weights) on one image and show it.

    Command-line arguments: ``--gpu`` (device id, negative for CPU),
    ``--pretrained-model`` (defaults to the name of the chosen dataset),
    ``--dataset`` (``'sbd'`` or ``'coco'``) and the positional ``image``
    path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--pretrained-model', default=None)
    parser.add_argument('--dataset', choices=('sbd', 'coco'), default='sbd')
    parser.add_argument('image')
    args = parser.parse_args()

    # argparse ``choices`` guarantees that one of the two branches runs.
    if args.dataset == 'sbd':
        if args.pretrained_model is None:
            args.pretrained_model = 'sbd'
        label_names = sbd_instance_segmentation_label_names
        model = FCISResNet101(
            n_fg_class=len(label_names),
            pretrained_model=args.pretrained_model)
    elif args.dataset == 'coco':
        if args.pretrained_model is None:
            args.pretrained_model = 'coco'
        label_names = coco_instance_segmentation_label_names
        # Copy the class-level defaults before overriding ``min_size`` so
        # the shared ``FCISResNet101.proposal_creator_params`` dict is not
        # mutated for every other user of the class.
        proposal_creator_params = dict(
            FCISResNet101.proposal_creator_params)
        proposal_creator_params['min_size'] = 2
        model = FCISResNet101(
            n_fg_class=len(label_names),
            anchor_scales=(4, 8, 16, 32),
            pretrained_model=args.pretrained_model,
            proposal_creator_params=proposal_creator_params)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    img = read_image(args.image, color=True)

    # predict() works on a batch; only the first (single) result is used.
    masks, labels, scores = model.predict([img])
    mask, label, score = masks[0], labels[0], scores[0]
    bbox = mask_to_bbox(mask)

    # One colour per detected instance, indexed from 1.
    colors = voc_colormap(list(range(1, len(mask) + 1)))
    ax = vis_bbox(
        img, bbox, instance_colors=colors, alpha=0.5, linewidth=1.5)
    vis_instance_segmentation(
        None, mask, label, score, label_names=label_names,
        instance_colors=colors, alpha=0.7, ax=ax)
    plt.show()
def _check_mask_head_loss_pre(self, xp):
    """Check output structure of ``mask_head_loss_pre`` for backend ``xp``.

    Fixed ROIs, ROI indices and labels for three levels are combined with
    random masks (and the boxes derived from them). The four returned
    lists must each have three entries whose leading dimensions agree per
    level and whose trailing shapes and dtypes match the expected layout.
    """
    n_inst = 12
    segm_size = 28
    n_level = 3

    rois = [
        xp.array(((4, 1, 6, 3), ), dtype=np.float32),
        xp.array(((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32),
        xp.array(((10, 4, 12, 10), ), dtype=np.float32),
    ]
    roi_indices = [
        xp.array((0, ), dtype=np.int32),
        xp.array((1, 0), dtype=np.int32),
        xp.array((1, ), dtype=np.int32),
    ]
    masks = [
        _random_array(xp, (n_inst, 60, 70)),
        _random_array(xp, (n_inst, 60, 70)),
    ]
    bboxes = [mask_to_bbox(instance_masks) for instance_masks in masks]
    labels = [
        xp.array((1, ), dtype=np.int32),
        xp.array((10, 4), dtype=np.int32),
        xp.array((3, ), dtype=np.int32),
    ]

    rois, roi_indices, gt_segms, gt_mask_labels = mask_head_loss_pre(
        rois, roi_indices, masks, bboxes, labels, segm_size)

    self.assertEqual(len(rois), n_level)
    self.assertEqual(len(roi_indices), n_level)
    self.assertEqual(len(gt_segms), n_level)
    self.assertEqual(len(gt_mask_labels), n_level)

    for level in range(n_level):
        level_rois = rois[level]
        level_indices = roi_indices[level]
        level_segms = gt_segms[level]
        level_labels = gt_mask_labels[level]

        self.assertIsInstance(level_rois, xp.ndarray)
        self.assertIsInstance(level_indices, xp.ndarray)
        self.assertIsInstance(level_segms, xp.ndarray)
        self.assertIsInstance(level_labels, xp.ndarray)

        # All per-level outputs describe the same number of ROIs.
        n_roi = level_rois.shape[0]
        self.assertEqual(n_roi, level_indices.shape[0])
        self.assertEqual(n_roi, level_segms.shape[0])
        self.assertEqual(n_roi, level_labels.shape[0])

        self.assertEqual(level_rois.shape[1:], (4, ))
        self.assertEqual(level_indices.shape[1:], ())
        self.assertEqual(level_segms.shape[1:], (segm_size, segm_size))
        self.assertEqual(level_labels.shape[1:], ())

        self.assertEqual(level_segms.dtype, np.float32)
        self.assertEqual(level_labels.dtype, np.int32)
def main():
    """Run FCISPSROIAlignResNet101 (COCO setup) on one image and show it.

    Command-line arguments: ``--gpu`` (device id, negative for CPU),
    ``--pretrained-model`` (defaults to ``'coco'``) and the positional
    ``image`` path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--pretrained-model', default='coco')
    parser.add_argument('image')
    args = parser.parse_args()

    label_names = coco_instance_segmentation_label_names
    proposal_creator_params = dict(
        nms_thresh=0.7,
        n_train_pre_nms=12000,
        n_train_post_nms=2000,
        n_test_pre_nms=6000,
        n_test_post_nms=1000,
        force_cpu_nms=False,
        min_size=0,
    )

    model = FCISPSROIAlignResNet101(
        n_fg_class=len(label_names),
        min_size=800,
        max_size=1333,
        anchor_scales=(2, 4, 8, 16, 32),
        pretrained_model=args.pretrained_model,
        proposal_creator_params=proposal_creator_params)

    use_gpu = args.gpu >= 0
    if use_gpu:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    img = read_image(args.image, color=True)

    # predict() works on a batch; only the first (single) result is used.
    masks, labels, scores = model.predict([img])
    mask = masks[0]
    label = labels[0]
    score = scores[0]
    bbox = mask_to_bbox(mask)

    # One colour per detected instance, indexed from 1.
    n_inst = len(mask)
    colors = voc_colormap(list(range(1, n_inst + 1)))

    ax = vis_bbox(
        img, bbox, instance_colors=colors, alpha=0.5, linewidth=1.5)
    vis_instance_segmentation(
        None, mask, label, score,
        label_names=label_names,
        instance_colors=colors, alpha=0.7, ax=ax)
    plt.show()
def test(self):
    """Compare ``scale_mask`` against a nearest-neighbour ``resize``.

    Random masks are drawn inside random boxes, the boxes are recomputed
    with ``mask_to_bbox`` and the masks are scaled with ``size = 2 * H``.
    The result must equal the masks resized to ``(2 * H, 2 * W)``.
    """
    H = 80
    W = 90
    n_inst = 10

    # Builtin ``bool`` replaces the ``np.bool`` alias, which was deprecated
    # in NumPy 1.20 and removed in 1.24.
    mask = np.zeros((n_inst, H, W), dtype=bool)
    bbox = generate_random_bbox(n_inst, (H, W), 10, 30).astype(np.int32)
    for i, bb in enumerate(bbox):
        y_min, x_min, y_max, x_max = bb
        m = np.random.randint(0, 2, size=(y_max - y_min, x_max - x_min))
        m[5, 5] = 1  # At least one element is one
        mask[i, y_min:y_max, x_min:x_max] = m

    bbox = mask_to_bbox(mask)
    size = H * 2
    out_H = size
    out_W = W * 2
    out_mask = scale_mask(mask, bbox, size)

    expected = resize(
        mask.astype(np.float32), (out_H, out_W),
        interpolation=PIL.Image.NEAREST).astype(bool)
    np.testing.assert_equal(out_mask, expected)
def check_call(self, model, imgs, masks, labels, scale):
    """Call ``model`` once and assert that the loss has shape ``()``.

    Boxes are computed from the first element of ``masks`` and given a
    leading axis of length one before being passed to the model.
    """
    first_bbox = mask_to_bbox(masks[0])
    bboxes = first_bbox[None]
    loss = model(imgs, masks, labels, bboxes, scale)
    self.assertEqual(loss.shape, ())
def callback(self, imgmsg):
    """Segment instances in ``imgmsg`` and publish all derived messages.

    The image is decoded as ``rgb8``, passed to ``self.model`` and the
    results are published, in this order, on ``pub_indices``,
    ``pub_labels``, ``pub_lbl_cls``, ``pub_lbl_ins``, ``pub_rects`` and
    ``pub_class``. A visualisation is published on ``pub_viz`` only when
    that publisher has at least one connection.
    """
    header = imgmsg.header
    bridge = cv_bridge.CvBridge()
    img = bridge.imgmsg_to_cv2(imgmsg, desired_encoding='rgb8')
    img_chw = img.transpose((2, 0, 1))  # C, H, W

    if self.gpu >= 0:
        chainer.cuda.get_device_from_id(self.gpu).use()

    if self.model_name != 'mask_rcnn_resnet50':
        # This model returns no boxes, so they are derived from the masks.
        img_chw = img_chw.astype(np.float32)
        batch_masks, batch_labels, batch_scores = \
            self.model.predict([img_chw])
        masks = batch_masks[0]
        labels = batch_labels[0]
        scores = batch_scores[0]
        bboxes = mask_to_bbox(masks)
    else:
        batch_bboxes, batch_masks, batch_labels, batch_scores = \
            self.model.predict([img_chw])
        bboxes = batch_bboxes[0]
        masks = batch_masks[0]
        labels = batch_labels[0]
        scores = batch_scores[0]

    msg_indices = ClusterPointIndices(header=header)
    msg_labels = LabelArray(header=header)

    # -1: label for background
    image_hw = img.shape[:2]
    lbl_cls = -np.ones(image_hw, dtype=np.int32)
    lbl_ins = -np.ones(image_hw, dtype=np.int32)
    for ins_id, (mask, label) in enumerate(zip(masks, labels)):
        flat_indices = np.where(mask.flatten())[0]
        msg_indices.cluster_indices.append(
            PointIndices(header=header, indices=flat_indices))
        msg_labels.labels.append(
            Label(id=label, name=self.fg_class_names[label]))
        lbl_cls[mask] = label
        lbl_ins[mask] = ins_id  # instance_id
    self.pub_indices.publish(msg_indices)
    self.pub_labels.publish(msg_labels)

    msg_lbl_cls = bridge.cv2_to_imgmsg(lbl_cls)
    msg_lbl_ins = bridge.cv2_to_imgmsg(lbl_ins)
    msg_lbl_cls.header = msg_lbl_ins.header = header
    self.pub_lbl_cls.publish(msg_lbl_cls)
    self.pub_lbl_ins.publish(msg_lbl_ins)

    cls_msg = ClassificationResult(
        header=header,
        classifier=self.classifier_name,
        target_names=self.fg_class_names,
        labels=labels,
        label_names=[self.fg_class_names[lbl] for lbl in labels],
        label_proba=scores,
    )

    # Boxes are indexed as (y_min, x_min, y_max, x_max).
    rects_msg = RectArray(header=header)
    for bbox in bboxes:
        rects_msg.rects.append(Rect(
            x=bbox[1],
            y=bbox[0],
            width=bbox[3] - bbox[1],
            height=bbox[2] - bbox[0]))
    self.pub_rects.publish(rects_msg)
    self.pub_class.publish(cls_msg)

    # Skip the drawing work when nobody is subscribed to the topic.
    if self.pub_viz.get_num_connections() > 0:
        n_fg_class = len(self.fg_class_names)
        captions = [
            '{:d}: {:s}'.format(lbl, self.fg_class_names[lbl])
            for lbl in labels
        ]
        viz = chainer_mask_rcnn.utils.draw_instance_bboxes(
            img, bboxes, labels + 1, n_class=n_fg_class + 1,
            masks=masks, captions=captions)
        msg_viz = bridge.cv2_to_imgmsg(viz, encoding='rgb8')
        msg_viz.header = header
        self.pub_viz.publish(msg_viz)
def main():
    """Run FCISResNet101 on a video or webcam and show results live.

    Command-line arguments: ``--gpu`` (device id, negative for CPU),
    ``--pretrained-model`` (defaults to ``'sbd'``) and the positional
    ``video`` (a file path, or ``"0"`` for the first webcam).

    The loop ends when the stream is exhausted or ``q`` is pressed. The
    capture device and all windows are released on every exit path.

    Raises:
        ImportError: if the video source cannot be opened (exception type
            kept as in the original script).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--pretrained-model', default='sbd')
    parser.add_argument('video')
    args = parser.parse_args()

    model = FCISResNet101(
        n_fg_class=20, pretrained_model=args.pretrained_model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # "0" selects the default webcam; anything else is treated as a path.
    vid = cv2.VideoCapture(0 if args.video == "0" else args.video)
    try:
        if not vid.isOpened():
            raise ImportError("Couldn't open video file or webcam.")

        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        print(vidw)
        print(vidh)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()
        frame_count = 1

        while True:
            ret, frame = vid.read()
            if not ret:
                print("Done!")
                return

            # BGR -> RGB
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Result image (labels and overlays are drawn on a copy)
            result = frame.copy()
            # (H, W, C) -> (C, H, W)
            img = np.asarray(rgb, dtype=np.float32).transpose((2, 0, 1))

            # Instance segmentation on a batch of one frame
            masks, labels, scores = model.predict([img])
            mask, label, score = masks[0], labels[0], scores[0]
            bbox = mask_to_bbox(mask)

            # One colour per detected instance, indexed from 1.
            n_inst = len(bbox)
            instance_colors = np.array(
                voc_colormap(list(range(1, n_inst + 1))))

            # RGBA canvas the instance masks are painted on, plus an
            # all-zero fourth channel appended to the frame.
            _, H, W = mask.shape
            canvas_img = np.zeros((H, W, 4), dtype=np.uint8)
            alf_img = np.zeros((H, W, 1), dtype=np.uint8)

            for i, (bb, msk) in enumerate(zip(bbox, mask)):
                conf = score[i].tolist()
                ymin = int(bb[0])
                xmin = int(bb[1])
                ymax = int(bb[2])
                xmax = int(bb[3])
                class_num = int(label[i])

                text = sbd_instance_segmentation_label_names[
                    class_num] + " " + ('%.2f' % conf)
                print(text)

                # NOTE(review): ``xmax - xmin / 2`` parses as
                # ``xmax - (xmin / 2)``, which is not the box centre;
                # kept as-is, confirm the intended label position.
                test_x = round(xmax - xmin / 2) - 30
                test_y = round(ymax - ymin / 2) - 30
                text_top = (test_x, test_y - 10)
                text_bot = (test_x + 80, test_y + 5)
                text_pos = (test_x + 5, test_y)

                # Draw label
                cv2.rectangle(
                    result, text_top, text_bot, (255, 255, 255), -1)
                cv2.putText(
                    result, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX,
                    0.35, (0, 0, 0), 1)

                # Paint the instance mask, skipping degenerate boxes.
                color = instance_colors[i % len(instance_colors)]
                rgba = np.append(color, 0.7 * 255)  # alpha=0.7
                if ymax > ymin and xmax > xmin:
                    canvas_img[msk] = rgba

            # Compose once per frame, after all instances are drawn. The
            # original did this inside the instance loop, which left
            # ``mask_result`` / ``mask_img`` unbound (first frame) or stale
            # (later frames) whenever nothing was detected.
            mask_img = np.asarray(canvas_img)
            # cv2.split may return a tuple; convert before concatenating.
            frame_channels = list(cv2.split(result))
            mask_result = cv2.merge(frame_channels + [alf_img])
            mask_result = cv2.addWeighted(mask_result, 1, mask_img, 0.5, 0)

            # Calculate FPS
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS:" + str(curr_fps)
                curr_fps = 0

            # NOTE(review): the FPS and frame-number overlays are drawn on
            # ``result`` after ``mask_result`` was composed, so they do not
            # appear in the displayed window; ordering kept as in the
            # original.
            # Draw FPS in top right corner
            cv2.rectangle(result, (590, 0), (640, 17), (0, 0, 0), -1)
            cv2.putText(
                result, fps, (595, 10), cv2.FONT_HERSHEY_SIMPLEX,
                0.35, (255, 255, 255), 1)

            # Draw Frame Number
            cv2.rectangle(result, (0, 0), (50, 17), (0, 0, 0), -1)
            cv2.putText(
                result, str(frame_count), (0, 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.35, (255, 255, 255), 1)

            # Output Result
            cv2.imshow("Fcis Result", mask_result)

            # For Debug
            print("===== BBOX Result =====")
            print(type(result))
            print(result.shape)
            print(type(result.shape))
            print("===== Mask img =====")
            print(type(mask_img))
            print(mask_img.shape)
            print(type(mask_img.shape))

            # Stop Processing
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            frame_count += 1
    finally:
        # Free the capture device and close windows on every exit path.
        vid.release()
        cv2.destroyAllWindows()
def multi_thresholding_superpixel_merging(self, img, initial_boxes,
                                          aligned_boxes, aligned_masks,
                                          s_masks, s_boxes, threshold=None):
    """
    1. performs multi-thresholding step for different thresholds
    2. incorporate some randomness by scoring these randomly
    3. remove redundant boxes using non-maximum suppression

    args:
        img            : image array; only its last two dimensions
                         (height, width) are read
        initial_boxes  : bboxes predicted from detector (not read here)
        aligned_boxes  : bboxes after bbox-alignment
        aligned_masks  : masks after bbox-alignment
                         `aligned_boxes` are generated by enclosing these
                         masks
        s_masks        : masks corresponding to superpixels
        s_boxes        : bounding boxes for the corresponding superpixels
                         (not read here)
        threshold      : straddling expansion threshold. Accepted for
                         interface compatibility but currently unused: the
                         fixed levels 0.1 to 0.5 are always evaluated.

    returns:
        dict mapping the index of each aligned box to
        `[segment, bbox, score]`, the first entry kept by `nms` among the
        candidates generated for that box.
    """
    _, h, w = img.shape

    def get_thresholded_spixels(min_overlap, s_masks, a_bbox):
        """
        generates the set of superpixels which have at least `min_overlap`
        overlap with the `a_bbox`
        """
        req_masks = []
        box_mask = self.box_to_mask(a_bbox, (h, w))
        for mask in s_masks:
            intersect = np.bitwise_and(box_mask, mask).sum()
            # Fraction of the superpixel that lies inside the box.
            ratio = intersect / np.count_nonzero(mask)
            if ratio >= min_overlap:
                req_masks.append(mask)
        # Builtin ``bool`` replaces the ``np.bool`` alias, which was
        # deprecated in NumPy 1.20 and removed in 1.24.
        return np.array(req_masks).astype(bool)

    # generate sets for different thresholds
    thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]
    final_set_ = {}
    for idx, (a_mask, a_bbox) in enumerate(
            zip(aligned_masks, aligned_boxes)):
        box_set = {}
        # ``level`` (not ``threshold``) so the parameter is not shadowed.
        for level in thresholds:
            req_superpixels = get_thresholded_spixels(
                level, s_masks, a_bbox)
            super_segment = np.sum(req_superpixels, axis=0)
            final_segment = np.bitwise_or(super_segment, a_mask)
            final_bbox = mask_to_bbox(np.array([final_segment]))[0]
            box_set[level] = [final_segment, final_bbox]
        final_set_[idx] = box_set

    # score the boxes: a random draw scaled by the position of the level
    for box_set in final_set_.values():
        # Only values of existing keys are replaced, so mutating the dict
        # while iterating over it is safe.
        for rank, (level, seg_box) in enumerate(box_set.items()):
            r = np.random.rand()
            score = r * (rank + 1)
            box_set[level] = seg_box + [score]

    # nms
    for key, box_set in final_set_.items():
        segments, bboxes, scores = zip(*box_set.values())
        segments = np.array(segments)
        bboxes = np.array(bboxes)
        scores = np.array(scores)
        idxs = nms(bboxes, thresh=0.9, score=scores)
        # Keep only the first entry returned by nms for this box.
        final_set_[key] = [
            segments[idxs][0], bboxes[idxs][0], scores[idxs][0]]
    return final_set_