def testSingleImageGroundtruthExport(self):
    masks = np.array(
        [[[1, 1], [1, 1]],
         [[0, 0], [0, 1]],
         [[0, 0], [0, 0]]], dtype=np.uint8)
    boxes = np.array([[0, 0, 1, 1],
                      [0, 0, .5, .5],
                      [.5, .5, 1, 1]], dtype=np.float32)
    coco_boxes = np.array([[0, 0, 1, 1],
                           [0, 0, .5, .5],
                           [.5, .5, .5, .5]], dtype=np.float32)
    classes = np.array([1, 2, 3], dtype=np.int32)
    is_crowd = np.array([0, 1, 0], dtype=np.int32)
    next_annotation_id = 1
    expected_counts = ['04', '31', '4']

    # Tests exporting without passing in is_crowd (for backward compatibility).
    coco_annotations = coco_tools.ExportSingleImageGroundtruthToCoco(
        image_id='first_image',
        category_id_set=set([1, 2, 3]),
        next_annotation_id=next_annotation_id,
        groundtruth_boxes=boxes,
        groundtruth_classes=classes,
        groundtruth_masks=masks)
    for i, annotation in enumerate(coco_annotations):
        self.assertEqual(annotation['segmentation']['counts'],
                         expected_counts[i])
        self.assertTrue(np.all(np.equal(mask.decode(
            annotation['segmentation']), masks[i])))
        self.assertTrue(np.all(np.isclose(annotation['bbox'], coco_boxes[i])))
        self.assertEqual(annotation['image_id'], 'first_image')
        self.assertEqual(annotation['category_id'], classes[i])
        self.assertEqual(annotation['id'], i + next_annotation_id)

    # Tests exporting with is_crowd.
    coco_annotations = coco_tools.ExportSingleImageGroundtruthToCoco(
        image_id='first_image',
        category_id_set=set([1, 2, 3]),
        next_annotation_id=next_annotation_id,
        groundtruth_boxes=boxes,
        groundtruth_classes=classes,
        groundtruth_masks=masks,
        groundtruth_is_crowd=is_crowd)
    for i, annotation in enumerate(coco_annotations):
        self.assertEqual(annotation['segmentation']['counts'],
                         expected_counts[i])
        self.assertTrue(np.all(np.equal(mask.decode(
            annotation['segmentation']), masks[i])))
        self.assertTrue(np.all(np.isclose(annotation['bbox'], coco_boxes[i])))
        self.assertEqual(annotation['image_id'], 'first_image')
        self.assertEqual(annotation['category_id'], classes[i])
        self.assertEqual(annotation['iscrowd'], is_crowd[i])
        self.assertEqual(annotation['id'], i + next_annotation_id)
def load_dataset(self):
    dataset = self.cfg.dataset
    dataset_phase = self.cfg.dataset_phase
    dataset_ann = self.cfg.dataset_ann

    # initialize COCO api
    annFile = '%s/annotations/%s_%s.json' % (dataset, dataset_ann, dataset_phase)
    self.coco = COCO(annFile)

    imgIds = self.coco.getImgIds()

    data = []
    # loop through each image
    for imgId in imgIds:
        item = DataItem()

        img = self.coco.loadImgs(imgId)[0]
        item.im_path = "%s/images/%s/%s" % (dataset, dataset_phase, img["file_name"])
        item.im_size = [3, img["height"], img["width"]]
        item.coco_id = imgId
        annIds = self.coco.getAnnIds(imgIds=img['id'], iscrowd=False)
        anns = self.coco.loadAnns(annIds)

        all_person_keypoints = []
        masked_persons_RLE = []
        visible_persons_RLE = []
        all_visibilities = []

        # Consider only images with people
        has_people = len(anns) > 0
        if not has_people and self.cfg.coco_only_images_with_people:
            continue

        for ann in anns:  # loop through each person
            person_keypoints = []
            visibilities = []
            if ann["num_keypoints"] != 0:
                for i in range(self.cfg.num_joints):
                    x_coord = ann["keypoints"][3 * i]
                    y_coord = ann["keypoints"][3 * i + 1]
                    visibility = ann["keypoints"][3 * i + 2]
                    visibilities.append(visibility)
                    if visibility != 0:  # i.e. if labeled
                        person_keypoints.append([i, x_coord, y_coord])
                all_person_keypoints.append(np.array(person_keypoints))
                visible_persons_RLE.append(maskUtils.decode(self.coco.annToRLE(ann)))
                all_visibilities.append(visibilities)
            if ann["num_keypoints"] == 0:
                masked_persons_RLE.append(self.coco.annToRLE(ann))

        item.joints = all_person_keypoints
        item.im_neg_mask = maskUtils.merge(masked_persons_RLE)
        if self.cfg.use_gt_segm:
            item.gt_segm = np.moveaxis(np.array(visible_persons_RLE), 0, -1)
            item.visibilities = all_visibilities
        data.append(item)

    # `is not` compares identity, which is unreliable for strings; use !=.
    self.has_gt = self.cfg.dataset != "image_info"
    return data
def testExportSegmentsToCOCO(self):
    image_ids = ['first', 'second']
    detection_masks = [np.array(
        [[[0, 1, 0, 1], [0, 1, 1, 0], [0, 0, 0, 1], [0, 1, 0, 1]]],
        dtype=np.uint8), np.array(
        [[[0, 1, 0, 1], [0, 1, 1, 0], [0, 0, 0, 1], [0, 1, 0, 1]]],
        dtype=np.uint8)]

    for i, detection_mask in enumerate(detection_masks):
        detection_masks[i] = detection_mask[:, :, :, None]

    # np.float is a removed alias for the builtin float; use float directly.
    detection_scores = [np.array([.8], float), np.array([.7], float)]
    detection_classes = [np.array([1], np.int32), np.array([1], np.int32)]

    categories = [{'id': 0, 'name': 'person'},
                  {'id': 1, 'name': 'cat'},
                  {'id': 2, 'name': 'dog'}]
    output_path = os.path.join(tf.test.get_temp_dir(), 'segments.json')
    result = coco_tools.ExportSegmentsToCOCO(
        image_ids,
        detection_masks,
        detection_scores,
        detection_classes,
        categories,
        output_path=output_path)
    with tf.gfile.GFile(output_path, 'r') as f:
        written_result = f.read()
        written_result = json.loads(written_result)
        mask_load = mask.decode([written_result[0]['segmentation']])
        self.assertTrue(np.allclose(mask_load, detection_masks[0]))
        self.assertAlmostEqual(result, written_result)
def rle_masks_to_boxes(masks):
    """Computes the bounding box of each mask in a list of RLE encoded masks."""
    if len(masks) == 0:
        return []

    decoded_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks
    ]

    def get_bounds(flat_mask):
        inds = np.where(flat_mask > 0)[0]
        return inds.min(), inds.max()

    boxes = np.zeros((len(decoded_masks), 4))
    keep = [True] * len(decoded_masks)
    for i, mask in enumerate(decoded_masks):
        if mask.sum() == 0:
            keep[i] = False
            continue
        flat_mask = mask.sum(axis=0)
        x0, x1 = get_bounds(flat_mask)
        flat_mask = mask.sum(axis=1)
        y0, y1 = get_bounds(flat_mask)
        boxes[i, :] = (x0, y0, x1, y1)

    return boxes, np.where(keep)[0]
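# A minimal usage sketch for rle_masks_to_boxes with made-up toy data (assumes
# numpy and pycocotools.mask are available as np / mask_util, as above). Note
# that `boxes` keeps a row for every input mask; `keep` indexes the non-empty
# ones, so the caller is expected to filter with it.
def _demo_rle_masks_to_boxes():
    m0 = np.zeros((4, 4), dtype=np.uint8)
    m0[1:3, 1:3] = 1                       # a 2x2 blob
    m1 = np.zeros((4, 4), dtype=np.uint8)  # an empty mask; should be dropped
    rles = mask_util.encode(np.asfortranarray(np.stack([m0, m1], axis=2)))
    boxes, keep = rle_masks_to_boxes(rles)
    assert np.all(boxes[0] == (1, 1, 2, 2)) and list(keep) == [0]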
def polys_to_mask_wrt_box(polygons, box, M):
    """Convert from the COCO polygon segmentation format to a binary mask
    encoded as a 2D array of data type numpy.float32. The polygon segmentation
    is understood to be enclosed in the given box and rasterized to an M x M
    mask. The resulting mask is therefore of shape (M, M).
    """
    w = box[2] - box[0]
    h = box[3] - box[1]

    w = np.maximum(w, 1)
    h = np.maximum(h, 1)

    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p[0::2] = (p[0::2] - box[0]) * M / w
        p[1::2] = (p[1::2] - box[1]) * M / h
        polygons_norm.append(p)

    rle = mask_util.frPyObjects(polygons_norm, M, M)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    # Flatten in case polygons was a list
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask
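# Sketch of calling polys_to_mask_wrt_box with illustrative values: one square
# polygon in absolute image coordinates, rasterized into a 14 x 14 box-aligned
# mask (the polygon and box below are made up for the example).
box = np.array([10., 10., 30., 30.])                 # x0, y0, x1, y1
square = [[12., 12., 28., 12., 28., 28., 12., 28.]]  # flat [x0, y0, x1, y1, ...]
crop_mask_14 = polys_to_mask_wrt_box(square, box, M=14)
assert crop_mask_14.shape == (14, 14) and crop_mask_14.dtype == np.float32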
def evaluate_masks(
    json_dataset,
    all_boxes,
    all_segms,
    output_dir,
    use_salt=True,
    cleanup=False
):
    if cfg.CLUSTER.ON_CLUSTER:
        # On the cluster avoid saving these files in the job directory
        output_dir = '/tmp'
    res_file = os.path.join(
        output_dir, 'segmentations_' + json_dataset.name + '_results')
    if use_salt:
        res_file += '_{}'.format(str(uuid.uuid4()))
    res_file += '.json'

    results_dir = os.path.join(output_dir, 'results')
    if not os.path.exists(results_dir):
        os.mkdir(results_dir)

    os.environ['CITYSCAPES_DATASET'] = DATASETS[json_dataset.name][RAW_DIR]
    os.environ['CITYSCAPES_RESULTS'] = output_dir

    # Load the Cityscapes eval script *after* setting the required env vars,
    # since the script reads their values into global variables (at load time).
    import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \
        as cityscapes_eval

    roidb = json_dataset.get_roidb()
    for i, entry in enumerate(roidb):
        im_name = entry['image']

        basename = os.path.splitext(os.path.basename(im_name))[0]
        txtname = os.path.join(output_dir, basename + 'pred.txt')
        with open(txtname, 'w') as fid_txt:
            if i % 10 == 0:
                logger.info('i: {}: {}'.format(i, basename))
            for j in range(1, len(all_segms)):
                clss = json_dataset.classes[j]
                clss_id = cityscapes_eval.name2label[clss].id
                segms = all_segms[j][i]
                boxes = all_boxes[j][i]
                if segms == []:
                    continue
                masks = mask_util.decode(segms)

                for k in range(boxes.shape[0]):
                    score = boxes[k, -1]
                    mask = masks[:, :, k]
                    pngname = os.path.join(
                        'results',
                        basename + '_' + clss + '_{}.png'.format(k))
                    # write txt
                    fid_txt.write('{} {} {}\n'.format(pngname, clss_id, score))
                    # save mask
                    cv2.imwrite(os.path.join(output_dir, pngname), mask * 255)
    logger.info('Evaluating...')
    cityscapes_eval.main([])
    return None
def crop_mask(boxes, segmentations, flipped, imsize):
    assert (boxes.shape[0] == len(segmentations))
    psegmentations = []
    for i in range(len(segmentations)):
        gts = segmentations[i]
        box = boxes[i, :]
        if type(gts) == list and gts:
            assert (type(gts[0]) != dict)
            prle = mask.frPyObjects(gts, imsize[1], imsize[0])
        elif type(gts) == dict and type(gts['counts']) == list:
            prle = mask.frPyObjects([gts], imsize[1], imsize[0])
        elif type(gts) == dict and type(gts['counts']) == str:
            # Compressed RLE. The original condition had a misplaced
            # parenthesis that made it always true; in Python 2 it also
            # accepted unicode, which is folded into str in Python 3.
            prle = [gts]
        else:
            print('{} box has no segmentation'.format(i))
            psegmentations.append([])
            continue
        if len(prle) == 1:
            prle = prle[0]
        else:
            prle = mask.merge(prle)
        pmask = mask.decode([prle])
        if flipped:
            pmask = pmask[:, ::-1, :]
        pmask = np.copy(pmask[box[1]:box[3], box[0]:box[2], :], order='F')
        psegmentations.append(mask.encode(pmask))
    return psegmentations
def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
    size = scmap_shape[0:2]
    scmask = np.ones(size)
    m = maskUtils.decode(data_item.im_neg_mask)
    if m.size:
        scmask = 1.0 - imresize(m, size)
    scmask = np.stack([scmask] * self.cfg.num_joints, axis=-1)
    return scmask
def annToMask(self, ann, height, width):
    """
    Convert annotation which can be polygons, uncompressed RLE, or RLE
    to binary mask.
    :return: binary mask (numpy 2D array)
    """
    rle = self.annToRLE(ann, height, width)
    m = maskUtils.decode(rle)
    return m
def _flip_rle(rle, height, width):
    if 'counts' in rle and type(rle['counts']) == list:
        # Magic RLE format handling painfully discovered by looking at the
        # COCO API showAnns function.
        rle = mask_util.frPyObjects([rle], height, width)
    mask = mask_util.decode(rle)
    mask = mask[:, ::-1, :]
    rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
    return rle
def draw_objdb_masks(self, output_dir, objdb=None):
    if objdb is None:
        objdb = self.objdb

    mask_dir = osp.join(output_dir, '{}_objdb_masks'.format(self._image_set))
    img_dir = osp.join(output_dir, '{}_objdb_imgs'.format(self._image_set))
    ds_utils.maybe_create(output_dir)
    ds_utils.maybe_create(mask_dir)
    ds_utils.maybe_create(img_dir)

    for i in range(len(objdb)):
        obj = objdb[i]
        im_path = obj['image']
        ann_id = obj['obj_id']
        poly = obj['poly']
        bb = obj['box'].astype(np.int16)
        cls = obj['cls']
        width = obj['width']
        height = obj['height']

        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        msk = np.amax(COCOmask.decode(poly), axis=2)

        # binarize the mask
        msk = msk * 255
        retVal, msk = cv2.threshold(msk, 127, 255, cv2.THRESH_BINARY)
        msk = msk.astype(np.uint8)
        # msk = ds_utils.dilate_mask(msk, 9)

        # img = (1 - 0.5/255 * msk.reshape((height, width, 1))) * img + \
        #       0.5/255 * msk.reshape((height, width, 1)) * \
        #       np.random.random((1, 3)) * 255
        # cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), \
        #               (0, 255, 0), 2)
        #
        # fontScale = 0.0009 * math.sqrt(float(width*width + height*height))
        #
        # cv2.putText(img, '{:}'.format(self.classes[cls]), \
        #             (bb[0], bb[1] - 2), \
        #             cv2.FONT_HERSHEY_SIMPLEX, \
        #             fontScale, (0, 0, 255), 1)

        im_name, im_ext = osp.splitext(osp.basename(im_path))

        output_path = osp.join(
            mask_dir, im_name + '_' + str(ann_id).zfill(12) + im_ext)
        # output_path = osp.join(mask_dir, im_name+im_ext)
        cv2.imwrite(output_path, msk)

        output_path = osp.join(
            img_dir, im_name + '_' + str(ann_id).zfill(12) + im_ext)
        # output_path = osp.join(img_dir, im_name+im_ext)
        cv2.imwrite(output_path, img)
        print(i)
def get_mask(idx):
    ann_ids = coco.getAnnIds(imgIds=img_ids[idx])
    anns = coco.loadAnns(ann_ids)
    img = coco.loadImgs(img_ids[idx])[0]
    m = np.zeros((img['height'], img['width']))
    for j in anns:
        if j['iscrowd']:
            rle = mask.frPyObjects(j['segmentation'],
                                   img['height'], img['width'])
            m += mask.decode(rle)
    return m < 0.5
def convert(self, mode):
    width, height = self.size
    if mode == "mask":
        rles = mask_utils.frPyObjects(
            [p.numpy() for p in self.polygons], height, width
        )
        rle = mask_utils.merge(rles)
        mask = mask_utils.decode(rle)
        mask = torch.from_numpy(mask)
        # TODO add squeeze?
        return mask
def draw_roidb_masks(self, output_dir, roidb=None):
    mask_dir = osp.join(output_dir, '{}_roidb_masks'.format(self._image_set))
    img_dir = osp.join(output_dir, '{}_roidb_imgs'.format(self._image_set))
    ds_utils.maybe_create(output_dir)
    ds_utils.maybe_create(mask_dir)
    ds_utils.maybe_create(img_dir)

    if roidb is None:
        roidb = self.roidb

    for i in range(len(roidb)):
        rois = roidb[i]
        im_path = rois['image']
        clses = rois['clses']
        boxes = rois['boxes']
        rles = rois['polys']
        width = rois['width']
        height = rois['height']

        img = cv2.imread(im_path, cv2.IMREAD_COLOR)
        msk = np.zeros((height, width), dtype=np.uint8)

        for j in range(len(rles)):
            rle = rles[j]
            bb = boxes[j, :].astype(int)
            cls = clses[j]

            tmp = np.amax(COCOmask.decode(rle), axis=2) * 255
            retVal, tmp = cv2.threshold(tmp, 127, 255, cv2.THRESH_BINARY)
            tmp = tmp.astype(np.uint8)
            tmp = ds_utils.dilate_mask(tmp, 9)
            msk = np.maximum(msk, tmp)

            # fontScale = 0.0009 * math.sqrt(float(width*width + height*height))
            # cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), \
            #               (0, 255, 0), 2)
            # cv2.putText(img, '{:}'.format(self.classes[cls]), \
            #             (bb[0], bb[1] - 2), \
            #             cv2.FONT_HERSHEY_SIMPLEX, \
            #             fontScale, (0, 0, 255), 1)

        # img = (1 - 0.5/255 * msk.reshape((height, width, 1))) * img + \
        #       0.5/255 * msk.reshape((height, width, 1)) * \
        #       np.random.random((1, 3)) * 255

        output_path = osp.join(mask_dir, osp.basename(im_path))
        cv2.imwrite(output_path, msk)

        output_path = osp.join(img_dir, osp.basename(im_path))
        cv2.imwrite(output_path, img)
        print(i)
def polys_to_mask(polygons, height, width):
    """Convert from the COCO polygon segmentation format to a binary mask
    encoded as a 2D array of data type numpy.float32. The polygon segmentation
    is understood to be enclosed inside a height x width image. The resulting
    mask is therefore of shape (height, width).
    """
    rle = mask_util.frPyObjects(polygons, height, width)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    # Flatten in case polygons was a list
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask
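# Quick illustration of polys_to_mask on a two-part instance (toy coordinates
# made up for the example): both polygons are rasterized, summed across parts,
# and binarized into a single union mask.
parts = [[0., 0., 4., 0., 4., 4., 0., 4.],   # first polygon
         [6., 6., 9., 6., 9., 9., 6., 9.]]   # second, disjoint polygon
union_mask = polys_to_mask(parts, height=10, width=10)
assert union_mask.shape == (10, 10) and union_mask.max() == 1.0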
def segmentation_to_mask(polys, height, width):
    """
    Convert polygons to binary masks.

    Args:
        polys: a list of nx2 float array
        height, width: size of the output mask

    Returns:
        a binary matrix of (height, width)
    """
    polys = [p.flatten().tolist() for p in polys]
    rles = cocomask.frPyObjects(polys, height, width)
    rle = cocomask.merge(rles)
    return cocomask.decode(rle)
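# Unlike polys_to_mask above, segmentation_to_mask takes (N, 2) point arrays
# and merges the parts with cocomask.merge. A toy call (triangle made up here):
tri = np.array([[1., 1.], [8., 1.], [4., 7.]])
tri_mask = segmentation_to_mask([tri], height=10, width=10)  # uint8, (10, 10)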
def vis_one_image_opencv(
        im, boxes, segms=None, keypoints=None, thresh=0.9, kp_thresh=2,
        show_box=False, dataset=None, show_class=False):
    """Constructs a numpy array with the detections visualized."""

    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)

    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        return im

    if segms is not None:
        masks = mask_util.decode(segms)

    color_list = colormap()
    mask_color_id = 0

    # Display in largest to smallest order to reduce occlusion
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)

    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue

        # show box (off by default)
        if show_box:
            im = vis_bbox(
                im, (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]))

        # show class (off by default)
        if show_class:
            class_str = get_class_string(classes[i], score, dataset)
            im = vis_class(im, (bbox[0], bbox[1] - 2), class_str)

        # show mask
        if segms is not None and len(segms) > i:
            color_mask = color_list[mask_color_id % len(color_list), 0:3]
            mask_color_id += 1
            im = vis_mask(im, masks[..., i], color_mask)

        # show keypoints
        if keypoints is not None and len(keypoints) > i:
            im = vis_keypoints(im, keypoints[i], kp_thresh)

    return im
def _getIgnoreRegion(iid, coco):
    img = coco.imgs[iid]

    if 'ignore_regions_x' not in img.keys():
        return None

    if len(img['ignore_regions_x']) == 0:
        return None

    rgns_merged = []
    for region_x, region_y in zip(img['ignore_regions_x'],
                                  img['ignore_regions_y']):
        # Interleave x and y coordinates into a flat [x0, y0, x1, y1, ...]
        # list. (The original Python 2 idiom cycled two iterators until
        # StopIteration, which no longer terminates a generator under PEP 479.)
        rgns_merged.append(
            list(itertools.chain.from_iterable(zip(region_x, region_y))))

    rles = maskUtils.frPyObjects(rgns_merged, img['height'], img['width'])
    rle = maskUtils.merge(rles)
    return maskUtils.decode(rle)
def _get_mask_targets(polygons):
    mask_targets_blob = np.zeros((len(polygons), cfg.MWIDTH * cfg.MHEIGHT),
                                 dtype=np.float32)
    # 'mp.zeros' in the original was a typo for np.zeros.
    mask_targets_weights = np.zeros((len(polygons), 1), dtype=np.float32)
    img = np.ones((cfg.MHEIGHT, cfg.MWIDTH, 1), dtype=np.float32)
    for i, polygon in enumerate(polygons):
        if not polygon:
            continue
        else:
            # rle = COCOmask.frPyObjects(polygon, cfg.MHEIGHT, cfg.MWIDTH)
            m = COCOmask.decode(polygon)
            m = np.sum(m, axis=2)
            assert max(m.ravel()) == 1
            assert min(m.ravel()) == 0
            m = simage.interpolation.zoom(
                input=m,
                zoom=(float(cfg.MHEIGHT) / m.shape[0],
                      float(cfg.MWIDTH) / m.shape[1]),
                order=2)  # debug
            mask_targets_blob[i, :] = m.ravel()
            mask_targets_weights[i] = 1.
    return mask_targets_blob, mask_targets_weights
def polys_to_mask_wrt_box(polygons, box, M):
    w = box[2] - box[0]
    h = box[3] - box[1]
    w = np.maximum(w, 1)
    h = np.maximum(h, 1)
    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p[0::2] = (p[0::2] - box[0]) * M / w
        p[1::2] = (p[1::2] - box[1]) * M / h
        polygons_norm.append(p)
    rle = mask_util.frPyObjects(polygons_norm, M, M)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask
def testSingleImageDetectionMaskExport(self):
    masks = np.array(
        [[[1, 1], [1, 1]],
         [[0, 0], [0, 1]],
         [[0, 0], [0, 0]]], dtype=np.uint8)
    classes = np.array([1, 2, 3], dtype=np.int32)
    scores = np.array([0.8, 0.2, 0.7], dtype=np.float32)
    coco_annotations = coco_tools.ExportSingleImageDetectionMasksToCoco(
        image_id='first_image',
        category_id_set=set([1, 2, 3]),
        detection_classes=classes,
        detection_scores=scores,
        detection_masks=masks)
    expected_counts = ['04', '31', '4']
    for i, mask_annotation in enumerate(coco_annotations):
        self.assertEqual(mask_annotation['segmentation']['counts'],
                         expected_counts[i])
        self.assertTrue(np.all(np.equal(mask.decode(
            mask_annotation['segmentation']), masks[i])))
        self.assertEqual(mask_annotation['image_id'], 'first_image')
        self.assertEqual(mask_annotation['category_id'], classes[i])
        self.assertAlmostEqual(mask_annotation['score'], scores[i])
def getMask(self, ref):
    '''
    :return: mask, mask-area, mask-center
    '''
    ann = self.refToAnn[ref['ref_id']]
    image = self.imgs[ref['image_id']]
    if type(ann['segmentation'][0]) == list:  # polygon
        rle = mask.frPyObjects(ann['segmentation'],
                               image['height'], image['width'])
    else:  # mask
        rle = ann['segmentation']
    m = mask.decode(rle)
    # sometimes there are multiple binary maps (corresponding to multiple segs)
    m = np.sum(m, axis=2)
    m = m.astype(np.uint8)  # convert to np.uint8
    # area
    area = sum(mask.area(rle))  # very close to ann['area']
    # position
    position_x = np.mean(np.where(m == 1)[1])  # [1] means columns (matlab style) -> x (c++ style)
    position_y = np.mean(np.where(m == 1)[0])  # [0] means rows (matlab style) -> y (c++ style)
    # mass position (if there were multiple regions, we use the largest one)
    label_m = label(m, connectivity=m.ndim)
    regions = regionprops(label_m)
    if len(regions) > 0:
        largest_id = np.argmax(np.array([props.filled_area for props in regions]))
        largest_props = regions[largest_id]
        mass_y, mass_x = largest_props.centroid
    else:
        mass_x, mass_y = position_x, position_y
    # if centroid is not in mask, we find the closest point to it from mask
    # (indices must be ints; the centroid is a float coordinate)
    if m[int(mass_y), int(mass_x)] != 1:
        print('Finding closest mask point...')
        kernel = np.ones((10, 10), np.uint8)
        me = cv2.erode(m, kernel, iterations=1)
        points = np.array(list(zip(np.where(me == 1)[0].tolist(),
                                   np.where(me == 1)[1].tolist())))  # row, col style
        dist = np.sum((points - (mass_y, mass_x)) ** 2, axis=1)
        id = np.argsort(dist)[0]
        mass_y, mass_x = points[id]
    # return
    return {'mask': m, 'area': area,
            'position_x': position_x, 'position_y': position_y,
            'mass_x': mass_x, 'mass_y': mass_y}
def showRef(self, ref):
    # show image
    image = self.imgs[ref['image_id']]
    I = io.imread(osp.join(self.IMAGE_DIR, image['file_name']))
    plt.figure()
    plt.imshow(I)
    # show refer expression
    for sid, sent in enumerate(ref['sentences']):
        print('%s. %s' % (sid + 1, sent['sent']))
    # show annotation
    ann_id = ref['ann_id']
    ann = self.anns[ann_id]
    ax = plt.gca()
    polygons = []
    color = []
    # c = np.random.random((1, 3)).tolist()[0]
    c = 'none'
    if type(ann['segmentation'][0]) == list:  # polygon
        for seg in ann['segmentation']:
            # each segment is a flat [x0, y0, x1, y1, ...] list; use integer
            # division when reshaping into (n, 2) points
            poly = np.array(seg).reshape((len(seg) // 2, 2))
            polygons.append(Polygon(poly, True, alpha=0.4))
            color.append(c)
        p = PatchCollection(polygons, facecolors=color,
                            edgecolors=(1, 1, 0, 0), linewidths=3, alpha=1)
        ax.add_collection(p)  # yellow polygon
        p = PatchCollection(polygons, facecolors=color,
                            edgecolors=(1, 0, 0, 0), linewidths=1, alpha=1)
        ax.add_collection(p)  # red polygon
    else:  # mask
        rle = ann['segmentation']
        m = mask.decode(rle)
        img = np.ones((m.shape[0], m.shape[1], 3))
        color_mask = np.array([2.0, 166.0, 101.0]) / 255
        for i in range(3):
            img[:, :, i] = color_mask[i]
        ax.imshow(np.dstack((img, m * 0.5)))
        # p = PatchCollection(polygons, facecolors=color, edgecolors=(0,0,0,1), linewidths=3, alpha=0.4)
        # ax.add_collection(p)
    plt.show()
def to_mask(polys, size):
    """Convert list of polygons to full size binary mask

    Parameters
    ----------
    polys : list of numpy.ndarray
        Numpy.ndarray with shape (N, 2) where N is the number of bounding
        boxes. The second axis represents points of the polygons.
        Specifically, these are :math:`(x, y)`.
    size : tuple
        Tuple of length 2: (width, height).

    Returns
    -------
    numpy.ndarray
        Full size binary mask of shape (height, width)
    """
    try_import_pycocotools()
    import pycocotools.mask as cocomask
    width, height = size
    polys = [p.flatten().tolist() for p in polys]
    rles = cocomask.frPyObjects(polys, height, width)
    rle = cocomask.merge(rles)
    return cocomask.decode(rle)
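# Example call for to_mask with made-up coordinates; note the gluoncv
# convention that `size` is (width, height) while the returned mask is
# (height, width).
quad = np.array([[2., 2.], [7., 2.], [7., 7.], [2., 7.]])
full_mask = to_mask([quad], size=(10, 8))
assert full_mask.shape == (8, 10)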
def evaluate_masks(
    self,
    all_boxes,
    all_segms,
    output_dir,
):
    res_file = os.path.join(
        output_dir, 'segmentations_' + self.dataset.name + '_results')
    res_file += '.json'

    os.environ['CITYSCAPES_DATASET'] = os.path.join(
        os.path.dirname(__file__), '../../data/cityscapes')
    os.environ['CITYSCAPES_RESULTS'] = os.path.join(output_dir, 'inst_seg')
    sys.path.insert(
        0, os.path.join(os.path.abspath(os.path.dirname(__file__)),
                        '..', '..', 'lib', 'dataset_devkit',
                        'cityscapesScripts'))
    sys.path.insert(
        0, os.path.join(os.path.abspath(os.path.dirname(__file__)),
                        '..', '..', 'lib', 'dataset_devkit',
                        'cityscapesScripts', 'cityscapesscripts',
                        'evaluation'))

    # Load the Cityscapes eval script *after* setting the required env vars,
    # since the script reads their values into global variables (at load time).
    import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \
        as cityscapes_eval
    sys.argv = []

    roidb = self.dataset.get_roidb()
    for i, entry in enumerate(roidb):
        im_name = entry['image']

        basename = os.path.splitext(os.path.basename(im_name))[0]
        txtname = os.path.join(output_dir, 'inst_seg', basename + 'pred.txt')
        os.makedirs(os.path.join(output_dir, 'inst_seg'), exist_ok=True)
        with open(txtname, 'w') as fid_txt:
            for j in range(1, len(all_segms)):
                clss = self.dataset.classes[j]
                clss_id = cityscapes_eval.name2label[clss].id
                segms = all_segms[j][i]
                boxes = all_boxes[j][i]
                if segms == []:
                    continue
                masks = mask_util.decode(segms)

                for k in range(boxes.shape[0]):
                    score = boxes[k, -1]
                    mask = masks[:, :, k]
                    pngname = os.path.join(
                        'seg_results', basename,
                        basename + '_' + clss + '_{}.png'.format(k))
                    # write txt
                    fid_txt.write('{} {} {}\n'.format(pngname, clss_id, score))
                    # save mask
                    os.makedirs(os.path.join(output_dir, 'inst_seg',
                                             'seg_results', basename),
                                exist_ok=True)
                    cv2.imwrite(
                        os.path.join(output_dir, 'inst_seg', pngname),
                        mask * 255)
    cityscapes_eval.main()
    return None
def showAnns(self, anns):
    """
    Display the specified annotations.
    :param anns (array of object): annotations to display
    :return: None
    """
    if len(anns) == 0:
        return 0
    if 'segmentation' in anns[0] or 'keypoints' in anns[0]:
        datasetType = 'instances'
    elif 'caption' in anns[0]:
        datasetType = 'captions'
    else:
        raise Exception('datasetType not supported')
    if datasetType == 'instances':
        ax = plt.gca()
        ax.set_autoscale_on(False)
        polygons = []
        color = []
        for ann in anns:
            c = (np.random.random((1, 3)) * 0.6 + 0.4).tolist()[0]
            if 'segmentation' in ann:
                if type(ann['segmentation']) == list:
                    # polygon
                    for seg in ann['segmentation']:
                        poly = np.array(seg).reshape((int(len(seg) / 2), 2))
                        polygons.append(Polygon(poly))
                        color.append(c)
                else:
                    # mask
                    t = self.imgs[ann['image_id']]
                    if type(ann['segmentation']['counts']) == list:
                        rle = maskUtils.frPyObjects([ann['segmentation']],
                                                    t['height'], t['width'])
                    else:
                        rle = [ann['segmentation']]
                    m = maskUtils.decode(rle)
                    img = np.ones((m.shape[0], m.shape[1], 3))
                    if ann['iscrowd'] == 1:
                        color_mask = np.array([2.0, 166.0, 101.0]) / 255
                    if ann['iscrowd'] == 0:
                        color_mask = np.random.random((1, 3)).tolist()[0]
                    for i in range(3):
                        img[:, :, i] = color_mask[i]
                    ax.imshow(np.dstack((img, m * 0.5)))
            if 'keypoints' in ann and type(ann['keypoints']) == list:
                # turn skeleton into zero-based index
                sks = np.array(
                    self.loadCats(ann['category_id'])[0]['skeleton']) - 1
                kp = np.array(ann['keypoints'])
                x = kp[0::3]
                y = kp[1::3]
                v = kp[2::3]
                for sk in sks:
                    if np.all(v[sk] > 0):
                        plt.plot(x[sk], y[sk], linewidth=3, color=c)
                plt.plot(x[v > 0], y[v > 0], 'o', markersize=8,
                         markerfacecolor=c, markeredgecolor='k',
                         markeredgewidth=2)
                plt.plot(x[v > 1], y[v > 1], 'o', markersize=8,
                         markerfacecolor=c, markeredgecolor=c,
                         markeredgewidth=2)
        p = PatchCollection(polygons, facecolor=color, linewidths=0,
                            alpha=0.4)
        ax.add_collection(p)
        p = PatchCollection(polygons, facecolor='none', edgecolors=color,
                            linewidths=2)
        ax.add_collection(p)
    elif datasetType == 'captions':
        for ann in anns:
            print(ann['caption'])
def main():
    with open(__file__, 'r') as f:
        _file_source = f.read()

    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--fbms-annotation-json',
                        help='FBMS JSON annotations',
                        required=True)
    parser.add_argument(
        '--motion-masks-root',
        required=True,
        help='Directory containing estimated PNG motion masks for each frame.')
    parser.add_argument(
        '--detections-root',
        help='Directory containing outputs of detectron on FBMS.',
        required=True)
    parser.add_argument('--save-pickle', action='store_true')
    parser.add_argument('--moving-threshold', default=0.5, type=float)
    parser.add_argument('--output-dir', required=True)
    parser.add_argument(
        '--filename-format',
        choices=['frame', 'sequence_frame', 'fbms'],
        default='fbms',
        help=('Specifies how to get frame number from the filename. '
              '"frame": the filename is the frame number, '
              '"sequence_frame": the frame number is separated by an '
              'underscore, '
              '"fbms": assume fbms style frame numbers'))

    args = parser.parse_args()

    detectron_root = Path(args.detections_root)
    motion_root = Path(args.motion_masks_root)
    dataset = COCO(args.fbms_annotation_json)

    output_root = Path(args.output_dir)
    output_root.mkdir(parents=True)

    logging_path = str(output_root / (Path(__file__).stem + '.log'))
    setup_logging(logging_path)
    file_logger = logging.getLogger(logging_path)
    file_logger.info('Source:\n%s' % _file_source)
    logging.info('Args:\n %s', pformat(vars(args)))

    # Map (sequence, frame_name) to frame_id.
    frame_key_to_id = {}
    for annotation in dataset.imgs.values():
        # Path ends in 'sequence/frame_name'
        path = Path(annotation['file_name'])
        frame_key_to_id[(path.parent.stem, path.stem)] = annotation['id']

    logging.info('Loading motion paths')
    # Map sequence to dict mapping frame index to motion mask path
    motion_mask_paths = load_motion_masks(motion_root)

    logging.info('Loading detectron paths')
    predictions = load_detectron_predictions(detectron_root)

    logging.info('Outputting moving detections')
    detection_results = []
    segmentation_results = []

    if args.filename_format == 'fbms':
        from utils.fbms.utils import get_framenumber
    elif args.filename_format == 'sequence_frame':
        def get_framenumber(x):
            return int(x.split('_')[-1])
    elif args.filename_format == 'frame':
        get_framenumber = int
    else:
        raise ValueError('Unknown --filename-format: %s' %
                         args.filename_format)

    # The last frame won't have a motion mask, so we use the second to last
    # frame's mask as the last frame's mask.
    for sequence in predictions.keys():
        frame_index_names = sorted(predictions[sequence].keys(),
                                   key=lambda x: get_framenumber(x))
        second_last_frame, last_frame = frame_index_names[-2:]
        if last_frame not in motion_mask_paths:
            motion_mask_paths[sequence][last_frame] = (
                motion_mask_paths[sequence][second_last_frame])

    tasks = [(sequence, frame_name)
             for sequence in predictions.keys()
             for frame_name in predictions[sequence]]
    for sequence, frame_name in tasks:
        frame_key = (sequence, frame_name)
        # If --save-pickle is true, process every frame. Otherwise, only
        # process frames that are in --fbms-annotation-json.
        if not args.save_pickle and frame_key not in frame_key_to_id:
            continue

        boxes = predictions[sequence][frame_name]['boxes']
        segmentations = predictions[sequence][frame_name]['segmentations']

        motion_mask = np.array(
            Image.open(motion_mask_paths[sequence][frame_name])) != 0

        if args.save_pickle:
            updated_boxes = []
            updated_segmentations = []
        for i, (box, segmentation) in enumerate(zip(boxes, segmentations)):
            mask = mask_util.decode(segmentation)
            x1, y1, x2, y2, score = box.tolist()
            w = x2 - x1 + 1
            h = y2 - y1 + 1
            if mask.sum() < 1e-10:
                moving_portion = 0
            else:
                moving_portion = (mask & motion_mask).sum() / mask.sum()
            if moving_portion < args.moving_threshold:
                score = translate_range(score, (0, 1), (0, 0.5))
            else:
                score = translate_range(score, (0, 1), (0.5, 1))
            if frame_key in frame_key_to_id:
                frame_id = frame_key_to_id[frame_key]
                detection_results.append({
                    'image_id': frame_id,
                    'category_id': 1,
                    'bbox': [x1, y1, w, h],
                    'score': score
                })
                segmentation_results.append({
                    'image_id': frame_id,
                    'category_id': 1,
                    'segmentation': segmentation,
                    'score': score
                })
            if args.save_pickle:
                updated_boxes.append([x1, y1, x2, y2, score])
                updated_segmentations.append(segmentation)

        if args.save_pickle:
            output_path = (output_root / 'pickle' / sequence /
                           frame_name).with_suffix('.pickle')
            output_path.parent.mkdir(exist_ok=True, parents=True)
            with open(output_path, 'wb') as f:
                # TODO(achald): Make this work for multiple classes.
                updated_boxes = [[], updated_boxes]
                if len(updated_segmentations):
                    updated_segmentations = [[], updated_segmentations]
                else:
                    updated_segmentations = None
                pickle.dump(
                    {
                        'boxes': updated_boxes,
                        'segmentations': updated_segmentations,
                        'keypoints': [[], []]
                    }, f)

    box_output = output_root / 'bbox_fbms_results.json'
    logging.info('Writing box results to %s' % box_output)
    with open(box_output, 'w') as f:
        json.dump(detection_results, f)

    segmentation_output = output_root / 'segmentation_fbms_results.json'
    logging.info('Writing segmentation results to %s' % segmentation_output)
    with open(segmentation_output, 'w') as f:
        json.dump(segmentation_results, f)

    for eval_type, results in (('bbox', detection_results),
                               ('segm', segmentation_results)):
        predictions_dataset = dataset.loadRes(results)
        coco_eval = COCOeval(dataset, predictions_dataset, eval_type)
        coco_eval.evaluate()
        coco_eval.accumulate()
        summary_f = io.StringIO()
        with redirect_stdout(summary_f):
            coco_eval.summarize()
        summary = summary_f.getvalue()
        logging.info('COCO evaluation:')
        logging.info('\n%s', summary)
def results2txt(self, results, outfile_prefix):
    """Dump the detection results to a txt file.

    Args:
        results (list[list | tuple | ndarray]): Testing results of the
            dataset.
        outfile_prefix (str): The filename prefix of the json files. If the
            prefix is "somepath/xxx", the txt files will be named
            "somepath/xxx.txt".

    Returns:
        list[str]: Result txt files which contain the corresponding instance
            segmentation images.
    """
    try:
        import cityscapesscripts.helpers.labels as CSLabels
    except ImportError:
        raise ImportError('Please run "pip install cityscapesscripts" to '
                          'install cityscapesscripts first.')
    result_files = []
    os.makedirs(outfile_prefix, exist_ok=True)
    prog_bar = mmcv.ProgressBar(len(self))
    for idx in range(len(self)):
        result = results[idx]
        filename = self.data_infos[idx]['filename']
        basename = osp.splitext(osp.basename(filename))[0]
        pred_txt = osp.join(outfile_prefix, basename + '_pred.txt')

        bbox_result, segm_result = result
        bboxes = np.vstack(bbox_result)
        # segm results
        if isinstance(segm_result, tuple):
            # Some detectors use different scores for bbox and mask,
            # like Mask Scoring R-CNN. Score of segm will be used instead
            # of bbox score.
            segms = mmcv.concat_list(segm_result[0])
            mask_score = segm_result[1]
        else:
            # use bbox score for mask score
            segms = mmcv.concat_list(segm_result)
            mask_score = [bbox[-1] for bbox in bboxes]
        labels = [
            np.full(bbox.shape[0], i, dtype=np.int32)
            for i, bbox in enumerate(bbox_result)
        ]
        labels = np.concatenate(labels)

        assert len(bboxes) == len(segms) == len(labels)
        num_instances = len(bboxes)
        prog_bar.update()
        with open(pred_txt, 'w') as fout:
            for i in range(num_instances):
                pred_class = labels[i]
                classes = self.CLASSES[pred_class]
                class_id = CSLabels.name2label[classes].id
                score = mask_score[i]
                mask = maskUtils.decode(segms[i]).astype(np.uint8)
                png_filename = osp.join(outfile_prefix,
                                        basename + f'_{i}_{classes}.png')
                mmcv.imwrite(mask, png_filename)
                fout.write(f'{osp.basename(png_filename)} {class_id} '
                           f'{score}\n')
        result_files.append(pred_txt)

    return result_files
def rle_mask_voting(
    top_masks, all_masks, all_dets, iou_thresh, binarize_thresh, method='AVG'
):
    """Returns new masks (in correspondence with `top_masks`) by combining
    multiple overlapping masks coming from the pool of `all_masks`. Two methods
    for combining masks are supported: 'AVG' uses a weighted average of
    overlapping mask pixels; 'UNION' takes the union of all mask pixels.
    """
    if len(top_masks) == 0:
        return

    all_not_crowd = [False] * len(all_masks)
    top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd)
    decoded_all_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks
    ]
    decoded_top_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks
    ]
    all_boxes = all_dets[:, :4].astype(np.int32)
    all_scores = all_dets[:, 4]

    # Fill box support with weights
    mask_shape = decoded_all_masks[0].shape
    mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1]))
    for k in range(len(all_masks)):
        ref_box = all_boxes[k]
        x_0 = max(ref_box[0], 0)
        x_1 = min(ref_box[2] + 1, mask_shape[1])
        y_0 = max(ref_box[1], 0)
        y_1 = min(ref_box[3] + 1, mask_shape[0])
        mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k]
    mask_weights = np.maximum(mask_weights, 1e-5)

    top_segms_out = []
    for k in range(len(top_masks)):
        # Corner case of empty mask
        if decoded_top_masks[k].sum() == 0:
            top_segms_out.append(top_masks[k])
            continue

        inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0]
        # Only matches itself
        if len(inds_to_vote) == 1:
            top_segms_out.append(top_masks[k])
            continue

        masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote]
        if method == 'AVG':
            ws = mask_weights[inds_to_vote]
            soft_mask = np.average(masks_to_vote, axis=0, weights=ws)
            mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8)
        elif method == 'UNION':
            # Any pixel that's on joins the mask
            soft_mask = np.sum(masks_to_vote, axis=0)
            mask = np.array(soft_mask > 1e-5, dtype=np.uint8)
        else:
            raise NotImplementedError('Method {} is unknown'.format(method))
        rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
        top_segms_out.append(rle)

    return top_segms_out
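# Toy end-to-end sketch of rle_mask_voting: two overlapping detections of the
# same object vote on the kept (top) mask. All masks, boxes, and scores below
# are fabricated; mask_util is pycocotools.mask, as in the function above.
def _demo_rle_mask_voting():
    def _enc(m):
        return mask_util.encode(np.asfortranarray(m.astype(np.uint8)))
    a = np.zeros((8, 8), np.uint8)
    a[2:6, 2:6] = 1
    b = np.zeros((8, 8), np.uint8)
    b[3:7, 3:7] = 1
    all_masks = [_enc(a), _enc(b)]
    all_dets = np.array([[2., 2., 5., 5., 0.9],    # x0, y0, x1, y1, score
                         [3., 3., 6., 6., 0.6]], dtype=np.float32)
    voted = rle_mask_voting(all_masks[:1], all_masks, all_dets,
                            iou_thresh=0.3, binarize_thresh=0.5, method='AVG')
    return mask_util.decode(voted[0])  # the score-weighted, re-binarized mask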
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes", "gt_masks",
            "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
             for obj in annos]
    target = Instances(image_size)
    target.gt_boxes = Boxes(boxes)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            # TODO check type and provide better error
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, \
                        "Expect segmentation of 2 dimensions, got {}.".format(
                            segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            # torch.from_numpy does not support array with negative stride.
            masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x))
                             for x in masks])
            )
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
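# Hypothetical minimal input for annotations_to_instances, following
# detectron2's dataset-dict conventions (a single instance with one polygon);
# the coordinates below are illustrative, not from any real dataset.
from detectron2.structures import BoxMode

toy_annos = [{
    "bbox": [10.0, 10.0, 40.0, 70.0],
    "bbox_mode": BoxMode.XYWH_ABS,
    "category_id": 0,
    "segmentation": [[10.0, 10.0, 50.0, 10.0, 50.0, 80.0, 10.0, 80.0]],
}]
instances = annotations_to_instances(toy_annos, image_size=(128, 128))
# instances.gt_boxes, instances.gt_classes and instances.gt_masks are now set.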
def polys_to_mask(polygons, height, width):
    rles = cocomask.frPyObjects(polygons, height, width)
    rle = cocomask.merge(rles)
    mask = cocomask.decode(rle)
    return mask
def show_result(img,
                result,
                class_names,
                score_thr=0.3,
                wait_time=0,
                show=True,
                out_file=None):
    """Visualize the detection results on the image.

    Args:
        img (str or np.ndarray): Image filename or loaded image.
        result (tuple[list] or list): The detection result, can be either
            (bbox, segm) or just bbox.
        class_names (list[str] or tuple[str]): A list of class names.
        score_thr (float): The threshold to visualize the bboxes and masks.
        wait_time (int): Value of waitKey param.
        show (bool, optional): Whether to show the image with opencv or not.
        out_file (str, optional): If specified, the visualization result will
            be written to the out file instead of shown in a window.

    Returns:
        np.ndarray or None: If neither `show` nor `out_file` is specified, the
            visualized image is returned, otherwise None is returned.
    """
    assert isinstance(class_names, (tuple, list))
    img = mmcv.imread(img)
    img = img.copy()
    if isinstance(result, tuple):
        bbox_result, segm_result = result
    else:
        bbox_result, segm_result = result, None
    bboxes = np.vstack(bbox_result)
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ]
    labels = np.concatenate(labels)
    # draw segmentation masks
    if segm_result is not None:
        segms = mmcv.concat_list(segm_result)
        inds = np.where(bboxes[:, -1] > score_thr)[0]
        np.random.seed(42)
        color_masks = [
            np.random.randint(0, 256, (1, 3), dtype=np.uint8)
            for _ in range(max(labels) + 1)
        ]
        for i in inds:
            i = int(i)
            color_mask = color_masks[labels[i]]
            # np.bool is a removed alias for the builtin bool.
            mask = maskUtils.decode(segms[i]).astype(bool)
            img[mask] = img[mask] * 0.5 + color_mask * 0.5
    # if out_file specified, do not show image in window
    if out_file is not None:
        show = False
    # draw bounding boxes
    mmcv.imshow_det_bboxes(
        img,
        bboxes,
        labels,
        class_names=class_names,
        score_thr=score_thr,
        show=show,
        wait_time=wait_time,
        out_file=out_file)
    if not (show or out_file):
        return img
def annToMask(ann, i_w, i_h):
    rle = annToRLE(ann, i_w, i_h)
    return maskUtils.decode(rle)
def vis_all_mask(self, all_boxes, all_masks, save_path=None):
    """
    Visualize all detections in one image.
    :param all_boxes: [ numpy.ndarray([[x1 y1 x2 y2 score]]) for j in classes ],
        indexed per class and per image
    :param all_masks: encoded masks, indexed the same way as all_boxes
    :param save_path: directory to save the visualizations (shown if None)
    :return: None
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from matplotlib.patches import Polygon
    import random
    import cv2

    palette = {
        'person': (220, 20, 60),
        'rider': (255, 0, 0),
        'car': (0, 0, 142),
        'truck': (0, 0, 70),
        'bus': (0, 60, 100),
        'train': (0, 80, 100),
        'motorcycle': (0, 0, 230),
        'bicycle': (119, 11, 32),
        # 'road': (128, 64, 128),
        'sidewalk': (244, 35, 232),
        'building': (70, 70, 70),
        'wall': (102, 102, 156),
        'fence': (190, 153, 153),
        'pole': (153, 153, 153),
        'sky': (70, 130, 180),
        'traffic light': (250, 170, 30),
        'traffic sign': (220, 220, 0),
        'vegetation': (107, 142, 35),
        'terrain': (152, 251, 152)
    }
    name2id = {
        'road': 0,
        'sidewalk': 1,
        'building': 2,
        'wall': 3,
        'fence': 4,
        'pole': 5,
        'traffic light': 6,
        'traffic sign': 7,
        'vegetation': 8,
        'terrain': 9,
        'sky': 10
    }
    self.classes = [
        '__background__',
        'person',
        'rider',
        'car',
        'truck',
        'bus',
        'train',
        'motorcycle',
        'bicycle',
    ]
    if save_path is not None:
        os.makedirs(save_path, exist_ok=True)
    for i in range(len(self.roidb)):
        im = np.array(Image.open(self.roidb[i]['image']))
        fig = plt.figure(frameon=False)
        fig.set_size_inches(im.shape[1] / 200, im.shape[0] / 200)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.axis('off')
        fig.add_axes(ax)
        ax.imshow(im)
        for j, name in enumerate(self.classes):
            if name == '__background__':
                continue
            boxes = all_boxes[j][i]
            segms = all_masks[j][i]
            if segms == []:
                continue
            masks = mask_util.decode(segms)
            for k in range(boxes.shape[0]):
                score = boxes[k, -1]
                mask = masks[:, :, k]
                if score < 0.5:
                    continue
                bbox = boxes[k, :]
                ax.add_patch(
                    plt.Rectangle((bbox[0], bbox[1]),
                                  bbox[2] - bbox[0],
                                  bbox[3] - bbox[1],
                                  fill=False,
                                  edgecolor='g',
                                  linewidth=1,
                                  alpha=0.5))
                ax.text(bbox[0], bbox[1] - 2,
                        name + '{:0.2f}'.format(score).lstrip('0'),
                        fontsize=5,
                        family='serif',
                        bbox=dict(facecolor='g', alpha=0.4, pad=0,
                                  edgecolor='none'),
                        color='white')
                _, contour, hier = cv2.findContours(
                    mask.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
                color = (palette[name][0] / 255, palette[name][1] / 255,
                         palette[name][2] / 255)
                for c in contour:
                    ax.add_patch(
                        Polygon(c.reshape((-1, 2)),
                                fill=True,
                                facecolor=color,
                                edgecolor='w',
                                linewidth=0.8,
                                alpha=0.5))
        if save_path is None:
            plt.show()
        else:
            fig.savefig(os.path.join(
                save_path,
                '{}.png'.format(
                    self.roidb[i]['image'].split('/')[-1][:-16])),
                dpi=200)
        plt.close('all')
def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      include_mask=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
        u'width', u'date_captured', u'flickr_url', u'id',
        u'not_exhaustive_category_ids', u'neg_category_ids']
      image_dir: directory containing the image files.
      bbox_annotations: list of dicts with keys: [u'segmentation', u'area',
        u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
        coordinates in the official LVIS dataset are given as [x, y, width,
        height] tuples using absolute coordinates where x, y represent the
        top-left (0-indexed) corner. This function converts to the format
        expected by the Tensorflow Object Detection API (which is
        [ymin, xmin, ymax, xmax] with coordinates normalized relative to
        image size).
      category_index: a dict containing LVIS category information keyed by the
        'id' field of each category. See the
        label_map_util.create_category_index function.
      include_mask: Whether to include instance segmentations masks (PNG
        encoded) in the result. default: False.

    Returns:
      success: whether the conversion is successful
      filename: image filename
      example: The converted tf.Example

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid
        JPEG
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['coco_url']
    filename = osp.join(*filename.split('/')[-2:])
    image_id = image['id']
    image_not_exhaustive_category_ids = image['not_exhaustive_category_ids']
    image_neg_category_ids = image['neg_category_ids']

    full_path = os.path.join(image_dir, filename)
    if not tf.gfile.Exists(full_path):
        tf.logging.warn(f'image {full_path} not exists! skip')
        return False, None, None

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/not_exhaustive_category_ids':
            dataset_util.int64_list_feature(image_not_exhaustive_category_ids),
        'image/image_neg_category_ids':
            dataset_util.int64_list_feature(image_neg_category_ids),
    }

    if bbox_annotations:
        xmin = []
        xmax = []
        ymin = []
        ymax = []
        is_crowd = []
        category_names = []
        category_ids = []
        area = []
        encoded_mask_png = []
        for object_annotations in bbox_annotations:
            (x, y, width, height) = tuple(object_annotations['bbox'])
            xmin_single = max(float(x) / image_width, 0.0)
            xmax_single = min(float(x + width) / image_width, 1.0)
            ymin_single = max(float(y) / image_height, 0.0)
            ymax_single = min(float(y + height) / image_height, 1.0)
            if xmax_single <= xmin_single or ymax_single <= ymin_single:
                continue
            xmin.append(xmin_single)
            xmax.append(xmax_single)
            ymin.append(ymin_single)
            ymax.append(ymax_single)

            is_crowd.append(0)
            category_id = int(object_annotations['category_id'])
            category_ids.append(category_id)
            category_names.append(
                category_index[category_id]['name'].encode('utf8'))
            area.append(object_annotations['area'])

            if include_mask:
                run_len_encoding = mask.frPyObjects(
                    object_annotations['segmentation'], image_height,
                    image_width)
                binary_mask = mask.decode(run_len_encoding)
                binary_mask = np.amax(binary_mask, axis=2)
                pil_image = PIL.Image.fromarray(binary_mask)
                output_io = io.BytesIO()
                pil_image.save(output_io, format='PNG')
                encoded_mask_png.append(output_io.getvalue())

        feature_dict.update({
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
            'image/object/class/text':
                dataset_util.bytes_list_feature(category_names),
            'image/object/class/label':
                dataset_util.int64_list_feature(category_ids),
            'image/object/is_crowd':
                dataset_util.int64_list_feature(is_crowd),
            'image/object/area': dataset_util.float_list_feature(area),
        })
        if include_mask:
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(encoded_mask_png))

    example = tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
    return True, filename, example
    else:
        raise Exception("Category {} is not defined in {}".format(
            _id, os.path.join(base_path, conf)))

font = ImageFont.load_default()

# Add bounding boxes and masks
for idx, annotation in enumerate(annotations):
    if annotation["image_id"] == image_idx:
        draw = ImageDraw.Draw(im)
        bb = annotation['bbox']
        draw.rectangle(((bb[0], bb[1]), (bb[0] + bb[2], bb[1] + bb[3])),
                       fill=None, outline="red")
        draw.text((bb[0] + 2, bb[1] + 2),
                  get_category(annotation["category_id"]), font=font)
        if annotation["iscrowd"]:
            im.putalpha(255)
            an_sg = annotation["segmentation"]
            item = mask.decode(mask.frPyObjects(
                an_sg, im.size[1], im.size[0])).astype(np.uint8) * 255
            item = Image.fromarray(item, mode='L')
            overlay = Image.new('RGBA', im.size)
            draw_ov = ImageDraw.Draw(overlay)
            draw_ov.bitmap((0, 0), item, fill=(255, 0, 0, 128))
            im = Image.alpha_composite(im, overlay)
        else:
            item = annotation["segmentation"][0]
            poly = Image.new('RGBA', im.size)
            pdraw = ImageDraw.Draw(poly)
            pdraw.polygon(item, fill=(255, 255, 255, 127),
                          outline=(255, 255, 255, 255))
            im.paste(poly, mask=poly)
if save:
    im.save(os.path.join(base_path,
                         'coco_annotated_{}.png'.format(image_idx)), "PNG")
im.show()
def transform_instance_annotations(annotation, transforms, image_size, *,
                                   keypoint_hflip_indices=None):
    """
    Apply transforms to box, segmentation and keypoints annotations of a
    single instance.

    It will use `transforms.apply_box` for the box, and `transforms.apply_coords`
    for segmentation polygons & keypoints. If you need anything more specially
    designed for each data structure, you'll need to implement your own version
    of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]):
        image_size (tuple): the height, width of the transformed image
        keypoint_hflip_indices (ndarray[int]): see
            `create_keypoint_hflip_indices`.

    Returns:
        dict: the same input dict with fields "bbox", "segmentation",
            "keypoints" transformed according to `transforms`. The "bbox_mode"
            field will be set to XYXY_ABS.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)
    # bbox is 1d (per-instance bounding box)
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"],
                           BoxMode.XYXY_ABS)
    # clip transformed bbox to image size
    bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
    annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    if "segmentation" in annotation:
        # each instance contains 1 or more polygons
        segm = annotation["segmentation"]
        if isinstance(segm, list):
            # polygons
            polygons = [np.asarray(p).reshape(-1, 2) for p in segm]
            annotation["segmentation"] = [
                p.reshape(-1) for p in transforms.apply_polygons(polygons)
            ]
        elif isinstance(segm, dict):
            # RLE
            mask = mask_util.decode(segm)
            mask = transforms.apply_segmentation(mask)
            assert tuple(mask.shape[:2]) == image_size
            annotation["segmentation"] = mask
        else:
            raise ValueError(
                "Cannot transform segmentation of type '{}'!"
                "Supported types are: polygons as list[list[float] or ndarray],"
                " COCO-style RLE as a dict.".format(type(segm)))

    if "keypoints" in annotation:
        keypoints = transform_keypoint_annotations(
            annotation["keypoints"], transforms, image_size,
            keypoint_hflip_indices)
        annotation["keypoints"] = keypoints

    return annotation
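# Sketch: horizontally flipping a single annotation with an fvcore-style
# HFlipTransform (re-exported by detectron2.data.transforms); the box and
# polygon values are made up for the example.
from detectron2.data import transforms as T
from detectron2.structures import BoxMode

toy_anno = {
    "bbox": [10.0, 10.0, 50.0, 80.0],
    "bbox_mode": BoxMode.XYXY_ABS,
    "segmentation": [[10.0, 10.0, 50.0, 10.0, 50.0, 80.0, 10.0, 80.0]],
}
flipped = transform_instance_annotations(
    toy_anno, [T.HFlipTransform(width=128)], image_size=(128, 128))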
def annToMask(self, ann, height, width):
    rle = self.annToRLE(ann, height, width)
    m = maskUtils.decode(rle)
    return m
def vis_one_image_opencv(im, boxes, segms=None, keypoints=None, thresh=0.9,
                         kp_thresh=2, show_box=False, dataset=None,
                         show_class=False, alpha=0.4, show_border=True,
                         border_thick=1, bbox_thick=1, font_scale=0.35):
    """Constructs a numpy array with the detections visualized."""
    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)

    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        return im

    if segms is not None and len(segms) > 0:
        masks = mask_util.decode(segms)
        color_list = colormap()
        mask_color_id = 0

    # Display in largest to smallest order to reduce occlusion
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)

    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue

        # show box (off by default)
        if show_box:
            im = vis_bbox(
                im, (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]),
                thick=bbox_thick)

        # show class (off by default)
        if show_class:
            class_str = get_class_string(classes[i], score, dataset)
            im = vis_class(im, (bbox[0], bbox[1] - 2), class_str,
                           font_scale=font_scale)

        # show mask
        if segms is not None and len(segms) > i:
            color_mask = color_list[mask_color_id % len(color_list), 0:3]
            mask_color_id += 1
            im = vis_mask(im, masks[..., i], color_mask, alpha=alpha,
                          show_border=show_border, border_thick=border_thick)

        # show keypoints
        if keypoints is not None and len(keypoints) > i:
            im = vis_keypoints(im, keypoints[i], kp_thresh)

    return im
def _compute_f(gt_data, tracker_data, tracker_data_id, gt_id, bound_th):
    """
    Perform F computation for a given gt and a given tracker ID. Adapted from
    https://github.com/davisvideochallenge/davis2017-evaluation
    :param gt_data: the encoded gt masks
    :param tracker_data: the encoded tracker masks
    :param tracker_data_id: the tracker ID
    :param gt_id: the ground truth ID
    :param bound_th: boundary threshold parameter
    :return: the F value for the given tracker and gt ID
    """
    # Only loaded when run to reduce minimum requirements
    from pycocotools import mask as mask_utils
    from skimage.morphology import disk
    import cv2

    f = np.zeros(len(gt_data))

    for t, (gt_masks, tracker_masks) in enumerate(zip(gt_data, tracker_data)):
        curr_tracker_mask = mask_utils.decode(tracker_masks[tracker_data_id])
        curr_gt_mask = mask_utils.decode(gt_masks[gt_id])

        bound_pix = bound_th if bound_th >= 1 - np.finfo('float').eps else \
            np.ceil(bound_th * np.linalg.norm(curr_tracker_mask.shape))

        # Get the pixel boundaries of both masks
        fg_boundary = JAndF._seg2bmap(curr_tracker_mask)
        gt_boundary = JAndF._seg2bmap(curr_gt_mask)

        # fg_dil = binary_dilation(fg_boundary, disk(bound_pix))
        fg_dil = cv2.dilate(fg_boundary.astype(np.uint8),
                            disk(bound_pix).astype(np.uint8))
        # gt_dil = binary_dilation(gt_boundary, disk(bound_pix))
        gt_dil = cv2.dilate(gt_boundary.astype(np.uint8),
                            disk(bound_pix).astype(np.uint8))

        # Get the intersection
        gt_match = gt_boundary * fg_dil
        fg_match = fg_boundary * gt_dil

        # Area of the intersection
        n_fg = np.sum(fg_boundary)
        n_gt = np.sum(gt_boundary)

        # Compute precision and recall
        if n_fg == 0 and n_gt > 0:
            precision = 1
            recall = 0
        elif n_fg > 0 and n_gt == 0:
            precision = 0
            recall = 1
        elif n_fg == 0 and n_gt == 0:
            precision = 1
            recall = 1
        else:
            precision = np.sum(fg_match) / float(n_fg)
            recall = np.sum(gt_match) / float(n_gt)

        # Compute F measure
        if precision + recall == 0:
            f_val = 0
        else:
            f_val = 2 * precision * recall / (precision + recall)

        f[t] = f_val

    return f
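# The precision/recall/F arithmetic above on concrete (made-up) numbers: if
# 6 of 8 predicted boundary pixels fall inside the dilated gt boundary, and
# 6 of 10 gt boundary pixels fall inside the dilated predicted boundary, then:
precision, recall = 6 / 8.0, 6 / 10.0
f_val = 2 * precision * recall / (precision + recall)  # = 0.666...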
def show_result(img, result, class_names, score_thr=0.3, wait_time=0, show=True, out_file=None): """Visualize the detection results on the image. Args: img (str or np.ndarray): Image filename or loaded image. result (tuple[list] or list): The detection result, can be either (bbox, segm) or just bbox. class_names (list[str] or tuple[str]): A list of class names. score_thr (float): The threshold to visualize the bboxes and masks. wait_time (int): Value of waitKey param. show (bool, optional): Whether to show the image with opencv or not. out_file (str, optional): If specified, the visualization result will be written to the out file instead of shown in a window. Returns: np.ndarray or None: If neither `show` nor `out_file` is specified, the visualized image is returned, otherwise None is returned. """ assert isinstance(class_names, (tuple, list)) img = mmcv.imread(img) img = img.copy() if isinstance(result, tuple): bbox_result, segm_result = result else: bbox_result, segm_result = result, None bboxes = np.vstack(bbox_result) labels = [ np.full(bbox.shape[0], i, dtype=np.int32) for i, bbox in enumerate(bbox_result) ] labels = np.concatenate(labels) # for 1715.jpg """ bboxes = np.delete(bboxes, [9, 10, 11, 12, 13, 14, 15, 16], axis=0) labels = np.delete(labels, [9, 10, 11, 12, 13, 14, 15, 16], axis=0) bboxes[1][1] = 325 bboxes[2][1] = 308 bboxes[4][1] = 294 """ # for 2894.jpg """ bboxes = np.delete(bboxes, [7, 8, 9, 10, 11, 12, 20, 22], axis=0) labels = np.delete(labels, [7, 8, 9, 10, 11, 12, 20, 22], axis=0) bboxes[7][0] = 400 bboxes[7][1] = 160 bboxes[7][2] = 550 bboxes[7][3] = 900 bboxes[1][0] = 425 bboxes[1][1] = 380 bboxes[1][2] = 469 bboxes[1][3] = 642 bboxes[2][0] = 472 bboxes[2][1] = 379 bboxes[2][3] = 643 bboxes[6][2] = 465 bboxes[3][0] = 472 x = bboxes[7][0] y = bboxes[7][1] for index in range(bboxes.shape[0]): bboxes[index][0] -= x bboxes[index][1] -= y bboxes[index][2] -= x bboxes[index][3] -= y w = bboxes[7][2] h = bboxes[7][3] for index in range(bboxes.shape[0]): x1 = bboxes[index][0] y1 = bboxes[index][1] x2 = bboxes[index][2] y2 = bboxes[index][3] bboxes[index][0] = y1 bboxes[index][1] = w - x2 bboxes[index][2] = y2 bboxes[index][3] = w - x1 bboxes[2][1] = 308 bboxes[4][1] = 294 bboxes = np.append(bboxes, [[71, 565, 79, 571, 0.24], [72, 573, 78,578, 0.34]], axis=0) labels = np.append(labels, [8, 8], axis=0) """ #for 3641.jpg """ bboxes = np.delete(bboxes, [6], axis=0) labels = np.delete(labels, [6], axis=0) bboxes[4][0] = 60 bboxes[4][1] = 565 bboxes[4][2] = 1220 bboxes = np.append(bboxes, [[110, 688, 185, 700, 0.8]], axis=0) labels = np.append(labels, [7], axis=0) """ # for 6411.jpg """ bboxes = np.delete(bboxes, [11, 14, 16], axis=0) labels = np.delete(labels, [11, 14, 16], axis=0) bboxes = np.append(bboxes, [[65, 260, 170, 280, 0.8]], axis=0) labels = np.append(labels, [7], axis=0) """ # draw segmentation masks if segm_result is not None: segms = mmcv.concat_list(segm_result) inds = np.where(bboxes[:, -1] > score_thr)[0] np.random.seed(42) color_masks = [ np.random.randint(0, 256, (1, 3), dtype=np.uint8) for _ in range(max(labels) + 1) ] for i in inds: i = int(i) color_mask = color_masks[labels[i]] mask = maskUtils.decode(segms[i]).astype(np.bool) img[mask] = img[mask] * 0.5 + color_mask * 0.5 # if out_file specified, do not show image in window if out_file is not None: show = False # draw bounding boxes mmcv.imshow_det_bboxes(img, bboxes, labels, class_names=class_names, score_thr=score_thr, show=show, wait_time=wait_time, out_file=out_file) if not (show or out_file): 
return img
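A minimal call of `show_result`, assuming an mmdetection-style model whose `inference_detector` output matches the `result` contract above; the config and checkpoint paths are placeholders:

from mmdet.apis import init_detector, inference_detector

model = init_detector('configs/some_config.py', 'checkpoints/some_ckpt.pth')  # placeholder paths
result = inference_detector(model, 'demo.jpg')
vis = show_result('demo.jpg', result, model.CLASSES,
                  score_thr=0.5, show=False)  # with show=False and no out_file, the drawn image is returned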
def create_batches(self, batch_size, shuffle=True):
    # 1 batch = [(image, masks, [(mask, [x, y, w, h], class_id), ...]), ...]
    batch = []
    self.counter_samples = 0
    while True:
        indices = np.arange(len(self.img_ids))
        if shuffle:
            indices = np.random.permutation(indices)
        for index in indices:
            try:
                img = self.coco.loadImgs(self.img_ids[index])[0]
            except (KeyError, IndexError):
                print(index)
                continue
            path = os.path.join(self.image_dir,
                                self.get_image_path(id=img['id'], name=img['file_name']))
            I = cv2.imread(path)
            if I is None:
                print("no image exists at %s" % path)
                continue
            I = np.ascontiguousarray(I.astype(np.uint8)[:, :, ::-1])  # BGR -> RGB
            if len(I.shape) != 3:
                continue
            ann_ids = self.coco.getAnnIds(imgIds=img['id'], catIds=self.cat_ids, iscrowd=None)
            anns = self.coco.loadAnns(ann_ids)
            ann_list = []
            rles = []
            for ann in anns:  # loop through each annotation
                bb = list(ann["bbox"])
                try:
                    # RLE of the first segmentation part
                    rle = frPyObjects(ann['segmentation'], I.shape[0], I.shape[1])[0]
                except Exception:  # malformed segmentation
                    continue
                # make sure we don't include unknown classes
                if self.id2i[ann["category_id"]] < 0 or \
                        self.id2i[ann["category_id"]] > config.TOTAL_CLASSES:
                    print("This class cannot be processed: %d ..." % self.id2i[ann["category_id"]])
                    continue
                rles.append(rle)
                ann_list.append((decode(rle).astype(np.float32), bb,
                                 self.id2i[ann["category_id"]]))
            if len(rles) == 0:
                print("No RLE was extracted, continuing with next picture ...")
                continue
            mask = decode(rles).astype(np.float32)  # (H, W, num_instances)
            batch.append((I, mask, ann_list))
            if len(batch) >= batch_size:
                self.counter_samples += len(batch)
                print("Getting new batch with %d elements ..." % len(batch))
                yield batch
                batch = []
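A sketch of how the generator above is meant to be driven, assuming `loader` is an instance of the surrounding dataset class (construction omitted):

for batch in loader.create_batches(batch_size=8):
    for image, masks, ann_list in batch:
        # image: RGB uint8 HxWx3, masks: float32 HxWxN,
        # ann_list: (mask, [x, y, w, h], class_id) tuples
        pass
    break  # the generator is infinite; stop after one batch here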
def visualize_sequences(seq_id, tracks, max_frames_seq, img_folder, gt_folder, output_folder,
                        draw_boxes=False, create_video=True):
    colors = generate_colors()
    dpi = 100.0
    frames_with_annotations = [frame for frame in tracks.keys() if len(tracks[frame]) > 0]
    img_sizes = next(iter(tracks[frames_with_annotations[0]])).mask["size"]
    # Double the figure height once (not per frame) when predictions are stacked on GT;
    # mutating img_sizes inside the loop would compound the doubling every frame.
    fig_height = img_sizes[0] * 2 if gt_folder else img_sizes[0]
    for t in range(max_frames_seq + 1):
        print("Processing frame", t)
        filename_t = img_folder + "/" + seq_id + "/%06d" % t
        if os.path.exists(filename_t + ".png"):
            filename_t += ".png"
        elif os.path.exists(filename_t + ".jpg"):
            filename_t += ".jpg"
        else:
            print("Image file not found for " + filename_t + ".png/.jpg, continuing...")
            continue
        img = np.array(Image.open(filename_t), dtype="float32") / 255
        # If gt_folder is provided, combine the predicted frame with the GT frame
        if gt_folder:
            fname_gt = gt_folder + "/" + seq_id + "/%06d" % t
            if os.path.exists(fname_gt + ".png"):
                fname_gt += ".png"
            elif os.path.exists(fname_gt + ".jpg"):
                fname_gt += ".jpg"
            else:
                print("GT image file not found for " + fname_gt + ".png/.jpg, continuing...")
                continue
            gt_img = np.array(Image.open(fname_gt), dtype="float32") / 255
        fig = plt.figure()
        fig.set_size_inches(img_sizes[1] / dpi, fig_height / dpi, forward=True)
        fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=None, hspace=None)
        ax = fig.subplots()
        ax.set_axis_off()
        if t in tracks:
            for obj in tracks[t]:
                color = colors[obj.track_id % len(colors)]
                if obj.class_id == 1:
                    category_name = "Car"
                elif obj.class_id == 2:
                    category_name = "Pedestrian"
                else:
                    category_name = "Ignore"
                    color = (0.7, 0.7, 0.7)
                if obj.class_id == 1 or obj.class_id == 2:  # don't show boxes or ids for ignore regions
                    x, y, w, h = rletools.toBbox(obj.mask)
                    if draw_boxes:
                        import matplotlib.patches as patches
                        rect = patches.Rectangle((x, y), w, h, linewidth=1,
                                                 edgecolor=color, facecolor='none', alpha=1.0)
                        ax.add_patch(rect)
                    category_name += ":" + str(obj.track_id)
                    ax.annotate(category_name, (x + 0.5 * w, y + 0.5 * h), color=color,
                                weight='bold', fontsize=7, ha='center', va='center', alpha=1.0)
                binary_mask = rletools.decode(obj.mask)
                apply_mask(img, binary_mask, color)
        if gt_folder:
            # stack the predicted image on top of the GT image
            img = np.vstack((img, gt_img))
        ax.imshow(img)
        fig.savefig(output_folder + "/" + seq_id + "/%06d" % t + ".jpg")
        plt.close(fig)
    if create_video:
        os.chdir(output_folder + "/" + seq_id)
        call(["ffmpeg", "-framerate", "10", "-y", "-i", "%06d.jpg",
              "-c:v", "libx264", "-profile:v", "high", "-crf", "20",
              "-pix_fmt", "yuv420p",
              "-vf", "pad=\'width=ceil(iw/2)*2:height=ceil(ih/2)*2\'",
              "output.mp4"])
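`apply_mask`, used above and in `vis_one_proposal` further below, is not defined in this section. A common implementation (for example, the one in the Matterport Mask R-CNN demo) alpha-blends the colour into the masked pixels, channel by channel; this is an assumption about the missing helper, not the original code:

def apply_mask(image, mask, color, alpha=0.5):
    # Blend `color` into `image` wherever `mask` is set.
    for c in range(3):
        image[:, :, c] = np.where(mask == 1,
                                  image[:, :, c] * (1 - alpha) + alpha * color[c],
                                  image[:, :, c])
    return image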
def binary_from_rle(rle): return cocomask.decode(rle)
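A round-trip sanity check for the one-line helper above: encode a binary mask to RLE and decode it back. pycocotools expects Fortran-ordered uint8 arrays for encoding:

import numpy as np
from pycocotools import mask as cocomask

m = np.zeros((4, 4), dtype=np.uint8)
m[1:3, 1:3] = 1
rle = cocomask.encode(np.asfortranarray(m))
assert np.array_equal(binary_from_rle(rle), m)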
def vis_one_image(im, im_name, output_dir, boxes, segms=None, keypoints=None, thresh=0.9, kp_thresh=2, dpi=200, box_alpha=0.0, dataset=None, show_class=False, ext='pdf', is_show_boxes=True): """Visual debugging of detections.""" if not os.path.exists(output_dir): os.makedirs(output_dir) if isinstance(boxes, list): boxes, segms, keypoints, classes = convert_from_cls_format( boxes, segms, keypoints) if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh: return if segms is not None: masks = mask_util.decode(segms) color_list = colormap(rgb=True) / 255 dataset_keypoints, _ = keypoint_utils.get_keypoints() kp_lines = kp_connections(dataset_keypoints) cmap = plt.get_cmap('rainbow') colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)] fig = plt.figure(frameon=False) fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi) ax = plt.Axes(fig, [0., 0., 1., 1.]) ax.axis('off') fig.add_axes(ax) ax.imshow(im) # Display in largest to smallest order to reduce occlusion areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) sorted_inds = np.argsort(-areas) mask_color_id = 0 for i in sorted_inds: bbox = boxes[i, :4] score = boxes[i, -1] if score < thresh: continue print(dataset.classes[classes[i]], score) # show box (off by default, box_alpha=0.0) if is_show_boxes: ax.add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='g', linewidth=0.5, alpha=box_alpha)) if show_class: ax.text(bbox[0], bbox[1] - 2, get_class_string(classes[i], score, dataset), fontsize=3, family='serif', bbox=dict(facecolor='g', alpha=0.4, pad=0, edgecolor='none'), color='white') # show mask if segms is not None and len(segms) > i: img = np.ones(im.shape) color_mask = color_list[mask_color_id % len(color_list), 0:3] mask_color_id += 1 w_ratio = .4 for c in range(3): color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio for c in range(3): img[:, :, c] = color_mask[c] e = masks[:, :, i] _, contour, hier = cv2.findContours(e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) for c in contour: polygon = Polygon(c.reshape((-1, 2)), fill=True, facecolor=color_mask, edgecolor='w', linewidth=1.2, alpha=0.5) ax.add_patch(polygon) # show keypoints if keypoints is not None and len(keypoints) > i: kps = keypoints[i] plt.autoscale(False) for l in range(len(kp_lines)): i1 = kp_lines[l][0] i2 = kp_lines[l][1] if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh: x = [kps[0, i1], kps[0, i2]] y = [kps[1, i1], kps[1, i2]] line = ax.plot(x, y) plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7) if kps[2, i1] > kp_thresh: ax.plot(kps[0, i1], kps[1, i1], '.', color=colors[l], markersize=3.0, alpha=0.7) if kps[2, i2] > kp_thresh: ax.plot(kps[0, i2], kps[1, i2], '.', color=colors[l], markersize=3.0, alpha=0.7) # add mid shoulder / mid hip for better visualization mid_shoulder = ( kps[:2, dataset_keypoints.index('right_shoulder')] + kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0 sc_mid_shoulder = np.minimum( kps[2, dataset_keypoints.index('right_shoulder')], kps[2, dataset_keypoints.index('left_shoulder')]) mid_hip = (kps[:2, dataset_keypoints.index('right_hip')] + kps[:2, dataset_keypoints.index('left_hip')]) / 2.0 sc_mid_hip = np.minimum( kps[2, dataset_keypoints.index('right_hip')], kps[2, dataset_keypoints.index('left_hip')]) if (sc_mid_shoulder > kp_thresh and kps[2, dataset_keypoints.index('nose')] > kp_thresh): x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]] y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]] line = 
ax.plot(x, y) plt.setp(line, color=colors[len(kp_lines)], linewidth=1.0, alpha=0.7) if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh: x = [mid_shoulder[0], mid_hip[0]] y = [mid_shoulder[1], mid_hip[1]] line = ax.plot(x, y) plt.setp(line, color=colors[len(kp_lines) + 1], linewidth=1.0, alpha=0.7) output_name = os.path.basename(im_name) + '.' + ext fig.savefig(os.path.join(output_dir, '{}'.format(output_name)), dpi=dpi) plt.close('all')
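The `_, contour, hier = cv2.findContours(...)` unpacking in the function above is OpenCV 3.x specific; OpenCV 4.x dropped the first return value and returns only (contours, hierarchy). A version-agnostic pattern, should the surrounding code need to run on both:

res = cv2.findContours(e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
contour, hier = res[-2:]  # works for both the 3-tuple (v3) and the 2-tuple (v4) return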
def __init__(self, masks, size):
    """
    Arguments:
        masks: Either torch.tensor of [num_instances, H, W]
            or list of torch.tensors of [H, W] with num_instances elems,
            or RLE (Run Length Encoding) - interpreted as list of dicts,
            or BinaryMaskList.
        size: absolute image size, width first

    After initialization, a hard copy will be made, to leave the
    initializing source data intact.
    """
    assert isinstance(size, (list, tuple))
    assert len(size) == 2
    if isinstance(masks, torch.Tensor):
        # The raw data representation is passed as argument
        masks = masks.clone()
    elif isinstance(masks, (list, tuple)):
        if len(masks) == 0:
            masks = torch.empty([0, size[1], size[0]])  # num_instances = 0!
        elif isinstance(masks[0], torch.Tensor):
            # stack along a new leading dim to get [num_instances, H, W]
            masks = torch.stack(masks, dim=0).clone()
        elif isinstance(masks[0], dict) and "counts" in masks[0]:
            # RLE interpretation
            rle_sizes = [tuple(inst["size"]) for inst in masks]
            masks = mask_utils.decode(masks)  # [h, w, n]
            masks = torch.tensor(masks).permute(2, 0, 1)  # [n, h, w]
            assert rle_sizes.count(rle_sizes[0]) == len(rle_sizes), (
                "All the sizes must be the same size: %s" % rle_sizes)
            # in RLE, height comes first in "size"
            rle_height, rle_width = rle_sizes[0]
            assert masks.shape[1] == rle_height
            assert masks.shape[2] == rle_width
            width, height = size
            if width != rle_width or height != rle_height:
                masks = interpolate(
                    input=masks[None].float(),
                    size=(height, width),
                    mode="bilinear",
                    align_corners=False,
                )[0].type_as(masks)
        else:
            raise RuntimeError(
                "Type of `masks[0]` could not be interpreted: %s" % type(masks))
    elif isinstance(masks, BinaryMaskList):
        # just hard copy the BinaryMaskList instance's underlying data
        masks = masks.masks.clone()
    else:
        raise RuntimeError(
            "Type of `masks` argument could not be interpreted: %s" % type(masks))

    if len(masks.shape) == 2:
        # if only a single instance mask is passed
        masks = masks[None]

    assert len(masks.shape) == 3
    assert masks.shape[1] == size[1], "%s != %s" % (masks.shape[1], size[1])
    assert masks.shape[2] == size[0], "%s != %s" % (masks.shape[2], size[0])

    self.masks = masks
    self.size = tuple(size)
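Constructing a `BinaryMaskList` from the RLE path above, assuming the same `mask_utils` import as the class; note that `size` is (width, height) while the RLE "size" field is [height, width]:

import numpy as np
from pycocotools import mask as mask_utils

m = np.asfortranarray(np.ones((10, 20), dtype=np.uint8))  # 10 rows (H), 20 cols (W)
rle = mask_utils.encode(m)                                # rle["size"] == [10, 20]
mask_list = BinaryMaskList([rle], size=(20, 10))          # width first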
def bbox_merge(dets, segs, iou_thr, scr_thr, mask_thr): # dets: [[x1, y1, x2, y2, score], ... ] if dets.shape[0] <= 1: return dets, segs order = dets[:, -1].ravel().argsort()[::-1] dets = dets[order, :] scr_keep_inds = (np.where(dets[:, -1] > scr_thr))[0] dets = dets[scr_keep_inds, :] segs = [segs[ind] for ind in scr_keep_inds] dets_res = np.zeros([0, 5]) segs_res = [] imgHeight, imgWidth = 1024, 2048 while dets.shape[0] > 0: num = dets.shape[0] # IoU area = (dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1) xx1 = np.maximum(dets[0, 0], dets[:, 0]) yy1 = np.maximum(dets[0, 1], dets[:, 1]) xx2 = np.minimum(dets[0, 2], dets[:, 2]) yy2 = np.minimum(dets[0, 3], dets[:, 3]) w = np.maximum(0.0, xx2 - xx1 + 1) h = np.maximum(0.0, yy2 - yy1 + 1) inter = w * h o = inter / (area[0] + area[:] - inter) # get needed merge det and delete these det merge_inds = np.where(o >= iou_thr)[0] dets_to_merge = dets[merge_inds, :] segs_to_merge = [segs[ind] for ind in merge_inds] dets = np.delete(dets, merge_inds, 0) # remained dets and segs after remerge. segs = [segs[i] for i in range(num) if i not in merge_inds] if merge_inds.shape[0] <= 1: dets_res = np.row_stack((dets_res, dets_to_merge)) segs_res += segs_to_merge else: scores = dets_to_merge[:, -1:] dets_to_merge[:, :-1] = dets_to_merge[:, :-1] * np.tile( scores, (1, 4)) max_score = np.max(scores) det_merged = np.zeros((1, 5)) det_merged[:, :-1] = np.sum(dets_to_merge[:, :-1], axis=0) / np.sum(scores) det_merged[:, -1] = max_score dets_res = np.row_stack((dets_res, det_merged)) img = np.zeros((imgHeight, imgWidth)) for i in range(merge_inds.shape[0]): mask = maskUtils.decode(segs_to_merge[i]).astype(np.bool) img[mask] += scores[i, -1] img = img / np.max(img) img[img >= mask_thr] = 1 img[img < mask_thr] = 0 img = img.astype(np.uint8) # print(img.shape) seg_merged = maskUtils.encode( np.array(img[:, :, np.newaxis], order='F'))[0] segs_res.append(seg_merged) return dets_res, segs_res
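A toy check of the score-weighted merge step above: two overlapping boxes with scores 0.9 and 0.6 merge to a score-weighted average box carrying the maximum score:

import numpy as np

dets = np.array([[10, 10, 50, 50, 0.9],
                 [12, 12, 52, 52, 0.6]])
scores = dets[:, -1:]
merged = np.sum(dets[:, :-1] * scores, axis=0) / np.sum(scores)
# merged == [10.8, 10.8, 50.8, 50.8]; the final det is [*merged, 0.9]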
def generate_simulated_scenes(config, split, year): db = coco(config, split, year) data_dir = osp.join(config.data_dir, 'coco') if (split == 'test') or (split == 'aux'): images_dir = osp.join(data_dir, 'crn_images', 'train' + year) noices_dir = osp.join(data_dir, 'crn_noices', 'train' + year) labels_dir = osp.join(data_dir, 'crn_labels', 'train' + year) masks_dir = osp.join(data_dir, 'crn_masks', 'train' + year) else: images_dir = osp.join(data_dir, 'crn_images', split + year) noices_dir = osp.join(data_dir, 'crn_noices', split + year) labels_dir = osp.join(data_dir, 'crn_labels', split + year) masks_dir = osp.join(data_dir, 'crn_masks', split + year) maybe_create(images_dir) maybe_create(noices_dir) maybe_create(labels_dir) maybe_create(masks_dir) traindb = coco(config, 'train', '2017') nn_tables = AllCategoriesTables(traindb) nn_tables.build_nntables_for_all_categories(True) # start_ind = 0 # end_ind = len(db.scenedb) start_ind = 25000 + 14000 * config.seed end_ind = 25000 + 14000 * (config.seed + 1) patches_per_class = traindb.patches_per_class color_transfer_threshold = 0.8 for i in range(start_ind, end_ind): entry = db.scenedb[i] width = entry['width'] height = entry['height'] xywhs = entry['boxes'] masks = entry['masks'] clses = entry['clses'] image_index = entry['image_index'] instance_inds = entry['instance_inds'] full_mask = np.zeros((height, width), dtype=np.float32) full_label = np.zeros((height, width), dtype=np.float32) full_image = np.zeros((height, width, 3), dtype=np.float32) full_noice = np.zeros((height, width, 3), dtype=np.float32) original_image = cv2.imread(db.color_path_from_index(image_index), cv2.IMREAD_COLOR) for j in range(len(masks)): src_img = original_image.astype(np.float32).copy() xywh = xywhs[j] mask = masks[j] cls_idx = clses[j] instance_ind = instance_inds[j] embed_path = db.patch_path_from_indices( image_index, instance_ind, 'patch_feature', 'pkl', config.use_patch_background) with open(embed_path, 'rb') as fid: query_vector = pickle.load(fid) n_samples = min( 100, len(patches_per_class[cls_idx]) ) #min(config.n_nntable_trees, len(patches_per_class[cls_idx])) candidate_patches = nn_tables.retrieve(cls_idx, query_vector, n_samples) candidate_patches = [ x for x in candidate_patches if x['instance_ind'] != instance_ind ] assert (len(candidate_patches) > 1) # candidate_instance_ind = instance_ind # candidate_patch = None # while (candidate_instance_ind == instance_ind): # cid = np.random.randint(0, len(candidate_patches)) # candidate_patch = candidate_patches[cid] # candidate_instance_ind = candidate_patch['instance_ind'] candidate_patch = find_closest_patch(db, traindb, image_index, instance_ind, candidate_patches) # stenciling src_mask = COCOmask.decode(mask) dst_mask = COCOmask.decode(candidate_patch['mask']) src_xyxy = xywh_to_xyxy(xywh, width, height) dst_xyxy = xywh_to_xyxy(candidate_patch['box'], candidate_patch['width'], candidate_patch['height']) dst_mask = dst_mask[dst_xyxy[1]:(dst_xyxy[3] + 1), dst_xyxy[0]:(dst_xyxy[2] + 1)] dst_mask = cv2.resize( dst_mask, (src_xyxy[2] - src_xyxy[0] + 1, src_xyxy[3] - src_xyxy[1] + 1), interpolation=cv2.INTER_NEAREST) src_mask[src_xyxy[1]:(src_xyxy[3]+1), src_xyxy[0]:(src_xyxy[2]+1)] = \ np.minimum(dst_mask, src_mask[src_xyxy[1]:(src_xyxy[3]+1), src_xyxy[0]:(src_xyxy[2]+1)]) # color transfer if random.random() > color_transfer_threshold: candidate_index = candidate_patch['image_index'] candidate_image = cv2.imread( traindb.color_path_from_index(candidate_index), cv2.IMREAD_COLOR).astype(np.float32) 
candidate_cropped = candidate_image[dst_xyxy[1]:(dst_xyxy[3] + 1), dst_xyxy[0]:(dst_xyxy[2] + 1)] candidate_cropped = cv2.resize(candidate_cropped, (src_xyxy[2] - src_xyxy[0] + 1, src_xyxy[3] - src_xyxy[1] + 1), interpolation=cv2.INTER_CUBIC) original_cropped = src_img[src_xyxy[1]:(src_xyxy[3] + 1), src_xyxy[0]:(src_xyxy[2] + 1)] transfer_cropped = Monge_Kantorovitch_color_transfer( original_cropped, candidate_cropped) src_img[src_xyxy[1]:(src_xyxy[3] + 1), src_xyxy[0]:(src_xyxy[2] + 1)] = transfer_cropped # im1 = cv2.resize(full_image, (128, 128)) # im2 = cv2.resize(src_img[src_xyxy[1]:(src_xyxy[3]+1), src_xyxy[0]:(src_xyxy[2]+1), :], (128, 128)) # # im2 = cv2.resize(np.repeat(255*src_mask[...,None], 3, -1), (128, 128)) # im3 = cv2.resize(candidate_image, (128, 128)) # im4 = cv2.resize(candidate_cropped, (128, 128)) # im = np.concatenate((im1, im2, im3, im4), 1) # cv2.imwrite("%03d_%03d.png"%(i, j), im) full_image = compose(full_image, src_img, src_mask) # boundary elision radius = int(0.05 * min(width, height)) if np.amin(src_mask) > 0: src_mask[0, :] = 0 src_mask[-1, :] = 0 src_mask[:, 0] = 0 src_mask[:, -1] = 0 sobelx = cv2.Sobel(src_mask, cv2.CV_64F, 1, 0, ksize=3) sobely = cv2.Sobel(src_mask, cv2.CV_64F, 0, 1, ksize=3) sobel = np.abs(sobelx) + np.abs(sobely) edge = np.zeros_like(sobel) edge[sobel > 0.9] = 1.0 morp_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (radius, radius)) edge = cv2.dilate(edge, morp_kernel, iterations=1) row, col = np.where(edge > 0) n_edge_pixels = len(row) pixel_indices = np.random.permutation(range(n_edge_pixels)) pixel_indices = pixel_indices[:(n_edge_pixels // 2)] row = row[pixel_indices] col = col[pixel_indices] src_img[row, col, :] = 255 full_mask = np.maximum(full_mask, src_mask) full_label[src_mask > 0] = cls_idx full_noice = compose(full_noice, src_img, src_mask) # im1 = cv2.resize(full_image, (128, 128)) # im2 = cv2.resize(src_img[src_xyxy[1]:(src_xyxy[3]+1), src_xyxy[0]:(src_xyxy[2]+1), :], (128, 128)) # im3 = cv2.resize(candidate_image, (128, 128)) # im4 = cv2.resize(candidate_cropped, (128, 128)) # im = np.concatenate((im1, im2, im3, im4), 1) # cv2.imwrite("%03d_%03d.png"%(i, j), im) output_name = str(image_index).zfill(12) output_path = osp.join(images_dir, output_name + '.jpg') cv2.imwrite(output_path, clamp_array(full_image, 0, 255).astype(np.uint8)) output_path = osp.join(noices_dir, output_name + '.jpg') cv2.imwrite(output_path, clamp_array(full_noice, 0, 255).astype(np.uint8)) output_path = osp.join(masks_dir, output_name + '.png') cv2.imwrite(output_path, clamp_array(255 * full_mask, 0, 255).astype(np.uint8)) output_path = osp.join(labels_dir, output_name + '.png') cv2.imwrite(output_path, full_label.astype(np.uint8)) print(i, image_index)
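`xywh_to_xyxy`, used repeatedly above, is not defined in this section. A plausible sketch, assuming [x, y, w, h] boxes with a top-left origin and inclusive integer corners clipped to the image (which matches the `src_xyxy[3] + 1` slicing above, though the original may differ):

def xywh_to_xyxy(box, width, height):
    # Convert [x, y, w, h] to inclusive [x1, y1, x2, y2], clipped to the image.
    x, y, w, h = box
    x1 = max(0, int(x))
    y1 = max(0, int(y))
    x2 = min(width - 1, int(x + w - 1))
    y2 = min(height - 1, int(y + h - 1))
    return [x1, y1, x2, y2]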
def create_tf_example(image, annotations_list, image_dir, category_index, include_masks=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys:
        [u'license', u'file_name', u'coco_url', u'height', u'width',
        u'date_captured', u'flickr_url', u'id']
      annotations_list: list of dicts with keys:
        [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox',
        u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset are
        given as [x, y, width, height] tuples using absolute coordinates where
        x, y represent the top-left (0-indexed) corner. This function converts
        to the format expected by the Tensorflow Object Detection API (which is
        [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
        size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed by the
        'id' field of each category. See the label_map_util.create_category_index
        function.
      include_masks: Whether to include instance segmentation masks (PNG encoded)
        in the result. default: False.

    Returns:
      example: The converted tf.Example
      num_annotations_skipped: Number of (invalid) annotations that were ignored.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                                image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/label': dataset_util.int64_list_feature(category_ids), 'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd), 'image/object/area': dataset_util.float_list_feature(area), } if include_masks: feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png)) example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return key, example, num_annotations_skipped
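The box conversion performed inside the loop above, in isolation: COCO gives [x, y, w, h] in absolute pixels, and the TF Object Detection API wants normalized [ymin, xmin, ymax, xmax]:

x, y, w, h = 73.0, 41.0, 128.0, 256.0   # example COCO box
image_width, image_height = 640, 480
ymin, xmin = y / image_height, x / image_width
ymax, xmax = (y + h) / image_height, (x + w) / image_width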
def vis_one_image( im, im_name, output_dir, boxes, segms=None, keypoints=None, body_uv=None, thresh=0.9, kp_thresh=2, dpi=200, box_alpha=0.0, dataset=None, show_class=False, ext='pdf'): """Visual debugging of detections.""" if not os.path.exists(output_dir): os.makedirs(output_dir) if isinstance(boxes, list): boxes, segms, keypoints, classes = convert_from_cls_format( boxes, segms, keypoints) if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh: return dataset_keypoints, _ = keypoint_utils.get_keypoints() if segms is not None and len(segms) > 0: masks = mask_util.decode(segms) color_list = colormap(rgb=True) / 255 kp_lines = kp_connections(dataset_keypoints) cmap = plt.get_cmap('rainbow') colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)] fig = plt.figure(frameon=False) fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi) ax = plt.Axes(fig, [0., 0., 1., 1.]) ax.axis('off') fig.add_axes(ax) ax.imshow(im) # Display in largest to smallest order to reduce occlusion areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) sorted_inds = np.argsort(-areas) mask_color_id = 0 for i in sorted_inds: bbox = boxes[i, :4] score = boxes[i, -1] if score < thresh: continue # show box (off by default) ax.add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='g', linewidth=0.5, alpha=box_alpha)) if show_class: ax.text( bbox[0], bbox[1] - 2, get_class_string(classes[i], score, dataset), fontsize=3, family='serif', bbox=dict( facecolor='g', alpha=0.4, pad=0, edgecolor='none'), color='white') # show mask if segms is not None and len(segms) > i: img = np.ones(im.shape) color_mask = color_list[mask_color_id % len(color_list), 0:3] mask_color_id += 1 w_ratio = .4 for c in range(3): color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio for c in range(3): img[:, :, c] = color_mask[c] e = masks[:, :, i] _, contour, hier = cv2.findContours( e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) for c in contour: polygon = Polygon( c.reshape((-1, 2)), fill=True, facecolor=color_mask, edgecolor='w', linewidth=1.2, alpha=0.5) ax.add_patch(polygon) # show keypoints if keypoints is not None and len(keypoints) > i: kps = keypoints[i] plt.autoscale(False) for l in range(len(kp_lines)): i1 = kp_lines[l][0] i2 = kp_lines[l][1] if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh: x = [kps[0, i1], kps[0, i2]] y = [kps[1, i1], kps[1, i2]] line = plt.plot(x, y) plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7) if kps[2, i1] > kp_thresh: plt.plot( kps[0, i1], kps[1, i1], '.', color=colors[l], markersize=3.0, alpha=0.7) if kps[2, i2] > kp_thresh: plt.plot( kps[0, i2], kps[1, i2], '.', color=colors[l], markersize=3.0, alpha=0.7) # add mid shoulder / mid hip for better visualization mid_shoulder = ( kps[:2, dataset_keypoints.index('right_shoulder')] + kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0 sc_mid_shoulder = np.minimum( kps[2, dataset_keypoints.index('right_shoulder')], kps[2, dataset_keypoints.index('left_shoulder')]) mid_hip = ( kps[:2, dataset_keypoints.index('right_hip')] + kps[:2, dataset_keypoints.index('left_hip')]) / 2.0 sc_mid_hip = np.minimum( kps[2, dataset_keypoints.index('right_hip')], kps[2, dataset_keypoints.index('left_hip')]) if (sc_mid_shoulder > kp_thresh and kps[2, dataset_keypoints.index('nose')] > kp_thresh): x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]] y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]] line = plt.plot(x, y) plt.setp( line, color=colors[len(kp_lines)], 
linewidth=1.0, alpha=0.7) if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh: x = [mid_shoulder[0], mid_hip[0]] y = [mid_shoulder[1], mid_hip[1]] line = plt.plot(x, y) plt.setp( line, color=colors[len(kp_lines) + 1], linewidth=1.0, alpha=0.7) # DensePose Visualization Starts!! ## Get full IUV image out IUV_fields = body_uv[1] # All_Coords = np.zeros(im.shape) All_inds = np.zeros([im.shape[0],im.shape[1]]) K = 26 ## inds = np.argsort(boxes[:,4]) ## for i, ind in enumerate(inds): entry = boxes[ind,:] if entry[4] > 0.65: entry=entry[0:4].astype(int) #### output = IUV_fields[ind] #### All_Coords_Old = All_Coords[ entry[1] : entry[1]+output.shape[1],entry[0]:entry[0]+output.shape[2],:] All_Coords_Old[All_Coords_Old==0]=output.transpose([1,2,0])[All_Coords_Old==0] All_Coords[ entry[1] : entry[1]+output.shape[1],entry[0]:entry[0]+output.shape[2],:]= All_Coords_Old ### CurrentMask = (output[0,:,:]>0).astype(np.float32) All_inds_old = All_inds[ entry[1] : entry[1]+output.shape[1],entry[0]:entry[0]+output.shape[2]] All_inds_old[All_inds_old==0] = CurrentMask[All_inds_old==0]*i All_inds[ entry[1] : entry[1]+output.shape[1],entry[0]:entry[0]+output.shape[2]] = All_inds_old # All_Coords[:,:,1:3] = 255. * All_Coords[:,:,1:3] All_Coords[All_Coords>255] = 255. All_Coords = All_Coords.astype(np.uint8) All_inds = All_inds.astype(np.uint8) # IUV_SaveName = os.path.basename(im_name).split('.')[0]+'_IUV.png' INDS_SaveName = os.path.basename(im_name).split('.')[0]+'_INDS.png' cv2.imwrite(os.path.join(output_dir, '{}'.format(IUV_SaveName)), All_Coords ) cv2.imwrite(os.path.join(output_dir, '{}'.format(INDS_SaveName)), All_inds ) print('IUV written to: ' , os.path.join(output_dir, '{}'.format(IUV_SaveName)) ) ### ### DensePose Visualization Done!! # output_name = os.path.basename(im_name) + '.' + ext fig.savefig(os.path.join(output_dir, '{}'.format(output_name)), dpi=dpi) plt.close('all')
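Reading the saved IUV image back, assuming the standard DensePose convention (part index 1..24 in the first channel, U and V scaled to [0, 255] in the next two, matching the `255. * All_Coords[:,:,1:3]` scaling above); the file name is a hypothetical output of the function:

import cv2

iuv = cv2.imread('demo_IUV.png')      # hypothetical output path of the function above
I_part = iuv[:, :, 0]                 # part index: 0 = background, 1..24 = body parts
U = iuv[:, :, 1] / 255.0              # undo the 255x scaling applied before saving
V = iuv[:, :, 2] / 255.0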
def vis_one_image( im, im_name, output_dir, boxes, segms=None, keypoints=None, thresh=0.9, kp_thresh=2, dpi=200, box_alpha=0.0, dataset=None, show_class=False, ext='pdf'): """Visual debugging of detections.""" if not os.path.exists(output_dir): os.makedirs(output_dir) if isinstance(boxes, list): boxes, segms, keypoints, classes = convert_from_cls_format( boxes, segms, keypoints) if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh: return dataset_keypoints, _ = keypoint_utils.get_keypoints() if segms is not None: masks = mask_util.decode(segms) color_list = colormap(rgb=True) / 255 kp_lines = kp_connections(dataset_keypoints) cmap = plt.get_cmap('rainbow') colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)] fig = plt.figure(frameon=False) fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi) ax = plt.Axes(fig, [0., 0., 1., 1.]) ax.axis('off') fig.add_axes(ax) ax.imshow(im) # Display in largest to smallest order to reduce occlusion areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) sorted_inds = np.argsort(-areas) mask_color_id = 0 for i in sorted_inds: bbox = boxes[i, :4] score = boxes[i, -1] if score < thresh: continue # show box (off by default) ax.add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='g', linewidth=0.5, alpha=box_alpha)) if show_class: ax.text( bbox[0], bbox[1] - 2, get_class_string(classes[i], score, dataset), fontsize=3, family='serif', bbox=dict( facecolor='g', alpha=0.4, pad=0, edgecolor='none'), color='white') # show mask if segms is not None and len(segms) > i: img = np.ones(im.shape) color_mask = color_list[mask_color_id % len(color_list), 0:3] mask_color_id += 1 w_ratio = .4 for c in range(3): color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio for c in range(3): img[:, :, c] = color_mask[c] e = masks[:, :, i] _, contour, hier = cv2.findContours( e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) for c in contour: polygon = Polygon( c.reshape((-1, 2)), fill=True, facecolor=color_mask, edgecolor='w', linewidth=1.2, alpha=0.5) ax.add_patch(polygon) # show keypoints if keypoints is not None and len(keypoints) > i: kps = keypoints[i] plt.autoscale(False) for l in range(len(kp_lines)): i1 = kp_lines[l][0] i2 = kp_lines[l][1] if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh: x = [kps[0, i1], kps[0, i2]] y = [kps[1, i1], kps[1, i2]] line = plt.plot(x, y) plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7) if kps[2, i1] > kp_thresh: plt.plot( kps[0, i1], kps[1, i1], '.', color=colors[l], markersize=3.0, alpha=0.7) if kps[2, i2] > kp_thresh: plt.plot( kps[0, i2], kps[1, i2], '.', color=colors[l], markersize=3.0, alpha=0.7) # add mid shoulder / mid hip for better visualization mid_shoulder = ( kps[:2, dataset_keypoints.index('right_shoulder')] + kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0 sc_mid_shoulder = np.minimum( kps[2, dataset_keypoints.index('right_shoulder')], kps[2, dataset_keypoints.index('left_shoulder')]) mid_hip = ( kps[:2, dataset_keypoints.index('right_hip')] + kps[:2, dataset_keypoints.index('left_hip')]) / 2.0 sc_mid_hip = np.minimum( kps[2, dataset_keypoints.index('right_hip')], kps[2, dataset_keypoints.index('left_hip')]) if (sc_mid_shoulder > kp_thresh and kps[2, dataset_keypoints.index('nose')] > kp_thresh): x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]] y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]] line = plt.plot(x, y) plt.setp( line, color=colors[len(kp_lines)], linewidth=1.0, alpha=0.7) if 
sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh: x = [mid_shoulder[0], mid_hip[0]] y = [mid_shoulder[1], mid_hip[1]] line = plt.plot(x, y) plt.setp( line, color=colors[len(kp_lines) + 1], linewidth=1.0, alpha=0.7) output_name = os.path.basename(im_name) + '.' + ext fig.savefig(os.path.join(output_dir, '{}'.format(output_name)), dpi=dpi) plt.close('all')
def vis_extract_func(im, im_name, output_dir, boxes, segms=None, keypoints=None, cls_feats=None, thresh=0.9, kp_thresh=2, dpi=200, box_alpha=0.0, dataset=None, show_class=False, ext='pdf', out_when_no_box=False): """Visual debugging of detections.""" one_human_assigned = 0 human_feats = None if not os.path.exists(output_dir): os.makedirs(output_dir) if isinstance(boxes, list): boxes, segms, keypoints, classes = convert_from_cls_format( boxes, segms, keypoints) #ADDED similar to convert_from_cls_format, but for feats_list feats_list = [b for b in cls_feats if len(b) > 0] if len(feats_list) > 0: feats = np.concatenate(feats_list) else: feats = None if (boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh) and not out_when_no_box: return None, None, 0 dataset_keypoints, _ = keypoint_utils.get_keypoints() if segms is not None and len(segms) > 0: masks = mask_util.decode(segms) color_list = colormap(rgb=True) / 255 kp_lines = kp_connections(dataset_keypoints) cmap = plt.get_cmap('rainbow') colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)] fig = plt.figure(frameon=False) fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi) ax = plt.Axes(fig, [0., 0., 1., 1.]) ax.axis('off') fig.add_axes(ax) ax.imshow(im) if boxes is None: sorted_inds = [] # avoid crash when 'boxes' is None else: # Display in largest to smallest order to reduce occlusion areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) sorted_inds = np.argsort(-areas) mask_color_id = 0 for i in sorted_inds: bbox = boxes[i, :4] score = boxes[i, -1] if score < thresh: continue #ADDED human features are extracted if classes[i] == 1 and not one_human_assigned: human_feats = feats[i] # show box (off by default) ax.add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='g', linewidth=0.5, alpha=box_alpha)) if show_class: ax.text(bbox[0], bbox[1] - 2, get_class_string(classes[i], score, dataset), fontsize=3, family='serif', bbox=dict(facecolor='g', alpha=0.4, pad=0, edgecolor='none'), color='white') # show mask if segms is not None and len(segms) > i: img = np.ones(im.shape) color_mask = color_list[mask_color_id % len(color_list), 0:3] mask_color_id += 1 w_ratio = .4 for c in range(3): color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio for c in range(3): img[:, :, c] = color_mask[c] e = masks[:, :, i] _, contour, hier = cv2.findContours(e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) for c in contour: polygon = Polygon(c.reshape((-1, 2)), fill=True, facecolor=color_mask, edgecolor='w', linewidth=1.2, alpha=0.5) ax.add_patch(polygon) # show keypoints if keypoints is not None and len(keypoints) > i: kps = keypoints[i] bbox_width = bbox[2] - bbox[0] bbox_height = bbox[3] - bbox[1] x_coor = (kps[0] - bbox[0]) / 256 y_coor = (kps[1] - bbox[1]) / 256 #extracted_kps = np.concatenate((np.asarray([bbox_width/255, bbox_height/255]), x_coor, y_coor, kps[3]), axis=0) extracted_kps = np.concatenate( (np.asarray([bbox_width / 255, bbox_height / 255 ]), x_coor[:11], y_coor[:11], kps[3][:11]), axis=0) #print('extracted_kps', extracted_kps) #extracted_kps = [item for sublist in extracted_kps for item in sublist] one_human_assigned = 1 plt.autoscale(False) for l in range(len(kp_lines)): i1 = kp_lines[l][0] i2 = kp_lines[l][1] if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh: x = [kps[0, i1], kps[0, i2]] y = [kps[1, i1], kps[1, i2]] line = plt.plot(x, y) plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7) if kps[2, i1] > kp_thresh: plt.plot(kps[0, i1], 
kps[1, i1], '.', color=colors[l],
                             markersize=3.0, alpha=0.7)
                if kps[2, i2] > kp_thresh:
                    plt.plot(kps[0, i2], kps[1, i2], '.', color=colors[l],
                             markersize=3.0, alpha=0.7)

            # add mid shoulder / mid hip for better visualization
            mid_shoulder = (
                kps[:2, dataset_keypoints.index('right_shoulder')] +
                kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
            sc_mid_shoulder = np.minimum(
                kps[2, dataset_keypoints.index('right_shoulder')],
                kps[2, dataset_keypoints.index('left_shoulder')])
            mid_hip = (kps[:2, dataset_keypoints.index('right_hip')] +
                       kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
            sc_mid_hip = np.minimum(
                kps[2, dataset_keypoints.index('right_hip')],
                kps[2, dataset_keypoints.index('left_hip')])
            if (sc_mid_shoulder > kp_thresh and
                    kps[2, dataset_keypoints.index('nose')] > kp_thresh):
                x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]]
                y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]]
                line = plt.plot(x, y)
                plt.setp(line, color=colors[len(kp_lines)], linewidth=1.0, alpha=0.7)
            if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
                x = [mid_shoulder[0], mid_hip[0]]
                y = [mid_shoulder[1], mid_hip[1]]
                line = plt.plot(x, y)
                plt.setp(line, color=colors[len(kp_lines) + 1], linewidth=1.0, alpha=0.7)

    output_name = os.path.basename(im_name) + '.' + ext
    fig.savefig(os.path.join(output_dir, '{}'.format(output_name)), dpi=dpi)
    plt.close('all')
    # `extracted_kps` is only bound when a person's keypoints were processed;
    # fall back to None so the return never raises UnboundLocalError
    if not one_human_assigned:
        extracted_kps = None
    return extracted_kps, human_feats, one_human_assigned
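The ADDED `feats_list` flattening above mirrors `convert_from_cls_format`: per-class arrays are concatenated in class order so that `feats[i]` lines up with `boxes[i]`. A toy illustration with made-up feature arrays:

import numpy as np

cls_feats = [np.zeros((0, 4)), np.ones((2, 4)), np.full((1, 4), 2.0)]  # per-class features
feats = np.concatenate([b for b in cls_feats if len(b) > 0])
# feats.shape == (3, 4); row order matches the flattened box order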
def _read_segmentation(self, ann, H, W): s = ann['segmentation'] s = s if type(s) == list else [s] return mask.decode(mask.frPyObjects(s, H, W)).max(axis=2)
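Why the `.max(axis=2)` in `_read_segmentation`: `frPyObjects` returns one RLE per polygon part, so `decode()` yields an (H, W, n) stack, and taking the max collapses a multi-part object into a single binary mask:

from pycocotools import mask

polys = [[0, 0, 0, 4, 4, 4, 4, 0], [6, 6, 6, 9, 9, 9, 9, 6]]  # two polygon parts
m = mask.decode(mask.frPyObjects(polys, 10, 10))  # shape (10, 10, 2)
combined = m.max(axis=2)                          # single (10, 10) mask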
def create_tf_example(image, annotations_list, image_dir, category_index, include_masks=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys:
        [u'license', u'file_name', u'coco_url', u'height', u'width',
        u'date_captured', u'flickr_url', u'id']
      annotations_list: list of dicts with keys:
        [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox',
        u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset are
        given as [x, y, width, height] tuples using absolute coordinates where
        x, y represent the top-left (0-indexed) corner. This function converts
        to the format expected by the Tensorflow Object Detection API (which is
        [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
        size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed by the
        'id' field of each category. See the label_map_util.create_category_index
        function.
      include_masks: Whether to include instance segmentation masks (PNG encoded)
        in the result. default: False.

    Returns:
      example: The converted tf.Example
      num_annotations_skipped: Number of (invalid) annotations that were ignored.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin_ratio = float(x) / image_width
        xmax_ratio = float(x + width) / image_width
        ymin_ratio = float(y) / image_height
        ymax_ratio = float(y + height) / image_height
        if xmin_ratio < 0.0 or ymin_ratio < 0.0:
            num_annotations_skipped += 1
            print('NOTICE: skip illegal bounding box ratio: {}, {}'.format(xmin_ratio, ymin_ratio))
            continue
        if xmax_ratio > 1.0 or ymax_ratio > 1.0:
            num_annotations_skipped += 1
            # report the ratios that actually failed this check
            print('NOTICE: skip illegal bounding box ratio: {}, {}'.format(xmax_ratio, ymax_ratio))
            continue
        xmin.append(xmin_ratio)
        xmax.append(xmax_ratio)
        ymin.append(ymin_ratio)
        ymax.append(ymax_ratio)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                                image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(category_names), 'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd), 'image/object/area': dataset_util.float_list_feature(area), } if include_masks: feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png)) example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return key, example, num_annotations_skipped
def segmToMask(segm, img_size): rle = segmToRLE(segm, img_size) m = maskUtils.decode(rle) return m
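`segmToRLE` is not shown here; the standard pycocotools logic (see `COCO.annToRLE`) handles the three segmentation formats as below. Treating `img_size` as (height, width) is an assumption:

def segmToRLE(segm, img_size):
    h, w = img_size
    if isinstance(segm, list):
        # polygon: merge all parts into one RLE
        rles = maskUtils.frPyObjects(segm, h, w)
        rle = maskUtils.merge(rles)
    elif isinstance(segm['counts'], list):
        # uncompressed RLE
        rle = maskUtils.frPyObjects(segm, h, w)
    else:
        # already a compressed RLE
        rle = segm
    return rle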
def fix_segments_intersections(polygons, height, width, img_name,
                               use_background_label, threshold=0.0,
                               ratio_tolerance=0.001, area_threshold=1):
    """Finds all intersecting regions and crops the contour of the rear object
    by the objects lying in front of it. This is needed because of how
    segmentation is annotated in CVAT. Intersection is calculated via the
    'iou' function from cocoapi.
    Args:
        polygons: all objects on the image, represented as a 2D array of the
            objects' contours
        height: height of the image
        width: width of the image
        img_name: name of the image file
        use_background_label: whether polygons labeled 'background' are kept
        threshold: intersection-over-union threshold for two objects; the
            default of 0 processes any two intersecting objects
        ratio_tolerance: used when one object is fully (or almost fully)
            inside another one and we do not want to cut a "hole" into one
            of the objects
        area_threshold: minimal area (in pixels) kept by mask_to_polygon
    """
    converted_polygons = []
    empty_polygon = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    # Convert the points of each polygon from a string to coco's array format.
    # All polygons must be sorted in order from bottom to top.
    for polygon in polygons:
        label = polygon['label']
        points = polygon['points'].split(';')
        new_polygon = []
        for xy in points:
            x = float(xy.split(',')[0])
            y = float(xy.split(',')[1])
            new_polygon.append(x)
            new_polygon.append(y)
        converted_polygons.append({'label': label, 'points': new_polygon})

    for i in range(0, len(converted_polygons)):
        rle_bottom = mask_util.frPyObjects([converted_polygons[i]['points']], height, width)
        segment_overlapped = False
        for j in range(i + 1, len(converted_polygons)):
            rle_top = mask_util.frPyObjects([converted_polygons[j]['points']], height, width)
            iou = mask_util.iou(rle_bottom, rle_top, [0, 0])
            area_top = sum(mask_util.area(rle_top))
            area_bottom = sum(mask_util.area(rle_bottom))
            if area_bottom == 0:
                continue
            area_ratio = area_top / area_bottom
            sum_iou = sum(iou)

            # If the top segment is fully inside the bottom one, save it as is
            if area_ratio - ratio_tolerance < sum_iou[0] < area_ratio + ratio_tolerance:
                continue

            # Check the case when the bottom segment is fully inside the top
            # one; this indicates a mistake in the annotation, so the segment
            # is also saved as is
            if 1 / area_ratio - ratio_tolerance < sum_iou[0] < 1 / area_ratio + ratio_tolerance:
                continue

            if sum_iou[0] > threshold:
                segment_overlapped = True
                bottom_mask = np.array(mask_util.decode(rle_bottom), dtype=np.uint8)
                top_mask = np.array(mask_util.decode(rle_top), dtype=np.uint8)

                bottom_mask = np.subtract(bottom_mask, top_mask)
                bottom_mask[bottom_mask > 1] = 0

                bottom_mask = np.sum(bottom_mask, axis=2)
                bottom_mask = np.array(bottom_mask > 0, dtype=np.uint8)
                converted_polygons[i]['points'] = mask_to_polygon(bottom_mask,
                                                                  area_threshold=area_threshold)
                # If the segment is now empty, apply a small fix to avoid
                # errors in the cocoapi functions
                if len(converted_polygons[i]['points']) == 0:
                    converted_polygons[i]['points'] = [empty_polygon]
                rle_bottom = mask_util.frPyObjects(converted_polygons[i]['points'], height, width)
        if not segment_overlapped:
            converted_polygons[i]['points'] = [converted_polygons[i]['points']]

    output_polygons = []
    for i in range(0, len(converted_polygons)):
        if not use_background_label and converted_polygons[i]['label'] == 'background':
            continue
        poly_len = len(converted_polygons[i]['points'])
        if poly_len == 0 or converted_polygons[i]['points'] == [empty_polygon]:
            log.warning('Image <{}> has an empty polygon with label <{}>. '
                        'Perhaps there is a mistake in the annotation'.
                        format(img_name, converted_polygons[i]['label']))
        else:
            output_polygons.append(converted_polygons[i])

    return output_polygons
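The overlap test above in isolation: `mask_util.iou` takes lists of RLEs plus one iscrowd flag per ground-truth entry, and returns a 2D array of pairwise IoU values:

import pycocotools.mask as mask_util

a = mask_util.frPyObjects([[0.0, 0.0, 0.0, 10.0, 10.0, 10.0, 10.0, 0.0]], 20, 20)
b = mask_util.frPyObjects([[5.0, 5.0, 5.0, 15.0, 15.0, 15.0, 15.0, 5.0]], 20, 20)
print(mask_util.iou(a, b, [0]))  # pairwise IoU of the two triangles' masks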
def polygons_to_mask(self, polygons):
    rle = mask_util.frPyObjects(polygons, self.height, self.width)
    rle = mask_util.merge(rle)
    return mask_util.decode(rle)
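The `merge` step above matters: without it, `decode()` would return one channel per polygon part instead of a single (H, W) mask:

import pycocotools.mask as mask_util

rle_parts = mask_util.frPyObjects([[1.0, 1.0, 1.0, 6.0, 6.0, 6.0]], 12, 12)  # one triangle
merged = mask_util.merge(rle_parts)
m = mask_util.decode(merged)  # single (12, 12) uint8 mask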
def vis_one_proposal(img_fpath: str, bbox: List, mask: Dict, draw_boxes=True):
    """Visualizes the bbox and mask of one proposal.

    Args:
        img_fpath: the image file path.
        bbox: [x, y, w, h]
        mask: mask in RLE format; its bounding box is always drawn dash-dotted.
        draw_boxes: whether to additionally draw `bbox` itself.
    """
    img_name = img_fpath.split('/')[-1].replace(".jpg", "")
    colors = generate_colors()
    dpi = 100.0
    img = np.array(Image.open(img_fpath), dtype="float32") / 255
    img_sizes = mask["size"]
    fig = plt.figure()
    fig.set_size_inches(img_sizes[1] / dpi, img_sizes[0] / dpi, forward=True)
    fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=None, hspace=None)
    ax = fig.subplots()
    ax.set_axis_off()

    color = colors[0]
    x, y, w, h = toBbox(mask)
    rect = patches.Rectangle((x, y), w, h, linewidth=1, linestyle='-.',
                             edgecolor=color, facecolor='none', alpha=1.0)
    ax.add_patch(rect)
    if draw_boxes:
        xb, yb, wb, hb = bbox
        rect = patches.Rectangle((xb, yb), wb, hb, linewidth=1,
                                 edgecolor=colors[-1], facecolor='none', alpha=1.0)
        ax.add_patch(rect)

    category_name = "object"
    ax.annotate(category_name, (x + 0.5 * w, y + 0.5 * h), color=color,
                weight='bold', fontsize=7, ha='center', va='center', alpha=1.0)
    binary_mask = decode(mask)
    apply_mask(img, binary_mask, color)

    ax.imshow(img)
    fig.savefig("plots/" + img_name + ".jpg")
    plt.close(fig)
print(i) all_mask = {} query_roi = gen_bbox(label_0, i) query_img = img_list_from_begin_label[0] if not os.path.exists(sys.argv[3] + 'object_json/' + video_dir + '_%d.json' % i): print('this is not person') search_instance.append(i) continue sort_all_mask = json.loads( open(sys.argv[3] + 'object_json/' + video_dir + '_%d.json' % i).read()) for gallery_img, (x1, y1, x2, y2, mask) in sort_all_mask: x1, y1, x2, y2 = enlarge_bbox([x1, y1, x2, y2]) mask = np.array(maskUtils.decode(mask)) #prob_name = prob_dir + video_dir + '/%05d.png' % (int(gallery_img[-9:-4]) - head_num) if int(gallery_img[-9:-4]) - small_num < start_index: continue prob = pic_list[int(gallery_img[-9:-4]) - head_num] if i in np.unique(prob): continue print(gallery_img, x1, y1, x2, y2) prob[(prob == 0) & (mask == 1)] = i #prob = prob + mask * i pic_list[int(gallery_img[-9:-4]) - head_num] = prob propgate_forward( int(gallery_img[-9:-4]) - head_num, i, gallery_img) propgate_backward(