def crop_mask(boxes, segmentations, flipped, imsize):
    assert boxes.shape[0] == len(segmentations)
    psegmentations = []
    for i in xrange(len(segmentations)):
        gts = segmentations[i]
        box = boxes[i, :]
        if type(gts) == list and gts:
            assert type(gts[0]) != dict
            prle = mask.frPyObjects(gts, imsize[1], imsize[0])
        elif type(gts) == dict and type(gts['counts']) == list:
            prle = mask.frPyObjects([gts], imsize[1], imsize[0])
        elif type(gts) == dict and \
                (type(gts['counts']) == unicode or type(gts['counts']) == str):
            prle = [gts]
        else:
            print '{} box has no segmentation'.format(i)
            psegmentations.append([])
            continue
        if len(prle) == 1:
            prle = prle[0]
        else:
            prle = mask.merge(prle)
        pmask = mask.decode([prle])
        if flipped:
            pmask = pmask[:, ::-1, :]
        pmask = np.copy(pmask[box[1]:box[3], box[0]:box[2], :], order='F')
        psegmentations.append(mask.encode(pmask))
    return psegmentations

def get_segmentation(self, gt_segmentation, box, max_ind, imsize):
    segmentation = []
    gts = gt_segmentation[max_ind]
    if type(gts) == list:
        assert type(gts[0]) != dict
        prle = mask.frPyObjects(gts, imsize[0], imsize[1])
    elif type(gts) == dict and type(gts['counts']) == list:
        prle = mask.frPyObjects([gts], imsize[0], imsize[1])
    elif type(gts) == dict and \
            (type(gts['counts']) == unicode or type(gts['counts']) == str):
        prle = [gts]
    else:
        return segmentation
    if len(prle) == 1:
        prle = prle[0]
    else:
        prle = mask.merge(prle)
    # Rasterize the box as a rectangular polygon and intersect it with the mask.
    grle = mask.frPyObjects(
        [[box[0], box[1], box[2], box[1], box[2], box[3],
          box[0], box[3], box[0], box[1]]],
        imsize[0], imsize[1])
    pmask = mask.merge([prle, grle[0]], intersect=True)
    segmentation = pmask
    # for sm in gts:
    #     poly = Polygon(zip(sm[::2], sm[1::2]))
    #     bpoly = Polygon([(box[0], box[1]), (box[0], box[3]), (box[2], box[3]),
    #                      (box[2], box[1]), (box[0], box[1])])
    #     bpoly = bpoly.intersection(poly)
    #     coords = array(bpoly.exterior.coords)
    #     coords = coords - [box[0], box[1]]
    #     segmentation.append(coords.ravel().tolist())
    return segmentation

def polys_to_mask_wrt_box(polygons, box, M):
    """Convert from the COCO polygon segmentation format to a binary mask
    encoded as a 2D array of data type numpy.float32. The polygon segmentation
    is understood to be enclosed in the given box and rasterized to an M x M
    mask. The resulting mask is therefore of shape (M, M).
    """
    w = box[2] - box[0]
    h = box[3] - box[1]
    w = np.maximum(w, 1)
    h = np.maximum(h, 1)

    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p[0::2] = (p[0::2] - box[0]) * M / w
        p[1::2] = (p[1::2] - box[1]) * M / h
        polygons_norm.append(p)

    rle = mask_util.frPyObjects(polygons_norm, M, M)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    # Flatten in case polygons was a list
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask

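# Hedged usage sketch (made-up values, not from the original code base): rasterize
# one polygon relative to its bounding box into a 28 x 28 mask target, as used for
# Mask R-CNN style mask supervision. Box format is [x1, y1, x2, y2].
import numpy as np

_poly = [[12.0, 12.0, 40.0, 12.0, 40.0, 44.0, 12.0, 44.0]]
_box = np.array([10.0, 10.0, 50.0, 50.0], dtype=np.float32)
_target = polys_to_mask_wrt_box(_poly, _box, 28)
assert _target.shape == (28, 28) and _target.dtype == np.float32
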
def _flip_rle(rle, height, width):
    if 'counts' in rle and type(rle['counts']) == list:
        # Magic RLE format handling painfully discovered by looking at the
        # COCO API showAnns function.
        rle = mask_util.frPyObjects([rle], height, width)
    mask = mask_util.decode(rle)
    mask = mask[:, ::-1, :]
    rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
    return rle

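# Hedged usage sketch (toy values): the uncompressed-RLE branch of _flip_rle,
# i.e. a dict whose 'counts' is a plain Python list summing to height * width.
# The counts below are made up; a horizontal flip leaves the foreground area unchanged.
import pycocotools.mask as mask_util

_unc = {'size': [7, 8], 'counts': [6, 1, 40, 4, 5]}   # 7 x 8 image, 5 foreground pixels
_flipped = _flip_rle(_unc, 7, 8)                      # list with one compressed RLE
assert int(mask_util.area(_flipped)[0]) == 5
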
def annToRLE(self, ann, height, width):
    """
    Convert an annotation, which can be polygons or uncompressed RLE, to compressed RLE.
    :return: compressed RLE
    """
    segm = ann['segmentation']
    if isinstance(segm, list):
        # polygon -- a single object might consist of multiple parts
        # we merge all parts into one mask rle code
        rles = maskUtils.frPyObjects(segm, height, width)
        rle = maskUtils.merge(rles)
    elif isinstance(segm['counts'], list):
        # uncompressed RLE
        rle = maskUtils.frPyObjects(segm, height, width)
    else:
        # rle
        rle = ann['segmentation']
    return rle

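# Hedged illustration (toy values, not from the original code base): all three
# branches above end in a compressed RLE, which maskUtils.decode turns into a
# (height, width) uint8 binary mask. Shown here for the polygon branch only.
from pycocotools import mask as maskUtils

_segm = [[1.0, 1.0, 6.0, 1.0, 6.0, 6.0, 1.0, 6.0]]   # one polygon, flattened [x, y, ...]
_rle = maskUtils.merge(maskUtils.frPyObjects(_segm, 8, 8))
_m = maskUtils.decode(_rle)                           # shape (8, 8), dtype uint8
assert _m.shape == (8, 8)
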
def get_mask(idx):
    ann_ids = coco.getAnnIds(imgIds=img_ids[idx])
    anns = coco.loadAnns(ann_ids)
    img = coco.loadImgs(img_ids[idx])[0]
    m = np.zeros((img['height'], img['width']))
    for j in anns:
        if j['iscrowd']:
            rle = mask.frPyObjects(j['segmentation'], img['height'], img['width'])
            m += mask.decode(rle)
    return m < 0.5

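# Hedged usage note (assumes `coco` and `img_ids` are already set up as in the
# snippet above): the returned boolean array is True wherever NO crowd region
# covers the pixel, so it is typically used as a "valid pixel" mask in training.
_valid = get_mask(0)
print(_valid.shape, _valid.dtype)   # (height, width) bool
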
def convert(self, mode):
    width, height = self.size
    if mode == "mask":
        rles = mask_utils.frPyObjects(
            [p.numpy() for p in self.polygons], height, width
        )
        rle = mask_utils.merge(rles)
        mask = mask_utils.decode(rle)
        mask = torch.from_numpy(mask)
        # TODO add squeeze?
        return mask

def annToRLE(self, ann):
    """
    Convert an annotation, which can be polygons or uncompressed RLE, to compressed RLE.
    :return: compressed RLE
    """
    t = self.imgs[ann['image_id']]
    h, w = t['height'], t['width']
    segm = ann['segmentation']
    if type(segm) == list:
        # polygon -- a single object might consist of multiple parts
        # we merge all parts into one mask rle code
        rles = maskUtils.frPyObjects(segm, h, w)
        rle = maskUtils.merge(rles)
    elif type(segm['counts']) == list:
        # uncompressed RLE
        rle = maskUtils.frPyObjects(segm, h, w)
    else:
        # rle
        rle = ann['segmentation']
    return rle

def polys_to_mask(polygons, height, width):
    """Convert from the COCO polygon segmentation format to a binary mask
    encoded as a 2D array of data type numpy.float32. The polygon segmentation
    is understood to be enclosed inside a height x width image. The resulting
    mask is therefore of shape (height, width).
    """
    rle = mask_util.frPyObjects(polygons, height, width)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    # Flatten in case polygons was a list
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask

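# Hedged usage sketch (made-up values): a single triangle rasterized into a
# 10 x 10 float32 mask; each polygon is a flattened [x0, y0, x1, y1, ...] list.
import numpy as np

_triangle = [[1.0, 1.0, 8.0, 1.0, 4.0, 8.0]]
_m = polys_to_mask(_triangle, 10, 10)
assert _m.shape == (10, 10) and _m.dtype == np.float32
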
def segmentation_to_mask(polys, height, width):
    """
    Convert polygons to a binary mask.

    Args:
        polys: a list of nx2 float arrays of polygon vertices
        height: output mask height
        width: output mask width

    Returns:
        a binary matrix of (height, width)
    """
    polys = [p.flatten().tolist() for p in polys]
    rles = cocomask.frPyObjects(polys, height, width)
    rle = cocomask.merge(rles)
    return cocomask.decode(rle)

def _getIgnoreRegion(iid, coco):
    img = coco.imgs[iid]

    if not 'ignore_regions_x' in img.keys():
        return None

    if len(img['ignore_regions_x']) == 0:
        return None

    rgns_merged = []
    for region_x, region_y in zip(img['ignore_regions_x'], img['ignore_regions_y']):
        # interleave x and y coordinates into a flat [x0, y0, x1, y1, ...] polygon
        rgns = [iter(region_x), iter(region_y)]
        rgns_merged.append(list(it.next() for it in itertools.cycle(rgns)))

    rles = maskUtils.frPyObjects(rgns_merged, img['height'], img['width'])
    rle = maskUtils.merge(rles)
    return maskUtils.decode(rle)

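# Hedged sketch of one common downstream use (an assumption, not part of the
# loader above): zero per-pixel loss weights inside the decoded ignore region.
# `iid` and `coco` are assumed to exist exactly as in _getIgnoreRegion; the
# weight array here is a stand-in.
import numpy as np

_ignore = _getIgnoreRegion(iid, coco)
_weights = np.ones((coco.imgs[iid]['height'], coco.imgs[iid]['width']), dtype=np.float32)
if _ignore is not None:
    _weights *= (1.0 - _ignore.astype(np.float32))
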
def polys_to_mask_wrt_box(polygons, box, M):
    w = box[2] - box[0]
    h = box[3] - box[1]
    w = np.maximum(w, 1)
    h = np.maximum(h, 1)

    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p[0::2] = (p[0::2] - box[0]) * M / w
        p[1::2] = (p[1::2] - box[1]) * M / h
        polygons_norm.append(p)

    rle = mask_util.frPyObjects(polygons_norm, M, M)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask

def getMask(self, ref):
    '''
    :return: mask, mask-area, mask-center
    '''
    ann = self.refToAnn[ref['ref_id']]
    image = self.imgs[ref['image_id']]
    if type(ann['segmentation'][0]) == list:  # polygon
        rle = mask.frPyObjects(ann['segmentation'], image['height'], image['width'])
    else:  # mask
        rle = ann['segmentation']
    m = mask.decode(rle)
    m = np.sum(m, axis=2)  # sometimes there are multiple binary maps (corresponding to multiple segs)
    m = m.astype(np.uint8)  # convert to np.uint8
    # area
    area = sum(mask.area(rle))  # very close to ann['area']
    # position
    position_x = np.mean(np.where(m == 1)[1])  # [1] means columns (matlab style) -> x (c++ style)
    position_y = np.mean(np.where(m == 1)[0])  # [0] means rows (matlab style) -> y (c++ style)
    # mass position (If there were multiple regions, we use the largest one.)
    label_m = label(m, connectivity=m.ndim)
    regions = regionprops(label_m)
    if len(regions) > 0:
        largest_id = np.argmax(np.array([props.filled_area for props in regions]))
        largest_props = regions[largest_id]
        mass_y, mass_x = largest_props.centroid
    else:
        mass_x, mass_y = position_x, position_y
    # if centroid is not in mask, we find the closest point to it from mask
    if m[int(mass_y), int(mass_x)] != 1:  # indices must be integers
        print 'Finding closest mask point...'
        kernel = np.ones((10, 10), np.uint8)
        me = cv2.erode(m, kernel, iterations=1)
        points = zip(np.where(me == 1)[0].tolist(), np.where(me == 1)[1].tolist())  # row, col style
        points = np.array(points)
        dist = np.sum((points - (mass_y, mass_x)) ** 2, axis=1)
        id = np.argsort(dist)[0]
        mass_y, mass_x = points[id]
    # return
    return {'mask': m, 'area': area,
            'position_x': position_x, 'position_y': position_y,
            'mass_x': mass_x, 'mass_y': mass_y}

def to_mask(polys, size):
    """Convert list of polygons to full size binary mask

    Parameters
    ----------
    polys : list of numpy.ndarray
        Each numpy.ndarray has shape (N, 2) where N is the number of polygon
        vertices. The second axis represents points of the polygons.
        Specifically, these are :math:`(x, y)`.
    size : tuple
        Tuple of length 2: (width, height).

    Returns
    -------
    numpy.ndarray
        Full size binary mask of shape (height, width)
    """
    try_import_pycocotools()
    import pycocotools.mask as cocomask
    width, height = size
    polys = [p.flatten().tolist() for p in polys]
    rles = cocomask.frPyObjects(polys, height, width)
    rle = cocomask.merge(rles)
    return cocomask.decode(rle)

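# Hedged usage sketch (toy values): polygons are (N, 2) arrays of (x, y) vertices
# and `size` is (width, height), so the returned binary mask is (height, width).
import numpy as np

_square = np.array([[2.0, 2.0], [8.0, 2.0], [8.0, 8.0], [2.0, 8.0]])
_m = to_mask([_square], (12, 10))
assert _m.shape == (10, 12)
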
def create_tf_example(image_path, image, annotations_list, category_index, include_masks=False): image_height = image['height'] image_width = image['width'] filename = image['file_name'] image_id = image['id'] with tf.gfile.GFile(image_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) key = hashlib.sha256(encoded_jpg).hexdigest() xmin = [] xmax = [] ymin = [] ymax = [] is_crowd = [] category_names = [] category_ids = [] area = [] encoded_mask_png = [] num_annotations_skipped = 0 for idx, object_annotations in enumerate(annotations_list): (x, y, width, height) = tuple(object_annotations['bbox']) if width <= 0 or height <= 0: num_annotations_skipped += 1 continue if x + width > image_width or y + height > image_height: num_annotations_skipped += 1 continue xmin.append(float(x) / image_width) xmax.append(float(x + width) / image_width) ymin.append(float(y) / image_height) ymax.append(float(y + height) / image_height) is_crowd.append(object_annotations['iscrowd']) category_id = int(object_annotations['category_id']) category_ids.append(category_id) category_names.append( category_index[category_id]['name'].encode('utf8')) area.append(object_annotations['area']) if include_masks: segm = object_annotations['segmentation'] if isinstance(segm, list): rles = mask.frPyObjects(segm, image_height, image_width) rle = mask.merge(rles) m = mask.decode(rle) else: m = mask.decode(segm) pil_image = PIL.Image.fromarray(m) output_io = io.BytesIO() pil_image.save(output_io, format='PNG') encoded_mask_png.append(output_io.getvalue()) if DUMP_MASK_IMAGES: m[m > 0] = 255 pil_image = PIL.Image.fromarray(m) save_path = filename.split('.')[0] + "_" + str(idx) + ".png" save_path = FLAGS.output_dir + '/' + filename.split( '.')[0] + '_mask_' + str(idx) + '.png' pil_image.save(save_path) feature_dict = { 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/label': dataset_util.int64_list_feature(category_ids), 'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd), 'image/object/area': dataset_util.float_list_feature(area), } if include_masks: feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png)) example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) return example
def load_sequence(self, sequence): """Load a sequence of images/frames Auxiliary function that loads a sequence of frames with the corresponding ground truth and their filenames. Returns a dict with the images in [0, 1], their corresponding labels, their subset (i.e. category, clip, prefix) and their filenames. """ from pycocotools import mask as cocomask from matplotlib.path import Path X = [] Y = [] F = [] for prefix, img in sequence: if not os.path.exists('%s/%s' % (self.image_path, img['file_name'])): raise RuntimeError('Image %s is missing' % img['file_name']) im = Image.open('%s/%s' % (self.image_path, img['file_name'])).copy() if im.mode == 'L': if self.warn_grayscale: warnings.warn('image %s is grayscale..' % img['file_name'], RuntimeWarning) im = im.convert('RGB') # load the annotations and build the mask anns = self.coco.loadAnns(self.coco.getAnnIds( imgIds=img['id'], catIds=prefix, iscrowd=None)) mask = np.zeros(im.size).transpose(1, 0) for ann in anns: catId = ann['category_id'] if type(ann['segmentation']) == list: # polygon for seg in ann['segmentation']: # xy vertex of the polygon poly = np.array(seg).reshape((len(seg)/2, 2)) closed_path = Path(poly) nx, ny = img['width'], img['height'] x, y = np.meshgrid(np.arange(nx), np.arange(ny)) x, y = x.flatten(), y.flatten() points = np.vstack((x, y)).T grid = closed_path.contains_points(points) if np.count_nonzero(grid) == 0: warnings.warn( 'One of the annotations that compose the mask ' 'of %s was empty' % img['file_name'], RuntimeWarning) grid = grid.reshape((ny, nx)) mask[grid] = catId else: # mask if type(ann['segmentation']['counts']) == list: rle = cocomask.frPyObjects( [ann['segmentation']], img['height'], img['width']) else: rle = [ann['segmentation']] grid = cocomask.decode(rle)[:, :, 0] grid = grid.astype('bool') mask[grid] = catId mask = np.array(mask.astype('int32')) im = np.array(im).astype(floatX) / 255. X.append(im) Y.append(mask) F.append(img['file_name']) ret = {} ret['data'] = np.array(X) ret['labels'] = np.array(Y) ret['subset'] = prefix ret['filenames'] = np.array(F) return ret
def create_tf_example(image, annotations_list, image_dir, category_index, include_masks=False): """Converts image and annotations to a tf.Example proto. Args: image: dict with keys: [u'license', u'file_name', u'coco_url', u'height', u'width', u'date_captured', u'flickr_url', u'id'] annotations_list: list of dicts with keys: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box coordinates in the official COCO dataset are given as [x, y, width, height] tuples using absolute coordinates where x, y represent the top-left (0-indexed) corner. This function converts to the format expected by the Tensorflow Object Detection API (which is which is [ymin, xmin, ymax, xmax] with coordinates normalized relative to image size). image_dir: directory containing the image files. category_index: a dict containing COCO category information keyed by the 'id' field of each category. See the label_map_util.create_category_index function. include_masks: Whether to include instance segmentations masks (PNG encoded) in the result. default: False. Returns: example: The converted tf.Example num_annotations_skipped: Number of (invalid) annotations that were ignored. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ global src_file_index image_height = image['height'] image_width = image['width'] filename = image['file_name'] image_id = image['id'] full_path = os.path.join(image_dir, filename) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) key = hashlib.sha256(encoded_jpg).hexdigest() xmin = [] xmax = [] ymin = [] ymax = [] is_crowd = [] category_names = [] category_ids = [] area = [] encoded_mask_png = [] num_annotations_skipped = 0 for object_annotations in annotations_list: (x, y, width, height) = tuple(object_annotations['bbox']) if width <= 0 or height <= 0: num_annotations_skipped += 1 continue if x + width > image_width or y + height > image_height: num_annotations_skipped += 1 continue category_id = int(object_annotations['category_id']) if not category_id_filter(category_id): num_annotations_skipped += 1 continue xmin.append(float(x) / image_width) xmax.append(float(x + width) / image_width) ymin.append(float(y) / image_height) ymax.append(float(y + height) / image_height) is_crowd.append(object_annotations['iscrowd']) category_ids.append(category_id) category_names.append( category_index[category_id]['name'].encode('utf8')) area.append(object_annotations['area']) if include_masks: run_len_encoding = mask.frPyObjects( object_annotations['segmentation'], image_height, image_width) binary_mask = mask.decode(run_len_encoding) if not object_annotations['iscrowd']: binary_mask = np.amax(binary_mask, axis=2) pil_image = PIL.Image.fromarray(binary_mask) output_io = io.BytesIO() pil_image.save(output_io, format='PNG') encoded_mask_png.append(output_io.getvalue()) if len(category_ids) == 0: return None, None, None category_ids = trans_category_ids(category_ids) feature_dict = { 'image/height': int64_feature(image_height), 'image/width': int64_feature(image_width), 'image/channels': int64_feature(3), 'image/shape': int64_feature([image_height, image_width, 3]), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': 
dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/bboxlabel': dataset_util.int64_list_feature(category_ids), 'image/object/bbox/label': dataset_util.int64_list_feature(category_ids), 'image/object/bbox/label_text': bytes_feature(category_to_text(category_ids)), 'image/object/difficult': dataset_util.int64_list_feature(is_crowd), 'image/object/truncated': dataset_util.int64_list_feature(np.ones_like(is_crowd)), 'image/object/area': dataset_util.float_list_feature(area), 'image/file_index': dataset_util.int64_feature(src_file_index), } if include_masks: feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png)) example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) src_file_index += 1 return key, example, num_annotations_skipped
for im_path in paths:
    im = cv.imread('restricted/' + im_path)
    print(im.shape)
    print(im_path)
    print(int(im_path[:-4]))
    seg_list, label_list = get_index(int(im_path[:-4]), load_dict)
    #print(seg_list)
    #print(label_list)
    #masks = np.zeros((im.shape[0], im.shape[1], 1), np.uint8)
    seg = []
    masks = []
    cnt = 0
    for seg_idx in seg_list:
        seg = load_dict['annotations'][seg_idx]['segmentation'][0]  # load first seg in seg list
        compactedRLE = maskutil.frPyObjects([seg], im.shape[0], im.shape[1])  # compress through RLE
        mask = maskutil.decode(compactedRLE)  # decode to mask
        print(mask.shape)
        mask = np.reshape(mask, (im.shape[0], im.shape[1]))  # for display
        mask = mask * get_color(label_list[cnt])  # change color for different class
        masks.append(mask)  # add sub mask for a full mask
        print(mask.shape)
        cnt += 1
    final_mask = np.zeros((im.shape[0], im.shape[1]), np.uint8)  # final mask for each img
    for mask in masks:  # merge all masks into final mask
        final_mask = final_mask + mask
    plt.imshow(final_mask)  # show final mask
    plt.show()

def draw_binary_mask(self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=4096): """ Args: binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and W is the image width. Each value in the array is either a 0 or 1 value of uint8 type. color: color of the mask. Refer to `matplotlib.colors` for a full list of formats that are accepted. If None, will pick a random color. edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a full list of formats that are accepted. text (str): if None, will be drawn in the object's center of mass. alpha (float): blending efficient. Smaller values lead to more transparent masks. area_threshold (float): a connected component small than this will not be shown. Returns: output (VisImage): image object with mask drawn. """ if color is None: color = random_color(rgb=True, maximum=1) if area_threshold is None: area_threshold = 4096 has_valid_segment = False binary_mask = binary_mask.astype("uint8") # opencv needs uint8 mask = GenericMask(binary_mask, self.output.height, self.output.width) shape2d = (binary_mask.shape[0], binary_mask.shape[1]) if not mask.has_holes: # draw polygons for regular masks for segment in mask.polygons: area = mask_util.area( mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) if area < area_threshold: continue has_valid_segment = True segment = segment.reshape(-1, 2) self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) else: rgba = np.zeros(shape2d + (4, ), dtype="float32") rgba[:, :, :3] = color rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha has_valid_segment = True self.output.ax.imshow(rgba) if text is not None and has_valid_segment: # TODO sometimes drawn on wrong objects. the heuristics here can improve. lighter_color = self._change_color_brightness( color, brightness_factor=0.7) _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats( binary_mask, 8) largest_component_id = np.argmax(stats[1:, -1]) + 1 # draw text on the largest component, as well as other very large components. for cid in range(1, _num_cc): if cid == largest_component_id or stats[ cid, -1] > _LARGE_MASK_AREA_THRESH: # median is more stable than centroid # center = centroids[largest_component_id] center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] self.draw_text(text, center, color=lighter_color) return self.output
def __getitem__(self, index): # index = (index + 2000) % len(self.roidb) blob = defaultdict(list) im_blob, im_scales = self.get_image_blob([self.roidb[index]]) if config.network.has_rpn: if self.phase != 'test': add_rpn_blobs(blob, im_scales, [self.roidb[index]]) data = {'data': im_blob, 'im_info': blob['im_info']} label = {'roidb': blob['roidb'][0]} for stride in config.network.rpn_feat_stride: label.update({ 'rpn_labels_fpn{}'.format(stride): blob['rpn_labels_int32_wide_fpn{}'.format(stride)].astype( np.int64), 'rpn_bbox_targets_fpn{}'.format(stride): blob['rpn_bbox_targets_wide_fpn{}'.format(stride)], 'rpn_bbox_inside_weights_fpn{}'.format(stride): blob[ 'rpn_bbox_inside_weights_wide_fpn{}'.format(stride)], 'rpn_bbox_outside_weights_fpn{}'.format(stride): blob[ 'rpn_bbox_outside_weights_wide_fpn{}'.format(stride)] }) else: data = {'data': im_blob, 'im_info': np.array([[im_blob.shape[-2], im_blob.shape[-1], im_scales[0]]], np.float32)} label = {'roidb': self.roidb[index]} else: raise NotImplementedError if config.network.has_fcn_head: if self.phase != 'test': seg_gt = np.array(Image.open(self.roidb[index]['image'].replace('images', 'labels').replace('.jpg', '.png'))) if self.roidb[index]['flipped']: seg_gt = np.fliplr(seg_gt) seg_gt = cv2.resize(seg_gt, None, None, fx=im_scales[0], fy=im_scales[0], interpolation=cv2.INTER_NEAREST) label.update({'seg_gt': seg_gt}) # label.update({'seg_gt_4x': cv2.resize(seg_gt, (im_blob.shape[-1] // 4, im_blob.shape[-2] // 4), interpolation=cv2.INTER_NEAREST)}) label.update({'gt_classes': label['roidb']['gt_classes']}) label.update({'mask_gt': np.zeros((len(label['gt_classes']), im_blob.shape[-2], im_blob.shape[-1]))}) for i in range(len(label['gt_classes'])): if type(label['roidb']['segms'][i]) is list and type(label['roidb']['segms'][i][0]) is list: img = Image.new('L', (int(np.round(im_blob.shape[-1] / im_scales[0])), int(np.round(im_blob.shape[-2] / im_scales[0]))), 0) for j in range(len(label['roidb']['segms'][i])): ImageDraw.Draw(img).polygon(tuple(label['roidb']['segms'][i][j]), outline=1, fill=1) # try: # ImageDraw.Draw(img).polygon(tuple(label['roidb']['segms'][i][j]), outline=1, fill=1) # except: # print(label['roidb']['segms'][i], j) # import pdb; pdb.set_trace() # sys.exit() label['mask_gt'][i] = cv2.resize(np.array(img), None, None, fx=im_scales[0], fy=im_scales[0], interpolation=cv2.INTER_NEAREST) else: assert type(label['roidb']['segms'][i]) is dict or type(label['roidb']['segms'][i][0]) is dict if type(label['roidb']['segms'][i]) is dict: label['mask_gt'][i] = cv2.resize(mask_util.decode(mask_util.frPyObjects([label['roidb']['segms'][i]], label['roidb']['segms'][i]['size'][0], label['roidb']['segms'][i]['size'][1]))[:, :, 0], None, None, fx=im_scales[0], fy=im_scales[0], interpolation=cv2.INTER_NEAREST) else: assert len(label['roidb']['segms'][i]) == 1 output = mask_util.decode(label['roidb']['segms'][i]) label['mask_gt'][i] = cv2.resize(output[:, :, 0], None, None, fx=im_scales[0], fy=im_scales[0], interpolation=cv2.INTER_NEAREST) if config.train.fcn_with_roi_loss: gt_boxes = label['roidb']['boxes'][np.where(label['roidb']['gt_classes'] > 0)[0]] gt_boxes = np.around(gt_boxes * im_scales[0]).astype(np.int32) label.update({'seg_roi_gt': np.zeros((len(gt_boxes), config.network.mask_size, config.network.mask_size), dtype=np.int64)}) for i in range(len(gt_boxes)): if gt_boxes[i][3] == gt_boxes[i][1]: gt_boxes[i][3] += 1 if gt_boxes[i][2] == gt_boxes[i][0]: gt_boxes[i][2] += 1 label['seg_roi_gt'][i] = 
cv2.resize(seg_gt[gt_boxes[i][1]:gt_boxes[i][3], gt_boxes[i][0]:gt_boxes[i][2]], (config.network.mask_size, config.network.mask_size), interpolation=cv2.INTER_NEAREST) else: pass return data, label, index
def determine_max_batch_size(cfg, distributed, dataset_len_per_gpu): def get_fake_input(cfg, orig_img_shape=(128, 128, 3), device='cuda'): test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:] test_pipeline = Compose(test_pipeline) data = dict(img=np.zeros(orig_img_shape, dtype=np.uint8)) data = test_pipeline(data) data = scatter(collate([data], samples_per_gpu=1), [device])[0] return data model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg).cuda() if 'pipeline' in cfg.data.train: img_shape = [ t for t in cfg.data.train.pipeline if t['type'] == 'Resize' ][0]['img_scale'] else: img_shape = [ t for t in cfg.data.train.dataset.pipeline if t['type'] == 'Resize' ][0]['img_scale'] channels = 3 fake_input = get_fake_input(cfg, orig_img_shape=list(img_shape) + [channels]) img_shape = fake_input['img_metas'][0][0]['pad_shape'] width, height = img_shape[0], img_shape[1] percentage = 0.9 min_bs = 2 max_bs = min(512, int(dataset_len_per_gpu / percentage) + 1) step = 1 batch_size = min_bs for bs in range(min_bs, max_bs, step): try: gt_boxes = [ torch.tensor([[0., 0., width, height]]).cuda() for _ in range(bs) ] gt_labels = [ torch.tensor([0], dtype=torch.long).cuda() for _ in range(bs) ] img_metas = [fake_input['img_metas'][0][0] for _ in range(bs)] gt_masks = None if isinstance(model, TwoStageDetector) and model.roi_head.with_mask: rles = maskUtils.frPyObjects( [[0.0, 0.0, width, 0.0, width, height, 0.0, height]], height, width) rle = maskUtils.merge(rles) mask = maskUtils.decode(rle) gt_masks = [ BitmapMasks([mask], height, width) for _ in range(bs) ] if gt_masks is None: model(torch.rand(bs, channels, height, width).cuda(), img_metas=img_metas, gt_bboxes=gt_boxes, gt_labels=gt_labels) else: model(torch.rand(bs, channels, height, width).cuda(), img_metas=img_metas, gt_bboxes=gt_boxes, gt_labels=gt_labels, gt_masks=gt_masks) batch_size = bs except RuntimeError as e: if str(e).startswith('CUDA out of memory'): break resulting_batch_size = int(batch_size * percentage) del model torch.cuda.empty_cache() if distributed: rank, world_size = get_dist_info() resulting_batch_size = torch.tensor(resulting_batch_size).cuda() dist.all_reduce(resulting_batch_size, torch.distributed.ReduceOp.MIN) print('rank', rank, 'resulting_batch_size', resulting_batch_size) resulting_batch_size = int(resulting_batch_size.cpu()) else: print('resulting_batch_size', resulting_batch_size) return resulting_batch_size
def load_sequence(self, sequence): """Load a sequence of images/frames Auxiliary function that loads a sequence of frames with the corresponding ground truth and their filenames. Returns a dict with the images in [0, 1], their corresponding labels, their subset (i.e. category, clip, prefix) and their filenames. """ from pycocotools import mask as cocomask from matplotlib.path import Path X = [] Y = [] F = [] for prefix, img in sequence: if not os.path.exists('%s/%s' % (self.image_path, img['file_name'])): raise RuntimeError('Image %s is missing' % img['file_name']) im = Image.open('%s/%s' % (self.image_path, img['file_name'])).copy() if im.mode == 'L': if self.warn_grayscale: warnings.warn('image %s is grayscale..' % img['file_name'], RuntimeWarning) im = im.convert('RGB') # load the annotations and build the mask anns = self.coco.loadAnns( self.coco.getAnnIds(imgIds=img['id'], catIds=prefix, iscrowd=None)) mask = np.zeros(im.size).transpose(1, 0) for ann in anns: catId = ann['category_id'] if type(ann['segmentation']) == list: # polygon for seg in ann['segmentation']: # xy vertex of the polygon poly = np.array(seg).reshape((len(seg) / 2, 2)) closed_path = Path(poly) nx, ny = img['width'], img['height'] x, y = np.meshgrid(np.arange(nx), np.arange(ny)) x, y = x.flatten(), y.flatten() points = np.vstack((x, y)).T grid = closed_path.contains_points(points) if np.count_nonzero(grid) == 0: warnings.warn( 'One of the annotations that compose the mask ' 'of %s was empty' % img['file_name'], RuntimeWarning) grid = grid.reshape((ny, nx)) mask[grid] = catId else: # mask if type(ann['segmentation']['counts']) == list: rle = cocomask.frPyObjects([ann['segmentation']], img['height'], img['width']) else: rle = [ann['segmentation']] grid = cocomask.decode(rle)[:, :, 0] grid = grid.astype('bool') mask[grid] = catId mask = np.array(mask.astype('int32')) im = np.array(im).astype(floatX) / 255. X.append(im) Y.append(mask) F.append(img['file_name']) ret = {} ret['data'] = np.array(X) ret['labels'] = np.array(Y) ret['subset'] = prefix ret['filenames'] = np.array(F) return ret
num_clear = 0
num_valid = 0
bbox_merged_list = []
if_clear_list = []
ratio_list = []
for idx in range(len(anns)):
    ## Get kps
    ann_kps = anns_kps[idx]
    if_clear = check_clear(ann_kps)  # check for head up and foot down person

    ## Get bbox
    ann = anns[idx]
    mask1 = ann['segmentation']
    # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L265
    rle = maskUtils.frPyObjects(mask1, img['height'], img['width'])
    area = maskUtils.area(rle)
    bboxes = maskUtils.toBbox(rle)  # [x, y, w, h]
    if len(bboxes.shape) != 2:
        # print('Warning!! len(bboxes.shape)!=2')
        continue
    bbox_merged = bboxes[0] if len(bboxes) == 1 else merge_bboxes(bboxes)
    if bbox_merged[2] == 0. or bbox_merged[3] == 0.:
        continue
    ratio = float(bbox_merged[3]) / float(bbox_merged[2])
    # if ratio <= 2.:
    #     continue
    if if_clear:
        any_clear = True

def fetch_from_COCO(filenames, img_list, coco_info, resize_images=False, resize_size=-1, load_categories=['person']): images = [] masks = [] assert len(filenames) == len(img_list) for n, img_el in enumerate(img_list): # load image if not os.path.exists(filenames[n]): print('Image %s is missing' % filenames[n]) continue pth = filenames[n] im = Image.open(pth) coco, catIds, imgIds = coco_info # load the annotations and build the mask anns = coco.loadAnns(coco.getAnnIds( imgIds=img_el['id'], catIds=catIds, iscrowd=None)) mask = np.zeros(im.size).transpose(1, 0) for ann in anns: catId = ann['category_id'] if type(ann['segmentation']) == list: # polygon for seg in ann['segmentation']: # xy vertex of the polygon poly = np.array(seg).reshape((len(seg)/2, 2)) closed_path = Path(poly) nx, ny = img_el['width'], img_el['height'] x, y = np.meshgrid(np.arange(nx), np.arange(ny)) x, y = x.flatten(), y.flatten() points = np.vstack((x, y)).T grid = closed_path.contains_points(points) if np.count_nonzero(grid) == 0: warnings.warn( 'One of the annotations that compose the mask ' 'of %s was empty' % img_el['file_name'], RuntimeWarning) grid = grid.reshape((ny, nx)) mask[grid] = catId else: # mask if type(ann['segmentation']['counts']) == list: rle = cocomask.frPyObjects( [ann['segmentation']], img_el['height'], img_el['width']) else: rle = [ann['segmentation']] grid = cocomask.decode(rle)[:, :, 0] grid = grid.astype('bool') mask[grid] = catId # zero_pad if resize_images: rx, ry = resize_size # resize (keeping proportions) [x, y] = im.size dx = float(rx)/x dy = float(ry)/y ratio = min(dx, dy) x = int(x * ratio) y = int(y * ratio) # workaround for PIL problems.. @retry(stop_max_attempt_number=7, wait_fixed=2000) def res(im, x, y): return im.resize((x, y), Image.ANTIALIAS) im = res(im, x, y) # mask = mask / numpy.max(mask) * 255.0 --> only visualization mask = Image.fromarray(mask.astype('uint8')) mask = mask.resize((x, y), Image.NEAREST) tmp = im im = Image.new("RGB", (rx, ry)) im.paste(tmp, ((rx-x)/2, (ry-y)/2)) tmp = mask # 80 obj categories mask = Image.new("L", (rx, ry)) mask.paste(tmp, ((rx-x)/2, (ry-y)/2)) images.append(np.asarray(im)) masks.append(np.asarray(mask)) return images, masks, filenames
def create_tf_example(image, annotations_list, image_dir, category_index, include_masks=False, keypoint_annotations_dict=None, densepose_annotations_dict=None, remove_non_person_annotations=False, remove_non_person_images=False): """Converts image and annotations to a tf.Example proto. Args: image: dict with keys: [u'license', u'file_name', u'coco_url', u'height', u'width', u'date_captured', u'flickr_url', u'id'] annotations_list: list of dicts with keys: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box coordinates in the official COCO dataset are given as [x, y, width, height] tuples using absolute coordinates where x, y represent the top-left (0-indexed) corner. This function converts to the format expected by the Tensorflow Object Detection API (which is which is [ymin, xmin, ymax, xmax] with coordinates normalized relative to image size). image_dir: directory containing the image files. category_index: a dict containing COCO category information keyed by the 'id' field of each category. See the label_map_util.create_category_index function. include_masks: Whether to include instance segmentations masks (PNG encoded) in the result. default: False. keypoint_annotations_dict: A dictionary that maps from annotation_id to a dictionary with keys: [u'keypoints', u'num_keypoints'] represeting the keypoint information for this person object annotation. If None, then no keypoint annotations will be populated. densepose_annotations_dict: A dictionary that maps from annotation_id to a dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V'] representing part surface coordinates. For more information see http://densepose.org/. remove_non_person_annotations: Whether to remove any annotations that are not the "person" class. remove_non_person_images: Whether to remove any images that do not contain at least one "person" annotation. Returns: key: SHA256 hash of the image. example: The converted tf.Example num_annotations_skipped: Number of (invalid) annotations that were ignored. num_keypoint_annotation_skipped: Number of keypoint annotations that were skipped. num_densepose_annotation_skipped: Number of DensePose annotations that were skipped. 
Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ image_height = image['height'] image_width = image['width'] filename = image['file_name'] image_id = image['id'] full_path = os.path.join(image_dir, filename) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) key = hashlib.sha256(encoded_jpg).hexdigest() xmin = [] xmax = [] ymin = [] ymax = [] is_crowd = [] category_names = [] category_ids = [] area = [] encoded_mask_png = [] keypoints_x = [] keypoints_y = [] keypoints_visibility = [] keypoints_name = [] num_keypoints = [] include_keypoint = keypoint_annotations_dict is not None num_annotations_skipped = 0 num_keypoint_annotation_used = 0 num_keypoint_annotation_skipped = 0 dp_part_index = [] dp_x = [] dp_y = [] dp_u = [] dp_v = [] dp_num_points = [] densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox'] include_densepose = densepose_annotations_dict is not None num_densepose_annotation_used = 0 num_densepose_annotation_skipped = 0 for object_annotations in annotations_list: (x, y, width, height) = tuple(object_annotations['bbox']) if width <= 0 or height <= 0: num_annotations_skipped += 1 continue if x + width > image_width or y + height > image_height: num_annotations_skipped += 1 continue category_id = int(object_annotations['category_id']) category_name = category_index[category_id]['name'].encode('utf8') if remove_non_person_annotations and category_name != b'person': num_annotations_skipped += 1 continue xmin.append(float(x) / image_width) xmax.append(float(x + width) / image_width) ymin.append(float(y) / image_height) ymax.append(float(y + height) / image_height) is_crowd.append(object_annotations['iscrowd']) category_ids.append(category_id) category_names.append(category_name) area.append(object_annotations['area']) if include_masks: run_len_encoding = mask.frPyObjects( object_annotations['segmentation'], image_height, image_width) binary_mask = mask.decode(run_len_encoding) if not object_annotations['iscrowd']: binary_mask = np.amax(binary_mask, axis=2) pil_image = PIL.Image.fromarray(binary_mask) output_io = io.BytesIO() pil_image.save(output_io, format='PNG') encoded_mask_png.append(output_io.getvalue()) if include_keypoint: annotation_id = object_annotations['id'] if annotation_id in keypoint_annotations_dict: num_keypoint_annotation_used += 1 keypoint_annotations = keypoint_annotations_dict[annotation_id] keypoints = keypoint_annotations['keypoints'] num_kpts = keypoint_annotations['num_keypoints'] keypoints_x_abs = keypoints[::3] keypoints_x.extend( [float(x_abs) / image_width for x_abs in keypoints_x_abs]) keypoints_y_abs = keypoints[1::3] keypoints_y.extend( [float(y_abs) / image_height for y_abs in keypoints_y_abs]) keypoints_visibility.extend(keypoints[2::3]) keypoints_name.extend(_COCO_KEYPOINT_NAMES) num_keypoints.append(num_kpts) else: keypoints_x.extend([0.0] * len(_COCO_KEYPOINT_NAMES)) keypoints_y.extend([0.0] * len(_COCO_KEYPOINT_NAMES)) keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES)) keypoints_name.extend(_COCO_KEYPOINT_NAMES) num_keypoints.append(0) if include_densepose: annotation_id = object_annotations['id'] if (annotation_id in densepose_annotations_dict and all(key in densepose_annotations_dict[annotation_id] for key in densepose_keys)): dp_annotations = densepose_annotations_dict[annotation_id] num_densepose_annotation_used += 1 dp_num_points.append(len(dp_annotations['dp_I'])) 
dp_part_index.extend([ int(i - _DP_PART_ID_OFFSET) for i in dp_annotations['dp_I'] ]) # DensePose surface coordinates are defined on a [256, 256] grid # relative to each instance box (i.e. absolute coordinates in range # [0., 256.]). The following converts the coordinates # so that they are expressed in normalized image coordinates. dp_x_box_rel = [ clip_to_unit(val / 256.) for val in dp_annotations['dp_x'] ] dp_x_norm = [(float(x) + x_box_rel * width) / image_width for x_box_rel in dp_x_box_rel] dp_y_box_rel = [ clip_to_unit(val / 256.) for val in dp_annotations['dp_y'] ] dp_y_norm = [(float(y) + y_box_rel * height) / image_height for y_box_rel in dp_y_box_rel] dp_x.extend(dp_x_norm) dp_y.extend(dp_y_norm) dp_u.extend(dp_annotations['dp_U']) dp_v.extend(dp_annotations['dp_V']) else: dp_num_points.append(0) if (remove_non_person_images and not any(name == b'person' for name in category_names)): return (key, None, num_annotations_skipped, num_keypoint_annotation_skipped, num_densepose_annotation_skipped) feature_dict = { 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(category_names), 'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd), 'image/object/area': dataset_util.float_list_feature(area), } if include_masks: feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png)) if include_keypoint: feature_dict['image/object/keypoint/x'] = ( dataset_util.float_list_feature(keypoints_x)) feature_dict['image/object/keypoint/y'] = ( dataset_util.float_list_feature(keypoints_y)) feature_dict['image/object/keypoint/num'] = ( dataset_util.int64_list_feature(num_keypoints)) feature_dict['image/object/keypoint/visibility'] = ( dataset_util.int64_list_feature(keypoints_visibility)) feature_dict['image/object/keypoint/text'] = ( dataset_util.bytes_list_feature(keypoints_name)) num_keypoint_annotation_skipped = (len(keypoint_annotations_dict) - num_keypoint_annotation_used) if include_densepose: feature_dict['image/object/densepose/num'] = ( dataset_util.int64_list_feature(dp_num_points)) feature_dict['image/object/densepose/part_index'] = ( dataset_util.int64_list_feature(dp_part_index)) feature_dict['image/object/densepose/x'] = ( dataset_util.float_list_feature(dp_x)) feature_dict['image/object/densepose/y'] = ( dataset_util.float_list_feature(dp_y)) feature_dict['image/object/densepose/u'] = ( dataset_util.float_list_feature(dp_u)) feature_dict['image/object/densepose/v'] = ( dataset_util.float_list_feature(dp_v)) num_densepose_annotation_skipped = (len(densepose_annotations_dict) - num_densepose_annotation_used) example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) return (key, example, num_annotations_skipped, num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
    return rmin - padSize, rmax + padSize + 1, cmin - padSize, cmax + padSize + 1

for i in np.arange(_count, len(anns)):
    print 'transforming instance %d' % i
    #transform_and_save_image(i)
    uint_image = io.imread('%s/images/%s/%s' % (dataDir, dataType, imgs[i]['file_name']))
    if len(uint_image.shape) == 2:
        tmp_image = np.zeros(uint_image.shape + (3, ), dtype=np.uint8)
        tmp_image[:, :, 0] = tmp_image[:, :, 1] = tmp_image[:, :, 2] = uint_image
        uint_image = tmp_image
    float_image = np.array(uint_image, dtype=np.float32) / 255.0
    rle = mask.frPyObjects(anns[i]['segmentation'], imgs[i]['height'], imgs[i]['width'])
    m_uint = mask.decode(rle)
    m = np.array(m_uint[:, :, 0], dtype=np.float32)
    base_tran = video_transformer.sample()
    frame1_tran = base_tran  # + frame_transformer.sample()
    frame2_tran = base_tran + frame_transformer.sample()
    image1 = frame1_tran.transform_img(float_image.copy(), float_image.shape[:2], m)
    #print 'image1 size: %s' % str(image1.shape)
    image1_padded = np.pad(image1, ((padSize, padSize), (padSize, padSize), (0, 0)),
                           mode='constant')
    #print 'image1_padded size: %s' % str(image1_padded.shape)
    mask1 = frame1_tran.transform_mask(m.copy(), m.shape)  # fills padded area with -1

def _read_segmentation(self, ann, H, W):
    s = ann['segmentation']
    s = s if type(s) == list else [s]
    return mask.decode(mask.frPyObjects(s, H, W)).max(axis=2)

def showAnns(ori_img, anns, draw_bbox=False): h, w, c = ori_img.shape if len(anns) == 0: return ori_img if 'segmentation' in anns[0] or 'keypoints' in anns[0]: datasetType = 'instances' elif 'caption' in anns[0]: datasetType = 'captions' else: raise Exception('datasetType not supported') if datasetType == 'instances': mask = np.zeros_like(ori_img).astype(np.uint8) for ann in anns: c = np.array((np.random.random( (1, 3)) * 0.6 + 0.4)[0] * 255).astype(int).tolist() if 'segmentation' in ann: if type(ann['segmentation']) == list: # polygon for seg in ann['segmentation']: poly = np.array(seg).reshape((int(len(seg) / 2), 2)) pts = poly.reshape((-1, 1, 2)) cv2.polylines(ori_img, [pts], True, c, thickness=1, lineType=cv2.LINE_AA) cv2.drawContours(mask, [pts], -1, c, -1) if cv2.contourArea(pts) > 1: M = cv2.moments(pts) cX = int(M["m10"] / M["m00"]) cY = int(M["m01"] / M["m00"]) cv2.putText(ori_img, 'CAT:{}'.format(ann['category_id']), (cX, cY), cv2.FONT_HERSHEY_PLAIN, 0.8, (255, 255, 255), 1, cv2.LINE_AA) else: # mask if type(ann['segmentation']['counts']) == list: rle = maskUtils.frPyObjects([ann['segmentation']], h, w) else: rle = [ann['segmentation']] m = maskUtils.decode(rle) img = np.ones((m.shape[0], m.shape[1], 3)) if ann['iscrowd'] == 1: color_mask = np.array([2.0, 166.0, 101.0]) if ann['iscrowd'] == 0: color_mask = np.random.random((1, 3)).tolist()[0] for i in range(3): img[:, :, i] = color_mask[i] ori_img = cv2.addWeighted(img, 0.6, m, 0.6, 0.6) if draw_bbox: if 'bbox' in ann.keys(): [bbox_x, bbox_y, bbox_w, bbox_h] = ann['bbox'] pt1 = (int(bbox_x), int(bbox_y)) pt2 = (int(bbox_x + bbox_w), int(bbox_y + bbox_h)) cv2.rectangle(ori_img, pt1, pt2, color=c, thickness=1, lineType=cv2.LINE_AA) if 'keypoints' in ann and type(ann['keypoints']) == list: # turn skeleton into zero-based index # sks = np.array( # self.loadCats(ann['category_id'])[0]['skeleton']) - 1 kp = np.array(ann['keypoints']) x = kp[0::3] y = kp[1::3] v = kp[2::3] # for sk in sks: # if np.all(v[sk] > 0): # cv2.line(ori_img, x[sk], y[sk], color=c) print(kp) print('keypoint vis not supported') if type(ann['segmentation']) == list: ori_img = cv2.addWeighted(ori_img, 0.7, mask, 0.6, 0.7) elif datasetType == 'captions': for ann in anns: print(ann['caption']) return ori_img
def _load_annotations(self, ann, image_info=None): parsed_annotations = [] ann_id = ann.get('id') attributes = {} if 'attributes' in ann: try: attributes.update(ann['attributes']) except Exception as e: log.debug("item #%s: failed to read annotation attributes: %s", image_info['id'], e) if 'score' in ann: attributes['score'] = ann['score'] group = ann_id # make sure all tasks' annotations are merged if self._task in [CocoTask.instances, CocoTask.person_keypoints]: x, y, w, h = ann['bbox'] label_id = self._get_label_id(ann) is_crowd = bool(ann['iscrowd']) attributes['is_crowd'] = is_crowd if self._task is CocoTask.person_keypoints: keypoints = ann['keypoints'] points = [p for i, p in enumerate(keypoints) if i % 3 != 2] visibility = keypoints[2::3] parsed_annotations.append( Points(points, visibility, label=label_id, id=ann_id, attributes=attributes, group=group)) segmentation = ann.get('segmentation') if segmentation and segmentation != [[]]: rle = None if isinstance(segmentation, list): if not self._merge_instance_polygons: # polygon - a single object can consist of multiple parts for polygon_points in segmentation: parsed_annotations.append( Polygon(points=polygon_points, label=label_id, id=ann_id, attributes=attributes, group=group)) else: # merge all parts into a single mask RLE img_h = image_info['height'] img_w = image_info['width'] rles = mask_utils.frPyObjects(segmentation, img_h, img_w) rle = mask_utils.merge(rles) elif isinstance(segmentation['counts'], list): # uncompressed RLE img_h = image_info['height'] img_w = image_info['width'] mask_h, mask_w = segmentation['size'] if img_h == mask_h and img_w == mask_w: rle = mask_utils.frPyObjects([segmentation], mask_h, mask_w)[0] else: log.warning( "item #%s: mask #%s " "does not match image size: %s vs. %s. " "Skipping this annotation.", image_info['id'], ann_id, (mask_h, mask_w), (img_h, img_w)) else: # compressed RLE rle = segmentation if rle is not None: parsed_annotations.append( RleMask(rle=rle, label=label_id, id=ann_id, attributes=attributes, group=group)) else: parsed_annotations.append( Bbox(x, y, w, h, label=label_id, id=ann_id, attributes=attributes, group=group)) elif self._task is CocoTask.labels: label_id = self._get_label_id(ann) parsed_annotations.append( Label(label=label_id, id=ann_id, attributes=attributes, group=group)) elif self._task is CocoTask.captions: caption = ann['caption'] parsed_annotations.append( Caption(caption, id=ann_id, attributes=attributes, group=group)) else: raise NotImplementedError() return parsed_annotations
def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None): """ Load a json file with COCO's instances annotation format. Currently supports instance detection, instance segmentation, and person keypoints annotations. Args: json_file (str): full path to the json file in COCO instances annotation format. image_root (str or path-like): the directory where the images in this json file exists. dataset_name (str or None): the name of the dataset (e.g., coco_2017_train). When provided, this function will also do the following: * Put "thing_classes" into the metadata associated with this dataset. * Map the category ids into a contiguous range (needed by standard dataset format), and add "thing_dataset_id_to_contiguous_id" to the metadata associated with this dataset. This option should usually be provided, unless users need to load the original json content and apply more processing manually. extra_annotation_keys (list[str]): list of per-annotation keys that should also be loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints", "category_id", "segmentation"). The values for these keys will be returned as-is. For example, the densepose annotations are loaded in this way. Returns: list[dict]: a list of dicts in Detectron2 standard dataset dicts format (See `Using Custom Datasets </tutorials/datasets.html>`_ ) when `dataset_name` is not None. If `dataset_name` is None, the returned `category_ids` may be incontiguous and may not conform to the Detectron2 standard format. Notes: 1. This function does not read the image files. The results do not have the "image" field. """ from pycocotools.coco import COCO timer = Timer() json_file = PathManager.get_local_path(json_file) with contextlib.redirect_stdout(io.StringIO()): coco_api = COCO(json_file) if timer.seconds() > 1: logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) id_map = None if dataset_name is not None: meta = MetadataCatalog.get(dataset_name) cat_ids = sorted(coco_api.getCatIds()) cats = coco_api.loadCats(cat_ids) # The categories in a custom json file may not be sorted. thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])] meta.thing_classes = thing_classes # In COCO, certain category ids are artificially removed, # and by convention they are always ignored. # We deal with COCO's id issue and translate # the category ids to contiguous ids in [0, 80). # It works by looking at the "categories" field in the json, therefore # if users' own json also have incontiguous ids, we'll # apply this mapping as well but print a warning. if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): if "coco" not in dataset_name: logger.warning( """ Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. """ ) id_map = {v: i for i, v in enumerate(cat_ids)} meta.thing_dataset_id_to_contiguous_id = id_map # sort indices for reproducible results img_ids = sorted(coco_api.imgs.keys()) # imgs is a list of dicts, each looks something like: # {'license': 4, # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', # 'file_name': 'COCO_val2014_000000001268.jpg', # 'height': 427, # 'width': 640, # 'date_captured': '2013-11-17 05:57:24', # 'id': 1268} imgs = coco_api.loadImgs(img_ids) # anns is a list[list[dict]], where each dict is an annotation # record for an object. The inner list enumerates the objects in an image # and the outer list enumerates over images. Example of anns[0]: # [{'segmentation': [[192.81, # 247.09, # ... 
# 219.03, # 249.06]], # 'area': 1035.749, # 'iscrowd': 0, # 'image_id': 1268, # 'bbox': [192.81, 224.8, 74.73, 33.43], # 'category_id': 16, # 'id': 42986}, # ...] anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] total_num_valid_anns = sum([len(x) for x in anns]) total_num_anns = len(coco_api.anns) if total_num_valid_anns < total_num_anns: logger.warning( f"{json_file} contains {total_num_anns} annotations, but only " f"{total_num_valid_anns} of them match to images in the file." ) if "minival" not in json_file: # The popular valminusminival & minival annotations for COCO2014 contain this bug. # However the ratio of buggy annotations there is tiny and does not affect accuracy. # Therefore we explicitly white-list them. ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( json_file ) imgs_anns = list(zip(imgs, anns)) logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file)) dataset_dicts = [] ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or []) num_instances_without_valid_segmentation = 0 for (img_dict, anno_dict_list) in imgs_anns: record = {} record["file_name"] = os.path.join(image_root, img_dict["file_name"]) record["height"] = img_dict["height"] record["width"] = img_dict["width"] image_id = record["image_id"] = img_dict["id"] objs = [] for anno in anno_dict_list: # Check that the image_id in this annotation is the same as # the image_id we're looking at. # This fails only when the data parsing logic or the annotation file is buggy. # The original COCO valminusminival2014 & minival2014 annotation files # actually contains bugs that, together with certain ways of using COCO API, # can trigger this assertion. assert anno["image_id"] == image_id assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.' obj = {key: anno[key] for key in ann_keys if key in anno} segm = anno.get("segmentation", None) if segm: # either list[list[float]] or dict(RLE) if isinstance(segm, dict): if isinstance(segm["counts"], list): # convert to compressed RLE segm = mask_util.frPyObjects(segm, *segm["size"]) else: # filter out invalid polygons (< 3 points) segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] if len(segm) == 0: num_instances_without_valid_segmentation += 1 continue # ignore this instance obj["segmentation"] = segm keypts = anno.get("keypoints", None) if keypts: # list[int] for idx, v in enumerate(keypts): if idx % 3 != 2: # COCO's segmentation coordinates are floating points in [0, H or W], # but keypoint coordinates are integers in [0, H-1 or W-1] # Therefore we assume the coordinates are "pixel indices" and # add 0.5 to convert to floating point coordinates. keypts[idx] = v + 0.5 obj["keypoints"] = keypts obj["bbox_mode"] = BoxMode.XYWH_ABS if id_map: annotation_category_id = obj["category_id"] try: obj["category_id"] = id_map[annotation_category_id] except KeyError as e: raise KeyError( f"Encountered category_id={annotation_category_id} " "but this id does not exist in 'categories' of the json file." ) from e objs.append(obj) record["annotations"] = objs dataset_dicts.append(record) if num_instances_without_valid_segmentation > 0: logger.warning( "Filtered out {} instances without valid segmentation. ".format( num_instances_without_valid_segmentation ) + "There might be issues in your dataset generation process. 
" "A valid polygon should be a list[float] with even length >= 6." ) return dataset_dicts
def annotation_data(folders, vid_id, ann_id, exists=False): hand_data = dict(info=hands_info, licenses=hands_licenses, categories=hands_categories, videos=[], annotations=[]) for directory in folders: print(sorted(directory.glob('*.jpg'))[0]) im_path = str(sorted(directory.glob('*.jpg'))[0]) img = cv2.imread(im_path) video_polygons = get_path_polygons(directory) video = dict(width=img.shape[1], length=len(sorted(directory.glob('*.jpg'))), date_captured='', license='', flickr_url='', file_names=[], id=vid_id, coco_url='', height=img.shape[0]) annotations = {} for i in range(4): annotations[ann_id] = dict(height=img.shape[0], width=img.shape[1], length=1, category_id=1, segmentations=[], bboxes=[], video_id=vid_id, iscrowd=False, id=ann_id, areas=[]) ann_id += 1 if not exists: for polygons, frame_path in zip(video_polygons, sorted(directory.glob('*.jpg'))): file_name = str(frame_path).split(os.sep) file_name = os.path.join(*file_name[-2:]) video['file_names'].append(file_name) for inst_id, polygon in zip(annotations, list(polygons)): if polygon.shape[0] > 1: #polygon = polygon.astype(int).astype(float) #polygon[:, 0], polygon[:, 1] = polygon[:, 1], polygon[:, 0].copy() polygon = polygon.transpose() contour = [ j for i in zip(polygon[0], polygon[1]) for j in i ] rles = mask.frPyObjects([contour], img.shape[0], img.shape[1]) rle = mask.merge(rles) area = mask.area(rle) bounding_box = mask.toBbox(rle) annotations[inst_id]['bboxes'].append( bounding_box.tolist()) annotations[inst_id]['areas'].append(int(area)) rle['counts'] = rle['counts'].decode('ascii') annotations[inst_id]['segmentations'].append(rle) else: annotations[inst_id]['segmentations'].append(None) annotations[inst_id]['bboxes'].append(None) annotations[inst_id]['areas'].append(None) for _, ann in annotations.items(): hand_data['annotations'].append(ann) hand_data['videos'].append(video) vid_id += 1 return hand_data, vid_id, ann_id
def coco_poly_to_mask(poly, h, w):
    # polygon part(s) -> per-part RLEs -> single merged RLE -> binary (h, w) mask
    rles = mask_utils.frPyObjects(poly, h, w)
    rle = mask_utils.merge(rles)
    mask = mask_utils.decode(rle)
    return mask
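# A small usage sketch for coco_poly_to_mask; the triangle polygon and the 100x100 canvas
# are made-up values, and the mask_utils import is assumed to be in scope as above.
poly = [[10.0, 10.0, 60.0, 10.0, 35.0, 55.0]]  # one triangle, [x0, y0, x1, y1, ...]
m = coco_poly_to_mask(poly, 100, 100)          # (100, 100) uint8 array of 0/1
print(m.shape, m.sum())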
def segmentation_to_mask(self, polys, height, width):
    import pycocotools.mask as cocomask
    polys = [p.flatten().tolist() for p in polys]
    rles = cocomask.frPyObjects(polys, height, width)
    rle = cocomask.merge(rles)
    return cocomask.decode(rle)
def polys_to_mask_wrt_box(polygons, box, M): """Convert from the COCO polygon segmentation format to a binary mask encoded as a 2D array of data type numpy.float32. The polygon segmentation is understood to be enclosed in the given box and rasterized to an M x M mask. The resulting mask is therefore of shape (M, M). """ w = box[2] - box[0] h = box[3] - box[1] w = np.maximum(w, 1) h = np.maximum(h, 1) polygons_norm = [] for poly in polygons: p = np.array(poly, dtype=np.float32) p[0::2] = (p[0::2] - box[0]) * M / w p[1::2] = (p[1::2] - box[1]) * M / h polygons_norm.append(p) rle = mask_util.frPyObjects(polygons_norm, M, M) mask_ = np.array(mask_util.decode(rle), dtype=np.float32) # Flatten in case polygons was a list mask_ = np.sum(mask_, axis=2) mask_ = np.array(mask_ > 0, dtype=np.float32) mask_ = np.array(mask_, dtype=np.uint8) ret, thr = cv2.threshold(mask_, 0, 1, cv2.THRESH_BINARY) _, countors, _ = cv2.findContours(thr, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) height = M width = M mask_bshape = np.zeros((height, width), np.float32) inner_bshape = np.zeros((height, width), np.float32) temp1 = np.zeros((height, width), np.float32) temp2 = np.zeros((height, width), np.float32) temp3 = np.zeros((height, width), np.float32) temp4 = np.zeros((height, width), np.float32) temp5 = np.zeros((height, width), np.float32) temp6 = np.zeros((height, width), np.float32) temp7 = np.zeros((height, width), np.float32) temp8 = np.zeros((height, width), np.float32) temp9 = np.zeros((height, width), np.float32) temp10 = np.zeros((height, width), np.float32) # 3px mask polygon = countors pixels = cfg.BSHAPE.PIXELS mask = None if pixels == 3: inner_bshape = cv2.fillPoly(inner_bshape, polygon, 3) mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1) temp1 = cv2.polylines(temp1, polygon, True, 1, 2) temp2 = cv2.polylines(temp2, polygon, True, 1, 3) temp3 = cv2.polylines(temp3, polygon, True, 1, 4) mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 mask = np.where( mask == 7, 1, np.where(mask == 6, 0.95, np.where(mask == 5, 0.85, np.where(mask == 4, 0.70, 0)))) elif pixels == 5: inner_bshape = cv2.fillPoly(inner_bshape, polygon, 5) mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1) temp1 = cv2.polylines(temp1, polygon, True, 1, 2) temp2 = cv2.polylines(temp2, polygon, True, 1, 3) temp3 = cv2.polylines(temp3, polygon, True, 1, 4) temp4 = cv2.polylines(temp4, polygon, True, 1, 5) temp5 = cv2.polylines(temp5, polygon, True, 1, 6) mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5 mask = np.where( mask == 11, 1, np.where( mask == 10, 0.95, np.where( mask == 9, 0.85, np.where( mask == 8, 0.70, np.where(mask == 7, 0.65, np.where(mask == 6, 0.60, 0)))))) elif pixels == 7: inner_bshape = cv2.fillPoly(inner_bshape, polygon, 7) mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1) temp1 = cv2.polylines(temp1, polygon, True, 1, 2) temp2 = cv2.polylines(temp2, polygon, True, 1, 3) temp3 = cv2.polylines(temp3, polygon, True, 1, 4) temp4 = cv2.polylines(temp4, polygon, True, 1, 5) temp5 = cv2.polylines(temp5, polygon, True, 1, 6) temp6 = cv2.polylines(temp6, polygon, True, 1, 7) temp7 = cv2.polylines(temp7, polygon, True, 1, 8) mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5 + temp6 + temp7 mask = np.where( mask == 15, 1, np.where( mask == 14, 0.95, np.where( mask == 13, 0.90, np.where( mask == 12, 0.85, np.where( mask == 11, 0.80, np.where( mask == 10, 0.75, np.where(mask == 9, 0.70, np.where(mask == 8, 0.65, 0)))))))) elif pixels == 11: 
inner_bshape = cv2.fillPoly(inner_bshape, polygon, 10) mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1) temp1 = cv2.polylines(temp1, polygon, True, 1, 2) temp2 = cv2.polylines(temp2, polygon, True, 1, 3) temp3 = cv2.polylines(temp3, polygon, True, 1, 4) temp4 = cv2.polylines(temp4, polygon, True, 1, 5) temp5 = cv2.polylines(temp5, polygon, True, 1, 6) temp6 = cv2.polylines(temp6, polygon, True, 1, 7) temp7 = cv2.polylines(temp7, polygon, True, 1, 8) temp8 = cv2.polylines(temp8, polygon, True, 1, 9) temp9 = cv2.polylines(temp9, polygon, True, 1, 10) temp10 = cv2.polylines(temp10, polygon, True, 1, 11) mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5 + temp6 + temp7 + temp8 + +temp9 + temp10 mask = np.where( mask == 21, 1, np.where( mask == 20, 0.95, np.where( mask == 19, 0.90, np.where( mask == 18, 0.85, np.where( mask == 17, 0.80, np.where( mask == 16, 0.75, np.where( mask == 15, 0.70, np.where( mask == 14, 0.65, np.where( mask == 13, 0.60, np.where( mask == 12, 0.55, np.where(mask == 11, 0.50, 0))))))))))) # inner_bshape = cv2.fillPoly(inner_bshape, polygon, 5) # mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1) # temp1 = cv2.polylines(temp1, polygon, True, 1, 2) # temp2 = cv2.polylines(temp2, polygon, True, 1, 3) # temp3 = cv2.polylines(temp3, polygon, True, 1, 4) # temp4 = cv2.polylines(temp4, polygon, True, 1, 5) # temp5 = cv2.polylines(temp5, polygon, True, 1, 6) # # mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 +temp5 # mask = np.where(mask == 11, 1, np.where(mask == 10, 0.95, np.where(mask == 9, 0.85, np.where(mask == 8, 0.70, # np.where(mask == 7, 0.65, # np.where( # mask == 6, # 0.60, # 0)))))) mask = np.array(mask, dtype=np.float32) # print('!!!!!!!!!!!!!!!!!!!mask number check!!!!!!!!!!!!!!!') # print(np.unique(mask)) # np.savetxt('/home/bkang/data.csv', mask, delimiter=',') return mask
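# Condensed, hedged mirror of the pixels == 3 branch above (the other branches only change
# the number of outlines and the weight table): fill the polygon interior, draw its outline
# at growing thicknesses, and map the summed overlap count to a soft boundary weight.
import numpy as np
import cv2

def soft_boundary_mask_3px(contour, height, width):
    # contour: int32 point array as returned by cv2.findContours
    # interior filled with 3, then outlines of thickness 1..4 each add 1
    acc = cv2.fillPoly(np.zeros((height, width), np.float32), [contour], 3)
    for t in (1, 2, 3, 4):
        acc = acc + cv2.polylines(np.zeros((height, width), np.float32), [contour], True, 1, t)
    # same weight table as the pixels == 3 case above
    return np.where(acc == 7, 1.0,
           np.where(acc == 6, 0.95,
           np.where(acc == 5, 0.85,
           np.where(acc == 4, 0.70, 0.0))))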
coco = CocoDetection( 'data/MSCOCO/images/{}2017'.format(split), 'data/MSCOCO/annotations/instances_{}2017.json'.format(split)) dest = 'data/MSCOCO/imageclassification/{}/'.format(split) if not os.path.exists(dest): os.mkdir(dest) ii = 0 for x, y in tqdm(coco): w, h = x.size x = np.array(x) for _y in y: cat = _y['category_id'] rle = mask.frPyObjects(_y['segmentation'], h, w) mm = mask.toBbox(rle) for m in mm: if m.shape == (4, ): m = [int(u) for u in m] x1, x2, y1, y2 = m[0], m[0] + m[2], m[1], m[1] + m[3] if m[2] > 32 and m[3] > 32: im = Image.fromarray(x[y1:y2, x1:x2]) la = '{:03d}_{}'.format(_y['category_id'], cats[_y['category_id']]) la_path = os.path.join(dest, la) if not os.path.exists(la_path): os.mkdir(la_path) im_path = os.path.join(la_path, '{:08d}.jpg'.format(ii)) im.save(im_path)
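# Hedged helper sketch related to the crop loop above: a single [x, y, w, h] box covering
# all polygon parts of one annotation, obtained by merging the per-part RLEs first. The
# pycocotools calls are real; the function itself is illustrative.
from pycocotools import mask

def segmentation_bbox(segm, h, w):
    rles = mask.frPyObjects(segm, h, w)    # one RLE per polygon part
    return mask.toBbox(mask.merge(rles))   # [x, y, w, h] of the merged mask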
def create_tf_example(image, annotations_list, image_dir, category_index, include_masks=False): """Converts image and annotations to a tf.Example proto. Args: image: dict with keys: [u'license', u'file_name', u'coco_url', u'height', u'width', u'date_captured', u'flickr_url', u'id'] annotations_list: list of dicts with keys: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box coordinates in the official COCO dataset are given as [x, y, width, height] tuples using absolute coordinates where x, y represent the top-left (0-indexed) corner. This function converts to the format expected by the Tensorflow Object Detection API (which is which is [ymin, xmin, ymax, xmax] with coordinates normalized relative to image size). image_dir: directory containing the image files. category_index: a dict containing COCO category information keyed by the 'id' field of each category. See the label_map_util.create_category_index function. include_masks: Whether to include instance segmentations masks (PNG encoded) in the result. default: False. Returns: example: The converted tf.Example num_annotations_skipped: Number of (invalid) annotations that were ignored. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ image_height = image['height'] image_width = image['width'] filename = image['file_name'] image_id = image['id'] full_path = os.path.join(image_dir, filename) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) key = hashlib.sha256(encoded_jpg).hexdigest() xmin = [] xmax = [] ymin = [] ymax = [] is_crowd = [] category_names = [] category_ids = [] area = [] encoded_mask_png = [] num_annotations_skipped = 0 for object_annotations in annotations_list: (x, y, width, height) = tuple(object_annotations['bbox']) if width <= 0 or height <= 0: num_annotations_skipped += 1 continue if x + width > image_width or y + height > image_height: num_annotations_skipped += 1 continue xmin.append(float(x) / image_width) xmax.append(float(x + width) / image_width) ymin.append(float(y) / image_height) ymax.append(float(y + height) / image_height) is_crowd.append(object_annotations['iscrowd']) category_id = int(object_annotations['category_id']) category_ids.append(category_id) category_names.append(category_index[category_id]['name'].encode('utf8')) area.append(object_annotations['area']) if include_masks: run_len_encoding = mask.frPyObjects(object_annotations['segmentation'], image_height, image_width) binary_mask = mask.decode(run_len_encoding) if not object_annotations['iscrowd']: binary_mask = np.amax(binary_mask, axis=2) pil_image = PIL.Image.fromarray(binary_mask) output_io = io.BytesIO() pil_image.save(output_io, format='PNG') encoded_mask_png.append(output_io.getvalue()) feature_dict = { 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 
'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/label': dataset_util.int64_list_feature(category_ids), 'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd), 'image/object/area': dataset_util.float_list_feature(area), } if include_masks: feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png)) example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return key, example, num_annotations_skipped
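# Hedged round-trip sketch: recover one instance mask from the PNG bytes that
# create_tf_example stores under 'image/object/mask'. The function name is illustrative;
# only the PIL/numpy calls are assumed to exist.
import io
import numpy as np
import PIL.Image

def decode_mask_png(png_bytes):
    # inverse of the pil_image.save(..., format='PNG') step above
    return np.asarray(PIL.Image.open(io.BytesIO(png_bytes)), dtype=np.uint8)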
def project_masks_on_boxes(segmentation_masks, proposals, discretization_size, maskiou_on): """ Given segmentation masks and the bounding boxes corresponding to the location of the masks in the image, this function crops and resizes the masks in the position defined by the boxes. This prepares the masks for them to be fed to the loss computation as the targets. If use maskiou head, we will compute the maskiou target here. Arguments: segmentation_masks: an instance of SegmentationMask proposals: an instance of BoxList """ masks = [] mask_ratios = [] M = discretization_size device = proposals.bbox.device proposals = proposals.convert("xyxy") assert segmentation_masks.size == proposals.size, "{}, {}".format( segmentation_masks, proposals) # TODO put the proposals on the CPU, as the representation for the # masks is not efficient GPU-wise (possibly several small tensors for # representing a single instance mask) proposals = proposals.bbox.to(torch.device("cpu")) for segmentation_mask, proposal in zip(segmentation_masks, proposals): # crop the masks, resize them to the desired resolution and # then convert them to the tensor representation, # instead of the list representation that was used cropped_mask = segmentation_mask.crop(proposal) scaled_mask = cropped_mask.resize((M, M)) mask = scaled_mask.get_mask_tensor() masks.append(mask) if maskiou_on: x1 = int(proposal[0]) y1 = int(proposal[1]) x2 = int(proposal[2]) + 1 y2 = int(proposal[3]) + 1 # for poly_ in segmentation_mask.polygons: for polygonInstance_ in segmentation_mask.instances.polygons: poly = np.array(polygonInstance_.polygons[0], dtype=np.float32) x1 = np.minimum(x1, poly[0::2].min()) x2 = np.maximum(x2, poly[0::2].max()) y1 = np.minimum(y1, poly[1::2].min()) y2 = np.maximum(y2, poly[1::2].max()) img_h = segmentation_mask.size[1] img_w = segmentation_mask.size[0] x1 = np.maximum(x1, 0) x2 = np.minimum(x2, img_w - 1) y1 = np.maximum(y1, 0) y2 = np.minimum(y2, img_h - 1) segmentation_mask_for_maskratio = segmentation_mask.crop( [x1, y1, x2, y2]) ''' #type 1 gt_img_mask = segmentation_mask_for_maskratio.convert(mode='mask') gt_img_mask_area = gt_img_mask.sum().float() gt_box_mask = gt_img_mask[int(proposal[1]-y1):int(proposal[3]-y1)+1, int(proposal[0]-x1):int(proposal[2]-x1)+1] gt_box_mask_area = gt_box_mask.sum().float() mask_ratio = gt_box_mask_area / gt_img_mask_area ''' #type 2 rle_for_fullarea = mask_util.frPyObjects([ p.polygons[0].numpy() for p in segmentation_mask_for_maskratio.instances.polygons ], y2 - y1, x2 - x1) full_area = torch.tensor( mask_util.area(rle_for_fullarea).sum().astype(float)) rle_for_box_area = mask_util.frPyObjects([ p.polygons[0].numpy() for p in cropped_mask.instances.polygons ], proposal[3] - proposal[1], proposal[2] - proposal[0]) box_area = torch.tensor( mask_util.area(rle_for_box_area).sum().astype(float)) mask_ratio = box_area / full_area mask_ratios.append(mask_ratio) if len(masks) == 0: return torch.empty(0, dtype=torch.float32, device=device), torch.empty(0, dtype=torch.float32, device=device) if maskiou_on: mask_ratios = torch.stack(mask_ratios, dim=0).to(device, dtype=torch.float32) else: mask_ratios = None # if len(masks) == 0: # return torch.empty(0, dtype=torch.float32, device=device), torch.empty(0, dtype=torch.float32, device=device) return torch.stack(masks, dim=0).to(device, dtype=torch.float32), mask_ratios
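# Rough standalone sketch of the mask-ratio target computed above: the fraction of an
# instance's mask area that falls inside the proposal box. It follows the commented-out
# "type 1" idea (full-image mask) rather than the polygon-RLE "type 2" path used in the
# code; names and the +1 box rounding are illustrative.
import numpy as np
from pycocotools import mask as mask_util

def mask_ratio_in_box(polygons, img_h, img_w, box):
    # polygons: list of flat [x, y, ...] lists in image coordinates
    # box: [x1, y1, x2, y2] proposal in the same coordinates
    rles = mask_util.frPyObjects(polygons, img_h, img_w)
    m = mask_util.decode(mask_util.merge(rles))   # (img_h, img_w) array of 0/1
    x1, y1, x2, y2 = [int(round(v)) for v in box]
    inside = m[y1:y2 + 1, x1:x2 + 1].sum()
    total = m.sum()
    return float(inside) / max(float(total), 1.0)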
def fix_segments_intersections(polygons, height, width, img_name, threshold=0.0, ratio_tolerance=0.001, area_threshold=1): """Find all intersected regions and crop contour for back object by objects which are in front of the first one. It is related to a specialty of segmentation in CVAT annotation. Intersection is calculated via function 'iou' from cocoapi Args: polygons: all objects on image represented as 2D array of objects' contours height: height of image width: width of image img_name: name of image file threshold: threshold of intersection over union of two objects. By default is set to 0 and processes any two intersected objects ratio_tolerance: used for situation when one object is fully or almost fully inside another one and we don't want make "hole" in one of objects """ empty_polygon = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] for i, _ in enumerate(polygons): rle_bottom = mask_util.frPyObjects([polygons[i]['points']], height, width) segment_overlapped = False for j in range(i + 1, len(polygons)): rle_top = mask_util.frPyObjects([polygons[j]['points']], height, width) iou = mask_util.iou(rle_bottom, rle_top, [0, 0]) area_top = sum(mask_util.area(rle_top)) area_bottom = sum(mask_util.area(rle_bottom)) if area_bottom == 0: continue area_ratio = area_top / area_bottom sum_iou = sum(iou) # If segment is fully inside another one, save this segment as is if area_ratio - ratio_tolerance < sum_iou[ 0] < area_ratio + ratio_tolerance: continue # Check situation when bottom segment is fully inside top. # It means that in annotation is mistake. Save this segment as is if 1 / area_ratio - ratio_tolerance < sum_iou[ 0] < 1 / area_ratio + ratio_tolerance: continue if sum_iou[0] > threshold: segment_overlapped = True bottom_mask = np.array(mask_util.decode(rle_bottom), dtype=np.uint8) top_mask = np.array(mask_util.decode(rle_top), dtype=np.uint8) bottom_mask = np.subtract(bottom_mask, top_mask) bottom_mask[bottom_mask > 1] = 0 bottom_mask = np.sum(bottom_mask, axis=2) bottom_mask = np.array(bottom_mask > 0, dtype=np.uint8) polygons[i]['points'] = mask_to_polygon( bottom_mask, area_threshold=area_threshold) # If some segment is empty, do small fix to avoid error in cocoapi function if len(polygons[i]['points']) == 0: polygons[i]['points'] = [empty_polygon] rle_bottom = mask_util.frPyObjects(polygons[i]['points'], height, width) if not segment_overlapped: polygons[i]['points'] = [polygons[i]['points']] output_polygons = [] for polygon in polygons: poly_len = len(polygon['points']) if poly_len != 0 and polygon['points'] != [empty_polygon]: output_polygons.append(polygon) return output_polygons
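# Small hedged sketch of the overlap test used above: IoU of two polygon segments computed
# through pycocotools RLEs. The helper name is illustrative; the pycocotools calls are real.
from pycocotools import mask as mask_util

def polygon_iou(points_a, points_b, height, width):
    # points_*: flat [x0, y0, x1, y1, ...] lists, like polygons[i]['points']
    rle_a = mask_util.frPyObjects([points_a], height, width)
    rle_b = mask_util.frPyObjects([points_b], height, width)
    # iou() returns a len(a) x len(b) matrix; here that is 1 x 1
    return float(mask_util.iou(rle_a, rle_b, [0])[0][0])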
def polygons_to_mask(self, polygons):
    rle = mask_util.frPyObjects(polygons, self.height, self.width)
    rle = mask_util.merge(rle)
    return mask_util.decode(rle)[:, :]
coco.showAnns(anns) plt.savefig('b.png') for id in imgIds: img = coco.loadImgs(ids = id)[0] height = img['height'] width = img['width'] name = img['file_name'] gtName = name[0:len(name)-3] + 'png' annIds = coco.getAnnIds(imgIds=id, iscrowd=None) anns = coco.loadAnns(annIds) gt = np.zeros((height, width)) for ann in anns: catId = ann['category_id'] if type(ann['segmentation']) == list: # print 'polygon' rle = mask.frPyObjects(ann['segmentation'], height, width) else: if type(ann['segmentation']['counts']) == list: # print 'mask' rle = mask.frPyObjects([ann['segmentation']], height, width) else: # print 'third' rle = [ann['segmentation']] m = mask.decode(rle) m = m[:,:,0] gt *= 1 - m gt += m * catId; im = Image.fromarray(np.uint8(gt)) im.save(savePath + gtName) #plt.imsave(savePath+gtName, gt) counter = counter + 1
def create_tf_example(image, annotations_list, image_dir, category_index, include_masks=False, keypoint_annotations_dict=None): """Converts image and annotations to a tf.Example proto. Args: image: dict with keys: [u'license', u'file_name', u'coco_url', u'height', u'width', u'date_captured', u'flickr_url', u'id'] annotations_list: list of dicts with keys: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box coordinates in the official COCO dataset are given as [x, y, width, height] tuples using absolute coordinates where x, y represent the top-left (0-indexed) corner. This function converts to the format expected by the Tensorflow Object Detection API (which is which is [ymin, xmin, ymax, xmax] with coordinates normalized relative to image size). image_dir: directory containing the image files. category_index: a dict containing COCO category information keyed by the 'id' field of each category. See the label_map_util.create_category_index function. include_masks: Whether to include instance segmentations masks (PNG encoded) in the result. default: False. keypoint_annotations_dict: A dictionary that maps from annotation_id to a dictionary with keys: [u'keypoints', u'num_keypoints'] represeting the keypoint information for this person object annotation. If None, then no keypoint annotations will be populated. Returns: example: The converted tf.Example num_annotations_skipped: Number of (invalid) annotations that were ignored. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ image_height = image['height'] image_width = image['width'] filename = image['file_name'] image_id = image['id'] full_path = os.path.join(image_dir, filename) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) key = hashlib.sha256(encoded_jpg).hexdigest() xmin = [] xmax = [] ymin = [] ymax = [] is_crowd = [] category_names = [] category_ids = [] area = [] encoded_mask_png = [] keypoints_x = [] keypoints_y = [] keypoints_visibility = [] keypoints_name = [] num_keypoints = [] include_keypoint = keypoint_annotations_dict is not None num_annotations_skipped = 0 num_keypoint_annotation_used = 0 num_keypoint_annotation_skipped = 0 for object_annotations in annotations_list: (x, y, width, height) = tuple(object_annotations['bbox']) if width <= 0 or height <= 0: num_annotations_skipped += 1 continue if x + width > image_width or y + height > image_height: num_annotations_skipped += 1 continue xmin.append(float(x) / image_width) xmax.append(float(x + width) / image_width) ymin.append(float(y) / image_height) ymax.append(float(y + height) / image_height) is_crowd.append(object_annotations['iscrowd']) category_id = int(object_annotations['category_id']) category_ids.append(category_id) category_names.append( category_index[category_id]['name'].encode('utf8')) area.append(object_annotations['area']) if include_masks: run_len_encoding = mask.frPyObjects( object_annotations['segmentation'], image_height, image_width) binary_mask = mask.decode(run_len_encoding) if not object_annotations['iscrowd']: binary_mask = np.amax(binary_mask, axis=2) pil_image = PIL.Image.fromarray(binary_mask) output_io = io.BytesIO() pil_image.save(output_io, format='PNG') encoded_mask_png.append(output_io.getvalue()) if include_keypoint: annotation_id = object_annotations['id'] if annotation_id in keypoint_annotations_dict: num_keypoint_annotation_used += 1 
keypoint_annotations = keypoint_annotations_dict[annotation_id] keypoints = keypoint_annotations['keypoints'] num_kpts = keypoint_annotations['num_keypoints'] keypoints_x_abs = keypoints[::3] keypoints_x.extend( [float(x_abs) / image_width for x_abs in keypoints_x_abs]) keypoints_y_abs = keypoints[1::3] keypoints_y.extend( [float(y_abs) / image_height for y_abs in keypoints_y_abs]) keypoints_visibility.extend(keypoints[2::3]) keypoints_name.extend(_COCO_KEYPOINT_NAMES) num_keypoints.append(num_kpts) else: keypoints_x.extend([0.0] * len(_COCO_KEYPOINT_NAMES)) keypoints_y.extend([0.0] * len(_COCO_KEYPOINT_NAMES)) keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES)) keypoints_name.extend(_COCO_KEYPOINT_NAMES) num_keypoints.append(0) feature_dict = { 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(category_names), 'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd), 'image/object/area': dataset_util.float_list_feature(area), } if include_masks: feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png)) if include_keypoint: feature_dict['image/object/keypoint/x'] = ( dataset_util.float_list_feature(keypoints_x)) feature_dict['image/object/keypoint/y'] = ( dataset_util.float_list_feature(keypoints_y)) feature_dict['image/object/keypoint/num'] = ( dataset_util.int64_list_feature(num_keypoints)) feature_dict['image/object/keypoint/visibility'] = ( dataset_util.int64_list_feature(keypoints_visibility)) feature_dict['image/object/keypoint/text'] = ( dataset_util.bytes_list_feature(keypoints_name)) num_keypoint_annotation_skipped = (len(keypoint_annotations_dict) - num_keypoint_annotation_used) example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) return key, example, num_annotations_skipped, num_keypoint_annotation_skipped
def evalPointingGame(cocoAnn, cat, caffeNet, imgDir): imgIds = cocoAnn.getImgIds(catIds=cat['id']) imgList = cocoAnn.loadImgs(ids=imgIds) hit = 0 miss = 0 t0 = time.time() for I in imgList: # run EB on img, get max location on attMap imgName = imgDir + I['file_name'] img = caffe.io.load_image(imgName) attMap = doExcitationBackprop(caffeNet, img, cat['name']) if 1: # naively take argmax maxSub = np.unravel_index(np.argmax(attMap), attMap.shape) else: # take center of max locations maxAtt = np.max(attMap) maxInd = np.where(attMap == maxAtt) maxSub = (np.mean(maxInd[0]), np.mean(maxInd[1])) # load annotations annList = cocoAnn.loadAnns(cocoAnn.getAnnIds(imgIds=I['id'], catIds=cat['id'])) # hit/miss? isHit = 0 for ann in annList: # create a radius-15 circle around max location and see if it # intersects with segmentation mask if type(ann['segmentation']) == list: # polygon for seg in ann['segmentation']: polyPts = np.array(seg).reshape((len(seg)/2, 2)) poly = shapely.geometry.Polygon(polyPts) circ = shapely.geometry.Point(maxSub[::-1]).buffer(15) isHit += poly.intersects(circ) else: # RLE if type(ann['segmentation']['counts']) == list: rle = mask.frPyObjects([ann['segmentation']], I['height'], I['width']) else: rle = [ann['segmentation']] m = mask.decode(rle) m = m[:, :, 0] ind = np.where(m>0) mp = shapely.geometry.MultiPoint(zip(ind[0], ind[1])) circ = shapely.geometry.Point(maxSub).buffer(15) isHit += circ.intersects(mp) if isHit: break if isHit: hit += 1 else: miss += 1 accuracy = (hit+0.0)/(hit+miss) if time.time() - t0 > 10: print cat['name'], ': Hit =', hit, 'Miss =', miss, ' Acc =', accuracy t0 = time.time() return accuracy
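# Hedged mini-sketch of the hit test above: does a radius-15 disc around the attention-map
# argmax touch the object's polygon? The argmax comes in (row, col) order, so it is swapped
# to (x, y) for shapely. The function name and inputs are illustrative.
import numpy as np
import shapely.geometry

def pointing_game_hit(seg, max_rc, radius=15):
    # seg: flat COCO polygon [x0, y0, x1, y1, ...]; max_rc: (row, col) of the argmax
    poly = shapely.geometry.Polygon(np.array(seg).reshape(-1, 2))
    circ = shapely.geometry.Point(max_rc[1], max_rc[0]).buffer(radius)
    return poly.intersects(circ)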
def load(file_object, annotations): from pycocotools import coco as coco_loader from pycocotools import mask as mask_utils import numpy as np def get_filename(path): import os return os.path.splitext(os.path.basename(path))[0] def match_frame(frame_info, filename): import re # try to match by filename yolo_filename = get_filename(filename) for frame_number, info in frame_info.items(): cvat_filename = get_filename(info["path"]) if cvat_filename == yolo_filename: return frame_number # try to extract frame number from filename numbers = re.findall(r"\d+", filename) if numbers and len(numbers) == 1: return int(numbers[0]) raise Exception( "Cannot match filename or determinate framenumber for {} filename". format(filename)) coco = coco_loader.COCO(file_object.name) labels = { cat['id']: cat['name'] for cat in coco.loadCats(coco.getCatIds()) } group_idx = 0 for img_id in coco.getImgIds(): anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id)) img = coco.loadImgs(ids=img_id)[0] frame_number = match_frame(annotations.frame_info, img['file_name']) for ann in anns: group = 0 label_name = labels[ann['category_id']] if 'segmentation' in ann: polygons = [] # polygon if ann['iscrowd'] == 0: polygons = ann['segmentation'] # mask else: if isinstance(ann['segmentation']['counts'], list): rle = mask_utils.frPyObjects([ann['segmentation']], img['height'], img['width']) else: rle = [ann['segmentation']] mask = np.array(mask_utils.decode(rle), dtype=np.uint8) mask = np.sum(mask, axis=2) mask = np.array(mask > 0, dtype=np.uint8) polygons = mask_to_polygon(mask) if len(polygons) > 1: group_idx += 1 group = group_idx for polygon in polygons: annotations.add_shape( annotations.LabeledShape( type='polygon', frame=frame_number, label=label_name, points=polygon, occluded=False, attributes=[], group=group, ))
def __getitem__(self, index): img = self.coco_kps.loadImgs(self.imgIds[index])[0] img_ori = cv2.imread( os.path.join(self.trainimagepath, img['file_name'])) img_human_seg = np.zeros(shape=img_ori.shape[0:2], dtype=np.float32) loss_mask = np.ones_like(img_human_seg) annIds = self.coco_kps.getAnnIds(imgIds=img['id'], catIds=self.catIds, iscrowd=None) anns = self.coco_kps.loadAnns(annIds) # plt.imshow(img_ori) # self.coco_kps.showAnns(anns) # plt.show() assert len(anns) > 0 assert 'segmentation' in anns[0] or 'keypoints' in anns[0] polygons = [] color = [] keypoints = [] #(part_id,x,y) parts = [] #((partid0,x0,y0),(partid1,x1,y1)) for ann in anns: c = (np.random.random((1, 3)) * 0.6 + 0.4).tolist()[0] if 'segmentation' in ann: if type(ann['segmentation']) == list: # polygon for seg in ann['segmentation']: poly = np.array(seg).reshape((int(len(seg) / 2), 2)) cv2.drawContours( img_human_seg, [poly[np.newaxis, :].astype(np.int32)], 0, (1, 1, 1), -1) polygons.append(Polygon(poly)) color.append(c) if 'keypoints' in ann and (ann['num_keypoints'] < 5 or ann['area'] < 32 * 32): for seg in ann['segmentation']: poly = np.array(seg).reshape( (int(len(seg) / 2), 2)) cv2.drawContours( loss_mask, [poly[np.newaxis, :].astype(np.int32)], 0, (0, 0, 0), -1) else: # mask t = self.coco_kps.imgs[ann['image_id']] if type(ann['segmentation']['counts']) == list: rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width']) else: rle = [ann['segmentation']] m = maskUtils.decode(rle) loss_mask *= (1.0 - m[:, :, 0]).astype(np.float32) COCO_to_ours_1 = [ 1, 6, 7, 9, 11, 6, 8, 10, 13, 15, 17, 12, 14, 16, 3, 2, 5, 4 ] COCO_to_ours_2 = [ 1, 7, 7, 9, 11, 6, 8, 10, 13, 15, 17, 12, 14, 16, 3, 2, 5, 4 ] mid_1 = [ 2, 9, 10, 2, 12, 13, 2, 3, 4, 3, 2, 6, 7, 6, 2, 1, 1, 15, 16 ] mid_2 = [ 9, 10, 11, 12, 13, 14, 3, 4, 5, 17, 6, 7, 8, 18, 1, 15, 16, 17, 18 ] assert len(COCO_to_ours_1) == len(COCO_to_ours_2) == self.NUM_PARTS if 'keypoints' in ann and type(ann['keypoints']) == list: # turn skeleton into zero-based index # sks = np.array(self.coco_kps.loadCats(ann['category_id'])[0]['skeleton'])-1 kp = np.array(ann['keypoints']) x_coco = kp[0::3] y_coco = kp[1::3] v_coco = kp[2::3] x = [] y = [] v = [] for index1, index2 in zip(COCO_to_ours_1, COCO_to_ours_2): index1 -= 1 index2 -= 1 x.append(0.5 * (x_coco[index1] + x_coco[index2])) y.append(0.5 * (y_coco[index1] + y_coco[index2])) v.append(min(v_coco[index1], v_coco[index2])) for i in range(self.NUM_PARTS): if v[i] > 0: # cv2.circle(heatmaps[i],(int(round(x[i])),int(round(y[i]))),self.HEAT_RADIUS,(1,1,1),-1) keypoints.append([i, x[i], y[i]]) for i in range(self.NUM_LINKS): kp0, kp1 = mid_1[i] - 1, mid_2[i] - 1 if v[kp0] > 0 and v[kp1] > 0: parts.append([i, x[kp0], y[kp0], x[kp1], y[kp1]]) if len(img_ori.shape) == 2: temp = np.empty(shape=(img_ori.shape[0], img_ori.shape[1], 3), dtype=np.uint8) for i in range(3): temp[:, :, i] = img_ori print('gray img') ''' Image augmentation. ''' from img_aug import im_aug [img_ori, loss_mask], keypoints, parts = im_aug([img_ori, loss_mask], keypoints, parts) img_ori = np.transpose(img_ori, (2, 0, 1)) loss_mask = loss_mask[np.newaxis, :, :] img_ori, loss_mask = self.im_transpose([img_ori, loss_mask], axes=(1, 2, 0)) img_ori, keypoints, parts, loss_mask = self.im_resize( img_ori, keypoints, parts, loss_mask) ''' Generate pafmaps for stride 4 , other pafmaps can be sampled by this. 
''' pafmaps = [ np.zeros_like(np.squeeze(loss_mask)) for _ in range(self.NUM_LINKS * 2) ] pafmaps_count = [ np.zeros_like(np.squeeze(loss_mask)) for _ in range(self.NUM_LINKS * 2) ] for limb_id, x0, y0, x1, y1 in parts: p0 = np.array([x0, y0]) p1 = np.array([x1, y1]) mask_ = np.zeros_like(np.squeeze(loss_mask), dtype=np.uint8) cv2.line(mask_, (int(round(x0)), int(round(y0))), (int(round(x1)), int(round(y1))), (1, 1, 1), self.PART_LINE_WIDTH) vec = p1 - p0 vec = vec / (np.linalg.norm(vec) + 0.001) vec_index = np.where(np.squeeze(mask_)) pafmaps[2 * limb_id][vec_index] += vec[0] pafmaps[2 * limb_id + 1][vec_index] += vec[1] pafmaps_count[2 * limb_id][vec_index] += 1 pafmaps_count[2 * limb_id + 1][vec_index] += 1 pafmaps_count = np.array(pafmaps_count) pafmaps = np.array(pafmaps) pafmaps[np.where(pafmaps_count != 0)] /= pafmaps_count[np.where( pafmaps_count != 0)] ''' Generate heatmaps for stride 1(the minimum stride) heatmaps for other strides can be sampled by this heatmaps. ''' # for stride in [64,32,16,8,4]: import time t0 = time.time() heatmaps_strides = [] for stride in self.STRIDES: dest_size = (int(self.INPUT_SIZE // stride), int(self.INPUT_SIZE // stride)) heatmaps = [ np.zeros(shape=dest_size, dtype=np.float32) for _ in range(self.NUM_PARTS) ] from cheatmap.heatmaps import genGaussionHeatmap for part_id, x, y in keypoints: heat_tmp = genGaussionHeatmap(int(self.INPUT_SIZE // stride), int(self.INPUT_SIZE // stride), x, y, stride=stride) heatmaps[part_id] = np.max([heat_tmp, heatmaps[part_id]], axis=0) heatmaps = np.array(heatmaps) heatmaps = np.concatenate( [heatmaps, np.min(heatmaps, axis=0)[np.newaxis]]) heatmaps_strides.append(heatmaps.reshape((-1))) heatmap_strides = np.concatenate(heatmaps_strides, axis=0) # for m in range(int(self.INPUT_SIZE//stride)): # for n in range(int(self.INPUT_SIZE//stride)): # ori_x = n *stride + stride / 2 - 0.5 # ori_y = m * stride + stride / 2 - 0.5 # for pard_id,x,y in keypoints: # d2 = (ori_x-x)**2+(ori_y-y)**2 # sigma = 7.0 # exponent = d2 / 2.0 / (sigma**2) # heatmaps[pard_id][m, n] = max(np.exp(-exponent), heatmaps[pard_id][m,n]) # print(heatmaps.shape) # print(heatmaps.shape) pafmaps = np.array(pafmaps) t1 = time.time() # print(t1-t0) # print(heatmaps.shape,pafmaps.shape,loss_mask.shape) return np.transpose( img_ori, (2, 0, 1)), [heatmap_strides] + self.make_fpn_label( pafmaps, loss_mask, )
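# Hedged stand-in for cheatmap.heatmaps.genGaussionHeatmap, following the commented-out
# reference loop above: a Gaussian bump centred on a keypoint, evaluated at the centres of
# stride-sized cells, with sigma = 7.0 as in that comment. Vectorized for brevity.
import numpy as np

def gaussian_heatmap(grid_h, grid_w, x, y, stride, sigma=7.0):
    n = np.arange(grid_w, dtype=np.float32)
    m = np.arange(grid_h, dtype=np.float32)
    ori_x = n * stride + stride / 2.0 - 0.5   # cell centres in image coordinates
    ori_y = m * stride + stride / 2.0 - 0.5
    d2 = (ori_x[None, :] - x) ** 2 + (ori_y[:, None] - y) ** 2
    return np.exp(-d2 / (2.0 * sigma ** 2))   # shape (grid_h, grid_w)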
cols = np.any(img, axis=0) rmin, rmax = np.where(rows)[0][[0, -1]] cmin, cmax = np.where(cols)[0][[0, -1]] return rmin-padSize, rmax+padSize+1, cmin-padSize, cmax+padSize+1 for i in np.arange(_count, len(anns)): print 'transforming instance %d' % i #transform_and_save_image(i) uint_image = io.imread('%s/images/%s/%s' % (dataDir,dataType,imgs[i]['file_name'])) if len(uint_image.shape) == 2: tmp_image = np.zeros(uint_image.shape + (3,), dtype=np.uint8) tmp_image[:,:,0] = tmp_image[:,:,1] = tmp_image[:,:,2] = uint_image uint_image = tmp_image float_image = np.array(uint_image, dtype=np.float32)/255.0 rle = mask.frPyObjects(anns[i]['segmentation'], imgs[i]['height'], imgs[i]['width']) m_uint = mask.decode(rle) m = np.array(m_uint[:,:,0], dtype=np.float32) base_tran = video_transformer.sample() frame1_tran = base_tran # + frame_transformer.sample() frame2_tran = base_tran + frame_transformer.sample() image1 = frame1_tran.transform_img(float_image.copy(), float_image.shape[:2], m) #print 'image1 size: %s' % str(image1.shape) image1_padded = np.pad(image1,((padSize,padSize),(padSize,padSize),(0,0)), mode='constant') #print 'image1_padded size: %s' % str(image1_padded.shape) mask1 = frame1_tran.transform_mask(m.copy(), m.shape) #fills padded area with -1 mask1 = mask1[0] mask1[mask1 == -1] = 0 #print 'mask1 size: %s' % str(mask1.shape)
plt.imshow(I) plt.show() # load and display instance annotations plt.imshow(I) plt.imsave('a.png', I) annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None) anns = coco.loadAnns(annIds) coco.showAnns(anns) plt.savefig('b.png') #img = coco.loadImgs(ids=ids2[2])[0] #ann = coco.loadAnns(ids = coco.getAnnIds(imgIds=ids2[2]))[0] ann = anns[0] seg = ann['segmentation'][0] rle = mask.frPyObjects([seg], img['height'], img['width']) m = mask.decode(rle) image = np.ones( (m.shape[0], m.shape[1], 3)) color_mask = np.random.random((1,3)).tolist()[0] for i in range(3): image[:,:,i] = color_mask[i] m2=np.dstack((image, m * 0.5)) plt.imsave('m.png', m2) vocId = [1, 2, 3, 4, 5, 6, 7, 9, 16, 17, 18, 19, 20, 21, 44, 62, 64, 67, 72] #vocId = [1, 2, 3, 4, 5, 6, 7, 9, 16, 17, 18, 19, 20, 21, 44, 62, 63, 64, 67, 72] ids=coco.getImgIds() print 'length of ids %d\n' % len(ids)
def showAnns(self, anns): """ Display the specified annotations. :param anns (array of object): annotations to display :return: None """ if len(anns) == 0: return 0 if 'segmentation' in anns[0] or 'keypoints' in anns[0]: datasetType = 'instances' elif 'caption' in anns[0]: datasetType = 'captions' else: raise Exception('datasetType not supported') if datasetType == 'instances': ax = plt.gca() ax.set_autoscale_on(False) polygons = [] color = [] for ann in anns: c = (np.random.random((1, 3))*0.6+0.4).tolist()[0] if 'segmentation' in ann: if type(ann['segmentation']) == list: # polygon for seg in ann['segmentation']: poly = np.array(seg).reshape((int(len(seg)/2), 2)) polygons.append(Polygon(poly)) color.append(c) else: # mask t = self.imgs[ann['image_id']] if type(ann['segmentation']['counts']) == list: rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width']) else: rle = [ann['segmentation']] m = maskUtils.decode(rle) img = np.ones( (m.shape[0], m.shape[1], 3) ) if ann['iscrowd'] == 1: color_mask = np.array([2.0,166.0,101.0])/255 if ann['iscrowd'] == 0: color_mask = np.random.random((1, 3)).tolist()[0] for i in range(3): img[:,:,i] = color_mask[i] ax.imshow(np.dstack( (img, m*0.5) )) if 'keypoints' in ann and type(ann['keypoints']) == list: # turn skeleton into zero-based index sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1 kp = np.array(ann['keypoints']) x = kp[0::3] y = kp[1::3] v = kp[2::3] for sk in sks: if np.all(v[sk]>0): plt.plot(x[sk],y[sk], linewidth=3, color=c) plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2) plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2) p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4) ax.add_collection(p) p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2) ax.add_collection(p) elif datasetType == 'captions': for ann in anns: print(ann['caption'])
def coco2binary_mask(segmentation: list, height: int, width: int) -> np.ndarray:
    # decode only the first polygon part; `segmentation` is a COCO polygon list
    rles = mutils.frPyObjects(segmentation, height, width)
    return mutils.decode(rles)[:, :, 0]
def crop_covered_segments(segments, width, height, iou_threshold=0.0, ratio_tolerance=0.001, area_threshold=1, return_masks=False): """ Find all segments occluded by others and crop them to the visible part only. Input segments are expected to be sorted from background to foreground. Args: segments: 1d list of segment RLEs (in COCO format) width: width of the image height: height of the image iou_threshold: IoU threshold for objects to be counted as intersected By default is set to 0 to process any intersected objects ratio_tolerance: an IoU "handicap" value for a situation when an object is (almost) fully covered by another one and we don't want make a "hole" in the background object area_threshold: minimal area of included segments Returns: A list of input segments' parts (in the same order as input): [ [[x1,y1, x2,y2 ...], ...], # input segment #0 parts mask1, # input segment #1 mask (if source segment is mask) [], # when source segment is too small ... ] """ from pycocotools import mask as mask_utils segments = [[s] for s in segments] input_rles = [mask_utils.frPyObjects(s, height, width) for s in segments] for i, rle_bottom in enumerate(input_rles): area_bottom = sum(mask_utils.area(rle_bottom)) if area_bottom < area_threshold: segments[i] = [] if not return_masks else None continue rles_top = [] for j in range(i + 1, len(input_rles)): rle_top = input_rles[j] iou = sum(mask_utils.iou(rle_bottom, rle_top, [0, 0]))[0] if iou <= iou_threshold: continue area_top = sum(mask_utils.area(rle_top)) area_ratio = area_top / area_bottom # If a segment is fully inside another one, skip this segment if abs(area_ratio - iou) < ratio_tolerance: continue # Check if the bottom segment is fully covered by the top one. # There is a mistake in the annotation, keep the background one if abs(1 / area_ratio - iou) < ratio_tolerance: rles_top = [] break rles_top += rle_top if not rles_top and not isinstance(segments[i][0], dict) \ and not return_masks: continue rle_bottom = rle_bottom[0] bottom_mask = mask_utils.decode(rle_bottom).astype(np.uint8) if rles_top: rle_top = mask_utils.merge(rles_top) top_mask = mask_utils.decode(rle_top).astype(np.uint8) bottom_mask -= top_mask bottom_mask[bottom_mask != 1] = 0 if not return_masks and not isinstance(segments[i][0], dict): segments[i] = mask_to_polygons(bottom_mask, area_threshold=area_threshold) else: segments[i] = bottom_mask return segments
def polys_to_mask(polygons, height, width): """Convert from the COCO polygon segmentation format to a binary mask encoded as a 2D array of data type numpy.float32. The polygon segmentation is understood to be enclosed inside a height x width image. The resulting mask is therefore of shape (height, width). """ rle = mask_util.frPyObjects(polygons, height, width) mask_ = np.array(mask_util.decode(rle), dtype=np.float32) # Flatten in case polygons was a list mask_ = np.sum(mask_, axis=2) mask_ = np.array(mask_ > 0, dtype=np.float32) mask_ = np.array(mask_, dtype=np.uint8) ret, thr = cv2.threshold(mask_, 0, 1, cv2.THRESH_BINARY) _, countors, _ = cv2.findContours(thr, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) mask_bshape = np.zeros((height, width), np.float32) inner_bshape = np.zeros((height, width), np.float32) temp1 = np.zeros((height, width), np.float32) temp2 = np.zeros((height, width), np.float32) temp3 = np.zeros((height, width), np.float32) temp4 = np.zeros((height, width), np.float32) temp5 = np.zeros((height, width), np.float32) temp6 = np.zeros((height, width), np.float32) temp7 = np.zeros((height, width), np.float32) temp8 = np.zeros((height, width), np.float32) temp9 = np.zeros((height, width), np.float32) temp10 = np.zeros((height, width), np.float32) # 3px mask polygon = countors pixels = cfg.BSHAPE.PIXELS mask = None if pixels == 3: inner_bshape = cv2.fillPoly(inner_bshape, polygon, 3) mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1) temp1 = cv2.polylines(temp1, polygon, True, 1, 2) temp2 = cv2.polylines(temp2, polygon, True, 1, 3) temp3 = cv2.polylines(temp3, polygon, True, 1, 4) mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 mask = np.where( mask == 7, 1, np.where(mask == 6, 0.95, np.where(mask == 5, 0.85, np.where(mask == 4, 0.70, 0)))) elif pixels == 5: inner_bshape = cv2.fillPoly(inner_bshape, polygon, 5) mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1) temp1 = cv2.polylines(temp1, polygon, True, 1, 2) temp2 = cv2.polylines(temp2, polygon, True, 1, 3) temp3 = cv2.polylines(temp3, polygon, True, 1, 4) temp4 = cv2.polylines(temp4, polygon, True, 1, 5) temp5 = cv2.polylines(temp5, polygon, True, 1, 6) mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5 mask = np.where( mask == 11, 1, np.where( mask == 10, 0.95, np.where( mask == 9, 0.85, np.where( mask == 8, 0.70, np.where(mask == 7, 0.65, np.where(mask == 6, 0.60, 0)))))) elif pixels == 7: inner_bshape = cv2.fillPoly(inner_bshape, polygon, 7) mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1) temp1 = cv2.polylines(temp1, polygon, True, 1, 2) temp2 = cv2.polylines(temp2, polygon, True, 1, 3) temp3 = cv2.polylines(temp3, polygon, True, 1, 4) temp4 = cv2.polylines(temp4, polygon, True, 1, 5) temp5 = cv2.polylines(temp5, polygon, True, 1, 6) temp6 = cv2.polylines(temp6, polygon, True, 1, 7) temp7 = cv2.polylines(temp7, polygon, True, 1, 8) mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5 + temp6 + temp7 mask = np.where( mask == 15, 1, np.where( mask == 14, 0.95, np.where( mask == 13, 0.90, np.where( mask == 12, 0.85, np.where( mask == 11, 0.80, np.where( mask == 10, 0.75, np.where(mask == 9, 0.70, np.where(mask == 8, 0.65, 0)))))))) elif pixels == 11: inner_bshape = cv2.fillPoly(inner_bshape, polygon, 10) mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1) temp1 = cv2.polylines(temp1, polygon, True, 1, 2) temp2 = cv2.polylines(temp2, polygon, True, 1, 3) temp3 = cv2.polylines(temp3, polygon, True, 1, 4) temp4 = 
cv2.polylines(temp4, polygon, True, 1, 5) temp5 = cv2.polylines(temp5, polygon, True, 1, 6) temp6 = cv2.polylines(temp6, polygon, True, 1, 7) temp7 = cv2.polylines(temp7, polygon, True, 1, 8) temp8 = cv2.polylines(temp8, polygon, True, 1, 9) temp9 = cv2.polylines(temp9, polygon, True, 1, 10) temp10 = cv2.polylines(temp10, polygon, True, 1, 11) mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5 + temp6 + temp7 + temp8 + +temp9 + temp10 mask = np.where( mask == 21, 1, np.where( mask == 20, 0.95, np.where( mask == 19, 0.90, np.where( mask == 18, 0.85, np.where( mask == 17, 0.80, np.where( mask == 16, 0.75, np.where( mask == 15, 0.70, np.where( mask == 14, 0.65, np.where( mask == 13, 0.60, np.where( mask == 12, 0.55, np.where(mask == 11, 0.50, 0))))))))))) # inner_bshape = cv2.fillPoly(inner_bshape, polygon, 4) # mask_bshape = cv2.polylines(mask_bshape,polygon, True, 1, 1) # temp1 = cv2.polylines(temp1, polygon, True, 1, 2) # temp2 = cv2.polylines(temp2, polygon, True, 1, 3) # temp3 = cv2.polylines(temp3,polygon, True, 1, 4) # temp4 = cv2.polylines(temp4, polygon, True, 1, 5) # temp5 = cv2.polylines(temp5, polygon, True, 1, 6) # # mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5 # mask = np.where(mask == 11, 1, np.where(mask == 10, 0.95, np.where(mask == 9, 0.85, np.where(mask == 8, 0.70, # np.where(mask == 7, # 0.65, # np.where( # mask == 6, # 0.60, # 0)))))) mask = np.array(mask, dtype=np.float32) return mask
def polys_to_mask(polygons, height, width):
    rle = mask_util.frPyObjects(polygons, height, width)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask
def coco_rle_decode(rle, h, w): return mutils.decode(mutils.frPyObjects(rle, h, w))
def _load_coco_annotation(self, index): """ Loads COCO bounding-box & segmentation instance annotations. Crowd instances are removed. """ im_ann = self._COCO.loadImgs(index)[0] width = im_ann['width']; height = im_ann['height'] ####################################################################### # get bboxes that are outside crowd regions annIds = self._COCO.getAnnIds(imgIds=index, iscrowd=False) ####################################################################### objs = self._COCO.loadAnns(annIds) # Sanitize bboxes -- some are invalid valid_objs = [] valid_IDs = [] for i in xrange(len(objs)): obj = objs[i] x1 = np.max((0, obj['bbox'][0])) y1 = np.max((0, obj['bbox'][1])) x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1)))) y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1)))) if obj['area'] > 0 and x2 >= x1 and y2 >= y1 and (not obj['iscrowd']): obj['clean_bbox'] = [x1, y1, x2, y2] valid_objs.append(obj) valid_IDs.append(annIds[i]) ######################################################################## boxes = [] gt_classes = [] seg_areas = [] # RLE representation of binary mask rles = [] ####################################################################### ####################################################################### # Lookup table to map from COCO category ids to our internal class # indices coco_cat_id_to_class_ind = dict([(self._class_to_coco_cat_id[cls], self._class_to_ind[cls]) for cls in self._classes[1:]]) for i in xrange(len(valid_objs)): obj = valid_objs[i] cls = coco_cat_id_to_class_ind[obj['category_id']] ####################################################################### if type(obj['segmentation']) == list: # polygon rle = COCOmask.frPyObjects(obj['segmentation'], height, width) elif type(obj['segmentation']['counts']) == list: rle = COCOmask.frPyObjects([obj['segmentation']], height, width) else: rle = [obj['segmentation']] ####################################################################### boxes.append(obj['clean_bbox']) gt_classes.append(cls) seg_areas.append(obj['area']) rles.append(rle) ############################################################### ## calculate the areas of objects area = float(width * height) mask = np.zeros((height, width), dtype=np.float32) for j in xrange(len(rles)): rle = rles[j] cur_mask = np.amax(COCOmask.decode(rle), axis=2) mask = np.maximum(mask, cur_mask) seg_area = np.sum(mask) seg_ratio = seg_area/area # print seg_ratio ############################################################### return {'image' : self.image_path_from_index(index), 'width' : width, 'height' : height, 'boxes' : np.array(boxes).reshape((-1,4)), 'clses' : np.array(gt_classes), 'polys' : rles, 'ann_ids' : np.array(valid_IDs), 'flipped' : False, 'seg_areas' : np.array(seg_areas), 'image_index': index, 'seg_ratio': seg_ratio}