def loadRes(self, resFile):
    """
    Load result file and return a result api object.
    :param   resFile (str)     : file name of result file
    :return: res (obj)         : result api object
    """
    res = COCO()
    res.dataset['images'] = [img for img in self.dataset['images']]

    print('Loading and preparing results...')
    tic = time.time()
    if type(resFile) == str:
        anns = json.load(open(resFile))
    elif type(resFile) == np.ndarray:
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert type(anns) == list, 'results is not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'
    if 'caption' in anns[0]:
        imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for id, ann in enumerate(anns):
            ann['id'] = id + 1
    elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for id, ann in enumerate(anns):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if not 'segmentation' in ann:
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = id + 1
            ann['iscrowd'] = 0
    elif 'segmentation' in anns[0]:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for id, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if not 'bbox' in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = id + 1
            ann['iscrowd'] = 0
    elif 'keypoints' in anns[0]:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for id, ann in enumerate(anns):
            s = ann['keypoints']
            x = s[0::3]
            y = s[1::3]
            x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x1 - x0) * (y1 - y0)
            ann['id'] = id + 1
            ann['bbox'] = [x0, y0, x1 - x0, y1 - y0]
    print('DONE (t={:0.2f}s)'.format(time.time() - tic))

    res.dataset['annotations'] = anns
    res.createIndex()
    return res
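# --- Usage sketch (not part of the original source): exercising loadRes via
# pycocotools, assuming 'instances_val2017.json' and 'detections.json' are
# placeholder paths for a ground-truth file and a COCO-format result file.
from pycocotools.coco import COCO

coco_gt = COCO('instances_val2017.json')      # ground truth
coco_dt = coco_gt.loadRes('detections.json')  # result api object, built as above
first_img = coco_dt.getImgIds()[0]
print(coco_dt.loadAnns(coco_dt.getAnnIds(imgIds=[first_img]))[:2])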
def LoadAnnotations(self, annotations):
    """Load annotations dictionary into COCO datastructure.

    See http://mscoco.org/dataset/#format for a description of the annotations
    format.  As above, this function replicates the default behavior of the API
    but does not require writing to external storage.

    Args:
      annotations: python list holding object detection results where each
        detection is encoded as a dict with required keys ['image_id',
        'category_id', 'score'] and one of ['bbox', 'segmentation'] based on
        `detection_type`.

    Returns:
      a coco.COCO datastructure holding object detection annotations results

    Raises:
      ValueError: if annotations is not a list
      ValueError: if annotations do not correspond to the images contained
        in self.
    """
    results = coco.COCO()
    results.dataset['images'] = [img for img in self.dataset['images']]

    tf.logging.info('Loading and preparing annotation results...')
    tic = time.time()

    if not isinstance(annotations, list):
        raise ValueError('annotations is not a list of objects')
    annotation_img_ids = [ann['image_id'] for ann in annotations]
    if (set(annotation_img_ids) != (set(annotation_img_ids)
                                    & set(self.getImgIds()))):
        raise ValueError('Results do not correspond to current coco set')
    results.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
    if self._detection_type == 'bbox':
        for idx, ann in enumerate(annotations):
            bb = ann['bbox']
            ann['area'] = bb[2] * bb[3]
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif self._detection_type == 'segmentation':
        for idx, ann in enumerate(annotations):
            ann['area'] = mask.area(ann['segmentation'])
            ann['bbox'] = mask.toBbox(ann['segmentation'])
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    tf.logging.info('DONE (t=%0.2fs)', (time.time() - tic))

    results.dataset['annotations'] = annotations
    results.createIndex()
    return results
def getMask(self, ref):
    '''
    :return: mask, mask-area, mask-center
    '''
    ann = self.refToAnn[ref['ref_id']]
    image = self.imgs[ref['image_id']]
    if type(ann['segmentation'][0]) == list:  # polygon
        rle = mask.frPyObjects(ann['segmentation'], image['height'], image['width'])
    else:  # mask
        rle = ann['segmentation']
    m = mask.decode(rle)
    m = np.sum(m, axis=2)  # sometimes there are multiple binary maps (corresponding to multiple segs)
    m = m.astype(np.uint8)  # convert to np.uint8
    # area
    area = sum(mask.area(rle))  # very close to ann['area']
    # position
    position_x = np.mean(np.where(m == 1)[1])  # [1] means columns (matlab style) -> x (c++ style)
    position_y = np.mean(np.where(m == 1)[0])  # [0] means rows (matlab style) -> y (c++ style)
    # mass position (If there were multiple regions, we use the largest one.)
    label_m = label(m, connectivity=m.ndim)
    regions = regionprops(label_m)
    if len(regions) > 0:
        largest_id = np.argmax(np.array([props.filled_area for props in regions]))
        largest_props = regions[largest_id]
        mass_y, mass_x = largest_props.centroid
    else:
        mass_x, mass_y = position_x, position_y
    # if centroid is not in mask, we find the closest point to it from mask
    if m[int(mass_y), int(mass_x)] != 1:  # centroid coordinates are floats; cast before indexing
        print('Finding closest mask point...')
        kernel = np.ones((10, 10), np.uint8)
        me = cv2.erode(m, kernel, iterations=1)
        points = list(zip(np.where(me == 1)[0].tolist(),
                          np.where(me == 1)[1].tolist()))  # row, col style
        points = np.array(points)
        dist = np.sum((points - (mass_y, mass_x)) ** 2, axis=1)
        id = np.argsort(dist)[0]
        mass_y, mass_x = points[id]
    # return
    return {'mask': m, 'area': area,
            'position_x': position_x, 'position_y': position_y,
            'mass_x': mass_x, 'mass_y': mass_y}
def loadRes(self, predictions):
    """Loads result file and return a result api object.

    Args:
      predictions: a list of dictionaries, each representing an annotation in
        COCO format. The required fields are `image_id`, `category_id`,
        `score`, `bbox`, `segmentation`.

    Returns:
      res: result COCO api object.

    Raises:
      ValueError: if the set of image ids from predictions is not a subset of
        the set of image ids of the groundtruth dataset.
    """
    res = coco.COCO()
    res.dataset['images'] = copy.deepcopy(self.dataset['images'])
    res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])

    image_ids = [ann['image_id'] for ann in predictions]
    if set(image_ids) != (set(image_ids) & set(self.getImgIds())):
        raise ValueError('Results do not correspond to the current dataset!')
    for ann in predictions:
        x1, x2, y1, y2 = [
            ann['bbox'][0], ann['bbox'][0] + ann['bbox'][2],
            ann['bbox'][1], ann['bbox'][1] + ann['bbox'][3]
        ]
        if self._eval_type == 'box':
            ann['area'] = ann['bbox'][2] * ann['bbox'][3]
            ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
        elif self._eval_type == 'mask':
            ann['area'] = mask_utils.area(ann['segmentation'])

    res.dataset['annotations'] = copy.deepcopy(predictions)
    res.createIndex()
    return res
def load_txt(path):
    objects_per_frame = {}
    track_ids_per_frame = {}  # To check that no frame contains two objects with same id
    combined_mask_per_frame = {}  # To check that no frame contains overlapping masks
    with open(path, "r") as f:
        for line in f:
            line = line.strip()
            fields = line.split(" ")

            frame = int(fields[0])
            if frame not in objects_per_frame:
                objects_per_frame[frame] = []
            if frame not in track_ids_per_frame:
                track_ids_per_frame[frame] = set()
            if int(fields[1]) in track_ids_per_frame[frame]:
                assert False, "Multiple objects with track id " + fields[1] + " in frame " + fields[0]
            else:
                track_ids_per_frame[frame].add(int(fields[1]))

            class_id = int(fields[2])
            if not (class_id == 1 or class_id == 2 or class_id == 10):
                assert False, "Unknown object class " + fields[2]

            mask = {'size': [int(fields[3]), int(fields[4])],
                    'counts': fields[5].encode(encoding='UTF-8')}
            if frame not in combined_mask_per_frame:
                combined_mask_per_frame[frame] = mask
            elif rletools.area(rletools.merge([combined_mask_per_frame[frame], mask],
                                              intersect=True)) > 0.0:
                assert False, "Objects with overlapping masks in frame " + fields[0]
            else:
                combined_mask_per_frame[frame] = rletools.merge(
                    [combined_mask_per_frame[frame], mask], intersect=False)
            objects_per_frame[frame].append(SegmentedObject(
                mask,
                class_id,
                int(fields[1])
            ))

    return objects_per_frame
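# --- Illustrative sketch (an assumption, not from the source): the
# whitespace-separated per-line layout that load_txt parses; the RLE counts
# string below is a truncated placeholder, not a decodable mask.
# <frame> <track_id> <class_id> <img_height> <img_width> <rle_counts>
sample_line = "52 1005 1 1080 1920 WSV:2d;1O0000N2N..."
fields = sample_line.strip().split(" ")
assert int(fields[2]) in (1, 2, 10)  # the only class ids load_txt accepts
print(int(fields[0]), int(fields[1]),
      {'size': [int(fields[3]), int(fields[4])],
       'counts': fields[5].encode(encoding='UTF-8')})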
def create_annotation_info(annotation_id, image_id, category_info, binary_mask,
                           image_size=None, tolerance=2, bounding_box=None):
    if image_size is not None:
        binary_mask = resize_binary_mask(binary_mask, image_size)

    binary_mask_encoded = mask.encode(
        np.asfortranarray(binary_mask.astype(np.uint8)))

    area = mask.area(binary_mask_encoded)
    if area < 1:
        return None

    if bounding_box is None:
        bounding_box = mask.toBbox(binary_mask_encoded)

    if category_info["is_crowd"]:
        is_crowd = 1
        segmentation = binary_mask_to_rle(binary_mask)
    else:
        is_crowd = 0
        segmentation = binary_mask_to_polygon(binary_mask, tolerance)
        if not segmentation:
            return None

    annotation_info = {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_info["id"],
        "iscrowd": is_crowd,
        "area": area.tolist(),
        "bbox": bounding_box.tolist(),
        "segmentation": segmentation,
        "width": binary_mask.shape[1],
        "height": binary_mask.shape[0],
    }

    return annotation_info
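# --- Quick sanity check (an assumption: create_annotation_info and its
# helpers binary_mask_to_rle / binary_mask_to_polygon are importable, as in
# pycococreator-style projects).
import numpy as np

toy_mask = np.zeros((64, 64), dtype=np.uint8)
toy_mask[10:30, 20:50] = 1  # one rectangular instance
info = create_annotation_info(
    annotation_id=1, image_id=1,
    category_info={"id": 1, "is_crowd": False},
    binary_mask=toy_mask)
print(info["bbox"], info["area"])  # expected: [20.0, 10.0, 30.0, 20.0] 600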
def batch2annList(batch):
    annList = []
    image_id = int(batch["name"][0].replace("_", ""))
    # image_id = batch["image_id"][0]
    height, width = batch["images"].shape[-2:]

    maskObjects = batch["maskObjects"]
    maskClasses = batch["maskClasses"]
    n_objects = maskObjects[maskObjects != 255].max()

    id = 1
    for obj_id in range(1, n_objects + 1):
        if obj_id == 0:
            continue
        binmask = (maskObjects == obj_id)
        segmentation = maskUtils.encode(np.asfortranarray(ms.t2n(binmask).squeeze()))
        segmentation["counts"] = segmentation["counts"].decode("utf-8")
        uniques = (binmask.long() * maskClasses).unique()
        uniques = uniques[uniques != 0]
        assert len(uniques) == 1

        category_id = uniques[0].item()

        annList += [{"segmentation": segmentation,
                     "iscrowd": 0,
                     # "bbox": maskUtils.toBbox(segmentation).tolist(),
                     "area": int(maskUtils.area(segmentation)),
                     "id": id,
                     "height": height,
                     "width": width,
                     "image_id": image_id,
                     "category_id": category_id}]
        id += 1

    return annList
def create_video_annotation_info(binary_mask, is_crowd, image_size=None,
                                 tolerance=2, pixel_thr=100, bounding_box=None):
    if image_size is not None:
        binary_mask = resize_binary_mask(binary_mask, image_size)

    binary_mask_encoded = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))

    area = mask.area(binary_mask_encoded)
    # if area < 1:
    if area < pixel_thr:
        return None, None, None

    if bounding_box is None:
        bounding_box = mask.toBbox(binary_mask_encoded)

    if is_crowd == 1:
        segmentation = binary_mask_to_rle(binary_mask)
    else:
        segmentation = binary_mask_to_polygon(binary_mask, tolerance)
        if not segmentation:
            return None, None, None
    # segmentation = binary_mask_to_rle(binary_mask)

    return segmentation, bounding_box.tolist(), area.tolist()
def _create_anno(msk, lb, sc, img_id, anno_id, ar=None, crw=None):
    H, W = msk.shape
    if crw is None:
        crw = False
    msk = np.asfortranarray(msk.astype(np.uint8))
    rle = mask_tools.encode(msk)
    if ar is None:
        # We compute dummy area to pass to pycocotools.
        # Note that area dependent scores are ignored afterwards.
        ar = mask_tools.area(rle)
    # Rounding is done to make the result consistent with COCO.
    anno = {
        'image_id': img_id, 'category_id': lb,
        'segmentation': rle,
        'area': ar,
        'id': anno_id,
        'iscrowd': crw
    }
    if sc is not None:
        anno.update({'score': sc})
    return anno
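# --- Hedged example of building one annotation dict with _create_anno; the
# mask, label, score and ids are arbitrary test values, and mask_tools above
# is assumed to be pycocotools' mask module.
import numpy as np

toy = np.zeros((32, 32), dtype=bool)
toy[4:12, 6:20] = True  # an 8 x 14 pixel blob
anno = _create_anno(toy, lb=1, sc=0.9, img_id=1, anno_id=1)
print(anno['area'], anno['iscrowd'], anno['score'])  # 112, False, 0.9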
def get_append_annotations(self):  # COCO
    """Make and append COCO annotations for each object in the scene."""
    seg = segmentationToCocoMask(img_mask, object_uid)
    area = float(mask.area(seg))
    bbox = mask.toBbox(seg).flatten().tolist()
    # 1: run-length encoding (RLE) segmentation format
    seg['counts'] = str(seg['counts'], "utf-8")  # utf-8 format in str
    # 2: or polygon segmentation format
    bitmap = mask.decode(seg)
    seg = _segmentationToPoly(bitmap)

    self.too_small_obj = False
    try:
        # notify and skip objects whose visible representation is too small
        assert area > config['min_obj_area']
        assert len(seg) > 0 and len(seg[0]) > 0
    except:
        # make inverse map id -> name (just to pretty print)
        inv_map = dict(zip(config['used_class_names'].values(),
                           config['used_class_names'].keys()))
        self.too_small_obj = inv_map[class_id]

    self.data_dict = dict(
        id=self.id_unique,
        image_id=image_id,
        category_id=class_id,
        segmentation=seg,
        area=area,
        bbox=bbox,
        iscrowd=0,
    )

    if self.too_small_obj == False:  # area ok
        data['annotations'].append(self.data_dict)  # append annotations
        self.id_unique += 1
    else:  # area too small to be realistically seen
        print('Too small object of class {} with area={} in img {}'.format(
            self.too_small_obj, self.data_dict['area'], name))
def sa_pixel_to_coco_panoptic_segmentation(image_commons, id_generator):
    sa_ann_json = image_commons.sa_ann_json
    flat_mask = image_commons.flat_mask
    ann_mask = image_commons.ann_mask

    segments_info = []
    for instance in sa_ann_json:
        if 'parts' not in instance:
            continue

        parts = [int(part['color'][1:], 16) for part in instance['parts']]
        if instance['classId'] < 0:
            continue
        category_id = instance['classId']
        instance_bitmask = np.isin(flat_mask, parts)
        segment_id = next(id_generator)
        ann_mask[instance_bitmask] = segment_id
        coco_instance_mask = cocomask.encode(
            np.asfortranarray(instance_bitmask.astype(np.uint8)))  # encode expects uint8, np.isin returns bool
        bbox = cocomask.toBbox(coco_instance_mask).tolist()
        area = int(cocomask.area(coco_instance_mask))

        segment_info = {
            'id': segment_id,
            'category_id': category_id,
            'area': area,
            'bbox': bbox,
            'iscrowd': 0
        }
        segments_info.append(segment_info)

    return (image_commons.image_info, segments_info, image_commons.ann_mask)
def __get_annotation__(self, mask, image=None):
    # OpenCV 3.x signature; on OpenCV 4.x findContours returns (contours, hierarchy)
    _, contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    segmentation = []
    for contour in contours:
        # Valid polygons have >= 6 coordinates (3 points)
        if contour.size >= 6:
            segmentation.append(contour.flatten().tolist())
    RLEs = cocomask.frPyObjects(segmentation, mask.shape[0], mask.shape[1])
    RLE = cocomask.merge(RLEs)
    # RLE = cocomask.encode(np.asfortranarray(mask))
    area = cocomask.area(RLE)
    [x, y, w, h] = cv2.boundingRect(mask)

    if image is not None:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.drawContours(image, contours, -1, (0, 255, 0), 1)
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 2)
        cv2.imshow("", image)
        cv2.waitKey(1)

    return segmentation, [x, y, w, h], area
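# --- Standalone sketch of the polygon -> RLE -> area pipeline used above,
# on a synthetic rectangle (pure pycocotools, no cv2 needed).
import numpy as np
from pycocotools import mask as cocomask

h, w = 100, 100
polygon = [30, 20, 79, 20, 79, 59, 30, 59]  # x, y pairs of a 50 x 40 box
rles = cocomask.frPyObjects([polygon], h, w)
rle = cocomask.merge(rles)
print(cocomask.area(rle), cocomask.toBbox(rle).tolist())  # ~2000, [30., 20., 50., 40.]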
def __instance_object_commons_per_instance(
        instance, id_generator, image_commons, cat_id_map
):
    if "parts" not in instance:
        return None
    anno_id = next(id_generator)
    parts = [int(part["color"][1:], 16) for part in instance["parts"]]
    if instance['className'] in cat_id_map:
        category_id = cat_id_map[instance['className']]
    else:
        category_id = instance['classId']
    instance_bitmask = np.isin(image_commons.flat_mask, parts)
    size = instance_bitmask.shape[::-1]

    databytes = instance_bitmask * np.uint8(255)
    contours, hierarchy = cv.findContours(
        databytes, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE
    )

    coco_instance_mask = cocomask.encode(
        np.asfortranarray(instance_bitmask.astype(np.uint8)))  # encode expects uint8, np.isin returns bool
    bbox = cocomask.toBbox(coco_instance_mask).tolist()
    area = int(cocomask.area(coco_instance_mask))

    return (bbox, area, contours, category_id, anno_id)
def convert_to_coco_dict(dataset_name):
    """
    Convert an instance detection/segmentation or keypoint detection dataset
    in detectron2's standard format into COCO json format.

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name (str):
            name of the source dataset
            Must be registered in DatasetCatalog and in detectron2's standard format.
            Must have corresponding metadata "thing_classes"
    Returns:
        coco_dict: serializable dict in COCO json format
    """

    dataset_dicts = DatasetCatalog.get(dataset_name)
    metadata = MetadataCatalog.get(dataset_name)

    # unmap the category mapping ids for COCO
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()}
        reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id]  # noqa
    else:
        reverse_id_mapper = lambda contiguous_id: contiguous_id  # noqa

    # categories = [
    #     {"id": reverse_id_mapper(id), "name": name}
    #     for id, name in enumerate(metadata.thing_classes)
    # ]
    categories = [{"id": 1, "name": 'lesion'}]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": int(image_dict["width"]),
            "height": int(image_dict["height"]),
            "file_name": str(image_dict["file_name"]),
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict.get("annotations", [])
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format for axis-aligned and XYWHA for rotated
            bbox = annotation["bbox"]
            if isinstance(bbox, np.ndarray):
                if bbox.ndim != 1:
                    raise ValueError(f"bbox has to be 1-dimensional. Got shape={bbox.shape}.")
                bbox = bbox.tolist()
            if len(bbox) not in [4, 5]:
                raise ValueError(f"bbox has to have length 4 or 5. Got {bbox}.")
            from_bbox_mode = annotation["bbox_mode"]
            to_bbox_mode = BoxMode.XYWH_ABS if len(bbox) == 4 else BoxMode.XYWHA_ABS
            bbox = BoxMode.convert(bbox, from_bbox_mode, to_bbox_mode)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                if isinstance(segmentation, list):
                    polygons = PolygonMasks([segmentation])
                    area = polygons.area()[0].item()
                elif isinstance(segmentation, dict):  # RLE
                    area = mask_util.area(segmentation).item()
                else:
                    raise TypeError(f"Unknown segmentation type {type(segmentation)}!")
            else:
                # Computing areas using bounding boxes
                if to_bbox_mode == BoxMode.XYWH_ABS:
                    bbox_xy = BoxMode.convert(bbox, to_bbox_mode, BoxMode.XYXY_ABS)
                    area = Boxes([bbox_xy]).area()[0].item()
                else:
                    area = RotatedBoxes([bbox]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # For COCO format consistency we subtract 0.5
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = float(area)
            coco_annotation["iscrowd"] = int(annotation.get("iscrowd", 0))
            coco_annotation["category_id"] = int(reverse_id_mapper(annotation["category_id"]))

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                seg = coco_annotation["segmentation"] = annotation["segmentation"]
                if isinstance(seg, dict):  # RLE
                    counts = seg["counts"]
                    if not isinstance(counts, str):
                        # make it json-serializable
                        seg["counts"] = counts.decode("ascii")

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {"info": info, "images": coco_images, "categories": categories, "licenses": None}
    if len(coco_annotations) > 0:
        coco_dict["annotations"] = coco_annotations
    return coco_dict
for i, labelIdx in enumerate(labelsStuff):
    # Create mask and encode it
    labelMask = np.zeros((h, w))
    labelMask[:, :] = S == labelIdx
    labelMask = np.expand_dims(labelMask, axis=2)
    labelMask = labelMask.astype('uint8')
    labelMask = np.asfortranarray(labelMask)
    Rs = mask.encode(labelMask)

    # Create annotation data
    anndata = {}
    anndata['id'] = annId
    anndata['image_id'] = imageIds[imageIdx]
    anndata['category_id'] = labelIdx - oldStuffStartIdx + newStuffStartIdx  # Stuff classes start from 92 in v. 1.1
    anndata['segmentation'] = Rs
    anndata['area'] = float(mask.area(Rs))
    anndata['bbox'] = mask.toBbox(Rs).tolist()
    anndata['iscrowd'] = 1

    # Write JSON
    str_ = json.dumps(anndata, indent=indent, sort_keys=True,
                      separators=separators, ensure_ascii=ensure_ascii)
    outfile.write(unicode(str_))

    # Increment ann id
    annId = annId + 1

    # Add a comma and line break after each annotation
    if not (imageIdx == imageCount - 1 and i == len(labelsStuff) - 1):
        outfile.write(unicode(','))
    outfile.write(unicode('\n'))
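# --- Minimal sketch of the per-label encode step above, with a synthetic
# segmentation map S and one stuff label (values are made up).
import numpy as np
from pycocotools import mask

S = np.zeros((4, 6), dtype=np.uint8)
S[1:3, 2:5] = 92                       # pretend stuff label 92 covers a 2x3 patch
labelMask = np.asfortranarray(np.expand_dims((S == 92).astype('uint8'), axis=2))
Rs = mask.encode(labelMask)[0]         # encode returns a list for H x W x 1 input
print(float(mask.area(Rs)), mask.toBbox(Rs).tolist())  # 6.0 [2.0, 1.0, 3.0, 2.0]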
def __init__(self, lvis_gt, results, max_dets=300):
    """Constructor for LVIS results.
    Args:
        lvis_gt (LVIS class instance, or str containing path of
        annotation file)
        results (str containing path of result file or a list of dicts)
        max_dets (int): max number of detections per image. The official
        value of max_dets for LVIS is 300.
    """
    if isinstance(lvis_gt, LVIS):
        self.dataset = deepcopy(lvis_gt.dataset)
    elif isinstance(lvis_gt, str):
        self.dataset = self._load_json(lvis_gt)
    else:
        raise TypeError("Unsupported type {} of lvis_gt.".format(lvis_gt))

    self.logger = logging.getLogger(__name__)
    self.logger.info("Loading and preparing results.")

    if isinstance(results, str):
        result_anns = self._load_json(results)
    else:
        # this path way is provided to avoid saving and loading result
        # during training.
        self.logger.warn(
            "Assuming user provided the results in correct format.")
        result_anns = results

    assert isinstance(result_anns, list), "results is not a list."

    if max_dets >= 0:
        result_anns = self.limit_dets_per_image(result_anns, max_dets)

    if "bbox" in result_anns[0]:
        for id, ann in enumerate(result_anns):
            x1, y1, w, h = ann["bbox"]
            x2 = x1 + w
            y2 = y1 + h

            if "segmentation" not in ann:
                ann["segmentation"] = [[x1, y1, x1, y2, x2, y2, x2, y1]]

            ann["area"] = w * h
            ann["id"] = id + 1
    elif "segmentation" in result_anns[0]:
        for id, ann in enumerate(result_anns):
            # Only support compressed RLE format as segmentation results
            ann["area"] = mask_utils.area(ann["segmentation"])
            if "bbox" not in ann:
                ann["bbox"] = mask_utils.toBbox(ann["segmentation"])
            ann["id"] = id + 1

    self.dataset["annotations"] = result_anns
    self._create_index()

    img_ids_in_result = [ann["image_id"] for ann in result_anns]

    assert set(img_ids_in_result) == (
        set(img_ids_in_result) & set(self.get_img_ids())
    ), "Results do not correspond to current LVIS set."
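# --- Possible usage (assuming this constructor is lvis.LVISResults from the
# lvis-api package; the two file paths are placeholders).
from lvis import LVISResults

lvis_results = LVISResults('lvis_v1_val.json', 'detections.json', max_dets=300)
print(len(lvis_results.get_img_ids()), 'images with results')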
for i in range(0, 147):
    hull = []
    path = '/Users/sonudileep/Downloads/Mask_606/' + str(i) + '.png'
    ground_truth_binary_mask = cv2.imread(path, 0)
    ground_truth_binary_mask = ground_truth_binary_mask / 255
    ground_truth_binary_mask = ground_truth_binary_mask.astype(np.uint8)
    countzero_in1 = not np.any(ground_truth_binary_mask)
    # print(i, countzero_in1)
    if countzero_in1 == True:  # skip fully empty masks
        continue
    hull = find_hull(ground_truth_binary_mask)
    # print(hull)
    fortran_ground_truth_binary_mask = np.asfortranarray(
        ground_truth_binary_mask)
    encoded_ground_truth = mask.encode(fortran_ground_truth_binary_mask)
    ground_truth_area = mask.area(encoded_ground_truth)
    ground_truth_bounding_box = mask.toBbox(encoded_ground_truth)
    contours = measure.find_contours(ground_truth_binary_mask, 0.5)

    null = None  # serialized as JSON null, which labelme expects for group_id
    annotation = {
        "version": "4.5.6",
        "flags": {},
        "shapes": [{
            "label": "1",
            "points": " ",
            "group_id": null,
            "shape_type": "polygon",
            "flags": {}
        }],
        "imagePath":
def _main(video_folder, download_path, output, redownload, model_config, write_videos, outputdir):
    if os.path.exists(output):
        with open(output, 'r') as fi:
            videos_information = json.load(fi)
    else:
        videos_information = {}

    if os.path.isdir(video_folder):
        detectron = Detectron(model_config)
        for file in os.listdir(video_folder):
            print('Processing video:', file)
            if os.path.isdir(os.path.join(video_folder, file)) and (file != 'a5arrD39XjY.mp4') \
                    and (file not in videos_information):
                video_dimensions = None
                video_boxes = []
                video_segments = []
                video_information = []
                frame_count = 0
                diff = 0
                for filename in sorted(os.listdir(os.path.join(video_folder, file))):
                    if filename.endswith('jpg'):
                        found = False
                        class_names = []
                        # print(os.path.join(video_folder, file, filename))
                        frame = cv2.imread(os.path.join(video_folder, file, filename))
                        if video_dimensions is None:
                            video_dimensions = frame.shape[1], frame.shape[0]

                        frame_boxes, frame_segments = detectron.infer_image(frame)
                        frame_information = []
                        print('After infer:', type(frame_boxes), type(frame_segments))
                        if isinstance(frame_boxes, list):
                            frame_boxes, frame_segments, _, classes = vis_utils.convert_from_cls_format(
                                frame_boxes, frame_segments, None)
                            print('After convert:', type(frame_boxes), type(frame_segments), classes)

                        if frame_boxes is not None and frame_boxes.shape[0] != 0:
                            video_area = video_dimensions[0] * video_dimensions[1]
                            box_areas = (frame_boxes[:, 2] - frame_boxes[:, 0]) * \
                                        (frame_boxes[:, 3] - frame_boxes[:, 1])
                            sorted_inds = np.argsort(-box_areas)
                            print(box_areas, sorted_inds)
                            for i in sorted_inds:
                                try:
                                    class_name = detectron.get_class_name(classes[i])
                                    if class_name != '__background__':
                                        class_names.append(class_name)
                                except IndexError as e:
                                    log.error("Cannot get_class_name: %s", e)
                                    log.debug("sorted_inds: %s", sorted_inds)
                                    log.debug("box_areas: %s", box_areas)
                                    log.debug("frame_boxes: %s", frame_boxes)
                                    log.debug("frame_segments: %s", frame_segments)

                                score = float(frame_boxes[i, -1])
                                if not (score < THRESHOLD or class_name == '__background__'):
                                    found = True
                                    log.debug("Frame %s: found class '%s' with score '%s'",
                                              frame_count, class_name, score)
                                    segment_area = int(mask_utils.area(frame_segments[i]))
                                    frame_information.append({
                                        'label': class_name,
                                        'total_area': segment_area,
                                        'percentage': float(segment_area) / float(video_area),
                                        'score': score,
                                        'bbox': frame_boxes[i, :4].astype(int).tolist()
                                    })
                                    frame = detectron.vis_one_image_opencv(
                                        im=frame, boxes=frame_boxes[i],
                                        segms=frame_segments[i], class_str=class_name)
                        else:
                            log.debug("Found nothing in frame %s", frame_count)

                        if found:
                            img = cv2.resize(frame, video_dimensions, interpolation=cv2.INTER_NEAREST)
                            if not os.path.exists(os.path.join(outputdir, file)):
                                os.makedirs(os.path.join(outputdir, file), 0o755)
                            cv2.imwrite(os.path.join(outputdir, file, filename), img)
                            with open(os.path.join(outputdir, file, (filename.split('.')[0] + '.json')), 'w') as fo:
                                json.dump(frame_information, fo, indent=2)

                        video_information.append(frame_information)
                        frame_count += 1
                print('Video', file, 'processed. Results:', video_dimensions, video_information)
                log.info("Write intermediate file")
                videos_information[file] = video_information
                with open(output, 'w') as fo:
                    json.dump(videos_information, fo, indent=2)

    # videos = _download_videos(video_text_file=video_text_file, download_path=download_path, redownload=redownload)
    for idx, (video_id, video_file) in enumerate(videos, start=1):
        log.info("Video %s/%s: Start inference for video_id '%s' on file '%s'",
                 idx, len(videos), video_id, video_file)
def project_masks_on_boxes(segmentation_masks, proposals, discretization_size, maskiou_on):
    """
    Given segmentation masks and the bounding boxes corresponding to the
    location of the masks in the image, this function crops and resizes the
    masks in the position defined by the boxes. This prepares the masks for
    them to be fed to the loss computation as the targets. If the maskiou
    head is used, we compute the maskiou target here as well.

    Arguments:
        segmentation_masks: an instance of SegmentationMask
        proposals: an instance of BoxList
    """
    masks = []
    mask_ratios = []
    M = discretization_size
    device = proposals.bbox.device
    im_width, im_height = proposals.size
    # proposals = proposals.convert("xyxy")
    assert segmentation_masks.size == proposals.size, "{}, {}".format(
        segmentation_masks, proposals)

    # TODO put the proposals on the CPU, as the representation for the
    # masks is not efficient GPU-wise (possibly several small tensors for
    # representing a single instance mask)
    proposals = proposals.bbox.to(torch.device("cpu"))
    for segmentation_mask, proposal in zip(segmentation_masks, proposals):
        # # debug
        # print(segmentation_mask.polygons)
        # print(proposal)
        # from maskrcnn_benchmark.engine.extra_utils import xywha_to_xyxy
        # import cv2
        # cv_img = cv2.imread('/workspace/mnt/group/ocr/qiutairu/dataset/LSVT_full_train/demo_images/gt_102.jpg')
        # for proposal in proposals:
        #     xc, yc, w, h, a = proposal.numpy()
        #     pts = xywha_to_xyxy((xc, yc, w, h, a))
        #     cv2.polylines(cv_img, [pts], True, (0, 255, 0), 2)
        #
        # for mask in segmentation_masks:
        #     pts = mask.polygons[0].numpy().astype(np.int32).reshape(-1, 2)
        #     cv2.polylines(cv_img, [pts], True, (0, 0, 255), 1)
        #
        # cv2.imwrite('debug_mask.jpg', cv_img)

        # rotate_bbox -> horizon_bbox (as angle = 0)
        xc, yc, w, h, angle = proposal.numpy()
        x_min = int(xc - w / 2)
        y_min = int(yc - h / 2)
        x_max = int(xc + w / 2)
        y_max = int(yc + h / 2)

        # rotate_mask -> horizon_mask (rotate every point in degree of -angle)
        new_contour = rotate_pts(
            segmentation_mask.polygons[0].numpy().astype(np.int32).reshape(-1, 2).tolist(),
            (xc, yc), -angle)
        horizon_mask = Polygons([torch.from_numpy(new_contour.reshape(-1))],
                                size=segmentation_mask.size,
                                mode=segmentation_mask.mode)

        # crop the masks, resize them to the desired resolution and
        # then convert them to the tensor representation,
        # instead of the list representation that was used
        cropped_mask = horizon_mask.crop([x_min, y_min, x_max, y_max])
        scaled_mask = cropped_mask.resize((M, M))
        mask = scaled_mask.convert(mode="mask")
        masks.append(mask)

        if maskiou_on:
            x1 = x_min
            y1 = y_min
            x2 = x_max + 1
            y2 = y_max + 1
            for poly_ in horizon_mask.polygons:
                poly = np.array(poly_, dtype=np.float32)
                x1 = np.minimum(x1, poly[0::2].min())
                x2 = np.maximum(x2, poly[0::2].max())
                y1 = np.minimum(y1, poly[1::2].min())
                y2 = np.maximum(y2, poly[1::2].max())
            img_h = horizon_mask.size[1]
            img_w = horizon_mask.size[0]
            x1 = np.maximum(x1, 0)
            x2 = np.minimum(x2, img_w - 1)
            y1 = np.maximum(y1, 0)
            y2 = np.minimum(y2, img_h - 1)
            segmentation_mask_for_maskratio = horizon_mask.crop([x1, y1, x2, y2])
            '''
            # type 1
            gt_img_mask = segmentation_mask_for_maskratio.convert(mode='mask')
            gt_img_mask_area = gt_img_mask.sum().float()
            gt_box_mask = gt_img_mask[int(proposal[1]-y1):int(proposal[3]-y1)+1,
                                      int(proposal[0]-x1):int(proposal[2]-x1)+1]
            gt_box_mask_area = gt_box_mask.sum().float()
            mask_ratio = gt_box_mask_area / gt_img_mask_area
            '''
            # type 2
            rle_for_fullarea = mask_util.frPyObjects(
                [p.numpy() for p in segmentation_mask_for_maskratio.polygons],
                y2 - y1, x2 - x1)
            full_area = torch.tensor(
                mask_util.area(rle_for_fullarea).sum().astype(float))
            rle_for_box_area = mask_util.frPyObjects(
                [p.numpy() for p in cropped_mask.polygons],
                y_max - y_min, x_max - x_min)
            box_area = torch.tensor(
                mask_util.area(rle_for_box_area).sum().astype(float))
            mask_ratio = box_area / full_area
            mask_ratios.append(mask_ratio)

    if maskiou_on:
        mask_ratios = torch.stack(mask_ratios, dim=0).to(device, dtype=torch.float32)
    else:
        mask_ratios = None
    if len(masks) == 0:
        return (torch.empty(0, dtype=torch.float32, device=device),
                torch.empty(0, dtype=torch.float32, device=device))
    return torch.stack(masks, dim=0).to(device, dtype=torch.float32), mask_ratios
def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
    """Converts groundtruths to the dataset in COCO format.

    Args:
      groundtruths: a dictionary of numpy arrays including the fields below.
        Note that each element in the list represents the number for a single
        example without batch dimension. K below denotes the actual number of
        instances for each image.
        Required fields:
          - source_id: a list of numpy arrays of int or string of shape
            [batch_size].
          - height: a list of numpy arrays of int of shape [batch_size].
          - width: a list of numpy arrays of int of shape [batch_size].
          - num_detections: a list of numpy arrays of int of shape [batch_size].
          - boxes: a list of numpy arrays of float of shape [batch_size, K, 4],
            where coordinates are in the original image space (not the
            normalized coordinates).
          - classes: a list of numpy arrays of int of shape [batch_size, K].
        Optional fields:
          - is_crowds: a list of numpy arrays of int of shape [batch_size, K].
            If the field is absent, it is assumed that this instance is not
            crowd.
          - areas: a list of numpy arrays of float of shape [batch_size, K].
            If the field is absent, the area is calculated using either boxes
            or masks depending on which one is available.
          - masks: a list of numpy arrays of string of shape [batch_size, K].
      label_map: (optional) a dictionary that defines items from the category
        id to the category name. If `None`, collect the category mapping from
        the `groundtruths`.

    Returns:
      coco_groundtruths: the groundtruth dataset in COCO format.
    """
    source_ids = np.concatenate(groundtruths['source_id'], axis=0)
    heights = np.concatenate(groundtruths['height'], axis=0)
    widths = np.concatenate(groundtruths['width'], axis=0)
    gt_images = [{
        'id': int(i),
        'height': int(h),
        'width': int(w)
    } for i, h, w in zip(source_ids, heights, widths)]

    gt_annotations = []
    num_batches = len(groundtruths['source_id'])
    batch_size = groundtruths['source_id'][0].shape[0]
    for i in range(num_batches):
        for j in range(batch_size):
            num_instances = groundtruths['num_detections'][i][j]
            for k in range(num_instances):
                ann = {}
                ann['image_id'] = int(groundtruths['source_id'][i][j])
                if 'is_crowds' in groundtruths:
                    ann['iscrowd'] = int(groundtruths['is_crowds'][i][j, k])
                else:
                    ann['iscrowd'] = 0
                ann['category_id'] = int(groundtruths['classes'][i][j, k])
                boxes = groundtruths['boxes'][i]
                ann['bbox'] = [
                    float(boxes[j, k, 1]),
                    float(boxes[j, k, 0]),
                    float(boxes[j, k, 3] - boxes[j, k, 1]),
                    float(boxes[j, k, 2] - boxes[j, k, 0])
                ]
                if 'areas' in groundtruths:
                    ann['area'] = float(groundtruths['areas'][i][j, k])
                else:
                    ann['area'] = float((boxes[j, k, 3] - boxes[j, k, 1]) *
                                        (boxes[j, k, 2] - boxes[j, k, 0]))
                if 'masks' in groundtruths:
                    mask = Image.open(six.BytesIO(groundtruths['masks'][i][j, k]))
                    width, height = mask.size
                    np_mask = (np.array(mask.getdata()).reshape(
                        height, width).astype(np.uint8))
                    np_mask[np_mask > 0] = 255
                    encoded_mask = mask_api.encode(np.asfortranarray(np_mask))
                    ann['segmentation'] = encoded_mask
                    if 'areas' not in groundtruths:
                        ann['area'] = mask_api.area(encoded_mask)
                gt_annotations.append(ann)

    for i, ann in enumerate(gt_annotations):
        ann['id'] = i + 1

    if label_map:
        gt_categories = [{'id': i, 'name': label_map[i]} for i in label_map]
    else:
        category_ids = [gt['category_id'] for gt in gt_annotations]
        gt_categories = [{'id': i} for i in set(category_ids)]

    gt_dataset = {
        'images': gt_images,
        'categories': gt_categories,
        'annotations': copy.deepcopy(gt_annotations),
    }
    return gt_dataset
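# --- Sketch of feeding a single one-image, one-box batch through
# convert_groundtruths_to_coco_dataset; all values are synthetic.
import numpy as np

gts = {
    'source_id': [np.array([1])],
    'height': [np.array([100])],
    'width': [np.array([100])],
    'num_detections': [np.array([1])],
    'boxes': [np.array([[[10., 20., 50., 60.]]])],  # [ymin, xmin, ymax, xmax]
    'classes': [np.array([[3]])],
}
coco_gt = convert_groundtruths_to_coco_dataset(gts)
print(coco_gt['annotations'][0]['bbox'])  # [20.0, 10.0, 40.0, 40.0] in XYWH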
for img in tqdm.tqdm(gt_robo['images']):
    img_id = img['id']
    robo_coco['images'].append(img)
    gt_instances = img_anns[img_id]
    out_instances = list(out_robo[img_id - 1].values())
    for instance in out_instances:
        rle = instance['mask']
        instance['bbox'] = instance['bbox'][:, :-1]
        max_iou, cat = 0, 7
        for gt_instance in gt_instances:
            iou = mask.iou([rle], gt_instance['rle'], [False]).max()
            if iou > max_iou:
                max_iou, cat = iou, gt_instance['category_id']
        H, W = rle['size']
        area = mask.area([rle]).item()
        segmentation = binary_mask_to_polygon(mask.decode(rle))
        sent = f'{instance["sentences"][0].split()[0]} {categories[cat]}'
        ann = {
            'id': instance_id,
            'image_id': img_id,
            'category_id': cat,
            'iscrowd': 0,
            'area': area,
            'bbox': instance['bbox'][0].tolist(),
            'segmentation': segmentation,
            'width': W,
            'height': H
        }
        ref = {
            'sent_ids': [sent_id],
def draw_binary_mask(self, binary_mask, color=None, *, edge_color=None,
                     text=None, alpha=0.5, area_threshold=4096):
    """
    Args:
        binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and
            W is the image width. Each value in the array is either a 0 or 1 value of uint8
            type.
        color: color of the mask. Refer to `matplotlib.colors` for a full list of
            formats that are accepted. If None, will pick a random color.
        edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a
            full list of formats that are accepted.
        text (str): if None, will be drawn in the object's center of mass.
        alpha (float): blending coefficient. Smaller values lead to more transparent masks.
        area_threshold (float): a connected component smaller than this will not be shown.

    Returns:
        output (VisImage): image object with mask drawn.
    """
    if color is None:
        color = random_color(rgb=True, maximum=1)
    if area_threshold is None:
        area_threshold = 4096

    has_valid_segment = False
    binary_mask = binary_mask.astype("uint8")  # opencv needs uint8
    mask = GenericMask(binary_mask, self.output.height, self.output.width)
    shape2d = (binary_mask.shape[0], binary_mask.shape[1])

    if not mask.has_holes:
        # draw polygons for regular masks
        for segment in mask.polygons:
            area = mask_util.area(
                mask_util.frPyObjects([segment], shape2d[0], shape2d[1]))
            if area < area_threshold:
                continue
            has_valid_segment = True
            segment = segment.reshape(-1, 2)
            self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha)
    else:
        rgba = np.zeros(shape2d + (4,), dtype="float32")
        rgba[:, :, :3] = color
        rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha
        has_valid_segment = True
        self.output.ax.imshow(rgba)

    if text is not None and has_valid_segment:
        # TODO sometimes drawn on wrong objects. the heuristics here can improve.
        lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
        _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8)
        largest_component_id = np.argmax(stats[1:, -1]) + 1

        # draw text on the largest component, as well as other very large components.
        for cid in range(1, _num_cc):
            if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH:
                # median is more stable than centroid
                # center = centroids[largest_component_id]
                center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1]
                self.draw_text(text, center, color=lighter_color)
    return self.output
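# --- Hedged call sketch, assuming this method is detectron2's
# Visualizer.draw_binary_mask; the blank image and mask are synthetic.
import numpy as np
from detectron2.utils.visualizer import Visualizer

img = np.zeros((480, 640, 3), dtype=np.uint8)
vis = Visualizer(img)
m = np.zeros((480, 640), dtype=np.uint8)
m[100:300, 200:400] = 1  # 200 x 200 component, above the 4096-px threshold
out = vis.draw_binary_mask(m, color='c', text='object')
out.save('mask_vis.png')  # VisImage.save renders the drawn overlay to disk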
def create_json(output_file_name, ref_json, cam5_list, cam6_list):
    # read json template
    with open('instances_val2017_template.json') as json_data:
        d = json.load(json_data)
    with open(ref_json) as ref:
        d_ref = json.load(ref)

    json_copy = copy.deepcopy(d)
    json_copy['images'] = []
    json_copy['annotations'] = []

    kaggle_to_coco = {
        36: 1,  # person
        35: 2,  # bicycle
        33: 3,  # car
        34: 4,  # motorcycle
        39: 6,  # bus
        38: 8,  # truck
        40: 2   # tricycle => bicycle
    }

    annotation_counter = 0
    counter = 0
    missing_counter = 0
    for clip in d_ref['images']:
        clip_images = get_clip_images(
            clip['file_name'].split('.')[0] + '_instanceIds.png',
            cam5_list, cam6_list)
        if clip_images is None:
            missing_counter += 1
            continue
        for i, single_image in enumerate(clip_images):
            if counter % 50 == 0:
                print("{} images so far".format(counter))
            image_name = single_image
            file_name = image_name.replace('_instanceIds', '')
            file_name = file_name.replace('.png', '.jpg')
            json_single_image = copy.deepcopy(d['images'][0])
            json_single_image['file_name'] = file_name
            json_single_image['id'] = counter
            json_copy['images'].append(json_single_image)
            # print(json_copy['images'])
            filepath = pwd + '/train_label/' + image_name
            img = cv2.imread(filepath, -1)
            instance_label = np.unique(img)
            instance_label = instance_label[instance_label != 255].tolist()
            instance_label = [item for item in instance_label
                              if item // 1000 in kaggle_to_coco]
            id_list = list(map(lambda x: kaggle_to_coco[x // 1000], instance_label))
            mask_list = []
            for val in instance_label:
                mask_list.append(np.uint8(1) * (img == val))
            # print(mask_list)
            for j, mask_i in enumerate(mask_list):
                ground_truth_binary_mask = mask_i
                mask_sum = np.sum(mask_i)
                fortran_ground_truth_binary_mask = np.asfortranarray(
                    ground_truth_binary_mask)
                encoded_ground_truth = mask.encode(fortran_ground_truth_binary_mask)
                ground_truth_area = mask.area(encoded_ground_truth)
                ground_truth_bounding_box = mask.toBbox(encoded_ground_truth)
                contours = measure.find_contours(ground_truth_binary_mask, 0.5)
                annotation = {
                    "segmentation": [],
                    "area": ground_truth_area.tolist(),
                    "iscrowd": 0,
                    "image_id": counter,
                    "bbox": ground_truth_bounding_box.tolist(),
                    "category_id": id_list[j],
                    "id": annotation_counter
                }
                annotation_counter += 1
                for contour in contours:
                    contour = np.flip(contour, axis=1)
                    segmentation = contour.ravel().tolist()
                    annotation["segmentation"].append(segmentation)
                json_copy['annotations'].append(annotation)
                # print(json.dumps(annotation, indent=4))
                # print(mask_sum)
            counter += 1

    with open(output_file_name + '.json', 'w') as f:
        json.dump(json_copy, f, ensure_ascii=False)
    print("missing counter is {}".format(missing_counter))
def writeParticlesMasksToJson(destFilePath, segms, srcImage, srcImageName,
                              coordsMult, minContourArea=0):
    problemMaskInds = set()
    with open(destFilePath, 'w') as outFile:
        _writeParticlesJsonHeader(outFile, srcImage, srcImageName)
        inst_cls = 0
        isFirstParticle = True
        for maskInd, compMask in enumerate(segms[inst_cls]):
            if maskUtils.area(compMask) <= 1:
                print('Warning: 0 or 1 pixel mask (index %d)' % maskInd)
                problemMaskInds.add(maskInd)
            mask = maskUtils.decode(compMask).astype(bool)
            maskImg = (mask * 1)
            maskImg = np.expand_dims(maskImg, 2).astype(np.uint8)
            # maskImg = np.tile(maskImg, (1, 1, 3))
            # return maskImg

            contours, _ = cv2.findContours(maskImg, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            # print('Found %d contours' % len(contours), _)
            # Can print like "Found 2 contours [[[ 1 -1 -1 -1] [-1  0 -1 -1]]]"
            for contourInd, contour in enumerate(contours):
                # approx = cv2.approxPolyDP(contour, 0, True)
                # # No documentation to approxPolyDP. The 2nd parameter is something like minimum
                # # produced line length. 0.009 * cv2.arcLength(contour, True) produces about 9 points,
                # # 0.005 * ... - 18, 0.003... - 27, 0.001... and less - 29

                if len(contour) < 3:
                    additInfo = ''
                    if len(contour) >= 1:
                        additInfo = ', near %s' % str(contour[0])
                    print('Warning: polygon with %d vertices (mask index %d, contour index %d / %d): %s. Skipping' %
                          (len(contour), maskInd, contourInd, len(contours),
                           str(contour).replace('\n', ' ')))
                    problemMaskInds.add(maskInd)
                    # plt.imshow(mask)
                    continue
                area = cv2.contourArea(contour)
                if area < minContourArea:
                    print('Warning: polygon with area %.2f pixels (mask index %d, contour %d / %d): %s. Skipping' %
                          (area, maskInd, contourInd, len(contours),
                           str(contour).replace('\n', ' ')))
                    problemMaskInds.add(maskInd)
                    continue
                elif len(contours) > 1:
                    # Printing information for all suspicious contours
                    print('Information: polygon area %.2f (mask %d, contour %d / %d)' %
                          (area, maskInd, contourInd, len(contours)))

                if isFirstParticle:
                    isFirstParticle = False
                else:
                    outFile.write(',\n')
                outFile.write('''    {
      "label": "nanoparticle %d%s",
      "line_color": null,
      "fill_color": null,
      "points": [\n''' % (maskInd, '' if contourInd == 0 else (', %d' % contourInd)))
                addition = '%8s' % ''
                for point in contour:
                    outFile.write('%s[ %.3f, %.3f ]' %
                                  (addition, point[0][0] * coordsMult, point[0][1] * coordsMult))
                    addition = ',\n%8s' % ''
                outFile.write('\n      ]\n    }')
                # break
        outFile.write('\n  ]')
        outFile.write(',\n  "imageData": "%s"\n}' % encodeImageForJson(srcImage))
    return problemMaskInds
def loadRes(dataset, resFile):
    """
    Load result file and return a result api object.
    :param   resFile (str)     : file name of result file
    :return: res (obj)         : result api object
    """
    from pycocotools.coco import COCO
    import json
    import time
    import copy
    from pycocotools import mask as maskUtils

    self = dataset.coco
    res = COCO()

    print('Loading and preparing results...')
    tic = time.time()
    if type(resFile) == str or type(resFile) == bytes:
        anns = json.load(open(resFile))
    elif type(resFile) == np.ndarray:
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert type(anns) == list, 'results is not an array of objects'

    annsImgIds = set([ann['image_id'] for ann in anns])
    gtImgIds = set([str(i) for i in set(self.getImgIds())])
    validImgIds = annsImgIds & gtImgIds
    if len(validImgIds) < len(annsImgIds):
        print(f'skip {len(annsImgIds) - len(validImgIds)} images which do not have annotations')
        anns = [ann for ann in anns if ann['image_id'] in validImgIds]

    annsImgIds = []
    for ann in anns:
        image_id = int(ann['image_id'])  # str -> int for COCO
        ann['image_id'] = image_id
        annsImgIds.append(image_id)
    res.dataset['images'] = self.loadImgs(self.getImgIds(annsImgIds))

    if 'caption' in anns[0]:
        imgIds = set([img['id'] for img in res.dataset['images']]) & set(
            [ann['image_id'] for ann in anns])
        res.dataset['images'] = [
            img for img in res.dataset['images'] if img['id'] in imgIds
        ]
        for id, ann in enumerate(anns):
            ann['id'] = id + 1
    elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        # map class_id to category_id in coco api
        category_id_map = None
        if dataset.target_transform.label_map:
            category_id_map = {
                v: k for k, v in dataset.target_transform.label_map.items()
            }
        for id, ann in enumerate(anns):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if not 'segmentation' in ann:
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = id + 1
            ann['iscrowd'] = 0
            if category_id_map:
                ann['category_id'] = category_id_map[ann['category_id']]
    elif 'segmentation' in anns[0]:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for id, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if not 'bbox' in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = id + 1
            ann['iscrowd'] = 0
    elif 'keypoints' in anns[0]:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for id, ann in enumerate(anns):
            s = ann['keypoints']
            x = s[0::3]
            y = s[1::3]
            x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x1 - x0) * (y1 - y0)
            ann['id'] = id + 1
            ann['bbox'] = [x0, y0, x1 - x0, y1 - y0]
    print('DONE (t={:0.2f}s)'.format(time.time() - tic))

    res.dataset['annotations'] = anns
    res.createIndex()
    return res
def internalToCocoGTDemo(dataType='train2017', dataDir='../..',
                         imgCount=float('inf'), stuffStartId=92, stuffEndId=182,
                         mergeThings=True, indent=None, includeCrowd=False,
                         outputAnnots=True):
    '''
    Converts our internal .mat representation of the ground-truth annotations to COCO format.
    :param dataType: the name of the subset: train201x, val201x, test-dev201x or test201x
    :param dataDir: location of the COCO root folder
    :param imgCount: the number of images to use for the .json file
    :param stuffStartId: id where stuff classes start
    :param stuffEndId: id where stuff classes end
    :param mergeThings: merges all 91 thing classes into a single class 'other' with id 183
    :param indent: number of whitespaces used for JSON indentation
    :param includeCrowd: whether to include 'crowd' thing annotations as 'other' (or void)
    :param outputAnnots: whether to include annotations (for test images we only release ids)
    :return: None
    '''

    # Define paths
    imgCountStr = ('_%d' % imgCount) if imgCount < float('inf') else ''
    annotFolder = '%s/annotations/internal/%s' % (dataDir, dataType)
    annPath = '%s/annotations/instances_%s.json' % (dataDir, dataType)
    if outputAnnots:
        jsonPath = '%s/annotations/stuff_%s%s.json' % (dataDir, dataType, imgCountStr)
    else:
        jsonPath = '%s/annotations/stuff_image_info_%s%s.json' % (dataDir, dataType, imgCountStr)

    # Check if output file already exists
    if os.path.exists(jsonPath):
        raise Exception('Error: Output file already exists: %s' % jsonPath)

    # Check if input folder exists
    if not os.path.exists(annotFolder):
        raise Exception('Error: Input folder does not exist: %s' % annotFolder)

    # Get images
    imgNames = os.listdir(annotFolder)
    imgNames = [imgName[:-4] for imgName in imgNames if imgName.endswith('.mat')]
    imgNames.sort()
    if imgCount < len(imgNames):
        imgNames = imgNames[0:imgCount]
    imgCount = len(imgNames)
    imgIds = [int(imgName) for imgName in imgNames]

    # Load COCO API for things
    cocoGt = COCO(annPath)

    # Init
    # annId must be unique, >=1 and cannot overlap with the detection annotations
    if dataType == 'train2017':
        annIdStart = int(1e7)
    elif dataType == 'val2017':
        annIdStart = int(2e7)
    elif dataType == 'test-dev2017':
        annIdStart = int(3e7)
    elif dataType == 'test2017':
        annIdStart = int(4e7)
    else:
        raise Exception('Error: Unknown dataType %s specified!' % dataType)
    annId = annIdStart
    startTime = time.clock()

    print("Writing JSON metadata...")
    with io.open(jsonPath, 'w', encoding='utf8') as output:
        # Write info
        infodata = {
            'description': 'COCO 2017 Stuff Dataset',
            'url': 'http://cocodataset.org',
            'version': '1.0',
            'year': 2017,
            'contributor': 'H. Caesar, J. Uijlings, M. Maire, T.-Y. Lin, P. Dollar and V. Ferrari',
            'date_created': '2017-08-31 00:00:00.0'
        }
        infodata = {'info': infodata}
        infoStr = json.dumps(infodata, indent=indent)
        infoStr = infoStr[1:-1] + ',\n'  # Remove brackets and add comma

        # Write images
        imdata = [i for i in cocoGt.dataset['images'] if i['id'] in imgIds]
        imdata = {'images': imdata}
        imStr = json.dumps(imdata, indent=indent)
        imStr = imStr[1:-1] + ',\n'  # Remove brackets and add comma

        # Write licenses
        licdata = {'licenses': cocoGt.dataset['licenses']}
        licStr = json.dumps(licdata, indent=indent)
        licStr = licStr[1:-1] + ',\n'  # Remove brackets and add comma

        # Write categories
        catdata = []
        catdata.extend([
            {'id': 92, 'name': 'banner', 'supercategory': 'textile'},
            {'id': 93, 'name': 'blanket', 'supercategory': 'textile'},
            {'id': 94, 'name': 'branch', 'supercategory': 'plant'},
            {'id': 95, 'name': 'bridge', 'supercategory': 'building'},
            {'id': 96, 'name': 'building-other', 'supercategory': 'building'},
            {'id': 97, 'name': 'bush', 'supercategory': 'plant'},
            {'id': 98, 'name': 'cabinet', 'supercategory': 'furniture-stuff'},
            {'id': 99, 'name': 'cage', 'supercategory': 'structural'},
            {'id': 100, 'name': 'cardboard', 'supercategory': 'raw-material'},
            {'id': 101, 'name': 'carpet', 'supercategory': 'floor'},
            {'id': 102, 'name': 'ceiling-other', 'supercategory': 'ceiling'},
            {'id': 103, 'name': 'ceiling-tile', 'supercategory': 'ceiling'},
            {'id': 104, 'name': 'cloth', 'supercategory': 'textile'},
            {'id': 105, 'name': 'clothes', 'supercategory': 'textile'},
            {'id': 106, 'name': 'clouds', 'supercategory': 'sky'},
            {'id': 107, 'name': 'counter', 'supercategory': 'furniture-stuff'},
            {'id': 108, 'name': 'cupboard', 'supercategory': 'furniture-stuff'},
            {'id': 109, 'name': 'curtain', 'supercategory': 'textile'},
            {'id': 110, 'name': 'desk-stuff', 'supercategory': 'furniture-stuff'},
            {'id': 111, 'name': 'dirt', 'supercategory': 'ground'},
            {'id': 112, 'name': 'door-stuff', 'supercategory': 'furniture-stuff'},
            {'id': 113, 'name': 'fence', 'supercategory': 'structural'},
            {'id': 114, 'name': 'floor-marble', 'supercategory': 'floor'},
            {'id': 115, 'name': 'floor-other', 'supercategory': 'floor'},
            {'id': 116, 'name': 'floor-stone', 'supercategory': 'floor'},
            {'id': 117, 'name': 'floor-tile', 'supercategory': 'floor'},
            {'id': 118, 'name': 'floor-wood', 'supercategory': 'floor'},
            {'id': 119, 'name': 'flower', 'supercategory': 'plant'},
            {'id': 120, 'name': 'fog', 'supercategory': 'water'},
            {'id': 121, 'name': 'food-other', 'supercategory': 'food-stuff'},
            {'id': 122, 'name': 'fruit', 'supercategory': 'food-stuff'},
            {'id': 123, 'name': 'furniture-other', 'supercategory': 'furniture-stuff'},
            {'id': 124, 'name': 'grass', 'supercategory': 'plant'},
            {'id': 125, 'name': 'gravel', 'supercategory': 'ground'},
            {'id': 126, 'name': 'ground-other', 'supercategory': 'ground'},
            {'id': 127, 'name': 'hill', 'supercategory': 'solid'},
            {'id': 128, 'name': 'house', 'supercategory': 'building'},
            {'id': 129, 'name': 'leaves', 'supercategory': 'plant'},
            {'id': 130, 'name': 'light', 'supercategory': 'furniture-stuff'},
            {'id': 131, 'name': 'mat', 'supercategory': 'textile'},
            {'id': 132, 'name': 'metal', 'supercategory': 'raw-material'},
            {'id': 133, 'name': 'mirror-stuff', 'supercategory': 'furniture-stuff'},
            {'id': 134, 'name': 'moss', 'supercategory': 'plant'},
            {'id': 135, 'name': 'mountain', 'supercategory': 'solid'},
            {'id': 136, 'name': 'mud', 'supercategory': 'ground'},
            {'id': 137, 'name': 'napkin', 'supercategory': 'textile'},
            {'id': 138, 'name': 'net', 'supercategory': 'structural'},
            {'id': 139, 'name': 'paper', 'supercategory': 'raw-material'},
            {'id': 140, 'name': 'pavement', 'supercategory': 'ground'},
            {'id': 141, 'name': 'pillow', 'supercategory': 'textile'},
            {'id': 142, 'name': 'plant-other', 'supercategory': 'plant'},
            {'id': 143, 'name': 'plastic', 'supercategory': 'raw-material'},
            {'id': 144, 'name': 'platform', 'supercategory': 'ground'},
            {'id': 145, 'name': 'playingfield', 'supercategory': 'ground'},
            {'id': 146, 'name': 'railing', 'supercategory': 'structural'},
            {'id': 147, 'name': 'railroad', 'supercategory': 'ground'},
            {'id': 148, 'name': 'river', 'supercategory': 'water'},
            {'id': 149, 'name': 'road', 'supercategory': 'ground'},
            {'id': 150, 'name': 'rock', 'supercategory': 'solid'},
            {'id': 151, 'name': 'roof', 'supercategory': 'building'},
            {'id': 152, 'name': 'rug', 'supercategory': 'textile'},
            {'id': 153, 'name': 'salad', 'supercategory': 'food-stuff'},
            {'id': 154, 'name': 'sand', 'supercategory': 'ground'},
            {'id': 155, 'name': 'sea', 'supercategory': 'water'},
            {'id': 156, 'name': 'shelf', 'supercategory': 'furniture-stuff'},
            {'id': 157, 'name': 'sky-other', 'supercategory': 'sky'},
            {'id': 158, 'name': 'skyscraper', 'supercategory': 'building'},
            {'id': 159, 'name': 'snow', 'supercategory': 'ground'},
            {'id': 160, 'name': 'solid-other', 'supercategory': 'solid'},
            {'id': 161, 'name': 'stairs', 'supercategory': 'furniture-stuff'},
            {'id': 162, 'name': 'stone', 'supercategory': 'solid'},
            {'id': 163, 'name': 'straw', 'supercategory': 'plant'},
            {'id': 164, 'name': 'structural-other', 'supercategory': 'structural'},
            {'id': 165, 'name': 'table', 'supercategory': 'furniture-stuff'},
            {'id': 166, 'name': 'tent', 'supercategory': 'building'},
            {'id': 167, 'name': 'textile-other', 'supercategory': 'textile'},
            {'id': 168, 'name': 'towel', 'supercategory': 'textile'},
            {'id': 169, 'name': 'tree', 'supercategory': 'plant'},
            {'id': 170, 'name': 'vegetable', 'supercategory': 'food-stuff'},
            {'id': 171, 'name': 'wall-brick', 'supercategory': 'wall'},
            {'id': 172, 'name': 'wall-concrete', 'supercategory': 'wall'},
            {'id': 173, 'name': 'wall-other', 'supercategory': 'wall'},
            {'id': 174, 'name': 'wall-panel', 'supercategory': 'wall'},
            {'id': 175, 'name': 'wall-stone', 'supercategory': 'wall'},
            {'id': 176, 'name': 'wall-tile', 'supercategory': 'wall'},
            {'id': 177, 'name': 'wall-wood', 'supercategory': 'wall'},
            {'id': 178, 'name': 'water-other', 'supercategory': 'water'},
            {'id': 179, 'name': 'waterdrops', 'supercategory': 'water'},
            {'id': 180, 'name': 'window-blind', 'supercategory': 'window'},
            {'id': 181, 'name': 'window-other', 'supercategory': 'window'},
            {'id': 182, 'name': 'wood', 'supercategory': 'solid'}
        ])
        if mergeThings:
            catdata.extend([{'id': stuffEndId + 1, 'name': 'other', 'supercategory': 'other'}])
        catdata = {'categories': catdata}
        catStr = json.dumps(catdata, indent=indent)
        catStr = catStr[1:-1]  # Remove brackets

        # Write opening braces, headers and annotation start to disk
        output.write(unicode('{\n' + infoStr + imStr + licStr + catStr))

        # Start annots
        if outputAnnots:
            output.write(unicode(',\n"annotations": \n[\n'))
            for i, imgName in enumerate(imgNames):
                # Write annotations
                imgId = imgIds[i]
                diffTime = time.clock() - startTime
                print("Writing JSON annotation %d of %d (%.1fs): %s..."
                      % (i + 1, imgCount, diffTime, imgName))

                # Read annotation file
                annotPath = os.path.join(annotFolder, imgName)
                matfile = scipy.io.loadmat(annotPath)
                labelMap = matfile['S']
                if not np.all([i == 0 or i >= stuffStartId for i in np.unique(labelMap)]):
                    raise Exception('Error: .mat annotation files should not contain thing labels!')

                # Merge thing classes
                if mergeThings:
                    # Get thing GT
                    labelMapThings = cocoSegmentationToSegmentationMap(
                        cocoGt, imgId, checkUniquePixelLabel=False, includeCrowd=includeCrowd)
                    if labelMap.shape[0] != labelMapThings.shape[0] \
                            or labelMap.shape[1] != labelMapThings.shape[1]:
                        raise Exception(
                            'Error: Stuff segmentation map has different size from thing segmentation map!')

                    # Set all thing classes to the new 'other' class
                    labelMap[labelMapThings > 0] = stuffEndId + 1

                # Add stuff annotations
                labelsAll = np.unique(labelMap)
                labelsValid = [i for i in labelsAll if i >= stuffStartId]
                for i, labelId in enumerate(labelsValid):
                    # Add a comma and line break after each annotation
                    assert annId - annIdStart <= 1e7, 'Error: Annotation ids are not unique!'
                    if annId == annIdStart:
                        annotStr = ''
                    else:
                        annotStr = ',\n'

                    # Create mask and encode it
                    Rs = segmentationToCocoMask(labelMap, labelId)

                    # Create annotation data
                    anndata = {}
                    anndata['id'] = annId
                    anndata['image_id'] = int(imgId)
                    anndata['category_id'] = int(labelId)
                    anndata['segmentation'] = Rs
                    anndata['area'] = float(mask.area(Rs))
                    anndata['bbox'] = mask.toBbox(Rs).tolist()
                    anndata['iscrowd'] = 0

                    # Write JSON
                    annotStr = annotStr + json.dumps(anndata, indent=indent)
                    output.write(unicode(annotStr))

                    # Increment annId
                    annId = annId + 1

            # End annots
            output.write(unicode('\n]'))

        # Global end
        output.write(unicode('\n}'))
def printMainParticlesStats(segms, imageInfo, mainInfoToPrint, cropAreaPart=1,
                            printLineBeforeMainInfo=True):
    inst_cls = 0
    mainInfo = mainInfoToPrint  # Collects messages that should be printed after the '--------' separator
    particleCount = len(segms[inst_cls])
    if particleCount == 0:
        print('No particles')
        return
    imageSize = segms[inst_cls][0]['size']  # Mask's (possibly stretched) image size
    pixelSize = None
    stretchFactor = None
    if imageInfo:
        pixelSize = getPixelSize(imageInfo)
        stretchFactor = getOutputImageStretchFactor(imageInfo, imageSize)
        if pixelSize:
            pixelSize = [pixelSize[0] / stretchFactor, pixelSize[1] / stretchFactor]
            print('Masked image pixel size: ', pixelSize)

    particleAreaSum = 0  # In mask pixels (e.g. at a 1215 * 1215 image)
    particleAreaSqrtSum = 0
    particlesImagePart = 0
    if particleCount != 0:
        particleAreas = [maskUtils.area(compMask) for compMask in segms[inst_cls]]
        for particleArea in particleAreas:
            particleAreaSum += particleArea
            particleAreaSqrtSum += math.sqrt(particleArea)
        avgParticleArea = particleAreaSum / particleCount
        mess = 'Average particle area: %.1f pixels' % avgParticleArea
        if pixelSize:
            mess += ', %.3f nm^2' % (avgParticleArea * pixelSize[0] * pixelSize[1] * 1e18)
        print(mess)

        unitedCompMask = maskUtils.merge(segms[inst_cls], intersect=False)
        unitedMaskPixelCount = maskUtils.area(unitedCompMask)
        print('Total particles area: %d pixels' % unitedMaskPixelCount)
        print('Overlapping area: %d pixels' % (particleAreaSum - unitedMaskPixelCount))
        particlesImagePart = float(unitedMaskPixelCount) / \
            (imageSize[0] * imageSize[1] * cropAreaPart)
        mainInfo.append('Particles area: %.3f%%' % (particlesImagePart * 100))

        pi4sqrt = math.sqrt(math.pi / 4)
        # Mean surface diameter. Area = pi * r^2 = pi * d^2 / 4; d^2 = area / pi * 4
        midSurfaceDiam = math.sqrt(avgParticleArea) / pi4sqrt
        # Mean projected-area diameter; an area of pi pixels corresponds to a 1-pixel radius
        avgProjectedAreaDiam = particleAreaSqrtSum / particleCount / pi4sqrt
        midSum = 0
        projectedSum = 0
        for particleArea in particleAreas:
            diam = math.sqrt(particleArea) / pi4sqrt
            midSum += (diam - midSurfaceDiam) ** 2
            projectedSum += (diam - avgProjectedAreaDiam) ** 2
        midSurfaceDiamStdDev = math.sqrt(midSum / particleCount)
        projectedAreaDiamStdDev = math.sqrt(projectedSum / particleCount)

        mess = 'Mid-surface diameter: %.2f pixels' % midSurfaceDiam
        if pixelSize:
            mess += ', %.4f nm' % \
                (midSurfaceDiam * (pixelSize[0] + pixelSize[1]) / 2 * 1e9)
            # Actually not correct for non-square pixels: we should analyze source pixels
            # with respect to the x and y axes instead of the simple
            # 'particleAreaSqrtSum += math.sqrt(particleArea)' accumulation above
        mainInfo.append(mess)
        mess = 'Mid-surface diameter std. dev.: %.3f pixels' % midSurfaceDiamStdDev
        if pixelSize:
            mess += ', %.5f nm' % \
                (midSurfaceDiamStdDev * (pixelSize[0] + pixelSize[1]) / 2 * 1e9)
        mainInfo.append(mess)
        mess = 'Average projected area diameter: %.2f pixels' % avgProjectedAreaDiam
        if pixelSize:
            mess += ', %.4f nm' % \
                (avgProjectedAreaDiam * (pixelSize[0] + pixelSize[1]) / 2 * 1e9)
            # (avgParticleDiameter * math.sqrt(pixelSize[0] ** 2 + pixelSize[1] ** 2) * 1e9)
        mainInfo.append(mess)
        mess = 'Average projected area diameter std. dev.: %.3f pixels' % projectedAreaDiamStdDev
        if pixelSize:
            mess += ', %.5f nm' % \
                (projectedAreaDiamStdDev * (pixelSize[0] + pixelSize[1]) / 2 * 1e9)
        mainInfo.append(mess)

    mess = 'Density: %.4f particles / 1000 pixels' % (particleCount * 1000.0 /
        (imageSize[0] * imageSize[1] * cropAreaPart))
    imageMetersSize = getImageMetersSize(imageInfo)
    if imageMetersSize:
        mess += ', %.4f / nm^2' % (particleCount /
            (imageMetersSize[0] * imageMetersSize[1] * cropAreaPart * 1e18))
    mainInfo.append(mess)

    if printLineBeforeMainInfo:
        print('--------------------')
    for line in mainInfo:
        print(line)
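# A minimal driver for printMainParticlesStats, assuming 'segms' is a per-class list of
# pycocotools RLE dicts (class 0 holds the particles, matching inst_cls above) and that
# the project helpers (getPixelSize, getImageMetersSize, ...) tolerate imageInfo=None,
# so only pixel-based statistics are printed. A sketch, not part of the original module.
import numpy as np
import pycocotools.mask as maskUtils

m1 = np.zeros((100, 100), dtype=np.uint8)
m1[10:30, 10:30] = 1   # a 20 x 20 'particle'
m2 = np.zeros((100, 100), dtype=np.uint8)
m2[50:80, 40:70] = 1   # a 30 x 30 'particle'
segms = [[maskUtils.encode(np.asfortranarray(m)) for m in (m1, m2)]]
printMainParticlesStats(segms, None, [])  # prints pixel areas, diameters and density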
def _load_raw_file(self, tracker, seq, is_gt): """Load a file (gt or tracker) in the KITTI MOTS format If is_gt, this returns a dict which contains the fields: [gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det). [gt_dets]: list (for each timestep) of lists of detections. [gt_ignore_region]: list (for each timestep) of masks for the ignore regions if not is_gt, this returns a dict which contains the fields: [tracker_ids, tracker_classes] : list (for each timestep) of 1D NDArrays (for each det). [tracker_dets]: list (for each timestep) of lists of detections. """ # Only loaded when run to reduce minimum requirements from pycocotools import mask as mask_utils # File location if self.data_is_zipped: if is_gt: zip_file = os.path.join(self.gt_fol, 'data.zip') else: zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip') file = seq + '.txt' else: zip_file = None if is_gt: file = self.config["GT_LOC_FORMAT"].format( gt_folder=self.gt_fol, seq=seq) else: file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt') # Ignore regions if is_gt: crowd_ignore_filter = {2: ['10']} else: crowd_ignore_filter = None # Load raw data from text file read_data, ignore_data = self._load_simple_text_file( file, crowd_ignore_filter=crowd_ignore_filter, is_zipped=self.data_is_zipped, zip_file=zip_file, force_delimiters=' ') # Convert data to required format num_timesteps = self.seq_lengths[seq] data_keys = ['ids', 'classes', 'dets'] if is_gt: data_keys += ['gt_ignore_region'] raw_data = {key: [None] * num_timesteps for key in data_keys} # Check for any extra time keys extra_time_keys = [ x for x in read_data.keys() if x not in [str(t) for t in range(num_timesteps)] ] if len(extra_time_keys) > 0: if is_gt: text = 'Ground-truth' else: text = 'Tracking' raise TrackEvalException( text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join([str(x) + ', ' for x in extra_time_keys])) for t in range(num_timesteps): time_key = str(t) # list to collect all masks of a timestep to check for overlapping areas all_masks = [] if time_key in read_data.keys(): try: raw_data['dets'][t] = [{ 'size': [int(region[3]), int(region[4])], 'counts': region[5].encode(encoding='UTF-8') } for region in read_data[time_key]] raw_data['ids'][t] = np.atleast_1d([ region[1] for region in read_data[time_key] ]).astype(int) raw_data['classes'][t] = np.atleast_1d([ region[2] for region in read_data[time_key] ]).astype(int) all_masks += raw_data['dets'][t] except IndexError: self._raise_index_error(is_gt, tracker, seq) except ValueError: self._raise_value_error(is_gt, tracker, seq) else: raw_data['dets'][t] = [] raw_data['ids'][t] = np.empty(0).astype(int) raw_data['classes'][t] = np.empty(0).astype(int) if is_gt: if time_key in ignore_data.keys(): try: time_ignore = [{ 'size': [int(region[3]), int(region[4])], 'counts': region[5].encode(encoding='UTF-8') } for region in ignore_data[time_key]] raw_data['gt_ignore_region'][t] = mask_utils.merge( [mask for mask in time_ignore], intersect=False) all_masks += [raw_data['gt_ignore_region'][t]] except IndexError: self._raise_index_error(is_gt, tracker, seq) except ValueError: self._raise_value_error(is_gt, tracker, seq) else: raw_data['gt_ignore_region'][t] = mask_utils.merge( [], intersect=False) # check for overlapping masks if all_masks: masks_merged = all_masks[0] for mask in all_masks[1:]: if mask_utils.area( mask_utils.merge([masks_merged, mask], intersect=True)) != 0.0: raise TrackEvalException( 
'Tracker has overlapping masks. Tracker: ' + tracker + ' Seq: ' + seq + ' Timestep: ' + str(t)) masks_merged = mask_utils.merge([masks_merged, mask], intersect=False) if is_gt: key_map = { 'ids': 'gt_ids', 'classes': 'gt_classes', 'dets': 'gt_dets' } else: key_map = { 'ids': 'tracker_ids', 'classes': 'tracker_classes', 'dets': 'tracker_dets' } for k, v in key_map.items(): raw_data[v] = raw_data.pop(k) raw_data["num_timesteps"] = num_timesteps raw_data['seq'] = seq return raw_data
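# The overlap test above boils down to: area(merge([A, B], intersect=True)) != 0.
# A self-contained illustration with two synthetic masks (an aside, not TrackEval code):
import numpy as np
from pycocotools import mask as mask_utils

a = np.zeros((64, 64), dtype=np.uint8)
a[:32, :] = 1
b = np.zeros((64, 64), dtype=np.uint8)
b[16:48, :] = 1                                   # overlaps rows 16..31 of 'a'
rle_a = mask_utils.encode(np.asfortranarray(a))
rle_b = mask_utils.encode(np.asfortranarray(b))
inter = mask_utils.merge([rle_a, rle_b], intersect=True)
print(mask_utils.area(inter))                     # 1024 overlapping pixels -> the loader would raise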
def COCO_format(img_dirs, gt_seg_dirs, output_folder, dataset_name):
    """
    :param img_dirs: list of absolute paths
    :param gt_seg_dirs: list of absolute paths
    :param output_folder: directory the json file is written to
    :param dataset_name: name used for the cached json file
    :return: path of the cached COCO-format json file
    """
    assert len(img_dirs) == len(gt_seg_dirs)
    categories = [{"id": 0, "name": "P"}]
    coco_images = []
    coco_annotations = []
    for i, (im_path, gt_seg_path) in tqdm(enumerate(zip(img_dirs, gt_seg_dirs)),
                                          total=len(img_dirs)):
        coco_image = {
            "id": i,
            "width": _W,
            "height": _H,
            "file_name": im_path,
        }
        coco_images.append(coco_image)

        idmap_face = cv2.imread(f"{gt_seg_path}", cv2.IMREAD_ANYDEPTH)
        unval = np.unique(idmap_face)
        # Instance areas; only needed by the disabled top-5 filtering below
        area = []
        for val in unval[1:]:
            # pycocotools expects a Fortran-ordered uint8 mask
            gt_mask = np.asarray(idmap_face == val, dtype=np.uint8, order="F")
            encoded_gt = mask.encode(gt_mask)
            area_gt = mask.area(encoded_gt)
            area.append(area_gt)
        # area = sorted(area)
        # top5 = area[-5] if len(area) > 6 else 0

        for val in unval[1:]:
            coco_annotation = {}
            gt_mask = np.asarray(idmap_face == val, dtype=np.uint8, order="F")
            encoded_gt = mask.encode(gt_mask)
            area_gt = mask.area(encoded_gt)
            bbox_gt = toBbox(encoded_gt)  # toBbox takes the RLE, not the raw binary mask
            if area_gt < _plane_area:
                continue
            # if area_gt < top5:
            #     continue
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox_gt]
            coco_annotation["segmentation"] = encoded_gt
            coco_annotation["area"] = area_gt
            coco_annotation["category_id"] = 0
            coco_annotation["iscrowd"] = 0
            coco_annotations.append(coco_annotation)

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    print(
        "Conversion finished, "
        f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}"
    )

    cache_path = os.path.join(output_folder, f"{dataset_name}_coco_format.json")
    with PathManager.open(cache_path, "w") as json_file:
        json.dump(coco_dict, json_file, cls=MyEncoder)
    return cache_path
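# MyEncoder is referenced above but not defined in this snippet. A plausible minimal
# version (an assumption, not the original class) must at least serialize numpy scalars
# and the bytes 'counts' field produced by pycocotools' compressed RLE encoding:
import json
import numpy as np

class MyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, bytes):          # compressed RLE 'counts'
            return obj.decode('ascii')
        return super(MyEncoder, self).default(obj)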
def encode_gt(mask_dir):
    """Given a path to a directory of ground-truth image segmentation masks,
    encodes them into the COCO annotations format using the COCO API.
    GT segmasks are 3D Numpy arrays of shape (n, h, w) for n predicted instances,
    in case of overlapping masks.
    These MUST be the same size as predicted masks; ensure this by using masks
    returned from model.load_image_gt.
    DO NOT INCLUDE THE BACKGROUND. model.load_image_gt will automatically remove it.
    Requires that GT masks are named image_000000.npy, image_000001.npy,
    etc. in order without any missing numbers.

    mask_dir: str, directory in which GT masks are stored. Avoid relative paths
      if possible.
    """
    # Constructing GT annotation file per COCO format:
    # http://cocodataset.org/#download
    gt_annos = {
        'images': [],
        'annotations': [],
        'categories': [
            {'name': 'object', 'id': 1, 'supercategory': 'object'}
        ]
    }

    N = len(fnmatch.filter(os.listdir(mask_dir), 'image_*.npy'))

    for i in range(N):
        # load image
        im_name = 'image_{:06d}.npy'.format(i)
        I = np.load(os.path.join(mask_dir, im_name))
        # I has shape (n, h, w): shape[2] is the width, shape[1] the height
        im_anno = {
            'id': i,
            'width': int(I.shape[2]),
            'height': int(I.shape[1]),
            'file_name': im_name
        }

        gt_annos['images'].append(im_anno)
        # leaving license, flickr_url, coco_url, date_captured
        # fields incomplete

        # mask each individual object
        # NOTE: We assume these masks do not include backgrounds.
        # This means that the 1st object instance will have index 0!
        for val in range(I.shape[0]):
            # get binary mask
            bin_mask = I[val, :, :].astype(np.uint8)
            instance_id = i * 100 + (val + 1)  # create id for instance, increment val

            # find bounding box
            def bbox2(img):
                rows = np.any(img, axis=1)
                cols = np.any(img, axis=0)
                rmin, rmax = np.where(rows)[0][[0, -1]]
                cmin, cmax = np.where(cols)[0][[0, -1]]
                # rmax/cmax are inclusive indices, hence the + 1 for width/height
                return int(cmin), int(rmin), int(cmax - cmin + 1), int(rmax - rmin + 1)

            # encode mask
            encode_mask = mask.encode(np.asfortranarray(bin_mask))
            encode_mask['counts'] = encode_mask['counts'].decode('ascii')
            size = int(mask.area(encode_mask))
            x, y, w, h = bbox2(bin_mask)

            instance_anno = {
                'id': instance_id,
                'image_id': i,
                'category_id': 1,
                'segmentation': encode_mask,
                'area': size,
                'bbox': [x, y, w, h],
                'iscrowd': 0,
            }

            gt_annos['annotations'].append(instance_anno)

    anno_path = os.path.join(mask_dir, 'annos_gt.json')
    json.dump(gt_annos, open(anno_path, 'w+'))
    print('successfully wrote GT annotations to', anno_path)
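# A hypothetical end-to-end check for encode_gt: write a single tiny (n, h, w) GT
# stack, encode it, and reload the result with the COCO API. The directory comes
# from tempfile; everything else follows the naming scheme encode_gt requires.
import os
import tempfile
import numpy as np
from pycocotools.coco import COCO

tmp_dir = tempfile.mkdtemp()
gt = np.zeros((1, 32, 32), dtype=np.uint8)  # one instance on a 32x32 image
gt[0, 8:16, 8:20] = 1                       # 8 rows x 12 cols -> area 96
np.save(os.path.join(tmp_dir, 'image_000000.npy'), gt)
encode_gt(tmp_dir)
coco_gt = COCO(os.path.join(tmp_dir, 'annos_gt.json'))
print(coco_gt.anns)  # one annotation: area 96, bbox [8, 8, 12, 8]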
def constructAnnotationsFolder(filename, subsetInfos, numLimit=-1): """subset can be ycb_trainval_imageInfos, ycb_train_imageInfos, ycb_val_imageInfos. if numLimit == -1, ignore it. """ dataset = { "images": [], "annotations": [], "categories": getAllClassesAndIds() } # this is a json, after all. with tqdm(total=len(subsetInfos)) as pbar: cnt = 0 for i_img, info in enumerate(subsetInfos): ### image related. imageAnn = {} imgPath = getImageColorPath(info) img = mp.imread(imgPath) imageAnn["file_name"] = imageId2imageName(info["cocoId"]) imageAnn["height"] = img.shape[0] imageAnn["width"] = img.shape[1] imageAnn["id"] = info["cocoId"] dataset["images"].append(imageAnn) ### class related has been acquired. ### annotation related. segPath = getSegMaskPath(info) metaPath = getMetaMatPath(info) bboxPath = getBboxPath(info) seg = mp.imread(segPath) meta = loadmat(metaPath) bboxes = loadBboxes(bboxPath) centers = meta['center'] clsIds = meta['cls_indexes'].flatten() for i in range(len(bboxes)): annoAnn = {} annoAnn["image_id"] = imageAnn[ "id"] # be consistent with above. annoAnn["bbox"] = bboxes[i]["bbox"] annoAnn["iscrowd"] = 1 annoAnn["id"] = len(dataset["annotations"]) # use int to convert ndarray to ensure serializable. annoAnn["category_id"] = int(clsIds[i]) # create segmentation. height, width = seg.shape[0], seg.shape[1] center = centers[i] # make sure center is within image. center = (min(int(center[0]), width - 1), min(int(center[1]), height - 1)) segVal = seg[int(center[1])][int(center[0])] segMask = np.zeros_like(seg, dtype=np.uint8) segMask[seg != segVal] = 0 segMask[seg == segVal] = 1 segMask = np.asfortranarray(segMask) rle = encode(segMask) # byte and string are handled differently in python 3. # decode bytes to string, then dump to json. rle['counts'] = rle['counts'].decode('ascii') annoAnn["segmentation"] = rle annoAnn["area"] = int(area(rle)) dataset["annotations"].append(annoAnn) pbar.update(1) cnt += 1 if cnt == numLimit: break dumpJsontoFolder(filename, dataset)
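# A hypothetical round-trip check for constructAnnotationsFolder: reload the dumped
# json with the COCO API and rebuild one instance mask from its RLE. The file name
# 'ycb_train_annotations.json' is a placeholder for whatever was passed as 'filename'.
from pycocotools.coco import COCO

cocoGt = COCO('ycb_train_annotations.json')
firstImgId = cocoGt.getImgIds()[0]
for ann in cocoGt.loadAnns(cocoGt.getAnnIds(imgIds=[firstImgId])):
    m = cocoGt.annToMask(ann)  # decodes the RLE written above back to a binary mask
    print(ann['category_id'], ann['area'], int(m.sum()))  # area should equal m.sum()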
def fix_segments_intersections(polygons, height, width, img_name,
                               use_background_label, threshold=0.0,
                               ratio_tolerance=0.001):
    """Find all intersected regions and crop the contour of the back object by the
    objects in front of it. This is related to a specialty of segmentation in CVAT
    annotation. Intersection is calculated via the function 'iou' from cocoapi.

    Args:
        polygons: all objects on the image, represented as a 2D array of the objects' contours
        height: height of the image
        width: width of the image
        img_name: name of the image file
        use_background_label: whether polygons labelled 'background' are kept in the output
        threshold: threshold for the intersection over union of two objects.
            By default it is set to 0 and processes any two intersected objects
        ratio_tolerance: used for the situation when one object is fully or almost fully
            inside another one, and we don't want to make a 'hole' in either of the objects
    """
    converted_polygons = []
    empty_polygon = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

    # Convert the points of each polygon from a string to coco's array format.
    # All polygons must be sorted in order from bottom to top
    for polygon in polygons:
        label = polygon['label']
        points = polygon['points'].split(';')
        new_polygon = []
        for xy in points:
            x = float(xy.split(',')[0])
            y = float(xy.split(',')[1])
            new_polygon.append(x)
            new_polygon.append(y)
        converted_polygons.append({'label': label, 'points': new_polygon})

    for i in range(0, len(converted_polygons)):
        rle_bottom = mask_util.frPyObjects([converted_polygons[i]['points']], height, width)
        segment_overlapped = False
        for j in range(i + 1, len(converted_polygons)):
            rle_top = mask_util.frPyObjects([converted_polygons[j]['points']], height, width)
            iou = mask_util.iou(rle_bottom, rle_top, [0, 0])
            area_top = sum(mask_util.area(rle_top))
            area_bottom = sum(mask_util.area(rle_bottom))
            if area_bottom == 0:
                continue
            area_ratio = area_top / area_bottom
            sum_iou = sum(iou)

            # If the top segment is fully inside the bottom one, save this segment as is
            if area_ratio - ratio_tolerance < sum_iou[0] < area_ratio + ratio_tolerance:
                continue
            # Check the situation when the bottom segment is fully inside the top one.
            # That indicates a mistake in the annotation. Save this segment as is
            if 1 / area_ratio - ratio_tolerance < sum_iou[0] < 1 / area_ratio + ratio_tolerance:
                continue

            if sum_iou[0] > threshold:
                segment_overlapped = True
                bottom_mask = np.array(mask_util.decode(rle_bottom), dtype=np.uint8)
                top_mask = np.array(mask_util.decode(rle_top), dtype=np.uint8)

                bottom_mask = np.subtract(bottom_mask, top_mask)
                bottom_mask[bottom_mask > 1] = 0

                bottom_mask = np.sum(bottom_mask, axis=2)
                bottom_mask = np.array(bottom_mask > 0, dtype=np.uint8)
                converted_polygons[i]['points'] = mask_to_polygon(bottom_mask)
                # If the segment is now empty, apply a small fix to avoid an error
                # in the cocoapi function
                if len(converted_polygons[i]['points']) == 0:
                    converted_polygons[i]['points'] = [empty_polygon]
                rle_bottom = mask_util.frPyObjects(
                    converted_polygons[i]['points'], height, width)
        if not segment_overlapped:
            converted_polygons[i]['points'] = [converted_polygons[i]['points']]

    output_polygons = []
    for i in range(0, len(converted_polygons)):
        if not use_background_label and converted_polygons[i]['label'] == 'background':
            continue
        poly_len = len(converted_polygons[i]['points'])
        if poly_len == 0 or converted_polygons[i]['points'] == [empty_polygon]:
            log.warning('Image <{}> has an empty polygon with label <{}>. '
                        'Perhaps there is a mistake in annotation'.format(
                            img_name, converted_polygons[i]['label']))
        else:
            output_polygons.append(converted_polygons[i])

    return output_polygons
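# fix_segments_intersections relies on an external mask_to_polygon helper that is not
# shown here. Below is a minimal sketch of what that helper plausibly does (an
# assumption based on how its return value is used above, with an OpenCV 4.x
# findContours signature), followed by a usage example with two partially
# overlapping CVAT-style polygons:
import cv2
import numpy as np

def mask_to_polygon(mask_img):
    # Returns a list of flat [x1, y1, x2, y2, ...] contours, the format
    # frPyObjects expects above
    contours, _ = cv2.findContours(mask_img.astype(np.uint8),
                                   cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return [c.flatten().astype(float).tolist() for c in contours if len(c) >= 3]

polygons = [
    {'label': 'road',   'points': '10.0,10.0;60.0,10.0;60.0,60.0;10.0,60.0'},
    {'label': 'person', 'points': '40.0,40.0;90.0,40.0;90.0,90.0;40.0,90.0'},
]
fixed = fix_segments_intersections(polygons, 100, 100, 'demo.png',
                                   use_background_label=True)
print([p['label'] for p in fixed])  # the 'road' contour is cropped by 'person'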
from pycocotools import mask
from skimage import measure
import numpy as np
from shapely.geometry import Polygon

# A single detection result with a compressed-RLE segmentation
a = {"image_id": 579,
     "bbox": [407.4486999511719, 236.60690307617188, 57.411163330078125, 56.066925048828125],
     "score": 0.9970695972442627,
     "category_id": 1,
     "segmentation": {"size": [394, 556], "counts": "`Pn4=k;3N1N2O2M2N2N3M2M3N2M4K4O2N1O1O2O001O001O00001O00001O1O01O00001O0O1O2O0N3N1O2M3K4N3L3N3L3N2L4O2M2N3N2M3M3MoTS1"}}

print(mask.decode(a["segmentation"]))   # binary mask as an (h, w) array
print(mask.area(a["segmentation"]))     # pixel area of the RLE
print(mask.toBbox(a["segmentation"]))   # tight bounding box [x, y, w, h]

contours = measure.find_contours(mask.decode(a["segmentation"]), 0.5)

def close_contour(contour):
    # Ensure the contour is closed (first point equals last point)
    if not np.array_equal(contour[0], contour[-1]):
        contour = np.vstack((contour, contour[0]))
    return contour

for contour in contours:
    segmentation = contour.ravel().tolist()
    print(len(segmentation))  # number of coordinates before simplification
    poly = Polygon(contour)
    poly = poly.simplify(0.001, preserve_topology=True)
    segmentation = np.array(poly.exterior.coords).ravel().tolist()
    # Alternative route via skimage:
    # contour = close_contour(contour)
    # contour = measure.approximate_polygon(contour, 0.01)
    # contour = np.flip(contour, axis=1)  # (row, col) -> (x, y)
    # segmentation = contour.ravel().tolist()
    print(len(segmentation))  # number of coordinates after simplification
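# Round-trip check (an addition, using 'poly' from the last loop iteration):
# rasterize the simplified polygon back to an RLE and compare its area with
# mask.area(a["segmentation"]) printed above. skimage contours are (row, col),
# while COCO polygons are (x, y), so the coordinates are flipped first.
xy = np.array(poly.exterior.coords)[:, ::-1].ravel().tolist()
rle_poly = mask.frPyObjects([xy], a["segmentation"]["size"][0], a["segmentation"]["size"][1])
print(mask.area(mask.merge(rle_poly)))  # should be close to the original RLE area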