def loadRes(self, resFile):
    """
    Load result file and return a result api object.

    The result type is inferred from the first annotation only ('caption',
    'bbox', 'segmentation' or 'keypoints'). The annotation dicts are completed
    in place ('id', 'area', ...), so a list passed by the caller is modified.

    :param   resFile (str, np.ndarray or list) : file name of a JSON result
             file, an array handed to self.loadNumpyAnnotations, or an already
             loaded list of annotation dicts
    :return: res (obj)         : result api object
    """
    res = COCO()
    res.dataset['images'] = [img for img in self.dataset['images']]

    print('Loading and preparing results...')
    tic = time.time()
    try:
        # `unicode` only resolves on Python 2 or when the module aliases it;
        # referencing it bare raised NameError on Python 3 for non-str input.
        path_types = (str, unicode)  # noqa: F821
    except NameError:
        path_types = (str,)
    if isinstance(resFile, path_types):
        # Context manager so the file handle is closed deterministically.
        with open(resFile) as res_fp:
            anns = json.load(res_fp)
    elif isinstance(resFile, np.ndarray):
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert type(anns) == list, 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'
    if 'caption' in anns[0]:
        # Caption results: keep only the images that actually received one.
        imgIds = set(img['id'] for img in res.dataset['images']) & set(annsImgIds)
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for idx, ann in enumerate(anns):
            ann['id'] = idx + 1
    elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # Fall back to the box outline as a four-corner polygon.
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'segmentation' in anns[0]:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'keypoints' in anns[0]:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # Keypoints are read with stride 3: x at 0::3, y at 1::3.
            s = ann['keypoints']
            x = s[0::3]
            y = s[1::3]
            x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x1 - x0) * (y1 - y0)
            ann['id'] = idx + 1
            ann['bbox'] = [x0, y0, x1 - x0, y1 - y0]
    print('DONE (t={:0.2f}s)'.format(time.time() - tic))

    res.dataset['annotations'] = anns
    res.createIndex()
    return res
def loadRes(self, resFile):
    """
    Load result file and return a result api object.

    The result type is inferred from the first annotation only ('caption',
    'bbox' or 'segmentation'); the loaded annotation dicts are completed with
    'id', 'area', ... before being indexed.

    :param   resFile (str)     : file name of result file
    :return: res (obj)         : result api object
    """
    res = COCO()
    res.dataset['images'] = [img for img in self.dataset['images']]

    print('Loading and preparing results... ')
    tic = time.time()
    # Context manager so the file handle is closed deterministically.
    with open(resFile) as res_fp:
        anns = json.load(res_fp)
    assert type(anns) == list, 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'
    if 'caption' in anns[0]:
        # Caption results: keep only the images that actually received one.
        imgIds = set(img['id'] for img in res.dataset['images']) & set(annsImgIds)
        res.dataset['images'] = [
            img for img in res.dataset['images'] if img['id'] in imgIds
        ]
        for idx, ann in enumerate(anns):
            ann['id'] = idx + 1
    elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # Fall back to the box outline as a four-corner polygon.
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'segmentation' in anns[0]:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            # The mask helpers are called with a one-element list here, hence [0].
            ann['area'] = mask.area([ann['segmentation']])[0]
            if 'bbox' not in ann:
                ann['bbox'] = mask.toBbox([ann['segmentation']])[0]
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    print('DONE (t=%0.2fs)' % (time.time() - tic))

    res.dataset['annotations'] = anns
    res.createIndex()
    return res
def LoadAnnotations(self, annotations):
    """Wrap in-memory detection results in a COCO result object.

    Mirrors the result-loading step of the COCO API without reading a result
    file. The annotation dicts are completed in place with 'area', 'id' and
    'iscrowd' (plus 'bbox' for segmentation results).

    Args:
      annotations: python list of detection dicts. Each dict needs 'image_id'
        and, depending on `self._detection_type`, either 'bbox' or
        'segmentation'.

    Returns:
      a coco.COCO datastructure holding object detection annotations results

    Raises:
      ValueError: if annotations is not a list
      ValueError: if annotations reference image ids unknown to self.
    """
    results = coco.COCO()
    results.dataset['images'] = list(self.dataset['images'])

    tf.logging.info('Loading and preparing annotation results...')
    tic = time.time()

    if not isinstance(annotations, list):
        raise ValueError('annotations is not a list of objects')
    result_img_ids = set(ann['image_id'] for ann in annotations)
    known_img_ids = set(self.getImgIds())
    if not result_img_ids.issubset(known_img_ids):
        raise ValueError('Results do not correspond to current coco set')

    results.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
    detection_type = self._detection_type
    if detection_type == 'bbox':
        for ann_id, ann in enumerate(annotations, start=1):
            box = ann['bbox']
            # Area is width * height of the box (elements 2 and 3).
            ann['area'] = box[2] * box[3]
            ann['id'] = ann_id
            ann['iscrowd'] = 0
    elif detection_type == 'segmentation':
        for ann_id, ann in enumerate(annotations, start=1):
            segm = ann['segmentation']
            ann['area'] = mask.area(segm)
            ann['bbox'] = mask.toBbox(segm)
            ann['id'] = ann_id
            ann['iscrowd'] = 0
    tf.logging.info('DONE (t=%0.2fs)', (time.time() - tic))

    results.dataset['annotations'] = annotations
    results.createIndex()
    return results
def convert_mask_image_to_polygon_format(mask):
    '''Compute RLE, area, bounding box and contours of a binary mask (unfinished).

    Args:
        mask: np.array of binary image

    Return:
        None. NOTE(review): the intermediate results are computed but not
        returned yet; the polygon conversion itself is still to be written.
    '''
    # The `mask` argument shadows the module-level `mask` helper inside this
    # function, so `mask.encode(...)` was looked up on the ndarray and raised
    # AttributeError. Bind the module under a non-clashing local name.
    # NOTE(review): assumes the file's `mask` module is pycocotools.mask -- confirm.
    from pycocotools import mask as coco_mask

    fortran_gt_map = np.asfortranarray(mask)
    encoded_gt = coco_mask.encode(fortran_gt_map)
    gt_area = coco_mask.area(encoded_gt)
    gt_bbox = coco_mask.toBbox(encoded_gt)
    contours = measure.find_contours(mask, 0.5)
    # TODO: turn `contours` into polygons and return them with gt_area / gt_bbox.
def prepare_scenes(self):
    """Attach normalised bounding boxes to every scene in ``self.scenes``.

    For each scene the object masks are converted to boxes with
    ``mask_utils.toBbox`` and the result of ``norm_bbox`` is stored under
    ``scene['boxes']``. Objects are read from ``scene['objects']`` when
    ``self.split == 'mini'`` and from ``scene['objects_detection']`` otherwise.
    A carriage-return progress counter is printed while running.
    """
    print('Preparing scenes')
    for position, scene in enumerate(self.scenes, start=1):
        print(f'\r{position}/{len(self.scenes)}', end='')
        detection = 'objects' if self.split == 'mini' else 'objects_detection'
        raw_boxes = [mask_utils.toBbox(entry['mask']) for entry in scene[detection]]
        scene['boxes'] = norm_bbox(np.array(raw_boxes))
    # Finish the progress line.
    print()
def toBbox(obj):
    """Return bounding box(es) for an RLE, a list of RLEs, a mask or a stack of masks.

    Single inputs (``is_RLE`` / ``is_mask``) are wrapped in a one-element list
    for ``_mask.toBbox`` and the first box is returned; plural inputs
    (``is_RLEs`` / ``is_masks``) return everything ``_mask.toBbox`` produced.
    Masks are converted with ``encode`` first.

    Raises:
        Exception: if ``obj`` matches none of the four supported kinds.
    """
    if is_RLE(obj):
        return _mask.toBbox([obj])[0]
    if is_RLEs(obj):
        return _mask.toBbox(obj)
    if is_mask(obj):
        return _mask.toBbox([encode(obj)])[0]
    if is_masks(obj):
        return _mask.toBbox(encode(obj))
    raise Exception("Not Implement")
def __init__(self, config, subset, coord):
    """Collect one annotation per stored proposal and hand them to the base class.

    Proposals are read from ``<bb_input_dir><sequence>/<timestep>.json``; each
    proposal's box is derived from its "segmentation" with ``toBbox``.
    Proposals whose box has non-positive width or height are skipped. Image
    paths are built as ``<image_input_dir><sequence>/<timestep>.jpg``.
    """
    proposal_root = config.str("bb_input_dir", None)
    image_root = config.str("image_input_dir", None)

    annotations = []
    for folder in sorted(glob.glob(proposal_root + '*/')):
        # The folder path ends with '/', so the sequence name is second to last.
        name = folder.split('/')[-2]
        for file in sorted(glob.glob(proposal_root + name + "/*.json")):
            timestep = file.split('/')[-1].split('.json')[0]
            with open(file, "r") as f:
                proposals = json.load(f)
            img_file = image_root + name + "/" + timestep + ".jpg"
            for prop_id, proposal in enumerate(proposals):
                bbox = toBbox(proposal["segmentation"])
                if bbox[2] <= 0 or bbox[3] <= 0:
                    # Degenerate box: nothing to crop.
                    continue
                annotations.append({
                    "img_file": img_file,
                    "category_id": 1,
                    "bbox": bbox,
                    "tag": name + '/' + timestep + '___' + str(prop_id),
                })

    super(DAVISForwardSimilarityDataset, self).__init__(
        config, subset, coord, annotations, n_train_ids=1)
def create_annotation_info(annotation_id, image_id, category_info, binary_mask=None,
                           image_size=None, tolerance=2, bounding_box=None):
    """Build a COCO-style annotation dict from a binary mask and/or a bounding box.

    Args:
        annotation_id: value stored under "id".
        image_id: value stored under "image_id".
        category_info: dict with "id" and "is_crowd".
        binary_mask: optional mask array. When given, area (and the box, unless
            ``bounding_box`` is supplied) are derived from it.
        image_size: optional size passed to ``resize_binary_mask`` before encoding.
        tolerance: forwarded to ``binary_mask_to_polygon``.
        bounding_box: optional (x, y, w, h). Required when ``binary_mask`` is
            None; the area is then ``w * h``.

    Returns:
        The annotation dict, or None when the mask area is below 1 or the
        polygon conversion yields nothing.
    """
    if binary_mask is not None:
        if image_size is not None:
            binary_mask = resize_binary_mask(binary_mask, image_size)
        binary_mask_encoded = mask.encode(
            np.asfortranarray(binary_mask.astype(np.uint8)))
        area = mask.area(binary_mask_encoded)
        if area < 1:
            return None
        if bounding_box is None:
            # toBbox returns a numpy array; store a plain list so the
            # annotation stays JSON-serialisable, like "area" below.
            bounding_box = mask.toBbox(binary_mask_encoded).tolist()
        area = area.tolist()
    else:
        _, _, w, h = bounding_box
        area = w * h

    if category_info["is_crowd"]:
        is_crowd = 1
        segmentation = binary_mask_to_rle(binary_mask)
    else:
        is_crowd = 0
        if binary_mask is not None:
            segmentation = binary_mask_to_polygon(binary_mask, tolerance)
            if not segmentation:
                return None
        else:
            # Box-only annotation: no segmentation available.
            segmentation = []

    annotation_info = {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_info["id"],
        "iscrowd": is_crowd,
        "area": area,
        "bbox": bounding_box,
        "segmentation": segmentation,
        "ignore": 0,
    }
    return annotation_info
def get_planar_dicts(num):
    """Build dataset records for the first ``num`` files matching ``GT``.

    Every record holds "file_name" (the ``_GL.npz`` path with a ``.png``
    suffix), "image_id", "height", "width" and "annotations". One annotation
    is produced per distinct value of the down-sampled ``idmap_face`` map,
    except for the largest value.
    """
    dataset_dicts = []
    for image_id, npz_path in enumerate(sorted(glob.glob(GT))[:num]):
        with np.load(npz_path) as archive:
            face_ids = archive["idmap_face"]
        face_ids = downsample_nearest(face_ids, new_size=(_H, _W))

        unique_ids, _ = np.unique(face_ids, return_inverse=True)
        annotations = []
        # np.unique sorts its output, so [:-1] drops the largest id.
        # NOTE(review): presumably the background id -- confirm.
        for face_id in unique_ids[:-1]:
            gt_mask = np.asarray(face_ids == face_id, order="F")
            encoded_gt = mask.encode(gt_mask)
            area_gt = mask.area(encoded_gt)
            # Result unused; call kept because the original makes it.
            mask.toBbox(encoded_gt)
            annotations.append({
                "bbox": toBbox(gt_mask),
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": encoded_gt,
                "area": area_gt,
                "category_id": 0,
                "iscrowd": 0,
            })

        dataset_dicts.append({
            "file_name": npz_path.replace("_GL.npz", ".png"),
            "image_id": image_id,
            "height": _H,
            "width": _W,
            "annotations": annotations,
        })
    return dataset_dicts
def __init__(self, lvis_gt, results, max_dets=300):
    """Constructor for LVIS results.

    Args:
        lvis_gt (LVIS): ground-truth instance. Anything else fails the
            isinstance assertion below.
        results (str, np.ndarray or list of dicts): path of a result file,
            an array handed to ``loadNumpyAnnotations``, or the result dicts.
        max_dets (int): max number of detections per image. The official
            value of max_dets for LVIS is 300. A negative value skips the
            limiting step.
    """
    super(LVISResults, self).__init__()
    assert isinstance(lvis_gt, LVIS)
    self.dataset["images"] = list(lvis_gt.dataset["images"])

    if isinstance(results, str):
        result_anns = self._load_json(results)
    elif type(results) == np.ndarray:
        result_anns = self.loadNumpyAnnotations(results)
    else:
        result_anns = results

    if max_dets >= 0:
        result_anns = self.limit_dets_per_image(result_anns, max_dets)

    # The first result decides how all results are interpreted.
    has_results = len(result_anns) > 0
    if has_results and "bbox" in result_anns[0]:
        self.dataset["categories"] = copy.deepcopy(lvis_gt.dataset["categories"])
        for ann_id, ann in enumerate(result_anns, start=1):
            x1, y1, w, h = ann["bbox"]
            x2, y2 = x1 + w, y1 + h
            if "segmentation" not in ann:
                # Fall back to the box outline as a four-corner polygon.
                ann["segmentation"] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann["area"] = w * h
            ann["id"] = ann_id
    elif has_results and "segmentation" in result_anns[0]:
        self.dataset["categories"] = copy.deepcopy(lvis_gt.dataset["categories"])
        for ann_id, ann in enumerate(result_anns, start=1):
            # Only support compressed RLE format as segmentation results
            segm = ann["segmentation"]
            ann["area"] = mask_util.area(segm)
            if "bbox" not in ann:
                ann["bbox"] = mask_util.toBbox(segm)
            ann["id"] = ann_id

    self.dataset["annotations"] = result_anns
    self._create_index()
def LoadAnnotations(self, annotations):
    """Turn a list of detection dicts into a COCO result datastructure.

    Replicates the COCO API's result loading without going through a file on
    disk. Each dict in `annotations` is updated in place with 'area', 'id'
    and 'iscrowd'; for segmentation results 'bbox' is (re)computed too.

    Args:
      annotations: python list of detection dicts, each with 'image_id' and,
        depending on `self._detection_type`, 'bbox' or 'segmentation'.

    Returns:
      a coco.COCO datastructure holding object detection annotations results

    Raises:
      ValueError: if annotations is not a list
      ValueError: if annotations reference image ids unknown to self.
    """
    results = coco.COCO()
    results.dataset['images'] = list(self.dataset['images'])

    tf.logging.info('Loading and preparing annotation results...')
    tic = time.time()

    if not isinstance(annotations, list):
        raise ValueError('annotations is not a list of objects')
    seen_ids = set(ann['image_id'] for ann in annotations)
    if seen_ids - set(self.getImgIds()):
        # At least one result points at an image this dataset does not hold.
        raise ValueError('Results do not correspond to current coco set')

    results.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
    is_bbox = self._detection_type == 'bbox'
    is_segm = (not is_bbox) and self._detection_type == 'segmentation'
    if is_bbox or is_segm:
        for position, ann in enumerate(annotations):
            if is_bbox:
                width, height = ann['bbox'][2], ann['bbox'][3]
                ann['area'] = width * height
            else:
                ann['area'] = mask.area(ann['segmentation'])
                ann['bbox'] = mask.toBbox(ann['segmentation'])
            ann['id'] = position + 1
            ann['iscrowd'] = 0
    tf.logging.info('DONE (t=%0.2fs)', (time.time() - tic))

    results.dataset['annotations'] = annotations
    results.createIndex()
    return results
def create_annotation_info(binary_mask, image_size=None, tolerance=2):
    """Return ``(area, bounding_box, segmentation)`` for a binary mask.

    The mask is encoded as uint8; the area is cast to ``int`` and the box to a
    list of int32 values. The segmentation comes from ``binary_mask_to_rle``.
    All three values are None when the area is below 1. ``image_size`` and
    ``tolerance`` are accepted but not used here.
    """
    rle = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))
    pixel_count = int(mask.area(rle))
    box = mask.toBbox(rle).astype(np.int32).tolist()
    crowd_rle = binary_mask_to_rle(binary_mask)

    if pixel_count < 1:
        # Empty mask: report nothing.
        return None, None, None
    return pixel_count, box, crowd_rle
def set_seg_object_geometry(annotation: AnnType, mask: np.ndarray) -> AnnType:
    """Fill ``segmentation``, ``bbox`` and ``area`` of an annotation from its mask.

    An all-zero mask leaves the annotation untouched. Otherwise the mask is
    RLE-encoded, the RLE counts are decoded to a UTF-8 string, and box and
    area are stored as plain Python values. The same ``annotation`` object is
    updated in place and returned.
    """
    if mask.sum():
        # A trailing axis is added before encoding, so take the first RLE back.
        fortran_mask = np.array(mask[:, :, None], order="F", dtype="uint8")
        rle: RLEType = mask_utils.encode(fortran_mask)[0]
        rle["counts"] = rle["counts"].decode("utf-8")  # type: ignore
        box = mask_utils.toBbox(rle).tolist()
        pixel_area = mask_utils.area(rle).tolist()
        annotation.update(dict(segmentation=rle, bbox=box, area=pixel_area))
    return annotation
def coco_seg2bbox(polygons, image_height: int, image_width: int) -> list:
    """Converts polygons in COCO format to bounding box in pixels.

    All polygons are merged into one region first, so the box encloses every
    part of a multi-polygon object. The previous implementation returned the
    box of the first polygon only.

    Args:
        polygons: list of polygons, each a flat ``[x0, y0, x1, y1, ...]`` list.
        image_height: Height of the target image.
        image_width: Width of the target image.

    Returns:
        [x_min, y_min, width, height], truncated to integers.
    """
    rles = coco_mask.frPyObjects(polygons, image_height, image_width)
    # Union of all parts; toBbox on the RLE directly makes the earlier
    # decode/encode round trip unnecessary.
    merged = coco_mask.merge(rles)
    return coco_mask.toBbox(merged).astype(int).tolist()
def loadRes(self, resFile):
    """
    Load result file and return a result api object.

    The result type is inferred from the first annotation only ('bbox' or
    'segmentation'). The annotation dicts are completed in place, so a list
    passed by the caller is modified. 'attributes' and 'categories' of the
    result share the objects of this dataset (no copy).

    :param   resFile (str or list) : file name of a JSON result file, or an
             already loaded list of annotation dicts
    :return: res (obj)         : result api object
    """
    res = Fashionpedia()
    res.dataset['images'] = [img for img in self.dataset['images']]

    print('Loading and preparing results...')
    tic = time.time()
    if type(resFile) == str \
            or (PYTHON_VERSION == 2 and type(resFile) == unicode):
        # Context manager so the file handle is closed deterministically.
        with open(resFile) as res_fp:
            anns = json.load(res_fp)
    else:
        anns = resFile
    assert type(anns) == list, 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'
    # The per-branch deepcopy of 'categories' was dropped: it was always
    # overwritten by the shared reference assigned at the end.
    if 'bbox' in anns[0] and not anns[0]['bbox'] == []:
        for idx, ann in enumerate(anns):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # Fall back to the box outline as a four-corner polygon.
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'segmentation' in anns[0]:
        for idx, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    print('DONE (t={:0.2f}s)'.format(time.time() - tic))

    res.dataset['annotations'] = anns
    res.dataset["attributes"] = self.dataset["attributes"]
    res.dataset["categories"] = self.dataset["categories"]
    res.createIndex()
    return res
def loadRes(self, detection_results, mask_results):
    """Load result file and return a result api object.

    An empty prediction list yields a result object without annotations
    instead of raising IndexError.

    Args:
      detection_results: a numpy array of detection results of shape:
        [num_images * detection_per_image, 7]. The format is:
        [image_id, x, y, width, height, score, class].
      mask_results: a list of RLE encoded binary instance masks. Length is
        num_images * detections_per_image.

    Returns:
      res: result MaskCOCO api object
    """
    res = MaskCOCO()
    res.dataset['images'] = [img for img in self.dataset['images']]
    print('Loading and preparing results...')
    predictions = self.load_predictions(detection_results, mask_results)
    assert isinstance(predictions, list), 'results in not an array of objects'

    image_ids = [pred['image_id'] for pred in predictions]
    assert set(image_ids) == (set(image_ids) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'

    # The first prediction decides the format; guard the empty case, which
    # previously crashed on predictions[0].
    first = predictions[0] if predictions else {}
    if 'bbox' in first and first['bbox']:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, pred in enumerate(predictions):
            bb = pred['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in pred:
                # Fall back to the box outline as a four-corner polygon.
                pred['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            pred['area'] = bb[2] * bb[3]
            pred['id'] = idx + 1
            pred['iscrowd'] = 0
    elif 'segmentation' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, pred in enumerate(predictions):
            # now only support compressed RLE format as segmentation results
            pred['area'] = maskUtils.area(pred['segmentation'])
            if 'bbox' not in pred:
                pred['bbox'] = maskUtils.toBbox(pred['segmentation'])
            pred['id'] = idx + 1
            pred['iscrowd'] = 0

    res.dataset['annotations'] = predictions
    res.createIndex()
    return res
def create_annotation_info(annotation_id, image_id, category_info, binary_mask,
                           image_size=None, tolerance=2, bounding_box=None,
                           data_type=None):
    """Build a COCO-style annotation dict from a binary mask.

    Args:
        annotation_id: value stored under "id".
        image_id: value stored under "image_id".
        category_info: dict with "id" and "is_crowd".
        binary_mask: mask array; encoded as uint8 to obtain area and box.
        image_size: optional size passed to ``resize_binary_mask`` first.
        tolerance: forwarded to ``binary_mask_to_polygon``.
        bounding_box: optional (x, y, w, h) overriding the box derived from
            the mask. Lists, tuples and numpy arrays are accepted.
        data_type: when not 'train', boxes narrower or shorter than 20 are
            dropped.

    Returns:
        The annotation dict, or None when the mask area is below 1, the
        polygon conversion yields nothing, or the size filter rejects the box.
    """
    if image_size is not None:
        binary_mask = resize_binary_mask(binary_mask, image_size)

    binary_mask_encoded = mask.encode(
        np.asfortranarray(binary_mask.astype(np.uint8)))

    area = mask.area(binary_mask_encoded)
    if area < 1:
        return None

    if bounding_box is None:
        bounding_box = mask.toBbox(binary_mask_encoded)

    if category_info["is_crowd"]:
        is_crowd = 1
        segmentation = binary_mask_to_rle(binary_mask)
    else:
        is_crowd = 0
        segmentation = binary_mask_to_polygon(binary_mask, tolerance)
        if not segmentation:
            return None

    # Label size filter: outside the training split, skip small boxes.
    if data_type != 'train':
        if bounding_box[2] < 20 or bounding_box[3] < 20:
            return None

    annotation_info = {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_info["id"],
        "iscrowd": is_crowd,
        "area": area.tolist(),
        # np.asarray lets a caller-supplied list or tuple through; calling
        # .tolist() directly on it raised AttributeError.
        "bbox": np.asarray(bounding_box).tolist(),
        "segmentation": segmentation,
        "width": binary_mask.shape[1],
        "height": binary_mask.shape[0],
    }
    return annotation_info
def get_KITTI_dicts(img_dir):
    """Build per-image annotation records from ``<img_dir>/annotations_raw``.

    Each annotation file is read line by line. The whitespace-separated
    fields are used as: [0] frame number, [1] object id (lines with id 10000
    are skipped), [2] class id (stored minus one), [3] height, [4] width,
    [5] RLE counts. The first four characters of the file name are parsed as
    the folder number; the image id is ``10000 * folder + frame``.

    Consecutive lines with the same image id are grouped into one record.
    The final frame of every file is emitted as well; it used to be dropped
    because records were only flushed when the next frame started.

    Args:
        img_dir: dataset root holding ``annotations_raw`` and ``train``.

    Returns:
        List of dicts with "file_name", "image_id", "height", "width" and
        "annotations".
    """
    dataset_dicts = []
    ann_dir = os.path.join(img_dir, 'annotations_raw')
    for ann in os.listdir(ann_dir):
        # Context manager: the file handle used to be left open.
        with open(os.path.join(ann_dir, ann), 'r') as ann_file:
            lines = ann_file.read().splitlines()

        prev_img_id = -1
        record = {}
        objs = []
        for line in lines:
            v = line.split()
            number_of_folder = int(ann[:4])
            number_of_frame = int(v[0])
            img_id = 10000 * number_of_folder + number_of_frame
            filename = os.path.join(img_dir, 'train/' + str(img_id))
            height, width = int(v[3]), int(v[4])

            if int(v[1]) == 10000:
                continue

            if prev_img_id != img_id:
                # A new frame starts: emit the finished one first.
                if objs:
                    record["annotations"] = deepcopy(objs)
                    dataset_dicts.append(deepcopy(record))
                    objs.clear()
                record["file_name"] = filename + '.png'
                record["image_id"] = img_id
                record["height"] = height
                record["width"] = width

            rle2 = {"size": [height, width], "counts": v[5]}
            result_mask = decode(rle2)
            encoded_mask = encode(result_mask)
            result_bbox = toBbox(encoded_mask)
            contours = measure.find_contours(result_mask, 0.5)
            # Only the first contour is kept; np.flip swaps its two columns.
            polygons = measure.approximate_polygon(
                np.flip(contours[0], axis=1), tolerance=0)

            obj = {
                "bbox": list(result_bbox),
                "bbox_mode": BoxMode.XYWH_ABS,
                "segmentation": [polygons.tolist()],
                "category_id": int(v[2]) - 1,
                "iscrowd": 0,
            }
            objs.append(deepcopy(obj))
            prev_img_id = img_id

        # Flush the last frame of this file.
        if objs:
            record["annotations"] = deepcopy(objs)
            dataset_dicts.append(deepcopy(record))
        print("Done: ", ann)
    return dataset_dicts
def polygon_area_and_bbox(polygon, height, width):
    """Calculate area of object's polygon and bounding box around it

    Args:
        polygon: objects contour represented as 2D array
        height: height of object's region (use full image)
        width: width of object's region (use full image)

    Returns:
        ``(area, bbox)``: ``area`` is whatever ``mask_util.area`` returns for
        the encoded polygon(s); ``bbox`` is ``[x, y, w, h]`` enclosing all
        per-polygon boxes.
    """
    rle = mask_util.frPyObjects(polygon, height, width)
    area = mask_util.area(rle)
    boxes = mask_util.toBbox(rle)

    # One [x, y, w, h] row per polygon; take the envelope of all rows.
    lefts, tops = boxes[:, 0], boxes[:, 1]
    left = min(lefts)
    top = min(tops)
    right = max(lefts + boxes[:, 2])
    bottom = max(tops + boxes[:, 3])
    return area, [left, top, right - left, bottom - top]
def loadRes(self, detection_results, include_mask, is_image_mask=False):
    """Load result file and return a result api object.

    An empty prediction list yields a result object without annotations.
    The segmentation branch used to index ``predictions[0]`` unguarded and
    raised IndexError in that case.

    Args:
      detection_results: a dictionary containing predictions results.
      include_mask: a boolean, whether to include mask in detection results.
      is_image_mask: a boolean, where the predict mask is a whole image mask.

    Returns:
      res: result MaskCOCO api object
    """
    res = MaskCOCO()
    res.dataset['images'] = [img for img in self.dataset['images']]
    print('Loading and preparing results...')
    predictions = self.load_predictions(
        detection_results,
        include_mask=include_mask,
        is_image_mask=is_image_mask)
    assert isinstance(predictions, list), 'results in not an array of objects'
    if predictions:
        image_ids = [pred['image_id'] for pred in predictions]
        assert set(image_ids) == (set(image_ids) & set(self.getImgIds())), \
            'Results do not correspond to current coco set'

    # The first prediction decides the format; an empty dict stands in when
    # there are no predictions so neither branch is taken.
    first = predictions[0] if predictions else {}
    if 'bbox' in first and first['bbox']:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, pred in enumerate(predictions):
            bb = pred['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in pred:
                # Fall back to the box outline as a four-corner polygon.
                pred['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            pred['area'] = bb[2] * bb[3]
            pred['id'] = idx + 1
            pred['iscrowd'] = 0
    elif 'segmentation' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, pred in enumerate(predictions):
            # now only support compressed RLE format as segmentation results
            pred['area'] = maskUtils.area(pred['segmentation'])
            if 'bbox' not in pred:
                pred['bbox'] = maskUtils.toBbox(pred['segmentation'])
            pred['id'] = idx + 1
            pred['iscrowd'] = 0

    res.dataset['annotations'] = predictions
    res.createIndex()
    return res
def calcParticlesStatsForCsv(segms, imageInfo):
    """Collect per-particle statistics as column lists for CSV export.

    Somewhat similar to printParticlesStats. Checks are mainly made there,
    the code here is simpler. Only the masks of class 0 (``segms[0]``) are
    used.

    Args:
        segms: per-class lists of compressed masks; each mask has a 'size'.
        imageInfo: passed to ``getPixelSize`` and
            ``getOutputImageStretchFactor``.

    Returns:
        Dict of equally long lists: 'index', 'bbox_center_x', 'bbox_center_y',
        'area_pixels', 'diameter_pixels', plus 'area_nm2' and 'diameter_nm'
        when a pixel size is available. All lists are empty when there are no
        particles.
    """
    inst_cls = 0
    particles = segms[inst_cls]
    data = {
        'index': [],
        'bbox_center_x': [],
        'bbox_center_y': [],
        'area_pixels': [],
        'diameter_pixels': []
    }
    # TODO: max height, min height, center of mass...
    if len(particles) == 0:
        return data

    imageSize = particles[0]['size']  # Mask's, maybe stretched, image size
    pixelSize = getPixelSize(imageInfo)
    # One flag for both places that depend on the pixel size. They used to
    # test truthiness here and `is None` in the loop, so a falsy non-None
    # value skipped creating the columns and then hit KeyError.
    hasPixelSize = bool(pixelSize)
    if hasPixelSize:
        stretchFactor = getOutputImageStretchFactor(imageInfo, imageSize)
        pixelSize = [
            pixelSize[0] / stretchFactor, pixelSize[1] / stretchFactor
        ]
        print('Csv pixelSize ', pixelSize)
        data['area_nm2'] = []
        data['diameter_nm'] = []

    for maskInd, compMask in enumerate(particles):
        data['index'].append(maskInd)
        bbox = maskUtils.toBbox(compMask)  # Bbox here is (x0, y0, width, height)
        # Centres are in output image's pixels.
        data['bbox_center_x'].append(bbox[0] + bbox[2] / 2.0)
        data['bbox_center_y'].append(bbox[1] + bbox[3] / 2.0)
        particleArea = maskUtils.area(compMask)
        data['area_pixels'].append(particleArea)
        # Diameter of the circle with the same area.
        particleDiam = math.sqrt(particleArea / math.pi * 4)
        data['diameter_pixels'].append(particleDiam)
        if hasPixelSize:
            data['area_nm2'].append(
                particleArea * pixelSize[0] * pixelSize[1] * 1e18)
            data['diameter_nm'].append(
                particleDiam * (pixelSize[0] + pixelSize[1]) / 2 * 1e9)
    return data
def forward(self):
    """Run the validation set and export the refined tracks in KITTI format.

    For every validation example the detected masks are RLE-encoded, turned
    into [x0, y0, x1, y1] boxes and stored as ``TrackElement`` objects per
    sequence and time step. Class and score come from
    ``self.val_data.tracking_result``. Afterwards each sequence is passed
    through ``make_disjoint`` and written below
    ``forwarded/<model_name>/tracking_data_bbox_refined/``.
    """
    out_folder = "forwarded/" + self.model_name + "/tracking_data_bbox_refined/"
    tf.gfile.MakeDirs(out_folder)
    # Create (or truncate) 0000.txt ... 0020.txt. The handles are closed
    # right away; they used to be left to the garbage collector.
    for n in range(21):
        with open(out_folder + "%04d" % n + ".txt", "w"):
            pass

    data = self.val_data
    n_examples_per_epoch = data.n_examples_per_epoch()
    extraction_keys = [Extractions.DET_MASKS, DataKeys.IMAGE_FILENAMES, DataKeys.IDS]
    tracks = {}  # sequence name -> list (per time step) of lists of TrackElement
    for n in range(n_examples_per_epoch):
        res = self.trainer.validation_step(extraction_keys=extraction_keys)
        extractions = res[Extractions.EXTRACTIONS]
        masks = extractions[Extractions.DET_MASKS][0][0]
        filename = extractions[DataKeys.IMAGE_FILENAMES][0][0].decode("utf-8")
        # Path layout .../<sequence>/<time step>.png|.jpg
        sp = filename.split("/")
        seq = sp[-2]
        t = int(sp[-1].replace(".png", "").replace(".jpg", ""))
        ids = extractions[DataKeys.IDS][0][0]
        masks_encoded = [cocomask.encode(np.asfortranarray(mask)) for mask in masks]

        if seq not in tracks:
            tracks[seq] = []
        # Pad with empty time steps so that index t exists.
        while len(tracks[seq]) < t + 1:
            tracks[seq].append([])
        assert len(masks_encoded) == len(ids)
        for id_, mask_ in zip(ids, masks_encoded):
            x0, y0, w, h = cocomask.toBbox(mask_)
            box = [x0, y0, x0 + w, y0 + h]
            obj = data.tracking_result[seq][id_][t]
            class_str = obj.class_
            if class_str == "Car":
                class_id = 1
            elif class_str == "Pedestrian":
                class_id = 2
            else:
                assert False, ("unknown class str", class_str)
            score = obj.score
            tracks[seq][t].append(TrackElement(box=box, track_id=id_, class_=class_id,
                                               score=score, mask=mask_))
        print(n, "/", n_examples_per_epoch, masks.shape, filename, ids)

    for seq in tracks.keys():
        tracks[seq] = make_disjoint(tracks[seq], self.mask_disjoint_strategy)
        # write out data
        export_tracking_result_in_kitti_format(seq, tracks[seq], True,
                                               self.config.string("model"), out_folder)
def __init__(self, label_file):
    """Read the label CSV and collect one [xmin, ymin, xmax, ymax] box per row.

    Rows containing missing values are dropped. For each remaining row the
    first column is the image name and the second is decoded with
    ``self.rle_decode``. Boxes are grouped by image name in
    ``self.airbus_ship_objects``.
    """
    self.airbus_ship_objects = defaultdict(list)
    self.anno_data = pd.read_csv(label_file).dropna(axis=0)

    for row in range(len(self.anno_data)):
        image_name = self.anno_data.iloc[row, 0]
        encoded_objects = self.anno_data.iloc[row, 1]

        ship_mask = self.rle_decode(encoded_objects)
        ship_rle = mask.encode(np.asfortranarray(ship_mask.astype(np.uint8)))
        xywh = mask.toBbox(ship_rle)
        xmin, ymin, xmax, ymax = wwtool.xywh2xyxy(xywh)
        self.airbus_ship_objects[image_name].append([xmin, ymin, xmax, ymax])
def loadRes(self, resFile):
    """
    Load result file and return a result api object.

    The result type is inferred from the first annotation only ('bbox' or
    'segmentation'). The annotation dicts are completed in place ('id',
    'area', ...), so a list passed by the caller is modified.

    :param   resFile (str, np.ndarray or list) : file name of a JSON result
             file, an array handed to self.loadNumpyAnnotations, or an already
             loaded list of annotation dicts
    :return: res (obj)         : result api object
    """
    res = CELL()
    res.dataset['images'] = [img for img in self.dataset['images']]

    print('Loading and preparing results...')
    tic = time.time()
    try:
        # `unicode` only resolves on Python 2 or when the module aliases it;
        # referencing it bare raised NameError on Python 3 for non-str input.
        path_types = (str, unicode)  # noqa: F821
    except NameError:
        path_types = (str,)
    if isinstance(resFile, path_types):
        # Context manager so the file handle is closed deterministically.
        with open(resFile) as res_fp:
            anns = json.load(res_fp)
    elif isinstance(resFile, np.ndarray):
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert type(anns) == list, 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current cell set'
    if 'bbox' in anns[0] and not anns[0]['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # segmentation different from coco: a list of [x, y] corners
                ann['segmentation'] = [[x1, y1], [x1, y2], [x2, y2], [x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = idx + 1
    elif 'segmentation' in anns[0]:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = idx + 1
    print('DONE (t={:0.2f}s)'.format(time.time() - tic))

    res.dataset['annotations'] = anns
    res.createIndex()
    return res
def create_annotation_info(annotation_id, image_id, category_info, binary_mask,
                           image_size=None, tolerance=2, bounding_box=None):
    """Build a COCO-style annotation dict from a binary mask.

    Args:
        annotation_id: value stored under "id".
        image_id: value stored under "image_id".
        category_info: dict with "id" and "is_crowd".
        binary_mask: mask array; encoded as uint8 to obtain area and box.
        image_size: optional size passed to ``resize_binary_mask`` first.
        tolerance: forwarded to ``binary_mask_to_polygon``.
        bounding_box: optional (x, y, w, h) overriding the box derived from
            the mask. Lists, tuples and numpy arrays are accepted.

    Returns:
        The annotation dict, or None when the mask area is below 1 (a message
        is printed) or the polygon conversion yields nothing.
    """
    if image_size is not None:
        binary_mask = resize_binary_mask(binary_mask, image_size)

    # Cast the mask to uint8 in Fortran order before handing it to the encoder.
    binary_mask_encoded = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))

    # Area of the encoded mask; it also drives the empty-mask check below.
    area = mask.area(binary_mask_encoded)

    # An empty mask produces no annotation.
    if area < 1:
        print ("ERROR!! : area of mask is None")
        return None

    if bounding_box is None:
        # No box supplied by the caller: derive it from the mask.
        bounding_box = mask.toBbox(binary_mask_encoded)

    if category_info["is_crowd"]:
        is_crowd = 1
        segmentation = binary_mask_to_rle(binary_mask)
    else:
        is_crowd = 0
        segmentation = binary_mask_to_polygon(binary_mask, tolerance)
        if not segmentation:
            return None

    annotation_info = {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_info["id"],
        "iscrowd": is_crowd,
        "area": area.tolist(),
        # np.asarray lets a caller-supplied list or tuple through; calling
        # .tolist() directly on it raised AttributeError.
        "bbox": np.asarray(bounding_box).tolist(),
        "segmentation": segmentation,
        "width": binary_mask.shape[1],
        "height": binary_mask.shape[0],
    }
    return annotation_info
def mask_to_polygon(self, mask, need_binary=True):
    """Convert a mask into flat polygons plus an enclosing box.

    Returns:
        ``(polygons, bbox, has_holes)``. ``polygons`` are the flattened
        contours with at least six values; ``bbox`` is ``[x0, y0, x1, y1]``
        of the merged polygons at ``self.viz`` resolution; ``has_holes`` is
        true when any hierarchy row has a non-negative fourth entry.
        ``(None, None, None)`` is returned when no hierarchy is found (empty
        mask). ``need_binary`` is accepted but not used here.
    """
    found = cv2.findContours(mask, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
    # Indexing from the end: hierarchy is the last item, contours the one before.
    hierarchy = found[-1]
    if hierarchy is None:  # empty mask
        return None, None, None
    parent_column = hierarchy.reshape(-1, 4)[:, 3]
    has_holes = (parent_column >= 0).sum() > 0

    polygons = []
    for contour in found[-2]:
        flat = contour.flatten()
        # Fewer than six values cannot form a polygon.
        if len(flat) >= 6:
            polygons.append(flat)

    rle = mask_util.merge(
        mask_util.frPyObjects(polygons, self.viz.height, self.viz.width))
    bbox = mask_util.toBbox(rle)
    # XYWH -> XYXY, in place.
    bbox[2] += bbox[0]
    bbox[3] += bbox[1]
    return polygons, bbox, has_holes
def make_annotations(ann_list):
    """Convert raw result dicts into annotation dicts with ids, area and bbox.

    Ids are assigned sequentially starting at 1. "score" is copied when
    present. "area" is the integer mask area and "bbox" a list built from
    ``COCOmask.toBbox``. Progress is reported through ``tqdm``.
    """
    annotations = []
    for next_id, src in enumerate(tqdm(ann_list), start=1):
        entry = {
            "id": next_id,
            "category_id": src["category_id"],
            "image_id": src["image_id"],
            "iscrowd": 0,
        }
        if "score" in src:
            entry["score"] = src["score"]
        segm = src["segmentation"]
        entry["segmentation"] = segm
        entry["area"] = int(COCOmask.area(segm))
        entry["bbox"] = list(COCOmask.toBbox(segm))
        annotations.append(entry)
    return annotations
def load_img_info(files):
    """Collect image metadata and instance annotations for one image.

    Args:
        files: ``(img_file, inst_file, segm_file)`` paths.

    Returns:
        Dict with "file_name" and "segm_file" (both ``<parent dir>/<base
        name>``), "height", "width" and "anno_info". Each annotation holds an
        RLE mask with string counts, an XYWH box, its area, the category id
        and an iscrowd flag.
    """
    img_file, inst_file, segm_file = files
    inst_img = mmcv.imread(inst_file, "unchanged")
    # ids < 24 are stuff labels (filtering them first is about 5% faster)
    instance_ids = np.unique(inst_img[inst_img >= 24])

    anno_info = []
    for inst_id in instance_ids:
        # Crowd annotations have instance ids below 1000 and use the id as
        # label id; for the rest, inst_id // 1000 is the label id.
        crowd = inst_id < 1000
        label = CSLabels.id2label[inst_id if crowd else inst_id // 1000]
        if not label.hasInstances or label.ignoreInEval:
            continue

        mask = np.asarray(inst_img == inst_id, dtype=np.uint8, order="F")
        mask_rle = maskUtils.encode(mask[:, :, None])[0]
        area = maskUtils.area(mask_rle)
        # convert to COCO style XYWH format
        bbox = maskUtils.toBbox(mask_rle)
        # for json encoding
        mask_rle["counts"] = mask_rle["counts"].decode()

        anno_info.append(dict(
            iscrowd=int(crowd),
            category_id=label.id,
            bbox=bbox.tolist(),
            area=area.tolist(),
            segmentation=mask_rle,
        ))

    video_name = osp.basename(osp.dirname(img_file))
    height, width = inst_img.shape[0], inst_img.shape[1]
    return dict(
        # remove img_prefix for filename
        file_name=osp.join(video_name, osp.basename(img_file)),
        height=height,
        width=width,
        anno_info=anno_info,
        segm_file=osp.join(video_name, osp.basename(segm_file)),
    )
def get_vector(scene, idx, mode='val'):
    """Compute a concatenated feature vector for a scene and one object.

    The whole image and the crop given by the object's mask bounding box
    are both passed through the module-level ``resnet``; the two flattened
    outputs are concatenated (image first, object second).

    Args:
        scene: Scene annotation providing ``split``, ``image_filename`` and
            ``objects_detection`` (TODO: change to filepath & bbox
            coordinates).
        idx: Index of the object within ``scene['objects_detection']``.
        mode: ``'train'`` runs the model in training mode with gradients
            enabled; any other value runs it in eval mode without gradients.

    Returns:
        The concatenated feature tensor. In ``'train'`` mode it is returned
        as is; otherwise it is detached and moved to the CPU.
    """
    def _prepare(pil_image):
        # Scale, tensorise, normalise, add a batch axis, move to the device.
        tensor = normalize(to_tensor(scaler(pil_image))).unsqueeze(0)
        return Variable(tensor).to(device)

    image_name = '/'.join(('images', scene['split'], scene['image_filename']))
    bbox = mask.toBbox(scene['objects_detection'][idx]['mask'])

    # 1. Load the whole image and cut out the object region (box is XYWH).
    img = Image.open(image_name).convert(mode='RGB')
    crop_box = (bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3])
    obj = img.crop(crop_box)

    # 2. Turn both pictures into model inputs.
    img_var = _prepare(img)
    obj_var = _prepare(obj)

    # 3. Forward pass; gradients are tracked only when training.
    training = mode == 'train'
    if training:
        resnet.train()
        img_features_var = resnet(img_var)
        obj_features_var = resnet(obj_var)
    else:
        resnet.eval()
        with torch.no_grad():
            img_features_var = resnet(img_var)
            obj_features_var = resnet(obj_var)

    features = torch.cat((
        torch.flatten(img_features_var),
        torch.flatten(obj_features_var),
    ))
    if training:
        return features
    return features.detach().cpu()
def transform_rgba_instance(rgba_ins_anns: list, trans_param: list,
                            width: int, height: int):
    """Apply a flip/transform to an instance mask and re-derive annotations.

    Only the first element of ``rgba_ins_anns`` and of ``trans_param`` is
    used. The first element of ``rgba_ins_anns`` is taken as the (already
    cut) instance mask itself, because inferring the mask directly from the
    RGBA image produced too many segments.

    Args:
        rgba_ins_anns: List whose first item is the instance mask.
        trans_param: List whose first item holds the transform parameters;
            an optional ``'flip'`` entry must be ``'horizontal'`` or
            ``'vertical'``.
        width: Target image width.
        height: Target image height.

    Returns:
        Dict with ``'segmentation'`` (polygons of the transformed mask) and
        ``'bbox'`` (box of the merged polygons as a list).

    Raises:
        ValueError: On an unknown flip value, or when the transformed mask
            produces no polygon (mask moved out of the image).
    """
    params = trans_param[0]
    inst_mask = rgba_ins_anns[0]

    if 'flip' in params:
        flip_mode = params['flip']
        if flip_mode == 'horizontal':
            inst_mask = inst_mask[:, ::-1]
        elif flip_mode == 'vertical':
            inst_mask = inst_mask[::-1, :]
        else:
            raise ValueError('Unknown flip parameter {}'.format(flip_mode))

    # order=0 -> nearest interpolation
    new_inst_mask = __transform_img(inst_mask, params, (height, width),
                                    order=0)
    new_segm = binary_mask_to_polygon(new_inst_mask)
    if len(new_segm) == 0:
        # transformed mask out of image
        raise ValueError

    merged_rle = cocomask.merge(cocomask.frPyObjects(new_segm, height, width))
    return {
        'segmentation': new_segm,
        'bbox': cocomask.toBbox(merged_rle).tolist(),
    }
def create_annotation_info(annotation_id, image_id, category_info, binary_mask,
                           image_size=None, tolerance=2, bounding_box=None):
    """Build a COCO-style annotation dict for a single binary mask.

    Args:
        annotation_id: Value stored under ``"id"``.
        image_id: Value stored under ``"image_id"``.
        category_info: Mapping with ``"id"`` and ``"is_crowd"`` entries.
        binary_mask: 2-D mask array; cast to uint8 before RLE encoding.
        image_size: If given, the mask is first resized with
            ``resize_binary_mask``.
        tolerance: Passed to ``binary_mask_to_polygon`` for non-crowd
            categories.
        bounding_box: Optional precomputed box. May be a NumPy array or any
            plain sequence (list/tuple). When omitted, the box is computed
            from the encoded mask.

    Returns:
        The annotation dict, or ``None`` when the mask area is below 1 or a
        non-crowd mask yields no polygon.
    """
    if image_size is not None:
        binary_mask = resize_binary_mask(binary_mask, image_size)

    binary_mask_encoded = mask.encode(
        np.asfortranarray(binary_mask.astype(np.uint8)))

    area = mask.area(binary_mask_encoded)
    if area < 1:
        return None

    if bounding_box is None:
        bounding_box = mask.toBbox(binary_mask_encoded)

    if category_info["is_crowd"]:
        is_crowd = 1
        segmentation = binary_mask_to_rle(binary_mask)
    else:
        is_crowd = 0
        segmentation = binary_mask_to_polygon(binary_mask, tolerance)
        if not segmentation:
            return None

    annotation_info = {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_info["id"],
        # 0 or 1. 0 means a single object (no group of overlapping
        # objects); whenever iscrowd is 0 the segmentation is in polygon
        # format.
        "iscrowd": is_crowd,
        # Area of the encoded mask, i.e. of the annotated region.
        "area": area.tolist(),
        # np.asarray lets callers pass a list/tuple as well as an ndarray;
        # calling .tolist() directly would fail on plain sequences.
        "bbox": np.asarray(bounding_box).tolist(),
        # Polygon format: consecutive numbers pair up into (x, y) points, so
        # n numbers (n is always even) describe n/2 points.
        # Note: this must be a list of lists, and every inner list needs at
        # least 6 elements (three points), otherwise the COCO API filters
        # the annotation out.
        # The outer list's length depends on whether the object is split
        # into several pieces; an unoccluded object has outer length 1.
        # Points are drawn in the given order (clockwise or counter-clockwise
        # both work), e.g.:
        #   gemfield_polygons1 = [[0,0,10,0,10,20,0,10]]  # counter-clockwise
        #   gemfield_polygons2 = [[0,0,0,10,10,20,10,0]]  # clockwise
        #   gemfield_polygons3 = [[10,0,0,10,0,0,10,20]]  # mind the order:
        #       this is not a quadrilateral but two triangles
        "segmentation": segmentation,
        "width": binary_mask.shape[1],
        "height": binary_mask.shape[0],
    }
    return annotation_info
def read_ann(ann_fn):
    """Read a label image and return one proposal per non-zero label value.

    Args:
        ann_fn: Path of the annotation image; each distinct non-zero pixel
            value is treated as one instance.

    Returns:
        List of dicts, ordered by ascending label value, each holding
        ``'id'`` (the label value), ``'bbox'`` (from ``toBbox``),
        ``'segmentation'`` (RLE with ``counts`` decoded to ``str``),
        ``'conf_score'`` (the string ``"1.0"``) and ``'score'`` (``1.0``).
    """
    label_img = np.array(Image.open(ann_fn))
    # Value 0 is skipped (treated as background).
    labels = [value for value in np.unique(label_img) if value != 0]

    new_proposals = []
    for label in labels:
        binary = (label_img == label).astype(np.uint8)
        rle = encode(np.asfortranarray(binary))
        # counts comes back as bytes; store it as text instead
        rle['counts'] = rle['counts'].decode("utf-8")
        new_proposals.append({
            'id': label,
            'bbox': toBbox(rle),
            'segmentation': rle,
            'conf_score': "1.0",
            'score': 1.0,
        })
    return new_proposals
# Create mask and encode it labelMask = np.zeros((h, w)) labelMask[:, :] = S == labelIdx labelMask = np.expand_dims(labelMask, axis=2) labelMask = labelMask.astype('uint8') labelMask = np.asfortranarray(labelMask) Rs = mask.encode(labelMask) # Create annotation data anndata = {} anndata['id'] = annId anndata['image_id'] = imageIds[imageIdx] anndata['category_id'] = labelIdx - oldStuffStartIdx + newStuffStartIdx # Stuff classes start from 92 in v. 1.1 anndata['segmentation'] = Rs anndata['area'] = float(mask.area(Rs)) anndata['bbox'] = mask.toBbox(Rs).tolist() anndata['iscrowd'] = 1 # Write JSON str_ = json.dumps(anndata, indent=indent, sort_keys=True, separators=separators, ensure_ascii=ensure_ascii) outfile.write(unicode(str_)) # Increment ann id annId = annId + 1 # Add a comma and line break after each annotation if not (imageIdx == imageCount-1 and i == len(labelsStuff)-1): outfile.write(unicode(',')) outfile.write(unicode('\n')) # End