def transform_proposals(dataset_dict, image_shape, transforms, *, proposal_topk, min_box_size=0):
    if "proposal_boxes" in dataset_dict:
        boxes = transforms.apply_box(
            BoxMode.convert(
                dataset_dict.pop("proposal_boxes"),
                dataset_dict.pop("proposal_bbox_mode"),
                BoxMode.XYXY_ABS,
            )
        )
        boxes = Boxes(boxes)
        objectness_logits = torch.as_tensor(
            dataset_dict.pop("proposal_objectness_logits").astype("float32")
        )

        boxes.clip(image_shape)
        keep = boxes.nonempty(threshold=min_box_size)
        boxes = boxes[keep]
        objectness_logits = objectness_logits[keep]

        proposals = Instances(image_shape)
        proposals.proposal_boxes = boxes[:proposal_topk]
        proposals.objectness_logits = objectness_logits[:proposal_topk]
        dataset_dict["proposals"] = proposals
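# --- Usage sketch (not part of the original module) ------------------------------
# A minimal, hedged example of feeding precomputed proposals through the function
# above. Import paths assume a detectron2-style layout; adjust them to this repo's
# actual package names. The boxes, logits, and image shape below are made up.
import numpy as np
from detectron2.data import transforms as T
from detectron2.structures import BoxMode

dataset_dict = {
    "proposal_boxes": np.array([[10, 10, 50, 80], [0, 0, 20, 20]], dtype=np.float32),
    "proposal_bbox_mode": BoxMode.XYXY_ABS,
    "proposal_objectness_logits": np.array([2.5, 0.1], dtype=np.float32),
}
image_shape = (100, 120)  # (height, width) after transforms
transforms = T.TransformList([T.HFlipTransform(width=120)])
transform_proposals(dataset_dict, image_shape, transforms, proposal_topk=1000)
# dataset_dict["proposals"] is now an Instances object with proposal_boxes and
# objectness_logits fields, clipped to the image and truncated to proposal_topk.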
def gen_crop_transform_with_instance(crop_size, image_size, instance):
    # Generate a CropTransform of `crop_size` (height, width) whose window is
    # guaranteed to contain the center of the given instance's bounding box.
    crop_size = np.asarray(crop_size, dtype=np.int32)
    bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
    center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5
    assert (
        image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1]
    ), "The annotation bounding box is outside of the image!"
    assert (
        image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1]
    ), "Crop size is larger than image size!"

    min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0)
    max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0)
    max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32))

    y0 = np.random.randint(min_yx[0], max_yx[0] + 1)
    x0 = np.random.randint(min_yx[1], max_yx[1] + 1)
    return T.CropTransform(x0, y0, crop_size[1], crop_size[0])
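# --- Usage sketch (not part of the original module) ------------------------------
# Hedged example of requesting an instance-aware crop. The bbox, crop size, and
# image size are invented; the BoxMode import assumes a detectron2-style layout.
from detectron2.structures import BoxMode

instance = {"bbox": [30.0, 40.0, 20.0, 10.0], "bbox_mode": BoxMode.XYWH_ABS}
crop_tfm = gen_crop_transform_with_instance(
    crop_size=(64, 64),      # (height, width) of the crop window
    image_size=(480, 640),   # (height, width) of the input image
    instance=instance,
)
# crop_tfm is a T.CropTransform whose window always contains the box center;
# apply it with crop_tfm.apply_image(img) / crop_tfm.apply_box(boxes).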
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos
    ]
    target = Instances(image_size)
    target.gt_boxes = Boxes(boxes)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, f"Expect segmentation of 2 dimensions, got {segm.ndim}."
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
            )
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
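# --- Usage sketch (not part of the original module) ------------------------------
# Hedged example converting a single polygon annotation into an Instances object.
# The annotation values are made up; the import assumes a detectron2-style layout.
from detectron2.structures import BoxMode

annos = [
    {
        "bbox": [10.0, 10.0, 30.0, 40.0],
        "bbox_mode": BoxMode.XYWH_ABS,
        "category_id": 0,
        "segmentation": [[10.0, 10.0, 40.0, 10.0, 40.0, 50.0, 10.0, 50.0]],
    }
]
target = annotations_to_instances(annos, image_size=(100, 100), mask_format="polygon")
# target.gt_boxes, target.gt_classes, and target.gt_masks are now populated.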
def create_instances(predictions, image_size, conf_threshold=0.5):
    ret = Instances(image_size)

    score = np.asarray([x["score"] for x in predictions])
    chosen = (score > conf_threshold).nonzero()[0]
    score = score[chosen]
    bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4)
    labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen])

    ret.scores = score
    ret.pred_boxes = Boxes(BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS))
    ret.pred_classes = labels

    try:
        ret.pred_masks = [predictions[i]["segmentation"] for i in chosen]
    except KeyError:
        pass
    return ret
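# --- Usage sketch (not part of the original module) ------------------------------
# Hedged example of rebuilding Instances from COCO-style prediction dicts.
# create_instances relies on a `dataset_id_map` callable being visible in this
# module; the identity mapping below is a stand-in for the real id remapping.
def dataset_id_map(ds_id):
    return ds_id

predictions = [
    {"image_id": 1, "category_id": 0, "bbox": [10.0, 10.0, 30.0, 40.0], "score": 0.9},
    {"image_id": 1, "category_id": 2, "bbox": [0.0, 0.0, 5.0, 5.0], "score": 0.3},
]
instances = create_instances(predictions, image_size=(100, 100), conf_threshold=0.5)
# Only the first prediction survives the 0.5 confidence threshold.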
def draw_dataset_dict(self, dic):
    annos = dic.get("annotations", None)
    if annos:
        if "segmentation" in annos[0]:
            masks = [x["segmentation"] for x in annos]
        else:
            masks = None
        if "keypoints" in annos[0]:
            keypts = [x["keypoints"] for x in annos]
            keypts = np.array(keypts).reshape(len(annos), -1, 3)
        else:
            keypts = None

        boxes = [BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) for x in annos]

        labels = [x["category_id"] for x in annos]
        colors = None
        if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"):
            colors = [
                self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in labels
            ]
        names = self.metadata.get("thing_classes", None)
        if names:
            labels = [names[i] for i in labels]
        labels = [
            f"{i}" + ("|crowd" if a.get("iscrowd", 0) else "")
            for i, a in zip(labels, annos)
        ]
        self.overlay_instances(
            labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors
        )

    sem_seg = dic.get("sem_seg", None)
    if sem_seg is None and "sem_seg_file_name" in dic:
        with PathManager.open(dic["sem_seg_file_name"], "rb") as f:
            sem_seg = Image.open(f)
            sem_seg = np.asarray(sem_seg, dtype="uint8")
    if sem_seg is not None:
        self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)
    return self.output
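# --- Usage sketch (not part of the original module) ------------------------------
# Hedged example of visualizing one dataset dict, assuming this method belongs to a
# detectron2-style Visualizer and that "coco_2017_val" is a registered dataset; the
# dataset name and output path are placeholders.
import cv2
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.utils.visualizer import Visualizer

dic = DatasetCatalog.get("coco_2017_val")[0]
img = cv2.imread(dic["file_name"])[:, :, ::-1]  # BGR -> RGB
vis = Visualizer(img, metadata=MetadataCatalog.get("coco_2017_val"))
out = vis.draw_dataset_dict(dic)
cv2.imwrite("vis.png", out.get_image()[:, :, ::-1])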
def instances_to_coco_json(instances, img_id):
    num_instance = len(instances)
    if num_instance == 0:
        return []

    boxes = instances.pred_boxes.tensor.numpy()
    # COCO json expects boxes in XYWH format.
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()

    has_mask = instances.has("pred_masks")
    if has_mask:
        rles = [
            mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
            for mask in instances.pred_masks
        ]
        for rle in rles:
            # "counts" is a bytes object; decode it so the result is json-serializable.
            rle["counts"] = rle["counts"].decode("utf-8")

    has_keypoints = instances.has("pred_keypoints")
    if has_keypoints:
        keypoints = instances.pred_keypoints

    results = []
    for k in range(num_instance):
        result = {
            "image_id": img_id,
            "category_id": classes[k],
            "bbox": boxes[k],
            "score": scores[k],
        }
        if has_mask:
            result["segmentation"] = rles[k]
        if has_keypoints:
            # Shift x/y by 0.5 to convert from the continuous coordinate system
            # used internally to COCO's discrete pixel indexing.
            keypoints[k][:, :2] -= 0.5
            result["keypoints"] = keypoints[k].flatten().tolist()
        results.append(result)
    return results
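# --- Usage sketch (not part of the original module) ------------------------------
# Hedged example serializing a single detection to COCO result format; the box,
# score, and class are invented, and imports assume a detectron2-style layout.
import torch
from detectron2.structures import Boxes, Instances

inst = Instances((100, 100))
inst.pred_boxes = Boxes(torch.tensor([[10.0, 10.0, 40.0, 50.0]]))
inst.scores = torch.tensor([0.88])
inst.pred_classes = torch.tensor([3])
coco_results = instances_to_coco_json(inst, img_id=42)
# -> one dict with image_id=42, category_id=3, bbox in XYWH ([10, 10, 30, 40]) and the score.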
def transform_instance_annotations(annotation, transforms, image_size, *, keypoint_hflip_indices=None):
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    # Clip the transformed box to the image boundary; image_size is (h, w), so the
    # per-coordinate upper bounds for an XYXY box are (w, h, w, h).
    bbox = transforms.apply_box([bbox])[0].clip(min=0)
    annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    if "segmentation" in annotation:
        segm = annotation["segmentation"]
        if isinstance(segm, list):
            # polygons
            polygons = [np.asarray(p).reshape(-1, 2) for p in segm]
            annotation["segmentation"] = [
                p.reshape(-1) for p in transforms.apply_polygons(polygons)
            ]
        elif isinstance(segm, dict):
            # COCO-style RLE
            mask = mask_util.decode(segm)
            mask = transforms.apply_segmentation(mask)
            assert tuple(mask.shape[:2]) == image_size
            annotation["segmentation"] = mask
        else:
            raise ValueError(
                "Cannot transform segmentation of type '{}'!"
                "Supported types are: polygons as list[list[float] or ndarray],"
                " COCO-style RLE as a dict.".format(type(segm))
            )

    if "keypoints" in annotation:
        keypoints = transform_keypoint_annotations(
            annotation["keypoints"], transforms, image_size, keypoint_hflip_indices
        )
        annotation["keypoints"] = keypoints

    return annotation
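# --- Usage sketch (not part of the original module) ------------------------------
# Hedged example applying a horizontal flip to one annotation; values are made up
# and imports assume a detectron2-style layout.
from detectron2.data import transforms as T
from detectron2.structures import BoxMode

annotation = {
    "bbox": [10.0, 10.0, 30.0, 40.0],
    "bbox_mode": BoxMode.XYWH_ABS,
    "segmentation": [[10.0, 10.0, 40.0, 10.0, 40.0, 50.0, 10.0, 50.0]],
    "category_id": 0,
}
transforms = T.TransformList([T.HFlipTransform(width=100)])
transform_instance_annotations(annotation, transforms, image_size=(100, 100))
# annotation["bbox"] is now flipped, clipped, and stored in XYXY_ABS mode;
# the polygon coordinates were flipped as well.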
def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None):
    # Mapping from the named area range to its index into `area_ranges` below.
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],     # all
        [0**2, 32**2],      # small
        [32**2, 96**2],     # medium
        [96**2, 1e5**2],    # large
        [96**2, 128**2],    # 96-128
        [128**2, 256**2],   # 128-256
        [256**2, 512**2],   # 256-512
        [512**2, 1e5**2],   # 512-inf
    ]
    assert area in areas, f"Unknown area range: {area}"
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for prediction_dict in dataset_predictions:
        predictions = prediction_dict["proposals"]

        # Sort proposals by objectness in descending order.
        inds = predictions.objectness_logits.sort(descending=True)[1]
        predictions = predictions[inds]

        ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]])
        anno = lvis_api.load_anns(ann_ids)
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno
        ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)
        gt_boxes = Boxes(gt_boxes)
        gt_areas = torch.as_tensor([obj["area"] for obj in anno])

        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)
        if len(gt_boxes) == 0:
            continue

        if limit is not None and len(predictions) > limit:
            predictions = predictions[:limit]

        overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(predictions), len(gt_boxes))):
            # Greedy matching: find the proposal/GT pair with the largest IoU,
            # record that IoU, then remove both from further consideration.
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            box_ind = argmax_overlaps[gt_ind]
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        gt_overlaps.append(_gt_overlaps)

    gt_overlaps = (
        torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32)
    )
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # Compute recall at each IoU threshold, then average to get AR.
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
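# --- Usage sketch (not part of the original module) ------------------------------
# Hedged, self-contained example with a tiny stub standing in for the real LVIS API
# object, just to illustrate the expected input shapes; real evaluations would pass
# `lvis.LVIS(annotation_json)` and the proposals produced by inference.
import torch
from detectron2.structures import Boxes, Instances

class _FakeLVIS:
    def get_ann_ids(self, img_ids):
        return [0]

    def load_anns(self, ids):
        return [{"bbox": [10.0, 10.0, 30.0, 40.0], "area": 1200.0}]  # XYWH_ABS

proposals = Instances((100, 100))
proposals.proposal_boxes = Boxes(torch.tensor([[10.0, 10.0, 40.0, 50.0]]))
proposals.objectness_logits = torch.tensor([1.5])
dataset_predictions = [{"image_id": 1, "proposals": proposals}]
stats = _evaluate_box_proposals(dataset_predictions, _FakeLVIS(), area="all")
# stats["ar"] is 1.0 here: the single proposal exactly matches the single GT box.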
def convert_to_coco_dict(dataset_name):
    dataset_dicts = DatasetCatalog.get(dataset_name)
    metadata = MetadataCatalog.get(dataset_name)

    # Map contiguous category ids back to the original dataset ids, if such a
    # mapping was registered; otherwise use the ids as-is.
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()}
        reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id]
    else:
        reverse_id_mapper = lambda contiguous_id: contiguous_id

    categories = [
        {"id": reverse_id_mapper(id), "name": name}
        for id, name in enumerate(metadata.thing_classes)
    ]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict["annotations"]
        for annotation in anns_per_image:
            coco_annotation = {}

            # COCO requirement: bounding boxes in XYWH format.
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # Compute the area: use the segmentation if available, otherwise the box.
            if "segmentation" in annotation:
                segmentation = annotation["segmentation"]
                if isinstance(segmentation, list):
                    polygons = PolygonMasks([segmentation])
                    area = polygons.area()[0].item()
                elif isinstance(segmentation, dict):
                    area = mask_util.area(segmentation).item()
                else:
                    raise TypeError(f"Unknown segmentation type {type(segmentation)}!")
            else:
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]
                # Shift x/y by 0.5 to match COCO's discrete pixel indexing; the
                # visibility flags (every third value) are left unchanged.
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement: annotation ids start from 1.
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = float(area)
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)
            coco_annotation["category_id"] = reverse_id_mapper(annotation["category_id"])

            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                seg = coco_annotation["segmentation"] = annotation["segmentation"]
                if isinstance(seg, dict):
                    counts = seg["counts"]
                    if not isinstance(counts, str):
                        # RLE counts may be bytes; make them json-serializable.
                        seg["counts"] = counts.decode("ascii")

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for tkdetection.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
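# --- Usage sketch (not part of the original module) ------------------------------
# Hedged example: register a tiny in-memory dataset, convert it, and dump the COCO
# json. The dataset name, category name, and file names below are all placeholders;
# imports assume a detectron2-style layout.
import json
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

def _tiny_dataset():
    return [{
        "file_name": "img0.jpg", "image_id": 0, "height": 100, "width": 100,
        "annotations": [
            {"bbox": [10, 10, 30, 40], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 0}
        ],
    }]

DatasetCatalog.register("tiny_things", _tiny_dataset)
MetadataCatalog.get("tiny_things").set(thing_classes=["widget"])
coco_dict = convert_to_coco_dict("tiny_things")
with open("tiny_things_coco_format.json", "w") as f:
    json.dump(coco_dict, f)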