def _collate_and_decode_sample(
    self,
    data: Tuple[Tuple[str, str], Tuple[Tuple[str, io.IOBase], Tuple[str, io.IOBase]]],
    *,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> Dict[str, Any]:
    """Unpack a (key, (image, annotation)) pair and decode it into a sample dict."""
    (category, _), (image_data, ann_data) = data
    image_path, image_buffer = image_data
    ann_path, ann_buffer = ann_data

    label = self.info.categories.index(category)

    # Without a decoder the raw image buffer is passed through untouched.
    image = decoder(image_buffer) if decoder else image_buffer

    ann = read_mat(ann_buffer)
    # Reorder the stored box_coord values into (x1, y1, x2, y2).
    coords = ann["box_coord"].astype(np.int64).squeeze()[[2, 0, 3, 1]]
    bbox = BoundingBox(coords, format="xyxy")
    contour = Feature(ann["obj_contour"].T)

    return dict(
        category=category,
        label=label,
        image=image,
        image_path=image_path,
        bbox=bbox,
        contour=contour,
        ann_path=ann_path,
    )
def _decode_instances_anns(self, anns: List[Dict[str, Any]], image_meta: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a list of COCO-style instance annotations into feature tensors."""
    image_size = (image_meta["height"], image_meta["width"])
    labels = [ann["category_id"] for ann in anns]

    # TODO: create a segmentation feature
    masks = torch.stack(
        [
            self._segmentation_to_mask(ann["segmentation"], is_crowd=ann["iscrowd"], image_size=image_size)
            for ann in anns
        ]
    )

    return dict(
        segmentations=_Feature(masks),
        areas=_Feature([ann["area"] for ann in anns]),
        crowds=_Feature([ann["iscrowd"] for ann in anns], dtype=torch.bool),
        bounding_boxes=BoundingBox(
            [ann["bbox"] for ann in anns],
            format="xywh",
            image_size=image_size,
        ),
        labels=Label(labels, categories=self._categories),
        super_categories=[
            self._category_to_super_category[self._categories[label]] for label in labels
        ],
        ann_ids=[ann["id"] for ann in anns],
    )
def get_params(self, sample: Any) -> Dict[str, Any]:
    """Sample a uniformly random top-left corner for a crop of ``self.crop_size``."""
    image_size = SampleQuery(sample).image_size()
    image_height, image_width = image_size
    crop_height, crop_width = self.crop_size

    # If the image is not larger than the crop along an axis, anchor at 0.
    if crop_width < image_width:
        x = torch.randint(0, image_width - crop_width + 1, size=())
    else:
        x = 0
    if crop_height < image_height:
        y = torch.randint(0, image_height - crop_height + 1, size=())
    else:
        y = 0

    crop_box = BoundingBox.from_parts(x, y, crop_width, crop_height, image_size=image_size, format="xywh")
    return dict(crop_box=crop_box)
def get_params(self, sample: Any) -> Dict[str, Any]:
    """Build a crop box of ``self.crop_size`` placed at the image center."""
    image_size = SampleQuery(sample).image_size()
    image_height, image_width = image_size
    center_x = image_width // 2
    center_y = image_height // 2
    crop_height, crop_width = self.crop_size
    # "cxcywh": the box is described by its center point plus width/height.
    crop_box = BoundingBox.from_parts(
        center_x, center_y, crop_width, crop_height, image_size=image_size, format="cxcywh"
    )
    return dict(crop_box=crop_box)
def _decode_detection_ann(self, buffer: io.IOBase) -> torch.Tensor:
    """Parse a VOC detection XML annotation into a BoundingBox feature.

    The XML is parsed with ``VOCDetection.parse_voc_xml`` and one box per
    ``object`` entry is extracted from its ``bndbox`` fields.
    """
    result = VOCDetection.parse_voc_xml(ElementTree.parse(buffer).getroot())  # type: ignore[arg-type]
    instances = result["annotation"]["object"]
    coords = [
        [int(instance["bndbox"][part]) for part in ("xmin", "ymin", "xmax", "ymax")]
        for instance in instances
    ]
    # The coordinates are (xmin, ymin, xmax, ymax); declare the format explicitly
    # instead of relying on the BoundingBox default, matching the other VOC helper.
    return BoundingBox(coords, format="xyxy")
def bounding_box(input: BoundingBox, *, size: Tuple[int, int], **_: Any) -> BoundingBox:
    """Rescale the box from its current image size to ``size`` (height, width)."""
    old_height, old_width = input.image_size
    new_height, new_width = size
    width_scale = new_width / old_width
    height_scale = new_height / old_height

    x1, y1, x2, y2 = input.convert("xyxy").to_parts()
    # Scale each coordinate by the per-axis ratio, then restore the input format.
    return BoundingBox.from_parts(
        x1 * width_scale,
        y1 * height_scale,
        x2 * width_scale,
        y2 * height_scale,
        like=input,
        format="xyxy",
        image_size=size,
    ).convert(input.format)
def horizontal_flip(input: features.BoundingBox) -> features.BoundingBox:
    """Mirror the box around the vertical center line of its image."""
    x, y, w, h = input.convert("xywh").to_parts()
    image_width = input.image_size[1]
    # The flipped left edge is the mirror image of the original right edge.
    flipped_x = image_width - (x + w)
    return features.BoundingBox.from_parts(flipped_x, y, w, h, like=input, format="xywh")
def _prepare_sample(
    self, data: Tuple[Tuple[str, BinaryIO], Tuple[int, int, int, int, int, str]]
) -> Dict[str, Any]:
    """Join an image file handle with its annotation row into a sample dict."""
    image_data, target = data
    path, buffer = image_data
    image = EncodedImage.from_file(buffer)
    # target[:4] holds (x1, y1, x2, y2); target[4] is a 1-based class id.
    return dict(
        path=path,
        image=image,
        label=Label(target[4] - 1, categories=self.categories),
        bounding_box=BoundingBox(target[:4], format="xyxy", image_size=image.image_size),
    )
def _2011_prepare_ann(
    self, data: Tuple[str, Tuple[List[str], Tuple[str, BinaryIO]]], image_size: Tuple[int, int]
) -> Dict[str, Any]:
    """Build the annotation dict (bounding box + encoded segmentation) for one sample."""
    _, (bounding_box_data, segmentation_data) = data
    segmentation_path, segmentation_buffer = segmentation_data
    # The leading field of bounding_box_data is skipped (presumably an image
    # id — confirm against the caller); the rest are x, y, width, height.
    coords = [float(part) for part in bounding_box_data[1:]]
    return dict(
        bounding_box=BoundingBox(coords, format="xywh", image_size=image_size),
        segmentation_path=segmentation_path,
        segmentation=EncodedImage.from_file(segmentation_buffer),
    )
def _2010_load_ann(
    self,
    data: Tuple[str, Tuple[str, io.IOBase]],
    *,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> Dict[str, Any]:
    """Read a .mat annotation buffer into a dict of bounding box and segmentation."""
    _, (path, buffer) = data
    content = read_mat(buffer)
    # NOTE(review): the stored order ("left", "bottom", "right", "top") is mapped
    # straight onto "xyxy"; verify that "bottom" really holds y1 in this dataset.
    coords = [int(content["bbox"][coord]) for coord in ("left", "bottom", "right", "top")]
    return dict(
        ann_path=path,
        bounding_box=BoundingBox(coords, format="xyxy"),
        segmentation=Feature(content["seg"]),
    )
def _2011_load_ann(
    self,
    data: Tuple[str, Tuple[List[str], Tuple[str, io.IOBase]]],
    *,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> Dict[str, Any]:
    """Load a (bounding box fields, segmentation image) annotation pair."""
    _, (bounding_box_data, segmentation_data) = data
    segmentation_path, segmentation_buffer = segmentation_data

    if decoder:
        segmentation = Feature(decoder(segmentation_buffer))
    else:
        # Without a decoder, hand the raw buffer through untouched.
        segmentation = segmentation_buffer

    return dict(
        bounding_box=BoundingBox([float(part) for part in bounding_box_data[1:]], format="xywh"),
        segmentation_path=segmentation_path,
        segmentation=segmentation,
    )
def _2010_prepare_ann(
    self, data: Tuple[str, Tuple[str, BinaryIO]], image_size: Tuple[int, int]
) -> Dict[str, Any]:
    """Read a .mat annotation buffer into a dict of bounding box and segmentation."""
    _, (path, buffer) = data
    content = read_mat(buffer)
    # NOTE(review): the stored order ("left", "bottom", "right", "top") is mapped
    # straight onto "xyxy"; verify that "bottom" really holds y1 in this dataset.
    coords = [int(content["bbox"][coord]) for coord in ("left", "bottom", "right", "top")]
    return dict(
        ann_path=path,
        bounding_box=BoundingBox(coords, format="xyxy", image_size=image_size),
        segmentation=_Feature(content["seg"]),
    )
def _prepare_sample(self, data: Tuple[Tuple[str, Any], Dict[str, Any]]) -> Dict[str, Any]:
    """Combine an image file handle with its CSV metadata row into a sample dict."""
    (path, buffer), csv_info = data
    label = int(csv_info["ClassId"])
    roi = [int(csv_info[key]) for key in ("Roi.X1", "Roi.Y1", "Roi.X2", "Roi.Y2")]
    image_size = (int(csv_info["Height"]), int(csv_info["Width"]))
    return {
        "path": path,
        "image": EncodedImage.from_file(buffer),
        "label": Label(label, categories=self._categories),
        "bounding_box": BoundingBox(roi, format="xyxy", image_size=image_size),
    }
def _prepare_detection_ann(self, buffer: BinaryIO) -> Dict[str, Any]:
    """Parse a VOC detection annotation into bounding boxes and labels."""
    anns = self._parse_detection_ann(buffer)
    instances = anns["object"]
    image_size = cast(Tuple[int, int], tuple(int(anns["size"][dim]) for dim in ("height", "width")))
    boxes = [
        [int(instance["bndbox"][part]) for part in ("xmin", "ymin", "xmax", "ymax")]
        for instance in instances
    ]
    return dict(
        bounding_boxes=BoundingBox(boxes, format="xyxy", image_size=image_size),
        labels=Label(
            [self.categories.index(instance["name"]) for instance in instances],
            categories=self.categories,
        ),
    )
def _prepare_sample(
    self,
    data: Tuple[
        Tuple[str, Tuple[Tuple[str, List[str]], Tuple[str, BinaryIO]]],
        Tuple[
            Tuple[str, Dict[str, str]],
            Tuple[str, Dict[str, str]],
            Tuple[str, Dict[str, str]],
            Tuple[str, Dict[str, str]],
        ],
    ],
) -> Dict[str, Any]:
    """Assemble a sample from the image and its four annotation rows
    (identity, attributes, bounding box, landmarks)."""
    split_and_image_data, ann_data = data
    _, (_, image_data) = split_and_image_data
    path, buffer = image_data
    image = EncodedImage.from_file(buffer)

    (_, identity), (_, attributes), (_, bounding_box), (_, landmarks) = ann_data

    # Landmark columns come in "<name>_x"/"<name>_y" pairs; strip the two-char
    # suffix to recover the set of landmark names.
    landmark_names = {key[:-2] for key in landmarks.keys()}

    return dict(
        path=path,
        image=image,
        identity=Label(int(identity["identity"])),
        attributes={attr: value == "1" for attr, value in attributes.items()},
        bounding_box=BoundingBox(
            [int(bounding_box[key]) for key in ("x_1", "y_1", "width", "height")],
            format="xywh",
            image_size=image.image_size,
        ),
        landmarks={
            name: _Feature((int(landmarks[f"{name}_x"]), int(landmarks[f"{name}_y"])))
            for name in landmark_names
        },
    )
def _collate_and_decode(
    self,
    data: Tuple[Tuple[str, Any], Dict[str, Any]],
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> Dict[str, Any]:
    """Join an image handle with its CSV metadata into a (optionally decoded) sample."""
    (image_path, image_buffer), csv_info = data
    label = int(csv_info["ClassId"])
    roi = torch.tensor([int(csv_info[key]) for key in ("Roi.X1", "Roi.Y1", "Roi.X2", "Roi.Y2")])
    bbox = BoundingBox(
        roi,
        format="xyxy",
        image_size=(int(csv_info["Height"]), int(csv_info["Width"])),
    )
    return {
        "image_path": image_path,
        # Without a decoder the raw buffer is passed through untouched.
        "image": decoder(image_buffer) if decoder else image_buffer,
        "label": Label(label, category=self.categories[label]),
        "bbox": bbox,
    }
def _collate_and_decode_sample(
    self,
    data: Tuple[Tuple[str, Tuple[Tuple[str, Dict[str, Any]], Tuple[str, io.IOBase]]], Tuple[str, Dict[str, Any]]],
    *,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> Dict[str, Any]:
    """Unpack an image with its combined annotation dict into a sample dict."""
    split_and_image_data, ann_data = data
    _, (_, image_data) = split_and_image_data
    path, buffer = image_data
    _, ann = ann_data

    # Without a decoder the raw image buffer is passed through untouched.
    image = decoder(buffer) if decoder else buffer

    identity = Label(int(ann["identity"]["identity"]))
    attributes = {attr: value == "1" for attr, value in ann["attributes"].items()}
    # The annotation stores (x_1, y_1, width, height): declare the "xywh" format
    # explicitly so downstream conversions interpret the coordinates correctly,
    # matching the other sample-preparation helper in this file.
    bbox = BoundingBox(
        [int(ann["bbox"][key]) for key in ("x_1", "y_1", "width", "height")],
        format="xywh",
    )
    # Landmark columns come in "<name>_x"/"<name>_y" pairs; strip the two-char
    # suffix to recover the set of landmark names.
    landmarks = {
        landmark: Feature((int(ann["landmarks"][f"{landmark}_x"]), int(ann["landmarks"][f"{landmark}_y"])))
        for landmark in {key[:-2] for key in ann["landmarks"].keys()}
    }
    return dict(
        path=path,
        image=image,
        identity=identity,
        attributes=attributes,
        bbox=bbox,
        landmarks=landmarks,
    )
def _prepare_sample(
    self, data: Tuple[Tuple[str, str], Tuple[Tuple[str, BinaryIO], Tuple[str, BinaryIO]]]
) -> Dict[str, Any]:
    """Decode one (image, .mat annotation) pair into a sample dict."""
    (category, _), (image_data, ann_data) = data
    image_path, image_buffer = image_data
    ann_path, ann_buffer = ann_data

    image = EncodedImage.from_file(image_buffer)
    ann = read_mat(ann_buffer)

    # Reorder the stored box_coord values into (x1, y1, x2, y2).
    box_coord = ann["box_coord"].astype(np.int64).squeeze()[[2, 0, 3, 1]]

    return dict(
        label=Label.from_category(category, categories=self._categories),
        image_path=image_path,
        image=image,
        ann_path=ann_path,
        bounding_box=BoundingBox(box_coord, format="xyxy", image_size=image.image_size),
        contour=_Feature(ann["obj_contour"].T),
    )
def resize(input: features.BoundingBox, size: torch.Tensor) -> features.BoundingBox:
    """Scale a bounding box to match an image resized to ``size`` (height, width)."""
    old_height, old_width = input.image_size
    new_height, new_width = size
    width_scale = new_width / old_width
    height_scale = new_height / old_height

    x1, y1, x2, y2 = input.convert("xyxy").to_parts()
    # Scale each coordinate by the per-axis ratio and record the new image size.
    return features.BoundingBox.from_parts(
        x1 * width_scale,
        y1 * height_scale,
        x2 * width_scale,
        y2 * height_scale,
        like=input,
        format="xyxy",
        image_size=tuple(size.tolist()),
    )
def bounding_box(input: BoundingBox) -> BoundingBox:
    """Flip a bounding box horizontally within its image, preserving its format."""
    x, y, w, h = input.convert("xywh").to_parts()
    # The flipped left edge is the mirror image of the original right edge.
    flipped_x = input.image_size[1] - (x + w)
    flipped = BoundingBox.from_parts(flipped_x, y, w, h, like=input, format="xywh")
    return flipped.convert(input.format)
def image(input: Image, *, crop_box: BoundingBox) -> Image:
    """Crop the image to ``crop_box``, including both corner pixels."""
    # FIXME: pad input in case it is smaller than crop_box
    x1, y1, x2, y2 = crop_box.convert("xyxy").to_parts()
    # Both corners are included in the crop, hence the +1 on the upper bounds.
    cropped = input[..., y1 : y2 + 1, x1 : x2 + 1]
    return Image(cropped, like=input)  # type: ignore[misc]
def __init__(self, crop_box: BoundingBox) -> None:
    """Remember the crop region, normalized to "xyxy" format."""
    super().__init__()
    normalized_box = crop_box.convert("xyxy")
    self.crop_box = normalized_box