import math

import numpy as np
import torch

# Assumption: the detectron2-style structures ship in dl_lib.structures, as the
# docstring of `inference` references.
from dl_lib.structures import (BitMasks, Boxes, BoxMode, Instances, Keypoints,
                               PolygonMasks)


def inference(self, images):
    """
    Args:
        images (ImageList): an ImageList from dl_lib.structures
    """
    n, c, h, w = images.tensor.shape
    # (x | 127) + 1 rounds x up to the next multiple of 128 (strictly above x),
    # so the padded size is divisible by the network's total stride.
    new_h, new_w = (h | 127) + 1, (w | 127) + 1
    center_wh = np.array([w // 2, h // 2], dtype=np.float32)
    size_wh = np.array([new_w, new_h], dtype=np.float32)
    down_scale = self.cfg.MODEL.CENTERNET.DOWN_SCALE
    img_info = dict(center=center_wh, size=size_wh,
                    height=new_h // down_scale,
                    width=new_w // down_scale)

    # Fill the canvas with -mean/std, i.e. the value that normalizes to zero,
    # so the padding is neutral after input normalization. `.contiguous()`
    # materializes the expanded view; writing into a raw `expand` view would
    # fail because its elements share memory.
    pad_value = [-x / y for x, y in zip(self.mean, self.std)]
    aligned_img = torch.Tensor(pad_value).reshape((1, -1, 1, 1)).expand(n, c, new_h, new_w)
    aligned_img = aligned_img.to(images.tensor.device).contiguous()

    # Paste the original image at the center of the aligned canvas.
    pad_w, pad_h = math.ceil((new_w - w) / 2), math.ceil((new_h - h) / 2)
    aligned_img[..., pad_h:h + pad_h, pad_w:w + pad_w] = images.tensor

    features = self.backbone(aligned_img)
    up_fmap = self.upsample(features)
    pred_dict = self.head(up_fmap)
    results = self.decode_prediction(pred_dict, img_info)

    # center * 2 recovers the original size (one pixel short for odd sizes).
    ori_w, ori_h = img_info['center'] * 2
    det_instance = Instances((int(ori_h), int(ori_w)), **results)

    return [{"instances": det_instance}]
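# A minimal sketch (hypothetical, not part of the model) of the size-alignment
# arithmetic used in `inference` above: `(x | 127) + 1` rounds x up to the next
# multiple of 128 strictly above x, and the ceil'd half-difference gives the
# top-left offset that centers the original image in the aligned canvas.
def _demo_alignment():
    for h, w in [(480, 640), (512, 512), (100, 129)]:
        new_h, new_w = (h | 127) + 1, (w | 127) + 1
        pad_h = math.ceil((new_h - h) / 2)
        pad_w = math.ceil((new_w - w) / 2)
        assert new_h % 128 == 0 and new_w % 128 == 0
        print(f"{h}x{w} -> {new_h}x{new_w}, top-left offset ({pad_h}, {pad_w})")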
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes", "gt_masks",
            "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    target.gt_boxes = Boxes(boxes)
    target.gt_boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        polygons = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(polygons)
        else:
            assert mask_format == "bitmask", mask_format
            masks = BitMasks.from_polygon_masks(polygons, *image_size)
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
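# A hypothetical usage sketch for `annotations_to_instances`, assuming the
# detectron2-style structures imported above. The boxes, class ids, and image
# size below are made up for illustration; boxes may arrive in mixed modes and
# are converted to XYXY_ABS before clipping.
def _demo_annotations_to_instances():
    annos = [
        {"bbox": [10.0, 20.0, 50.0, 80.0], "bbox_mode": BoxMode.XYWH_ABS,
         "category_id": 3},
        {"bbox": [0.0, 0.0, 640.0, 480.0], "bbox_mode": BoxMode.XYXY_ABS,
         "category_id": 0},
    ]
    target = annotations_to_instances(annos, image_size=(480, 640))
    print(target.gt_boxes)    # Boxes in XYXY_ABS, clipped to the image
    print(target.gt_classes)  # tensor([3, 0])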
def transform_proposals(dataset_dict, image_shape, transforms,
                        min_box_side_len, proposal_topk):
    """
    Apply transformations to the proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly
            contains fields "proposal_boxes", "proposal_objectness_logits",
            "proposal_bbox_mode"
        image_shape (tuple): height, width
        transforms (TransformList):
        min_box_side_len (int): keep proposals with at least this size
        proposal_topk (int): only keep top-K scoring proposals

    The input dict is modified in-place, with the above keys removed. A new
    key "proposals" will be added. Its value is an `Instances` object which
    contains the transformed proposals in its fields "proposal_boxes" and
    "objectness_logits".
    """
    if "proposal_boxes" in dataset_dict:
        # Transform proposal boxes
        boxes = transforms.apply_box(
            BoxMode.convert(
                dataset_dict.pop("proposal_boxes"),
                dataset_dict.pop("proposal_bbox_mode"),
                BoxMode.XYXY_ABS,
            ))
        boxes = Boxes(boxes)
        objectness_logits = torch.as_tensor(
            dataset_dict.pop("proposal_objectness_logits").astype("float32"))

        boxes.clip(image_shape)
        keep = boxes.nonempty(threshold=min_box_side_len)
        boxes = boxes[keep]
        objectness_logits = objectness_logits[keep]

        proposals = Instances(image_shape)
        proposals.proposal_boxes = boxes[:proposal_topk]
        proposals.objectness_logits = objectness_logits[:proposal_topk]
        dataset_dict["proposals"] = proposals
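# A hypothetical usage sketch for `transform_proposals`. It assumes detectron2's
# TransformList/NoOpTransform (or this repo's equivalents) are importable; the
# proposal arrays are made up. The tiny second box is dropped by the
# `min_box_side_len` filter, and the dict is modified in place.
def _demo_transform_proposals():
    from detectron2.data.transforms import NoOpTransform, TransformList

    dataset_dict = {
        "proposal_boxes": np.array([[10.0, 10.0, 60.0, 90.0],
                                    [0.0, 0.0, 2.0, 2.0]]),
        "proposal_bbox_mode": BoxMode.XYXY_ABS,
        "proposal_objectness_logits": np.array([2.5, 0.1]),
    }
    transform_proposals(dataset_dict, (480, 640),
                        TransformList([NoOpTransform()]),
                        min_box_side_len=4, proposal_topk=100)
    print(dataset_dict["proposals"])  # 1 proposal left; raw keys were popped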