def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Build per-image proposals from the outputs of one feature level.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list with one BoxList per image; each carries an "objectness"
        field and has been clipped, size-filtered and passed through NMS.
    """
    device = objectness.device
    batch_size, anchors_per_loc, height, width = objectness.shape

    # Re-layout both heads to match the anchors, then apply the sigmoid.
    flat_scores = permute_and_flatten(
        objectness, batch_size, anchors_per_loc, 1, height, width
    )
    flat_scores = flat_scores.view(batch_size, -1).sigmoid()
    deltas = permute_and_flatten(
        box_regression, batch_size, anchors_per_loc, 4, height, width
    )

    # Per image, keep at most `self.pre_nms_top_n` best-scoring candidates.
    keep_count = min(self.pre_nms_top_n, anchors_per_loc * height * width)
    top_scores, top_idx = flat_scores.topk(keep_count, dim=1, sorted=True)

    # Column vector of image indices so it broadcasts against `top_idx`.
    image_rows = torch.arange(batch_size, device=device)[:, None]
    deltas = deltas[image_rows, top_idx]

    sizes = [boxes.size for boxes in anchors]
    stacked_anchors = torch.cat([boxes.bbox for boxes in anchors], dim=0)
    top_anchors = stacked_anchors.reshape(batch_size, -1, 4)[image_rows, top_idx]

    decoded = self.box_coder.decode(deltas.view(-1, 4), top_anchors.view(-1, 4))
    decoded = decoded.view(batch_size, -1, 4)

    result = []
    for boxes, scores, size in zip(decoded, top_scores, sizes):
        candidates = BoxList(boxes, size, mode="xyxy")
        candidates.add_field("objectness", scores)
        candidates = candidates.clip_to_image(remove_empty=False)
        candidates = remove_small_boxes(candidates, self.min_size)
        candidates = boxlist_nms(
            candidates,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(candidates)
    return result
def __getitem__(self, index):
    """Load one sample: RGB image, ground truth and optional proposals.

    Args:
        index: position of the sample in ``self.ids``.

    Returns:
        Tuple ``(img, target, rois, index)``. ``target`` is ``None`` when
        the annotation file for the image does not exist; ``rois`` is
        ``None`` when ``self.proposals`` is ``None``. If ``self.transforms``
        is set, it is applied to ``img``, ``target`` and ``rois`` together.

    Raises:
        ValueError: if the image id has no entry in the proposal id list
            (raised by ``list.index``) or cannot be converted to ``int``.
    """
    img_id = self.ids[index]
    img = Image.open(self._imgpath % img_id).convert("RGB")

    if os.path.exists(self._annopath % img_id):
        target = self.get_groundtruth(index)
        target = target.clip_to_image(remove_empty=True)
    else:
        target = None

    if self.proposals is not None:
        # Proposals are keyed by integer ids. For the 2012 test split the
        # numeric part is taken from after the first underscore of the id.
        if "_" in img_id and self.image_set == "test" and "2012" in self.root:
            proposal_key = int(img_id.split("_")[1])
        else:
            proposal_key = int(img_id)

        # compat fix: proposal files name the id list 'indexes' or 'ids'
        id_field = "indexes" if "indexes" in self.proposals else "ids"
        roi_idx = self.proposals[id_field].index(proposal_key)
        rois = self.proposals["boxes"][roi_idx]

        # remove duplicate, clip, remove small boxes, and take top k
        # TODO: deal with scores (they are currently neither loaded nor
        # filtered alongside the boxes)
        keep = unique_boxes(rois)
        rois = rois[keep, :]
        rois = BoxList(torch.tensor(rois), img.size, mode="xyxy")
        rois = rois.clip_to_image(remove_empty=True)
        rois = remove_small_boxes(boxlist=rois, min_size=2)

        if self.top_k > 0:
            # Fewer than top_k boxes may survive the filtering above, so
            # clamp the count instead of indexing past the end.
            num_keep = min(self.top_k, rois.bbox.shape[0])
            rois = rois[torch.arange(num_keep)]
    else:
        rois = None

    if self.transforms is not None:
        img, target, rois = self.transforms(img, target, rois)

    return img, target, rois, index
def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
    """
    Decode the detections of one feature level, image by image.

    Arguments:
        anchors: list[BoxList]
        box_cls: tensor of size N, A * C, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list with one BoxList per image, carrying "labels" and "scores"
        fields; boxes are clipped to the image and filtered by
        ``self.min_size``. No NMS is applied here.
    """
    N, _, H, W = box_cls.shape
    A = box_regression.size(1) // 4
    C = box_cls.size(1) // A

    # put in the same format as anchors
    box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
    box_cls = box_cls.sigmoid()

    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
    box_regression = box_regression.reshape(N, -1, 4)

    # Candidates are (location, class) pairs whose score passes the
    # threshold; per image, at most `self.pre_nms_top_n` of them are kept.
    candidate_inds = box_cls > self.pre_nms_thresh
    pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
    pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

    results = []
    per_image = zip(
        box_cls, box_regression, pre_nms_top_n, candidate_inds, anchors
    )
    for (per_box_cls, per_box_regression, per_pre_nms_top_n,
         per_candidate_inds, per_anchors) in per_image:
        # Sort and select TopN
        # TODO most of this can be made out of the loop for
        # all images.
        # TODO:Yang: Not easy to do. Because the numbers of detections are
        # different in each image. Therefore, this part needs to be done
        # per image.
        per_box_cls = per_box_cls[per_candidate_inds]
        per_box_cls, top_k_indices = per_box_cls.topk(
            per_pre_nms_top_n, sorted=False
        )

        # Rows of nonzero() are (location index, class index) pairs, in
        # the same order as the masked scores selected above.
        per_candidate_nonzeros = per_candidate_inds.nonzero()[top_k_indices, :]
        per_box_loc = per_candidate_nonzeros[:, 0]
        per_class = per_candidate_nonzeros[:, 1]
        # Shift class indices by one; presumably label 0 is reserved for
        # background -- confirm against the label mapping.
        per_class += 1

        detections = self.box_coder.decode(
            per_box_regression[per_box_loc, :].view(-1, 4),
            per_anchors.bbox[per_box_loc, :].view(-1, 4),
        )

        boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        results.append(boxlist)

    return results
def __getitem__(self, idx):
    """Load one sample: image, target BoxList and optional proposals.

    Args:
        idx: position of the sample in ``self.ids``.

    Returns:
        Tuple ``(img, target, rois, idx)``. ``rois`` is ``None`` when
        ``self.proposals`` is ``None``. ``target`` always has a "labels"
        field; "masks", "keypoints", "click", "scribble" and "use_as" are
        added when the first annotation carries the matching key. For an
        image without annotations from an 'unlabeled' annotation file the
        target is a single all-zero box with label 0.

    Raises:
        ValueError: if the image id has no entry in the proposal id list
            (raised by ``list.index``).
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # filter crowd annotations
    # TODO might be better to add an extra field
    if "lvis_v0.5" not in self.ann_file:
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

    if self.proposals is not None:
        img_id = self.ids[idx]
        # compat fix: proposal files name the id list 'indexes' or 'ids'
        id_field = "indexes" if "indexes" in self.proposals else "ids"
        roi_idx = self.proposals[id_field].index(img_id)
        rois = self.proposals["boxes"][roi_idx]

        # remove duplicate, clip, remove small boxes, and take top k
        keep = unique_boxes(rois)
        rois = rois[keep, :]
        rois = BoxList(torch.tensor(rois), img.size, mode="xyxy")
        rois = rois.clip_to_image(remove_empty=True)
        rois = remove_small_boxes(boxlist=rois, min_size=2)

        if self.top_k > 0:
            # Fewer than top_k boxes may survive the filtering above, so
            # clamp the count instead of indexing past the end.
            num_keep = min(self.top_k, rois.bbox.shape[0])
            rois = rois[torch.arange(num_keep)]
    else:
        rois = None

    # support unlabeled images
    if anno == [] and "unlabeled" in self.ann_file:
        boxes = torch.as_tensor([[0, 0, 0, 0]]).reshape(-1, 4)
        target = BoxList(boxes, img.size, mode="xyxy")
        classes = torch.tensor([0])
        target.add_field("labels", classes)

        if self._transforms is not None:
            img, target, rois = self._transforms(img, target, rois)
        # Zero the placeholder box again after the transforms have run,
        # so the returned target is all-zero whatever they did to it.
        target.bbox.fill_(0)
    else:
        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

        classes = [
            self.json_category_id_to_contiguous_id[obj["category_id"]]
            for obj in anno
        ]
        target.add_field("labels", torch.tensor(classes))

        # Optional fields: the first annotation decides whether a field is
        # present for the whole image.
        if anno and "segmentation" in anno[0]:
            masks = [obj["segmentation"] for obj in anno]
            masks = SegmentationMask(masks, img.size, mode="poly")
            target.add_field("masks", masks)

        if anno and "keypoints" in anno[0]:
            keypoints = [obj["keypoints"] for obj in anno]
            keypoints = PersonKeypoints(keypoints, img.size)
            target.add_field("keypoints", keypoints)

        if anno and "point" in anno[0]:
            click = [obj["point"] for obj in anno]
            click = Click(click, img.size)
            target.add_field("click", click)

        if anno and "scribble" in anno[0]:
            # Each scribble is reduced to the box spanned by its first two
            # coordinate lists: xmin, ymin, xmax, ymax
            scribble_box = []
            for sc in (obj["scribble"] for obj in anno):
                if len(sc[0]) == 0:
                    # NOTE(review): placeholder for an empty scribble; the
                    # values look arbitrary -- confirm how it is consumed.
                    scribble_box.append([1, 2, 3, 4])
                else:
                    scribble_box.append(
                        [min(sc[0]), min(sc[1]), max(sc[0]), max(sc[1])]
                    )
            scribble_box = torch.as_tensor(scribble_box).reshape(-1, 4)
            scribble_target = BoxList(scribble_box, img.size, mode="xyxy")
            target.add_field("scribble", scribble_target)

        if anno and "use_as" in anno[0]:
            tag_to_ind = {'tag': 0, 'point': 1, 'scribble': 2, 'box': 3}
            use_as = [tag_to_ind[obj['use_as']] for obj in anno]
            target.add_field("use_as", torch.tensor(use_as))

        target = target.clip_to_image(remove_empty=True)

        if self._transforms is not None:
            img, target, rois = self._transforms(img, target, rois)

    return img, target, rois, idx