Example #1
    def forward_for_single_feature_map(self, anchors, objectness, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

        # gather the top-k regression deltas per image via batched indexing
        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(
            box_regression.view(-1, 4), concat_anchors.view(-1, 4)
        )

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
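
The batched `topk` + fancy-indexing pattern above depends on `permute_and_flatten` bringing each feature map into the same (N, H*W*A, C) layout as the anchors. The helper itself is not shown here; the sketch below is a minimal stand-in consistent with how it is called above, not necessarily the repository's exact code:

    import torch

    def permute_and_flatten(layer, N, A, C, H, W):
        # (N, A*C, H, W) -> (N, A, C, H, W) -> (N, H, W, A, C) -> (N, H*W*A, C)
        layer = layer.view(N, A, C, H, W)
        layer = layer.permute(0, 3, 4, 1, 2)
        return layer.reshape(N, -1, C)

    # e.g. RPN objectness for N=2 images, A=3 anchors, C=1 score, an 8x8 map
    objectness = torch.randn(2, 3 * 1, 8, 8)
    flat = permute_and_flatten(objectness, 2, 3, 1, 8, 8)
    assert flat.shape == (2, 8 * 8 * 3, 1)
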
Example #2
    def __getitem__(self, index):
        img_id = self.ids[index]
        img = Image.open(self._imgpath % img_id).convert("RGB")

        if not os.path.exists(self._annopath % img_id):
            target = None
        else:
            target = self.get_groundtruth(index)
            target = target.clip_to_image(remove_empty=True)

        if self.proposals is not None:
            if ('_' in self.ids[index] and self.image_set == "test"
                    and "2012" in self.root):
                img_id = int(self.ids[index].split('_')[1])
            else:
                img_id = int(self.ids[index])
            # compat: older proposal files store the ids under 'indexes'
            id_field = 'indexes' if 'indexes' in self.proposals else 'ids'
            roi_idx = self.proposals[id_field].index(img_id)
            rois = self.proposals['boxes'][roi_idx]
            # scores = self.proposals['scores'][roi_idx]
            # assert rois.shape[0] == scores.shape[0]
            # remove duplicate, clip, remove small boxes, and take top k
            keep = unique_boxes(rois)
            rois = rois[keep, :]
            # scores = scores[keep]
            rois = BoxList(torch.tensor(rois), img.size, mode="xyxy")
            rois = rois.clip_to_image(remove_empty=True)
            # TODO: deal with scores
            rois = remove_small_boxes(boxlist=rois, min_size=2)
            if self.top_k > 0:
                rois = rois[:self.top_k]  # slicing clamps when fewer boxes remain
                # scores = scores[:self.top_k]
        else:
            rois = None

        if self.transforms is not None:
            img, target, rois = self.transforms(img, target, rois)

        return img, target, rois, index
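
`unique_boxes` deduplicates the precomputed proposals before they are wrapped in a `BoxList`. A common implementation (this one follows Detectron's box utilities; the repository's version may differ) hashes each box and keeps the first occurrence:

    import numpy as np

    def unique_boxes(boxes, scale=1.0):
        """Return indices of unique boxes, keeping first occurrences."""
        v = np.array([1, 1e3, 1e6, 1e9])
        hashes = np.round(boxes * scale).dot(v)  # one scalar hash per (x1, y1, x2, y2) row
        _, index = np.unique(hashes, return_index=True)
        return np.sort(index)
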
Example #3
    def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = box_cls.device
        N, _, H, W = box_cls.shape
        A = box_regression.size(1) // 4
        C = box_cls.size(1) // A

        # put in the same format as anchors
        box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
        box_cls = box_cls.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
        box_regression = box_regression.reshape(N, -1, 4)

        num_anchors = A * H * W

        candidate_inds = box_cls > self.pre_nms_thresh

        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        results = []
        for (per_box_cls, per_box_regression, per_pre_nms_top_n,
                per_candidate_inds, per_anchors) in zip(
                box_cls, box_regression, pre_nms_top_n, candidate_inds, anchors):

            # Sort and select the top-N candidates.
            # TODO: most of this could be hoisted out of the loop for all
            # images.
            # TODO (Yang): not easy -- the number of detections differs per
            # image, so this part has to be done per image.
            per_box_cls = per_box_cls[per_candidate_inds]

            per_box_cls, top_k_indices = per_box_cls.topk(
                per_pre_nms_top_n.item(), sorted=False)

            per_candidate_nonzeros = per_candidate_inds.nonzero()[top_k_indices, :]

            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            per_class += 1  # shift by one: label 0 is reserved for background

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
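
`self.box_coder.decode` turns the regression deltas back into absolute boxes. Below is a minimal sketch of the standard Faster R-CNN decoding, assuming unit weights and ignoring the one-pixel `TO_REMOVE` convention some implementations apply:

    import torch

    def decode_boxes(deltas, anchors):
        # anchors and deltas: (M, 4); deltas are (dx, dy, dw, dh)
        widths = anchors[:, 2] - anchors[:, 0]
        heights = anchors[:, 3] - anchors[:, 1]
        ctr_x = anchors[:, 0] + 0.5 * widths
        ctr_y = anchors[:, 1] + 0.5 * heights

        pred_ctr_x = deltas[:, 0] * widths + ctr_x
        pred_ctr_y = deltas[:, 1] * heights + ctr_y
        pred_w = torch.exp(deltas[:, 2]) * widths
        pred_h = torch.exp(deltas[:, 3]) * heights

        # back to xyxy corners
        return torch.stack([
            pred_ctr_x - 0.5 * pred_w,
            pred_ctr_y - 0.5 * pred_h,
            pred_ctr_x + 0.5 * pred_w,
            pred_ctr_y + 0.5 * pred_h,
        ], dim=1)
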
Example #4
    def __getitem__(self, idx):
        img, anno = super(COCODataset, self).__getitem__(idx)

        # filter crowd annotations
        # TODO might be better to add an extra field
        if "lvis_v0.5" not in self.ann_file:
            anno = [obj for obj in anno if obj["iscrowd"] == 0]

        if self.proposals is not None:
            img_id = self.ids[idx]
            # compat: older proposal files store the ids under 'indexes'
            id_field = 'indexes' if 'indexes' in self.proposals else 'ids'
            roi_idx = self.proposals[id_field].index(img_id)
            rois = self.proposals['boxes'][roi_idx]

            # remove duplicate, clip, remove small boxes, and take top k
            keep = unique_boxes(rois)
            rois = rois[keep, :]
            # scores = scores[keep]
            rois = BoxList(torch.tensor(rois), img.size, mode="xyxy")
            rois = rois.clip_to_image(remove_empty=True)
            rois = remove_small_boxes(boxlist=rois, min_size=2)
            if self.top_k > 0:
                rois = rois[:self.top_k]  # slicing clamps when fewer boxes remain
                # scores = scores[:self.top_k]
        else:
            rois = None

        # support unlabeled images
        if anno == [] and 'unlabeled' in self.ann_file:
            boxes = torch.as_tensor([[0, 0, 0, 0]]).reshape(-1, 4)
            target = BoxList(boxes, img.size, mode="xyxy")
            classes = torch.tensor([0])
            target.add_field("labels", classes)
            if self._transforms is not None:
                img, target, rois = self._transforms(img, target, rois)
            target.bbox.fill_(0)
        else:
            boxes = [obj["bbox"] for obj in anno]
            boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
            target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

            classes = [obj["category_id"] for obj in anno]
            classes = [
                self.json_category_id_to_contiguous_id[c] for c in classes
            ]
            classes = torch.tensor(classes)
            target.add_field("labels", classes)

            if anno and "segmentation" in anno[0]:
                masks = [obj["segmentation"] for obj in anno]
                masks = SegmentationMask(masks, img.size, mode='poly')
                target.add_field("masks", masks)

            if anno and "keypoints" in anno[0]:
                keypoints = [obj["keypoints"] for obj in anno]
                keypoints = PersonKeypoints(keypoints, img.size)
                target.add_field("keypoints", keypoints)

            if anno and 'point' in anno[0]:
                click = [obj["point"] for obj in anno]
                click = Click(click, img.size)
                target.add_field("click", click)

            if anno and 'scribble' in anno[0]:
                scribble = [obj["scribble"] for obj in anno]
                # xmin, ymin, xmax, ymax
                scribble_box = []
                for sc in scribble:
                    if len(sc[0]) == 0:
                        # empty scribble: fall back to a dummy placeholder box
                        scribble_box.append([1, 2, 3, 4])
                    else:
                        scribble_box.append(
                            [min(sc[0]),
                             min(sc[1]),
                             max(sc[0]),
                             max(sc[1])])
                scribble_box = torch.as_tensor(scribble_box).reshape(-1, 4)  # guard against no boxes
                scribble_target = BoxList(scribble_box, img.size, mode="xyxy")
                target.add_field("scribble", scribble_target)

            if anno and 'use_as' in anno[0]:
                tag_to_ind = {'tag': 0, 'point': 1, 'scribble': 2, 'box': 3}
                use_as = [tag_to_ind[obj['use_as']] for obj in anno]
                use_as = torch.tensor(use_as)
                target.add_field("use_as", use_as)

            target = target.clip_to_image(remove_empty=True)
            if self._transforms is not None:
                img, target, rois = self._transforms(img, target, rois)
        return img, target, rois, idx
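
COCO annotations store boxes as `xywh`, which `BoxList(..., mode="xywh").convert("xyxy")` maps to corner coordinates before clipping. The conversion reduces to the following sketch, which again ignores the one-pixel `TO_REMOVE` convention some `BoxList` implementations use:

    import torch

    def xywh_to_xyxy(boxes):
        # boxes: (M, 4) as (x, y, w, h) -> (xmin, ymin, xmax, ymax)
        x, y, w, h = boxes.unbind(dim=1)
        return torch.stack([x, y, x + w, y + h], dim=1)
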