Example #1
 def func(x):
     boxes = Boxes(x)
     return boxes.area()
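A minimal usage sketch for Example #1 (hypothetical input; assumes the snippet lives at module level with detectron2 installed). Boxes expects an N x 4 tensor in XYXY order, and area() returns (x2 - x1) * (y2 - y1) per box.

import torch
from detectron2.structures import Boxes

x = torch.tensor([[0.0, 0.0, 4.0, 5.0]])  # one XYXY box
print(func(x))  # tensor([20.])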
Example #2
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width
        mask_format (str): "polygon" or "bitmask", the format used to store the
            instance masks.

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        bo_segms = [obj["bg_object_segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
            bo_masks = PolygonMasks(bo_segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim
                    )
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            # torch.from_numpy does not support array with negative stride.
            masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
            )
        target.gt_masks = masks
        # "bg_object_segmentation" is only converted in the "polygon" branch above,
        # so guard the assignment to avoid an undefined `bo_masks` in bitmask mode.
        if mask_format == "polygon":
            target.gt_bo_masks = bo_masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
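A hedged call sketch for annotations_to_instances above. The annotation dict is hypothetical and only illustrates the keys the function reads: "bbox", "bbox_mode", "category_id", plus the "segmentation" / "bg_object_segmentation" pair this modified version expects.

annos = [{
    "bbox": [10.0, 20.0, 40.0, 60.0],                                   # XYWH
    "bbox_mode": BoxMode.XYWH_ABS,
    "category_id": 3,
    "segmentation": [[10.0, 20.0, 50.0, 20.0, 50.0, 80.0, 10.0, 80.0]],
    "bg_object_segmentation": [],                                       # no occluder polygons
}]
instances = annotations_to_instances(annos, image_size=(480, 640))
# instances.gt_boxes, instances.gt_classes, instances.gt_masks, instances.gt_bo_masks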
Example #3
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.

                For now, each item in the list is a dict that contains:

                * "image": Tensor, image in (C, H, W) format.
                * "instances": Instances
                * "sem_seg": semantic segmentation ground truth.
                * Other information that's included in the original dicts, such as:
                  "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            list[dict]:
                each dict is the results for one image. The dict contains the following keys:

                * "instances": see :meth:`GeneralizedRCNN.forward` for its format.
                * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format.
                * "panoptic_seg": available when `PANOPTIC_FPN.COMBINE.ENABLED`.
                  See the return value of
                  :func:`combine_semantic_and_instance_outputs` for its format.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        images = ImageList.from_tensors(images, SIZE_DIVISIBILITY)
        score_sem, score_inst, score_conf = self.seg_model(images.tensor)

        h, w = images.tensor.size(2), images.tensor.size(3)
        # F.upsample is deprecated; F.interpolate is the drop-in replacement.
        score_inst = F.interpolate(input=score_inst, size=(h, w), mode='bilinear')
        score_sem = F.interpolate(input=score_sem, size=(h, w), mode='bilinear')

        score_conf_softmax = self.softmax_layer(score_conf)

        score_inst_sig = self.sigmoid_layer(score_inst)
        score_inst_sig_stuff = score_inst_sig[:, :BACKGROUND_NUM]
        score_inst_sig_thing = score_inst_sig[:, BACKGROUND_NUM:]

        if "sem_seg" in batched_inputs[0]:
            gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs]
            gt_sem_seg = ImageList.from_tensors(gt_sem_seg, SIZE_DIVISIBILITY,
                                                IGNORE_LABEL_SEM).tensor
        else:
            gt_sem_seg = None

        if "instances" in batched_inputs[0]:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_instances = None
        if self.training:
            assert (gt_sem_seg - 1 < 0).sum() == 0
            sem_seg_losses = self.criterion_sem(score_sem, gt_sem_seg - 1)

            gt_sem_seg[gt_sem_seg > BACKGROUND_NUM] = 0
            gt_stuff = F.one_hot(gt_sem_seg,
                                 num_classes=BACKGROUND_NUM + 1).permute(
                                     0, 3, 1, 2)
            gt_stuff = gt_stuff[:, 1:]

            num_inst = sum(
                [len(gt_instances[i]) for i in range(len(gt_instances))])
            num_inst = torch.as_tensor([num_inst],
                                       dtype=torch.float,
                                       device=self.device)
            if is_dist_avail_and_initialized():
                torch.distributed.all_reduce(num_inst)
            num_inst = torch.clamp(num_inst / get_world_size(), min=1).item()

            loss_stuff_dice = 0.
            loss_thing_dice = 0.
            loss_stuff_focal = 0.
            loss_conf = 0.

            for i in range(len(batched_inputs)):
                gt_inst = gt_instances[i]
                gt_classes = gt_inst.gt_classes

                if gt_inst.has('gt_masks'):
                    gt_masks = gt_inst.gt_masks
                    masks = torch.stack([
                        torch.from_numpy(
                            polygons_to_bitmask(poly, gt_inst.image_size[0],
                                                gt_inst.image_size[1])).to(
                                                    self.device)
                        for poly in gt_masks.polygons
                    ], 0)
                    masks_pad = masks.new_full(
                        (masks.shape[0], images.tensor.shape[-2],
                         images.tensor.shape[-1]), False)
                    masks_pad[:, :masks.shape[-2], :masks.shape[-1]].copy_(
                        masks)
                else:
                    masks_pad = torch.zeros(
                        [0, images.tensor.shape[-2], images.tensor.shape[-1]],
                        dtype=torch.bool,
                        device=self.device)

                row_ind, col_ind = MatchDice(score_inst_sig_thing[i:i + 1],
                                             torch.unsqueeze(masks_pad, 0),
                                             score_conf_softmax[i:i + 1],
                                             gt_classes)
                col_ind_empty = np.setdiff1d(
                    np.arange(score_inst_sig_thing[i:i + 1].shape[1]), col_ind)

                score_inst_sig_perm = torch.cat(
                    (score_inst_sig_stuff[i],
                     score_inst_sig_thing[i, col_ind, :, :]), 0)

                target_inst_perm = torch.cat(
                    (gt_stuff[i].float(), masks_pad[row_ind].float()), 0)

                loss_stuff_dice_tmp, loss_thing_dice_tmp = dice_loss(
                    score_inst_sig_perm,
                    target_inst_perm,
                    num_inst,
                    background_channels=BACKGROUND_NUM,
                    valid_mask=None,
                    sigmoid_clip=True)
                loss_stuff_dice += loss_stuff_dice_tmp
                loss_thing_dice += loss_thing_dice_tmp

                target_conf = gt_classes.new_full((score_conf.shape[1], ),
                                                  FOREGROUND_NUM)
                target_conf[:len(gt_classes[row_ind])] = gt_classes[row_ind]
                loss_conf_tmp = conf_loss(torch.cat(
                    (score_conf[i, col_ind], score_conf[i, col_ind_empty]), 0),
                                          target_conf.long(),
                                          neg_factor=10,
                                          neg_idx=FOREGROUND_NUM)
                loss_conf += loss_conf_tmp

                loss_stuff_focal_tmp = focal_loss(score_inst_sig_stuff[i],
                                                  gt_stuff[i].float(),
                                                  valid_mask=None,
                                                  sigmoid_clip=True)
                loss_stuff_focal += loss_stuff_focal_tmp

            loss_stuff_focal = loss_stuff_focal / len(batched_inputs)
            loss_stuff_dice = loss_stuff_dice / len(batched_inputs)
            loss_conf = loss_conf / len(batched_inputs)

            loss_stuff_focal = loss_stuff_focal * 100.
            loss_conf = loss_conf * 5

            losses = {
                "loss_sem_seg": sem_seg_losses,
                "loss_stuff_focal": loss_stuff_focal,
                "loss_stuff_dice": loss_stuff_dice,
                "loss_thing_dice": loss_thing_dice,
                "loss_conf": loss_conf,
            }
            return losses

        score_sem_null = score_sem.new_full(
            (score_sem.shape[0], 1, score_sem.shape[-2], score_sem.shape[-1]),
            -1000.)
        processed_results = []
        for i in range(len(batched_inputs)):
            height = batched_inputs[i].get("height", images.image_sizes[i][0])
            width = batched_inputs[i].get("width", images.image_sizes[i][1])

            score_inst_sig_stuff_b = F.interpolate(score_inst_sig_stuff[
                i:i +
                1, :, :images.image_sizes[i][0], :images.image_sizes[i][1]],
                                                   size=(height, width),
                                                   mode="bilinear",
                                                   align_corners=False)
            score_inst_sig_thing_b = F.interpolate(score_inst_sig_thing[
                i:i +
                1, :, :images.image_sizes[i][0], :images.image_sizes[i][1]],
                                                   size=(height, width),
                                                   mode="bilinear",
                                                   align_corners=False)

            # Debug visualization of the per-class instance sigmoid maps.
            # Note the hard-coded, machine-specific output directory.
            img_name = os.path.basename(batched_inputs[i]['file_name'])
            img_name_split = img_name.split('.')
            save_dir = '/home/yz9244/detectron2/output/vis_inst_sig'
            for j in range(80):
                pred_inst_tmp = np.asarray(
                    255 * (score_inst_sig_thing_b[0, j].cpu().numpy()),
                    dtype=np.uint8)
                img = Image.fromarray(pred_inst_tmp)
                save_img = Image.new('RGB', (img.width, 2 * img.height))
                save_img.paste(img, (0, 0))

                pred_inst_tmp = np.asarray(255 * (pred_inst_tmp > 127),
                                           dtype=np.uint8)
                img = Image.fromarray(pred_inst_tmp)
                save_img.paste(img, (0, img.height))
                save_img.save(
                    os.path.join(save_dir,
                                 img_name_split[0] + '_%02d.png' % (j)))

            res = {}

            score_sem_foreground = torch.log(
                torch.exp(score_sem[i:i + 1,
                                    BACKGROUND_NUM:]).sum(dim=1, keepdim=True))
            sem_seg_result = torch.cat(
                (score_sem_foreground, score_sem[i:i + 1, :BACKGROUND_NUM]), 1)
            sem_seg_r = sem_seg_postprocess(sem_seg_result[0],
                                            images.image_sizes[i], height,
                                            width)

            res.update({"sem_seg": sem_seg_r})

            result = Instances((height, width))
            inst_sem_id = torch.argmax(score_conf_softmax[i], dim=1)
            scores = score_conf_softmax[i,
                                        range(score_conf.shape[1]),
                                        inst_sem_id]
            scores = scores[inst_sem_id != FOREGROUND_NUM]
            pred_classes = inst_sem_id[inst_sem_id != FOREGROUND_NUM]
            pred_masks = score_inst_sig_thing_b[0,
                                                inst_sem_id != FOREGROUND_NUM]

            pred_mask_sum = torch.sum(pred_masks > 0.5, (1, 2))
            result.pred_masks = pred_masks[pred_mask_sum > 0] > 0.5
            result.pred_classes = pred_classes[pred_mask_sum > 0]
            result.scores = scores[pred_mask_sum > 0]

            box_tmp = torch.zeros(result.pred_masks.shape[0], 4)
            for j in range(result.pred_masks.shape[0]):
                nonzero_idx = torch.nonzero(result.pred_masks[j])
                box_tmp[j, 0] = nonzero_idx[:, 1].min().item()
                box_tmp[j, 2] = nonzero_idx[:, 1].max().item()
                box_tmp[j, 1] = nonzero_idx[:, 0].min().item()
                box_tmp[j, 3] = nonzero_idx[:, 0].max().item()
            result.pred_boxes = Boxes(box_tmp)

            #detector_r = detector_postprocess(result, height, width)
            detector_r = result
            res.update({"instances": detector_r})

            panoptic_r = combine_semantic_and_instance_outputs(
                result.scores, result.pred_classes,
                pred_masks[pred_mask_sum > 0], score_inst_sig_stuff_b[0])
            res.update({"panoptic_seg": panoptic_r})

            processed_results.append(res)

        return processed_results
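A side note on the per-instance box loop near the end of the inference branch above: detectron2's BitMasks exposes a vectorized get_bounding_boxes() that derives tight boxes from binary masks (its convention puts the right/bottom edge at max + 1, so boxes may differ from the loop by one pixel). A sketch, assuming that helper exists in the installed version:

from detectron2.structures import BitMasks

bit_masks = BitMasks(result.pred_masks)              # (N, H, W) boolean masks
result.pred_boxes = bit_masks.get_bounding_boxes()   # Boxes of tight XYXY boxes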
Example #4
 def forward(self, scores, proposal_boxes):
     instances = Instances((10, 10))
     instances.proposal_boxes = Boxes(proposal_boxes)
     return self._output_layer.predict_probs((scores, None),
                                             [instances])
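Shape sketch for Example #4, with hypothetical sizes, K = 80 classes, and assuming `self._output_layer` is detectron2's FastRCNNOutputLayers, whose predict_probs softmaxes the logits and splits them per image. `model` stands for whatever object owns the forward method above.

import torch

scores = torch.randn(10, 81)                                 # per-proposal class logits (K + 1, with background)
proposal_boxes = torch.tensor([[1.0, 1.0, 4.0, 4.0]] * 10)   # ten XYXY boxes inside the (10, 10) image
# probs = model.forward(scores, proposal_boxes)   # -> [tensor of shape (10, 81)], rows softmaxed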
Example #5
 def _create_instances_fulldp(self):
     image_shape = (680, 840)
     instances = Instances(image_shape)
     instances.gt_boxes = Boxes(
         torch.as_tensor([
             [65.0, 55.0, 165.0, 155.0],
             [170.0, 175.0, 275.0, 280.0],
             [55.0, 165.0, 165.0, 275.0],
         ]))
     instances.proposal_boxes = Boxes(
         torch.as_tensor([
             [66.0, 54.0, 166.0, 154.0],
             [171.0, 174.0, 276.0, 279.0],
             [56.0, 164.0, 166.0, 274.0],
         ]))
     instances.gt_densepose = DensePoseList(
         [
             self._create_dp_data(
                 {
                     "dp_x": [149.99, 198.62, 157.59],
                     "dp_y": [170.74, 197.73, 123.12],
                     "dp_vertex": [3, 4, 5],
                     "ref_model": "cat_5001",
                     "dp_masks": [],
                 },
                 {
                     "c": (100, 100),
                     "r": 50
                 },
             ),
             self._create_dp_data(
                 {
                     "dp_x": [234.53, 116.72, 71.66],
                     "dp_y": [107.53, 11.31, 142.32],
                     "dp_vertex": [6, 7, 8],
                     "ref_model": "dog_5002",
                     "dp_masks": [],
                 },
                 {
                     "c": (200, 150),
                     "r": 40
                 },
             ),
             self._create_dp_data(
                 {
                     "dp_x": [225.54, 202.61, 135.90],
                     "dp_y": [167.46, 181.00, 211.47],
                     "dp_vertex": [9, 10, 11],
                     "ref_model": "elephant_5002",
                     "dp_masks": [],
                 },
                 {
                     "c": (100, 200),
                     "r": 45
                 },
             ),
         ],
         instances.gt_boxes,
         image_shape,
     )
     return instances
Example #6
    def inference_single_image(self, anchors, box_cls, box_delta, image_size):
        """
        Single-image inference. Return bounding-box detection results by thresholding
        on scores and applying non-maximum suppression (NMS).

        Arguments:
            anchors (list[Boxes]): list of #feature levels. Each entry contains
                a Boxes object, which contains all the anchors in that feature level.
            box_cls (list[Tensor]): list of #feature levels. Each entry contains
                tensor of size (H x W x A, K)
            box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4.
            image_size (tuple(H, W)): a tuple of the image height and width.

        Returns:
            Same as `inference`, but for only one image.
        """
        boxes_all = []
        scores_all = []
        class_idxs_all = []

        # Iterate over every feature level
        for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta,
                                                   anchors):
            # (HxWxAxK,)
            box_cls_i = box_cls_i.flatten().sigmoid_()

            # Keep top k top scoring indices only.
            num_topk = min(self.topk_candidates, box_reg_i.size(0))
            # torch.sort is actually faster than .topk (at least on GPUs)
            predicted_prob, topk_idxs = box_cls_i.sort(descending=True)
            predicted_prob = predicted_prob[:num_topk]
            topk_idxs = topk_idxs[:num_topk]

            # filter out the proposals with low confidence score
            keep_idxs = predicted_prob > self.score_threshold
            predicted_prob = predicted_prob[keep_idxs]
            topk_idxs = topk_idxs[keep_idxs]

            anchor_idxs = topk_idxs // self.num_classes
            classes_idxs = topk_idxs % self.num_classes

            box_reg_i = box_reg_i[anchor_idxs]
            anchors_i = anchors_i[anchor_idxs]
            # predict boxes
            predicted_boxes = self.box2box_transform.apply_deltas(
                box_reg_i, anchors_i.tensor)

            boxes_all.append(predicted_boxes)
            scores_all.append(predicted_prob)
            class_idxs_all.append(classes_idxs)

        boxes_all, scores_all, class_idxs_all = [
            cat(x) for x in [boxes_all, scores_all, class_idxs_all]
        ]
        keep = batched_nms(boxes_all, scores_all, class_idxs_all,
                           self.nms_threshold)
        keep = keep[:self.max_detections_per_image]

        result = Instances(image_size)
        result.pred_boxes = Boxes(boxes_all[keep])
        result.scores = scores_all[keep]
        result.pred_classes = class_idxs_all[keep]
        return result
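The flattened-index decoding above is the usual RetinaNet trick: scores are flattened over (H x W x A, K), so each top-k index splits into an anchor index and a class index. A tiny worked sketch with hypothetical numbers:

import torch

num_classes = 80
topk_idxs = torch.tensor([163, 4000])
anchor_idxs = topk_idxs // num_classes    # tensor([ 2, 50])
classes_idxs = topk_idxs % num_classes    # tensor([ 3,  0])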
Example #7
    def label_and_sample_proposals(
            self, proposals: List[Instances],
            targets: List[Instances]) -> List[Instances]:
        """
        Prepare some proposals to be used to train the ROI heads.
        It performs box matching between `proposals` and `targets`, and assigns
        training labels to the proposals.
        It returns ``self.batch_size_per_image`` random samples from proposals and groundtruth
        boxes, with a fraction of positives that is no larger than
        ``self.positive_sample_fraction``.

        Args:
            See :meth:`ROIHeads.forward`

        Returns:
            list[Instances]:
                length `N` list of `Instances`s containing the proposals
                sampled for training. Each `Instances` has the following fields:

                - proposal_boxes: the proposal boxes
                - gt_boxes: the ground-truth box that the proposal is assigned to
                  (this is only meaningful if the proposal has a label > 0; if label = 0
                  then the ground-truth box is random)

                Other fields such as "gt_classes" and "gt_masks" that are included
                in `targets` are also set on the sampled proposals.
        """
        gt_boxes = [x.gt_boxes for x in targets]
        # Augment proposals with ground-truth boxes.
        # In the case of learned proposals (e.g., RPN), when training starts
        # the proposals will be low quality due to random initialization.
        # It's possible that none of these initial
        # proposals have high enough overlap with the gt objects to be used
        # as positive examples for the second stage components (box head,
        # cls head, mask head). Adding the gt boxes to the set of proposals
        # ensures that the second stage components will have some positive
        # examples from the start of training. For RPN, this augmentation improves
        # convergence and empirically improves box AP on COCO by about 0.5
        # points (under one tested configuration).
        if self.proposal_append_gt:
            proposals = add_ground_truth_to_proposals(gt_boxes, proposals)

        proposals_with_gt = []

        num_fg_samples = []
        num_bg_samples = []
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            has_gt = len(targets_per_image) > 0
            match_quality_matrix = pairwise_iou(
                targets_per_image.gt_boxes, proposals_per_image.proposal_boxes)
            matched_idxs, matched_labels = self.proposal_matcher(
                match_quality_matrix)
            sampled_idxs, gt_classes = self._sample_proposals(
                matched_idxs, matched_labels, targets_per_image.gt_classes)

            # Set target attributes of the sampled proposals:
            proposals_per_image = proposals_per_image[sampled_idxs]
            proposals_per_image.gt_classes = gt_classes

            # We index all the attributes of targets that start with "gt_"
            # and have not been added to proposals yet (="gt_classes").
            if has_gt:
                sampled_targets = matched_idxs[sampled_idxs]
                # NOTE: here the indexing waste some compute, because heads
                # like masks, keypoints, etc, will filter the proposals again,
                # (by foreground/background, or number of keypoints in the image, etc)
                # so we essentially index the data twice.
                for (trg_name,
                     trg_value) in targets_per_image.get_fields().items():
                    if trg_name.startswith(
                            "gt_") and not proposals_per_image.has(trg_name):
                        proposals_per_image.set(trg_name,
                                                trg_value[sampled_targets])
            else:
                gt_boxes = Boxes(
                    targets_per_image.gt_boxes.tensor.new_zeros(
                        (len(sampled_idxs), 4)))
                proposals_per_image.gt_boxes = gt_boxes

            num_bg_samples.append(
                (gt_classes == self.num_classes).sum().item())
            num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1])
            proposals_with_gt.append(proposals_per_image)

        # Log the number of fg/bg samples that are selected for training ROI heads
        storage = get_event_storage()
        storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples))
        storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples))

        return proposals_with_gt
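A shape reminder for the matching step above (hypothetical boxes): pairwise_iou takes the ground-truth boxes first and the proposals second, so the matrix is (num_gt, num_proposals), which is the orientation the proposal matcher expects.

import torch
from detectron2.structures import Boxes, pairwise_iou

gt = Boxes(torch.tensor([[0., 0., 10., 10.], [5., 5., 20., 20.]]))                           # M = 2
props = Boxes(torch.tensor([[0., 0., 10., 10.], [4., 4., 18., 18.], [50., 50., 60., 60.]]))  # N = 3
iou = pairwise_iou(gt, props)   # shape (2, 3); iou[0, 0] == 1.0, iou[0, 2] == 0.0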
Example #8
    def forward(self, features, pred_instances=None, targets=None):

        for i, f in enumerate(self.in_features):
            if i == 0:
                x = self.scale_heads[i](features[f])
            else:
                x = x + self.scale_heads[i](features[f])

        pred_logits = self.predictor(x)
        pred_edge = pred_logits.sigmoid()

        att_map = self.attender(1 - pred_edge)  # regions that need evolution

        if self.training:
            edge_target = targets[0]
            pred_edge_full = F.interpolate(
                pred_edge,
                scale_factor=self.common_stride,
                mode="bilinear",
                align_corners=False,
            )
            snake_input = torch.cat([att_map, x], dim=1)

            # Quick fix for batches that do not have poly after filtering
            try:
                _, poly_loss = self.refine_head(snake_input, None, targets[1])
            except Exception:
                poly_loss = {}

            edge_loss = self.loss(pred_edge_full,
                                  edge_target) * self.loss_weight
            poly_loss.update({
                "loss_edge_det": edge_loss,
            })
            return [], poly_loss, []
        else:

            snake_input = torch.cat([att_map, x], dim=1)

            if "instance" in self.gt_input:
                assert targets[1][0] is not None

                for im_i in range(len(targets[1][0])):
                    gt_instances_per_im = targets[1][0][im_i]
                    bboxes = gt_instances_per_im.gt_boxes.tensor
                    instances_per_im = Instances(
                        pred_instances[im_i]._image_size)
                    instances_per_im.pred_boxes = Boxes(bboxes)
                    instances_per_im.pred_classes = gt_instances_per_im.gt_classes
                    instances_per_im.scores = torch.ones_like(
                        gt_instances_per_im.gt_classes, device=bboxes.device)
                    if gt_instances_per_im.has("gt_masks"):
                        gt_masks = gt_instances_per_im.gt_masks
                        ext_pts_off = self.refine_head.get_simple_extreme_points(
                            gt_masks.polygons).to(bboxes.device)
                        ex_t = torch.stack(
                            [ext_pts_off[:, None, 0], bboxes[:, None, 1]],
                            dim=2)
                        ex_l = torch.stack(
                            [bboxes[:, None, 0], ext_pts_off[:, None, 1]],
                            dim=2)
                        ex_b = torch.stack(
                            [ext_pts_off[:, None, 2], bboxes[:, None, 3]],
                            dim=2)
                        ex_r = torch.stack(
                            [bboxes[:, None, 2], ext_pts_off[:, None, 3]],
                            dim=2)
                        instances_per_im.ext_points = ExtremePoints(
                            torch.cat([ex_t, ex_l, ex_b, ex_r], dim=1))

                    pred_instances[im_i] = instances_per_im

            new_instances, _ = self.refine_head(snake_input, pred_instances,
                                                None)

            pred_edge = att_map

            return pred_edge, {}, new_instances
Example #9
    def may_visualize_gt(self, batched_inputs, init_objectness, init_bbox,
                         refine_objectness, refine_boxes, centers,
                         pred_init_boxes, pred_refine_boxes, logits):
        """
        Visualize the initial and refined boxes, using matched labels for filtering.
        Predictions at positive positions are shown.
        """
        if self.training:
            if self.vis_period <= 0:
                return
            storage = get_event_storage()
            if storage.iter % self.vis_period != 0:
                return

        from detectron2.utils.visualizer import Visualizer
        image_index = 0
        img = batched_inputs[image_index]["image"].cpu().numpy()
        assert img.shape[0] == 3, "Images should have 3 channels."
        img = img[::-1, :, :]
        img = img.transpose(1, 2, 0)

        v_init = Visualizer(img, None)
        v_init = v_init.overlay_instances(boxes=Boxes(init_bbox[image_index][
            init_objectness[image_index]].cpu()))
        init_image = v_init.get_image()

        v_refine = Visualizer(img, None)
        v_refine = v_refine.overlay_instances(
            boxes=Boxes(refine_boxes[image_index][
                refine_objectness[image_index] > 0].cpu()))
        refine_image = v_refine.get_image()

        if self.training:
            vis_img = np.vstack((init_image, refine_image))
            vis_img = vis_img.transpose(2, 0, 1)
            storage.put_image("TOP: init gt boxes; Bottom: refine gt boxes",
                              vis_img)

        vp_init = Visualizer(img, None)
        selected_centers = centers[init_objectness[image_index]].cpu().numpy()
        vp_init = vp_init.overlay_instances(
            boxes=Boxes(pred_init_boxes[image_index][
                init_objectness[image_index]].detach().cpu()),
            labels=logits[image_index]
            [init_objectness[image_index]].sigmoid().max(1)[0].detach().cpu())
        init_image = vp_init.get_image()

        for point in selected_centers:
            init_image = cv2.circle(init_image, tuple(point), 3,
                                    (255, 255, 255))

        vp_refine = Visualizer(img, None)
        foreground_idxs = (refine_objectness[image_index] >= 0).logical_and(
            refine_objectness[image_index] < self.num_classes)
        selected_centers = centers[foreground_idxs].cpu().numpy()
        vp_refine = vp_refine.overlay_instances(
            boxes=pred_refine_boxes[image_index]
            [foreground_idxs].detach().cpu(),
            labels=logits[image_index][foreground_idxs].sigmoid().max(
                1)[0].detach().cpu())
        refine_image = vp_refine.get_image()
        for point in selected_centers:
            refine_image = cv2.circle(refine_image, tuple(point), 3,
                                      (255, 255, 255))

        vis_img = np.vstack((init_image, refine_image))
        if self.training:
            vis_img = vis_img.transpose(2, 0, 1)
            storage.put_image(
                "TOP: init pred boxes; Bottom: refine pred boxes", vis_img)
        # NOTE: This is commented out temporarily. Uncomment it if eager
        # visualization is desired.
        '''
Example #10
def extract_features(args, detector, raw_images, given_boxes=None):
    with torch.no_grad():
        inputs = []

        for raw_image in raw_images:
            image = detector.transform_gen.get_transform(raw_image).apply_image(raw_image)
            image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
            inputs.append({"image": image, "height": raw_image.shape[0], "width": raw_image.shape[1]})
        images = detector.model.preprocess_image(inputs)

        # Run Backbone Res1-Res4
        features = detector.model.backbone(images.tensor)

        # Feature extraction given the bounding boxes
        if given_boxes:
            # Process Boxes in batch mode
            proposal_boxes = []
            original_boxes = []
            box_ids = []

            for i, boxes_data in enumerate(given_boxes):
                boxes = []
                curr_box_ids = []

                for bid, bbox in boxes_data:
                    boxes.append(bbox)
                    curr_box_ids.append(bid)

                raw_boxes = Boxes(torch.tensor(boxes, device=images.tensor.device))

                raw_image = raw_images[i]
                # Remember that raw_image has shape [height, width, color_channel]
                raw_height, raw_width = raw_image.shape[:2]
                # Remember that images[i] has shape [color_channel, height, width]
                new_height, new_width = images[i].shape[1:]
                # Scale the box
                scale_x = 1. * new_width / raw_width
                scale_y = 1. * new_height / raw_height
                boxes = raw_boxes.clone()
                boxes.scale(scale_x=scale_x, scale_y=scale_y)
                proposal_boxes.append(boxes)
                original_boxes.append(raw_boxes)
                box_ids.append(curr_box_ids)

            features = [features[f] for f in detector.model.roi_heads.in_features]
            box_features = detector.model.roi_heads._shared_roi_transform(
                features, proposal_boxes
            )
            feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1

            # Predict classes and boxes for each proposal.
            pred_class_logits, pred_proposal_deltas = detector.model.roi_heads.box_predictor(feature_pooled)
            pred_class_prob = torch.softmax(pred_class_logits, -1)
            # we reset the background class that we will ignore later on
            pred_class_prob[:, -1] = 0.0

            roi_features = feature_pooled

            outputs = []
            total_boxes = 0

            # roi_features.shape = (num_total_boxes, 2048)
            # we need to group the boxes by image id
            for batch_idx, raw_image in enumerate(raw_images):
                indexes = slice(total_boxes, total_boxes + len(given_boxes[batch_idx]))
                instances = Instances(
                    image_size=raw_image.shape[:2],
                    pred_boxes=original_boxes[batch_idx],
                    scores=pred_class_prob[indexes],
                    features=roi_features[indexes],
                    box_ids=box_ids[batch_idx]
                )

                outputs.append(instances)
                total_boxes += len(given_boxes[batch_idx])

            return outputs

        # Feature extraction without bounding boxes
        # Generate proposals with RPN
        proposals, _ = detector.model.proposal_generator(images, features, None)

        # Run RoI head for each proposal (RoI Pooling + Res5)
        proposal_boxes = [x.proposal_boxes for x in proposals]
        features = [features[f] for f in detector.model.roi_heads.in_features]
        box_features = detector.model.roi_heads._shared_roi_transform(
            features, proposal_boxes
        )
        feature_pooled = box_features.mean(dim=[2, 3])  # (sum_proposals, 2048), pooled to 1x1

        # Predict classes and boxes for each proposal.
        pred_class_logits, pred_proposal_deltas = detector.model.roi_heads.box_predictor(feature_pooled)
        rcnn_outputs = FastRCNNOutputs(
            detector.model.roi_heads.box2box_transform,
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            detector.model.roi_heads.smooth_l1_beta,
        )

        # Fixed-number NMS
        instances_list, ids_list = [], []
        probs_list = rcnn_outputs.predict_probs()
        boxes_list = rcnn_outputs.predict_boxes()
        for probs, boxes, image_size in zip(probs_list, boxes_list, images.image_sizes):
            for nms_thresh in np.arange(0.5, 1.0, 0.1):
                instances, ids = fast_rcnn_inference_single_image(
                    boxes, probs, image_size,
                    nms_thresh=nms_thresh, topk_per_image=args.max_boxes
                )
                if len(ids) >= args.min_boxes:
                    break

            instances_list.append(instances)
            ids_list.append(ids)

        # Post processing for features
        features_list = feature_pooled.split(
            rcnn_outputs.num_preds_per_image)  # (sum_proposals, 2048) --> [(p1, 2048), (p2, 2048), ..., (pn, 2048)]
        roi_features_list = []
        for ids, features in zip(ids_list, features_list):
            roi_features_list.append(features[ids].detach())

        # Post processing for bounding boxes (rescale to raw_image)
        raw_instances_list = []
        for batch_idx, (instances, input_per_image, image_size) in enumerate(zip(
                instances_list, inputs, images.image_sizes
        )):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            raw_instances, nonempty = detector_postprocess(instances, height, width)
            raw_instances.features = roi_features_list[batch_idx][nonempty]
            raw_instances_list.append(raw_instances)

        return raw_instances_list
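A hedged call sketch for extract_features above: given_boxes is a per-image list of (box_id, [x1, y1, x2, y2]) pairs in raw-image coordinates, exactly what the inner `for bid, bbox in boxes_data` loop unpacks. `args`, `detector`, and `raw_image` stand for the objects the function already assumes.

given_boxes = [
    [(0, [10, 20, 120, 200]),   # image 0, box id 0
     (1, [30, 40, 90, 150])],   # image 0, box id 1
]
outputs = extract_features(args, detector, [raw_image], given_boxes=given_boxes)
# outputs[0].pred_boxes, outputs[0].scores, outputs[0].features, outputs[0].box_ids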
Example #11
    def process(self, input, output):
        previous_len = len(self._partial_results)
        for instance, output_per_image in zip(input, output):
            input_image_id = instance['image_id']

            instance_gt_annots = self._coco_api.loadAnns(
                self._coco_api.getAnnIds(imgIds=input_image_id))

            im_name = os.path.basename(instance['file_name'])

            fields = output_per_image["instances"].get_fields()
            pred_boxes = fields['pred_boxes']  # xyxy
            scores = fields['scores'].cpu().numpy()
            pred_class = fields['pred_classes']

            if instance_gt_annots:
                # GT present but no preds --> FN
                if len(pred_boxes) == 0:
                    for annot_dict in instance_gt_annots:
                        row = [im_name, "FN", "FN", "non-eval", -1, "NA"]
                        self._partial_results += [row]
                # GT and preds --> TP or FP
                else:
                    det_out = "TP"
                    from detectron2.structures import Boxes, pairwise_iou, BoxMode
                    gt_boxes = torch.tensor([
                        annot_dict['bbox'] for annot_dict in instance_gt_annots
                    ])
                    gt_boxes = BoxMode.convert(gt_boxes, BoxMode.XYWH_ABS,
                                               BoxMode.XYXY_ABS)
                    gt_boxes = Boxes(gt_boxes.to(pred_boxes.device))
                    ious = pairwise_iou(gt_boxes, pred_boxes)
                    paired_preds = []
                    for gt_idx, matches in enumerate(ious):
                        if matches.sum() == 0:
                            row = [im_name, "FN", "FN", "non-eval", -1, "NA"]
                            self._partial_results += [row]
                        else:
                            if self.eval_mode == "iou":
                                pred_idx = matches.argmax()
                                if pred_idx not in paired_preds:
                                    paired_preds.append(pred_idx)
                                    class_out = self._is_polyp_classified(
                                        pred_class[pred_idx],
                                        instance_gt_annots[gt_idx]
                                        ['category_id'])
                                    row = [
                                        im_name, det_out, "TP", class_out,
                                        scores[pred_idx], pred_boxes[pred_idx]
                                    ]
                                    self._partial_results += [row]
                                else:
                                    row = [
                                        im_name, det_out, "FP", "non-eval",
                                        scores[pred_idx], pred_boxes[pred_idx]
                                    ]
                                    self._partial_results += [row]
                            else:
                                for posible_match in matches.nonzero():
                                    gt_box = gt_boxes.tensor[gt_idx]
                                    gt_x1, gt_y1, gt_x2, gt_y2 = gt_box
                                    pred_box = pred_boxes.tensor[posible_match]
                                    pred_x1, pred_y1, pred_x2, pred_y2 = pred_box.squeeze(
                                    )

                                    if self.eval_mode == 'old':
                                        pred_cx, pred_cy = (
                                            pred_x1 +
                                            (pred_x2 - pred_x1) / 2), (
                                                pred_y1 +
                                                (pred_y2 - pred_y1) / 2)
                                        eval_condition = (
                                            gt_x1 < pred_cx < gt_x2) and (
                                                gt_y1 < pred_cy < gt_y2)
                                    else:
                                        gt_cx, gt_cy = (
                                            gt_x1 + (gt_x2 - gt_x1) / 2), (
                                                gt_y1 + (gt_y2 - gt_y1) / 2)
                                        eval_condition = (
                                            pred_x1 < gt_cx < pred_x2) and (
                                                pred_y1 < gt_cy < pred_y2)

                                    if eval_condition:
                                        if posible_match not in paired_preds:
                                            paired_preds.append(posible_match)
                                            class_out = self._is_polyp_classified(
                                                pred_class[posible_match],
                                                instance_gt_annots[gt_idx]
                                                ['category_id'])
                                            row = [
                                                im_name, det_out, "TP",
                                                class_out,
                                                scores[posible_match],
                                                pred_boxes[posible_match]
                                            ]
                                            self._partial_results += [row]
                                        else:
                                            row = [
                                                im_name, det_out, "FP",
                                                "non-eval",
                                                scores[posible_match],
                                                pred_boxes[posible_match]
                                            ]
                                            self._partial_results += [row]

                    # for pred_box, pred_score, pred_classif in zip(pred_boxes, scores, pred_class):
                    #     pred_x1, pred_y1, pred_x2, pred_y2 = pred_box
                    #     if instance_gt_annots:
                    #         for annot_dict in instance_gt_annots:
                    #             gt_bbox = annot_dict['bbox']  # xywh
                    #             gt_bbox[2] += gt_bbox[0]
                    #             gt_bbox[3] += gt_bbox[1]  # xyxy
                    #
                    #             gt_x1, gt_y1, gt_x2, gt_y2 = gt_bbox
                    #
                    #             eval_condition = self._is_localized(gt_bbox, gt_x1, gt_x2, gt_y1, gt_y2, pred_box,
                    #                                                 pred_x1, pred_x2, pred_y1, pred_y2)
                    #
                    #             if eval_condition:
                    #                 class_out = self._is_polyp_classified(pred_classif, annot_dict['category_id'])
                    #
                    #                 row = [im_name, det_out, "TP", class_out, pred_score, pred_box]
                    #                 self._partial_results += [row]
                    #                 instance_gt_annots.remove(annot_dict)
                    #                 break
                    #
                    #     else:
                    #         row = [im_name, "FP", "FP", "non-eval", pred_score, pred_box]
                    #         self._partial_results += [row]
            else:
                # No GT but Preds --> FP
                if len(pred_boxes) > 0:
                    for pred_box, pred_score, pred_classif in zip(
                            pred_boxes, scores, pred_class):
                        row = [
                            im_name, "FP", "FP", "non-eval", pred_score,
                            pred_box
                        ]
                        self._partial_results += [row]
                # No GT and no Preds --> TN
                else:
                    row = [im_name, "TN", "TN", "non-eval", -1, "NA"]
                    self._partial_results += [row]
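For reference, every row appended to self._partial_results in the method above follows one six-column schema; a sketch (the classification value comes from self._is_polyp_classified, whose return type is not shown here):

# [image_name, detection_outcome, localization_outcome, classification_outcome, score, box]
# e.g. ["frame_0001.png", "TP", "TP", <classification result>, 0.91, pred_boxes[3]]
# FN/TN rows use the placeholders -1 and "NA" for the score and box columns.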
Example #12
    def get_ground_truth(self, anchors, targets, gt_classification):
        """
        Args:
            anchors (list[list[Boxes]]): a list of N=#image elements. Each is a
                list of #feature level Boxes. The Boxes contains anchors of
                this image on the specific feature level.
            targets (list[Instances]): a list of N `Instances`s. The i-th
                `Instances` contains the ground-truth per-instance annotations
                for the i-th input image.  Specify `targets` during training only.
            gt_classification: per-image classification ground truth; only referenced
                by the commented-out anchor filter below.

        Returns:
            gt_classes (Tensor):
                An integer tensor of shape (N, R) storing ground-truth
                labels for each anchor.
                R is the total number of anchors, i.e. the sum of Hi x Wi x A for all levels.
                Anchors with an IoU with some target higher than the foreground threshold
                are assigned their corresponding label in the [0, K-1] range.
                Anchors whose IoU are below the background threshold are assigned
                the label "K". Anchors whose IoU are between the foreground and background
                thresholds are assigned a label "-1", i.e. ignore.
            gt_anchors_deltas (Tensor):
                Shape (N, R, 4).
                The last dimension represents ground-truth box2box transform
                targets (dx, dy, dw, dh) that map each anchor to its matched ground-truth box.
                The values in the tensor are meaningful only when the corresponding
                anchor is labeled as foreground.
        """
        gt_classes = []
        gt_anchors_deltas = []
        anchors = [Boxes.cat(anchors_i) for anchors_i in anchors]
        # list[Tensor(R, 4)], one for each image

        for anchors_per_image, targets_per_image, classification_per_image in zip(
                anchors, targets, gt_classification):
            match_quality_matrix = pairwise_iou(targets_per_image.gt_boxes,
                                                anchors_per_image)
            gt_matched_idxs, anchor_labels = self.matcher(match_quality_matrix)

            has_gt = len(targets_per_image) > 0
            if has_gt:
                # ground truth box regression
                matched_gt_boxes = targets_per_image.gt_boxes[gt_matched_idxs]
                gt_anchors_reg_deltas_i = self.box2box_transform.get_deltas(
                    anchors_per_image.tensor, matched_gt_boxes.tensor)

                gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs]
                # Anchors with label 0 are treated as background.
                gt_classes_i[anchor_labels == 0] = self.num_classes
                # Anchors with label -1 are ignored.
                gt_classes_i[anchor_labels == -1] = -1
            else:
                gt_classes_i = torch.zeros_like(
                    gt_matched_idxs) + self.num_classes
                gt_anchors_reg_deltas_i = torch.zeros_like(
                    anchors_per_image.tensor)

            # only commodity and model data do object detection,
            # other type ignore all anchors
            # object_detection_enable = classification_per_image.gt_classes == 0 \
            #                           or classification_per_image.gt_classes == 1

            if not has_gt:
                # Anchors with label -1 are ignored.
                gt_classes_i[:] = -1

            gt_classes.append(gt_classes_i)
            gt_anchors_deltas.append(gt_anchors_reg_deltas_i)

        return torch.stack(gt_classes), torch.stack(gt_anchors_deltas)
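The label handling above follows detectron2's Matcher convention; a minimal sketch with hypothetical thresholds, for reference:

from detectron2.modeling.matcher import Matcher

matcher = Matcher([0.4, 0.5], [0, -1, 1], allow_low_quality_matches=True)
matched_idxs, anchor_labels = matcher(match_quality_matrix)   # match_quality_matrix: (M, R) IoUs
# anchor_labels: 1 = foreground (keep the matched gt class), 0 = background (-> num_classes),
# -1 = ignored, exactly as consumed by the branch above.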
Example #13
    def _forward_box(
            self,
            features: Dict[str, torch.Tensor],
            proposals: List[Instances],
            void_proposals: Optional[List[Instances]] = None,
            image_path=None,
            flips=None,
            exemplar_info=None
    ) -> Union[Dict[str, torch.Tensor], List[Instances]]:
        """
        Forward logic of the box prediction branch. If `self.train_on_pred_boxes is True`,
            the function puts predicted boxes in the `proposal_boxes` field of `proposals` argument.

        Args:
            features (dict[str, Tensor]): mapping from feature map names to tensor.
                Same as in :meth:`ROIHeads.forward`.
            proposals (list[Instances]): the per-image object proposals with
                their matching ground truth.
                Each has fields "proposal_boxes", and "objectness_logits",
                "gt_classes", "gt_boxes".

        Returns:
            In training, a dict of losses.
            In inference, a list of `Instances`, the predicted instances.
        """
        features = [features[f] for f in self.box_in_features]
        box_features = self.box_pooler(features,
                                       [x.proposal_boxes for x in proposals])
        box_features = self.box_head(box_features)
        predictions = self.box_predictor(box_features)
        if self.training:
            void_box_features = self.box_pooler(
                features, [x.proposal_boxes for x in void_proposals])
            void_box_features = self.box_head(void_box_features)
            void_predictions = self.box_predictor(void_box_features)
            if exemplar_info is not None:
                with torch.no_grad():
                    ap = void_proposals[:-1]
                    num_void = sum(len(e) for e in ap)
                    lbl = self.box_predictor.add_exemplar(
                        exemplar_info, void_box_features[:num_void].detach(), ap,
                        image_path[:-1], flips[:-1])
                    if lbl is not None:
                        for x, lbl_i in zip(ap, lbl):
                            x.gt_classes = lbl_i
            del box_features
            losses = self.box_predictor.losses(predictions,
                                               proposals,
                                               void_predictions,
                                               void_proposals,
                                               image_path=image_path,
                                               flips=flips,
                                               use_exemplar=exemplar_info
                                               is not None)
            # proposals is modified in-place below, so losses must be computed first.
            if self.train_on_pred_boxes:
                with torch.no_grad():
                    pred_boxes = self.box_predictor.predict_boxes_for_gt_classes(
                        predictions, proposals)
                    for proposals_per_image, pred_boxes_per_image in zip(
                            proposals, pred_boxes):
                        proposals_per_image.proposal_boxes = Boxes(
                            pred_boxes_per_image)
            return losses
        else:
            pred_instances, get_inds = self.box_predictor.inference(
                predictions, proposals, use_unknown=True)
            del box_features
            return pred_instances
Example #14
 def func_cat(x: torch.Tensor):
     boxes1 = Boxes(x)
     boxes2 = Boxes(x)
     # boxes3 = Boxes.cat([boxes1, boxes2])  # this is not supported by TorchScript for now.
     boxes3 = boxes1.cat([boxes1, boxes2])
     return boxes3
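Eager usage sketch for func_cat (hypothetical input): cat called through an instance concatenates both copies of x, so the result holds twice as many boxes.

import torch

boxes3 = func_cat(torch.zeros(2, 4))
assert len(boxes3) == 4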
Example #15
def assemble_rcnn_outputs_by_name(image_sizes,
                                  tensor_outputs,
                                  force_mask_on=False):
    """
    A function to assemble a caffe2 model's outputs (i.e. Dict[str, Tensor])
    into detectron2's format (i.e. a list of Instances).
    This only works when the model follows the Caffe2 detectron's naming convention.

    Args:
        image_sizes (List[List[int, int]]): [H, W] of every image.
        tensor_outputs (Dict[str, Tensor]): external_output to its tensor.

        force_mask_on (bool): if true, make sure there will be pred_masks even
            if no mask is found in tensor_outputs (usually due to model crash)
    """

    results = [Instances(image_size) for image_size in image_sizes]

    batch_splits = tensor_outputs.get("batch_splits", None)
    if batch_splits:
        raise NotImplementedError()
    assert len(image_sizes) == 1
    result = results[0]

    bbox_nms = tensor_outputs["bbox_nms"]
    score_nms = tensor_outputs["score_nms"]
    class_nms = tensor_outputs["class_nms"]
    # Detection will always succeed because Conv supports 0-batch
    assert _is_valid_model_output_blob(bbox_nms)
    assert _is_valid_model_output_blob(score_nms)
    assert _is_valid_model_output_blob(class_nms)
    result.pred_boxes = Boxes(torch.Tensor(bbox_nms))
    result.scores = torch.Tensor(score_nms)
    result.pred_classes = torch.Tensor(class_nms).to(torch.int64)

    mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None)
    if _is_valid_model_output_blob(mask_fcn_probs):
        # finish the mask pred
        mask_probs_pred = torch.Tensor(mask_fcn_probs)
        num_masks = mask_probs_pred.shape[0]
        class_pred = result.pred_classes
        indices = torch.arange(num_masks, device=class_pred.device)
        mask_probs_pred = mask_probs_pred[indices, class_pred][:, None]
        result.pred_masks = mask_probs_pred
    elif force_mask_on:
        # NOTE: there's no way to know the height/width of the mask here; it won't be
        # used anyway when the batch size is 0, so just set them to 0.
        result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8)

    keypoints_out = tensor_outputs.get("keypoints_out", None)
    kps_score = tensor_outputs.get("kps_score", None)
    if _is_valid_model_output_blob(keypoints_out):
        # keypoints_out: [N, 4, #keypoints], where 4 is in order of (x, y, score, prob)
        keypoints_tensor = torch.Tensor(keypoints_out)
        # NOTE: it's possible that prob is not calculated if "should_output_softmax"
        # is set to False in HeatmapMaxKeypoint, so just use the raw score; it doesn't
        # seem to affect mAP. TODO: check more carefully.
        keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]]
        result.pred_keypoints = keypoint_xyp
    elif _is_valid_model_output_blob(kps_score):
        # keypoint heatmap to sparse data structure
        pred_keypoint_logits = torch.Tensor(kps_score)
        keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result])

    return results
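A hypothetical single-image tensor_outputs payload for the function above, with shapes inferred from how the blobs are consumed (masks and keypoints omitted; assumes _is_valid_model_output_blob accepts plain ndarrays):

import numpy as np

tensor_outputs = {
    "bbox_nms": np.zeros((3, 4), dtype=np.float32),    # XYXY boxes after NMS
    "score_nms": np.zeros((3,), dtype=np.float32),
    "class_nms": np.zeros((3,), dtype=np.float32),
}
results = assemble_rcnn_outputs_by_name([[480, 640]], tensor_outputs)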
Example #16
    def inference_single_image(self, logits, init_boxes, refine_boxes,
                               image_size):
        boxes_all = []
        init_boxes_all = []
        class_idxs_all = []
        scores_all = []
        for logit, init_box, refine_box in zip(logits, init_boxes,
                                               refine_boxes):
            scores, cls = logit.sigmoid().max(0)
            cls = cls.view(-1)
            scores = scores.view(-1)
            init_box = init_box.view(4, -1).permute(1, 0)
            refine_box = refine_box.view(4, -1).permute(1, 0)

            predicted_prob, topk_idxs = scores.sort(descending=True)
            num_topk = min(self.topk_candidates, cls.size(0))
            predicted_prob = predicted_prob[:num_topk]
            topk_idxs = topk_idxs[:num_topk]

            # filter out the proposals with low confidence score
            keep_idxs = predicted_prob > self.score_threshold
            predicted_prob = predicted_prob[keep_idxs]
            topk_idxs = topk_idxs[keep_idxs]
            init_box_topk = init_box[topk_idxs]
            refine_box_topk = refine_box[topk_idxs]
            cls_topk = cls[topk_idxs]
            score_topk = scores[topk_idxs]

            boxes_all.append(refine_box_topk)
            init_boxes_all.append(init_box_topk)
            class_idxs_all.append(cls_topk)
            scores_all.append(score_topk)
            # The following code is the decoding procedure of RetinaNet in D2.
            # However, it fails to handle these predictions, even though I expected it to.
            """
            cls = logit.flatten().sigmoid()

            # pre nms
            num_topk = min(self.topk_candidates, cls.size(0))

            predicted_prob, topk_idxs = cls.sort(descending=True)
            predicted_prob = predicted_prob[:num_topk]
            topk_idxs = topk_idxs[:num_topk]

            # filter out the proposals with low confidence score
            keep_idxs = predicted_prob > self.score_threshold
            predicted_prob = predicted_prob[keep_idxs]
            topk_idxs = topk_idxs[keep_idxs]

            points_idxs = topk_idxs // self.num_classes
            classes_idxs = topk_idxs % self.num_classes

            init_box = init_box.reshape(4, -1).clone()
            refine_box = refine_box.reshape(4, -1).clone()
            init_box = init_box[:, points_idxs].permute(1, 0)
            refine_box_topk = refine_box[:, points_idxs].permute(1, 0)

            boxes_all.append(refine_box_topk)
            init_boxes_all.append(init_box)
            class_idxs_all.append(classes_idxs)
            scores_all.append(predicted_prob)
            """

        boxes_all, scores_all, class_idxs_all, init_boxes_all = [
            cat(x)
            for x in [boxes_all, scores_all, class_idxs_all, init_boxes_all]
        ]
        keep = batched_nms(boxes_all, scores_all, class_idxs_all,
                           self.nms_threshold)
        keep = keep[:self.max_detections_per_image]

        result = Instances(image_size)
        result.pred_boxes = Boxes(boxes_all[keep])
        result.scores = scores_all[keep]
        result.pred_classes = class_idxs_all[keep]
        result.init_boxes = init_boxes_all[keep]
        return result
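For comparison, a compact standalone version of the pattern used above (per-level sort, top-k, score filtering, then class-aware NMS across levels), written directly against torchvision's batched_nms. The function name, thresholds, and shapes are illustrative assumptions rather than part of the original model.

import torch
from torchvision.ops import batched_nms

def topk_filter_nms(per_level_boxes, per_level_scores, per_level_classes,
                    topk=1000, score_thresh=0.05, nms_thresh=0.6, max_dets=100):
    # per_level_*: one tensor per feature level; boxes are XYXY
    boxes_all, scores_all, classes_all = [], [], []
    for boxes, scores, classes in zip(per_level_boxes, per_level_scores, per_level_classes):
        scores, order = scores.sort(descending=True)
        order, scores = order[:topk], scores[:topk]
        keep = scores > score_thresh
        order, scores = order[keep], scores[keep]
        boxes_all.append(boxes[order])
        scores_all.append(scores)
        classes_all.append(classes[order])
    boxes_all = torch.cat(boxes_all)
    scores_all = torch.cat(scores_all)
    classes_all = torch.cat(classes_all)
    keep = batched_nms(boxes_all, scores_all, classes_all, nms_thresh)[:max_dets]
    return boxes_all[keep], scores_all[keep], classes_all[keep]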
Example #17
0
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     class_logits=None,
                                     estimate_uncertainty=False,
                                     variance=torch.Tensor([])):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.

    # Get box ID with predicted class label: [box id, class label]
    filter_inds = filter_mask.nonzero()

    import numpy as np
    class_id = np.argmax(scores.cpu().numpy(), axis=1)
    class_id = np.stack([np.arange(scores.shape[0]), class_id], axis=1)
    boxes_one_class = boxes[class_id[:, 0], class_id[:, 1], :].cpu().numpy()
    scores_one_class = np.max(scores.cpu().numpy(), axis=1)

    if class_logits is not None:
        class_logits = class_logits[filter_inds[:, 0]]
        predicted_probs = scores[filter_inds[:, 0]]

    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores_filtered = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores_filtered, filter_inds[:, 1], nms_thresh)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]

    boxes_final, scores_final, filter_inds_final = boxes[
        keep], scores_filtered[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes_final)
    result.scores = scores_final
    result.pred_classes = filter_inds_final[:, 1]
    # Save out the class logits and probabilities for the kept detections
    if class_logits is not None:
        result.class_logits = class_logits[keep]
        result.prob_score = predicted_probs[keep]

    if estimate_uncertainty:
        # std from 1000 proposals
        #stds = nms_calc_uncertainty(boxes_final.cpu().numpy(), scores_final.cpu().numpy(), boxes_one_class, scores_one_class, 0.75)
        # std from bbox with class confidence score higher than threshold
        stds = nms_calc_uncertainty(boxes_final.cpu().numpy(),
                                    scores_final.cpu().numpy(),
                                    boxes.cpu().numpy(),
                                    scores_filtered.cpu().numpy(), 0.9)
        result.stds = torch.Tensor(stds).cuda()

    if len(variance) > 0:
        result.vars = variance[keep]

    return result, filter_inds_final[:, 0]
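A hedged usage sketch for the function above, assuming it is importable from its module and that detectron2 is installed; the proposal count, class count, and thresholds are made up for illustration. With `class_logits=None` and `estimate_uncertainty=False`, only the standard score-threshold + NMS path is exercised.

import torch

R, K = 50, 3                                          # 50 proposals, 3 foreground classes (assumed)
boxes = torch.rand(R, K * 4) * 100                    # class-specific box regression outputs
scores = torch.softmax(torch.rand(R, K + 1), dim=1)   # last column is the background class
result, kept_box_ids = fast_rcnn_inference_single_image(
    boxes, scores, image_shape=(128, 128),
    score_thresh=0.05, nms_thresh=0.5, topk_per_image=20)
print(len(result), result.pred_classes)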
Example #18
0
 def ga_shape_targets(self, approxs, inside_flags, squares, gt_instances):
     assert len(approxs) == len(inside_flags) == len(squares)
     approxs_flatten = Boxes.cat(approxs)
     inside_flags_flatten = torch.cat(inside_flags)
     squares_flatten = Boxes.cat(squares)
Example #19
0
def evaluate(cfg, evaluator, det_1, det_2, anno, predictor, method):
    evaluator.reset()
    img_folder = '../../../Datasets/FLIR/val/thermal_8_bit/'
    num_img = len(det_2['image'])
    count_1 = 0
    count_2 = 0
    count_fusion = 0

    print('Method: ', method)

    X = None
    Y = np.array([])
    cnt = 0

    for i in range(num_img):
        info_1 = {}
        info_1['img_name'] = det_1['image'][i]
        info_1['bbox'] = det_1['boxes'][i]
        info_1['score'] = det_1['scores'][i]
        info_1['class'] = det_1['classes'][i]
        info_1['class_logits'] = det_1['class_logits'][i]
        if 'probs' in det_1.keys():
            info_1['prob'] = det_1['probs'][i]

        info_2 = {}
        info_2['img_name'] = det_2['image'][i].split('.')[0] + '.jpeg'
        info_2['bbox'] = det_2['boxes'][i]
        info_2['score'] = det_2['scores'][i]
        info_2['class'] = det_2['classes'][i]
        info_2['class_logits'] = det_2['class_logits'][i]
        if 'probs' in det_2.keys():
            info_2['prob'] = det_2['probs'][i]

        #img_id = int(info_1['img_name'].split('.')[0].split('_')[1]) - 1
        img_id = det_1['image_id'][i]
        box_gt = []
        class_gt = []
        info_gt = {}

        #print('img_id:',img_id)
        if img_id in anno.keys():
            # Handle groundtruth
            anno_gt = anno[img_id]
            for j in range(len(anno_gt)):
                box = anno_gt[j]['bbox']
                box_gt.append(
                    [box[0], box[1], box[0] + box[2], box[1] + box[3]])
                class_gt.append(anno_gt[j]['category_id'])

            info_gt['bbox'] = box_gt
            info_gt['class'] = class_gt

            # If neither model produced any detections
            if len(info_1['bbox']) == 0 and len(info_2['bbox']) == 0:
                continue
            # If the 1st model produced no detections:
            elif len(info_1['bbox']) == 0:
                print('model 1 has no detections')
                in_boxes, in_scores, in_class, in_logits, in_prob, num_det = prepare_data_gt_1_det(
                    info_2)
                score_results, class_results, box_results = nms_multiple_box(
                    in_boxes, in_scores, in_class, in_logits, 0.5, num_det,
                    method)
                #class_results, score_results, box_results = match_box_nms(in_boxes, in_scores, in_class, in_logits, 0.5, num_det, method)
            elif len(info_2['bbox']) == 0:
                print('model 2 has no detections')
                in_boxes, in_scores, in_class, in_logits, in_prob, num_det = prepare_data_gt_1_det(
                    info_1)
                score_results, class_results, box_results = nms_multiple_box(
                    in_boxes, in_scores, in_class, in_logits, 0.5, num_det,
                    method)
                #class_results, score_results, box_results = match_box_nms(in_boxes, in_scores, in_class, in_logits, 0.5, num_det, method)
            else:
                in_boxes, in_scores, in_class, in_logits, in_prob, num_det = prepare_data_gt(
                    info_1, info_2)
                score_results, class_results, box_results = nms_multiple_box(
                    in_boxes, in_scores, in_class, in_logits, 0.5, num_det,
                    method)
                #class_results, score_results, box_results = match_box_nms(in_boxes, in_scores, in_class, in_logits, 0.5, num_det, method)

            pred_prob_multiclass = predictor.predict_proba(score_results)
            out_scores = np.max(pred_prob_multiclass, axis=1)
            out_class = np.argmax(pred_prob_multiclass, axis=1)
            """
            Send information to evaluator
            """
            # Image info
            file_name = img_folder + info_1['img_name'].split('.')[0] + '.jpeg'
            img = cv2.imread(file_name)
            H, W, _ = img.shape

            # Handle inputs
            inputs = []
            input_info = {}
            input_info['file_name'] = file_name
            input_info['height'] = H
            input_info['width'] = W
            input_info['image_id'] = det_1['image_id'][i]
            input_info['image'] = torch.Tensor(img)
            inputs.append(input_info)

            # Handle outputs
            outputs = []
            out_info = {}
            proposals = Instances([H, W])
            proposals.pred_boxes = Boxes(box_results)
            proposals.scores = torch.Tensor(out_scores)
            proposals.pred_classes = torch.Tensor(out_class)
            out_info['instances'] = proposals
            outputs.append(out_info)
            evaluator.process(inputs, outputs)

            if len(score_results):
                if cnt == 0:
                    X = score_results
                else:
                    try:
                        X = np.concatenate((X, score_results))
                    except:
                        pdb.set_trace()
                Y = np.concatenate((Y, class_results))
                cnt += 1
        else:
            continue

    results = evaluator.evaluate(out_eval_path='FLIR_pooling_.out')

    if results is None:
        results = {}

    avgRGB = count_1 / num_img
    avgThermal = count_2 / num_img
    avgNMS = count_fusion / num_img

    print('Avg bbox for RGB:', avgRGB, "average count thermal:", avgThermal,
          'average count nms:', avgNMS)
    return results
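For clarity, a minimal sketch of the (inputs, outputs) format that `evaluator.process` consumes in the loop above, built by hand for a single fake image; the file name, image size, and predictions are placeholders, and detectron2 is assumed to be installed.

import torch
from detectron2.structures import Boxes, Instances

H, W = 512, 640
inputs = [{
    "file_name": "example.jpeg",        # placeholder path
    "height": H, "width": W,
    "image_id": 0,
    "image": torch.zeros(H, W, 3),
}]

pred = Instances([H, W])
pred.pred_boxes = Boxes(torch.tensor([[10.0, 10.0, 100.0, 80.0]]))
pred.scores = torch.tensor([0.9])
pred.pred_classes = torch.tensor([1])
outputs = [{"instances": pred}]

# evaluator.process(inputs, outputs)
# results = evaluator.evaluate()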
Example #20
0
def convert_to_coco_dict(dataset_name):
    """
    Convert an instance detection/segmentation or keypoint detection dataset
    in detectron2's standard format into COCO json format.

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name (str):
            name of the source dataset
            Must be registered in DatasetCatalog and be in detectron2's standard format.
            Must have corresponding metadata "thing_classes"
    Returns:
        coco_dict: serializable dict in COCO json format
    """

    dataset_dicts = DatasetCatalog.get(dataset_name)
    metadata = MetadataCatalog.get(dataset_name)

    # unmap the category mapping ids for COCO
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {
            v: k
            for k, v in metadata.thing_dataset_id_to_contiguous_id.items()
        }
        reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[
            contiguous_id]  # noqa
    else:
        reverse_id_mapper = lambda contiguous_id: contiguous_id  # noqa

    categories = [{
        "id": reverse_id_mapper(id),
        "name": name
    } for id, name in enumerate(metadata.thing_classes)]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict.get("annotations", [])
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                if isinstance(segmentation, list):
                    polygons = PolygonMasks([segmentation])
                    area = polygons.area()[0].item()
                elif isinstance(segmentation, dict):  # RLE
                    area = mask_util.area(segmentation).item()
                else:
                    raise TypeError(
                        f"Unknown segmentation type {type(segmentation)}!")
            else:
                # Computing areas using bounding boxes
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS,
                                          BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1].
                        # For COCO format consistency we subtract 0.5
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = float(area)
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)
            coco_annotation["category_id"] = reverse_id_mapper(
                annotation["category_id"])

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                seg = coco_annotation["segmentation"] = annotation[
                    "segmentation"]
                if isinstance(seg, dict):  # RLE
                    counts = seg["counts"]
                    if not isinstance(counts, str):
                        # make it json-serializable
                        seg["counts"] = counts.decode("ascii")

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}")

    info = {
        "date_created": str(datetime.datetime.now()),
        "description":
        "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "categories": categories,
        "licenses": None
    }
    if len(coco_annotations) > 0:
        coco_dict["annotations"] = coco_annotations
    return coco_dict
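A short sketch of how the converted dict might be written to disk, assuming the function above is importable from its module and that a dataset named "my_dataset" has already been registered in DatasetCatalog (the name and output path are placeholders).

import json

coco_dict = convert_to_coco_dict("my_dataset")     # "my_dataset" is a placeholder name
with open("my_dataset_coco_format.json", "w") as f:
    json.dump(coco_dict, f)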
Example #21
0
 def forward(self, proposal_deltas, proposal_boxes):
     instances = Instances((10, 10))
     instances.proposal_boxes = Boxes(proposal_boxes)
     return self._output_layer.predict_boxes(
         (None, proposal_deltas), [instances])
Example #22
0
    def inference_single_image(self, cate_preds, kernel_preds, seg_preds,
                               cur_size, ori_size):
        # overall info.
        h, w = cur_size
        f_h, f_w = seg_preds.size()[-2:]
        ratio = math.ceil(h / f_h)
        upsampled_size_out = (int(f_h * ratio), int(f_w * ratio))

        # process.
        inds = (cate_preds > self.score_threshold)
        cate_scores = cate_preds[inds]
        if len(cate_scores) == 0:
            results = Instances(ori_size)
            results.scores = torch.tensor([])
            results.pred_classes = torch.tensor([])
            results.pred_masks = torch.tensor([])
            results.pred_boxes = Boxes(torch.tensor([]))
            return results

        # cate_labels & kernel_preds
        inds = inds.nonzero()
        cate_labels = inds[:, 1]
        kernel_preds = kernel_preds[inds[:, 0]]

        # trans vector.
        size_trans = cate_labels.new_tensor(self.num_grids).pow(2).cumsum(0)
        strides = kernel_preds.new_ones(size_trans[-1])

        n_stage = len(self.num_grids)
        strides[:size_trans[0]] *= self.instance_strides[0]
        for ind_ in range(1, n_stage):
            strides[size_trans[ind_ -
                               1]:size_trans[ind_]] *= self.instance_strides[
                                   ind_]
        strides = strides[inds[:, 0]]

        # mask encoding.
        N, I = kernel_preds.shape
        kernel_preds = kernel_preds.view(N, I, 1, 1)
        seg_preds = F.conv2d(seg_preds, kernel_preds,
                             stride=1).squeeze(0).sigmoid()

        # mask.
        seg_masks = seg_preds > self.mask_threshold
        sum_masks = seg_masks.sum((1, 2)).float()

        # filter.
        keep = sum_masks > strides
        if keep.sum() == 0:
            results = Instances(ori_size)
            results.scores = torch.tensor([])
            results.pred_classes = torch.tensor([])
            results.pred_masks = torch.tensor([])
            results.pred_boxes = Boxes(torch.tensor([]))
            return results

        seg_masks = seg_masks[keep, ...]
        seg_preds = seg_preds[keep, ...]
        sum_masks = sum_masks[keep]
        cate_scores = cate_scores[keep]
        cate_labels = cate_labels[keep]

        # mask scoring.
        seg_scores = (seg_preds * seg_masks.float()).sum((1, 2)) / sum_masks
        cate_scores *= seg_scores

        # sort and keep top nms_pre
        sort_inds = torch.argsort(cate_scores, descending=True)
        if len(sort_inds) > self.max_before_nms:
            sort_inds = sort_inds[:self.max_before_nms]
        seg_masks = seg_masks[sort_inds, :, :]
        seg_preds = seg_preds[sort_inds, :, :]
        sum_masks = sum_masks[sort_inds]
        cate_scores = cate_scores[sort_inds]
        cate_labels = cate_labels[sort_inds]

        if self.nms_type == "matrix":
            # matrix nms & filter.
            cate_scores = matrix_nms(cate_labels,
                                     seg_masks,
                                     sum_masks,
                                     cate_scores,
                                     sigma=self.nms_sigma,
                                     kernel=self.nms_kernel)
            keep = cate_scores >= self.update_threshold
        elif self.nms_type == "mask":
            # original mask nms.
            keep = mask_nms(cate_labels,
                            seg_masks,
                            sum_masks,
                            cate_scores,
                            nms_thr=self.mask_threshold)
        else:
            raise NotImplementedError

        if keep.sum() == 0:
            results = Instances(ori_size)
            results.scores = torch.tensor([])
            results.pred_classes = torch.tensor([])
            results.pred_masks = torch.tensor([])
            results.pred_boxes = Boxes(torch.tensor([]))
            return results

        seg_preds = seg_preds[keep, :, :]
        cate_scores = cate_scores[keep]
        cate_labels = cate_labels[keep]

        # sort and keep top_k
        sort_inds = torch.argsort(cate_scores, descending=True)
        if len(sort_inds) > self.max_per_img:
            sort_inds = sort_inds[:self.max_per_img]
        seg_preds = seg_preds[sort_inds, :, :]
        cate_scores = cate_scores[sort_inds]
        cate_labels = cate_labels[sort_inds]

        # reshape to original size.
        seg_preds = F.interpolate(seg_preds.unsqueeze(0),
                                  size=upsampled_size_out,
                                  mode='bilinear')[:, :, :h, :w]
        seg_masks = F.interpolate(seg_preds, size=ori_size,
                                  mode='bilinear').squeeze(0)
        seg_masks = seg_masks > self.mask_threshold

        results = Instances(ori_size)
        results.pred_classes = cate_labels
        results.scores = cate_scores
        results.pred_masks = seg_masks

        # get bbox from mask
        pred_boxes = torch.zeros(seg_masks.size(0), 4)
        #for i in range(seg_masks.size(0)):
        #    mask = seg_masks[i].squeeze()
        #    ys, xs = torch.where(mask)
        #    pred_boxes[i] = torch.tensor([xs.min(), ys.min(), xs.max(), ys.max()]).float()
        results.pred_boxes = Boxes(pred_boxes)

        return results
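The commented-out loop above derives tight boxes from the predicted masks; a hedged standalone helper doing the same thing is sketched below (empty masks map to all-zero boxes). The helper name is illustrative.

import torch

def masks_to_boxes(masks):
    # masks: (N, H, W) boolean tensor; returns (N, 4) XYXY boxes
    boxes = torch.zeros(masks.size(0), 4, dtype=torch.float32)
    for i in range(masks.size(0)):
        ys, xs = torch.where(masks[i])
        if len(xs) > 0:
            boxes[i] = torch.stack([xs.min(), ys.min(), xs.max(), ys.max()]).float()
    return boxes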
Example #23
0
def get_empty_instance(h, w):
    inst = Instances((h, w))
    inst.gt_boxes = Boxes(torch.rand(0, 4))
    inst.gt_classes = torch.tensor([]).to(dtype=torch.int64)
    inst.gt_masks = BitMasks(torch.rand(0, h, w))
    return inst
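A quick sanity check of the helper above, assuming detectron2 and the helper itself are importable:

inst = get_empty_instance(32, 32)
assert len(inst) == 0
assert inst.gt_boxes.tensor.shape == (0, 4)
assert inst.gt_masks.tensor.shape == (0, 32, 32)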
Example #24
0
    def inference(self, pred_digits, pred_points, ins_feature, images):
        """
        Arguments:
            pred_digits, pred_points: the raw score and point predictions from
                the model's head, before post-processing.
            images (ImageList): the input images

        Returns:
            results (List[Instances]): a list of #images elements.
        """
        batch = pred_digits.size(0)
        pred_digits = pred_digits.sigmoid_()
        results = []

        pool_digits = F.max_pool2d(pred_digits, 3, 1, 1)

        for img_idx in range(batch):
            # Get the size of the current image
            image_size = images.image_sizes[img_idx]

            digits_im = pred_digits[img_idx]
            pool_digits_im = pool_digits[img_idx]
            points_im = pred_points[img_idx]

            Index = torch.nonzero((digits_im == pool_digits_im) & (digits_im > self.score_threshold))

            results_im = Instances(image_size)
            if Index.size(0) < 1:
                results_im.pred_classes = Index.new_zeros(0)
                results_im.pred_boxes = Boxes(points_im.new_zeros(0, 4))
                results_im.scores = digits_im.new_zeros(0)
                results_im.pred_points = points_im.new_zeros(0, points_im.size(0) // 2, 2)
                results.append(results_im)
                continue

            cls_idxs = Index[:, 0]
            pred_prob = digits_im[Index[:, 0], Index[:, 1], Index[:, 2]]

            center = torch.cat([Index[:, 2:3], Index[:, 1:2]], dim=1)
            points_n_yx = points_im[:, Index[:, 1], Index[:, 2]]

            points_n = points_n_yx.clone().detach()
            points_n[::2, :] = points_n_yx[1::2, :]
            points_n[1::2, :] = points_n_yx[::2, :]

            N = center.size(0)
            TOPK = 100
            if N > TOPK:
                pred_prob, topk_idxs = pred_prob.sort(descending=True)
                # Keep top k scoring values
                pred_prob = pred_prob[:TOPK]
                # Keep top k values
                center = center[topk_idxs[:TOPK], :]
                points_n = points_n[:, topk_idxs[:TOPK]]
                cls_idxs = cls_idxs[topk_idxs[:TOPK]]
                N = TOPK

            center = center.view(N, 1, 2)

            npoints = torch.transpose(points_n, 1, 0)
            npoints = npoints.view(N, -1, 2)
            real_npoints = npoints + center

            real_npoints = real_npoints * self.points_feature_strides[-1]

            location = (real_npoints[:, :, (1, 0)] / self.ins_feature_strides[0]).float()
            batch_index = Index.new_zeros(N) + img_idx
            pred_ins = self.ins_head(ins_feature, location, batch_index)
            pred_ins = F.interpolate(pred_ins, scale_factor=self.ins_feature_strides[0], mode='bilinear').squeeze(1)
            pred_masks = (pred_ins > 0.5)

            # crop to the image size:
            pred_masks = pred_masks[:, :image_size[0], :image_size[1]]

            top_left, _ = torch.min(real_npoints, dim=1)
            bottom_right, _ = torch.max(real_npoints, dim=1)

            bbox = torch.cat([top_left, bottom_right], dim=1)

            results_im.pred_classes = cls_idxs
            results_im.pred_boxes = Boxes(bbox)
            results_im.scores = pred_prob
            results_im.pred_points = real_npoints
            results_im.pred_masks = pred_masks
            results.append(results_im)
        return results
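The inference above picks peaks on the score map by comparing it with a 3x3 max-pooled copy; a standalone sketch of just that trick (with made-up shapes and threshold) is:

import torch
import torch.nn.functional as F

heatmap = torch.rand(1, 80, 64, 64)            # (N, num_classes, H, W), illustrative
heatmap = heatmap.sigmoid()
pooled = F.max_pool2d(heatmap, 3, 1, 1)
peaks = torch.nonzero((heatmap == pooled) & (heatmap > 0.5))
# each row of `peaks` is (batch, class, y, x) for a local maximum above the threshold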
Example #25
0
    def forward_for_single_feature_map(self, locations, box_cls, reg_pred,
                                       ctrness, image_sizes):
        N, C, H, W = box_cls.shape

        # put in the same format as locations
        box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
        box_cls = box_cls.reshape(N, -1, C).sigmoid()
        box_regression = reg_pred.view(N, 4, H, W).permute(0, 2, 3, 1)
        box_regression = box_regression.reshape(N, -1, 4)
        ctrness = ctrness.view(N, 1, H, W).permute(0, 2, 3, 1)
        ctrness = ctrness.reshape(N, -1).sigmoid()

        # if self.thresh_with_ctr is True, we multiply the classification
        # scores with centerness scores before applying the threshold.
        if self.thresh_with_ctr:
            box_cls = box_cls * ctrness[:, :, None]
        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        if not self.thresh_with_ctr:
            box_cls = box_cls * ctrness[:, :, None]

        results = []
        for i in range(N):
            per_box_cls = box_cls[i]
            per_candidate_inds = candidate_inds[i]
            per_box_cls = per_box_cls[per_candidate_inds]

            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]

            per_box_regression = box_regression[i]
            per_box_regression = per_box_regression[per_box_loc]
            per_locations = locations[per_box_loc]

            per_pre_nms_top_n = pre_nms_top_n[i]

            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]

            detections = torch.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ],
                                     dim=1)

            boxlist = Instances(image_sizes[i])
            boxlist.pred_boxes = Boxes(detections)
            boxlist.scores = torch.sqrt(per_box_cls)
            boxlist.pred_classes = per_class
            boxlist.locations = per_locations

            results.append(boxlist)

        return results
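The box decoding above turns per-location (l, t, r, b) distances into XYXY boxes; a tiny worked example of the same arithmetic (the values are made up):

import torch

locations = torch.tensor([[32.0, 32.0], [64.0, 48.0]])   # per-pixel (x, y) centers
ltrb = torch.tensor([[10.0, 12.0, 14.0, 16.0],
                     [ 8.0,  8.0,  8.0,  8.0]])          # left, top, right, bottom distances
boxes = torch.stack([
    locations[:, 0] - ltrb[:, 0],
    locations[:, 1] - ltrb[:, 1],
    locations[:, 0] + ltrb[:, 2],
    locations[:, 1] + ltrb[:, 3],
], dim=1)
print(boxes)  # [[22., 20., 46., 48.], [56., 40., 72., 56.]]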
Example #26
0
    def inference_single_image(self, locations, box_cls, box_reg, center_score,
                               image_size):
        boxes_all = []
        scores_all = []
        class_idxs_all = []

        # Iterate over every feature level
        for box_cls_i, box_reg_i, locs_i, center_score_i in zip(
                box_cls, box_reg, locations, center_score):
            # (HxW, C)
            box_cls_i = box_cls_i.sigmoid_()
            keep_idxs = box_cls_i > self.pre_nms_thresh

            # multiply the classification scores with center scores
            box_cls_i *= center_score_i.sigmoid_()

            box_cls_i = box_cls_i[keep_idxs]
            keep_idxs_nonzero_i = keep_idxs.nonzero()

            box_loc_i = keep_idxs_nonzero_i[:, 0]
            class_i = keep_idxs_nonzero_i[:, 1]

            box_reg_i = box_reg_i[box_loc_i]
            locs_i = locs_i[box_loc_i]

            per_pre_nms_top_n = keep_idxs.sum().clamp(max=self.pre_nms_top_n)
            if keep_idxs.sum().item() > per_pre_nms_top_n.item():
                box_cls_i, topk_idxs = box_cls_i.topk(per_pre_nms_top_n,
                                                      sorted=False)

                class_i = class_i[topk_idxs]
                box_reg_i = box_reg_i[topk_idxs]
                locs_i = locs_i[topk_idxs]

            # predict boxes
            predicted_boxes = torch.stack([
                locs_i[:, 0] - box_reg_i[:, 0],
                locs_i[:, 1] - box_reg_i[:, 1],
                locs_i[:, 0] + box_reg_i[:, 2],
                locs_i[:, 1] + box_reg_i[:, 3],
            ],
                                          dim=1)
            box_cls_i = torch.sqrt(box_cls_i)

            boxes_all.append(predicted_boxes)
            scores_all.append(box_cls_i)
            class_idxs_all.append(class_i)

        boxes_all, scores_all, class_idxs_all = [
            cat(x) for x in [boxes_all, scores_all, class_idxs_all]
        ]

        # Apply per-class nms for each image
        keep = batched_nms(boxes_all, scores_all, class_idxs_all,
                           self.nms_thresh)
        keep = keep[:self.max_detections_per_image]

        result = Instances(image_size)
        result.pred_boxes = Boxes(boxes_all[keep])
        result.scores = scores_all[keep]
        result.pred_classes = class_idxs_all[keep]

        return result
Example #27
0
 def __call__(self, values):
     return Boxes(values[0])
Example #28
0
def find_top_rpn_proposals(
    proposals,
    pred_objectness_logits,
    images,
    nms_thresh,
    pre_nms_topk,
    post_nms_topk,
    min_box_side_len,
    training,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
    highest scoring proposals among all the feature maps if `training` is True,
    otherwise, returns the highest `post_nms_topk` scoring proposals for each
    feature map.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4).
            All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
        images (ImageList): Input images as an :class:`ImageList`.
        nms_thresh (float): IoU threshold to use for NMS
        pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is per
            feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is total,
            over all feature maps.
        min_box_side_len (float): minimum proposal box side length in pixels (absolute units
            wrt input images).
        training (bool): True if proposals are to be used in training, otherwise False.
            This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..."
            comment.

    Returns:
        proposals (list[Instances]): list of N Instances. The i-th Instances
            stores post_nms_topk object proposals for image i.
    """
    image_sizes = images.image_sizes  # in (h, w) order
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchor for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, proposals_i, logits_i in zip(itertools.count(), proposals,
                                               pred_objectness_logits):
        Hi_Wi_A = logits_i.shape[1]
        num_proposals_i = min(pre_nms_topk, Hi_Wi_A)

        # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812)
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i[batch_idx, :num_proposals_i]
        topk_idx = idx[batch_idx, :num_proposals_i]

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None],
                                       topk_idx]  # N x topk x 4

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(
            torch.full((num_proposals_i, ),
                       level_id,
                       dtype=torch.int64,
                       device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results = []
    for n, image_size in enumerate(image_sizes):
        boxes = Boxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        boxes.clip(image_size)
        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_side_len)
        lvl = level_ids
        if keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = boxes[keep], scores_per_img[
                keep], level_ids[keep]
        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
        # During training, topk is over the proposals from *all* images in the training batch.
        # During testing, it is over the proposals for each image separately.
        # As a result, the training behavior becomes batch-dependent,
        # and the configuration "POST_NMS_TOPK_TRAIN" ends up depending on the batch size.
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]

        res = Instances(image_size)

        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
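A hedged call sketch for the function above with a single feature level and two images; it assumes the function and its module-level helpers (`cat`, `batched_nms`) are importable and that detectron2 is installed. Shapes and thresholds are illustrative.

import torch
from detectron2.structures import ImageList

images = ImageList.from_tensors([torch.zeros(3, 64, 64), torch.zeros(3, 64, 64)])
proposals = [torch.rand(2, 200, 4) * 64]     # one level: (N, Hi*Wi*A, 4)
objectness = [torch.randn(2, 200)]           # one level: (N, Hi*Wi*A)

out = find_top_rpn_proposals(
    proposals, objectness, images,
    nms_thresh=0.7, pre_nms_topk=100, post_nms_topk=50,
    min_box_side_len=0, training=False)
print(len(out), len(out[0]))                 # 2 images, up to 50 proposals each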
Example #29
0
def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None):
    """
    Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official LVIS API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0 ** 2, 1e5 ** 2],  # all
        [0 ** 2, 32 ** 2],  # small
        [32 ** 2, 96 ** 2],  # medium
        [96 ** 2, 1e5 ** 2],  # large
        [96 ** 2, 128 ** 2],  # 96-128
        [128 ** 2, 256 ** 2],  # 128-256
        [256 ** 2, 512 ** 2],  # 256-512
        [512 ** 2, 1e5 ** 2],
    ]  # 512-inf
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for prediction_dict in dataset_predictions:
        predictions = prediction_dict["proposals"]

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = predictions.objectness_logits.sort(descending=True)[1]
        predictions = predictions[inds]

        ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]])
        anno = lvis_api.load_anns(ann_ids)
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno
        ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = Boxes(gt_boxes)
        gt_areas = torch.as_tensor([obj["area"] for obj in anno])

        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if limit is not None and len(predictions) > limit:
            predictions = predictions[:limit]

        overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(predictions), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = torch.cat(gt_overlaps, dim=0)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
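A toy numeric check of the recall computation at the end of the function above (the overlap values are made up): for each IoU threshold, recall is the fraction of ground-truth boxes whose best proposal overlap clears the threshold, and AR is the mean over thresholds.

import torch

gt_overlaps = torch.tensor([0.2, 0.55, 0.6, 0.8, 0.95])
num_pos = 5
thresholds = torch.arange(0.5, 0.95 + 1e-5, 0.05, dtype=torch.float32)
recalls = torch.stack([(gt_overlaps >= t).float().sum() / num_pos for t in thresholds])
print(recalls.mean())   # average recall over IoU thresholds 0.5:0.95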
Example #30
0
 def test_empty_cat(self):
     x = Boxes.cat([])
     self.assertEqual(tuple(x.tensor.shape), (0, 4))