Example no. 1
def get_empty_instance(h, w):
    inst = Instances((h, w))
    inst.gt_boxes = Boxes(torch.rand(0, 4))
    inst.gt_classes = torch.tensor([]).to(dtype=torch.int64)
    inst.gt_masks = BitMasks(torch.rand(0, h, w))
    return inst
def convert_output(output):
    r = Instances(tuple(output[0]))
    r.pred_classes = output[1]
    r.pred_boxes = Boxes(output[2])
    r.scores = output[3]
    return r
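# A minimal usage sketch for convert_output above: a hedged example assuming detectron2 is
# installed and that `output` is the flat tuple the function implies, i.e.
# (image_size, pred_classes, pred_boxes, scores), e.g. from a traced/scripted export.
# The tensors below are made-up placeholders.
import torch
from detectron2.structures import Boxes, Instances  # used by convert_output above

if __name__ == "__main__":
    fake_output = (
        torch.tensor([480, 640]),                 # (height, width)
        torch.tensor([3, 17]),                    # pred_classes
        torch.tensor([[10., 10., 50., 80.],
                      [100., 40., 200., 220.]]),  # pred_boxes in XYXY format
        torch.tensor([0.90, 0.75]),               # scores
    )
    inst = convert_output(fake_output)
    print(inst)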
Example no. 3
def generate_pair_instances(pred_instances):
    pred_pair_instances = []
    for pred_instance in pred_instances:
        pred_pair_instance = Instances(pred_instance.image_size)
        instance_num = len(pred_instance)
        pred_classes = pred_instance.pred_classes
        pred_boxes = pred_instance.pred_boxes.tensor
        # pred_masks=pred_instance.pred_masks

        image_height, image_width = pred_instance.image_size
        pred_pair_sub_classes = pred_classes.repeat(instance_num,
                                                    1).permute(1, 0).flatten()
        pred_pair_obj_classes = pred_classes.repeat(instance_num)

        pred_pair_sub_boxes = pred_boxes.repeat(instance_num, 1, 1).permute(
            1, 0, 2).contiguous().view(-1, 4)
        pred_pair_obj_boxes = pred_boxes.repeat(instance_num, 1)
        sub_boxes_x1 = pred_pair_sub_boxes[:, 0]
        obj_boxes_x1 = pred_pair_obj_boxes[:, 0]
        sub_boxes_y1 = pred_pair_sub_boxes[:, 1]
        obj_boxes_y1 = pred_pair_obj_boxes[:, 1]
        sub_boxes_x2 = pred_pair_sub_boxes[:, 2]
        obj_boxes_x2 = pred_pair_obj_boxes[:, 2]
        sub_boxes_y2 = pred_pair_sub_boxes[:, 3]
        obj_boxes_y2 = pred_pair_obj_boxes[:, 3]
        pair_boxes_x1 = torch.min(sub_boxes_x1, obj_boxes_x1)
        pair_boxes_y1 = torch.min(sub_boxes_y1, obj_boxes_y1)
        pair_boxes_x2 = torch.max(sub_boxes_x2, obj_boxes_x2)
        pair_boxes_y2 = torch.max(sub_boxes_y2, obj_boxes_y2)
        pred_pair_boxes = torch.stack(
            [pair_boxes_x1, pair_boxes_y1, pair_boxes_x2, pair_boxes_y2],
            dim=1)
        pred_pair_locations = torch.stack(
            [(sub_boxes_x1 - 0) / image_width,
             (sub_boxes_y1 - 0) / image_height,
             (sub_boxes_x2 - image_width) / image_width,
             (sub_boxes_y2 - image_height) / image_height,
             (obj_boxes_x1 - 0) / image_width,
             (obj_boxes_y1 - 0) / image_height,
             (obj_boxes_x2 - image_width) / image_width,
             (obj_boxes_y2 - image_height) / image_height],
            dim=1)
        pair_width = pair_boxes_x2 - pair_boxes_x1
        pair_height = pair_boxes_y2 - pair_boxes_y1
        pred_pair_union_locations = torch.stack(
            [(sub_boxes_x1 - pair_boxes_x1) / pair_width,
             (sub_boxes_y1 - pair_boxes_y1) / pair_height,
             (sub_boxes_x2 - pair_boxes_x2) / pair_width,
             (sub_boxes_y2 - pair_boxes_y2) / pair_height,
             (obj_boxes_x1 - pair_boxes_x1) / pair_width,
             (obj_boxes_y1 - pair_boxes_y1) / pair_height,
             (obj_boxes_x2 - pair_boxes_x2) / pair_width,
             (obj_boxes_y2 - pair_boxes_y2) / pair_height],
            dim=1)
        pred_pair_iou = boxes_iou(pred_pair_sub_boxes, pred_pair_obj_boxes)
        pred_pair_left_boxes = pred_pair_boxes.repeat(
            instance_num * instance_num, 1,
            1).permute(1, 0, 2).contiguous().view(-1, 4)
        pred_pair_right_boxes = pred_pair_boxes.repeat(
            instance_num * instance_num, 1)
        pred_union_iou = boxes_iou(pred_pair_left_boxes,
                                   pred_pair_right_boxes).view(
                                       instance_num * instance_num,
                                       instance_num * instance_num)

        left = torch.arange(0, instance_num).repeat(instance_num, 1).permute(
            1, 0).flatten().to(pred_classes.device)
        right = torch.arange(0,
                             instance_num).repeat(instance_num).flatten().to(
                                 pred_classes.device)
        lr_loc = torch.stack([left, right], dim=1)
        pred_pair_instance_relate_matrix = torch.zeros(
            instance_num * instance_num,
            instance_num).to(pred_classes.device).scatter_(1, lr_loc, 1.0)

        # pred_pair_sub_classes = []
        # pred_pair_obj_classes = []
        # pred_pair_boxes = []
        # # pred_pair_masks=[]
        # pred_pair_locations = []
        # pred_pair_union_locations = []
        # pred_pair_iou = []
        # pred_subobj_ids = []
        # pred_pair_instance_relate_matrix = []
        # for i in range(pred_classes.shape[0]):
        #     sub_class = pred_classes[i].item()
        #     sub_box = pred_boxes[i]
        #     sub_x1 = sub_box[0].item()
        #     sub_y1 = sub_box[1].item()
        #     sub_x2 = sub_box[2].item()
        #     sub_y2 = sub_box[3].item()
        #     # sub_mask = pred_masks[i]
        #     for j in range(pred_classes.shape[0]):
        #         obj_class = pred_classes[j].item()
        #         pred_pair_sub_classes.append(sub_class)
        #         pred_pair_obj_classes.append(obj_class)
        #
        #         obj_box = pred_boxes[j]
        #         obj_x1 = obj_box[0].item()
        #         obj_y1 = obj_box[1].item()
        #         obj_x2 = obj_box[2].item()
        #         obj_y2 = obj_box[3].item()
        #
        #         pair_x1 = min(sub_x1, obj_x1)
        #         pair_y1 = min(sub_y1, obj_y1)
        #         pair_x2 = max(sub_x2, obj_x2)
        #         pair_y2 = max(sub_y2, obj_y2)
        #
        #         pair_width = pair_x2 - pair_x1
        #         pair_height = pair_y2 - pair_y1
        #
        #         pred_pair_boxes.append([pair_x1, pair_y1, pair_x2, pair_y2])
        #         pred_pair_locations.append([(sub_x1 - 0) / image_width,
        #                                     (sub_y1 - 0) / image_height,
        #                                     (sub_x2 - image_width) / image_width,
        #                                     (sub_y2 - image_height) / image_height,
        #                                     (obj_x1 - 0) / image_width,
        #                                     (obj_y1 - 0) / image_height,
        #                                     (obj_x2 - image_width) / image_width,
        #                                     (obj_y2 - image_height) / image_height])
        #         pred_pair_union_locations.append([(sub_x1 - pair_x1) / pair_width,
        #                                           (sub_y1 - pair_y1) / pair_height,
        #                                           (sub_x2 - pair_x2) / pair_width,
        #                                           (sub_y2 - pair_y2) / pair_height,
        #                                           (obj_x1 - pair_x1) / pair_width,
        #                                           (obj_y1 - pair_y1) / pair_height,
        #                                           (obj_x2 - pair_x2) / pair_width,
        #                                           (obj_y2 - pair_y2) / pair_height])
        #         pred_pair_iou.append(box_iou(sub_box, obj_box))
        #         pred_subobj_ids.append([i, j])
        #         instance_vector = torch.zeros(pred_classes.shape[0]).to(pred_classes.device)
        #         instance_vector[i] = 1
        #         instance_vector[j] = 1
        #         pred_pair_instance_relate_matrix.append(instance_vector)
        #         # obj_mask=pred_masks[j]
        #         # pred_pair_masks.append(sub_mask+obj_mask)
        # pred_pair_instance.pred_pair_sub_classes = torch.IntTensor(pred_pair_sub_classes).to(pred_classes.device)
        # pred_pair_instance.pred_pair_obj_classes = torch.IntTensor(pred_pair_obj_classes).to(pred_classes.device)
        # pred_pair_instance.pred_pair_boxes = Boxes(torch.FloatTensor(pred_pair_boxes)).to(pred_classes.device)
        # # pred_pair_instance.pred_pair_masks=torch.stack(pred_pair_masks)
        # pred_pair_instance.pred_pair_locations = torch.FloatTensor(pred_pair_locations).to(pred_classes.device)
        # pred_pair_instance.pred_pair_union_locations = torch.FloatTensor(pred_pair_union_locations).to(pred_classes.device)
        # pred_pair_instance.pred_pair_iou = torch.FloatTensor(pred_pair_iou).to(pred_classes.device)
        # pred_subobj_ids = torch.IntTensor(pred_subobj_ids).to(pred_classes.device)
        # pred_pair_relate_matrix = []
        # for i in range(pred_subobj_ids.shape[0]):
        #     pred_pair_relate_matrix.append((pred_subobj_ids[i] == pred_subobj_ids).sum(1))
        # pred_union_iou=[]
        # for box1 in pred_pair_instance.pred_pair_boxes.tensor:
        #     pred_union_iou_row=[]
        #     for box2 in pred_pair_instance.pred_pair_boxes.tensor:
        #         pred_union_iou_row.append(box_iou(box1, box2))
        #     pred_union_iou.append(pred_union_iou_row)
        # pred_pair_instance.pred_union_iou=torch.FloatTensor(pred_union_iou).to(pred_classes.device)
        # pred_pair_relate_matrix = torch.stack(pred_pair_relate_matrix)
        # pred_pair_instance.pred_pair_relate_matrix = pred_pair_relate_matrix.float()
        # pred_pair_instance_relate_matrix = torch.stack(pred_pair_instance_relate_matrix)
        # pred_pair_instance.pred_pair_instance_relate_matrix = pred_pair_instance_relate_matrix.float()
        pred_pair_instance.pred_pair_sub_classes = pred_pair_sub_classes
        pred_pair_instance.pred_pair_obj_classes = pred_pair_obj_classes
        pred_pair_instance.pred_pair_boxes = Boxes(pred_pair_boxes)
        pred_pair_instance.pred_pair_locations = pred_pair_locations
        pred_pair_instance.pred_pair_union_locations = pred_pair_union_locations
        pred_pair_instance.pred_pair_iou = pred_pair_iou
        pred_pair_instance.pred_union_iou = pred_union_iou
        pred_pair_instance.pred_pair_instance_relate_matrix = pred_pair_instance_relate_matrix

        pred_pair_instances.append(pred_pair_instance)
    return pred_pair_instances
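# A small self-contained check (pure torch) of the repeat/permute pairing trick used above:
# for N instances, the subject index repeats each value N times while the object index tiles
# the whole range N times, so together they enumerate all N * N (subject, object) pairs.
import itertools
import torch

if __name__ == "__main__":
    instance_num = 3
    idx = torch.arange(instance_num)
    sub_idx = idx.repeat(instance_num, 1).permute(1, 0).flatten()  # 0,0,0,1,1,1,2,2,2
    obj_idx = idx.repeat(instance_num)                             # 0,1,2,0,1,2,0,1,2
    pairs = list(zip(sub_idx.tolist(), obj_idx.tolist()))
    assert pairs == list(itertools.product(range(instance_num), repeat=2))
    print(pairs)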
    def _forward_box(
        self, features: Dict[str, torch.Tensor], proposals: List[Instances]
    ) -> Union[Dict[str, torch.Tensor], List[Instances]]:
        """
        Forward logic of the box prediction branch. If `self.train_on_pred_boxes` is True,
            the function puts predicted boxes in the `proposal_boxes` field of the `proposals` argument.

        Args:
            features (dict[str, Tensor]): mapping from feature map names to tensor.
                Same as in :meth:`ROIHeads.forward`.
            proposals (list[Instances]): the per-image object proposals with
                their matching ground truth.
                Each has fields "proposal_boxes", "objectness_logits",
                "gt_classes", and "gt_boxes".

        Returns:
            In training, a dict of losses.
            In inference, a tuple of (pred_instances, all_scores, all_boxes), where
            `pred_instances` is a list of `Instances` with the predicted instances.
        """
        features = [features[f] for f in self.box_in_features]
        box_features = self.box_pooler(features,
                                       [x.proposal_boxes for x in proposals])

        objectness_logits = torch.cat(
            [x.objectness_logits + 1 for x in proposals], dim=0)
        box_features = box_features * objectness_logits.view(-1, 1, 1, 1)

        # torch.cuda.empty_cache()

        box_features = self.box_head(box_features)
        predictions = self.box_predictor(box_features, proposals)
        # del box_features

        if self.training:
            losses = self.box_predictor.losses(predictions, proposals,
                                               self.gt_classes_img_oh)

            self.pred_class_img_logits = (self.box_predictor.predict_probs_img(
                predictions, proposals).clone().detach())

            prev_pred_scores = predictions[0].detach()
            for k in range(self.refine_K):
                predictions_k = self.box_refinery[k](box_features)

                losses_k = self.box_refinery[k].losses_pcl(
                    predictions_k, proposals, prev_pred_scores,
                    self.gt_classes_img_oh)

                prev_pred_scores = self.box_refinery[k].predict_probs(
                    predictions_k, proposals)
                prev_pred_scores = [
                    prev_pred_score.detach()
                    for prev_pred_score in prev_pred_scores
                ][0]

                losses.update(losses_k)
            # proposals is modified in-place below, so losses must be computed first.
            if self.train_on_pred_boxes:
                with torch.no_grad():
                    pred_boxes = self.box_predictor.predict_boxes_for_gt_classes(
                        predictions, proposals)
                    for proposals_per_image, pred_boxes_per_image in zip(
                            proposals, pred_boxes):
                        proposals_per_image.proposal_boxes = Boxes(
                            pred_boxes_per_image)
            return losses
        else:
            predictions_K = []
            for k in range(self.refine_K):
                predictions_k = self.box_refinery[k](box_features)
                predictions_K.append(predictions_k)
            pred_instances, _, all_scores, all_boxes = self.box_refinery[
                -1].inference(predictions_K, proposals, pcl_bg=True)
            return pred_instances, all_scores, all_boxes
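# A pure-torch sketch (made-up shapes, not the real ROI head) of the reweighting step at the
# top of _forward_box: every proposal's pooled feature map is scaled by (objectness_logit + 1),
# so proposals the RPN is more confident about contribute more to the box head.
import torch

if __name__ == "__main__":
    num_proposals, channels, pool_h, pool_w = 4, 8, 7, 7
    box_features = torch.randn(num_proposals, channels, pool_h, pool_w)
    objectness_logits = torch.tensor([0.5, -0.2, 1.3, 0.0])
    weighted = box_features * (objectness_logits + 1).view(-1, 1, 1, 1)
    # Proposal 2 (logit 1.3) is scaled by 2.3, proposal 1 (logit -0.2) by 0.8, etc.
    print(weighted.shape)  # torch.Size([4, 8, 7, 7])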
Example no. 5
    def test_rpn(self):
        torch.manual_seed(121)
        cfg = get_cfg()
        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RPN"
        cfg.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator"
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1)
        backbone = build_backbone(cfg)
        proposal_generator = build_proposal_generator(cfg,
                                                      backbone.output_shape())
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        image_shape = (15, 15)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]],
                                dtype=torch.float32)
        gt_instances = Instances(image_shape)
        gt_instances.gt_boxes = Boxes(gt_boxes)
        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, [gt_instances[0], gt_instances[1]])

        expected_losses = {
            "loss_rpn_cls": torch.tensor(0.0804563984),
            "loss_rpn_loc": torch.tensor(0.0990132466),
        }
        for name in expected_losses.keys():
            self.assertTrue(
                torch.allclose(proposal_losses[name], expected_losses[name]))

        expected_proposal_boxes = [
            Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])),
            Boxes(
                torch.tensor([
                    [0, 0, 30, 20],
                    [0, 0, 16.7862777710, 13.1362524033],
                    [0, 0, 30, 13.3173446655],
                    [0, 0, 10.8602609634, 20],
                    [7.7165775299, 0, 27.3875980377, 20],
                ])),
        ]

        expected_objectness_logits = [
            torch.tensor([0.1225359365, -0.0133192837]),
            torch.tensor([
                0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783,
                -0.0428492837
            ]),
        ]

        for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip(
                proposals, expected_proposal_boxes, image_sizes,
                expected_objectness_logits):
            self.assertEqual(len(proposal), len(expected_proposal_box))
            self.assertEqual(proposal.image_size, im_size)
            self.assertTrue(
                torch.allclose(proposal.proposal_boxes.tensor,
                               expected_proposal_box.tensor))
            self.assertTrue(
                torch.allclose(proposal.objectness_logits,
                               expected_objectness_logit))
Example no. 6
    def inference_single_image(self, cate_preds, kernel_preds, seg_preds,
                               cur_size, ori_size):
        # overall info.
        h, w = cur_size
        f_h, f_w = seg_preds.size()[-2:]
        ratio = math.ceil(h / f_h)
        upsampled_size_out = (int(f_h * ratio), int(f_w * ratio))

        # process.
        inds = (cate_preds > self.score_threshold)
        cate_scores = cate_preds[inds]
        if len(cate_scores) == 0:
            results = Instances(ori_size)
            results.scores = torch.tensor([])
            results.pred_classes = torch.tensor([])
            results.pred_masks = torch.tensor([])
            results.pred_boxes = Boxes(torch.tensor([]))
            return results

        # cate_labels & kernel_preds
        inds = inds.nonzero()
        cate_labels = inds[:, 1]
        kernel_preds = kernel_preds[inds[:, 0]]

        # trans vector.
        size_trans = cate_labels.new_tensor(self.num_grids).pow(2).cumsum(0)
        strides = kernel_preds.new_ones(size_trans[-1])

        n_stage = len(self.num_grids)
        strides[:size_trans[0]] *= self.instance_strides[0]
        for ind_ in range(1, n_stage):
            strides[size_trans[ind_ - 1]:size_trans[ind_]] *= self.instance_strides[ind_]
        strides = strides[inds[:, 0]]

        # mask encoding.
        N, I = kernel_preds.shape
        kernel_preds = kernel_preds.view(N, I, 1, 1)
        seg_preds = F.conv2d(seg_preds, kernel_preds,
                             stride=1).squeeze(0).sigmoid()

        # mask.
        seg_masks = seg_preds > self.mask_threshold
        sum_masks = seg_masks.sum((1, 2)).float()

        # filter.
        keep = sum_masks > strides
        if keep.sum() == 0:
            results = Instances(ori_size)
            results.scores = torch.tensor([])
            results.pred_classes = torch.tensor([])
            results.pred_masks = torch.tensor([])
            results.pred_boxes = Boxes(torch.tensor([]))
            return results

        seg_masks = seg_masks[keep, ...]
        seg_preds = seg_preds[keep, ...]
        sum_masks = sum_masks[keep]
        cate_scores = cate_scores[keep]
        cate_labels = cate_labels[keep]

        # mask scoring.
        seg_scores = (seg_preds * seg_masks.float()).sum((1, 2)) / sum_masks
        cate_scores *= seg_scores

        # sort and keep top nms_pre
        sort_inds = torch.argsort(cate_scores, descending=True)
        if len(sort_inds) > self.max_before_nms:
            sort_inds = sort_inds[:self.max_before_nms]
        seg_masks = seg_masks[sort_inds, :, :]
        seg_preds = seg_preds[sort_inds, :, :]
        sum_masks = sum_masks[sort_inds]
        cate_scores = cate_scores[sort_inds]
        cate_labels = cate_labels[sort_inds]

        if self.nms_type == "matrix":
            # matrix nms & filter.
            cate_scores = matrix_nms(cate_labels,
                                     seg_masks,
                                     sum_masks,
                                     cate_scores,
                                     sigma=self.nms_sigma,
                                     kernel=self.nms_kernel)
            keep = cate_scores >= self.update_threshold
        elif self.nms_type == "mask":
            # original mask nms.
            keep = mask_nms(cate_labels,
                            seg_masks,
                            sum_masks,
                            cate_scores,
                            nms_thr=self.mask_threshold)
        else:
            raise NotImplementedError

        if keep.sum() == 0:
            results = Instances(ori_size)
            results.scores = torch.tensor([])
            results.pred_classes = torch.tensor([])
            results.pred_masks = torch.tensor([])
            results.pred_boxes = Boxes(torch.tensor([]))
            return results

        seg_preds = seg_preds[keep, :, :]
        cate_scores = cate_scores[keep]
        cate_labels = cate_labels[keep]

        # sort and keep top_k
        sort_inds = torch.argsort(cate_scores, descending=True)
        if len(sort_inds) > self.max_per_img:
            sort_inds = sort_inds[:self.max_per_img]
        seg_preds = seg_preds[sort_inds, :, :]
        cate_scores = cate_scores[sort_inds]
        cate_labels = cate_labels[sort_inds]

        # reshape to original size.
        seg_preds = F.interpolate(seg_preds.unsqueeze(0),
                                  size=upsampled_size_out,
                                  mode='bilinear')[:, :, :h, :w]
        seg_masks = F.interpolate(seg_preds, size=ori_size,
                                  mode='bilinear').squeeze(0)
        seg_masks = seg_masks > self.mask_threshold

        results = Instances(ori_size)
        results.pred_classes = cate_labels
        results.scores = cate_scores
        results.pred_masks = seg_masks

        # get bbox from mask
        pred_boxes = torch.zeros(seg_masks.size(0), 4)
        #for i in range(seg_masks.size(0)):
        #    mask = seg_masks[i].squeeze()
        #    ys, xs = torch.where(mask)
        #    pred_boxes[i] = torch.tensor([xs.min(), ys.min(), xs.max(), ys.max()]).float()
        results.pred_boxes = Boxes(pred_boxes)

        return results
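# The loop commented out above derives a tight XYXY box from each binary mask. Below is a
# hedged pure-torch sketch of the same idea; masks_to_boxes_sketch is a hypothetical helper,
# not part of the model above, and empty masks simply map to an all-zero box.
import torch


def masks_to_boxes_sketch(masks: torch.Tensor) -> torch.Tensor:
    """masks: (N, H, W) bool tensor -> (N, 4) float tensor of XYXY boxes."""
    n = masks.shape[0]
    boxes = torch.zeros(n, 4, dtype=torch.float32)
    rows_hit = masks.any(dim=2)  # (N, H): rows containing mask pixels
    cols_hit = masks.any(dim=1)  # (N, W): columns containing mask pixels
    for i in range(n):
        if not masks[i].any():
            continue
        y_idx = torch.where(rows_hit[i])[0]
        x_idx = torch.where(cols_hit[i])[0]
        boxes[i] = torch.stack(
            [x_idx.min(), y_idx.min(), x_idx.max(), y_idx.max()]).float()
    return boxes


if __name__ == "__main__":
    demo = torch.zeros(2, 8, 8, dtype=torch.bool)
    demo[0, 2:5, 3:7] = True  # expected box: [3., 2., 6., 4.]
    print(masks_to_boxes_sketch(demo))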
def convert_to_coco_dict(dataset_name):
    """
    Convert an instance detection/segmentation or keypoint detection dataset
    in detectron2's standard format into COCO json format.

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name (str):
            name of the source dataset
            Must be registered in DatasetCatalog and in detectron2's standard format.
            Must have corresponding metadata "thing_classes"
    Returns:
        coco_dict: serializable dict in COCO json format
    """

    dataset_dicts = DatasetCatalog.get(dataset_name)
    metadata = MetadataCatalog.get(dataset_name)

    # unmap the category mapping ids for COCO
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()}
        reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id]  # noqa
    else:
        reverse_id_mapper = lambda contiguous_id: contiguous_id  # noqa

    categories = [
        {"id": reverse_id_mapper(id), "name": name}
        for id, name in enumerate(metadata.thing_classes)
    ]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict.get("annotations", [])
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                if isinstance(segmentation, list):
                    polygons = PolygonMasks([segmentation])
                    area = polygons.area()[0].item()
                elif isinstance(segmentation, dict):  # RLE
                    area = mask_util.area(segmentation).item()
                else:
                    raise TypeError(f"Unknown segmentation type {type(segmentation)}!")
            else:
                # Computing areas using bounding boxes
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # For COCO format consistency we subtract 0.5
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = float(area)
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)
            coco_annotation["category_id"] = reverse_id_mapper(annotation["category_id"])

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                seg = coco_annotation["segmentation"] = annotation["segmentation"]
                if isinstance(seg, dict):  # RLE
                    counts = seg["counts"]
                    if not isinstance(counts, str):
                        # make it json-serializable
                        seg["counts"] = counts.decode("ascii")

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {"info": info, "images": coco_images, "categories": categories, "licenses": None}
    if len(coco_annotations) > 0:
        coco_dict["annotations"] = coco_annotations
    return coco_dict
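# A hedged usage sketch for convert_to_coco_dict: register a tiny one-image dataset (the name
# "toy_dataset" and its contents are made up) and convert it. Assumes detectron2 is installed
# and that the module-level imports used above (logger, BoxMode, etc.) are in scope.
import json

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode


def _toy_dataset():
    return [{
        "image_id": 1,
        "file_name": "toy.jpg",
        "width": 640,
        "height": 480,
        "annotations": [{
            "bbox": [10, 20, 100, 80],  # XYWH
            "bbox_mode": BoxMode.XYWH_ABS,
            "category_id": 0,
            "iscrowd": 0,
        }],
    }]


if __name__ == "__main__":
    DatasetCatalog.register("toy_dataset", _toy_dataset)
    MetadataCatalog.get("toy_dataset").set(thing_classes=["widget"])
    coco_dict = convert_to_coco_dict("toy_dataset")
    print(json.dumps(coco_dict, indent=2, default=str)[:400])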
Example no. 8
def match_predictions_to_groundtruth(predicted_box_means,
                                     predicted_cls_probs,
                                     predicted_box_covariances,
                                     gt_box_means,
                                     gt_cat_idxs,
                                     iou_min=0.1,
                                     iou_correct=0.7):

    true_positives = dict({
        'predicted_box_means': torch.Tensor().to(device),
        'predicted_box_covariances': torch.Tensor().to(device),
        'predicted_cls_probs': torch.Tensor().to(device),
        'gt_box_means': torch.Tensor().to(device),
        'gt_cat_idxs': torch.Tensor().to(device),
        'iou_with_ground_truth': torch.Tensor().to(device)
    })

    duplicates = dict({
        'predicted_box_means': torch.Tensor().to(device),
        'predicted_box_covariances': torch.Tensor().to(device),
        'predicted_cls_probs': torch.Tensor().to(device),
        'gt_box_means': torch.Tensor().to(device),
        'gt_cat_idxs': torch.Tensor().to(device),
        'iou_with_ground_truth': torch.Tensor().to(device)
    })

    false_positives = dict({
        'predicted_box_means': torch.Tensor().to(device),
        'predicted_box_covariances': torch.Tensor().to(device),
        'predicted_cls_probs': torch.Tensor().to(device)
    })

    false_negatives = dict({
        'gt_box_means': torch.Tensor().to(device),
        'gt_cat_idxs': torch.Tensor().to(device)
    })

    with tqdm.tqdm(total=len(predicted_box_means)) as pbar:
        for key in predicted_box_means.keys():
            pbar.update(1)

            # Check if gt available, if not all detections go to false
            # positives
            if key not in gt_box_means.keys():
                false_positives['predicted_box_means'] = torch.cat(
                    (false_positives['predicted_box_means'],
                     predicted_box_means[key]))
                false_positives['predicted_cls_probs'] = torch.cat(
                    (false_positives['predicted_cls_probs'],
                     predicted_cls_probs[key]))
                false_positives['predicted_box_covariances'] = torch.cat(
                    (false_positives['predicted_box_covariances'],
                     predicted_box_covariances[key]))
                continue

            # Compute iou between gt boxes and all predicted boxes in frame
            frame_gt_boxes = Boxes(gt_box_means[key])
            frame_predicted_boxes = Boxes(predicted_box_means[key])

            match_iou = pairwise_iou(frame_gt_boxes, frame_predicted_boxes)

            # Get false negative ground truth, which are fully missed.
            # These can be found by looking for ground truth boxes that have an
            # iou < iou_min with any detection
            false_negative_idxs = (match_iou <= iou_min).all(1)
            false_negatives['gt_box_means'] = torch.cat(
                (false_negatives['gt_box_means'],
                 gt_box_means[key][false_negative_idxs]))
            false_negatives['gt_cat_idxs'] = torch.cat(
                (false_negatives['gt_cat_idxs'],
                 gt_cat_idxs[key][false_negative_idxs]))

            # False positives are detections that have an iou < match iou with
            # any ground truth object.
            false_positive_idxs = (match_iou <= iou_min).all(0)
            false_positives['predicted_box_means'] = torch.cat(
                (false_positives['predicted_box_means'],
                 predicted_box_means[key][false_positive_idxs]))
            false_positives['predicted_cls_probs'] = torch.cat(
                (false_positives['predicted_cls_probs'],
                 predicted_cls_probs[key][false_positive_idxs]))
            false_positives['predicted_box_covariances'] = torch.cat(
                (false_positives['predicted_box_covariances'],
                 predicted_box_covariances[key][false_positive_idxs]))

            # True positives are any detections with match iou >= iou_correct. These detections
            # need to be separated into a true positive set and a duplicate set: the true positive
            # is the detection assigned the highest score by the neural network.
            true_positive_idxs = torch.nonzero(match_iou >= iou_correct)

            # Setup tensors to allow assignment of detections only once.
            gt_idxs_processed = torch.tensor([]).type(
                torch.LongTensor).to(device)

            for i in torch.arange(frame_gt_boxes.tensor.shape[0]):
                # Check if this detection has already been assigned to a previous ground truth
                # box and drop it if that is the case. This is a very rare occurrence, but it
                # needs to be handled nevertheless.
                gt_idxs = true_positive_idxs[true_positive_idxs[:, 0] == i][:, 1]
                non_valid_idxs = torch.nonzero(
                    gt_idxs_processed[..., None] == gt_idxs)

                if non_valid_idxs.shape[0] > 0:
                    gt_idxs[non_valid_idxs[:, 1]] = -1
                    gt_idxs = gt_idxs[gt_idxs != -1]

                if gt_idxs.shape[0] > 0:
                    current_matches_predicted_cls_probs = predicted_cls_probs[
                        key][gt_idxs]
                    max_score, _ = torch.max(
                        current_matches_predicted_cls_probs, 1)
                    _, max_idxs = max_score.topk(max_score.shape[0])

                    if max_idxs.shape[0] > 1:
                        max_idx = max_idxs[0]
                        duplicate_idxs = max_idxs[1:]
                    else:
                        max_idx = max_idxs
                        duplicate_idxs = torch.empty(0).to(device)

                    current_matches_predicted_box_means = predicted_box_means[
                        key][gt_idxs]
                    current_matches_predicted_box_covariances = predicted_box_covariances[
                        key][gt_idxs]

                    # Highest scoring detection goes to true positives
                    true_positives['predicted_box_means'] = torch.cat(
                        (true_positives['predicted_box_means'],
                         current_matches_predicted_box_means[max_idx:max_idx +
                                                             1, :]))
                    true_positives['predicted_cls_probs'] = torch.cat(
                        (true_positives['predicted_cls_probs'],
                         current_matches_predicted_cls_probs[max_idx:max_idx +
                                                             1, :]))
                    true_positives['predicted_box_covariances'] = torch.cat(
                        (true_positives['predicted_box_covariances'],
                         current_matches_predicted_box_covariances[
                             max_idx:max_idx + 1, :]))

                    true_positives['gt_box_means'] = torch.cat(
                        (true_positives['gt_box_means'],
                         gt_box_means[key][i:i + 1, :]))
                    true_positives['gt_cat_idxs'] = torch.cat(
                        (true_positives['gt_cat_idxs'],
                         gt_cat_idxs[key][i:i + 1, :]))
                    true_positives['iou_with_ground_truth'] = torch.cat(
                        (true_positives['iou_with_ground_truth'],
                         match_iou[i, gt_idxs][max_idx:max_idx + 1]))

                    # Lower scoring redundant detections go to duplicates
                    if duplicate_idxs.shape[0] > 1:
                        duplicates['predicted_box_means'] = torch.cat(
                            (duplicates['predicted_box_means'],
                             current_matches_predicted_box_means[
                                 duplicate_idxs, :]))
                        duplicates['predicted_cls_probs'] = torch.cat(
                            (duplicates['predicted_cls_probs'],
                             current_matches_predicted_cls_probs[
                                 duplicate_idxs, :]))
                        duplicates['predicted_box_covariances'] = torch.cat(
                            (duplicates['predicted_box_covariances'],
                             current_matches_predicted_box_covariances[
                                 duplicate_idxs, :]))

                        duplicates['gt_box_means'] = torch.cat(
                            (duplicates['gt_box_means'], gt_box_means[key][
                                np.repeat(i, duplicate_idxs.shape[0]), :]))
                        duplicates['gt_cat_idxs'] = torch.cat(
                            (duplicates['gt_cat_idxs'], gt_cat_idxs[key][
                                np.repeat(i, duplicate_idxs.shape[0]), :]))
                        duplicates['iou_with_ground_truth'] = torch.cat(
                            (duplicates['iou_with_ground_truth'],
                             match_iou[i, gt_idxs][duplicate_idxs]))

                    elif duplicate_idxs.shape[0] == 1:
                        # Special case when only one duplicate exists, required to
                        # index properly for torch.cat
                        duplicates['predicted_box_means'] = torch.cat(
                            (duplicates['predicted_box_means'],
                             current_matches_predicted_box_means[
                                 duplicate_idxs:duplicate_idxs + 1, :]))
                        duplicates['predicted_cls_probs'] = torch.cat(
                            (duplicates['predicted_cls_probs'],
                             current_matches_predicted_cls_probs[
                                 duplicate_idxs:duplicate_idxs + 1, :]))
                        duplicates['predicted_box_covariances'] = torch.cat(
                            (duplicates['predicted_box_covariances'],
                             current_matches_predicted_box_covariances[
                                 duplicate_idxs:duplicate_idxs + 1, :]))

                        duplicates['gt_box_means'] = torch.cat(
                            (duplicates['gt_box_means'],
                             gt_box_means[key][i:i + 1, :]))
                        duplicates['gt_cat_idxs'] = torch.cat(
                            (duplicates['gt_cat_idxs'],
                             gt_cat_idxs[key][i:i + 1, :]))
                        duplicates['iou_with_ground_truth'] = torch.cat(
                            (duplicates['iou_with_ground_truth'],
                             match_iou[i,
                                       gt_idxs][duplicate_idxs:duplicate_idxs +
                                                1]))

    matched_results = dict()
    matched_results.update({
        "true_positives": true_positives,
        "duplicates": duplicates,
        "false_positives": false_positives,
        "false_negatives": false_negatives
    })
    return matched_results
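# A toy illustration (detectron2 required) of how the iou_min / iou_correct thresholds above
# partition detections: IoU <= iou_min with every gt box -> false positive, IoU >= iou_correct
# -> true-positive/duplicate candidate, anything in between is left unmatched by this routine.
import torch
from detectron2.structures import Boxes, pairwise_iou

if __name__ == "__main__":
    iou_min, iou_correct = 0.1, 0.7
    gt = Boxes(torch.tensor([[0., 0., 10., 10.]]))
    dets = Boxes(torch.tensor([
        [0., 0., 10., 10.],    # IoU 1.0 -> true-positive candidate
        [0., 0., 10., 20.],    # IoU 0.5 -> between the thresholds
        [50., 50., 60., 60.],  # IoU 0.0 -> false positive
    ]))
    match_iou = pairwise_iou(gt, dets)  # shape (num_gt, num_dets)
    print(match_iou)
    print("false positives:", (match_iou <= iou_min).all(0))
    print("tp candidates:  ", (match_iou >= iou_correct).any(0))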
Example no. 9
    def label_and_sample_proposals(self, proposals, targets):
        """
        Prepare some proposals to be used to train the ROI heads.
        It performs box matching between `proposals` and `targets`, and assigns
        training labels to the proposals.
        It returns ``self.batch_size_per_image`` random samples from proposals and groundtruth
        boxes, with a fraction of positives that is no larger than
        ``self.positive_sample_fraction``.

        Args:
            See :meth:`ROIHeads.forward`

        Returns:
            list[Instances]:
                length `N` list of `Instances`s containing the proposals
                sampled for training. Each `Instances` has the following fields:

                - proposal_boxes: the proposal boxes
                - gt_boxes: the ground-truth box that the proposal is assigned to
                  (this is only meaningful if the proposal has a label > 0; if label = 0
                  then the ground-truth box is random)

                Other fields, such as "gt_classes" and "gt_masks", that are included in `targets` are also set.
        """
        # ywlee for using targets.gt_classes
        # in add_ground_truth_to_proposal()
        # gt_boxes = [x.gt_boxes for x in targets]

        # Augment proposals with ground-truth boxes.
        # In the case of learned proposals (e.g., RPN), when training starts
        # the proposals will be low quality due to random initialization.
        # It's possible that none of these initial
        # proposals have high enough overlap with the gt objects to be used
        # as positive examples for the second stage components (box head,
        # cls head, mask head). Adding the gt boxes to the set of proposals
        # ensures that the second stage components will have some positive
        # examples from the start of training. For RPN, this augmentation improves
        # convergence and empirically improves box AP on COCO by about 0.5
        # points (under one tested configuration).
        if self.proposal_append_gt:
            proposals = add_ground_truth_to_proposals(targets, proposals)

        proposals_with_gt = []

        num_fg_samples = []
        num_bg_samples = []
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            has_gt = len(targets_per_image) > 0
            match_quality_matrix = pairwise_iou(
                targets_per_image.gt_boxes, proposals_per_image.proposal_boxes)
            matched_idxs, matched_labels = self.proposal_matcher(
                match_quality_matrix)
            sampled_idxs, gt_classes = self._sample_proposals(
                matched_idxs, matched_labels, targets_per_image.gt_classes)

            # Set target attributes of the sampled proposals:
            proposals_per_image = proposals_per_image[sampled_idxs]
            proposals_per_image.gt_classes = gt_classes

            # We index all the attributes of targets that start with "gt_"
            # and have not been added to proposals yet (the only one so far is "gt_classes").
            if has_gt:
                sampled_targets = matched_idxs[sampled_idxs]
                # NOTE: here the indexing waste some compute, because heads
                # like masks, keypoints, etc, will filter the proposals again,
                # (by foreground/background, or number of keypoints in the image, etc)
                # so we essentially index the data twice.
                for (trg_name,
                     trg_value) in targets_per_image.get_fields().items():
                    if trg_name.startswith(
                            "gt_") and not proposals_per_image.has(trg_name):
                        proposals_per_image.set(trg_name,
                                                trg_value[sampled_targets])
            else:
                gt_boxes = Boxes(
                    targets_per_image.gt_boxes.tensor.new_zeros(
                        (len(sampled_idxs), 4)))
                proposals_per_image.gt_boxes = gt_boxes

            num_bg_samples.append(
                (gt_classes == self.num_classes).sum().item())
            num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1])
            proposals_with_gt.append(proposals_per_image)

        # Log the number of fg/bg samples that are selected for training ROI heads
        storage = get_event_storage()
        storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples))
        storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples))

        return proposals_with_gt
def _evaluate_box_proposals(dataset_predictions,
                            lvis_api,
                            thresholds=None,
                            area="all",
                            limit=None):
    """
    Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official LVIS API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],
    ]  # 512-inf
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for prediction_dict in dataset_predictions:
        predictions = prediction_dict["proposals"]

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = predictions.objectness_logits.sort(descending=True)[1]
        predictions = predictions[inds]

        ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]])
        anno = lvis_api.load_anns(ann_ids)
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
            for obj in anno
        ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(
            -1, 4)  # guard against no boxes
        gt_boxes = Boxes(gt_boxes)
        gt_areas = torch.as_tensor([obj["area"] for obj in anno])

        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if limit is not None and len(predictions) > limit:
            predictions = predictions[:limit]

        overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(predictions), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = torch.cat(gt_overlaps, dim=0)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
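# A self-contained toy run (pure torch) of the greedy matching loop above: each step picks the
# best-covered remaining gt box, records the IoU of the proposal that covers it, and removes
# both by setting their row/column to -1. The overlaps matrix below is hand-made.
import torch

if __name__ == "__main__":
    overlaps = torch.tensor([[0.9, 0.2],   # rows = proposals
                             [0.6, 0.8],   # columns = gt boxes
                             [0.1, 0.7]])
    num_gt = overlaps.shape[1]
    gt_overlaps = torch.zeros(num_gt)
    for j in range(num_gt):
        max_overlaps, argmax_overlaps = overlaps.max(dim=0)  # best proposal per gt
        gt_ovr, gt_ind = max_overlaps.max(dim=0)             # best-covered gt
        box_ind = argmax_overlaps[gt_ind]
        gt_overlaps[j] = overlaps[box_ind, gt_ind]
        overlaps[box_ind, :] = -1
        overlaps[:, gt_ind] = -1
    print(gt_overlaps)  # tensor([0.9000, 0.8000])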
Example no. 11
def convert_to_coco_dict(dataset_name):
    """
    Convert a dataset in detectron2's standard format into COCO json format

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name:
            name of the source dataset
            must be registered in DatasetCatalog and in detectron2's standard format
    Returns:
        coco_dict: serializable dict in COCO json format
    """

    dataset_dicts = DatasetCatalog.get(dataset_name)
    categories = [{
        "id": id,
        "name": name
    } for id, name in enumerate(
        MetadataCatalog.get(dataset_name).thing_classes)]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict["annotations"]
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                polygons = PolygonMasks([segmentation])
                area = polygons.area()[0].item()
            else:
                # Computing areas using bounding boxes
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS,
                                          BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # For COCO format consistency we subtract 0.5
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = area
            coco_annotation["category_id"] = annotation["category_id"]
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                coco_annotation["segmentation"] = annotation["segmentation"]

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description":
        "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
def apply_late_fusion_and_evaluate(cfg, evaluator, det_1, det_2, method):
    evaluator.reset()
    img_folder = '../../../Datasets/FLIR/val/thermal_8_bit/'

    num_img = len(det_2['image'])
    count_1 = 0
    count_2 = 0
    count_fusion = 0

    print('Method: ', method)

    for i in range(num_img):
        info_1 = {}
        info_1['img_name'] = det_1['image'][i]
        info_1['bbox'] = det_1['boxes'][i]
        info_1['score'] = det_1['scores'][i]
        info_1['class'] = det_1['classes'][i]

        info_2 = {}
        info_2['img_name'] = det_2['image'][i].split('.')[0] + '.jpeg'
        info_2['bbox'] = det_2['boxes'][i]
        info_2['score'] = det_2['scores'][i]
        info_2['class'] = det_2['classes'][i]
        #pdb.set_trace()
        if len(info_1['bbox']) == 0 or len(info_2['bbox']) == 0:
            if (len(info_1['bbox']) > 0):
                out_boxes = np.array(info_1['bbox'])
                out_class = torch.Tensor(info_1['class'])
                out_scores = torch.Tensor(info_1['score'])
            elif (len(info_2['bbox']) > 0):
                out_boxes = np.array(info_2['bbox'])
                out_class = torch.Tensor(info_2['class'])
                out_scores = torch.Tensor(info_2['score'])
            else:
                # Both detectors returned no boxes; the outputs stay empty.
                out_boxes = np.array(info_2['bbox'])
                out_class = torch.Tensor(info_2['class'])
                out_scores = torch.Tensor(info_2['score'])
        else:
            if method == 'nms':
                out_boxes, out_scores, out_class = nms_1(info_1, info_2)
            elif method == 'pooling':
                in_boxes, in_scores, in_class = prepare_data(info_1, info_2)
                out_boxes = in_boxes
                out_scores = torch.Tensor(in_scores)
                out_class = torch.Tensor(in_class)
            elif method in ('baysian', 'baysian_avg_bbox', 'avg_score', 'baysian_wt_score_box'):
                threshold = 0.5
                in_boxes, in_scores, in_class = prepare_data(info_1, info_2)
                keep, out_scores, out_boxes, out_class = nms_2(
                    in_boxes, in_scores, in_class, threshold, method)

        count_1 += len(info_1['bbox'])
        count_2 += len(info_2['bbox'])
        count_fusion += len(out_boxes)

        file_name = img_folder + info_1['img_name'].split('.')[0] + '.jpeg'
        img = cv2.imread(file_name)
        H, W, _ = img.shape

        # Handle inputs
        inputs = []
        input_info = {}
        input_info['file_name'] = file_name
        input_info['height'] = H
        input_info['width'] = W
        input_info['image_id'] = det_2['image_id'][i]
        input_info['image'] = torch.Tensor(img)
        inputs.append(input_info)

        # Handle outputs
        outputs = []
        out_info = {}
        proposals = Instances([H, W])
        proposals.pred_boxes = Boxes(out_boxes)
        proposals.scores = out_scores
        proposals.pred_classes = out_class
        out_info['instances'] = proposals
        outputs.append(out_info)
        evaluator.process(inputs, outputs)

        img = draw_box(img, out_boxes, (0, 255, 0))
        out_img_name = 'out_img_baysian_fusion/' + file_name.split(
            'thermal_8_bit/')[1].split('.')[0] + '_baysian_avg_bbox.jpg'
        #cv2.imwrite(out_img_name, img)
        #pdb.set_trace()
        """
        if '09115' in file_name:
            out_img_name = 'out_img_baysian_fusion/' + file_name.split('thermal_8_bit/')[1].split('.')[0]+'_baysian_avg_bbox.jpg'
            pdb.set_trace()
            cv2.imwrite(out_img_name, img)
        """

    results = evaluator.evaluate(out_eval_path='FLIR_pooling_.out')

    if results is None:
        results = {}

    avgRGB = count_1 / num_img
    avgThermal = count_2 / num_img
    avgNMS = count_fusion / num_img

    print('Avg bbox for RGB:', avgRGB, "average count thermal:", avgThermal,
          'average count nms:', avgNMS)
    return results
Example no. 13
    def forward_for_single_feature_map(self,
                                       locations,
                                       logits_pred,
                                       reg_pred,
                                       ctrness_pred,
                                       image_sizes,
                                       top_feat=None):
        N, C, H, W = logits_pred.shape

        # put in the same format as locations
        logits_pred = logits_pred.view(N, C, H, W).permute(0, 2, 3, 1)
        logits_pred = logits_pred.reshape(N, -1, C).sigmoid()
        box_regression = reg_pred.view(N, 4, H, W).permute(0, 2, 3, 1)
        box_regression = box_regression.reshape(N, -1, 4)
        ctrness_pred = ctrness_pred.view(N, 1, H, W).permute(0, 2, 3, 1)
        ctrness_pred = ctrness_pred.reshape(N, -1).sigmoid()
        if top_feat is not None:
            top_feat = top_feat.view(N, -1, H, W).permute(0, 2, 3, 1)
            top_feat = top_feat.reshape(N, H * W, -1)

        # if self.thresh_with_ctr is True, we multiply the classification
        # scores with centerness scores before applying the threshold.
        if self.thresh_with_ctr:
            logits_pred = logits_pred * ctrness_pred[:, :, None]
        candidate_inds = logits_pred > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_topk)

        if not self.thresh_with_ctr:
            logits_pred = logits_pred * ctrness_pred[:, :, None]

        results = []
        for i in range(N):
            per_box_cls = logits_pred[i]
            per_candidate_inds = candidate_inds[i]
            per_box_cls = per_box_cls[per_candidate_inds]

            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]

            per_box_regression = box_regression[i]
            per_box_regression = per_box_regression[per_box_loc]
            per_locations = locations[per_box_loc]
            if top_feat is not None:
                per_top_feat = top_feat[i]
                per_top_feat = per_top_feat[per_box_loc]

            per_pre_nms_top_n = pre_nms_top_n[i]

            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]
                if top_feat is not None:
                    per_top_feat = per_top_feat[top_k_indices]

            detections = torch.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ],
                                     dim=1)

            boxlist = Instances(image_sizes[i])
            boxlist.pred_boxes = Boxes(detections)
            boxlist.scores = torch.sqrt(per_box_cls)
            boxlist.pred_classes = per_class
            boxlist.locations = per_locations
            if top_feat is not None:
                boxlist.top_feat = per_top_feat
            results.append(boxlist)

        return results
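
# A minimal numeric sketch of the decoding performed above (same FCOS-style
# convention: a location (cx, cy) plus per-side distances (l, t, r, b) gives
# the box (cx - l, cy - t, cx + r, cy + b)). Dummy values only.
import torch
locations = torch.tensor([[100., 100.]])     # one feature-map location
reg = torch.tensor([[10., 20., 30., 40.]])   # predicted (l, t, r, b)
boxes = torch.stack([
    locations[:, 0] - reg[:, 0],
    locations[:, 1] - reg[:, 1],
    locations[:, 0] + reg[:, 2],
    locations[:, 1] + reg[:, 3],
], dim=1)
# boxes == tensor([[ 90.,  80., 130., 140.]])
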
Esempio n. 14
0
def fast_rcnn_inference_single_image_with_anchor(proposals, boxes, scores,
                                                 image_shape, score_thresh,
                                                 nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """

    anchors = proposals.get_fields()['anchor_boxes'].tensor
    proposals = proposals.get_fields()['proposal_boxes'].tensor
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        anchors = anchors[valid_mask]
        proposals = proposals[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    anchors = Boxes(anchors)
    proposals = Boxes(proposals)
    anchors.clip(image_shape)
    proposals.clip(image_shape)
    anchors = anchors.tensor
    proposals = proposals.tensor
    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    anchors = anchors[filter_inds[:, 0]]
    proposals = proposals[filter_inds[:, 0]]
    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds, anchors, proposals = boxes[keep], scores[keep], filter_inds[keep], anchors[keep], \
                                                     proposals[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    result.anchors = Boxes(anchors)
    result.proposals = Boxes(proposals)
    return result, filter_inds[:, 0]
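
# A small sketch of the per-class NMS step above (assumption: torchvision's
# batched_nms, which is what detectron2's batched_nms builds on): identical
# boxes with different class indices do not suppress each other.
import torch
from torchvision.ops import batched_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [0., 0., 10., 10.],
                      [20., 20., 30., 30.]])
scores = torch.tensor([0.9, 0.8, 0.7])
classes = torch.tensor([0, 1, 0])
keep = batched_nms(boxes, scores, classes, iou_threshold=0.5)
# keep == tensor([0, 1, 2]); the duplicate box survives because it has class 1
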
Esempio n. 15
0
    def get_pgt_top_k(
        self,
        prev_pred_boxes,
        prev_pred_scores,
        proposals,
        top_k=1,
        thres=0,
        need_instance=True,
        need_weight=True,
        suffix="",
    ):
        if isinstance(prev_pred_scores, torch.Tensor):
            num_preds_per_image = [len(p) for p in proposals]
            prev_pred_scores = prev_pred_scores.split(num_preds_per_image, dim=0)
        else:
            assert isinstance(prev_pred_scores, list)
            assert isinstance(prev_pred_scores[0], torch.Tensor)

        assert isinstance(prev_pred_boxes, tuple) or isinstance(prev_pred_boxes, list)
        if isinstance(prev_pred_boxes[0], Boxes):
            num_preds = [len(prev_pred_box) for prev_pred_box in prev_pred_boxes]
            prev_pred_boxes = [
                prev_pred_box.tensor.unsqueeze(1).expand(num_pred, self.num_classes, 4)
                for num_pred, prev_pred_box in zip(num_preds, prev_pred_boxes)
            ]
        else:
            assert isinstance(prev_pred_boxes[0], torch.Tensor)
            if self.cls_agnostic_bbox_reg:
                num_preds = [prev_pred_box.size(0) for prev_pred_box in prev_pred_boxes]
                prev_pred_boxes = [
                    prev_pred_box.unsqueeze(1).expand(num_pred, self.num_classes, 4)
                    for num_pred, prev_pred_box in zip(num_preds, prev_pred_boxes)
                ]
        prev_pred_boxes = [
            prev_pred_box.view(-1, self.num_classes, 4) for prev_pred_box in prev_pred_boxes
        ]

        prev_pred_scores = [
            torch.index_select(prev_pred_score, 1, gt_int)
            for prev_pred_score, gt_int in zip(prev_pred_scores, self.gt_classes_img_int)
        ]
        prev_pred_boxes = [
            torch.index_select(prev_pred_box, 1, gt_int)
            for prev_pred_box, gt_int in zip(prev_pred_boxes, self.gt_classes_img_int)
        ]

        # get top k
        num_preds = [prev_pred_score.size(0) for prev_pred_score in prev_pred_scores]
        if top_k >= 1:
            top_ks = [min(num_pred, int(top_k)) for num_pred in num_preds]
        elif 0 < top_k < 1:
            top_ks = [max(int(num_pred * top_k), 1) for num_pred in num_preds]
        else:
            top_ks = [min(num_pred, 1) for num_pred in num_preds]
        pgt_scores_idxs = [
            torch.topk(prev_pred_score, top_k, dim=0)
            for prev_pred_score, top_k in zip(prev_pred_scores, top_ks)
        ]
        pgt_scores = [item[0] for item in pgt_scores_idxs]
        pgt_idxs = [item[1] for item in pgt_scores_idxs]
        pgt_idxs = [
            torch.unsqueeze(pgt_idx, 2).expand(top_k, gt_int.numel(), 4)
            for pgt_idx, top_k, gt_int in zip(pgt_idxs, top_ks, self.gt_classes_img_int)
        ]
        pgt_boxes = [
            torch.gather(prev_pred_box, 0, pgt_idx)
            for prev_pred_box, pgt_idx in zip(prev_pred_boxes, pgt_idxs)
        ]
        pgt_classes = [
            torch.unsqueeze(gt_int, 0).expand(top_k, gt_int.numel())
            for gt_int, top_k in zip(self.gt_classes_img_int, top_ks)
        ]
        if need_weight:
            pgt_weights = [
                torch.index_select(pred_logits, 1, gt_int).expand(top_k, gt_int.numel())
                for pred_logits, gt_int, top_k in zip(
                    self.pred_class_img_logits.split(1, dim=0), self.gt_classes_img_int, top_ks
                )
            ]

        if thres > 0:
            # get large scores
            masks = [pgt_score.ge(thres) for pgt_score in pgt_scores]
            masks = [
                torch.cat([torch.full_like(mask[0:1, :], True), mask[1:, :]], dim=0)
                for mask in masks
            ]
            pgt_scores = [
                torch.masked_select(pgt_score, mask) for pgt_score, mask in zip(pgt_scores, masks)
            ]
            pgt_boxes = [
                torch.masked_select(
                    pgt_box, torch.unsqueeze(mask, 2).expand(top_k, gt_int.numel(), 4)
                )
                for pgt_box, mask, top_k, gt_int in zip(
                    pgt_boxes, masks, top_ks, self.gt_classes_img_int
                )
            ]
            pgt_classes = [
                torch.masked_select(pgt_class, mask) for pgt_class, mask in zip(pgt_classes, masks)
            ]
            if need_weight:
                pgt_weights = [
                    torch.masked_select(pgt_weight, mask)
                    for pgt_weight, mask in zip(pgt_weights, masks)
                ]

        pgt_scores = [pgt_score.reshape(-1) for pgt_score in pgt_scores]
        pgt_boxes = [pgt_box.reshape(-1, 4) for pgt_box in pgt_boxes]
        pgt_classes = [pgt_class.reshape(-1) for pgt_class in pgt_classes]
        if need_weight:
            pgt_weights = [pgt_weight.reshape(-1) for pgt_weight in pgt_weights]

        if not need_instance and need_weight:
            return pgt_scores, pgt_boxes, pgt_classes, pgt_weights
        elif not need_instance and not need_weight:
            return pgt_scores, pgt_boxes, pgt_classes

        pgt_boxes = [Boxes(pgt_box) for pgt_box in pgt_boxes]

        targets = [
            Instances(
                proposals[i].image_size,
                gt_boxes=pgt_box,
                gt_classes=pgt_class,
                gt_scores=pgt_score,
                gt_weights=pgt_weight,
            )
            for i, (pgt_box, pgt_class, pgt_score, pgt_weight) in enumerate(
                zip(pgt_boxes, pgt_classes, pgt_scores, pgt_weights)
            )
        ]

        self._vis_pgt(targets, "pgt_top_k", suffix)

        return targets
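
# A minimal numeric sketch of the top-k pseudo-ground-truth selection in
# get_pgt_top_k (dummy scores; gt_int stands in for self.gt_classes_img_int
# of a single image).
import torch
scores = torch.tensor([[0.1, 0.7],
                       [0.4, 0.2],
                       [0.9, 0.3]])               # 3 proposals, 2 classes
gt_int = torch.tensor([0])                        # image-level label: class 0
per_cls = torch.index_select(scores, 1, gt_int)   # scores for class 0 only
pgt_score, pgt_idx = torch.topk(per_cls, 2, dim=0)
# pgt_idx == tensor([[2], [1]]): proposals 2 and 1 become pseudo ground truth
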
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            # torch.from_numpy does not support array with negative stride.
            masks = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x)) for x in masks
                ]))
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
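
# A hypothetical usage sketch for annotations_to_instances; the annotation
# dict below is made up for illustration.
from detectron2.structures import BoxMode

annos = [{
    "bbox": [10.0, 10.0, 50.0, 80.0],
    "bbox_mode": BoxMode.XYXY_ABS,
    "category_id": 3,
}]
target = annotations_to_instances(annos, image_size=(120, 160))
# target.gt_boxes holds one box; target.gt_classes == tensor([3])
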
Esempio n. 17
0
 def __call__(self, values):
     return Boxes(values[0])
Esempio n. 18
0
def assemble_rcnn_outputs_by_name(image_sizes,
                                  tensor_outputs,
                                  force_mask_on=False):
    """
    A function to assemble a caffe2 model's outputs (i.e. Dict[str, Tensor])
    into detectron2's format (i.e. a list of Instances).
    This only works when the model follows the Caffe2 Detectron naming convention.

    Args:
        image_sizes (List[List[int, int]]): [H, W] of every image.
        tensor_outputs (Dict[str, Tensor]): external_output to its tensor.

        force_mask_on (bool): if True, make sure the result has a pred_masks field
            even if no mask output is found in tensor_outputs (usually due to a model crash).
    """

    results = [Instances(image_size) for image_size in image_sizes]

    batch_splits = tensor_outputs.get("batch_splits", None)
    if batch_splits:
        raise NotImplementedError()
    assert len(image_sizes) == 1
    result = results[0]

    bbox_nms = tensor_outputs["bbox_nms"]
    score_nms = tensor_outputs["score_nms"]
    class_nms = tensor_outputs["class_nms"]
    # Detection will always succeed because Conv supports 0-batch inputs
    assert bbox_nms is not None
    assert score_nms is not None
    assert class_nms is not None
    if bbox_nms.shape[1] == 5:
        result.pred_boxes = RotatedBoxes(bbox_nms)
    else:
        result.pred_boxes = Boxes(bbox_nms)
    result.scores = score_nms
    result.pred_classes = class_nms.to(torch.int64)

    mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None)
    if mask_fcn_probs is not None:
        # finish the mask pred
        mask_probs_pred = mask_fcn_probs
        num_masks = mask_probs_pred.shape[0]
        class_pred = result.pred_classes
        indices = torch.arange(num_masks, device=class_pred.device)
        mask_probs_pred = mask_probs_pred[indices, class_pred][:, None]
        result.pred_masks = mask_probs_pred
    elif force_mask_on:
        # NOTE: there's no way to know the height/width of mask here, it won't be
        # used anyway when batch size is 0, so just set them to 0.
        result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8)

    keypoints_out = tensor_outputs.get("keypoints_out", None)
    kps_score = tensor_outputs.get("kps_score", None)
    if keypoints_out is not None:
        # keypoints_out: [N, 4, #keypoints], where 4 is in the order of (x, y, score, prob)
        keypoints_tensor = keypoints_out
        # NOTE: prob may not be calculated if "should_output_softmax" is set to
        # False in HeatmapMaxKeypoint, so fall back to the raw score; this does
        # not seem to affect mAP. TODO: check more carefully.
        keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]]
        result.pred_keypoints = keypoint_xyp
    elif kps_score is not None:
        # keypoint heatmap to sparse data structure
        pred_keypoint_logits = kps_score
        keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result])

    return results
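
# A minimal sketch of the per-class mask selection above (dummy tensors): for
# each detection, only the mask channel of its predicted class is kept.
import torch
num_det, num_classes, m = 2, 3, 4
mask_fcn_probs = torch.rand(num_det, num_classes, m, m)
pred_classes = torch.tensor([2, 0])
indices = torch.arange(num_det)
per_class_masks = mask_fcn_probs[indices, pred_classes][:, None]  # (2, 1, 4, 4)
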
Esempio n. 19
0
    def inference_single_image(self, box_cls, box_delta, anchors, mask_coef, proto_mask, image_size):
        """
        Single-image inference. Return bounding-box detection results by thresholding
        on scores and applying non-maximum suppression (NMS).

        Arguments:
            box_cls (list[Tensor]): list of #feature levels. Each entry contains
                tensor of size (H x W x A, K)
            box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4.
            anchors (list[Boxes]): list of #feature levels. Each entry contains
                a Boxes object, which contains all the anchors for that
                image in that feature level.
            mask_coef (list[Tensor]): list of #feature levels. Each entry contains
                tensor of size (H x W x A, #masks)
            proto_mask (Tensor): size (M, M, #masks)
            image_size (tuple(H, W)): a tuple of the image height and width.
        Returns:
            Same as `inference`, but for only one image.
        """
        boxes_all = []
        scores_all = []
        class_idxs_all = []
        mask_coef_all = []

        # Iterate over every feature level
        for box_cls_i, box_reg_i, mask_coef_i, anchors_i in zip(
            box_cls, box_delta, mask_coef, anchors):
            # (HxWxAxK,)
            box_cls_i = box_cls_i.flatten().sigmoid_()

            # Keep top k top scoring indices only.
            num_topk = min(self.topk_candidates, box_reg_i.size(0))
            # torch.sort is actually faster than .topk (at least on GPUs)
            predicted_prob, topk_idxs = box_cls_i.sort(descending=True)
            predicted_prob = predicted_prob[:num_topk]
            topk_idxs = topk_idxs[:num_topk]

            # filter out the proposals with low confidence score
            keep_idxs = predicted_prob > self.score_threshold
            predicted_prob = predicted_prob[keep_idxs]
            topk_idxs = topk_idxs[keep_idxs]

            anchor_idxs = topk_idxs // self.num_classes
            classes_idxs = topk_idxs % self.num_classes

            box_reg_i = box_reg_i[anchor_idxs] # (N,4)
            anchors_i = anchors_i[anchor_idxs]
            mask_coef_i = mask_coef_i[anchor_idxs] # (N, #masks)
            # predict boxes
            predicted_boxes = self.box2box_transform.apply_deltas(box_reg_i, anchors_i.tensor)

            boxes_all.append(predicted_boxes)
            scores_all.append(predicted_prob)
            class_idxs_all.append(classes_idxs)
            mask_coef_all.append(mask_coef_i)

        boxes_all, scores_all, class_idxs_all, mask_coef_all = [
            cat(x) for x in [boxes_all, scores_all, class_idxs_all, mask_coef_all]
        ]
        keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold)
        keep = keep[: self.max_detections_per_image]

        result = Instances(image_size)
        result.pred_boxes = Boxes(boxes_all[keep])
        result.scores = scores_all[keep]
        result.pred_classes = class_idxs_all[keep]

        pred_masks = F.sigmoid(proto_mask @ mask_coef_all[keep].t())
        # note: pred_masks shape (M, M, #keep)
        pred_masks = crop(pred_masks, boxes_all[keep])
        # shape (#keep, M, M)
        pred_masks = pred_masks.permute(2, 0, 1).contiguous()
        # mask_iou to rescore mask
        if self.rescore_mask:
            pred_maskiou = self.maskiou_net(pred_masks.unsqueeze(1))
            pred_maskiou = torch.gather(
                pred_maskiou, dim=1, index=class_idxs_all[keep].unsqueeze(1)).squeeze(1)
            result.scores = scores_all[keep] * pred_maskiou

        pred_masks = F.interpolate(pred_masks.unsqueeze(0), image_size, 
            mode="bilinear", align_corners=False).squeeze(0)
        result.pred_masks = pred_masks.gt_(0.5)
        return result
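
# A minimal sketch of the prototype-mask assembly above (dummy shapes,
# following the comment that proto_mask is (M, M, #masks) and the kept
# coefficients are (#keep, #masks)): a matrix product mixes the prototypes
# into one mask per instance.
import torch
M, num_protos, num_keep = 8, 4, 2
proto_mask = torch.rand(M, M, num_protos)
coefs = torch.rand(num_keep, num_protos)
masks = torch.sigmoid(proto_mask @ coefs.t())   # (M, M, #keep)
masks = masks.permute(2, 0, 1).contiguous()     # (#keep, M, M)
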
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept

            "file_name", "image_id", 'ref_id', 'expr_id', 'height', 'weight'
            "tokens", 'cate',

        """

        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file

        image = utils.read_image(dataset_dict['file_name'],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        image_scale = dataset_dict['image_scale']
        image = cv2.resize(image,
                           None,
                           None,
                           fx=image_scale,
                           fy=image_scale,
                           interpolation=cv2.INTER_LINEAR)

        precomp_boxes = torch.as_tensor(dataset_dict['precomp_bbox'] *
                                        image_scale,
                                        dtype=torch.float32)
        gt_boxes = torch.as_tensor(dataset_dict['gt_boxes'] * image_scale,
                                   dtype=torch.float32)
        h, w = image.shape[:2]

        gt_boxes = Boxes(gt_boxes, [h, w])
        precomp_boxes = Boxes(precomp_boxes, [h, w])
        image = torch.as_tensor(image.transpose(2, 0, 1), dtype=torch.float32)
        dataset_dict['det_label_embedding'] = self.vocab_embed[
            dataset_dict['precomp_det_label']]  ## N*1024
        dataset_dict['image'] = image

        dataset_dict['gt_boxes'] = gt_boxes
        dataset_dict['precomp_bbox'] = precomp_boxes
        return dataset_dict


# class DatasetMapper:
#     """
#     A callable which takes a dataset dict in Detectron2 Dataset format,
#     and map it into a format used by the model.
#
#     This is the default callable to be used to map your dataset dict into training data.
#     You may need to follow it to implement your own one for customized logic.
#
#     The callable currently does the following:
#     1. Read the image from "file_name"
#     2. Applies cropping/geometric transforms to the image and annotations
#     3. Prepare data and annotations to Tensor and :class:`Instances`
#     """
#
#     def __init__(self, cfg, is_train=True):
#         self.tfm_gens = utils.build_transform_gen(cfg, is_train)
#
#         if cfg.INPUT.CROP.ENABLED and is_train:
#             self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)
#         else:
#             self.crop_gen = None
#
#         # fmt: off
#         self.img_format     = cfg.INPUT.FORMAT
#         self.mask_on        = cfg.MODEL.MASK_ON
#         self.mask_format    = cfg.INPUT.MASK_FORMAT
#         self.keypoint_on    = cfg.MODEL.KEYPOINT_ON
#         self.load_proposals = cfg.MODEL.LOAD_PROPOSALS
#         # fmt: on
#         if self.keypoint_on and is_train:
#             # Flip only makes sense in training
#             self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
#         else:
#             self.keypoint_hflip_indices = None
#
#         if self.load_proposals:
#             self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
#             self.proposal_topk = (
#                 cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN
#                 if is_train
#                 else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST
#             )
#         self.is_train = is_train
#
#     def __call__(self, dataset_dict):
#         """
#         Args:
#             dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
#
#         Returns:
#             dict: a format that builtin models in detectron2 accept
#         """
#         dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
#         # USER: Write your own image loading if it's not from a file
#         image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
#         utils.check_image_size(dataset_dict, image)
#
#         if "annotations" not in dataset_dict:
#             image, transforms = T.apply_transform_gens(
#                 ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
#             )
#         else:
#             # Crop around an instance if there are instances in the image.
#             # USER: Remove if you don't use cropping
#             if self.crop_gen:
#                 crop_tfm = utils.gen_crop_transform_with_instance(
#                     self.crop_gen.get_crop_size(image.shape[:2]),
#                     image.shape[:2],
#                     np.random.choice(dataset_dict["annotations"]),
#                 )
#                 image = crop_tfm.apply_image(image)
#             image, transforms = T.apply_transform_gens(self.tfm_gens, image)
#             if self.crop_gen:
#                 transforms = crop_tfm + transforms
#
#         image_shape = image.shape[:2]  # h, w
#
#         # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
#         # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
#         # Therefore it's important to use torch.Tensor.
#         dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
#         # Can use uint8 if it turns out to be slow some day
#
#         # USER: Remove if you don't use pre-computed proposals.
#         if self.load_proposals:
#             utils.transform_proposals(
#                 dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
#             )
#
#         if not self.is_train:
#             dataset_dict.pop("annotations", None)
#             dataset_dict.pop("sem_seg_file_name", None)
#             return dataset_dict
#
#         if "annotations" in dataset_dict:
#             # USER: Modify this if you want to keep them for some reason.
#             for anno in dataset_dict["annotations"]:
#                 if not self.mask_on:
#                     anno.pop("segmentation", None)
#                 if not self.keypoint_on:
#                     anno.pop("keypoints", None)
#
#             # USER: Implement additional transformations if you have other types of data
#             annos = [
#                 utils.transform_instance_annotations(
#                     obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
#                 )
#                 for obj in dataset_dict.pop("annotations")
#                 if obj.get("iscrowd", 0) == 0
#             ]
#             instances = utils.annotations_to_instances(
#                 annos, image_shape, mask_format=self.mask_format
#             )
#             # Create a tight bounding box from masks, useful when image is cropped
#             if self.crop_gen and instances.has("gt_masks"):
#                 instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
#             dataset_dict["instances"] = utils.filter_empty_instances(instances)
#
#         # USER: Remove if you don't do semantic/panoptic segmentation.
#         if "sem_seg_file_name" in dataset_dict:
#             with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
#                 sem_seg_gt = Image.open(f)
#                 sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
#             sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
#             sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
#             dataset_dict["sem_seg"] = sem_seg_gt
#         return dataset_dict
Esempio n. 21
0
def find_top_rpn_proposals(
    proposals,
    pred_objectness_logits,
    images,
    nms_thresh,
    pre_nms_topk,
    post_nms_topk,
    min_box_side_len,
    training,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
    highest scoring proposals among all the feature maps if `training` is True,
    otherwise, return the highest `post_nms_topk` scoring proposals for each
    feature map.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4).
            All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
        images (ImageList): Input images as an :class:`ImageList`.
        nms_thresh (float): IoU threshold to use for NMS
        pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is per
            feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is total,
            over all feature maps.
        min_box_side_len (float): minimum proposal box side length in pixels (absolute units
            wrt input images).
        training (bool): True if proposals are to be used in training, otherwise False.
            This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..."
            comment.

    Returns:
        proposals (list[Instances]): list of N Instances. The i-th Instances
            stores post_nms_topk object proposals for image i.
    """
    image_sizes = images.image_sizes  # in (h, w) order
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchor for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, proposals_i, logits_i in zip(
        itertools.count(), proposals, pred_objectness_logits
    ):
        Hi_Wi_A = logits_i.shape[1]
        num_proposals_i = min(pre_nms_topk, Hi_Wi_A)

        # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812)
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i[batch_idx, :num_proposals_i]
        topk_idx = idx[batch_idx, :num_proposals_i]

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx]  # N x topk x 4

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results = []
    for n, image_size in enumerate(image_sizes):
        boxes = Boxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_side_len)
        lvl = level_ids
        if keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], level_ids[keep]

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
        # During training, topk is over the proposals from *all* images in the training batch.
        # During testing, it is over the proposals for each image separately.
        # As a result, the training behavior becomes batch-dependent,
        # and the configuration "POST_NMS_TOPK_TRAIN" ends up relying on the batch size.
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
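
# A minimal sketch of the per-level pre-NMS top-k above (dummy logits). The
# sort-then-slice trick mirrors the comment that sort can be faster than topk.
import torch
logits = torch.tensor([[0.2, 0.9, 0.5, 0.1]])    # N=1 image, 4 anchors
pre_nms_topk = 2
sorted_logits, idx = logits.sort(descending=True, dim=1)
topk_scores = sorted_logits[:, :pre_nms_topk]    # tensor([[0.9000, 0.5000]])
topk_idx = idx[:, :pre_nms_topk]                 # tensor([[1, 2]])
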
Esempio n. 22
0
    def __init__(
        self,
        box2box_transform,
        pred_class_logits,
        pred_proposal_deltas,
        proposals,
        smooth_l1_beta=0.0,
        box_reg_loss_type="smooth_l1",
        mean_loss=False,
        W_pos=None,
        W_neg=None,
        PL=None,
        NL=None,
        csc_stats=None,
        loss_weight=1.0,
        prefix="",
    ):
        """
        Args:
            box2box_transform (Box2BoxTransform/Box2BoxTransformRotated):
                box2box transform instance for proposal-to-detection transformations.
            pred_class_logits (Tensor): A tensor of shape (R, K + 1) storing the predicted class
                logits for all R predicted object instances.
                Each row corresponds to a predicted object instance.
            pred_proposal_deltas (Tensor): A tensor of shape (R, K * B) or (R, B) for
                class-specific or class-agnostic regression. It stores the predicted deltas that
                transform proposals into final box detections.
                B is the box dimension (4 or 5).
                When B is 4, each row is [dx, dy, dw, dh (, ....)].
                When B is 5, each row is [dx, dy, dw, dh, da (, ....)].
            proposals (list[Instances]): A list of N Instances, where Instances i stores the
                proposals for image i, in the field "proposal_boxes".
                When training, each Instances must have ground-truth labels
                stored in the field "gt_classes" and "gt_boxes".
                The total number of all instances must be equal to R.
            smooth_l1_beta (float): The transition point between L1 and L2 loss in
                the smooth L1 loss function. When set to 0, the loss becomes L1. When
                set to +inf, the loss becomes constant 0.
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
        """
        self.box2box_transform = box2box_transform
        self.num_preds_per_image = [len(p) for p in proposals]
        self.pred_class_logits = pred_class_logits
        self.pred_proposal_deltas = pred_proposal_deltas
        self.smooth_l1_beta = smooth_l1_beta
        self.box_reg_loss_type = box_reg_loss_type

        self.image_shapes = [x.image_size for x in proposals]

        if len(proposals):
            box_type = type(proposals[0].proposal_boxes)
            # cat(..., dim=0) concatenates over all images in the batch
            self.proposals = box_type.cat([p.proposal_boxes for p in proposals])
            assert (
                not self.proposals.tensor.requires_grad
            ), "Proposals should not require gradients!"

            # The following fields should exist only when training.
            if proposals[0].has("gt_boxes"):
                self.gt_boxes = box_type.cat([p.gt_boxes for p in proposals])
                assert proposals[0].has("gt_classes")
                self.gt_classes = cat([p.gt_classes for p in proposals], dim=0)
        else:
            self.proposals = Boxes(torch.zeros(0, 4, device=self.pred_proposal_deltas.device))
        self._no_instances = len(proposals) == 0  # no instances found

        self.mean_loss = mean_loss
        self.W_pos = W_pos
        self.W_neg = W_neg
        self.PL = PL
        self.NL = NL
        self.csc_stats = csc_stats
        self.loss_weight = loss_weight
        self.prefix = prefix
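
# A small sketch of the delta parameterization described in the docstring
# above (assumption: detectron2's Box2BoxTransform with the weights shown,
# not necessarily the transform this class is constructed with): zero deltas
# reproduce the proposal box.
import torch
from detectron2.modeling.box_regression import Box2BoxTransform

transform = Box2BoxTransform(weights=(10.0, 10.0, 5.0, 5.0))
proposals = torch.tensor([[0., 0., 10., 10.]])
deltas = torch.zeros(1, 4)                        # (dx, dy, dw, dh) == 0
print(transform.apply_deltas(deltas, proposals))  # tensor([[ 0.,  0., 10., 10.]])
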
Esempio n. 23
0
def annotations_to_instances_with_attributes(annos,
                                             image_size,
                                             mask_format="polygon",
                                             load_attributes=False,
                                             max_attr_per_ins=16):
    """
    Extend the function annotations_to_instances() to support attributes
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            masks = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x)) for x in masks
                ]))
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    if len(annos) and load_attributes:
        attributes = -torch.ones(
            (len(annos), max_attr_per_ins), dtype=torch.int64)
        for idx, anno in enumerate(annos):
            if "attribute_ids" in anno:
                for jdx, attr_id in enumerate(anno["attribute_ids"]):
                    attributes[idx, jdx] = attr_id
        target.gt_attributes = attributes

    return target
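
# A minimal sketch of the attribute padding above (made-up attribute ids):
# each instance gets a fixed-width row, padded with -1.
import torch
annos = [{"attribute_ids": [5, 7]}, {"attribute_ids": []}]
max_attr_per_ins = 4
attributes = -torch.ones((len(annos), max_attr_per_ins), dtype=torch.int64)
for idx, anno in enumerate(annos):
    for jdx, attr_id in enumerate(anno["attribute_ids"]):
        attributes[idx, jdx] = attr_id
# attributes == tensor([[ 5,  7, -1, -1],
#                       [-1, -1, -1, -1]])
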
Esempio n. 24
0
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """

    all_scores = scores.clone()
    all_scores = torch.unsqueeze(all_scores, 0)
    all_boxes = boxes.clone()
    all_boxes = torch.unsqueeze(all_boxes, 0)

    pred_inds = torch.unsqueeze(
        torch.arange(scores.size(0), device=scores.device, dtype=torch.long), dim=1
    ).repeat(1, scores.size(1))

    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        pred_inds = pred_inds[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    pred_inds = pred_inds[:, :-1]

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    pred_inds = pred_inds[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    pred_inds = pred_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    result.pred_inds = pred_inds
    return result, filter_inds[:, 0], all_scores, all_boxes
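
# A minimal sketch of the score filtering above (dummy scores): nonzero() on
# the R x K mask yields (prediction index, class index) pairs.
import torch
scores = torch.tensor([[0.9, 0.1],
                       [0.2, 0.6]])
filter_mask = scores > 0.5
filter_inds = filter_mask.nonzero()
# filter_inds == tensor([[0, 0],
#                        [1, 1]])
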
Esempio n. 25
0
# Copyright (c) Facebook, Inc. and its affiliates.

import unittest
import torch

from detectron2.structures import Boxes, BoxMode, Instances

from densepose.modeling.losses.utils import ChartBasedAnnotationsAccumulator
from densepose.structures import DensePoseDataRelative, DensePoseList

image_shape = (100, 100)
instances = Instances(image_shape)
n_instances = 3
instances.proposal_boxes = Boxes(torch.rand(n_instances, 4))
instances.gt_boxes = Boxes(torch.rand(n_instances, 4))


# instances.gt_densepose = None is not possible, because Instances attributes must have a length
class TestChartBasedAnnotationsAccumulator(unittest.TestCase):
    def test_chart_based_annotations_accumulator_no_gt_densepose(self):
        accumulator = ChartBasedAnnotationsAccumulator()
        accumulator.accumulate(instances)
        expected_values = {
            "nxt_bbox_with_dp_index": 0,
            "nxt_bbox_index": n_instances
        }
        for key in accumulator.__dict__:
            self.assertEqual(getattr(accumulator, key),
                             expected_values.get(key, []))

    def test_chart_based_annotations_accumulator_gt_densepose_none(self):
Esempio n. 26
0
    def _forward_box(
        self, features: Dict[str, torch.Tensor], proposals: List[Instances]
    ) -> Union[Dict[str, torch.Tensor], List[Instances]]:
        """
        Forward logic of the box prediction branch. If `self.train_on_pred_boxes is True`,
            the function puts predicted boxes in the `proposal_boxes` field of `proposals` argument.

        Args:
            features (dict[str, Tensor]): mapping from feature map names to tensor.
                Same as in :meth:`ROIHeads.forward`.
            proposals (list[Instances]): the per-image object proposals with
                their matching ground truth.
                Each has fields "proposal_boxes", and "objectness_logits",
                "gt_classes", "gt_boxes".

        Returns:
            In training, a dict of losses.
            In inference, a list of `Instances`, the predicted instances.
        """
        features = [features[f] for f in self.box_in_features]
        box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals])

        objectness_logits = torch.cat([x.objectness_logits + 1 for x in proposals], dim=0)
        if self.pooler_type == "ROILoopPool":
            objectness_logits = torch.cat(
                [objectness_logits, objectness_logits, objectness_logits], dim=0
            )
        box_features = box_features * objectness_logits.view(-1, 1, 1, 1)
        if self.training:
            storage = get_event_storage()
            storage.put_scalar("proposals/objectness_logits+1 mean", objectness_logits.mean())
            storage.put_scalar("proposals/objectness_logits+1 max", objectness_logits.max())
            storage.put_scalar("proposals/objectness_logits+1 min", objectness_logits.min())

        # torch.cuda.empty_cache()

        box_features = self.box_head(box_features)
        if self.pooler_type == "ROILoopPool":
            box_features, box_features_frame, box_features_context = torch.chunk(
                box_features, 3, dim=0
            )
            predictions = self.box_predictor(
                [box_features, box_features_frame, box_features_context], proposals, context=True
            )
            del box_features_frame
            del box_features_context
        else:
            predictions = self.box_predictor(box_features, proposals)
        # del box_features

        if self.training:
            losses = self.box_predictor.losses(predictions, proposals, self.gt_classes_img_oh)

            self.pred_class_img_logits = (
                self.box_predictor.predict_probs_img(predictions, proposals).clone().detach()
            )

            prev_pred_scores = predictions[0].detach()
            prev_pred_boxes = [p.proposal_boxes for p in proposals]
            for k in range(self.refine_K):
                suffix = "_r" + str(k)
                # targets = self.get_pgt(
                # prev_pred_boxes, prev_pred_scores, proposals, suffix
                # )
                targets = self.get_pgt_top_k(
                    prev_pred_boxes, prev_pred_scores, proposals, suffix=suffix
                )

                proposals_k = self.label_and_sample_proposals(proposals, targets, suffix=suffix)

                if self.roi_label:
                    if isinstance(prev_pred_scores, list):
                        S = cat(prev_pred_scores, dim=0).cpu()
                    else:
                        S = prev_pred_scores.cpu()
                    U = cat(
                        [pairwise_iou(p.proposal_boxes, p.proposal_boxes) for p in proposals_k],
                        dim=0,
                    ).cpu()
                    L = self.gt_classes_img_oh.cpu()
                    CW = self.pred_class_img_logits.cpu()

                    RL, RW = self.roi_label(S, U, L, CW)
                    RL = RL.to(self.pred_class_img_logits.device)
                    RW = RW.to(self.pred_class_img_logits.device)

                    num_preds_per_image = [len(p) for p in proposals_k]
                    for p, rl, rw in zip(
                        proposals_k,
                        RL.split(num_preds_per_image, dim=0),
                        RW.split(num_preds_per_image, dim=0),
                    ):
                        p.gt_classes = rl.to(torch.int64)
                        p.gt_weights = rw.to(torch.float32)

                predictions_k = self.box_refinery[k](box_features)

                losses_k = self.box_refinery[k].losses(predictions_k, proposals_k)

                prev_pred_scores = self.box_refinery[k].predict_probs(predictions_k, proposals_k)
                prev_pred_boxes = self.box_refinery[k].predict_boxes(predictions_k, proposals_k)
                prev_pred_scores = [
                    prev_pred_score.detach() for prev_pred_score in prev_pred_scores
                ]
                prev_pred_boxes = [prev_pred_box.detach() for prev_pred_box in prev_pred_boxes]

                losses.update(losses_k)
            # proposals is modified in-place below, so losses must be computed first.
            if self.train_on_pred_boxes:
                with torch.no_grad():
                    pred_boxes = self.box_predictor.predict_boxes_for_gt_classes(
                        predictions, proposals
                    )
                    for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes):
                        proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image)
            return losses
        else:
            if self.refine_reg[-1]:
                predictions_k = self.box_refinery[-1](box_features)
                pred_instances, _, all_scores, all_boxes = self.box_refinery[-1].inference(
                    predictions_k, proposals
                )
            else:
                predictions_K = []
                for k in range(self.refine_K):
                    predictions_k = self.box_refinery[k](box_features)
                    predictions_K.append(predictions_k)
                pred_instances, _, all_scores, all_boxes = self.box_refinery[-1].inference(
                    predictions_K, proposals
                )
            return pred_instances, all_scores, all_boxes
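
# A minimal sketch of the objectness re-weighting near the top of
# _forward_box (dummy features): each proposal's pooled feature map is scaled
# by (objectness_logit + 1).
import torch
box_features = torch.ones(2, 3, 2, 2)            # 2 proposals, C=3, 2x2 pool
objectness_logits = torch.tensor([0.5, -0.25])
weighted = box_features * (objectness_logits + 1).view(-1, 1, 1, 1)
# weighted[0] is all 1.5, weighted[1] is all 0.75
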
Esempio n. 27
0
    def inference_single_image(self, box_cls, box_delta, anchors, image_size):
        """
        Single-image inference. Return bounding-box detection results by thresholding
        on scores and applying non-maximum suppression (NMS).

        Arguments:
            box_cls (list[Tensor]): list of #feature levels. Each entry contains
                tensor of size (H x W x A, K)
            box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4.
            anchors (list[Boxes]): list of #feature levels. Each entry contains
                a Boxes object, which contains all the anchors for that
                image in that feature level.
            image_size (tuple(H, W)): a tuple of the image height and width.

        Returns:
            Same as `inference`, but for only one image.
        """
        boxes_all = []
        scores_all = []
        class_idxs_all = []

        # Iterate over every feature level
        for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta, anchors):
            # (HxWxAxK,)
            box_cls_i = box_cls_i.flatten().sigmoid_()

            # Keep top k top scoring indices only.
            num_topk = min(self.topk_candidates, box_reg_i.size(0))
            # torch.sort is actually faster than .topk (at least on GPUs)
            predicted_prob, topk_idxs = box_cls_i.sort(descending=True)
            predicted_prob = predicted_prob[:num_topk]
            topk_idxs = topk_idxs[:num_topk]

            # filter out the proposals with low confidence score
            keep_idxs = predicted_prob > self.score_threshold
            predicted_prob = predicted_prob[keep_idxs]
            topk_idxs = topk_idxs[keep_idxs]

            anchor_idxs = topk_idxs // self.num_classes
            classes_idxs = topk_idxs % self.num_classes

            box_reg_i = box_reg_i[anchor_idxs]
            anchors_i = anchors_i[anchor_idxs]
            # predict boxes
            predicted_boxes = self.box2box_transform.apply_deltas(box_reg_i, anchors_i.tensor)

            boxes_all.append(predicted_boxes)
            scores_all.append(predicted_prob)
            class_idxs_all.append(classes_idxs)

        boxes_all, scores_all, class_idxs_all = [
            cat(x) for x in [boxes_all, scores_all, class_idxs_all]
        ]
        keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold)
        keep = keep[: self.max_detections_per_image]

        result = Instances(image_size)
        result.pred_boxes = Boxes(boxes_all[keep])
        result.scores = scores_all[keep]
        result.pred_classes = class_idxs_all[keep]
        return result
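
# A minimal numeric sketch of the flat-index decoding above: with K classes,
# a flattened (anchor, class) score index splits back via // K and % K.
num_classes = 3
topk_idx = 7
anchor_idx = topk_idx // num_classes   # 2
class_idx = topk_idx % num_classes     # 1
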
Esempio n. 28
0
    def get_pgt(self, prev_pred_boxes, prev_pred_scores, proposals, suffix):
        if isinstance(prev_pred_scores, torch.Tensor):
            num_preds_per_image = [len(p) for p in proposals]
            prev_pred_scores = prev_pred_scores.split(num_preds_per_image, dim=0)
        else:
            assert isinstance(prev_pred_scores, list)
            assert isinstance(prev_pred_scores[0], torch.Tensor)

        prev_pred_scores = [
            torch.index_select(prev_pred_score, 1, gt_int)
            for prev_pred_score, gt_int in zip(prev_pred_scores, self.gt_classes_img_int)
        ]
        pgt_scores_idxs = [
            torch.max(prev_pred_score, dim=0) for prev_pred_score in prev_pred_scores
        ]
        pgt_scores = [item[0] for item in pgt_scores_idxs]
        pgt_idxs = [item[1] for item in pgt_scores_idxs]

        assert isinstance(prev_pred_boxes, tuple) or isinstance(prev_pred_boxes, list)
        if isinstance(prev_pred_boxes[0], Boxes):
            pgt_boxes = [
                prev_pred_box[pgt_idx] for prev_pred_box, pgt_idx in zip(prev_pred_boxes, pgt_idxs)
            ]
        else:
            assert isinstance(prev_pred_boxes[0], torch.Tensor)
            if self.cls_agnostic_bbox_reg:
                num_preds = [prev_pred_box.size(0) for prev_pred_box in prev_pred_boxes]
                prev_pred_boxes = [
                    prev_pred_box.unsqueeze(1).expand(num_pred, self.num_classes, 4)
                    for num_pred, prev_pred_box in zip(num_preds, prev_pred_boxes)
                ]
            prev_pred_boxes = [
                prev_pred_box.view(-1, self.num_classes, 4) for prev_pred_box in prev_pred_boxes
            ]
            prev_pred_boxes = [
                torch.index_select(prev_pred_box, 1, gt_int)
                for prev_pred_box, gt_int in zip(prev_pred_boxes, self.gt_classes_img_int)
            ]
            pgt_boxes = [
                torch.index_select(prev_pred_box, 0, pgt_idx)
                for prev_pred_box, pgt_idx in zip(prev_pred_boxes, pgt_idxs)
            ]
            pgt_boxes = [pgt_box.view(-1, 4) for pgt_box in pgt_boxes]
            diags = [
                torch.tensor(
                    [i * gt_split.numel() + i for i in range(gt_split.numel())],
                    dtype=torch.int64,
                    device=pgt_boxes[0].device,
                )
                for gt_split in self.gt_classes_img_int
            ]
            pgt_boxes = [
                torch.index_select(pgt_box, 0, diag) for pgt_box, diag in zip(pgt_boxes, diags)
            ]
            pgt_boxes = [Boxes(pgt_box) for pgt_box in pgt_boxes]

        pgt_classes = self.gt_classes_img_int
        pgt_weights = [
            torch.index_select(pred_logits, 1, pgt_class).reshape(-1)
            for pred_logits, pgt_class in zip(
                self.pred_class_img_logits.split(1, dim=0), pgt_classes
            )
        ]

        targets = [
            Instances(
                proposals[i].image_size,
                gt_boxes=pgt_box,
                gt_classes=pgt_class,
                gt_scores=pgt_score,
                gt_weights=pgt_weight,
            )
            for i, (pgt_box, pgt_class, pgt_score, pgt_weight) in enumerate(
                zip(pgt_boxes, pgt_classes, pgt_scores, pgt_weights)
            )
        ]

        self._vis_pgt(targets, "pgt", suffix)

        return targets
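
# A minimal numeric sketch of the argmax selection in get_pgt (dummy scores):
# for each image-level class, the single highest-scoring proposal becomes the
# pseudo ground-truth box.
import torch
scores = torch.tensor([[0.1, 0.7],
                       [0.4, 0.2],
                       [0.9, 0.3]])
gt_int = torch.tensor([1])                       # image-level label: class 1
per_cls = torch.index_select(scores, 1, gt_int)
pgt_score, pgt_idx = torch.max(per_cls, dim=0)
# pgt_idx == tensor([0]): proposal 0 is the pseudo box for class 1
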
Esempio n. 29
0
def main(args):
    # retrieve configuration file and update the weights
    cfg = get_cfg()
    cfg.merge_from_file(args.cfg)
    # update the model so that it uses the final output weights.
    cfg.MODEL.WEIGHTS = str(Path(cfg.OUTPUT_DIR) / Path("model_final.pth"))

    predictor = DefaultPredictor(cfg)

    # Get data from the validation split; it must come from the signs dataset,
    # not the hotspots dataset.
    dset = DatasetCatalog.get(args.dataset)

    all_hotspots = []
    all_gt_aligned = []
    all_scores = []

    for example in tqdm(dset):

        img = cv2.imread(example["file_name"])
        # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
        outputs = predictor(img)

        # gets individual hotspot images, save to npz array

        hotspots = extract_boxes(
            img[:, :, ::-1], outputs["instances"].to("cpu").pred_boxes
        )
        all_hotspots.extend(hotspots)

        # get scores
        scores = outputs["instances"].to("cpu").scores

        all_scores.extend(scores.numpy())

        # get groundtruth classes

        # these parameters can be customized.
        matcher = Matcher([0.4, 0.5], [0, -1, 1], allow_low_quality_matches=False)

        # convert the groundtruth annotations into a detectron Boxes object
        gt_boxes = Boxes(
            torch.tensor(
                np.vstack([annotation["bbox"] for annotation in example["annotations"]])
            )
        )

        gt_classes = np.array(
            [annotation["category_id"] for annotation in example["annotations"]]
        )

        pred_boxes = outputs["instances"].to("cpu").pred_boxes

        match_quality_matrix = pairwise_iou(gt_boxes, pred_boxes)
        matched_idxs, matched_labels = matcher(match_quality_matrix)

        # compute ground-truth classes for every box
        aligned_classes = gt_classes[matched_idxs]

        # handle edge case where only one aligned box shows up
        if not isinstance(aligned_classes, np.ndarray):
            aligned_classes = np.array([aligned_classes])

        # handle background classes:
        aligned_classes[matched_labels == 0] = -1
        aligned_classes[matched_labels == -1] = -1

        all_gt_aligned.extend(aligned_classes)

    np.savez(
        Path(args.outpath).with_suffix(".npz"),
        hotspots=np.array(all_hotspots, dtype=object),
        scores=all_scores,
        gt_classes=all_gt_aligned,
    )
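
# A small sketch of the matching step above (assumption: detectron2's Matcher
# imported from detectron2.modeling.matcher, with the same thresholds): rows
# of the IoU matrix are ground-truth boxes, columns are predictions;
# matched_labels marks 1 (match), -1 (ignore), 0 (background).
import torch
from detectron2.modeling.matcher import Matcher

matcher = Matcher([0.4, 0.5], [0, -1, 1], allow_low_quality_matches=False)
iou = torch.tensor([[0.8, 0.45, 0.1]])           # 1 gt box vs. 3 predictions
matched_idxs, matched_labels = matcher(iou)
# matched_idxs == tensor([0, 0, 0]); matched_labels == tensor([ 1, -1,  0])
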
Esempio n. 30
0
 def forward(self, x, boxes):
     return self.roi(x, [Boxes(boxes)])