def merge_branch_instances(instances, num_branch, nms_thresh, topk_per_image):
    """
    Merge detection results from different branches of TridentNet.
    Return detection results by applying non-maximum suppression (NMS) on bounding boxes
    and keeping the unsuppressed boxes along with any other per-instance data (e.g., masks).

    Args:
        instances (list[Instances]): A list of N * num_branch Instances storing detection
            results: there are N images, and each image has num_branch Instances.
        num_branch (int): Number of branches used for merging detection results for each image.
        nms_thresh (float):  The threshold to use for box non-maximum suppression. Value in [0, 1].
        topk_per_image (int): The number of top scoring detections to return. Set < 0 to return
            all detections.

    Returns:
        results (list[Instances]): A list of N Instances, one for each image in the batch,
            storing the top-k most confident detections after merging results from multiple
            branches.
    """
    if num_branch == 1:
        return instances

    batch_size = len(instances) // num_branch
    results = []
    for i in range(batch_size):
        instance = Instances.cat([instances[i + batch_size * j] for j in range(num_branch)])

        # Apply per-class NMS
        keep = batched_nms(
            instance.pred_boxes.tensor, instance.scores, instance.pred_classes, nms_thresh
        )
        if topk_per_image >= 0:
            keep = keep[:topk_per_image]
        result = instance[keep]

        results.append(result)

    return results
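# Usage sketch for merge_branch_instances (illustrative values; assumes detectron2 is
# installed and the helpers this module imports, e.g. batched_nms, are in scope). The
# input list is laid out branch-major: instances[i + batch_size * j] is image i, branch j.
import torch
from detectron2.structures import Boxes, Instances

num_branch, image_size = 3, (480, 640)
branch_outputs = []
for _ in range(num_branch):
    inst = Instances(image_size)
    inst.pred_boxes = Boxes(torch.tensor([[10.0, 10.0, 100.0, 100.0]]))
    inst.scores = torch.tensor([0.9])
    inst.pred_classes = torch.tensor([0])
    branch_outputs.append(inst)

merged = merge_branch_instances(branch_outputs, num_branch, nms_thresh=0.5, topk_per_image=100)
# merged is a list with one Instances object holding the NMS-merged detections.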
def add_ground_truth_to_proposals_single_image(gt_boxes, proposals):
    """
    Augment `proposals` with ground-truth boxes from `gt_boxes`.

    Args:
        Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals
        per image.

    Returns:
        Same as `add_ground_truth_to_proposals`, but for only one image.
    """
    device = proposals.objectness_logits.device
    # Assign all ground-truth boxes an objectness logit corresponding to
    # P(object) = sigmoid(logit) =~ 1.
    gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10)))
    gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device)

    # Concatenating gt_boxes with proposals requires them to have the same fields
    gt_proposal = Instances(proposals.image_size)
    gt_proposal.proposal_boxes = gt_boxes
    gt_proposal.objectness_logits = gt_logits
    new_proposals = Instances.cat([proposals, gt_proposal])

    return new_proposals
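# A minimal sketch of augmenting proposals with ground-truth boxes (dummy values;
# assumes the helper above plus torch, math and detectron2's Boxes/Instances are in scope).
import torch
from detectron2.structures import Boxes, Instances

image_size = (480, 640)
proposals = Instances(image_size)
proposals.proposal_boxes = Boxes(torch.tensor([[0.0, 0.0, 50.0, 50.0]]))
proposals.objectness_logits = torch.tensor([2.5])

gt_boxes = Boxes(torch.tensor([[10.0, 10.0, 120.0, 90.0]]))
augmented = add_ground_truth_to_proposals_single_image(gt_boxes, proposals)
# augmented holds 2 proposals; the GT box gets an objectness logit of ~23 (sigmoid ~ 1).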
Example #3
    def predict_proposals(self):
        sampled_boxes = []

        bundle = (
            self.locations, self.logits_pred,
            self.reg_pred, self.ext_pred,
            self.ctrness_pred, self.strides
        )

        for i, (l, o, r, e, c, s) in enumerate(zip(*bundle)):
            # recall that during training, we normalize regression targets with FPN's stride.
            # we denormalize them here.
            r = r * s
            sampled_boxes.append(
                self.forward_for_single_feature_map(
                    l, o, r, e, c, self.image_sizes
                )
            )

        boxlists = list(zip(*sampled_boxes))
        boxlists = [Instances.cat(boxlist) for boxlist in boxlists]
        boxlists = self.select_over_all_levels(boxlists)

        return boxlists
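# Shape sketch for the regrouping above (hypothetical labels): sampled_boxes is a list
# over feature levels, each entry a list of per-image results; zip(*sampled_boxes)
# transposes it so Instances.cat can merge all levels belonging to the same image.
per_level = [["img0_lvl0", "img1_lvl0"], ["img0_lvl1", "img1_lvl1"]]
per_image = list(zip(*per_level))
# per_image == [("img0_lvl0", "img0_lvl1"), ("img1_lvl0", "img1_lvl1")]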
    def predict_proposals(self):
        sampled_boxes = []

        bundle = (
            self.locations, self.logits_pred,
            self.reg_pred, self.ctrness_pred,
            self.strides, self.mask_regression, self.mask_prediction
        )

        for i, (l, o, r, c, s, mr, mp) in enumerate(zip(*bundle)):
            # recall that during training, we normalize regression targets with FPN's stride.
            # we denormalize them here.
            r = r * s
            # if self.thresh_with_active:
            #     mr = mr * torch.sigmoid(ma)
            sampled_boxes.append(
                self.forward_for_single_feature_map(
                    l, o, r, c, mr, mp, self.image_sizes
                )
            )

        boxlists = list(zip(*sampled_boxes))
        boxlists = [Instances.cat(boxlist) for boxlist in boxlists]
        boxlists = self.select_over_all_levels(boxlists)

        num_images = len(boxlists)
        for i in range(num_images):
            per_image_masks = boxlists[i].pred_masks
            if 'mask_bce' in self.mask_loss_type:
                per_image_masks = torch.sigmoid(per_image_masks)
            else:
                per_image_masks = torch.clamp(per_image_masks, min=0.001, max=0.999)
            per_image_masks = per_image_masks.view(-1, 1, self.output_mask_size, self.output_mask_size)
            boxlists[i].pred_masks = per_image_masks

        return boxlists
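# Shape sketch for the mask post-processing above (hypothetical sizes, not this module's
# real config): flat per-detection mask logits are squashed to probabilities and viewed
# as (N, 1, S, S) with S = output_mask_size.
import torch

output_mask_size = 14
mask_logits = torch.randn(5, output_mask_size * output_mask_size)  # 5 detections
pred_masks = torch.sigmoid(mask_logits).view(-1, 1, output_mask_size, output_mask_size)
# pred_masks.shape == (5, 1, 14, 14)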
Example #5
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:
                   * "image": Tensor, image in (C, H, W) format.
                   * "sem_seg": semantic segmentation ground truth
                   * "center": center points heatmap ground truth
                   * "offset": pixel offsets to center points ground truth
                   * Other information that's included in the original dicts, such as:
                     "height", "width" (int): the output resolution of the model (may be different
                     from input resolution), used in inference.
        Returns:
            list[dict]:
                each dict is the results for one image. The dict contains the following keys:

                * "panoptic_seg", "sem_seg": see documentation
                    :doc:`/tutorials/models` for the standard output format
                * "instances": available if ``predict_instances is True``. see documentation
                    :doc:`/tutorials/models` for the standard output format
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        # To avoid error in ASPP layer when input has different size.
        size_divisibility = (
            self.size_divisibility
            if self.size_divisibility > 0
            else self.backbone.size_divisibility
        )
        images = ImageList.from_tensors(images, size_divisibility)

        features = self.backbone(images.tensor)

        losses = {}
        if "sem_seg" in batched_inputs[0]:
            targets = [x["sem_seg"].to(self.device) for x in batched_inputs]
            targets = ImageList.from_tensors(
                targets, size_divisibility, self.sem_seg_head.ignore_value
            ).tensor
            if "sem_seg_weights" in batched_inputs[0]:
                # The default D2 DatasetMapper may not contain "sem_seg_weights"
                # Avoid error in testing when default DatasetMapper is used.
                weights = [x["sem_seg_weights"].to(self.device) for x in batched_inputs]
                weights = ImageList.from_tensors(weights, size_divisibility).tensor
            else:
                weights = None
        else:
            targets = None
            weights = None
        sem_seg_results, sem_seg_losses = self.sem_seg_head(features, targets, weights)
        losses.update(sem_seg_losses)

        if "center" in batched_inputs[0] and "offset" in batched_inputs[0]:
            center_targets = [x["center"].to(self.device) for x in batched_inputs]
            center_targets = ImageList.from_tensors(
                center_targets, size_divisibility
            ).tensor.unsqueeze(1)
            center_weights = [x["center_weights"].to(self.device) for x in batched_inputs]
            center_weights = ImageList.from_tensors(center_weights, size_divisibility).tensor

            offset_targets = [x["offset"].to(self.device) for x in batched_inputs]
            offset_targets = ImageList.from_tensors(offset_targets, size_divisibility).tensor
            offset_weights = [x["offset_weights"].to(self.device) for x in batched_inputs]
            offset_weights = ImageList.from_tensors(offset_weights, size_divisibility).tensor
        else:
            center_targets = None
            center_weights = None

            offset_targets = None
            offset_weights = None

        center_results, offset_results, center_losses, offset_losses = self.ins_embed_head(
            features, center_targets, center_weights, offset_targets, offset_weights
        )
        losses.update(center_losses)
        losses.update(offset_losses)

        if self.training:
            return losses

        if self.benchmark_network_speed:
            return []

        processed_results = []
        for sem_seg_result, center_result, offset_result, input_per_image, image_size in zip(
            sem_seg_results, center_results, offset_results, batched_inputs, images.image_sizes
        ):
            height = input_per_image.get("height")
            width = input_per_image.get("width")
            r = sem_seg_postprocess(sem_seg_result, image_size, height, width)
            c = sem_seg_postprocess(center_result, image_size, height, width)
            o = sem_seg_postprocess(offset_result, image_size, height, width)
            # Post-processing to get panoptic segmentation.
            panoptic_image, _ = get_panoptic_segmentation(
                r.argmax(dim=0, keepdim=True),
                c,
                o,
                thing_ids=self.meta.thing_dataset_id_to_contiguous_id.values(),
                label_divisor=self.meta.label_divisor,
                stuff_area=self.stuff_area,
                void_label=-1,
                threshold=self.threshold,
                nms_kernel=self.nms_kernel,
                top_k=self.top_k,
            )
            # For semantic segmentation evaluation.
            processed_results.append({"sem_seg": r})
            panoptic_image = panoptic_image.squeeze(0)
            semantic_prob = F.softmax(r, dim=0)

            # Write the panoptic visualization to disk (output directory is hardcoded below):
            img = input_per_image["image"]
            from detectron2.utils.visualizer import Visualizer
            from detectron2.data.detection_utils import convert_image_to_rgb
            from PIL import Image 
            import os

            img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format).astype("uint8")
            img = np.array(Image.fromarray(img).resize((width, height)))
            v_panoptic = Visualizer(img, self.meta)
            v_panoptic = v_panoptic.draw_panoptic_seg_predictions(panoptic_image.cpu(), None)
            pan_img = v_panoptic.get_image()
            image_path = input_per_image['file_name'].split(os.sep)
            image_name = os.path.splitext(image_path[-1])[0] 
            Image.fromarray(pan_img).save(os.path.join('/home/ahabbas/projects/conseg/affinityNet/output_pdl/coco/eval_vis', image_name + '_panoptic.png'))

            # For panoptic segmentation evaluation.
            processed_results[-1]["panoptic_seg"] = (panoptic_image, None)
            # For instance segmentation evaluation.
            if self.predict_instances:
                instances = []
                panoptic_image_cpu = panoptic_image.cpu().numpy()
                for panoptic_label in np.unique(panoptic_image_cpu):
                    if panoptic_label == -1:
                        continue
                    pred_class = panoptic_label // self.meta.label_divisor
                    isthing = pred_class in list(
                        self.meta.thing_dataset_id_to_contiguous_id.values()
                    )
                    # Get instance segmentation results.
                    if isthing:
                        instance = Instances((height, width))
                        # Evaluation code takes continuous id starting from 0
                        instance.pred_classes = torch.tensor(
                            [pred_class], device=panoptic_image.device
                        )
                        mask = panoptic_image == panoptic_label
                        instance.pred_masks = mask.unsqueeze(0)
                        # Average semantic probability
                        sem_scores = semantic_prob[pred_class, ...]
                        sem_scores = torch.mean(sem_scores[mask])
                        # Center point probability
                        mask_indices = torch.nonzero(mask).float()
                        center_y, center_x = (
                            torch.mean(mask_indices[:, 0]),
                            torch.mean(mask_indices[:, 1]),
                        )
                        center_scores = c[0, int(center_y.item()), int(center_x.item())]
                        # Confidence score is semantic prob * center prob.
                        instance.scores = torch.tensor(
                            [sem_scores * center_scores], device=panoptic_image.device
                        )
                        # Get bounding boxes
                        instance.pred_boxes = BitMasks(instance.pred_masks).get_bounding_boxes()
                        instances.append(instance)
                if len(instances) > 0:
                    processed_results[-1]["instances"] = Instances.cat(instances)

        return processed_results
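# Inference-time input sketch for the forward above (hypothetical values; `model` stands
# for an instance of this class). Field names follow the docstring; "file_name" is needed
# because this variant also writes a panoptic visualization to disk.
import torch

batched_inputs = [{
    "image": torch.randint(0, 256, (3, 512, 512), dtype=torch.uint8).float(),
    "height": 512,
    "width": 512,
    "file_name": "demo.jpg",
}]
with torch.no_grad():
    outputs = model(batched_inputs)
# outputs[0] contains "sem_seg", "panoptic_seg" and, if predict_instances is True, "instances".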
Example #6
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances (optional): groundtruth :class:`Instances`
                * proposals (optional): :class:`Instances`, precomputed proposals.

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            list[dict]:
                Each dict is the output for one input image.
                The dict contains one key "instances" whose value is a :class:`Instances`.
                The :class:`Instances` object has the following keys:
                "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints"
        """
        if not self.training:
            self.init_model()
            return self.inference(batched_inputs)

        images, support_images = self.preprocess_image(batched_inputs)
        if "instances" in batched_inputs[0]:
            for x in batched_inputs:
                x['instances'].set(
                    'gt_classes',
                    torch.full_like(x['instances'].get('gt_classes'), 0))

            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_instances = None

        features = self.backbone(images.tensor)

        # support branches
        support_bboxes_ls = []
        for item in batched_inputs:
            bboxes = item['support_bboxes']
            for box in bboxes:
                box = Boxes(box[np.newaxis, :])
                support_bboxes_ls.append(box.to(self.device))

        B, N, C, H, W = support_images.tensor.shape
        assert N == self.support_way * self.support_shot

        support_images = support_images.tensor.reshape(B * N, C, H, W)
        support_features = self.backbone(support_images)

        # support feature roi pooling
        feature_pooled = self.roi_heads.roi_pooling(support_features,
                                                    support_bboxes_ls)

        support_box_features = self.roi_heads._shared_roi_transform(
            [support_features[f] for f in self.in_features], support_bboxes_ls)
        assert self.support_way == 2  # now only 2 way support

        detector_loss_cls = []
        detector_loss_box_reg = []
        rpn_loss_rpn_cls = []
        rpn_loss_rpn_loc = []
        for i in range(B):  # batch
            # query
            query_gt_instances = [gt_instances[i]]  # one query gt instances
            query_images = ImageList.from_tensors([images[i]
                                                   ])  # one query image

            query_feature_res4 = features['res4'][i].unsqueeze(
                0)  # one query feature for attention rpn
            query_features = {
                'res4': query_feature_res4
            }  # one query feature for rcnn

            # positive support branch ##################################
            pos_begin = i * self.support_shot * self.support_way
            pos_end = pos_begin + self.support_shot
            pos_support_features = feature_pooled[pos_begin:pos_end].mean(
                0, True
            )  # pos support features from res4, average all supports, for rcnn
            pos_support_features_pool = pos_support_features.mean(
                dim=[2, 3], keepdim=True
            )  # average pooling support feature for attention rpn
            pos_correlation = F.conv2d(query_feature_res4,
                                       pos_support_features_pool.permute(
                                           1, 0, 2, 3),
                                       groups=1024)  # attention map

            pos_features = {
                'res4': pos_correlation
            }  # attention map for attention rpn
            pos_support_box_features = support_box_features[
                pos_begin:pos_end].mean(0, True)
            pos_proposals, pos_anchors, pos_pred_objectness_logits, pos_gt_labels, pos_pred_anchor_deltas, pos_gt_boxes = self.proposal_generator(
                query_images, pos_features,
                query_gt_instances)  # attention rpn
            pos_pred_class_logits, pos_pred_proposal_deltas, pos_detector_proposals = self.roi_heads(
                query_images, query_features, pos_support_box_features,
                pos_proposals, query_gt_instances)  # pos rcnn

            # negative support branch ##################################
            neg_begin = pos_end
            neg_end = neg_begin + self.support_shot

            neg_support_features = feature_pooled[neg_begin:neg_end].mean(
                0, True)
            neg_support_features_pool = neg_support_features.mean(dim=[2, 3],
                                                                  keepdim=True)
            neg_correlation = F.conv2d(query_feature_res4,
                                       neg_support_features_pool.permute(
                                           1, 0, 2, 3),
                                       groups=1024)

            neg_features = {'res4': neg_correlation}

            neg_support_box_features = support_box_features[
                neg_begin:neg_end].mean(0, True)
            neg_proposals, neg_anchors, neg_pred_objectness_logits, neg_gt_labels, neg_pred_anchor_deltas, neg_gt_boxes = self.proposal_generator(
                query_images, neg_features, query_gt_instances)
            neg_pred_class_logits, neg_pred_proposal_deltas, neg_detector_proposals = self.roi_heads(
                query_images, query_features, neg_support_box_features,
                neg_proposals, query_gt_instances)

            # rpn loss
            outputs_images = ImageList.from_tensors([images[i], images[i]])

            outputs_pred_objectness_logits = [
                torch.cat(pos_pred_objectness_logits +
                          neg_pred_objectness_logits,
                          dim=0)
            ]
            outputs_pred_anchor_deltas = [
                torch.cat(pos_pred_anchor_deltas + neg_pred_anchor_deltas,
                          dim=0)
            ]

            outputs_anchors = pos_anchors  # + neg_anchors

            # convert 1 in neg_gt_labels to 0
            for item in neg_gt_labels:
                item[item == 1] = 0

            outputs_gt_boxes = pos_gt_boxes + neg_gt_boxes  #[None]
            outputs_gt_labels = pos_gt_labels + neg_gt_labels

            if self.training:
                proposal_losses = self.proposal_generator.losses(
                    outputs_anchors, outputs_pred_objectness_logits,
                    outputs_gt_labels, outputs_pred_anchor_deltas,
                    outputs_gt_boxes)
                proposal_losses = {
                    k: v * self.proposal_generator.loss_weight
                    for k, v in proposal_losses.items()
                }
            else:
                proposal_losses = {}

            # detector loss
            detector_pred_class_logits = torch.cat(
                [pos_pred_class_logits, neg_pred_class_logits], dim=0)
            detector_pred_proposal_deltas = torch.cat(
                [pos_pred_proposal_deltas, neg_pred_proposal_deltas], dim=0)
            for item in neg_detector_proposals:
                item.gt_classes = torch.full_like(item.gt_classes, 1)

            #detector_proposals = pos_detector_proposals + neg_detector_proposals
            detector_proposals = [
                Instances.cat(pos_detector_proposals + neg_detector_proposals)
            ]
            if self.training:
                predictions = detector_pred_class_logits, detector_pred_proposal_deltas
                detector_losses = self.roi_heads.box_predictor.losses(
                    predictions, detector_proposals)

            rpn_loss_rpn_cls.append(proposal_losses['loss_rpn_cls'])
            rpn_loss_rpn_loc.append(proposal_losses['loss_rpn_loc'])
            detector_loss_cls.append(detector_losses['loss_cls'])
            detector_loss_box_reg.append(detector_losses['loss_box_reg'])

        proposal_losses = {}
        detector_losses = {}

        proposal_losses['loss_rpn_cls'] = torch.stack(rpn_loss_rpn_cls).mean()
        proposal_losses['loss_rpn_loc'] = torch.stack(rpn_loss_rpn_loc).mean()
        detector_losses['loss_cls'] = torch.stack(detector_loss_cls).mean()
        detector_losses['loss_box_reg'] = torch.stack(
            detector_loss_box_reg).mean()

        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        return losses
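# Index-arithmetic sketch for the positive/negative support slices used above
# (assumes support_way == 2, as the assert enforces; support_shot is illustrative).
support_way, support_shot = 2, 5
for i in range(2):                                         # batch index
    pos_begin = i * support_shot * support_way             # 0, 10
    pos_end = pos_begin + support_shot                     # 5, 15
    neg_begin, neg_end = pos_end, pos_end + support_shot   # (5, 10), (15, 20)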
Example #7
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:
                   * "image": Tensor, image in (C, H, W) format.
                   * "sem_seg": semantic segmentation ground truth
                   * "center": center points heatmap ground truth
                   * "offset": pixel offsets to center points ground truth
                   * Other information that's included in the original dicts, such as:
                     "height", "width" (int): the output resolution of the model (may be different
                     from input resolution), used in inference.
        Returns:
            list[dict]:
              each dict is the results for one image. The dict contains the following keys:

                * "instances": see :meth:`GeneralizedRCNN.forward` for its format.
                * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format.
                * "panoptic_seg": see :func:`combine_semantic_and_instance_outputs` for its format.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        size_divisibility = self.backbone.size_divisibility
        images = ImageList.from_tensors(images, size_divisibility)

        features = self.backbone(images.tensor)

        losses = {}
        if "sem_seg" in batched_inputs[0]:
            targets = [x["sem_seg"].to(self.device) for x in batched_inputs]
            targets = ImageList.from_tensors(
                targets, size_divisibility,
                self.sem_seg_head.ignore_value).tensor
            if "sem_seg_weights" in batched_inputs[0]:
                # The default D2 DatasetMapper may not contain "sem_seg_weights"
                # Avoid error in testing when default DatasetMapper is used.
                weights = [
                    x["sem_seg_weights"].to(self.device)
                    for x in batched_inputs
                ]
                weights = ImageList.from_tensors(weights,
                                                 size_divisibility).tensor
            else:
                weights = None
        else:
            targets = None
            weights = None
        sem_seg_results, sem_seg_losses = self.sem_seg_head(
            features, targets, weights)
        losses.update(sem_seg_losses)

        if "center" in batched_inputs[0] and "offset" in batched_inputs[0]:
            center_targets = [
                x["center"].to(self.device) for x in batched_inputs
            ]
            center_targets = ImageList.from_tensors(
                center_targets, size_divisibility).tensor.unsqueeze(1)
            center_weights = [
                x["center_weights"].to(self.device) for x in batched_inputs
            ]
            center_weights = ImageList.from_tensors(center_weights,
                                                    size_divisibility).tensor

            offset_targets = [
                x["offset"].to(self.device) for x in batched_inputs
            ]
            offset_targets = ImageList.from_tensors(offset_targets,
                                                    size_divisibility).tensor
            offset_weights = [
                x["offset_weights"].to(self.device) for x in batched_inputs
            ]
            offset_weights = ImageList.from_tensors(offset_weights,
                                                    size_divisibility).tensor
        else:
            center_targets = None
            center_weights = None

            offset_targets = None
            offset_weights = None

        center_results, offset_results, center_losses, offset_losses = self.ins_embed_head(
            features, center_targets, center_weights, offset_targets,
            offset_weights)
        losses.update(center_losses)
        losses.update(offset_losses)

        if self.training:
            return losses

        processed_results = []
        for sem_seg_result, center_result, offset_result, input_per_image, image_size in zip(
                sem_seg_results, center_results, offset_results,
                batched_inputs, images.image_sizes):
            height = input_per_image.get("height")
            width = input_per_image.get("width")
            r = sem_seg_postprocess(sem_seg_result, image_size, height, width)
            c = sem_seg_postprocess(center_result, image_size, height, width)
            o = sem_seg_postprocess(offset_result, image_size, height, width)
            # Post-processing to get panoptic segmentation.
            panoptic_image, _ = get_panoptic_segmentation(
                r.argmax(dim=0, keepdim=True),
                c,
                o,
                thing_ids=self.meta.thing_dataset_id_to_contiguous_id.values(),
                label_divisor=self.meta.label_divisor,
                stuff_area=self.stuff_area,
                void_label=-1,
                threshold=self.threshold,
                nms_kernel=self.nms_kernel,
                top_k=self.top_k,
            )
            # For semantic segmentation evaluation.
            processed_results.append({"sem_seg": r})
            panoptic_image = panoptic_image.squeeze(0)
            semantic_prob = F.softmax(r, dim=0)
            # For panoptic segmentation evaluation.
            processed_results[-1]["panoptic_seg"] = (panoptic_image, None)
            # For instance segmentation evaluation.
            if self.predict_instances:
                instances = []
                panoptic_image_cpu = panoptic_image.cpu().numpy()
                for panoptic_label in np.unique(panoptic_image_cpu):
                    if panoptic_label == -1:
                        continue
                    pred_class = panoptic_label // self.meta.label_divisor
                    isthing = pred_class in list(
                        self.meta.thing_dataset_id_to_contiguous_id.values())
                    # Get instance segmentation results.
                    if isthing:
                        instance = Instances((height, width))
                        # Evaluation code takes continuous id starting from 0
                        instance.pred_classes = torch.tensor(
                            [pred_class], device=panoptic_image.device)
                        mask = panoptic_image == panoptic_label
                        instance.pred_masks = mask.unsqueeze(0)
                        # Average semantic probability
                        sem_scores = semantic_prob[pred_class, ...]
                        sem_scores = torch.mean(sem_scores[mask])
                        # Center point probability
                        mask_indices = torch.nonzero(mask).float()
                        center_y, center_x = (
                            torch.mean(mask_indices[:, 0]),
                            torch.mean(mask_indices[:, 1]),
                        )
                        center_scores = c[0,
                                          int(center_y.item()),
                                          int(center_x.item())]
                        # Confidence score is semantic prob * center prob.
                        instance.scores = torch.tensor(
                            [sem_scores * center_scores],
                            device=panoptic_image.device)
                        # Get bounding boxes
                        instance.pred_boxes = BitMasks(
                            instance.pred_masks).get_bounding_boxes()
                        instances.append(instance)
                if len(instances) > 0:
                    processed_results[-1]["instances"] = Instances.cat(
                        instances)

        return processed_results
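# Illustrative decoding of a panoptic id produced by get_panoptic_segmentation above,
# assuming label_divisor = 1000 (the real value comes from self.meta.label_divisor).
label_divisor = 1000
panoptic_label = 17003                          # hypothetical id
pred_class = panoptic_label // label_divisor    # 17 -> contiguous category id
instance_id = panoptic_label % label_divisor    # 3  -> per-category instance index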
Example #8
    def predict_proposals_single_image(
        self,
        cls_scores,
        bbox_preds,
        centernesses,
        all_level_points,
        image_size
    ):
        """
        Single-image inference. Return bounding-box detection results by thresholding
        on scores and applying non-maximum suppression (NMS).

        Arguments:
            cls_scores (list[Tensor]): list of #feature levels. Each entry contains
                tensor of size (C, Hi, Wi), where i denotes a specific feature level.
            bbox_preds (list[Tensor]): Same shape as 'cls_scores' except that C becomes 4.
            centernesses (list[Tensor]): Same shape as 'cls_scores' except that C becomes 1.
            all_level_points (list[Tensor]): list of #feature levels. Each entry contains
                tensor of size (Hi*Wi, 2), a set of point coordinates (xi, yi) of all feature map
                locations on 'feature level i' in image coordinate.
            image_size (tuple(H, W)): a tuple of the image height and width.

        Returns:
            Same as `predict_proposals`, but for only one image.
        """
        assert len(cls_scores) == len(bbox_preds) == len(all_level_points)
        bboxes_list = []

        # Iterate over every feature level
        for (cls_score, bbox_pred, centerness, points) in zip(
            cls_scores, bbox_preds, centernesses, all_level_points
        ):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            # (C, Hi, Wi) -> (Hi*Wi, C)
            scores = cls_score.permute(1, 2, 0).reshape(-1, self.num_classes).sigmoid()
            # (4, Hi, Wi) -> (Hi*Wi, 4)
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            # (1, Hi, Wi) -> (Hi*Wi, )
            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

            """ Your code starts here """
            # Keep only locations whose class score exceeds the threshold
            # (note: no pre-NMS top-k cap is applied in this example).
            candidate_inds = scores > self.score_threshold

            scores = scores * centerness[:, None]

            bbox_scores = scores[candidate_inds]
            bbox_hw = candidate_inds.nonzero()[:, 0]
            bbox_classes = candidate_inds.nonzero()[:, 1]

            bbox_lrtb = bbox_pred[bbox_hw]
            bbox_xy = points[bbox_hw]

            if len(bbox_hw) != 0:
                h, w = image_size
                detections = torch.stack([
                    bbox_xy[:, 0] - bbox_lrtb[:, 0],
                    bbox_xy[:, 1] - bbox_lrtb[:, 1],
                    bbox_xy[:, 0] + bbox_lrtb[:, 2],
                    bbox_xy[:, 1] + bbox_lrtb[:, 3],
                ], dim=1)

                bboxes = Boxes(detections)

                bbox_instances = Instances((int(h), int(w)))
                bbox_instances.set("pred_boxes", bboxes)
                bbox_instances.set("scores", bbox_scores)
                bbox_instances.set("pred_classes", bbox_classes)

                bboxes_list.append(bbox_instances)
            """ Your code ends here """

        bboxes_list = Instances.cat(bboxes_list)

        # non-maximum suppression per-image.
        results = ml_nms(
            bboxes_list,
            self.nms_threshold,
            # Limit to max_per_image detections **over all classes**
            max_proposals=self.nms_post_topk
        )
        return results
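# Worked example of the distance-to-box decoding used above (illustrative values):
# a location (x, y) with regressed distances (l, t, r, b) becomes (x-l, y-t, x+r, y+b).
import torch

point = torch.tensor([[100.0, 80.0]])             # (x, y)
ltrb = torch.tensor([[20.0, 10.0, 30.0, 40.0]])   # (l, t, r, b)
box = torch.stack([
    point[:, 0] - ltrb[:, 0],  # x1 = 80
    point[:, 1] - ltrb[:, 1],  # y1 = 70
    point[:, 0] + ltrb[:, 2],  # x2 = 130
    point[:, 1] + ltrb[:, 3],  # y2 = 120
], dim=1)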
Example #9
    def forward(self, batched_inputs):
        # Also runs once per image
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances (optional): groundtruth :class:`Instances`
                * proposals (optional): :class:`Instances`, precomputed proposals.

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            list[dict]:
                Each dict is the output for one input image.
                The dict contains one key "instances" whose value is a :class:`Instances`.
                The :class:`Instances` object has the following keys:
                "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints"
        """
        if not self.training:

            ### Estimate the number of classes
            support_file_name = './support_dir/support_feature.pkl'
            if os.path.exists(support_file_name) and self.n_clases == 1:
                device = torch.cuda.current_device()
                avaliable = torch.cuda.get_device_properties(
                    device).total_memory - torch.cuda.memory_reserved(device)
                #print("Memoria disponieble MiB: ", avaliable/(1024*1024))
                with open(support_file_name, "rb") as hFile:
                    aux = pickle.load(hFile, encoding="latin1")
                size = aux['res5_avg'][0].element_size(
                ) * aux['res5_avg'][0].nelement()
                size += aux['res4_avg'][0].element_size(
                ) * aux['res4_avg'][0].nelement()
                #print("Memoria ocupada por soporte: ", size)
                #print("Numero de clases :", math.floor(avaliable/size))
                self.n_clases = math.floor(avaliable / (size * 1000))
                print("Classes number: ", self.n_clases)
            ### End of estimation

            # Change n_clases to a static value here if desired.
            #n_clases = self.n_clases
            n_clases = 101

            # Get the list of class ids: [1, 2, 3, ...]
            metadata = MetadataCatalog.get('fsod_eval')
            class_list = list(
                metadata.thing_dataset_id_to_contiguous_id.values())

            # On each iteration we take n_clases classes from class_list and initialize the model with them.
            # Example with class_list=[1,2,3,4,5] and n_clases=2:
            # iter1: [1,2];  iter2: [3,4];  iter3: [5]

            aux = []
            for i in range(math.ceil(len(class_list) / n_clases)):
                self.init_model(class_list[i * n_clases:i * n_clases +
                                           n_clases])
                aux.append(self.inference(batched_inputs)[0]["instances"])

            # aux is a list of predictions [pred_for_first_n_clases, pred_for_next_n_clases, ...]
            # We need to merge them all into a single element
            # -> using detectron2.structures.instances.Instances.cat.
            _predictions = {"instances": Instances.cat(aux)}
            return [_predictions]

        images, support_images = self.preprocess_image(batched_inputs)
        if "instances" in batched_inputs[0]:
            for x in batched_inputs:
                x['instances'].set(
                    'gt_classes',
                    torch.full_like(x['instances'].get('gt_classes'), 0))

            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_instances = None

        features = self.backbone(images.tensor)

        # support branches
        support_bboxes_ls = []
        for item in batched_inputs:
            bboxes = item['support_bboxes']
            for box in bboxes:
                box = Boxes(box[np.newaxis, :])
                support_bboxes_ls.append(box.to(self.device))

        B, N, C, H, W = support_images.tensor.shape
        assert N == self.support_way * self.support_shot

        support_images = support_images.tensor.reshape(B * N, C, H, W)
        support_features = self.backbone(support_images)

        # support feature roi pooling
        feature_pooled = self.roi_heads.roi_pooling(support_features,
                                                    support_bboxes_ls)

        support_box_features = self.roi_heads._shared_roi_transform(
            [support_features[f] for f in self.in_features], support_bboxes_ls)
        #assert self.support_way == 2 # now only 2 way support

        detector_loss_cls = []
        detector_loss_box_reg = []
        rpn_loss_rpn_cls = []
        rpn_loss_rpn_loc = []
        for i in range(B):  # batch
            # query
            query_gt_instances = [gt_instances[i]]  # one query gt instances
            query_images = ImageList.from_tensors([images[i]
                                                   ])  # one query image

            query_feature_res4 = features['res4'][i].unsqueeze(
                0)  # one query feature for attention rpn
            query_features = {
                'res4': query_feature_res4
            }  # one query feature for rcnn

            # positive support branch ##################################
            pos_begin = i * self.support_shot * self.support_way
            pos_end = pos_begin + self.support_shot
            pos_support_features = feature_pooled[pos_begin:pos_end].mean(
                0, True
            )  # pos support features from res4, average all supports, for rcnn
            pos_support_features_pool = pos_support_features.mean(
                dim=[2, 3], keepdim=True
            )  # average pooling support feature for attention rpn
            pos_correlation = F.conv2d(query_feature_res4,
                                       pos_support_features_pool.permute(
                                           1, 0, 2, 3),
                                       groups=1024)  # attention map

            pos_features = {
                'res4': pos_correlation
            }  # attention map for attention rpn
            pos_support_box_features = support_box_features[
                pos_begin:pos_end].mean(0, True)
            pos_proposals, pos_anchors, pos_pred_objectness_logits, pos_gt_labels, pos_pred_anchor_deltas, pos_gt_boxes = self.proposal_generator(
                query_images, pos_features,
                query_gt_instances)  # attention rpn
            pos_pred_class_logits, pos_pred_proposal_deltas, pos_detector_proposals = self.roi_heads(
                query_images, query_features, pos_support_box_features,
                pos_proposals, query_gt_instances)  # pos rcnn

            # negative support branch ##################################
            neg_begin = pos_end
            neg_end = neg_begin + self.support_shot

            neg_support_features = feature_pooled[neg_begin:neg_end].mean(
                0, True)
            neg_support_features_pool = neg_support_features.mean(dim=[2, 3],
                                                                  keepdim=True)
            neg_correlation = F.conv2d(query_feature_res4,
                                       neg_support_features_pool.permute(
                                           1, 0, 2, 3),
                                       groups=1024)

            neg_features = {'res4': neg_correlation}

            neg_support_box_features = support_box_features[
                neg_begin:neg_end].mean(0, True)
            neg_proposals, neg_anchors, neg_pred_objectness_logits, neg_gt_labels, neg_pred_anchor_deltas, neg_gt_boxes = self.proposal_generator(
                query_images, neg_features, query_gt_instances)
            neg_pred_class_logits, neg_pred_proposal_deltas, neg_detector_proposals = self.roi_heads(
                query_images, query_features, neg_support_box_features,
                neg_proposals, query_gt_instances)

            # rpn loss
            outputs_images = ImageList.from_tensors([images[i], images[i]])

            outputs_pred_objectness_logits = [
                torch.cat(pos_pred_objectness_logits +
                          neg_pred_objectness_logits,
                          dim=0)
            ]
            outputs_pred_anchor_deltas = [
                torch.cat(pos_pred_anchor_deltas + neg_pred_anchor_deltas,
                          dim=0)
            ]

            outputs_anchors = pos_anchors  # + neg_anchors

            # convert 1 in neg_gt_labels to 0
            for item in neg_gt_labels:
                item[item == 1] = 0

            outputs_gt_boxes = pos_gt_boxes + neg_gt_boxes  #[None]
            outputs_gt_labels = pos_gt_labels + neg_gt_labels

            if self.training:
                proposal_losses = self.proposal_generator.losses(
                    outputs_anchors, outputs_pred_objectness_logits,
                    outputs_gt_labels, outputs_pred_anchor_deltas,
                    outputs_gt_boxes)
                proposal_losses = {
                    k: v * self.proposal_generator.loss_weight
                    for k, v in proposal_losses.items()
                }
            else:
                proposal_losses = {}

            # detector loss
            detector_pred_class_logits = torch.cat(
                [pos_pred_class_logits, neg_pred_class_logits], dim=0)
            detector_pred_proposal_deltas = torch.cat(
                [pos_pred_proposal_deltas, neg_pred_proposal_deltas], dim=0)
            for item in neg_detector_proposals:
                item.gt_classes = torch.full_like(item.gt_classes, 1)

            #detector_proposals = pos_detector_proposals + neg_detector_proposals
            detector_proposals = [
                Instances.cat(pos_detector_proposals + neg_detector_proposals)
            ]
            if self.training:
                predictions = detector_pred_class_logits, detector_pred_proposal_deltas
                detector_losses = self.roi_heads.box_predictor.losses(
                    predictions, detector_proposals)

            rpn_loss_rpn_cls.append(proposal_losses['loss_rpn_cls'])
            rpn_loss_rpn_loc.append(proposal_losses['loss_rpn_loc'])
            detector_loss_cls.append(detector_losses['loss_cls'])
            detector_loss_box_reg.append(detector_losses['loss_box_reg'])

        proposal_losses = {}
        detector_losses = {}

        proposal_losses['loss_rpn_cls'] = torch.stack(rpn_loss_rpn_cls).mean()
        proposal_losses['loss_rpn_loc'] = torch.stack(rpn_loss_rpn_loc).mean()
        detector_losses['loss_cls'] = torch.stack(detector_loss_cls).mean()
        detector_losses['loss_box_reg'] = torch.stack(
            detector_loss_box_reg).mean()

        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        return losses
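# Chunking sketch matching the class-wise inference loop above (illustrative values):
# class_list is split into groups of n_clases and the model is re-initialized per group.
import math

class_list, n_clases = [1, 2, 3, 4, 5], 2
chunks = [class_list[i * n_clases:(i + 1) * n_clases]
          for i in range(math.ceil(len(class_list) / n_clases))]
# chunks == [[1, 2], [3, 4], [5]]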
Example #10
    def _merge_untracked_instances(self, instances: Instances) -> Instances:
        """
        For untracked previous instances, under certain condition, still keep them
        in tracking and merge with the current instances.

        Args:
            instances: D2 Instances, for predictions of the current frame
        Return:
            D2 Instances merging current instances and instances from previous
            frame decided to keep tracking
        """
        untracked_instances = Instances(
            image_size=instances.image_size,
            pred_boxes=[],
            pred_masks=[],
            pred_classes=[],
            scores=[],
            ID=[],
            ID_period=[],
            lost_frame_count=[],
        )
        prev_bboxes = list(self._prev_instances.pred_boxes)
        prev_classes = list(self._prev_instances.pred_classes)
        prev_scores = list(self._prev_instances.scores)
        prev_ID_period = self._prev_instances.ID_period
        if instances.has("pred_masks"):
            prev_masks = list(self._prev_instances.pred_masks)
        for idx in self._untracked_prev_idx:
            x_left, y_top, x_right, y_bot = prev_bboxes[idx]
            if ((1.0 * (x_right - x_left) / self._video_width <
                 self._min_box_rel_dim) or
                (1.0 *
                 (y_bot - y_top) / self._video_height < self._min_box_rel_dim)
                    or self._prev_instances.lost_frame_count[idx] >=
                    self._max_lost_frame_count
                    or prev_ID_period[idx] <= self._min_instance_period):
                continue
            untracked_instances.pred_boxes.append(
                list(prev_bboxes[idx].numpy()))
            untracked_instances.pred_classes.append(int(prev_classes[idx]))
            untracked_instances.scores.append(float(prev_scores[idx]))
            untracked_instances.ID.append(self._prev_instances.ID[idx])
            untracked_instances.ID_period.append(
                self._prev_instances.ID_period[idx])
            untracked_instances.lost_frame_count.append(
                self._prev_instances.lost_frame_count[idx] + 1)
            if instances.has("pred_masks"):
                untracked_instances.pred_masks.append(
                    prev_masks[idx].numpy().astype(np.uint8))

        untracked_instances.pred_boxes = Boxes(
            torch.FloatTensor(untracked_instances.pred_boxes))
        untracked_instances.pred_classes = torch.IntTensor(
            untracked_instances.pred_classes)
        untracked_instances.scores = torch.FloatTensor(
            untracked_instances.scores)
        if instances.has("pred_masks"):
            untracked_instances.pred_masks = torch.IntTensor(
                untracked_instances.pred_masks)
        else:
            untracked_instances.remove("pred_masks")

        return Instances.cat([
            instances,
            untracked_instances,
        ])
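# Sketch of the tracking-augmented Instances layout built above (dummy values; the
# fields mirror the ones set on untracked_instances before concatenation).
import torch
from detectron2.structures import Boxes, Instances

tracked = Instances(
    image_size=(720, 1280),
    pred_boxes=Boxes(torch.FloatTensor([[10, 20, 110, 220]])),
    pred_classes=torch.IntTensor([0]),
    scores=torch.FloatTensor([0.8]),
    ID=[7],
    ID_period=[3],
    lost_frame_count=[1],
)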
Example #11
    def predict_proposals_single_image(self, cls_scores, bbox_preds,
                                       centernesses, all_level_points,
                                       image_size):
        """
        Single-image inference. Return bounding-box detection results by thresholding
        on scores and applying non-maximum suppression (NMS).

        Arguments:
            cls_scores (list[Tensor]): list of #feature levels. Each entry contains
                tensor of size (C, Hi, Wi), where i denotes a specific feature level.
            bbox_preds (list[Tensor]): Same shape as 'cls_scores' except that C becomes 4.
            centernesses (list[Tensor]): Same shape as 'cls_scores' except that C becomes 1.
            all_level_points (list[Tensor]): list of #feature levels. Each entry contains
                tensor of size (Hi*Wi, 2), a set of point coordinates (xi, yi) of all feature map
                locations on 'feature level i' in image coordinate.
            image_size (tuple(H, W)): a tuple of the image height and width.

        Returns:
            Same as `predict_proposals`, but for only one image.
        """
        assert len(cls_scores) == len(bbox_preds) == len(all_level_points)
        bboxes_list = []

        # Iterate over every feature level
        for (cls_score, bbox_pred, centerness,
             points) in zip(cls_scores, bbox_preds, centernesses,
                            all_level_points):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            # (C, Hi, Wi) -> (Hi*Wi, C)
            scores = cls_score.permute(1, 2,
                                       0).reshape(-1,
                                                  self.num_classes).sigmoid()
            # (4, Hi, Wi) -> (Hi*Wi, 4)
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            # (1, Hi, Wi) -> (Hi*Wi, )
            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

            # Fanchen: DEBUG
            # torch.save((cls_scores,
            #             bbox_preds,
            #             centernesses,
            #             all_level_points,
            #             image_size,
            #             scores,
            #             bbox_pred,
            #             centerness), '/home/CtrlDrive/fanchen/pyws/ee898_pa1/debugdata/inf.data')
            # print('DEBUG: inf.data')
            # exit(0)
            # >>> len(cls_scores)
            # 5
            # >>> [score.size() for score in cls_scores]
            # [torch.Size([80, 152, 100]), torch.Size([80, 76, 50]), torch.Size([80, 38, 25]), torch.Size([80, 19, 13]), torch.Size([80, 10, 7])]
            # >>> scores
            # tensor([[0.0082, 0.0043, 0.0070,  ..., 0.0048, 0.0050, 0.0046],
            #         [0.0034, 0.0016, 0.0029,  ..., 0.0021, 0.0017, 0.0015],
            #         [0.0024, 0.0013, 0.0020,  ..., 0.0018, 0.0017, 0.0013],
            #         ...,
            #         [0.0050, 0.0022, 0.0024,  ..., 0.0010, 0.0013, 0.0008],
            #         [0.0057, 0.0027, 0.0032,  ..., 0.0014, 0.0015, 0.0010],
            #         [0.0129, 0.0077, 0.0085,  ..., 0.0048, 0.0057, 0.0040]],
            #        device='cuda:7')
            # >>> scores.size()
            # torch.Size([15200, 80])
            # >>> bbox_pred, bbox_pred.size()
            # (tensor([[ 6.7271,  6.7130, 16.7200, 13.4471],
            #         [12.8911,  5.4016, 11.4462, 10.5563],
            #         [17.2124,  5.3992, 17.0486, 10.5352],
            #         ...,
            #         [22.8796, 15.5267, 19.0822,  8.6359],
            #         [28.2969, 15.3834, 15.9940,  9.6031],
            #         [18.1814, 19.1390, 12.2707, 13.9811]], device='cuda:7'), torch.Size([15200, 4]))
            # >>> centerness, centerness.size()
            # (tensor([0.1976, 0.2229, 0.2007,  ..., 0.2555, 0.2092, 0.2774], device='cuda:7'), torch.Size([15200]))
            # >>> all_level_points[0].size()
            # torch.Size([15200, 2])
            """ Your code starts here """
            # H, W = image_size
            scores_i_th_inds = torch.zeros_like(scores) + (
                scores > self.score_threshold)
            scores *= scores_i_th_inds
            scores *= centerness[:, None]
            topk_cnt = scores_i_th_inds.reshape(-1).sum().clamp(
                max=self.nms_pre_topk)

            bbox_pred = torch.stack([
                points[:, 0] - bbox_pred[:, 0], points[:, 1] - bbox_pred[:, 1],
                points[:, 0] + bbox_pred[:, 2], points[:, 1] + bbox_pred[:, 3]
            ],
                                    dim=1)

            flatten_scores = scores.reshape(-1)  # Fanchen: size is (H*W*C, )
            # flatten_labels = torch.tensor(range(self.num_classes)). \
            #     repeat(image_size[0] * image_size[1])  # Fanchen: size is (H*W*C, )
            flatten_boxes = bbox_pred.unsqueeze(1). \
                expand(-1, self.num_classes, -1).reshape(-1, 4)  # Fanchen: size is (H*W*C, 4)
            pred_scores, topk_inds = flatten_scores.topk(int(topk_cnt))
            pred_scores = torch.sqrt(pred_scores)
            pred_boxes = Boxes(flatten_boxes[topk_inds])
            pred_classes = topk_inds % self.num_classes
            box_list = Instances(image_size,
                                 pred_boxes=pred_boxes,
                                 scores=pred_scores,
                                 pred_classes=pred_classes)
            bboxes_list.append(box_list)
            # Fanchen: tensor (Tensor[float]): a Nx4 matrix.  Each row is (x1, y1, x2, y2).
            """ Your code ends here """

        bboxes_list = Instances.cat(bboxes_list)
        # Fanchen: def cat(instance_lists: List["Instances"]) -> "Instances":

        # non-maximum suppression per-image.
        results = ml_nms(
            bboxes_list,
            # Fanchen:
            # boxes = boxlist.pred_boxes.tensor
            # scores = boxlist.scores
            # labels = boxlist.pred_classes
            self.nms_threshold,
            # Limit to max_per_image detections **over all classes**
            max_proposals=self.nms_post_topk)
        # Fanchen: DEBUG
        # torch.save((bboxes_list, results), '/home/CtrlDrive/fanchen/pyws/ee898_pa1/debugdata/infres.data')
        # exit(0)
        return results
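# Arithmetic note on the sqrt above (illustrative numbers): scores were already multiplied
# by centerness, so sqrt(cls_score * centerness) is their geometric mean and stays on a
# probability-like scale.
import math
cls_score, centerness = 0.64, 0.25
final_score = math.sqrt(cls_score * centerness)  # 0.4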