Ejemplo n.º 1
0
    def postprocess_detections(
        self, class_logits: Tensor, box_regression: Tensor,
        proposals: List[Tensor], image_shapes: List[Tuple[int, int]]
    ) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]:
        """Turn raw box-head outputs into final per-image detections.

        Decodes the regression deltas against the proposals, softmaxes the
        class logits, then per image: clips boxes to the image, drops the
        background column, flattens (proposal, class) pairs into independent
        detections, applies the score threshold, removes tiny boxes, runs
        per-class NMS and keeps at most ``self.detections_per_img`` results.

        Returns:
            Three parallel lists (one entry per image): boxes, scores, labels.
        """
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        counts = [p.shape[0] for p in proposals]
        decoded_boxes = self.box_coder.decode(box_regression, proposals)
        class_probs = F.softmax(class_logits, -1)

        boxes_split = decoded_boxes.split(counts, 0)
        scores_split = class_probs.split(counts, 0)

        result_boxes = []
        result_scores = []
        result_labels = []
        for img_boxes, img_scores, shape in zip(boxes_split, scores_split,
                                                image_shapes):
            img_boxes = clip_boxes_to_image(img_boxes, shape)

            # one label per class column, broadcast over every proposal
            img_labels = torch.arange(num_classes, device=device)
            img_labels = img_labels.view(1, -1).expand_as(img_scores)

            # drop the background column (index 0), then flatten so each
            # (proposal, class) pair becomes an independent detection
            img_boxes = img_boxes[:, 1:].reshape(-1, 4)
            img_scores = img_scores[:, 1:].reshape(-1)
            img_labels = img_labels[:, 1:].reshape(-1)

            # discard low-confidence detections
            keep = torch.where(img_scores > self.score_thresh)[0]
            img_boxes = img_boxes[keep]
            img_scores = img_scores[keep]
            img_labels = img_labels[keep]

            # discard degenerate (near-empty) boxes
            keep = remove_small_boxes(img_boxes, min_size=1e-2)
            img_boxes = img_boxes[keep]
            img_scores = img_scores[keep]
            img_labels = img_labels[keep]

            # per-class NMS, then truncate to the top-k scoring detections
            keep = batched_nms(img_boxes, img_scores, img_labels,
                               self.nms_thresh)
            keep = keep[:self.detections_per_img]
            img_boxes = img_boxes[keep]
            img_scores = img_scores[keep]
            img_labels = img_labels[keep]

            result_boxes.append(img_boxes)
            result_scores.append(img_scores)
            result_labels.append(img_labels)

        return result_boxes, result_scores, result_labels
Ejemplo n.º 2
0
    def postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):
        """Turn box-head outputs into per-image ``(boxes, scores, labels)``.

        Decodes regression deltas against the proposals, softmaxes the class
        logits, then per image: clips boxes, drops the background column,
        flattens (proposal, class) pairs, applies the score threshold,
        removes tiny boxes, runs per-class NMS and keeps at most
        ``self.detections_per_img`` detections.
        """
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        if len(boxes_per_image) == 1:
            # TODO : remove this when ONNX support dynamic split sizes
            # (single-image case: wrap in a tuple instead of calling split)
            pred_boxes = (pred_boxes,)
            pred_scores = (pred_scores,)
        else:
            pred_boxes = pred_boxes.split(boxes_per_image, 0)
            pred_scores = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes, pred_scores, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction (one label per class column)
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label (column 0)
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels
Ejemplo n.º 3
0
    def filter_proposals(self, proposals, objectness, image_shapes,
                         num_anchors_per_level):
        # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]
        """Select the final RPN proposals for each image in the batch.

        Picks the top-n anchors independently per FPN level, converts the
        objectness logits to probabilities, then per image: clips boxes,
        removes small and low-scoring boxes, runs NMS per level and keeps
        at most ``self.post_nms_top_n()`` proposals.
        """
        num_images = proposals.shape[0]
        device = proposals.device
        # do not backprop through objectness
        objectness = objectness.detach()
        objectness = objectness.reshape(num_images, -1)

        # level index for every anchor, so NMS can later be done per level
        levels = [
            torch.full((n, ), idx, dtype=torch.int64, device=device)
            for idx, n in enumerate(num_anchors_per_level)
        ]
        levels = torch.cat(levels, 0)
        levels = levels.reshape(1, -1).expand_as(objectness)

        # select top_n boxes independently per level before applying nms
        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

        image_range = torch.arange(num_images, device=device)
        batch_idx = image_range[:, None]

        # gather the selected anchors for every image in the batch
        objectness = objectness[batch_idx, top_n_idx]
        levels = levels[batch_idx, top_n_idx]
        proposals = proposals[batch_idx, top_n_idx]

        objectness_prob = torch.sigmoid(objectness)

        final_boxes = []
        final_scores = []
        for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob,
                                                 levels, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)

            # remove small boxes
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # remove low scoring boxes
            # use >= for backwards compatibility
            keep = torch.where(scores >= self.score_thresh)[0]
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # non-maximum suppression, independently done per level
            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)

            # keep only topk scoring predictions
            keep = keep[:self.post_nms_top_n()]
            boxes, scores = boxes[keep], scores[keep]

            final_boxes.append(boxes)
            final_scores.append(scores)
        return final_boxes, final_scores
Ejemplo n.º 4
0
    def postprocess_detections(
            self,
            pred_scores,  # type: Tensor
            pred_boxes,  # type: Tensor
            proposals,  # type: List[Tensor]
            image_shapes  # type: List[Tuple[int, int]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        """Filter pre-decoded boxes/scores into final per-image detections.

        Unlike the stock torchvision version, boxes and scores arrive
        already decoded, and each proposal keeps only its single best
        non-background class (``scores.max(dim=1)``) instead of spawning
        one detection per class.
        """
        # device = class_logits.device
        # num_classes = class_logits.shape[-1]

        boxes_per_image = [
            boxes_in_image.shape[0] for boxes_in_image in proposals
        ]

        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes_list,
                                              pred_scores_list, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # drop the background-class score column
            scores = scores[:, 1:]
            # labels = labels[:, 1:]

            # keep only the best-scoring class for each proposal
            scores, labels = scores.max(dim=1)
            labels += 1  # object labels start at 1 (0 is background)

            # remove low scoring boxes
            inds = torch.where(scores > self.score_thresh)[0]
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels
Ejemplo n.º 5
0
    def generate_anchors(self, x: Tensor) -> None:
        """Build the full anchor grid for feature map *x* and cache it.

        Concatenates the per-level anchor sets, clips them to the configured
        input size and drops degenerate anchors; the result is stored on
        ``self.anchors`` (nothing is returned).
        """
        per_level = [
            _generate_anchors(self.input_size, x.size(-1),
                              listify(sizes), self.aspect_ratios, level_stride)
            for sizes, level_stride in zip(self.anchor_sizes, self.strides)
        ]
        anchors = torch.cat(per_level, dim=0)

        # Filter anchors: clip to the image square, then drop near-empty ones.
        image_size = (self.input_size, self.input_size)
        anchors = box_ops.clip_boxes_to_image(anchors, image_size)
        valid = box_ops.remove_small_boxes(anchors, 1e-3)
        self.anchors = anchors[valid]
    def filter_predictions(self, boxes, labels, scores):
        """Filter raw detections down to the final set.

        Applies, in order: score thresholding, small-box removal, per-class
        NMS, and truncation to ``self.num_boxes_per_img`` detections.

        Returns:
            The filtered ``(boxes, scores, labels)`` tensors.
        """
        # discard low-confidence predictions
        keep = torch.where(scores > self.score_thresh)[0]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # discard degenerate (near-empty) boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # per-class NMS, then keep only the top-k scoring predictions
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        keep = keep[:self.num_boxes_per_img]
        return boxes[keep], scores[keep], labels[keep]
Ejemplo n.º 7
0
def filter_proposals_patch(self, proposals, objectness, image_shapes,
                           num_anchors_per_level):
    # type: (Tensor, Tensor, List[Tuple[int, int]], List[int])
    """Monkey-patch replacement for the RPN's ``filter_proposals``.

    Same per-level top-n selection / clip / small-box filter / NMS pipeline
    as the stock version, but uses ``clip_boxes_to_image_patch`` for box
    clipping and applies neither a sigmoid to the objectness nor a score
    threshold, so NMS operates directly on the raw (detached) logits.
    """
    print("patch:filter_proposals")

    num_images = proposals.shape[0]
    device = proposals.device
    # do not backprop through objectness
    objectness = objectness.detach()
    objectness = objectness.reshape(num_images, -1)

    # level index for every anchor, so NMS can later be done per level
    levels = [
        torch.full((n, ), idx, dtype=torch.int64, device=device)
        for idx, n in enumerate(num_anchors_per_level)
    ]
    levels = torch.cat(levels, 0)
    levels = levels.reshape(1, -1).expand_as(objectness)

    top_n_idx = self._get_top_n_idx(objectness,
                                    num_anchors_per_level)  # also patched

    image_range = torch.arange(num_images, device=device)
    batch_idx = image_range[:, None]

    # gather the selected anchors for every image in the batch
    objectness = objectness[batch_idx, top_n_idx]
    levels = levels[batch_idx, top_n_idx]
    proposals = proposals[batch_idx, top_n_idx]

    final_boxes = []
    final_scores = []
    for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels,
                                             image_shapes):

        # boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
        boxes = clip_boxes_to_image_patch(boxes, img_shape)

        keep = box_ops.remove_small_boxes(boxes, self.min_size)
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]
        # non-maximum suppression, independently done per level
        keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.post_nms_top_n()]
        boxes, scores = boxes[keep], scores[keep]
        final_boxes.append(boxes)
        final_scores.append(scores)

    return final_boxes, final_scores
Ejemplo n.º 8
0
def crop(img: Image, target: Dict[str, Any],
         region: Tuple[int]) -> Tuple[Image, Dict[str, Any]]:
    """Crop *img* to *region* and adjust the annotations in *target* to match.

    Args:
        img: source PIL image.
        target: annotation dict; every tensor-valued entry is filtered
            alongside the boxes/masks. "boxes" are treated as normalized
            cxcywh (they are scaled by the image size and converted before
            cropping) — assumes the caller stores them that way; confirm.
        region: [Top, Left, H, W]

    Returns:
        The cropped image and a deep-copied, updated target dict.
    """
    # crop image
    src_w, src_h = img.size
    img = TF.crop(img, *region)

    # never mutate the caller's annotations
    target = deepcopy(target)
    top, left, h, w = region

    # set new image size
    if "size" in target.keys():
        target["size"] = (h, w)

    # collect every tensor entry so instance filtering stays consistent
    fields: List[str] = list()
    for k, v in target.items():
        if isinstance(v, Tensor):
            fields.append(k)

    # crop bounding boxes
    if "boxes" in target:
        boxes = target["boxes"]
        # de-normalize to absolute coordinates, then to xyxy
        boxes[:, [0, 2]] *= src_w
        boxes[:, [1, 3]] *= src_h
        boxes = box_op.box_convert(boxes, "cxcywh", "xyxy")
        # shift into the crop's coordinate frame and clip to it
        boxes -= torch.tensor([left, top, left, top])
        boxes = box_op.clip_boxes_to_image(boxes, (h, w))
        keep = box_op.remove_small_boxes(boxes, 1)
        # back to normalized cxcywh relative to the crop
        boxes[:, [0, 2]] /= w
        boxes[:, [1, 3]] /= h
        boxes = box_op.box_convert(boxes, "xyxy", "cxcywh")
        target["boxes"] = boxes
        # drop degenerate boxes from all tensor fields ("boxes" included)
        for field in fields:
            target[field] = target[field][keep]

    if "masks" in target:
        target['masks'] = target['masks'][:, top:top + h, left:left + w]
        # keep only instances whose cropped mask has at least one pixel
        keep = target['masks'].flatten(1).any(1)
        for field in fields:
            target[field] = target[field][keep]

    return img, target
Ejemplo n.º 9
0
    def filter_proposals(self, proposals, objectness, image_shapes,
                         num_anchors_per_level):
        # type: (Tensor, Tensor, List[Tuple[int, int]], List[int])
        """Select the final RPN proposals per image (ONNX-export aware).

        Identical pipeline to torchvision's RPN — per-level top-n selection,
        box clipping, small-box removal, per-level NMS, top-k truncation —
        except box clipping dispatches to an ONNX-safe implementation while
        tracing. Note: NMS runs on the raw detached logits (no sigmoid here).
        """
        num_images = proposals.shape[0]
        device = proposals.device
        # do not backprop through objectness
        objectness = objectness.detach()
        objectness = objectness.reshape(num_images, -1)

        # level index for every anchor, so NMS can later be done per level
        levels = [
            torch.full((n, ), idx, dtype=torch.int64, device=device)
            for idx, n in enumerate(num_anchors_per_level)
        ]
        levels = torch.cat(levels, 0)
        levels = levels.reshape(1, -1).expand_as(objectness)

        # select top_n boxes independently per level before applying nms
        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

        image_range = torch.arange(num_images, device=device)
        batch_idx = image_range[:, None]

        # gather the selected anchors for every image in the batch
        objectness = objectness[batch_idx, top_n_idx]
        levels = levels[batch_idx, top_n_idx]
        proposals = proposals[batch_idx, top_n_idx]

        final_boxes = []
        final_scores = []
        for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels,
                                                 image_shapes):
            # For onnx export, Clip's min max can not be traced as tensor.
            if torchvision._is_tracing():
                boxes = _onnx_clip_boxes_to_image(boxes, img_shape)
            else:
                boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]
            # non-maximum suppression, independently done per level
            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.post_nms_top_n()]
            boxes, scores = boxes[keep], scores[keep]
            final_boxes.append(boxes)
            final_scores.append(scores)
        return final_boxes, final_scores
    def filter_proposals(self, proposals, objectness, image_shapes,
                         num_anchors_per_level):
        """Select the final RPN proposals per image.

        Per-level top-n selection, clipping, small-box removal, per-level
        NMS and top-k truncation; NMS runs on the raw detached objectness
        logits (no sigmoid in this variant).
        """
        num_images = proposals.shape[0]
        device = proposals.device
        # do not backprop through objectness
        objectness = objectness.detach()
        objectness = objectness.reshape(num_images, -1)

        # level index for every anchor, so NMS can later be done per level
        levels = [
            torch.full((n, ), idx, dtype=torch.int64, device=device)
            for idx, n in enumerate(num_anchors_per_level)
        ]
        levels = torch.cat(levels, 0)
        levels = levels.reshape(1, -1).expand_as(objectness)

        # select top_n boxes independently per level before applying nms
        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)
        batch_idx = torch.arange(num_images, device=device)[:, None]
        objectness = objectness[batch_idx, top_n_idx]
        levels = levels[batch_idx, top_n_idx]
        proposals = proposals[batch_idx, top_n_idx]

        final_boxes = []
        final_scores = []
        for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels,
                                                 image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]
            # non-maximum suppression, independently done per level
            #lvl=torch.tensor(np.arange(len(lvl))).to(device)
            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
            # keep only topk scoring predictions
            # NOTE(review): the sibling implementation calls
            # self.post_nms_top_n() — if post_nms_top_n is a method here too,
            # slicing with the bound method would fail; confirm it is an int.
            keep = keep[:self.post_nms_top_n]
            boxes, scores = boxes[keep], scores[keep]
            final_boxes.append(boxes)
            final_scores.append(scores)
        return final_boxes, final_scores
Ejemplo n.º 11
0
    def postprocess_boxes(
        self,
        class_logits,
        box_regression,
        embeddings,
        proposals,
        image_shapes,
        fcs=None,
        gt_det=None,
        cws=True,
    ):
        """
        Similar to RoIHeads.postprocess_detections, but can handle embeddings and implement
        First Classification Score (FCS).

        Args:
            class_logits: per-proposal classification logits (sigmoid-scored
                here, i.e. a single foreground score per proposal).
            box_regression: box deltas decoded against ``proposals``.
            embeddings: per-proposal re-id embeddings, filtered in lockstep
                with the boxes.
            fcs: optional pre-computed First Classification Scores; when
                given, ``fcs[0]`` replaces the sigmoid scores.
            gt_det: optional dict with "boxes" and "embeddings" to inject
                ground-truth detections (score/label 1.0) before NMS.
            cws: if True, apply Confidence Weighted Similarity by scaling
                each embedding by its detection score.

        Returns:
            Per-image lists: (all_boxes, all_scores, all_embeddings, all_labels).
        """
        device = class_logits.device

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        if fcs is not None:
            # Fist Classification Score (FCS)
            pred_scores = fcs[0]
        else:
            pred_scores = torch.sigmoid(class_logits)
        if cws:
            # Confidence Weighted Similarity (CWS)
            embeddings = embeddings * pred_scores.view(-1, 1)

        # split boxes and scores per image
        pred_boxes = pred_boxes.split(boxes_per_image, 0)
        pred_scores = pred_scores.split(boxes_per_image, 0)
        pred_embeddings = embeddings.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        all_embeddings = []
        for boxes, scores, embeddings, image_shape in zip(
                pred_boxes, pred_scores, pred_embeddings, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction (single "person" class -> 1)
            labels = torch.ones(scores.size(0), device=device)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores.unsqueeze(1)
            labels = labels.unsqueeze(1)

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.flatten()
            labels = labels.flatten()
            embeddings = embeddings.reshape(-1, self.embedding_head.dim)

            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels, embeddings = (
                boxes[inds],
                scores[inds],
                labels[inds],
                embeddings[inds],
            )

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels, embeddings = (
                boxes[keep],
                scores[keep],
                labels[keep],
                embeddings[keep],
            )

            if gt_det is not None:
                # include GT into the detection results (score/label forced to 1.0)
                boxes = torch.cat((boxes, gt_det["boxes"]), dim=0)
                labels = torch.cat((labels, torch.tensor([1.0]).to(device)),
                                   dim=0)
                scores = torch.cat((scores, torch.tensor([1.0]).to(device)),
                                   dim=0)
                embeddings = torch.cat((embeddings, gt_det["embeddings"]),
                                       dim=0)

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels, embeddings = (
                boxes[keep],
                scores[keep],
                labels[keep],
                embeddings[keep],
            )

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)
            all_embeddings.append(embeddings)

        return all_boxes, all_scores, all_embeddings, all_labels
Ejemplo n.º 12
0
def roi_postprocess_detections(self, class_logits, box_regression, proposals,
                               image_shapes, *extra_tensors):
    """Hack into the torchvision model to obtain features for
    training caption model; training is assumed to be false.

    Standard RoI-heads postprocessing (decode, softmax, background removal,
    score threshold, small-box filter, per-class NMS, top-k), extended so
    that any ``extra_tensors`` (e.g. per-proposal feature vectors) are
    filtered in lockstep with the surviving detections.

    https://github.com/pytorch/vision/blob/master/
        torchvision/models/detection/roi_heads.py"""

    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    if len(boxes_per_image) == 1:
        # TODO : remove this when ONNX support dynamic split sizes
        # and just assign to pred_boxes instead of pred_boxes_list
        pred_boxes_list = [pred_boxes]
        pred_scores_list = [pred_scores]
        extra_tensors_list = [[x] for x in extra_tensors]
    else:
        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)
        extra_tensors_list = [
            x.split(boxes_per_image, 0) for x in extra_tensors
        ]

    all_boxes = []
    all_scores = []
    all_labels = []
    all_extras = [[] for _ in extra_tensors]
    for boxes, scores, image_shape, *extras in zip(pred_boxes_list,
                                                   pred_scores_list,
                                                   image_shapes,
                                                   *extra_tensors_list):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction (one label per class column)
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label (column 0)
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]
        # each proposal's single feature vector backs all of its per-class
        # predictions, so map the flattened (proposal, class) index back to
        # the proposal index via integer division by the class count
        extras = [x[inds // (num_classes - 1)] for x in extras]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
        # extras are now aligned 1:1 with detections; filter them the same way
        extras = [x[keep] for x in extras]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
        # apply the same NMS/top-k selection to the extra tensors
        extras = [x[keep] for x in extras]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        for x, y in zip(all_extras, extras):
            x.append(y)

    return [all_boxes, all_scores, all_labels, *all_extras]
Ejemplo n.º 13
0
    def box_features_hook(self, module, input, output):
        '''
        Hook for extracting features from MaskRCNN.

        Re-runs the RoI-heads forward pass (pooling, box head, predictor)
        on the hooked module's inputs, repeats the standard detection
        postprocessing to recover the indices of the surviving detections,
        and stores the corresponding box features on
        ``self.detection_box_features`` plus pooled FPN features on
        ``self.fpn_pooled_features``. The hook's ``output`` is unused.
        '''

        features, proposals, image_shapes, targets = input

        # recompute per-proposal features and predictions from the hooked module
        box_features = module.box_roi_pool(features, proposals, image_shapes)
        box_features = module.box_head(box_features)
        class_logits, box_regression = module.box_predictor(box_features)

        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = module.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        pred_boxes = pred_boxes.split(boxes_per_image, 0)
        pred_scores = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        all_keeps = []
        for boxes, scores, image_shape in zip(pred_boxes, pred_scores,
                                              image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction (one label per class column)
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label (column 0)
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.flatten()
            labels = labels.flatten()

            # remove low scoring boxes
            inds = torch.nonzero(scores > module.score_thresh).squeeze(1)
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels,
                                       module.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.mask_rcnn_top_k_boxes]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)
            # NOTE(review): `keep` indexes the per-image filtered set, but it
            # is applied to the batch-level `box_features` below — confirm
            # this is only used with single-image batches.
            all_keeps.append(keep)

        # gather the pooled features of the kept detections
        box_features_per_image = []
        for keep in all_keeps:
            box_features_per_image.append(box_features[keep])

        self.detection_box_features = box_features_per_image
        self.fpn_pooled_features = self.avg2dpool(
            features['pool']).squeeze(-1).squeeze(-1)
Ejemplo n.º 14
0
    def postprocess_detections(self, pred_scores, box_regression, embeddings_,
                               proposals, image_shapes):
        """Turn box-head outputs plus re-id embeddings into per-image results.

        Same pipeline as RoIHeads.postprocess_detections (decode, drop
        background, threshold, small-box filter, per-class NMS, top-k), with
        the per-proposal embeddings filtered in lockstep with the boxes.
        ``pred_scores`` arrives pre-computed (no softmax here).

        Returns:
            Per-image lists: (all_boxes, all_scores, all_embeddings, all_labels).
        """
        device = pred_scores.device
        num_classes = pred_scores.shape[-1]

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        # split boxes and scores per image
        pred_boxes = pred_boxes.split(boxes_per_image, 0)
        pred_scores = pred_scores.split(boxes_per_image, 0)
        pred_embeddings = embeddings_.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        all_embeddings = []
        for boxes, scores, embeddings, image_shape in zip(
                pred_boxes, pred_scores, pred_embeddings, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction (one label per class column)
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label (column 0)
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]
            # embeddings are already personized.

            # batch everything, by making every class prediction be a separate
            # instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.flatten()
            labels = labels.flatten()
            embeddings = embeddings.reshape(-1, self.embedding_head.dim)

            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels, embeddings = boxes[inds], scores[
                inds], labels[inds], embeddings[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels, embeddings = boxes[keep], scores[keep], \
                labels[keep], embeddings[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels, embeddings = boxes[keep], scores[keep], \
                labels[keep], embeddings[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)
            all_embeddings.append(embeddings)

        return all_boxes, all_scores, all_embeddings, all_labels
Ejemplo n.º 15
0
    def post_processor(self, class_logits, box_regression, proposals,
                       img_metas):
        """Convert box-head outputs into a list of per-image result dicts.

        Decodes the (flattened) regression output against the concatenated
        proposals, softmaxes the logits, then per image: clips boxes to the
        shape from ``img_meta['img_shape']`` (given as (width, height)),
        drops the background column, thresholds, removes small boxes, runs
        per-class NMS and truncates to ``self.detections_per_img``.

        Returns:
            A list of dicts with keys 'boxes', 'scores', 'labels'.
        """
        num_classes = class_logits.shape[1]
        device = class_logits.device

        boxes_per_image = [box.shape[0] for box in proposals]
        proposals = cat([box for box in proposals])
        # decode against a flat (sum_N, -1) view, then restore per-class boxes
        pred_boxes = self.box_coder.decode(
            box_regression.view(sum(boxes_per_image), -1), proposals)
        pred_boxes = pred_boxes.reshape(sum(boxes_per_image), -1, 4)

        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        if len(boxes_per_image) == 1:
            # single-image case: wrap in a tuple instead of calling split
            pred_boxes = (pred_boxes, )
            pred_scores = (pred_scores, )
        else:
            pred_boxes = pred_boxes.split(boxes_per_image,
                                          dim=0)  # (N, #CLS, 4)
            pred_scores = pred_scores.split(boxes_per_image,
                                            dim=0)  # (N, #CLS)

        results = []
        for scores, boxes, img_meta in zip(pred_scores, pred_boxes, img_metas):
            width, height = img_meta['img_shape']
            boxes = box_ops.clip_boxes_to_image(boxes, (height, width))

            # create labels for each prediction (one label per class column)
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label (column 0)
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            result = {
                'boxes': boxes,
                'scores': scores,
                'labels': labels,
            }

            results.append(result)

        return results
Ejemplo n.º 16
0
    def postprocess_detections(self, class_logits, attr_logits, box_regression,
                               proposals, image_shapes):
        # type: (Tensor, Tensor, List[Tensor], List[Tuple[int, int]])
        """Post-process class, attribute and box outputs into per-image
        detections.

        class_logits: (R, num_classes), attr_logits: (R, num_attrs) where R
        is the total proposal count across the batch.
        Returns four parallel per-image lists: boxes, scores, labels, and a
        per-roi list of above-threshold attribute indices.
        """
        device = class_logits.device
        num_classes = class_logits.shape[-1]
        num_attrs = attr_logits.shape[-1]

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)
        # attributes are multi-label: independent sigmoid per attribute
        # (torch.sigmoid replaces the deprecated F.sigmoid)
        pred_ascores = torch.sigmoid(attr_logits)

        # split boxes and scores per image
        if len(boxes_per_image) == 1:
            # TODO : remove this when ONNX support dynamic split sizes
            # and just assign to pred_boxes instead of pred_boxes_list
            pred_boxes_list = [pred_boxes]
            pred_scores_list = [pred_scores]
            pred_ascores_list = [pred_ascores]
        else:
            pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
            pred_scores_list = pred_scores.split(boxes_per_image, 0)
            pred_ascores_list = pred_ascores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        all_attrs = []
        for boxes, scores, ascores, image_shape in zip(
                pred_boxes_list, pred_scores_list, pred_ascores_list,
                image_shapes):  # for each image
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction: row i holds [0..num_classes-1]
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label (index 0)
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]
            ascores = ascores[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)
            # BUGFIX: replicate each proposal's attribute row once per
            # foreground class along dim 1, so the flattened order is
            # proposal-major / class-minor — the same order produced by the
            # boxes/scores/labels reshapes above. The previous
            # `unsqueeze(0).repeat(46, 1, 1)` both hard-coded 46 classes and
            # flattened class-major, misaligning attributes with their boxes.
            ascores = ascores.unsqueeze(1).repeat(1, num_classes - 1,
                                                  1).reshape(-1, num_attrs - 1)

            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels, ascores = boxes[inds], scores[inds], labels[
                inds], ascores[inds]

            # remove empty (degenerate) boxes; note each proposal yields one
            # box per class, so filtering operates per (proposal, class) pair
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels, ascores = boxes[keep], scores[keep], labels[
                keep], ascores[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels, ascores = boxes[keep], scores[keep], labels[
                keep], ascores[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)
            # TODO: add attributes-wise thresh
            # for each kept roi, collect the indices of attributes whose
            # score clears the attribute threshold (variable-length 1-D
            # tensors, one per roi)
            ascore_list = []
            for ascore in ascores:
                ascore_list.append(
                    torch.where(ascore > self.attr_score_thresh)[0])
            all_attrs.append(ascore_list)

        # all_scores: list[(post_rois,)], all_labels: list[(post_rois,)],
        # all_attrs: list[list[Tensor]] — one attr-index tensor per roi
        return all_boxes, all_scores, all_labels, all_attrs
    def postprocess_detections(
            self,
            class_logits,  # type: Tensor
            box_regression,  # type: Tensor
            proposals,  # type: List[Tensor]
            attr_logits,
            image_shapes  # type: List[Tuple[int, int]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
        """Post-process class/box/attribute head outputs into per-image
        detections.

        Returns five parallel per-image lists: boxes, class scores, class
        labels, attribute scores and attribute labels. Attribute rows are
        replicated per foreground class so they stay aligned with the
        flattened (proposal, class) detections.
        """
        device = class_logits.device
        num_classes = class_logits.shape[-1]
        # number of attribute classes, derived from the logits instead of the
        # previously hard-coded 341 so the head size can change freely
        attr_classes = attr_logits.shape[-1]

        boxes_per_image = [
            boxes_in_image.shape[0] for boxes_in_image in proposals
        ]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)
        # attributes are multi-label: independent sigmoid per attribute
        # (torch.sigmoid replaces the deprecated F.sigmoid)
        pred_attr_scores = torch.sigmoid(attr_logits)

        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)
        pred_attr_scores_list = pred_attr_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        all_attr_scores = []
        all_attr_labels = []
        for boxes, scores, attr_scores, image_shape in zip(
                pred_boxes_list, pred_scores_list, pred_attr_scores_list,
                image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # replicate each proposal's attribute vector across the
            # num_classes - 1 foreground classes (expand creates views, not
            # copies) so it flattens in the same proposal-major order as the
            # boxes below
            detection_per_image = attr_scores.shape[0]
            new_attr_shape = (detection_per_image, num_classes - 1,
                              attr_classes)
            attr_scores = attr_scores.unsqueeze(1)
            attr_scores = attr_scores.expand(*new_attr_shape)
            attr_labels = torch.arange(attr_classes, device=device)
            attr_labels = attr_labels.view(1, 1, -1).expand_as(attr_scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)
            attr_labels = attr_labels.reshape(-1, attr_classes)
            attr_scores = attr_scores.reshape(-1, attr_classes)

            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels, attr_scores, attr_labels = boxes[
                inds], scores[inds], labels[inds], attr_scores[
                    inds], attr_labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels, attr_scores, attr_labels = boxes[
                keep], scores[keep], labels[keep], attr_scores[
                    keep], attr_labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels, attr_scores, attr_labels = boxes[
                keep], scores[keep], labels[keep], attr_scores[
                    keep], attr_labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)
            all_attr_scores.append(attr_scores)
            all_attr_labels.append(attr_labels)

        return all_boxes, all_scores, all_labels, all_attr_scores, all_attr_labels
Ejemplo n.º 18
0
    def ssm_postprocess_detections(self, head_outputs, anchors, image_shapes):
        # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]]) -> List[Dict[str, Tensor]]
        # TODO: Merge this with roi_heads.RoIHeads.postprocess_detections ?
        """Active-learning ("SSM") variant of detection post-processing.

        Images whose best score falls below CONF_THRESH are emitted with
        'al': 1 and the current accumulators; otherwise each class is
        thresholded, randomly subsampled to 500 candidates, small-box
        filtered, NMS'd and top-k capped, with `judge_y` deriving a label
        per kept detection from its foreground class-score row.
        """
        class_logits = head_outputs.pop('cls_logits')
        box_regression = head_outputs.pop('bbox_regression')
        # whatever remains in head_outputs is auxiliary per-anchor output
        # and is filtered with the same keep-masks as the boxes
        other_outputs = head_outputs

        device = class_logits.device
        num_classes = class_logits.shape[-1]

        scores = torch.sigmoid(class_logits)

        # create labels for each score
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        detections = torch.jit.annotate(List[Dict[str, Tensor]], [])
        al_idx = 0
        # BUGFIX: allocate accumulators on the logits' device instead of
        # hard-coding .cuda(), so CPU (and non-default GPU) runs work too.
        # NOTE(review): these accumulators are shared across all images in
        # the batch (never reset inside the loop) — behavior preserved here,
        # but confirm that is intended.
        all_boxes = torch.empty([0, 4], device=device)
        all_scores = torch.tensor([], device=device)
        all_labels = []
        CONF_THRESH = 0.5  # bigger leads more active learning samples
        for index, (box_regression_per_image, scores_per_image, labels_per_image, anchors_per_image, image_shape) in \
                enumerate(zip(box_regression, scores, labels, anchors, image_shapes)):
            if torch.max(scores_per_image) < CONF_THRESH:
                # low-confidence image: flag it for active learning and skip
                al_idx = 1
                detections.append({
                    "boxes": all_boxes,
                    "labels": all_labels,
                    "scores": all_scores,
                    'al': al_idx,
                })
                continue
            boxes_per_image = self.box_coder.decode_single(
                box_regression_per_image, anchors_per_image)
            boxes_per_image = box_ops.clip_boxes_to_image(
                boxes_per_image, image_shape)

            other_outputs_per_image = [(k, v[index])
                                       for k, v in other_outputs.items()]

            image_boxes = []
            image_scores = []
            image_labels = []
            image_other_outputs = torch.jit.annotate(Dict[str, List[Tensor]],
                                                     {})

            for class_index in range(num_classes):
                # remove low scoring boxes
                inds = torch.gt(scores_per_image[:, class_index],
                                self.score_thresh)
                boxes_per_class, scores_per_class, scores_all_class, labels_per_class = \
                    boxes_per_image[inds], scores_per_image[inds, class_index], scores_per_image[inds], \
                    labels_per_image[inds, class_index]
                other_outputs_per_class = [(k, v[inds])
                                           for k, v in other_outputs_per_image]

                # randomly subsample at most 500 candidates per class to
                # bound the cost of the NMS below
                keep = [i for i in range(len(boxes_per_class))]
                random.shuffle(keep)
                keep = keep[:500]
                boxes_per_class, scores_per_class, scores_all_class, labels_per_class = \
                    boxes_per_class[keep], scores_per_class[keep], scores_all_class[keep], labels_per_class[keep]
                other_outputs_per_class = [(k, v[keep])
                                           for k, v in other_outputs_per_class]

                # remove empty boxes
                keep = box_ops.remove_small_boxes(boxes_per_class,
                                                  min_size=1e-2)
                boxes_per_class, scores_per_class, scores_all_class, labels_per_class = \
                    boxes_per_class[keep], scores_per_class[keep], scores_all_class[keep], labels_per_class[keep]
                other_outputs_per_class = [(k, v[keep])
                                           for k, v in other_outputs_per_class]

                # non-maximum suppression, independently done per class
                keep = box_ops.nms(boxes_per_class, scores_per_class,
                                   self.nms_thresh)

                # keep only topk scoring predictions
                keep = keep[:self.detections_per_img]
                boxes_per_class, scores_per_class, scores_all_class, labels_per_class = \
                    boxes_per_class[keep], scores_per_class[keep], scores_all_class[keep], labels_per_class[keep]
                other_outputs_per_class = [(k, v[keep])
                                           for k, v in other_outputs_per_class]

                image_boxes.append(boxes_per_class)
                image_scores.append(scores_per_class)
                image_labels.append(labels_per_class)

                for k, v in other_outputs_per_class:
                    if k not in image_other_outputs:
                        image_other_outputs[k] = []
                    image_other_outputs[k].append(v)

                # fold the kept detections into the shared accumulators;
                # judge_y labels each from its foreground score row ([1:])
                for i in range(len(boxes_per_class)):
                    all_boxes = torch.cat(
                        (all_boxes, boxes_per_class[i].unsqueeze(0)), 0)
                    all_scores = torch.cat(
                        (all_scores, scores_per_class[i].unsqueeze(0)), 0)
                    all_labels.append(judge_y(scores_all_class[i][1:]))
            detections.append({
                "boxes": all_boxes,
                "labels": all_labels,
                "scores": all_scores,
                'al': al_idx,
            })
            for k, v in image_other_outputs.items():
                detections[-1].update({k: torch.cat(v, dim=0)})

        return detections
Ejemplo n.º 19
0
    def postprocess_detections(
            self,
            class_logits,  # type: Tensor
            box_regression,  # type: Tensor
            proposals,  # type: List[Tensor]
            image_shapes  # type: List[Tuple[int, int]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        """Post-process box head outputs into per-image detections, using the
        thresholds from the global cfg (BOX.SCORE_THRESH / BOX.NMS_THRESH /
        BOX.DETECTIONS_PER_IMG)."""
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        counts = [p.shape[0] for p in proposals]
        decoded_boxes = self.box_coder.decode(box_regression, proposals)
        probs = F.softmax(class_logits, -1)

        boxes_split = decoded_boxes.split(counts, 0)
        probs_split = probs.split(counts, 0)

        all_boxes, all_scores, all_labels = [], [], []
        for boxes, scores, image_shape in zip(boxes_split, probs_split,
                                              image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # label row [0..num_classes-1] for every proposal
            labels = torch.arange(num_classes,
                                  device=device).view(1, -1).expand_as(scores)

            # drop background (column 0) and flatten so every
            # (proposal, class) pair is an independent candidate detection
            boxes = boxes[:, 1:].reshape(-1, 4)
            scores = scores[:, 1:].reshape(-1)
            labels = labels[:, 1:].reshape(-1)

            # score threshold
            inds = torch.where(scores > cfg.BOX.SCORE_THRESH)[0]
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # drop degenerate boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # class-wise NMS, capped at the configured top-k
            keep = box_ops.batched_nms(boxes, scores, labels,
                                       cfg.BOX.NMS_THRESH)
            keep = keep[:cfg.BOX.DETECTIONS_PER_IMG]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels
Ejemplo n.º 20
0
    def generate_proposals(self,
                           anchors,
                           objectness,
                           box_regression,
                           img_metas,
                           is_target_domain=False):
        """Turn RPN head outputs into per-image proposal boxes.

        Args:
            anchors: per-image anchor tensors (concatenated along dim 0).
            objectness: (N, A, H, W) objectness logits.
            box_regression: (N, A * 4, H, W) box deltas.
            img_metas: per-image metadata dicts providing 'img_shape'.
            is_target_domain: if True, cap proposals at the ROI-head
                batch size instead of the usual post-NMS top-n.

        Returns:
            list of (num_proposals, 4) tensors, one per image.
        """
        pre_nms_top_n = self.pre_nms_top_n[self.training]
        post_nms_top_n = self.post_nms_top_n[self.training]
        if is_target_domain:
            post_nms_top_n = self.cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
        nms_thresh = self.nms_thresh

        device = objectness.device
        N, A, H, W = objectness.shape

        # (N, A, H, W) -> (N, H*W*A) objectness probabilities
        scores = objectness.permute(0, 2, 3, 1).reshape(N, H * W * A)
        scores = scores.sigmoid()

        deltas = box_regression.permute(0, 2, 3, 1).reshape(N, H * W * A, 4)
        anchor_cat = cat(anchors, dim=0).reshape(N, A * H * W, 4)

        # keep only the highest-scoring pre-NMS candidates per image
        top_n = min(pre_nms_top_n, A * H * W)
        scores, topk_idx = scores.topk(top_n, dim=1, sorted=True)
        batch_idx = torch.arange(N, device=device)[:, None]
        deltas = deltas[batch_idx, topk_idx]
        anchor_cat = anchor_cat[batch_idx, topk_idx]

        # decode deltas against their anchors, back to (N, top_n, 4)
        decoded = self.box_coder.decode(deltas.view(-1, 4),
                                        anchor_cat.view(-1, 4))
        decoded = decoded.view(N, -1, 4)

        results = []
        for boxes, score, img_meta in zip(decoded, scores, img_metas):
            img_width, img_height = img_meta['img_shape']
            boxes = box_ops.clip_boxes_to_image(boxes,
                                                (img_height, img_width))
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes = boxes[keep]
            score = score[keep]

            # per-image NMS, then cap at the post-NMS budget
            keep = ops.nms(boxes, score, nms_thresh)
            keep = keep[:post_nms_top_n]
            results.append(boxes[keep])  # (num_proposals, 4)
        return results
Ejemplo n.º 21
0
    def postprocess_detections(self, class_logits, box_regression, anchors,
                               image_shapes):
        # type: (Tensor, Tensor, List[Tensor], List[Tuple[int, int]])
        """Filter raw detections into per-image boxes, scores and labels.

        Unlike the standard multi-class variant, this version keeps a single
        argmax class per proposal (including background, which is NOT
        removed — see the commented-out block below) instead of flattening
        every class column into a separate detection.
        """
        device = class_logits.device
        num_classes = class_logits.shape[-1]
        #$

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in anchors]
        pred_boxes = self.box_coder.decode(box_regression, anchors)

        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        if len(boxes_per_image) == 1:
            # TODO : remove this when ONNX support dynamic split sizes
            # and just assign to pred_boxes instead of pred_boxes_list
            pred_boxes_list = [pred_boxes]
            pred_scores_list = [pred_scores]
        else:
            pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
            pred_scores_list = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes_list,
                                              pred_scores_list, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)

            row_to, collumn = 1, -1
            labels = labels.view(row_to, collumn).expand_as(scores)

            # take the single best-scoring class per proposal
            along_class_prediction = 1
            pred_class_labels = torch.argmax(scores,
                                             dim=along_class_prediction)

            # gather each proposal's top-1 probability via flat indices into
            # the (num_proposals, num_classes) score matrix
            prediction_num = torch.arange(boxes.shape[0], device=device)
            flat_pred_class_prob_idxs = prediction_num * num_classes + pred_class_labels
            top1_scores = torch.take(scores, flat_pred_class_prob_idxs)
            labels = pred_class_labels

            # remove predictions with the background label
            #boxes = boxes[:, 1:]
            #scores = scores[:, 1:]
            #labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            #boxes = boxes.reshape(-1, 4)
            #scores = scores.reshape(-1)
            #labels = labels.reshape(-1)

            # remove low scoring boxes
            # NOTE(review): the squeeze(1) below is only a no-op/valid if the
            # decoded boxes carry a singleton class dim (class-agnostic
            # regression, shape (K, 1, 4)) — confirm box_coder output shape
            inds = torch.nonzero(top1_scores > self.score_thresh).squeeze(1)
            boxes, scores, labels = boxes[inds].squeeze(
                1), top1_scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels
Ejemplo n.º 22
0
    def postprocess_detections(self, head_outputs, anchors, image_shapes):
        # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]]) -> List[Dict[str, Tensor]]
        # TODO: Merge this with roi_heads.RoIHeads.postprocess_detections ?
        """RetinaNet-style post-processing: decode per-anchor regressions,
        filter each class independently (score threshold, small-box removal,
        per-class NMS, top-k), and concatenate the per-class survivors into
        one dict per image.

        Any extra entries left in `head_outputs` beyond 'cls_logits' and
        'bbox_regression' are carried through and filtered with the same
        keep-masks as the boxes.
        """
        class_logits = head_outputs.pop('cls_logits')
        box_regression = head_outputs.pop('bbox_regression')
        # remaining entries are auxiliary per-anchor outputs
        other_outputs = head_outputs

        device = class_logits.device
        num_classes = class_logits.shape[-1]

        # sigmoid (not softmax): each class is scored independently
        scores = torch.sigmoid(class_logits)

        # create labels for each score
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        detections = torch.jit.annotate(List[Dict[str, Tensor]], [])

        for index, (box_regression_per_image, scores_per_image, labels_per_image, anchors_per_image, image_shape) in \
                enumerate(zip(box_regression, scores, labels, anchors, image_shapes)):

            boxes_per_image = self.box_coder.decode_single(
                box_regression_per_image, anchors_per_image)
            boxes_per_image = box_ops.clip_boxes_to_image(
                boxes_per_image, image_shape)

            other_outputs_per_image = [(k, v[index])
                                       for k, v in other_outputs.items()]

            image_boxes = []
            image_scores = []
            image_labels = []
            image_other_outputs = torch.jit.annotate(Dict[str, List[Tensor]],
                                                     {})

            for class_index in range(num_classes):
                # remove low scoring boxes
                inds = torch.gt(scores_per_image[:, class_index],
                                self.score_thresh)
                boxes_per_class, scores_per_class, labels_per_class = \
                    boxes_per_image[inds], scores_per_image[inds, class_index], labels_per_image[inds, class_index]
                other_outputs_per_class = [(k, v[inds])
                                           for k, v in other_outputs_per_image]

                # remove empty boxes
                keep = box_ops.remove_small_boxes(boxes_per_class,
                                                  min_size=1e-2)
                boxes_per_class, scores_per_class, labels_per_class = \
                    boxes_per_class[keep], scores_per_class[keep], labels_per_class[keep]
                other_outputs_per_class = [(k, v[keep])
                                           for k, v in other_outputs_per_class]

                # non-maximum suppression, independently done per class
                keep = box_ops.nms(boxes_per_class, scores_per_class,
                                   self.nms_thresh)

                # keep only topk scoring predictions
                keep = keep[:self.detections_per_img]
                boxes_per_class, scores_per_class, labels_per_class = \
                    boxes_per_class[keep], scores_per_class[keep], labels_per_class[keep]
                other_outputs_per_class = [(k, v[keep])
                                           for k, v in other_outputs_per_class]

                image_boxes.append(boxes_per_class)
                image_scores.append(scores_per_class)
                image_labels.append(labels_per_class)

                # accumulate auxiliary outputs under their original keys
                for k, v in other_outputs_per_class:
                    if k not in image_other_outputs:
                        image_other_outputs[k] = []
                    image_other_outputs[k].append(v)

            detections.append({
                'boxes': torch.cat(image_boxes, dim=0),
                'scores': torch.cat(image_scores, dim=0),
                'labels': torch.cat(image_labels, dim=0),
            })

            for k, v in image_other_outputs.items():
                detections[-1].update({k: torch.cat(v, dim=0)})

        return detections
Ejemplo n.º 23
0
    def postprocess_detections(
            self,
            class_logits,  # type: Tensor
            dof_regression,  # type: Tensor
            proposals,  # type: List[Tensor]
            image_shapes,  # type: List[Tuple[int, int]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
        """Post-process class scores and 6-DoF pose regressions into
        per-image boxes, poses, scores and labels.

        Proposals are used directly as candidate boxes; final boxes are
        re-projected from the predicted poses before NMS.
        Returns four parallel per-image lists: boxes, dofs, scores, labels.
        """
        device = class_logits.device
        num_classes = class_logits.shape[-1]
        boxes_per_image = [
            boxes_in_image.shape[0] for boxes_in_image in proposals
        ]
        pred_boxes = torch.cat(proposals, dim=0)
        N = dof_regression.shape[0]
        pred_boxes = pred_boxes.reshape(N, -1, 4)
        pred_dofs = dof_regression.reshape(N, -1, 6)
        pred_scores = F.softmax(class_logits, -1)

        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)
        pred_dofs_list = pred_dofs.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        all_dofs = []
        for boxes, dofs, scores, image_shape in zip(pred_boxes_list,
                                                    pred_dofs_list,
                                                    pred_scores_list,
                                                    image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            dofs = dofs[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]
            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            dofs = dofs.reshape(-1, 6)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)
            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, dofs, scores, labels = (
                boxes[inds],
                dofs[inds],
                scores[inds],
                labels[inds],
            )

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, dofs, scores, labels = (
                boxes[keep],
                dofs[keep],
                scores[keep],
                labels[keep],
            )

            # create boxes from the predicted poses
            boxes, dofs = transform_pose_global_project_bbox(
                boxes,
                dofs,
                self.pose_mean,
                self.pose_stddev,
                image_shape,
                self.threed_68_points,
                bbox_x_factor=self.bbox_x_factor,
                bbox_y_factor=self.bbox_y_factor,
                expand_forehead=self.expand_forehead,
            )

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            # BUGFIX: truncate `keep` *before* indexing — previously the
            # truncation happened after the tensors had already been indexed
            # by the full keep set, so the detections_per_img cap was
            # silently ignored.
            keep = keep[:self.detections_per_img]

            boxes, dofs, scores, labels = (
                boxes[keep],
                dofs[keep],
                scores[keep],
                labels[keep],
            )

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)
            all_dofs.append(dofs)

        return all_boxes, all_dofs, all_scores, all_labels
Ejemplo n.º 24
0
    def process_detections(
        self,
        outputs: Dict[str, Tensor],
        anchors: List[Tensor],
        im_szs: List[Tuple[int, int]],
    ) -> List[Dict[str, Tensor]]:
        """
        Process `outputs` and return per-image detections above `score_thres`.

        Args:
            outputs: model outputs; must contain "cls_preds" (classification
                logits) and "bbox_preds" (box regression activations). Both
                entries are popped from the dict.
            anchors: per-image anchor boxes used to decode the activations.
            im_szs: per-image sizes used to clip the decoded boxes.

        Returns:
            One dict per image with keys "boxes", "scores" and "labels".
            Labels are shifted to the range [1, num_classes] so that 0 stays
            reserved for the background class.
        """
        class_logits = outputs.pop("cls_preds")
        bboxes = outputs.pop("bbox_preds")
        scores = torch.sigmoid(class_logits)

        device = class_logits.device
        num_classes = class_logits.shape[-1]

        # create a class-index label for each score
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        detections = torch.jit.annotate(List[Dict[str, Tensor]], [])

        for bb_per_im, sc_per_im, ancs_per_im, im_sz, lbl_per_im in zip(
            bboxes, scores, anchors, im_szs, labels
        ):
            all_boxes = []
            all_scores = []
            all_labels = []
            # convert the activations i.e. outputs of the model to bounding boxes
            bb_per_im = activ_2_bbox(bb_per_im, ancs_per_im)
            # clip the bounding boxes to the image size
            bb_per_im = ops.clip_boxes_to_image(bb_per_im, im_sz)

            # Iterate over each `cls_idx` in `num_classes` and apply nms
            # to each class individually
            for cls_idx in range(num_classes):
                # remove low predictions with scores < score_thres
                # and grab the predictions corresponding to cls_idx
                inds = torch.gt(sc_per_im[:, cls_idx], self.score_thres)
                bb_per_cls, sc_per_cls, lbl_per_cls = (
                    bb_per_im[inds],
                    sc_per_im[inds, cls_idx],
                    lbl_per_im[inds, cls_idx],
                )
                # remove boxes that are too small (min side 1e-2)
                keep = ops.remove_small_boxes(bb_per_cls, min_size=1e-2)
                bb_per_cls, sc_per_cls, lbl_per_cls = (
                    bb_per_cls[keep],
                    sc_per_cls[keep],
                    lbl_per_cls[keep],
                )
                # non-maximum suppression to suppress overlapping boxes
                keep = ops.nms(bb_per_cls, sc_per_cls, self.nms_thres)
                bb_per_cls, sc_per_cls, lbl_per_cls = (
                    bb_per_cls[keep],
                    sc_per_cls[keep],
                    lbl_per_cls[keep],
                )

                all_boxes.append(bb_per_cls)
                all_scores.append(sc_per_cls)
                all_labels.append(lbl_per_cls)

            # Concatenate the per-class results into single tensors
            all_boxes = torch.cat(all_boxes, dim=0)
            all_scores = torch.cat(all_scores, dim=0)
            all_labels = torch.cat(all_labels, dim=0)

            # The model predicts classes in the range [0, num_classes).
            # 0 is reserved for the background class, for which no loss is
            # calculated, so add 1 to shift predictions from
            # [0, num_classes) -> [1, num_classes]
            all_labels = all_labels + 1

            # Sort by score and grab the idxs corresponding to the
            # topk predictions
            _, topk_idxs = all_scores.sort(descending=True)
            topk_idxs = topk_idxs[: self.detections_per_img]
            all_boxes, all_scores, all_labels = (
                all_boxes[topk_idxs],
                all_scores[topk_idxs],
                all_labels[topk_idxs],
            )

            detections.append(
                {"boxes": all_boxes, "scores": all_scores, "labels": all_labels}
            )
        return detections
Ejemplo n.º 25
0
    def post_process(self, cls_logits: torch.Tensor, reg_deltas: torch.Tensor,
                     batched_rois: List[torch.Tensor]):
        """Turn raw head outputs into per-image detection tensors.

        For every image, the logits are thresholded at probability 0.5 into
        binary class predictions, the regression offsets are decoded against
        the RoIs, and the surviving boxes are filtered by confidence, size
        and batched NMS before keeping the top-N by score.

        Returns a list with one (M, 6) tensor per image, laid out as
        [xmin, ymin, xmax, ymax, score, class]; an empty RoI set yields an
        empty (0, 6) tensor.
        """
        nms_threshold = self._params['nms_threshold']
        conf_threshold = self._params['conf_threshold']
        keep_top_n = self._params['keep_top_n']

        results: List[torch.Tensor] = []
        offset = 0
        for rois in batched_rois:
            num_rois = rois.size(0)
            if num_rois == 0:
                print("warning! found empty rois")
                results.append(
                    torch.empty(0,
                                6,
                                dtype=reg_deltas.dtype,
                                device=reg_deltas.device))
                continue

            # slice this image's predictions out of the flat batch
            logits = cls_logits[offset:offset + num_rois]
            deltas = reg_deltas[offset:offset + num_rois]
            offset += num_rois

            # binary foreground/background decision at probability 0.5
            probs = torch.sigmoid(logits)
            class_ids = (probs >= 0.5).to(torch.int64)
            fg_mask = class_ids != 0

            # decode the regression offsets against the RoIs
            # into xmin,ymin,xmax,ymax boxes
            boxes = offsets2boxes(deltas.unsqueeze(0), rois).squeeze(0)

            # drop background predictions
            boxes, class_ids, probs = boxes[fg_mask], class_ids[fg_mask], probs[fg_mask]

            # drop low-confidence predictions
            confident = probs >= conf_threshold
            probs, class_ids, boxes = probs[confident], class_ids[confident], boxes[confident]

            # drop degenerate boxes
            keep = box_ops.remove_small_boxes(boxes, 1e-3)  # TODO try 1
            probs, class_ids, boxes = probs[keep], class_ids[keep], boxes[keep]

            # per-class non-maximum suppression
            keep = box_ops.batched_nms(boxes, probs, class_ids, nms_threshold)
            probs, class_ids, boxes = probs[keep], class_ids[keep], boxes[keep]

            # keep only the highest-scoring N detections
            top_n = min(keep_top_n, probs.size(0))
            _, top_idx = probs.topk(top_n)
            boxes = boxes[top_idx]
            probs = probs[top_idx].unsqueeze(1)
            class_ids = class_ids[top_idx].unsqueeze(1).to(boxes.dtype)

            results.append(torch.cat([boxes, probs, class_ids], dim=-1))

        return results
Ejemplo n.º 26
0
    def _postprocess_detections(
        self,
        class_logits: Tensor,
        box_features: Tensor,
        box_regression: Tensor,
        proposals: List[Tensor],
        image_shapes: List[Tuple[int, int]],
    ) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]:
        """
        Adapted from https://github.com/pytorch/vision/blob/
        4521f6d152875974e317fa247a633e9ad1ea05c8/torchvision/models/detection/roi_heads.py#L664.

        The only reason we have to re-implement this method is so we can pull out the box
        features that we want.

        Filters the raw per-proposal predictions down to final detections:
        the background column (class index 0) is removed, low scores and tiny
        boxes are dropped, then per-class NMS and a top-k cut are applied —
        keeping the pooled box features indexed in lockstep with the boxes
        throughout.

        Args:
            class_logits: per-proposal class scores; the last dim is
                ``num_classes`` and index 0 is treated as background.
            box_features: per-proposal feature vectors, split per image along
                dim 0 like the proposals.
            box_regression: box deltas, decoded against ``proposals`` by the
                detector's box coder.
            proposals: per-image proposal boxes.
            image_shapes: per-image sizes used to clip the decoded boxes.

        Returns:
            Four parallel per-image lists: boxes, features, scores, labels.
        """
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]

        # shape: (batch_size * boxes_per_image, num_classes, 4)
        pred_boxes = self.detector.roi_heads.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        # split the flat batch back into per-image chunks
        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        features_list = box_features.split(boxes_per_image, dim=0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_features = []
        all_scores = []
        all_labels = []
        for boxes, features, scores, image_shape in zip(
            pred_boxes_list, features_list, pred_scores_list, image_shapes
        ):
            # shape: (boxes_per_image, num_classes, 4)
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # replicate each proposal's feature vector across classes so it
            # can be filtered in lockstep with the per-class boxes below
            # shape: (boxes_per_image, num_classes, feature_size)
            features = features.unsqueeze(1).expand(boxes.shape[0], boxes.shape[1], -1)

            # create labels for each prediction
            # shape: (num_classes,)
            labels = torch.arange(num_classes, device=device)
            # shape: (boxes_per_image, num_classes,)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            # shape: (boxes_per_image, num_classes - 1, 4)
            boxes = boxes[:, 1:]
            # shape: (boxes_per_image, num_classes - 1, feature_size)
            features = features[:, 1:]
            # shape: (boxes_per_image, num_classes - 1,)
            scores = scores[:, 1:]
            # shape: (boxes_per_image, num_classes - 1,)
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            # shape: (boxes_per_image * (num_classes - 1), 4)
            boxes = boxes.reshape(-1, 4)
            # shape: (boxes_per_image * (num_classes - 1), feature_size)
            features = features.reshape(boxes.shape[0], -1)
            # shape: (boxes_per_image * (num_classes - 1),)
            scores = scores.reshape(-1)
            # shape: (boxes_per_image * (num_classes - 1),)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            inds = torch.where(scores > self.detector.roi_heads.score_thresh)[0]
            boxes, features, scores, labels = (
                boxes[inds],
                features[inds],
                scores[inds],
                labels[inds],
            )

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, features, scores, labels = (
                boxes[keep],
                features[keep],
                scores[keep],
                labels[keep],
            )

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.detector.roi_heads.nms_thresh)
            # keep only topk scoring predictions (batched_nms returns indices
            # sorted by decreasing score)
            keep = keep[: self.detector.roi_heads.detections_per_img]
            boxes, features, scores, labels = (
                boxes[keep],
                features[keep],
                scores[keep],
                labels[keep],
            )

            all_boxes.append(boxes)
            all_features.append(features)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_features, all_scores, all_labels