def forward(self, images, features, targets=None):
        # RPN uses all feature maps that are available
        features = list(features.values())
        objectness, pred_bbox_deltas = self.rpn.head(features)
        anchors = self.rpn.anchor_generator(images, features)

        num_images = len(anchors)
        num_anchors_per_level = [o[0].numel() for o in objectness]
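        # the head outputs here are quantized tensors; dequantize them back to float
        # before box decoding and (optionally) loss computation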
        objectness = [i.dequantize() for i in objectness]
        pred_bbox_deltas = [i.dequantize() for i in pred_bbox_deltas]
        objectness, pred_bbox_deltas = \
            concat_box_prediction_layers(objectness, pred_bbox_deltas)
        # apply pred_bbox_deltas to anchors to obtain the decoded proposals
        # note that we detach the deltas because Faster R-CNN does not backprop
        # through the proposals
        proposals = self.rpn.box_coder.decode(pred_bbox_deltas.detach(),
                                              anchors)
        proposals = proposals.view(num_images, -1, 4)
        boxes, scores = self.rpn.filter_proposals(proposals, objectness,
                                                  images.image_sizes,
                                                  num_anchors_per_level)

        losses = {}
        if self.rpn.training:
            labels, matched_gt_boxes = self.rpn.assign_targets_to_anchors(
                anchors, targets)
            regression_targets = self.rpn.box_coder.encode(
                matched_gt_boxes, anchors)
            loss_objectness, loss_rpn_box_reg = self.rpn.compute_loss(
                objectness, pred_bbox_deltas, labels, regression_targets)
            losses = {
                "loss_objectness": loss_objectness,
                "loss_rpn_box_reg": loss_rpn_box_reg,
            }
        return boxes, losses
Example #2
def rpn_forward(rcnn, images):
    features = rcnn.backbone(images.tensors)

    if isinstance(features, torch.Tensor):
        features = OrderedDict([('0', features)])

    features_list = list(features.values())

    objectness, pred_bbox_deltas = rcnn.rpn.head(features_list)
    anchors = rcnn.rpn.anchor_generator(images, features_list)

    num_images = len(anchors)
    num_anchors_per_level = [o[0].numel() for o in objectness]
    objectness, pred_bbox_deltas = \
        concat_box_prediction_layers(objectness, pred_bbox_deltas)

    proposals = rcnn.rpn.box_coder.decode(pred_bbox_deltas.detach(), anchors)
    proposals = proposals.view(num_images, -1, 4)
    boxes, scores = rcnn.rpn.filter_proposals(proposals, objectness,
                                              images.image_sizes,
                                              num_anchors_per_level)

    # keep only the proposals for the first image in the batch
    bbox = boxes[0]

    # shift the objectness scores to be non-negative and rescale them to a 0-1 range
    scores = scores[0]
    scores += abs(scores.min())
    scores /= scores.max()

    return bbox, scores, features
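A brief, hedged sketch of how `rpn_forward` above might be exercised; it assumes the helper is defined in a module that already imports `OrderedDict` and `concat_box_prediction_layers`, and the model and input size are illustrative only:

import torch
import torchvision

# untrained weights are enough to exercise the code path; swap in pretrained weights as needed
rcnn = torchvision.models.detection.fasterrcnn_resnet50_fpn().eval()
with torch.no_grad():
    images, _ = rcnn.transform([torch.rand(3, 480, 640)])   # build an ImageList
    bbox, scores, features = rpn_forward(rcnn, images)
# bbox holds the filtered proposals for the single image; scores are rescaled to [0, 1]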
Example #3
    def forward(self,
                images,
                features,
                targets=None,
                first=None,
                resource=None):
        """
        Arguments:
            images (ImageList): images for which we want to compute the predictions
            features (List[Tensor]): features computed from the images that are
                used for computing the predictions. Each tensor in the list
                correspond to different feature levels
            targets (List[Dict[Tensor]): ground-truth boxes present in the image (optional).
                If provided, each element in the dict should contain a field `boxes`,
                with the locations of the ground-truth boxes.

        Returns:
            boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per
                image.
            losses (Dict[Tensor]): the losses for the model during training. During
                testing, it is an empty dict.
        """
        # RPN uses all feature maps that are available
        features = list(features.values())
        objectness, pred_bbox_deltas = self.head(features)
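        # when `first` is truthy, short-circuit and return the raw per-level head
        # outputs (objectness logits and box deltas) without decoding proposals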
        if first:
            return objectness, pred_bbox_deltas

        anchors = self.anchor_generator(images, features)

        num_images = len(anchors)
        num_anchors_per_level = [o[0].numel() for o in objectness]
        objectness, pred_bbox_deltas = \
            concat_box_prediction_layers(objectness, pred_bbox_deltas)

        proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
        proposals = proposals.view(num_images, -1, 4)
        boxes, scores = self.filter_proposals(proposals, objectness,
                                              images.image_sizes,
                                              num_anchors_per_level)

        losses = {}
        if self.training:
            labels, matched_gt_boxes = self.assign_targets_to_anchors(
                anchors, targets)
            regression_targets = self.box_coder.encode(matched_gt_boxes,
                                                       anchors)
            loss_objectness, loss_rpn_box_reg = self.compute_loss(
                objectness, pred_bbox_deltas, labels, regression_targets)
            losses = {
                "loss_objectness": loss_objectness,
                "loss_rpn_box_reg": loss_rpn_box_reg,
            }

        return boxes, scores
Example #4
    def forward(
        self,
        images: ImageList,
        features: Dict[str, Tensor],
        targets: Optional[List[Dict[str, Tensor]]] = None,
    ) -> Dict[str, Any]:
        # modified to also return the objectness scores alongside the boxes and losses
        """
        Args:
            images (ImageList): images for which we want to compute the predictions
            features (Dict[str, Tensor]): features computed from the images that are
                used for computing the predictions. Each tensor corresponds to a
                different feature level
            targets (List[Dict[str, Tensor]]): ground-truth boxes present in the image (optional).
                If provided, each element in the dict should contain a field `boxes`,
                with the locations of the ground-truth boxes.
        """
        # RPN uses all feature maps that are available
        features = list(features.values())
        objectness, pred_bbox_deltas = self.head(features)
        anchors = self.anchor_generator(images, features)

        num_images = len(anchors)
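        # anchors per FPN level, derived from the shape (A, H, W) of that level's
        # objectness map for the first image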
        num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]
        num_anchors_per_level = [
            s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors
        ]
        objectness, pred_bbox_deltas = concat_box_prediction_layers(
            objectness, pred_bbox_deltas
        )
        # apply pred_bbox_deltas to anchors to obtain the decoded proposals
        # note that we detach the deltas because Faster R-CNN does not backprop
        # through the proposals
        proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
        proposals = proposals.view(num_images, -1, 4)
        boxes, scores = self.filter_proposals(
            proposals, objectness, images.image_sizes, num_anchors_per_level
        )

        losses = {}
        if self.training:
            assert targets is not None
            labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)
            regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)
            loss_objectness, loss_rpn_box_reg = self.compute_loss(
                objectness, pred_bbox_deltas, labels, regression_targets
            )
            losses = {
                "loss_objectness": loss_objectness,
                "loss_rpn_box_reg": loss_rpn_box_reg,
            }
        return {"boxes": boxes, "scores": scores, "losses": losses}
Example #5
    def forward(self,
                image: torch.Tensor,  # (batch_size, c, h, w)
                image_sizes: torch.Tensor,  # (batch_size, 2)
                boxes: torch.Tensor = None,  # (batch_size, max_boxes_in_batch, 4)
                box_classes: torch.Tensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        im_sizes = [(x[1].item(), x[0].item()) for x in image_sizes]
        image_list = ImageList(image, im_sizes)
        features = self.backbone.forward(image)
        objectness, rpn_box_regression = self._rpn_head(features)
        anchors: List[torch.Tensor] = self.anchor_generator(image_list, features)
        num_anchors_per_level = [o[0].numel() for o in objectness]
        objectness, rpn_box_regression = \
            concat_box_prediction_layers(objectness, rpn_box_regression)

        out = {'features': features,
               'objectness': objectness,
               'rpn_box_regression': rpn_box_regression,
               'anchors': anchors,
               'sizes': image_sizes,
               'num_anchors_per_level': num_anchors_per_level}
        if boxes is not None:
            labels, matched_gt_boxes = self.assign_targets_to_anchors(
                    anchors, object_utils.unpad(boxes))
            regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)

            sampled_pos_inds, sampled_neg_inds = self.sampler(labels)
            sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
            sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

            sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

            objectness = objectness.flatten()

            labels = torch.cat(labels, dim=0)
            regression_targets = torch.cat(regression_targets, dim=0)

            loss_rpn_box_reg = F.l1_loss(
                    rpn_box_regression[sampled_pos_inds],
                    regression_targets[sampled_pos_inds],
                    reduction="sum",
            ) / (sampled_inds.numel())

            loss_objectness = F.binary_cross_entropy_with_logits(
                    objectness[sampled_inds], labels[sampled_inds]
            )
            self._loss_meters['rpn_cls_loss'](loss_objectness.item())
            self._loss_meters['rpn_reg_loss'](loss_rpn_box_reg.item())
            out["loss_objectness"] = loss_objectness
            out["loss_rpn_box_reg"] = loss_rpn_box_reg
            out["loss"] = loss_objectness + 10*loss_rpn_box_reg
        return out
Example #6
    def forward(self, images, features, targets=None):
        """
        Arguments:
            images (ImageList): images for which we want to compute the predictions
            features (List[Tensor]): features computed from the images that are
                used for computing the predictions. Each tensor in the list
                correspond to different feature levels
            targets (List[Dict[Tensor]): ground-truth boxes present in the image (optional).
                If provided, each element in the dict should contain a field `boxes`,
                with the locations of the ground-truth boxes.

        Returns:
            boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per
                image.
            losses (Dict[Tensor]): the losses for the model during training. During
                testing, it is an empty dict.
        """
        # RPN uses all feature maps that are available
        features = list(features.values())
        objectness, pred_bbox_deltas = self.head(features)
        anchors = self.anchor_generator(images, features)

        num_images = len(anchors)
        num_anchors_per_level = [o[0].numel() for o in objectness]
        objectness, pred_bbox_deltas = concat_box_prediction_layers(
            objectness,
            pred_bbox_deltas,
        )
        # apply pred_bbox_deltas to anchors to obtain the decoded proposals
        # note that we detach the deltas because Faster R-CNN does not backprop
        # through the proposals
        proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
        proposals = proposals.view(num_images, -1, 4)
        boxes, scores = self.filter_proposals(proposals, objectness,
                                              images.image_sizes,
                                              num_anchors_per_level)

        return {
            'anchors': anchors,
            'objectness': objectness,
            'pred_bbox_deltas': pred_bbox_deltas,
            'boxes': boxes,
            'scores': scores,
        }
Example #7
def rpn_forward(self, is_dynamic, images, features, targets=None):
    import numpy as np
    from torchvision.models.detection.rpn import concat_box_prediction_layers
    # Copy of torchvision/models/detection/rpn.py
    features = list(features.values())
    objectness, pred_bbox_deltas = self.head(features)
    anchors = self.anchor_generator(images, features)

    shapes = [np.prod(o.shape) for o in objectness]

    objectness, pred_bbox_deltas = concat_box_prediction_layers(
        objectness, pred_bbox_deltas)
    # end of copy

    img_h, img_w = images.tensors.shape[2:]
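    # prepare inputs for DetectionOutput: anchors are rescaled to relative [0, 1]
    # coordinates and objectness logits are turned into confidences with a sigmoid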
    anchors = anchors[0].reshape(1, -1, 4)
    anchors /= torch.tensor([img_w, img_h, img_w, img_h])
    pred_bbox_deltas = pred_bbox_deltas.reshape(1, -1, 4)
    objectness = objectness.reshape(1, -1).sigmoid()

    start_idx = 0
    all_proposals = []
    for shape in shapes:
        end_idx = start_idx + shape
        scores = objectness[:, start_idx:end_idx]
        deltas = pred_bbox_deltas[:, start_idx:end_idx].reshape(1, -1)
        priors = anchors[:, start_idx:end_idx].reshape(1, 1, -1)

        det = DetectionOutput(top_k=min(shape, self.post_nms_top_n()),
                              nms_threshold=self.nms_thresh,
                              confidence_threshold=0.0,
                              background_label_id=2)
        proposals = forward_hook(det, (deltas, scores, OpenVINOTensor(priors)))
        proposals = filter_detections(proposals, is_dynamic)

        all_proposals.append(proposals)
        start_idx = end_idx

    all_proposals = torch.cat(all_proposals, dim=2)

    _, ids = torch.topk(all_proposals[0, 0, :, 2], self.post_nms_top_n())
    all_proposals = torch.gather(all_proposals, 2, ids).reshape(-1, 7)[:, 3:]
    return [all_proposals, OpenVINOTensor()]
Example #8
    def forward(self, images, features):
        # RPN uses all feature maps that are available
        features = list(features.values())
        objectness, pred_bbox_deltas = self.head(features)
        anchors = self.anchor_generator(images, features)
        num_images = len(anchors)
        num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]
        num_anchors_per_level = [
            s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors
        ]
        objectness, pred_bbox_deltas = concat_box_prediction_layers(
            objectness, pred_bbox_deltas)
        # apply pred_bbox_deltas to anchors to obtain the decoded proposals
        # note that we detach the deltas because Faster R-CNN does not backprop through the proposals
        proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
        proposals = proposals.view(num_images, -1, 4)
        boxes, scores = self.filter_proposals(proposals, objectness,
                                              images.image_sizes,
                                              num_anchors_per_level)
        return boxes, objectness, pred_bbox_deltas, anchors
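A similarly hedged sketch of driving the stripped-down forward in Example #8 directly against a stock torchvision RPN to expose its intermediate tensors; it assumes the method above is in scope as `forward`, with `concat_box_prediction_layers` imported in its module, and the model and input size are illustrative:

import torch
import torchvision

model = torchvision.models.detection.fasterrcnn_resnet50_fpn().eval()  # untrained is fine here
with torch.no_grad():
    images, _ = model.transform([torch.rand(3, 480, 640)])
    features = model.backbone(images.tensors)          # OrderedDict of FPN feature maps
    # bind the existing RPN instance as `self` and call the modified forward
    boxes, objectness, pred_bbox_deltas, anchors = forward(model.rpn, images, features)
# boxes: filtered proposals per image; objectness / pred_bbox_deltas: concatenated raw
# head outputs; anchors: per-image anchor boxes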