def compute_loss(self, model, net_output, sample):
        # get rpn proposals and box detections
        rpn_proposals = model.get_rpn_proposals(net_output)
        box_detections = model.get_box_detections(net_output)

        # compute proposal losses
        anchors = model.rpn.get_anchors(rpn_proposals)
        objectness = model.rpn.get_objectness(rpn_proposals)
        pred_bbox_deltas = model.rpn.get_pred_bbox_deltas(rpn_proposals)
        labels, matched_gt_boxes = model.rpn.assign_targets_to_anchors(anchors, sample['target'])
        regression_targets = model.rpn.box_coder.encode(matched_gt_boxes, anchors)

        loss_objectness, loss_rpn_box_reg = model.rpn.compute_loss(
            objectness, pred_bbox_deltas, labels, regression_targets,
        )

        # compute detector loss
        labels = model.roi_heads.get_labels(box_detections)
        regression_targets = model.roi_heads.get_regression_targets(box_detections)
        class_logits = model.roi_heads.get_class_logits(box_detections)
        box_regression = model.roi_heads.get_box_regression(box_detections)

        loss_classifier, loss_box_reg = fastrcnn_loss(
            class_logits, box_regression, labels, regression_targets,
        )

        losses = {
            # proposal losses
            'loss_objectness': loss_objectness,
            'loss_rpn_box_reg': loss_rpn_box_reg,
            # detector loss
            'loss_classifier': loss_classifier,
            'loss_box_reg': loss_box_reg,
        }
        return losses
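
The four loss terms are usually summed into one scalar before backpropagation. A minimal training-step sketch, assuming hypothetical criterion/optimizer/net_output/sample names that are not part of the snippet above:

# Hedged usage sketch: reduce the loss dict from compute_loss() to a
# single scalar and take an optimizer step.
losses = criterion.compute_loss(model, net_output, sample)
total_loss = sum(loss for loss in losses.values())
optimizer.zero_grad()
total_loss.backward()
optimizer.step()
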
Example #2
    def forward(self, features, proposals, image_shapes, targets=None):
        """
        Arguments:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        """
        if targets is not None:
            for t in targets:
                assert t["boxes"].dtype.is_floating_point, \
                    'target boxes must of float type'
                assert t["labels"].dtype == torch.int64, \
                    'target labels must of int64 type'
                if self.has_keypoint():
                    assert t["keypoints"].dtype == torch.float32, \
                        'target keypoints must of float type'

        if self.training:
            proposals, matched_idxs, labels, regression_targets = \
                self.select_training_samples(proposals, targets)
        roi_pooled_features = \
            self.box_roi_pool(features, proposals, image_shapes)
        rcnn_features = self.box_head(roi_pooled_features)
        cls_logits, box_regression = self.box_predictor(rcnn_features)

        person_feat = self.reid_head(rcnn_features)

        result, losses = [], {}
        if self.training:
            det_labels = [y.clamp(0, 1) for y in labels]  # collapse person IDs to {0, 1} for detection
            loss_detection, loss_box_reg = \
                fastrcnn_loss(cls_logits, box_regression,
                                     det_labels, regression_targets)

            loss_reid = self.reid_loss(person_feat, labels)

            losses = dict(loss_detection=loss_detection,
                          loss_box_reg=loss_box_reg,
                          loss_reid=loss_reid)
        else:
            boxes, scores, person_feat, labels = \
                self.oim_postprocess_detections(cls_logits, box_regression,
                                                person_feat, proposals,
                                                image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append(
                    dict(
                        boxes=boxes[i],
                        labels=labels[i],
                        scores=scores[i],
                        person_feat=person_feat[i],
                    ))
        # Mask and Keypoint losses are deleted
        return result, losses
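
What this forward returns depends on the module's mode: in training it yields ([], losses) with detection, box-regression, and re-ID terms; in eval it yields (detections, {}), where each per-image dict also carries a person_feat re-identification embedding. A usage sketch, with all variable names assumed:

# Training mode: losses only.
roi_heads.train()
_, losses = roi_heads(features, proposals, image_shapes, targets)

# Eval mode: per-image dicts with boxes, labels, scores, person_feat.
roi_heads.eval()
with torch.no_grad():
    detections, _ = roi_heads(features, proposals, image_shapes)
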
Example #3
    def forward(self, features, proposals, image_shapes, targets=None):
        # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]], Optional[List[Dict[str, Tensor]]])
        """
        Arguments:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        """
        if targets is not None:
            for t in targets:
                # TODO: https://github.com/pytorch/pytorch/issues/26731
                floating_point_types = (torch.float, torch.double, torch.half)
                assert t["boxes"].dtype in floating_point_types, \
                    'target boxes must be of float type'
                assert t["labels"].dtype == torch.int64, \
                    'target labels must be of int64 type'
                if self.has_keypoint():
                    assert t["keypoints"].dtype == torch.float32, \
                        'target keypoints must be of float type'
        if self.training:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(
                proposals, targets)
        else:
            labels = None
            regression_targets = None
            matched_idxs = None
        box_features = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(box_features)
        class_logits, box_regression = self.box_predictor(box_features)
        result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])
        losses = {}
        if self.training:
            assert labels is not None and regression_targets is not None
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = {
                "loss_classifier": loss_classifier,
                "loss_box_reg": loss_box_reg
            }
        else:
            boxes, scores, labels, props, prob_max, scores_cls = self.postprocess_detections(
                class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append({
                    "boxes": boxes[i],
                    "labels": labels[i],
                    "scores": scores[i],
                    "props": props[i],
                    "prob_max": prob_max[i],
                    "scores_cls": scores_cls[i],
                    "features": features
                })

        return result, losses
Example #4
    def get_detections_losses(self,
                              model,
                              class_logits,
                              box_regression,
                              labels,
                              regression_targets,
                              proposals,
                              image_sizes,
                              original_image_sizes,
                              name=''):
        """
        Arguments:
            model (torchvision FasterRCNN model)
            class_logits, box_regression: output of model.roi_heads.box_predictor(box_features)
            labels (List[Tensor[N']]): ground truth class labels for training
            regression_targets (List[Tensor[N', 4]]): ground truth targets for box regression for training
            proposals (List[Tensor[N, 4]]): box proposals
            image_sizes (List[Tuple[int, int]]): sizes of the images as seen by the network (after resizing)
            original_image_sizes (List[Tuple[int, int]]): to postprocess detections
            name (str): append this name to keys in losses
        Returns:
            detections (List[Dict[str, torch.Tensor]]): predicted boxes, labels, and scores (in eval mode)
            detector_losses (Dict): classifier and box_regression losses for final detection (in train mode)
        """
        detections = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])
        detector_losses = {}
        if self.training:
            assert labels is not None and regression_targets is not None
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            detector_losses = {
                'loss_classifier' + name: loss_classifier,
                'loss_box_reg' + name: loss_box_reg
            }
        else:
            boxes, scores, labels = model.roi_heads.postprocess_detections(
                class_logits, box_regression, proposals, image_sizes)
            num_images = len(boxes)
            for i in range(num_images):
                detections.append({
                    'boxes': boxes[i],
                    'labels': labels[i],
                    'scores': scores[i]
                })

        detections = self.transform.postprocess(detections, image_sizes,
                                                original_image_sizes)

        return detections, detector_losses
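
A hypothetical call site for the helper above, wired against a torchvision FasterRCNN; every surrounding name here is an assumption, and labels/regression_targets would be None outside training:

class_logits, box_regression = model.roi_heads.box_predictor(box_features)
detections, detector_losses = self.get_detections_losses(
    model, class_logits, box_regression,
    labels, regression_targets,
    proposals, images.image_sizes, original_image_sizes,
    name='_aux',  # loss keys become e.g. 'loss_classifier_aux'
)
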
Example #5
    def forward(self, features, proposals, image_shapes, targets=None):
        """
        Arguments:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        """
        if targets is not None:
            for t in targets:
                assert t["boxes"].dtype.is_floating_point, 'target boxes must of float type'
                assert t["labels"].dtype == torch.int64, 'target labels must of int64 type'
                if self.has_keypoint:
                    assert t["keypoints"].dtype == torch.float32, 'target keypoints must of float type'

        if self.training:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)

        box_features = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(box_features)
        class_logits, box_regression = self.box_predictor(box_features)

        result, losses = [], {}
        if self.training:
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg)
        else:
            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append(
                    dict(
                        boxes=boxes[i],
                        labels=labels[i],
                        scores=scores[i],
                    )
                )
        
        return result, losses
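
The dtype assertions at the top of this forward dictate how targets must be built. A minimal sketch of one valid target dict; the box coordinates and the COCO-style (1, 17, 3) keypoint shape are illustration values, not requirements of the snippet:

import torch

target = {
    "boxes": torch.tensor([[10.0, 20.0, 50.0, 80.0]]),       # float
    "labels": torch.tensor([1], dtype=torch.int64),           # int64
    "keypoints": torch.zeros(1, 17, 3, dtype=torch.float32),  # float32
}
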
Example #6
    def forward(
        self,
        features,  # type: Dict[str, Tensor]
        proposals,  # type: List[Tensor]
        image_shapes,  # type: List[Tuple[int, int]]
        targets=None,  # type: Optional[List[Dict[str, Tensor]]]
    ):

        # Modified to return box features: instead of the usual
        # (result, losses) tuple, this forward returns a dict that also
        # carries the pooled box features (see the return statement below).
        """
        Args:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        """
        if targets is not None:
            for t in targets:
                # TODO: https://github.com/pytorch/pytorch/issues/26731
                floating_point_types = (torch.float, torch.double, torch.half)
                assert (
                    t["boxes"].dtype in floating_point_types
                ), "target boxes must be of float type"
                assert (
                    t["labels"].dtype == torch.int64
                ), "target labels must be of int64 type"
                if self.has_keypoint():
                    assert (
                        t["keypoints"].dtype == torch.float32
                    ), "target keypoints must be of float type"

        if self.training:
            (
                proposals,
                matched_idxs,
                labels,
                regression_targets,
            ) = self.select_training_samples(proposals, targets)
        else:
            labels = None
            regression_targets = None
            matched_idxs = None

        box_features0 = self.box_roi_pool(features, proposals, image_shapes)

        ## box_features0 is 256x7x7 (the 7x7 output size is a parameter of
        # the box_roi_pool RoIAlign); it is then flattened to a single
        # 1024-d vector by the box_head
        box_features = self.box_head(box_features0)

        # the predictor outputs one regressed box per class (category dependent)
        class_logits, box_regression = self.box_predictor(box_features)

        result: List[Dict[str, torch.Tensor]] = []
        losses = {}
        if self.training:
            assert labels is not None and regression_targets is not None
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets
            )
            losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
        else:
            boxes, scores, labels = self.postprocess_detections(
                class_logits, box_regression, proposals, image_shapes
            )

            num_images = len(boxes)
            for i in range(num_images):
                result.append(
                    {
                        "boxes": boxes[i],
                        "labels": labels[i],
                        "scores": scores[i],
                    }
                )

        return {"result": result, "losses": losses, "box_features": box_features}
Example #7
    def forward(self, features, proposals, image_shapes, targets=None):
        """
        Arguments:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        """
        if targets is not None:
            for t in targets:
                assert t["boxes"].dtype.is_floating_point, \
                    'target boxes must be of float type'
                assert t["labels"].dtype == torch.int64, \
                    'target labels must be of int64 type'
                assert t["poses"].dtype.is_floating_point
                if self.has_keypoint:
                    assert t["keypoints"].dtype == torch.float32, \
                        'target keypoints must be of float type'

        if self.training:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(
                proposals, targets)

        box_features = self.box_roi_pool(
            features, proposals,
            image_shapes)  # torch.Size([bs*1000, 256, 7, 7])
        box_features = self.box_head(
            box_features)  # torch.Size([bs*1000, 1024])
        class_logits, box_regression = self.box_predictor(
            box_features)  # torch.Size([bs*1000, 2]) torch.Size([bs*1000, 8])

        result, losses = [], {}  # result is a list of dicts, one per image, holding that image's predictions
        if self.training:
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = dict(loss_classifier=loss_classifier,
                          loss_box_reg=loss_box_reg)
        else:
            boxes, scores, labels = self.postprocess_detections(
                class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append(
                    dict(
                        boxes=boxes[i],
                        labels=labels[i],
                        scores=scores[i],
                    ))
        # Mask R-CNN branch
        if self.has_mask:
            mask_proposals = [p["boxes"] for p in result]
            if self.training:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                mask_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                    mask_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])

            mask_features = self.mask_roi_pool(features, mask_proposals,
                                               image_shapes)
            mask_features = self.mask_head(mask_features)
            mask_logits = self.mask_predictor(mask_features)

            loss_mask = {}
            if self.training:
                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                loss_mask = maskrcnn_loss(mask_logits, mask_proposals,
                                          gt_masks, gt_labels,
                                          pos_matched_idxs)
                loss_mask = dict(loss_mask=loss_mask)
            else:
                labels = [r["labels"] for r in result]
                masks_probs = maskrcnn_inference(mask_logits, labels)
                for mask_prob, r in zip(masks_probs, result):
                    r["masks"] = mask_prob

            losses.update(loss_mask)

        if self.has_keypoint:
            keypoint_proposals = [p["boxes"] for p in result]
            if self.training:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                keypoint_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                    keypoint_proposals.append(
                        proposals[img_id][pos])  # shape=(num_pos, 4)
                    pos_matched_idxs.append(matched_idxs[img_id][pos])

            keypoint_features = self.keypoint_roi_pool(features,
                                                       keypoint_proposals,
                                                       image_shapes)
            keypoint_features = self.keypoint_head(keypoint_features)
            keypoint_logits = self.keypoint_predictor(keypoint_features)

            loss_keypoint = {}
            if self.training:
                gt_keypoints = [t["keypoints"] for t in targets]
                loss_keypoint = keypointrcnn_loss(keypoint_logits,
                                                  keypoint_proposals,
                                                  gt_keypoints,
                                                  pos_matched_idxs)
                loss_keypoint = dict(loss_keypoint=loss_keypoint)
            else:
                keypoints_probs, kp_scores = keypointrcnn_inference(
                    keypoint_logits, keypoint_proposals)
                for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores,
                                                 result):
                    r["keypoints"] = keypoint_prob
                    r["keypoints_scores"] = kps

            losses.update(loss_keypoint)

        if self.has_pose:
            pose_proposals = [p["boxes"] for p in result]
            if self.training:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                pose_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    # indices of proposals assigned as positives
                    pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                    # proposal boxes (xmin, ymin, xmax, ymax)
                    pose_proposals.append(proposals[img_id][pos])
                    # which target pose each proposal matches
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            pose_features = self.pose_roi_pool(features, pose_proposals,
                                               image_shapes)
            pose_features = self.pose_head(pose_features)
            pose_regression = self.pose_predictor(pose_features)

            loss_pose = {}
            if self.training:
                gt_poses = [t["poses"]
                            for t in targets]  # a list of (rx, ry, rz, tz)
                loss_pose = posercnn_loss(pose_regression, gt_poses, labels,
                                          pos_matched_idxs)
                loss_pose = dict(loss_pose=loss_pose)
            else:
                pred_poses = postprocess_poses(pose_regression, pose_proposals)
                for poses, r in zip(pred_poses, result):
                    r['poses'] = poses
            losses.update(loss_pose)

            if self.has_trans:
                trans_proposals = [p["boxes"] for p in result]
                if self.training:
                    # during training, only focus on positive boxes
                    num_images = len(proposals)
                    trans_proposals = []
                    pos_matched_idxs = []
                    for img_id in range(num_images):
                        # keep only positive boxes
                        # indices of proposals assigned as positives
                        pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                        # proposal boxes (xmin, ymin, xmax, ymax)
                        trans_proposals.append(proposals[img_id][pos])
                        # which target pose each proposal matches
                        pos_matched_idxs.append(matched_idxs[img_id][pos])

                box_features = torch.cat(
                    trans_proposals,
                    dim=0)  # [N, 4]    N=batch_size*num_proposal_per_image
                trans_features = self.translation_head(box_features)
                trans_pred = self.translation_predictor(
                    trans_features, pose_features)

                loss_trans = {}
                if self.training:
                    gt_trans = [t["translations"] for t in targets]
                    # 6DVNet weights the translation loss by 0.05
                    loss_trans = 0.05 * trans_loss(trans_pred, gt_trans,
                                                   labels, pos_matched_idxs)
                    loss_trans = dict(loss_trans=loss_trans)
                else:
                    pred_trans = postprocess_trans(trans_pred, trans_proposals)
                    for translations, r in zip(pred_trans, result):
                        r['translations'] = translations
                losses.update(loss_trans)

        return result, losses
Example #8
    def forward(self, features, proposals, image_shapes, targets=None, return_loss=False):
        """
        Arguments:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
            return_loss (Bool): return the loss (even if we are in eval mode)
        """
        if targets is not None:
            for t in targets:
                assert t["boxes"].dtype.is_floating_point, 'target boxes must of float type'
                assert t["labels"].dtype == torch.int64, 'target labels must of int64 type'
                # if self.has_keypoint:
                #     assert t["keypoints"].dtype == torch.float32, 'target keypoints must of float type'

        if self.training or return_loss:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)

        box_features = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(box_features)
        class_logits, box_regression = self.box_predictor(box_features)

        result, losses = [], {}
        if self.training or return_loss:
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg)
        else:
            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append(
                    dict(
                        boxes=boxes[i],
                        labels=labels[i],
                        scores=scores[i],
                    )
                )

        if self.has_mask:
            mask_proposals = [p["boxes"] for p in result]
            if self.training or return_loss:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                mask_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                    mask_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])

            mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
            mask_features = self.mask_head(mask_features)
            mask_logits = self.mask_predictor(mask_features)

            loss_mask = {}
            if self.training or return_loss:
                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                loss_mask = maskrcnn_loss(
                    mask_logits, mask_proposals,
                    gt_masks, gt_labels, pos_matched_idxs)
                loss_mask = dict(loss_mask=loss_mask)
            else:
                labels = [r["labels"] for r in result]
                masks_probs = maskrcnn_inference(mask_logits, labels)
                for mask_prob, r in zip(masks_probs, result):
                    r["masks"] = mask_prob

            losses.update(loss_mask)

        if self.has_keypoint:
            keypoint_proposals = [p["boxes"] for p in result]
            if self.training or return_loss:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                keypoint_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                    keypoint_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])

            keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
            keypoint_features = self.keypoint_head(keypoint_features)
            keypoint_logits = self.keypoint_predictor(keypoint_features)

            loss_keypoint = {}
            if self.training or return_loss:
                gt_keypoints = [t["keypoints"] for t in targets]
                loss_keypoint = keypointrcnn_loss(
                    keypoint_logits, keypoint_proposals,
                    gt_keypoints, pos_matched_idxs)
                loss_keypoint = dict(loss_keypoint=loss_keypoint)
            else:
                keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
                for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                    r["keypoints"] = keypoint_prob
                    r["keypoints_scores"] = kps

            losses.update(loss_keypoint)

        return result, losses
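
The return_loss flag makes it possible to compute the training losses on a validation set without switching the module into train mode (which would re-enable dropout and batch-norm updates). A sketch with assumed names:

roi_heads.eval()
with torch.no_grad():
    _, val_losses = roi_heads(features, proposals, image_shapes,
                              targets=val_targets, return_loss=True)
print({k: v.item() for k, v in val_losses.items()})
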
Example #9
    def forward(
        self,
        features: Dict[str, torch.Tensor],
        proposals: List[torch.Tensor],
        image_shapes: List[Tuple[int, int]],
        targets: Optional[List[Dict[str, torch.Tensor]]] = None
    ) -> Tuple[List[Dict[str, torch.Tensor]], Dict[str, torch.Tensor]]:

        if targets is not None:
            for t in targets:
                floating_point_types = (torch.float, torch.double, torch.half)
                assert t["boxes"].dtype in floating_point_types, \
                    'target boxes must be of float type'
                assert t["ellipse_matrices"].dtype in floating_point_types, \
                    'target ellipse_matrices must be of float type'
                assert t["labels"].dtype == torch.int64, \
                    'target labels must be of int64 type'

        if self.training:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(
                proposals, targets)
        else:
            labels = None
            regression_targets = None
            matched_idxs = None

        box_features = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(box_features)
        class_logits, box_regression = self.box_predictor(box_features)

        result: List[Dict[str, torch.Tensor]] = []
        losses = {}
        if self.training:
            assert labels is not None and regression_targets is not None
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = {
                "loss_classifier": loss_classifier,
                "loss_box_reg": loss_box_reg
            }
        else:
            boxes, scores, labels = self.postprocess_detections(
                class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append({
                    "boxes": boxes[i],
                    "labels": labels[i],
                    "scores": scores[i],
                })

        if self.has_ellipse_reg():
            ellipse_proposals = [p["boxes"] for p in result]
            if self.training:
                assert matched_idxs is not None
                # during training, only focus on positive boxes
                num_images = len(proposals)
                ellipse_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    ellipse_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            if self.ellipse_roi_pool is not None:
                ellipse_features = self.ellipse_roi_pool(
                    features, ellipse_proposals, image_shapes)
                ellipse_features = self.ellipse_head(ellipse_features)
                ellipse_shapes_normalised = self.ellipse_predictor(
                    ellipse_features)
            else:
                raise Exception("Expected ellipse_roi_pool to be not None")

            loss_ellipse_regressor = {}
            if self.training:
                assert targets is not None
                assert pos_matched_idxs is not None
                assert ellipse_shapes_normalised is not None

                ellipse_matrix_targets = [
                    t["ellipse_matrices"] for t in targets
                ]
                rcnn_loss_ellipse = self.ellipse_loss_fn(
                    ellipse_shapes_normalised, ellipse_matrix_targets,
                    pos_matched_idxs, ellipse_proposals)
                loss_ellipse_regressor = {"loss_ellipse": rcnn_loss_ellipse}
            else:
                ellipses_per_image = [l.shape[0] for l in labels]
                for e_l, r, box in zip(
                        ellipse_shapes_normalised.split(ellipses_per_image,
                                                        dim=0), result,
                        ellipse_proposals):
                    d_a = e_l[:, 0]
                    d_b = e_l[:, 1]
                    d_angle = e_l[:, 2]
                    r["ellipse_matrices"] = postprocess_ellipse_predictor(
                        d_a, d_b, d_angle, box)

            losses.update(loss_ellipse_regressor)

        return result, losses
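
This variant additionally asserts a floating-point "ellipse_matrices" entry in each target. A sketch of a conforming target dict; the per-box 3x3 matrix shape is an assumption for illustration:

import torch

target = {
    "boxes": torch.tensor([[10.0, 20.0, 50.0, 80.0]]),
    "labels": torch.tensor([1], dtype=torch.int64),
    "ellipse_matrices": torch.randn(1, 3, 3),  # float, one per box
}
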
Example #10
def forward(self, features, proposals, image_shapes, targets=None, task_heads=None):
    """
    Arguments:
        features (List[Tensor])
        proposals (List[Tensor[N, 4]])
        image_shapes (List[Tuple[H, W]])
        targets (List[Dict])
    """
    if targets is not None:
        for t in targets:
            # TODO: https://github.com/pytorch/pytorch/issues/26731
            floating_point_types = (torch.float, torch.double, torch.half)
            assert t["boxes"].dtype in floating_point_types, 'target boxes must of float type'
            assert t["labels"].dtype == torch.int64, 'target labels must of int64 type'

    if self.training:
        proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
    else:
        labels = None
        regression_targets = None
        matched_idxs = None

    box_features = self.box_roi_pool(features, proposals, image_shapes)
    box_features = self.box_head(box_features)
    class_logits, box_regression = self.box_predictor(box_features)
    task_results = {task_head.name: task_head(box_features) for task_head in (task_heads or [])}

    result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])
    losses = {}
    if self.training:
        assert labels is not None and regression_targets is not None
        loss_classifier, loss_box_reg = fastrcnn_loss(
            class_logits, box_regression, labels, regression_targets)

        losses = {
            "loss_classifier": loss_classifier,
            "loss_box_reg": loss_box_reg,
        }

        # Calculate losses for all the tasks
        for name, preds in task_results.items():
            actuals = []
            for idxs, target in zip(matched_idxs, targets):
                actuals.append(target[name][idxs])
            actuals = torch.cat(actuals, dim=0)
            # Task losses are added at full weight here; scale this term
            # down if the box / mask losses should take priority during
            # training
            losses[f"loss_{name}"] = F.cross_entropy(preds, actuals)

    else:
        boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        # split the per-proposal task predictions into per-image chunks
        task_results_list = {name: preds.split(boxes_per_image, 0)
                             for name, preds in task_results.items()}

        num_images = len(boxes)
        for i in range(num_images):
            result.append(
                {
                    "boxes": boxes[i],
                    "labels": labels[i],
                    "scores": scores[i],
                    "task_results": {name: preds[i] for name, preds in task_results_list.items()}
                }
            )

    if self.has_mask():
        mask_proposals = [p["boxes"] for p in result]
        if self.training:
            assert matched_idxs is not None
            # during training, only focus on positive boxes
            num_images = len(proposals)
            mask_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                mask_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])
        else:
            pos_matched_idxs = None

        if self.mask_roi_pool is not None:
            mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
            mask_features = self.mask_head(mask_features)
            mask_logits = self.mask_predictor(mask_features)
        else:
            mask_logits = torch.tensor(0)
            raise Exception("Expected mask_roi_pool to be not None")

        loss_mask = {}
        if self.training:
            assert targets is not None
            assert pos_matched_idxs is not None
            assert mask_logits is not None

            gt_masks = [t["masks"] for t in targets]
            gt_labels = [t["labels"] for t in targets]
            rcnn_loss_mask = maskrcnn_loss(
                mask_logits, mask_proposals,
                gt_masks, gt_labels, pos_matched_idxs)
            loss_mask = {
                "loss_mask": rcnn_loss_mask
            }
        else:
            labels = [r["labels"] for r in result]
            masks_probs = maskrcnn_inference(mask_logits, labels)
            for mask_prob, r in zip(masks_probs, result):
                r["masks"] = mask_prob

        losses.update(loss_mask)

    return result, losses
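
Each task head passed to this forward needs a .name attribute and must map the pooled box features to logits that F.cross_entropy can consume, with targets carrying a matching per-proposal label key. A minimal sketch of a compatible head; the feature width and class count are assumptions:

import torch.nn as nn

class ColorHead(nn.Module):
    """Predicts a per-box 'color' attribute; yields losses['loss_color']."""

    def __init__(self, in_features=1024, num_classes=5):
        super().__init__()
        self.name = "color"  # targets must then contain a "color" key
        self.fc = nn.Linear(in_features, num_classes)

    def forward(self, box_features):
        return self.fc(box_features)
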
Example #11
    def forward(self, features, proposals, image_shapes, targets=None):
        # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]], Optional[List[Dict[str, Tensor]]])
        """
        Arguments:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        """
        if targets is not None:
            for t in targets:
                # TODO: https://github.com/pytorch/pytorch/issues/26731
                floating_point_types = (torch.float, torch.double, torch.half)
                assert t["boxes"].dtype in floating_point_types, \
                    'target boxes must be of float type'
                assert t["labels"].dtype == torch.int64, \
                    'target labels must be of int64 type'
                if self.has_keypoint():
                    assert t["keypoints"].dtype == torch.float32, \
                        'target keypoints must be of float type'

        if self.training:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(
                proposals, targets)
        else:
            labels = None
            regression_targets = None
            matched_idxs = None

        # compute bbox embedding
        position_matrix = extract_position_matrix(proposals, Nongt_dim)
        position_embedding = extract_position_embedding(position_matrix,
                                                        feat_dim=64)

        box_features = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(box_features, position_embedding)
        class_logits, box_regression = self.box_predictor(box_features)

        result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])
        losses = {}
        if self.training:
            assert labels is not None and regression_targets is not None
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = {
                "loss_classifier": loss_classifier,
                "loss_box_reg": loss_box_reg
            }
        else:
            boxes, scores, labels = self.postprocess_detections(
                class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append({
                    "boxes": boxes[i],
                    "labels": labels[i],
                    "scores": scores[i],
                })

        if self.has_mask():
            mask_proposals = [p["boxes"] for p in result]
            if self.training:
                assert matched_idxs is not None
                # during training, only focus on positive boxes
                num_images = len(proposals)
                mask_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                    mask_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            if self.mask_roi_pool is not None:
                mask_features = self.mask_roi_pool(features, mask_proposals,
                                                   image_shapes)
                mask_features = self.mask_head(mask_features)
                mask_logits = self.mask_predictor(mask_features)
            else:
                mask_logits = torch.tensor(0)
                raise Exception("Expected mask_roi_pool to be not None")

            loss_mask = {}
            if self.training:
                assert targets is not None
                assert pos_matched_idxs is not None
                assert mask_logits is not None

                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals,
                                               gt_masks, gt_labels,
                                               pos_matched_idxs)
                loss_mask = {"loss_mask": rcnn_loss_mask}
            else:
                labels = [r["labels"] for r in result]
                masks_probs = maskrcnn_inference(mask_logits, labels)
                for mask_prob, r in zip(masks_probs, result):
                    r["masks"] = mask_prob

            losses.update(loss_mask)

        # keep none checks in if conditional so torchscript will conditionally
        # compile each branch
        if self.keypoint_roi_pool is not None and self.keypoint_head is not None \
                and self.keypoint_predictor is not None:
            keypoint_proposals = [p["boxes"] for p in result]
            if self.training:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                keypoint_proposals = []
                pos_matched_idxs = []
                assert matched_idxs is not None
                for img_id in range(num_images):
                    pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                    keypoint_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            keypoint_features = self.keypoint_roi_pool(features,
                                                       keypoint_proposals,
                                                       image_shapes)
            keypoint_features = self.keypoint_head(keypoint_features)
            keypoint_logits = self.keypoint_predictor(keypoint_features)

            loss_keypoint = {}
            if self.training:
                assert targets is not None
                assert pos_matched_idxs is not None

                gt_keypoints = [t["keypoints"] for t in targets]
                rcnn_loss_keypoint = keypointrcnn_loss(keypoint_logits,
                                                       keypoint_proposals,
                                                       gt_keypoints,
                                                       pos_matched_idxs)
                loss_keypoint = {"loss_keypoint": rcnn_loss_keypoint}
            else:
                assert keypoint_logits is not None
                assert keypoint_proposals is not None

                keypoints_probs, kp_scores = keypointrcnn_inference(
                    keypoint_logits, keypoint_proposals)
                for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores,
                                                 result):
                    r["keypoints"] = keypoint_prob
                    r["keypoints_scores"] = kps

            losses.update(loss_keypoint)

        return result, losses
Example #12
    def forward(self, features, proposals, image_shapes, targets=None):
        """
        Arguments:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        """
        feature_dims = np.array(
            [features[layer].shape[1] for layer in features])
        if np.all(feature_dims == self.in_channels):  # RGB only
            features_rgb = features
        elif np.all(feature_dims == 2 *
                    self.in_channels):  # RGB-depth 6 channel, two backbones
            from collections import OrderedDict
            features_rgb = OrderedDict()
            for key in features.keys():
                features_rgb[key] = features[key][:, :self.in_channels]
        else:  # RGB-D 4 channel
            features_rgb = features

        # Detection
        if self.training:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(
                proposals, targets)

        box_features = self.box_roi_pool(features_rgb, proposals, image_shapes)
        box_features = self.box_head(box_features)
        class_logits, box_regression = self.box_predictor(box_features)

        result, losses = [], {}
        if self.training:
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = dict(loss_classifier=loss_classifier,
                          loss_box_reg=loss_box_reg)
        else:
            boxes, scores, labels = self.postprocess_detections(
                class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                if boxes[i].shape[0] == 0:
                    return result, losses
                result.append(
                    dict(
                        boxes=boxes[i],
                        labels=labels[i],
                        scores=scores[i],
                    ))

        # Proposals selected by detection stage is shared by all other branches
        box_proposals = [p["boxes"] for p in result]
        if self.training:
            # during training, only focus on positive boxes
            num_images = len(proposals)
            box_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                box_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])

        # MultiStage RoIAlign is shared by all other branches
        shared_features_rgb = self.shared_roi_pool(features_rgb, box_proposals,
                                                   image_shapes)

        # Segmentation
        mask_features = self.mask_head(shared_features_rgb)
        mask_logits = self.mask_predictor(mask_features)
        loss_mask = {}
        masks_on_features = None
        if self.training:
            gt_masks = [t["masks"] for t in targets]
            gt_labels = [t["labels"] for t in targets]
            gt_is = [t["instance_ids"] for t in targets]
            loss_mask, masks_for_paf, masks_for_vote = maskrcnn_loss_updated(
                mask_logits, box_proposals, gt_masks, gt_labels, gt_is,
                pos_matched_idxs)
            loss_mask = dict(loss_mask=loss_mask)
        else:
            ref_labels = [r["labels"] for r in result]
            masks_probs = maskrcnn_inference(mask_logits, ref_labels)
            for mask_prob, r in zip(masks_probs, result):
                r["masks"] = mask_prob

        losses.update(loss_mask)

        if self.with_paf_branch:
            paf_features = self.paf_head(shared_features_rgb)
            paf_logits = self.paf_predictor(paf_features)
            loss_paf = {}
            if self.training:
                gt_pafs = [t["target_pafs"] for t in targets]
                loss_paf = paf_loss_updated(paf_logits, masks_for_paf,
                                            pos_matched_idxs, gt_pafs,
                                            gt_labels)
                if torch.isnan(loss_paf):
                    print('warning: loss_paf is NaN')
                loss_paf = dict(loss_paf=loss_paf)
            else:
                paf_ref_labels = torch.cat(ref_labels) - 1
                N, _, H, W = paf_logits.shape
                paf_logits = paf_logits.view(N, -1, 2, H, W)[torch.arange(N),
                                                             paf_ref_labels]
                paf_probs = [paf_logits]
                for paf_prob, r in zip(paf_probs, result):
                    r["pafs"] = F.normalize(paf_prob, dim=1)

            losses.update(loss_paf)

        if self.input_mode == config.INPUT_RGBD:

            shared_features = self.attention_block(
                shared_features_rgb
            )  # shared_features_rgb actually has 4-channel RGBD input
            bs, c, _, _ = shared_features.shape
            # shared_features = shared_features.view(bs, c, -1) # for conv1d
            if self.with_3d_keypoints:

                keypoint_features = self.vote_keypoint_head(shared_features)
                # keypoint_features = keypoint_features.view(bs, self.keypoint_dim_reduced, 14, 14)
                keypoint_offsets = self.vote_keypoint_predictor(
                    keypoint_features)

                loss_keypoint = {}
                if self.training:
                    gt_3d_keypoints = [t["frame"][:, :3] for t in targets]
                    ori_depth = [t["ori_image_depth"] for t in targets]
                    gt_labels = [t["labels"] for t in targets]
                    loss_keypoint = vote_keypoint_loss(
                        keypoint_offsets, box_proposals, ori_depth,
                        gt_3d_keypoints, pos_matched_idxs, masks_for_vote,
                        gt_labels)
                    loss_keypoint = dict(loss_keypoint=loss_keypoint)
                else:
                    ref_labels = torch.cat(ref_labels) - 1
                    N, _, H, W = keypoint_offsets.shape
                    keypoint_offsets = keypoint_offsets.view(
                        N, -1, 3, H, W)[torch.arange(N), ref_labels]
                    keypoints = [keypoint_offsets]
                    for kps, r in zip(keypoints, result):
                        r["keypoints_offset"] = kps
                losses.update(loss_keypoint)

            if self.with_axis_keypoints:

                keypoint_features = self.orientation_keypoint_head(
                    shared_features)
                # keypoint_features = keypoint_features.view(bs, self.keypoint_dim_reduced, 14, 14)
                axis_keypoint_offsets = self.orientation_keypoint_predictor(
                    keypoint_features)
                N, _, H, W = axis_keypoint_offsets.shape
                axis_keypoint_offsets = axis_keypoint_offsets.view(
                    N, -1, 2, 3, H, W)

                loss_orientation = {}
                if self.training:
                    gt_3d_keypoints = [t["axis_keypoints"] for t in targets]
                    ori_depth = [t["ori_image_depth"] for t in targets]
                    loss_orientation = vote_orientation_loss(
                        axis_keypoint_offsets, box_proposals, ori_depth,
                        gt_3d_keypoints, pos_matched_idxs, masks_for_vote,
                        gt_labels)
                    loss_orientation = dict(loss_orientation=loss_orientation)
                else:
                    axis_keypoint_offsets = axis_keypoint_offsets[
                        torch.arange(N), ref_labels]
                    axis_keypoints = [axis_keypoint_offsets]
                    for kps, r in zip(axis_keypoints, result):
                        r["axis_keypoint_offsets"] = kps

                losses.update(loss_orientation)

            if self.regress_axis:
                keypoint_features = self.axis_head(shared_features)
                keypoint_features = keypoint_features.view(
                    bs, self.keypoint_dim_reduced, 14, 14)
                axis_keypoint_offsets = self.axis_predictor(keypoint_features)
                N, _, H, W = axis_keypoint_offsets.shape
                axis_keypoint_offsets = axis_keypoint_offsets.view(
                    N, -1, 4, H, W)

                loss_axis = {}
                if self.training:
                    gt_3d_keypoints = [t["axis_keypoints"] for t in targets]
                    ori_depth = [t["ori_image_depth"] for t in targets]
                    loss_axis = vote_axis_loss(axis_keypoint_offsets,
                                               box_proposals, ori_depth,
                                               gt_3d_keypoints,
                                               pos_matched_idxs,
                                               masks_for_vote, gt_labels)
                    loss_axis = dict(loss_axis=loss_axis)
                else:
                    axis_keypoint_offsets = axis_keypoint_offsets[
                        torch.arange(N), ref_labels]
                    axis_keypoints = [axis_keypoint_offsets]
                    for kps, r in zip(axis_keypoints, result):
                        r["axis_offsets"] = kps

                losses.update(loss_axis)

            if self.estimate_norm_vector:
                keypoint_features = self.norm_vector_head(shared_features)
                keypoint_features = keypoint_features.view(
                    bs, self.keypoint_dim_reduced, 14, 14)
                norm_vectors = self.norm_vector_predictor(keypoint_features)
                N, _, H, W = norm_vectors.shape
                norm_vectors = norm_vectors.view(N, -1, 3, H, W)

                loss_norm_vector = {}
                if self.training:
                    gt_3d_keypoints = [t["axis_keypoints"] for t in targets]
                    loss_norm_vector = calculate_norm_vectors(
                        norm_vectors, gt_3d_keypoints, pos_matched_idxs,
                        masks_for_vote, gt_labels)
                    loss_norm_vector = dict(loss_norm_vector=loss_norm_vector)
                else:
                    norm_vectors = F.normalize(norm_vectors, dim=2)
                    norm_vectors = norm_vectors[torch.arange(N), ref_labels]
                    estimate_norm_vectors = [norm_vectors]
                    for norm_v, r in zip(estimate_norm_vectors, result):
                        r["norm_vector"] = norm_v

                losses.update(loss_norm_vector)
        return result, losses