Example 1
    def forward(self, fg_imgs, fg_txts, bg_imgs, bg_txts, is_test=False):
        loss = []
        encode_list = []
        for fg_img, fg_txt, bg_img, bg_txt in zip(fg_imgs, fg_txts, bg_imgs, bg_txts):
            fg_img_encode = self.encode(fg_img, is_img=True)
            fg_txt_encode = self.encode(fg_txt, is_txt=True)
            bg_img_encode = self.encode(bg_img, is_img=True)
            bg_txt_encode = self.encode(bg_txt, is_txt=True)

            # hinge triplet: a foreground image should be closer to its own
            # caption than to the background caption, by at least self.margin
            fg_intra = smooth_l1_loss(fg_img_encode, fg_txt_encode)
            fg_inter = smooth_l1_loss(fg_img_encode, bg_txt_encode)
            triplet_fg = fg_intra + self.margin - fg_inter
            triplet_fg = triplet_fg * (triplet_fg >= 0).float()  # clamp at zero
            loss.append(triplet_fg.sum())

            # symmetric term anchored on the background pair
            bg_intra = smooth_l1_loss(bg_txt_encode, bg_img_encode)
            bg_inter = smooth_l1_loss(fg_txt_encode, bg_img_encode)
            triplet_bg = bg_intra + self.margin - bg_inter
            triplet_bg = triplet_bg * (triplet_bg >= 0).float()  # clamp at zero

            loss.append(triplet_bg.sum())
            encode_list.append([fg_img_encode, fg_txt_encode])
        
        if is_test:
            return encode_list
        else:
            return loss
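All of these examples call a smooth_l1_loss helper that accepts beta and size_average keyword arguments but is never defined in the snippets. As a reference, here is a minimal sketch consistent with that signature, modeled on the maskrcnn-benchmark variant; treat it as an assumption rather than the exact helper used here (Example 1 appears to use a flavor without reduction, and Examples 8 and 10 pass extra 3D/yaw arguments):

import torch

def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
    # Huber-style loss: quadratic below beta, linear above it
    n = torch.abs(input - target)
    loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)
    return loss.mean() if size_average else loss.sum()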
Example 2
    def __call__(self, anchors, objectness, box_regression,
                 box_regression_right, targets_left, targets_right):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            box_regression_right (list[Tensor])
            targets_left (list[BoxList])
            targets_right (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
            box_right_loss (Tensor)
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets, regression_targets_right = self.prepare_targets(
            anchors, targets_left, targets_right)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        _, box_regression = \
                concat_box_prediction_layers(objectness, box_regression)

        objectness, box_regression_right = \
                concat_box_prediction_layers(objectness, box_regression_right)

        objectness = objectness.squeeze()

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
        regression_targets_right = torch.cat(regression_targets_right, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())
        box_right_loss = smooth_l1_loss(
            box_regression_right[sampled_pos_inds],
            regression_targets_right[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds])

        return objectness_loss, box_loss, box_right_loss
Example 3
    def __call__(self, anchors, objectness, box_regression,
                 box_regression_right, objectness2, box_regression2,
                 box_regression_right2):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            box_regression_right (list[Tensor])
            objectness2 (list[Tensor])
            box_regression2 (list[Tensor])
            box_regression_right2 (list[Tensor])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
            box_right_loss (Tensor)
        """
        if self.cfg.MODEL.RPN_LR.LOSS_HEAD_CONSISTENCY == 0.0:
            return 0, 0, 0

        _, box_regression = \
                concat_box_prediction_layers(objectness, box_regression)

        objectness, box_regression_right = \
                concat_box_prediction_layers(objectness, box_regression_right)

        _, box_regression2 = \
                concat_box_prediction_layers(objectness2, box_regression2)

        objectness2, box_regression_right2 = \
                concat_box_prediction_layers(objectness2, box_regression_right2)

        objectness = objectness.squeeze()
        objectness2 = objectness2.squeeze()

        box_loss = smooth_l1_loss(
            box_regression,
            box_regression2,
            beta=1.0 / 9,
            size_average=True,
        ) * self.cfg.MODEL.RPN_LR.LOSS_HEAD_CONSISTENCY
        box_right_loss = smooth_l1_loss(
            box_regression_right,
            box_regression_right2,
            beta=1.0 / 9,
            size_average=True,
        ) * self.cfg.MODEL.RPN_LR.LOSS_HEAD_CONSISTENCY

        objectness_loss = smooth_l1_loss(
            objectness,
            objectness2) * self.cfg.MODEL.RPN_LR.LOSS_HEAD_CONSISTENCY

        return objectness_loss, box_loss, box_right_loss
Example 4
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        #print_log = (self.num_call % 100) == 0
        #self.num_call += 1
        #if print_log:
            #all_anchor_sizes_each_pyramid = [[len(a) for a in anchors_per_image]
                #for anchors_per_image in anchors]

        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets = self.prepare_targets(anchors, targets)

        #if print_log:
            #with torch.no_grad():
                #all_kind_to_num = get_kind_to_num_info(labels, all_anchor_sizes_each_pyramid)
                #from qd.qd_common import print_table
                #print_table(all_kind_to_num)
                #if self.all_kind_to_num is None:
                    #self.all_kind_to_num = all_kind_to_num
                #else:
                    #for kind_to_num, self_kind_to_num in zip(all_kind_to_num, self.all_kind_to_num):
                        #for kind, num in kind_to_num.items():
                            #self_kind_to_num[kind] += num
                #print_table(self.all_kind_to_num)

        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness, box_regression = \
                concat_box_prediction_layers(objectness, box_regression)

        objectness = objectness.squeeze()

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds]
        )

        return objectness_loss, box_loss
Example 5
def bbreg_ml_loss(box_regression,
                  box_regression_un_log,
                  target,
                  size_average=True,
                  eps=0.0,
                  loss_type="Laplace"):
    if loss_type == "Laplace":
        loss_all = (box_regression - target).abs() / (
            box_regression_un_log.exp() + eps) + box_regression_un_log
    elif loss_type == "Gauss":
        loss_all = (box_regression - target)**2 / (
            box_regression_un_log.exp() + eps) + box_regression_un_log
    elif loss_type == "SmoothL1":
        loss_all = smooth_l1_loss(
            box_regression,
            target,
            size_average=size_average,
            beta=1,
        )
        return loss_all
    else:
        raise ValueError

    loss = loss_all.sum(-1)

    if size_average:
        loss = loss.mean()
    else:
        loss = loss.sum()
    return loss
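A quick usage sketch for bbreg_ml_loss; the tensor shapes and values below are illustrative assumptions:

import torch

# 8 hypothetical boxes with 4 regression channels each
box_regression = torch.randn(8, 4)
box_regression_un_log = torch.randn(8, 4)  # predicted log-uncertainty per channel
target = torch.randn(8, 4)

# Laplace branch: |error| / exp(log_b) + log_b, summed per box, then averaged
loss = bbreg_ml_loss(box_regression, box_regression_un_log, target,
                     size_average=True, loss_type="Laplace")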
Example 6
    def __call__(self, class_logits, box_regression, closeup_logits,
                 closeup_labels):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """

        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        labels = cat([proposal.get_field("labels") for proposal in proposals],
                     dim=0)
        regression_targets = cat([
            proposal.get_field("regression_targets") for proposal in proposals
        ],
                                 dim=0)

        classification_loss = F.cross_entropy(class_logits, labels)
        if closeup_logits is not None:
            closeup_labels = closeup_labels.repeat(len(closeup_logits))
            closeup_logits = torch.cat(closeup_logits, dim=0)
            extra_classification_loss = F.cross_entropy(
                closeup_logits, closeup_labels) / 10  # down-weight the closeup term
        else:
            extra_classification_loss = None

        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing
        sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[sampled_pos_inds_subset]
        if self.cls_agnostic_bbox_reg:
            map_inds = torch.tensor([4, 5, 6, 7], device=device)
        else:
            map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3],
                                                              device=device)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds_subset[:, None], map_inds],
            regression_targets[sampled_pos_inds_subset],
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss, extra_classification_loss
Example 7
    def cal_box_loss(self, class_logits, box_regression, proposals):
        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        labels = cat([proposal.get_field("labels") for proposal in proposals],
                     dim=0)
        regression_targets = cat([
            proposal.get_field("regression_targets") for proposal in proposals
        ],
                                 dim=0)

        classification_loss = F.cross_entropy(class_logits, labels)

        self.sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        self.labels_pos = labels[self.sampled_pos_inds_subset]
        if self.cls_agnostic_bbox_reg:
            map_inds = torch.tensor([4, 5, 6, 7], device=device)
        else:
            map_inds = 4 * self.labels_pos[:, None] + torch.tensor(
                [0, 1, 2, 3], device=device)

        box_loss = smooth_l1_loss(
            box_regression[self.sampled_pos_inds_subset[:, None], map_inds],
            regression_targets[self.sampled_pos_inds_subset],
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
Example 8
    def __call__(self,
                 anchors,
                 objectness,
                 box_regression,
                 targets,
                 debugs=None):
        """
        Arguments:
            anchors (BoxList): box num: N
            objectness (list[Tensor]): len=scale_num
            box_regression (list[Tensor]): len=scale_num
            targets (list[BoxList]): len = batch size

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds0, sampled_neg_inds0 = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds0,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds0,
                                                   dim=0)).squeeze(1)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        batch_size = anchors.batch_size()

        if SHOW_POS_NEG_ANCHORS:
            self.show_pos_neg_anchors(anchors, sampled_pos_inds,
                                      sampled_neg_inds, targets)

        if SHOW_PRED_POS_ANCHORS:
            self.show_pos_anchors_pred(box_regression, anchors, objectness,
                                       targets, sampled_pos_inds,
                                       sampled_neg_inds, regression_targets)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        if CHECK_REGRESSION_TARGET_YAW:
            rpn_target_yaw = regression_targets[sampled_pos_inds]
            print(f'max_rpn_target_yaw: {rpn_target_yaw.max()}')
            print(f'min_rpn_target_yaw: {rpn_target_yaw.min()}')
            assert rpn_target_yaw.max() < 1.5
            assert rpn_target_yaw.min() > -1.5

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            anchors[sampled_pos_inds].bbox3d,
            beta=1.0 / 9,
            size_average=False,
            yaw_loss_mode=self.yaw_loss_mode,
        ) / (sampled_inds.numel())

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds])

        return objectness_loss, box_loss
Example 9
    def __call__(self, class_logits, box_regression):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """

        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        labels = cat([proposal.get_field("labels") for proposal in proposals],
                     dim=0)
        regression_targets = cat([
            proposal.get_field("regression_targets") for proposal in proposals
        ],
                                 dim=0)

        classification_loss = F.cross_entropy(class_logits, labels)

        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing
        sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels.index_select(0, sampled_pos_inds_subset)
        if self.cls_agnostic_bbox_reg:
            map_inds = torch.tensor([4, 5, 6, 7], device=device)
        else:
            map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3],
                                                              device=device)

        index_select_indices = (
            (sampled_pos_inds_subset[:, None]) * box_regression.size(1) +
            map_inds).view(-1)
        box_regression_sampled = box_regression.view(-1).index_select(
            0, index_select_indices).view(map_inds.shape[0], map_inds.shape[1])
        regression_targets_sampled = regression_targets.index_select(
            0, sampled_pos_inds_subset)

        box_loss = smooth_l1_loss(
            box_regression_sampled,
            regression_targets_sampled,
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
Example 10
    def box_loss(self, labels, box_regression, regression_targets, bbox3ds):
        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing
        device = box_regression.device
        sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[sampled_pos_inds_subset]
        map_inds = 7 * labels_pos[:, None] + torch.tensor(
            [0, 1, 2, 3, 4, 5, 6], device=device)
        box_regression_pos = box_regression[sampled_pos_inds_subset[:, None],
                                            map_inds]
        regression_targets_pos = regression_targets[sampled_pos_inds_subset]

        if CHECK_REGRESSION_TARGET_YAW:
            roi_target_yaw = regression_targets_pos[:, -1]
            print(f'max_roi_target_yaw: {roi_target_yaw.max()}')
            print(f'min_roi_target_yaw: {roi_target_yaw.min()}')
            assert roi_target_yaw.max() < 1.5
            assert roi_target_yaw.min() > -1.5

        box_loss = smooth_l1_loss(
            box_regression_pos,
            regression_targets_pos,
            bbox3ds[sampled_pos_inds_subset],
            size_average=False,
            beta=1 / 5.,  # 1
            yaw_loss_mode=self.yaw_loss_mode)
        box_loss = box_loss / labels.numel()
        return box_loss
Example 11
    def __call__(self, anchors, objectness, box_regression, targets):
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets = self.prepare_targets(anchors, targets)

        N = len(labels)
        objectness, box_regression = \
                concat_box_prediction_layers(objectness, box_regression)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
        pos_inds = torch.nonzero(labels > 0).squeeze(1)

        box_loss = smooth_l1_loss(
            box_regression[pos_inds],
            regression_targets[pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (max(1, pos_inds.numel()))

        labels = labels.int()
        objectness_loss = self.box_objectness_loss_func(
            objectness, labels) / (pos_inds.numel() + N)

        return objectness_loss, box_loss
Example 12
    def rotation_loss(self, rot_logits, rot_regression, targets):
        rot_logits = cat(rot_logits, dim=0)
        rot_regression = cat(rot_regression, dim=0)
        rot_label_target = cat(
            [tar.get_field("rotation_label") for tar in targets], dim=0)
        rot_regression_target = cat(
            # field name kept exactly as stored upstream (note the
            # "regerssion" spelling)
            [tar.get_field("rotation_regerssion_target") for tar in targets],
            dim=0)

        device = rot_regression.device

        if (not hasattr(self, "labels_pos")) or (not hasattr(
                self, "sampled_pos_inds_subset")):
            raise RuntimeError("cal_box_loss needs to be called before")

        map_inds = self.labels_pos[:, None]
        rot_classification_loss = F.cross_entropy(
            rot_logits[self.sampled_pos_inds_subset],
            rot_label_target[self.sampled_pos_inds_subset]) * 0.2

        rot_regression_loss = smooth_l1_loss(
            rot_regression[self.sampled_pos_inds_subset, map_inds],
            rot_regression_target[self.sampled_pos_inds_subset],
            size_average=False,
            beta=1,
        )

        rot_regression_loss = (rot_regression_loss /
                               rot_regression_target.numel()) * 0.1

        return rot_classification_loss, rot_regression_loss
Example 13
    def __call__(self, anchors, objectness, box_regression, targets,
                 closeup_objectness):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])
            closeup_objectness (list[Tensor] or None)

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets, matched_idxs = self.prepare_targets(
            anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness, box_regression = \
                concat_box_prediction_layers(objectness, box_regression)

        objectness = objectness.squeeze()

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
        matched_idxs = torch.cat(matched_idxs, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        if closeup_objectness is not None:
            closeup_objectness = torch.cat(
                [c.view(-1) for c in closeup_objectness])
            with torch.no_grad():
                # closeup crops are all positives, so their target label is 1
                fakelabel = torch.ones(closeup_objectness.size(0),
                                       device=closeup_objectness.device)
            #closeup_objectness_loss = F.binary_cross_entropy_with_logits(closeup_objectness, fakelabel)

            objectness_loss = F.binary_cross_entropy_with_logits(
                torch.cat([objectness[sampled_inds], closeup_objectness],
                          dim=0),
                torch.cat([labels[sampled_inds], fakelabel], dim=0))
        else:
            objectness_loss = F.binary_cross_entropy_with_logits(
                objectness[sampled_inds], labels[sampled_inds])

        return objectness_loss, box_loss
Example 14
    def __call__(self, class_logits, box_regression):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """

        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        labels = cat([proposal.get_field("labels") for proposal in proposals],
                     dim=0)
        regression_targets = cat([
            proposal.get_field("regression_targets") for proposal in proposals
        ],
                                 dim=0)
        fitz_categories = cat(
            [proposal.get_field("fitz_categories") for proposal in proposals],
            dim=0)

        classification_loss = F.cross_entropy(class_logits,
                                              labels,
                                              reduction="none")
        classification_loss = self.augment_loss(classification_loss,
                                                fitz_categories,
                                                use_mean=True)

        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing
        sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[sampled_pos_inds_subset]
        map_inds = 4 * labels_pos[:, None] + \
            torch.tensor([0, 1, 2, 3], device=device)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds_subset[:, None], map_inds],
            regression_targets[sampled_pos_inds_subset],
            size_average=None,
            beta=1,
        )
        box_loss = self.augment_loss(box_loss,
                                     fitz_categories[sampled_pos_inds_subset])
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
Example 15
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the objectness and the box_regression
        for objectness_per_level, box_regression_per_level in zip(
            objectness, box_regression
        ):
            N, A, H, W = objectness_per_level.shape
            objectness_per_level = objectness_per_level.permute(0, 2, 3, 1).reshape(
                N, -1
            )
            box_regression_per_level = box_regression_per_level.view(N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
            objectness_flattened.append(objectness_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        objectness = cat(objectness_flattened, dim=1).reshape(-1)
        box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds]
        )

        return objectness_loss, box_loss
Example 16
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the objectness and the box_regression
        for objectness_per_level, box_regression_per_level in zip(
            objectness, box_regression
        ):
            N, A, H, W = objectness_per_level.shape
            objectness_per_level = objectness_per_level.permute(0, 2, 3, 1).reshape(
                N, -1
            )
            box_regression_per_level = box_regression_per_level.view(N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
            objectness_flattened.append(objectness_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        objectness = cat(objectness_flattened, dim=1).reshape(-1)
        box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds]
        )

        return objectness_loss, box_loss
Example 17
    def __call__(self, class_logits, box_regression):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """

        class_logits = cat(class_logits, dim=0)  # [num_roi, 81]
        box_regression = cat(box_regression, dim=0)  # [num_roi, 81*4]
        device = class_logits.device

        # subsample must have been called before this function
        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        # labels: per-RoI class label in [0, 80], shape [num_roi]
        # set in match_targets_to_proposals: indexing target[idx] copies the labels field too
        labels = cat([proposal.get_field("labels") for proposal in proposals], dim=0)
        # regression_targets: [t_x, t_y, t_w, t_h], shape [num_roi, 4]
        regression_targets = cat(
            [proposal.get_field("regression_targets") for proposal in proposals], dim=0
        )

        # combines log_softmax with the negative log-likelihood loss
        classification_loss = F.cross_entropy(class_logits, labels)

        # indices of the positive samples
        sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        # labels of the positive samples
        labels_pos = labels[sampled_pos_inds_subset]

        if self.cls_agnostic_bbox_reg:
            map_inds = torch.tensor([4, 5, 6, 7], device=device)
        else:
            # the box head predicts one box per class for every RoI;
            # select the predictions belonging to the ground-truth class
            map_inds = 4 * labels_pos[:, None] + torch.tensor(
                [0, 1, 2, 3], device=device)

        # compute the regression loss on positive samples only
        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds_subset[:, None], map_inds],
            regression_targets[sampled_pos_inds_subset],
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
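The map_inds indexing used above selects, for each positive RoI, the four regression channels that belong to its ground-truth class. A small self-contained illustration (class count and values are made up):

import torch

# 2 positive RoIs, 3 classes * 4 box channels = 12 columns
box_regression = torch.arange(2 * 12, dtype=torch.float32).view(2, 12)
labels_pos = torch.tensor([1, 2])  # ground-truth class id of each positive RoI
pos_inds = torch.tensor([0, 1])    # row index of each positive RoI

map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3])
# map_inds == [[4, 5, 6, 7], [8, 9, 10, 11]]: per-RoI columns of its class's box
selected = box_regression[pos_inds[:, None], map_inds]  # shape [2, 4]
print(selected)  # tensor([[ 4.,  5.,  6.,  7.], [20., 21., 22., 23.]])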
Example 18
    def __call__(self, anchors, predictions, targets):
        """
        Arguments:
            anchors (list[BoxList])
            predictions (tuple): box_cls, box_reg, box_obj (each list[Tensor])
            targets (list[BoxList])

        Returns:
            dict with 'retinanet_cls', 'retinanet_reg' and 'retinanet_obj'
            losses (each a Tensor)
        """
        box_cls, box_reg, box_obj = predictions
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, reg_targets = self.prepare_targets(anchors, targets)

        N = len(labels)
        box_cls, box_reg = concat_box_prediction_layers(box_cls, box_reg)

        labels = torch.cat(labels, dim=0)
        pos_mask, use_mask = labels > 0, labels >= 0
        reg_targets = torch.cat(reg_targets, dim=0)
        pos_inds = torch.nonzero(pos_mask).squeeze(1)
        pos_numel = pos_inds.numel()

        retinanet_reg_loss = smooth_l1_loss(
            box_reg[pos_inds],
            reg_targets[pos_inds],
            beta=self.bbox_reg_beta,
            size_average=False,
        ) / (max(1, pos_numel * self.regress_norm))

        box_cls_labels = torch.zeros_like(box_cls, device=box_cls.device)
        box_cls_labels[pos_inds, labels[pos_inds].long() - 1] = 1
        retinanet_cls_loss = F.binary_cross_entropy_with_logits(
            box_cls[pos_mask], box_cls_labels[pos_mask],
            reduction="sum") / max(1, pos_numel)

        box_obj = concat_box_objectness_layers(box_obj)
        retinanet_obj_loss = F.binary_cross_entropy_with_logits(
            box_obj[use_mask].squeeze(),
            pos_mask[use_mask].float(),
            reduction="sum") * self.scale / max(1, pos_numel)

        # loss reweighting
        with torch.no_grad():
            norm = retinanet_reg_loss / (retinanet_cls_loss +
                                         retinanet_obj_loss)
        retinanet_cls_loss *= norm
        retinanet_obj_loss *= norm

        return {
            'retinanet_cls': retinanet_cls_loss,
            'retinanet_reg': retinanet_reg_loss,
            'retinanet_obj': retinanet_obj_loss
        }
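The reweighting step above computes norm under torch.no_grad(), so the ratio acts as a constant in the backward pass: it rescales the classification and objectness losses to the magnitude of the regression loss without routing any gradient through the ratio itself. A minimal illustration of the pattern (values are arbitrary):

import torch

reg_loss = torch.tensor(0.8, requires_grad=True)
cls_loss = torch.tensor(0.2, requires_grad=True)

with torch.no_grad():
    norm = reg_loss / cls_loss  # detached constant, here 4.0

scaled_cls = cls_loss * norm
scaled_cls.backward()
print(cls_loss.grad)  # tensor(4.) == norm; reg_loss.grad stays None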
Example 19
    def __call__(self, class_logits, box_regression):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """

        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        labels = cat([proposal.get_field("labels") for proposal in proposals],
                     dim=0)
        regression_targets = cat([
            proposal.get_field("regression_targets") for proposal in proposals
        ],
                                 dim=0)

        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing
        pos_inds = torch.nonzero(labels > 0).squeeze(1)
        pos_numel = pos_inds.numel()
        if pos_numel > 0:
            classification_loss = F.cross_entropy(class_logits, labels)

            if self.cls_agnostic_bbox_reg:
                map_inds = torch.tensor([4, 5, 6, 7], device=device)
            else:
                map_inds = 4 * labels[pos_inds][:, None] + torch.tensor(
                    [0, 1, 2, 3], device=device)

            box_loss = smooth_l1_loss(
                box_regression[pos_inds[:, None], map_inds],
                regression_targets[pos_inds],
                size_average=True,
                beta=1,
            )

            classification_loss *= self.guided_loss_weighter(
                box_loss, classification_loss)
            return classification_loss, box_loss
        else:
            zero = pos_inds.new_tensor([0]).float()
            return zero, zero
Example 20
def get_masked_loss(box_regression, regression_targets, sampled_pos_inds):
    box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds],
        regression_targets[sampled_pos_inds],
        beta=1.0 / 9,
        size_average=True,
    )

    return box_loss
Example 21
    def __call__(self, class_logits, box_regression):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """

        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        labels = cat([proposal.get_field("labels") for proposal in proposals],
                     dim=0)
        regression_targets = cat([
            proposal.get_field("regression_targets") for proposal in proposals
        ],
                                 dim=0)
        # if sum(labels>100)>0:
        #     import pdb; pdb.set_trace()
        # mark classes that are not trained as ignored: labels > 100 -> -1 (ignore_index=-1)
        # labels[labels>(self.num_classfiers-1)] = -1
        classification_loss = F.cross_entropy(class_logits,
                                              labels,
                                              ignore_index=-1)

        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing
        sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[sampled_pos_inds_subset]
        if self.cls_agnostic_bbox_reg:
            map_inds = torch.tensor([4, 5, 6, 7], device=device)
        else:
            map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3],
                                                              device=device)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds_subset[:, None], map_inds],
            regression_targets[sampled_pos_inds_subset],
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
Example 22
    def __call__(self, anchors, objectness, box_regression, targets,
                 embeddings):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])
            embeddings (list[Tensor])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
            pair_loss (Tensor)
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)
        objectness, box_regression, embeddings = \
                concat_box_prediction_embeddings_layers(objectness, box_regression, embeddings)

        objectness = objectness.squeeze()
        embeddings = embeddings.squeeze()

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds])

        # siamese contrastive loss
        margin = RPNContrastiveLossComputation.PAIR_MARGIN
        C_loss = ContrastiveLoss(margin)
        # hard-negative mining version (TODO: currently limited to the sampled anchors)
        embeddings1, embeddings2, targets = pair_embeddings(
            embeddings[sampled_inds], labels[sampled_inds])
        pair_loss = C_loss(embeddings1, embeddings2, targets)
        # TODO dynamic incremental margin
        #         if triplet_loss == 0 and np.random.random() > 0.5:
        #             RPNTripletLossComputation.TRIPLET_MARGIN += 1

        return objectness_loss, box_loss, pair_loss
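ContrastiveLoss and pair_embeddings are not defined in this snippet. A common margin-based formulation for the contrastive term, offered here as an assumption about what ContrastiveLoss computes:

import torch
import torch.nn.functional as F

class ContrastiveLoss(torch.nn.Module):
    # Hadsell-style siamese loss: target 1 pulls a pair together,
    # target 0 pushes it at least `margin` apart
    def __init__(self, margin):
        super().__init__()
        self.margin = margin

    def forward(self, emb1, emb2, target):
        d = F.pairwise_distance(emb1, emb2)
        loss = (target * d.pow(2) +
                (1 - target) * F.relu(self.margin - d).pow(2))
        return loss.mean()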
Example 23
    def __call__(self,
                 anchors,
                 box_cls,
                 box_regression,
                 targets,
                 search=False):
        """
        Arguments:
            anchors (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])
            search (bool): if True, also compute per-target loss scales

        Returns:
            retinanet_cls_loss (Tensor)
            retinanet_regression_loss (Tensor)
            loss_scale (Tensor or None)
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets, matched_targets_boxes = self.prepare_targets(
            anchors, targets, True)

        N = len(labels)
        box_cls, box_regression = \
                concat_box_prediction_layers(box_cls, box_regression)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
        pos_inds = torch.nonzero(labels > 0).squeeze(1)

        retinanet_regression_loss, box_loss_vec = smooth_l1_loss(
            box_regression[pos_inds],
            regression_targets[pos_inds],
            beta=self.bbox_reg_beta,
            size_average=False,
            return_loss_vec=True)

        labels = labels.int()

        retinanet_regression_loss /= (max(1,
                                          pos_inds.numel() *
                                          self.regress_norm))
        retinanet_cls_loss, cls_loss_vec = self.box_cls_loss_func(
            box_cls, labels, return_loss_vec=True)
        retinanet_cls_loss /= (pos_inds.numel() + N)

        if search:
            loss_scale = self._count_loss_scale(
                matched_targets_boxes, pos_inds,
                box_loss_vec / (max(1,
                                    pos_inds.numel() * self.regress_norm)),
                cls_loss_vec[pos_inds] / (pos_inds.numel() + N))
        else:
            loss_scale = None
        return retinanet_cls_loss, retinanet_regression_loss, loss_scale  #, ratio_small
Example 24
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets, areas = self.prepare_targets(
            anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness, box_regression = \
                concat_box_prediction_layers(objectness, box_regression)

        objectness = objectness.squeeze()

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
        areas = torch.cat(areas, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        # objectness_loss = objectness_loss_func(
        #     objectness[sampled_inds], labels[sampled_inds]
        # ) / (sampled_inds.numel())

        objectness_loss = self.objectness_loss['fn'](objectness[sampled_inds],
                                                     labels[sampled_inds],
                                                     areas=areas[sampled_inds])
        if self.objectness_loss['avg']:
            objectness_loss /= sampled_inds.numel()

        return objectness_loss, box_loss
Example 25
    def __call__(self, anchors, box_cls, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            retinanet_cls_loss (list[Tensor])
            retinanet_regression_loss (list[Tensor])
        """
        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets = self.prepare_targets(anchors, targets)

        N = len(labels)
        box_cls, box_regression = \
                concat_box_prediction_layers_2(box_cls, box_regression)

        labels = torch.stack(labels, dim=0)
        regression_targets = torch.stack(regression_targets, dim=0)
        
        _retinanet_regression_loss = []
        _retinanet_cls_loss = []
        lab_sum = len(torch.nonzero(labels > 0).squeeze(1))

        for _labels, _box_regression, _regression_targets, _box_cls in zip(
                labels.split(1, dim=0), box_regression.split(1, dim=0),
                regression_targets.split(1, dim=0), box_cls.split(1, dim=0)):
            _labels = _labels.squeeze(0)
            _box_regression = _box_regression.squeeze(0)
            _regression_targets = _regression_targets.squeeze(0)
            _box_cls = _box_cls.squeeze(0)
            
            pos_inds = torch.nonzero(_labels > 0).squeeze(1)

            _retinanet_regression_loss.append(smooth_l1_loss(
                _box_regression[pos_inds],
                _regression_targets[pos_inds],
                beta=self.bbox_reg_beta,
                size_average=False,
            ) / (max(1, lab_sum * self.regress_norm)))

            _labels = _labels.int()

            _retinanet_cls_loss.append(self.box_cls_loss_func(
                _box_cls,
                _labels
            ) / (lab_sum + N))

        return _retinanet_cls_loss, _retinanet_regression_loss
Example 26
    def __call__(self, anchors, box_cls, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            retinanet_cls_loss (Tensor)
            retinanet_regression_loss (Tensor)
        """
        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets = self.prepare_targets(anchors, targets)

        N = len(labels)
        box_cls, box_regression = \
            concat_box_prediction_layers(box_cls, box_regression)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        if torch.isinf(regression_targets.sum()) or torch.isnan(regression_targets.sum()):
            print("Warning: NaN/Inf detected in regression_targets")
        pos_inds = torch.nonzero(labels > 0).squeeze(1)

        retinanet_regression_loss = smooth_l1_loss(
            box_regression[pos_inds],
            regression_targets[pos_inds],
            beta=self.bbox_reg_beta,
            size_average=False,
        ) / (max(1, pos_inds.numel() * self.regress_norm))

        labels = labels.int()

        retinanet_cls_loss = self.box_cls_loss_func(
            box_cls,
            labels
        ) / (pos_inds.numel() + N)

        # retinanet_reg_max = box_regression.max()
        if torch.isnan(retinanet_regression_loss.sum()) or torch.isnan(retinanet_cls_loss.sum()) \
            or torch.isinf(retinanet_regression_loss.sum()) or torch.isinf(retinanet_cls_loss.sum()):
            print("nananananna")

            print("amanannana")

            pass
        return (cfg.MODEL.RETINANET.CLS_WEIGHT * retinanet_cls_loss,
                cfg.MODEL.RETINANET.LOC_WEIGHT * retinanet_regression_loss)
Example 27
    def __call__(self, keypoint_offset_pred):
        '''
        Uses self._proposals (list[BoxList]), which must be set beforehand.
        :param keypoint_offset_pred: predicted BB8 keypoint offsets (Tensor)
        :return: keypoint_loss (Tensor)
        '''
        bb8_keypoint_offset_targets = []
        positive_inds = []
        for proposals_per_image in self._proposals:
            bb8kp = proposals_per_image.get_field("bb8keypoints")
            labels_per_image = proposals_per_image.get_field("labels")

            positive_inds_per_image = torch.nonzero(
                labels_per_image > 0).squeeze(1)

            bb8kp = bb8kp[positive_inds_per_image]
            positive_proposals = proposals_per_image[positive_inds_per_image]

            # compute bb8keypoint offset regression targets
            regression_targets_per_image = bb8keypoint_offset_encode(
                bb8kp.keypoints, positive_proposals.bbox)
            bb8_keypoint_offset_targets.append(regression_targets_per_image)
            positive_inds.append(positive_inds_per_image)

        bb8_keypoint_offset_targets = cat(bb8_keypoint_offset_targets, dim=0)
        positive_inds = cat(positive_inds, dim=0)

        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing, for class-specific regression
        # sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        # labels_pos = labels[sampled_pos_inds_subset]
        # map_inds = 16 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8,
        #                                                     9, 10, 11, 12, 13, 14, 15], device=device)

        if bb8_keypoint_offset_targets.numel() == 0:
            return bb8_keypoint_offset_targets.sum() * 0

        # print("keypoint_offset_pred.device:{}".format(keypoint_offset_pred.device))
        # print("keypoint_offset_target.device:{}".format(bb8_keypoint_offset_targets.device))
        keypoint_loss = smooth_l1_loss(
            keypoint_offset_pred[positive_inds],
            bb8_keypoint_offset_targets,
            size_average=False,
            beta=1,
        )
        keypoint_loss = keypoint_loss / keypoint_offset_pred.shape[0]
        return keypoint_loss
Example 28
    def __call__(self, anchors, box_cls, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            retinanet_cls_loss (Tensor)
            retinanet_regression_loss (Tensor)
            ratio_small (Tensor)
        """
        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets, matched_targets_boxes = self.prepare_targets(anchors, targets, True)

        N = len(labels)
        box_cls, box_regression = \
                concat_box_prediction_layers(box_cls, box_regression)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
        pos_inds = torch.nonzero(labels > 0).squeeze(1)

        matched_targets_boxes = torch.cat(matched_targets_boxes, dim=0)
        reference_boxes = matched_targets_boxes[pos_inds]
        gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0]
        gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1]
        gt_areas = gt_widths * gt_heights
        small_index = gt_areas < 1024

        retinanet_regression_loss, box_loss_vec = smooth_l1_loss(
            box_regression[pos_inds],
            regression_targets[pos_inds],
            beta=self.bbox_reg_beta,
            size_average=False,
            return_loss_vec=True
        ) #/ (max(1, pos_inds.numel() * self.regress_norm))

        ratio_small = box_loss_vec[small_index].sum()/retinanet_regression_loss
        labels = labels.int()

        retinanet_regression_loss /= (max(1, pos_inds.numel() * self.regress_norm))
        retinanet_cls_loss = self.box_cls_loss_func(
            box_cls,
            labels
        ) / (pos_inds.numel() + N)

        return retinanet_cls_loss, retinanet_regression_loss, ratio_small
Example 29
    def loss_obj(self, predictions, labels, regression_targets):
        class_logits, box_regression, obj_logits = predictions
        device = class_logits.device
        obj_logits.squeeze_()

        pos_mask = labels > 0
        pos_inds = pos_mask.nonzero().squeeze(1)
        labels_pos = labels[pos_inds] - 1
        pos_numel = pos_inds.numel()

        class_logits_labels = torch.zeros_like(class_logits)
        class_logits_labels[pos_inds, labels_pos] = 1

        objectness_loss = F.binary_cross_entropy_with_logits(
            obj_logits, pos_mask.float())

        classification_loss = F.binary_cross_entropy_with_logits(
            class_logits[pos_inds],
            class_logits_labels[pos_inds],
            reduction="sum") / max(1, pos_numel)

        if self.cls_agnostic_bbox_reg:
            map_inds = torch.tensor([0, 1, 2, 3], device=device)
        else:
            map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3],
                                                              device=device)

        box_loss = smooth_l1_loss(
            box_regression[pos_inds[:, None], map_inds],
            regression_targets[pos_inds],
            size_average=True,
            beta=1,
        )

        # loss reweighting
        if pos_numel > 0:
            objectness_loss *= self.scale
            classification_loss /= self.scale
        else:
            box_loss.zero_()
            classification_loss.zero_()

        return {
            'roi_cls': classification_loss,
            'roi_reg': box_loss,
            'roi_obj': objectness_loss
        }
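class_logits_labels above is a one-hot target matrix over foreground classes, filled only at positive RoIs, so binary cross-entropy treats every class column as an independent detector. A compact illustration (shapes are made up):

import torch

class_logits = torch.randn(5, 3)        # 5 RoIs, 3 foreground classes
labels = torch.tensor([0, 2, 1, 0, 3])  # 0 = background
pos_inds = torch.nonzero(labels > 0).squeeze(1)
labels_pos = labels[pos_inds] - 1       # shift to 0-based foreground ids

one_hot = torch.zeros_like(class_logits)
one_hot[pos_inds, labels_pos] = 1       # rows 1, 2 and 4 each get a single 1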
Example 30
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[list[BoxList]])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        # merge, for each image, the anchors generated at the different FPN levels
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]

        # for each image, get labels and regression targets matched to every anchor
        labels, regression_targets = self.prepare_targets(anchors, targets)

        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness, box_regression = \
            concat_box_prediction_layers(objectness, box_regression)

        objectness = objectness.squeeze()

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(  # box loss over the sampled positive anchors only
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds])

        return objectness_loss, box_loss
Example 31
    def __call__(self, class_logits, box_regression):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """

        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        labels = cat([proposal.get_field("labels") for proposal in proposals], dim=0)
        regression_targets = cat(
            [proposal.get_field("regression_targets") for proposal in proposals], dim=0
        )

        classification_loss = F.cross_entropy(class_logits, labels)

        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing
        sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[sampled_pos_inds_subset]
        map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3], device=device)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds_subset[:, None], map_inds],
            regression_targets[sampled_pos_inds_subset],
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
Example 32
    def __call__(self, anchors, box_cls, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            retinanet_cls_loss (Tensor)
            retinanet_regression_loss (Tensor)
        """
        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets = self.prepare_targets(anchors, targets)

        N = len(labels)
        box_cls, box_regression = \
                concat_box_prediction_layers(box_cls, box_regression)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
        pos_inds = torch.nonzero(labels > 0).squeeze(1)

        retinanet_regression_loss = smooth_l1_loss(
            box_regression[pos_inds],
            regression_targets[pos_inds],
            beta=self.bbox_reg_beta,
            size_average=False,
        ) / (max(1, pos_inds.numel() * self.regress_norm))

        labels = labels.int()

        # debug: histogram of label frequencies (the result is unused)
        a = {}
        for c in labels:
            key = str(c.cpu().numpy())
            a[key] = a.get(key, 0) + 1

        retinanet_cls_loss = self.box_cls_loss_func(
            box_cls,
            labels
        ) / (pos_inds.numel() + N)

        return retinanet_cls_loss, retinanet_regression_loss