Exemple #1
0
    def loss_single(self, anchors, cls_score, bbox_pred, labels,
                    label_weights, bbox_targets, stride, num_total_samples, cfg):
        """Compute the QFL, box-regression and DFL losses of one scale level.

        Args:
            anchors (Tensor): Anchor boxes; flattened to ``(-1, 4)``.
            cls_score (Tensor): 4-D score map carrying
                ``self.cls_out_channels`` values along dim 1 (presumably
                (N, C, H, W) -- confirm against the caller).
            bbox_pred (Tensor): 4-D map carrying ``4 * (self.reg_max + 1)``
                values along dim 1.
            labels (Tensor): Per-anchor labels; every anchor with a non-zero
                label is treated as a positive.
            label_weights (Tensor): Per-anchor label weights. In this method
                they only serve as dtype/device template for the quality
                score tensor.
            bbox_targets (Tensor): Regression targets; flattened to
                ``(-1, 4)``.
            stride (int | float): Divisor applied to anchor centers and box
                targets before decoding.
            num_total_samples: Forwarded to ``self.loss_qfl`` as
                ``avg_factor``.
            cfg: Unused; kept so existing callers keep working.

        Returns:
            tuple: ``(loss_qfl, loss_bbox, loss_dfl, weight_sum)`` where
            ``weight_sum`` is the sum of the per-positive weights (0 when
            the level has no positive anchor).
        """
        anchors = anchors.reshape(-1, 4)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(-1, self.cls_out_channels)
        bbox_pred = bbox_pred.permute(0, 2, 3,
                                      1).reshape(-1, 4 * (self.reg_max + 1))
        bbox_targets = bbox_targets.reshape(-1, 4)
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)

        # Positives are the anchors whose label is non-zero.
        pos_inds = torch.nonzero(labels).squeeze(1)
        # Quality target handed to QFL; stays 0 for every non-positive anchor.
        score = label_weights.new_zeros(labels.shape)

        if len(pos_inds) > 0:
            pos_bbox_targets = bbox_targets[pos_inds]
            pos_bbox_pred = bbox_pred[pos_inds]  # (n, 4 * (reg_max + 1))
            pos_anchors = anchors[pos_inds]

            # Centers and targets are both divided by the stride so that
            # prediction and target live in the same coordinate scale.
            norm_anchor_center = self.anchor_center(pos_anchors) / stride

            pos_bbox_pred_distance = self.distribution_project(pos_bbox_pred)

            pos_decode_bbox_pred = distance2bbox(norm_anchor_center,
                                                 pos_bbox_pred_distance)
            pos_decode_bbox_targets = pos_bbox_targets / stride

            # NOTE(review): the third argument presumably caps the distances
            # at reg_max -- confirm against bbox2distance.
            target_ltrb = bbox2distance(norm_anchor_center,
                                        pos_decode_bbox_targets,
                                        self.reg_max).reshape(-1)
            # The quality score is computed on detached predictions, so no
            # gradient flows from the classification target into the boxes.
            score[pos_inds] = self.iou_target(pos_decode_bbox_pred.detach(),
                                              pos_decode_bbox_targets)
            # Each positive is weighted by its highest (detached) class
            # probability.
            weight_targets = \
                cls_score.detach().sigmoid().max(dim=1)[0][pos_inds]

            # regression loss
            loss_bbox = self.loss_bbox(
                pos_decode_bbox_pred,
                pos_decode_bbox_targets,
                weight=weight_targets,
                avg_factor=1.0)

            # One row of (reg_max + 1) logits per box side.
            pred_ltrb = pos_bbox_pred.reshape(-1, self.reg_max + 1)
            # dfl loss; the per-box weight is repeated for the 4 sides.
            loss_dfl = self.loss_dfl(
                pred_ltrb,
                target_ltrb,
                weight=weight_targets[:, None].expand(-1, 4).reshape(-1),
                avg_factor=4.0)
        else:
            # Zero losses derived from bbox_pred rather than plain constants.
            loss_bbox = bbox_pred.sum() * 0
            loss_dfl = bbox_pred.sum() * 0
            # Allocate on bbox_pred's device instead of hard-coding CUDA so
            # the empty-level case also works on CPU.
            weight_targets = bbox_pred.new_tensor(0)

        # qfl loss
        loss_qfl = self.loss_qfl(cls_score, labels, score,
                                 avg_factor=num_total_samples)

        return loss_qfl, loss_bbox, loss_dfl, weight_targets.sum()
    def get_bbox_prob_and_overlap(self, points, bbox_preds, gt_bboxes):
        """Score predicted distances against the ground-truth boxes.

        Args:
            points (Tensor): Reference points; the size of its dim 1 sets how
                many times every ground-truth box is repeated.
            bbox_preds (Tensor): Predicted distances, decoded into boxes with
                ``distance2bbox`` using ``self.distance_norm``.
            gt_bboxes (Tensor): Ground-truth boxes, indexed as
                ``[:, None, :]`` (so assumed to be 2-D -- confirm with the
                caller).

        Returns:
            tuple: ``(bbox_prob, bbox_overlap)``. ``bbox_prob`` is
            ``exp(-loss)`` of the unreduced ``self.loss_bbox`` between the
            predictions and the distance-encoded ground truth;
            ``bbox_overlap`` is the aligned overlap between the ground-truth
            boxes and the decoded predictions.
        """
        num_points = points.shape[1]
        gt_per_point = gt_bboxes[:, None, :]

        # Encode every ground-truth box as distances from each point.
        distance_targets = bbox2distance(
            points,
            gt_per_point.repeat(1, num_points, 1),
            norm=self.distance_norm)
        unreduced_loss = self.loss_bbox(
            bbox_preds, distance_targets, reduction_override='none')
        bbox_prob = unreduced_loss.neg().exp()

        # Decode the predictions back to boxes for the overlap measure.
        decoded_boxes = distance2bbox(
            points, bbox_preds, norm=self.distance_norm)
        bbox_overlap = bbox_overlaps(
            gt_per_point.expand_as(decoded_boxes),
            decoded_boxes,
            is_aligned=True)

        return bbox_prob, bbox_overlap
    def transform_bbox_targets(self, decoded_bboxes, mlvl_points, num_imgs):
        """Convert per-level box targets from corner to distance format.

        Args:
            decoded_bboxes (list[Tensor]): Regression targets of each level,
                in the form of (x1, y1, x2, y2).
            mlvl_points (list[Tensor]): Points of each fpn level, each has
                shape (num_points, 2).
            num_imgs (int): the number of images in a batch.

        Returns:
            list[Tensor]: Regression targets of each level in the form of
            (l, t, r, b).
        """
        # TODO: Re-implemented in Class PointCoder
        assert len(decoded_bboxes) == len(mlvl_points)
        # Tile every level's points once per image in the batch.
        tiled_points = [pts.repeat(num_imgs, 1) for pts in mlvl_points]
        return [
            bbox2distance(tiled_points[level], level_boxes)
            for level, level_boxes in enumerate(decoded_bboxes)
        ]
Exemple #4
0
    def loss_single(self, anchors, cls_score, bbox_pred, labels, label_weights,
                    bbox_targets, stride, soft_targets, num_total_samples):
        """Compute the losses of one scale level, including distillation.

        Args:
            anchors (Tensor): Box reference for each scale level with shape
                (N, num_total_anchors, 4).
            cls_score (Tensor): Cls and quality joint scores for each scale
                level with shape (N, num_classes, H, W).
            bbox_pred (Tensor): Box distribution logits for each scale
                level with shape (N, 4*(n+1), H, W), n is max value of
                integral set.
            labels (Tensor): Labels of each anchor with shape
                (N, num_total_anchors).
            label_weights (Tensor): Label weights of each anchor with shape
                (N, num_total_anchors).
            bbox_targets (Tensor): BBox regression targets of each anchor
                with shape (N, num_total_anchors, 4).
            stride (tuple): Stride in this scale level; both entries must be
                equal.
            soft_targets (Tensor): Soft distribution targets, laid out like
                ``bbox_pred`` (flattened the same way).
            num_total_samples (int): Number of positive samples that is
                reduced over all GPUs.

        Returns:
            tuple: ``(loss_cls, loss_bbox, loss_dfl, loss_ld, weight_sum)``
            where ``weight_sum`` is the summed weight of the positives.
        """
        assert stride[0] == stride[1], 'h stride is not equal to w stride!'
        scale = stride[0]
        dist_channels = 4 * (self.reg_max + 1)

        # Flatten everything to one row per anchor / location.
        anchors = anchors.reshape(-1, 4)
        cls_score = cls_score.permute(0, 2, 3, 1)
        cls_score = cls_score.reshape(-1, self.cls_out_channels)
        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, dist_channels)
        soft_targets = soft_targets.permute(0, 2, 3, 1)
        soft_targets = soft_targets.reshape(-1, dist_channels)

        bbox_targets = bbox_targets.reshape(-1, 4)
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)

        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes
        is_foreground = (labels >= 0) & (labels < self.num_classes)
        pos_inds = is_foreground.nonzero().squeeze(1)
        score = label_weights.new_zeros(labels.shape)

        if len(pos_inds) == 0:
            # No positives: zero losses derived from bbox_pred.
            loss_ld = bbox_pred.sum() * 0
            loss_bbox = bbox_pred.sum() * 0
            loss_dfl = bbox_pred.sum() * 0
            weight_targets = bbox_pred.new_tensor(0)
        else:
            pos_logits = bbox_pred[pos_inds]
            centers = self.anchor_center(anchors[pos_inds]) / scale

            # Weight each positive by its best (detached) class probability.
            best_prob = cls_score.detach().sigmoid().max(dim=1)[0]
            weight_targets = best_prob[pos_inds]

            decoded_pred = distance2bbox(centers, self.integral(pos_logits))
            decoded_target = bbox_targets[pos_inds] / scale
            score[pos_inds] = bbox_overlaps(decoded_pred.detach(),
                                            decoded_target,
                                            is_aligned=True)

            # One row of (reg_max + 1) bins per box side.
            pred_corners = pos_logits.reshape(-1, self.reg_max + 1)
            soft_corners = soft_targets[pos_inds].reshape(
                -1, self.reg_max + 1)
            target_corners = bbox2distance(centers, decoded_target,
                                           self.reg_max).reshape(-1)
            # Per-box weight repeated for each of the 4 sides.
            side_weights = weight_targets[:, None].expand(-1, 4).reshape(-1)

            # regression loss
            loss_bbox = self.loss_bbox(decoded_pred,
                                       decoded_target,
                                       weight=weight_targets,
                                       avg_factor=1.0)

            # dfl loss
            loss_dfl = self.loss_dfl(pred_corners,
                                     target_corners,
                                     weight=side_weights,
                                     avg_factor=4.0)

            # ld loss
            loss_ld = self.loss_ld(pred_corners,
                                   soft_corners,
                                   weight=side_weights,
                                   avg_factor=4.0)

        # cls (qfl) loss
        loss_cls = self.loss_cls(cls_score, (labels, score),
                                 weight=label_weights,
                                 avg_factor=num_total_samples)

        return loss_cls, loss_bbox, loss_dfl, loss_ld, weight_targets.sum()
Exemple #5
0
    def loss_single(self, anchors, cls_score, bbox_pred, kps_pred, labels,
                    label_weights, bbox_targets, kps_targets, kps_weights,
                    stride, num_total_samples):
        """Compute loss of a single scale level.

        Args:
            anchors (Tensor): Box reference for each scale level with shape
                (N, num_total_anchors, 4).
            cls_score (Tensor): Cls and quality joint scores for each scale
                level has shape (N, num_classes, H, W).
            bbox_pred (Tensor): Box predictions for each scale level. With
                ``self.use_dfl`` these are distribution logits with
                ``4 * (self.reg_max + 1)`` channels; otherwise 4 channels
                that are decoded directly as distances.
            kps_pred (Tensor): Keypoint predictions. With ``self.use_kps``
                they are flattened to ``(-1, self.NK * 2)``; otherwise they
                are only used to derive a zero-valued keypoint loss.
            labels (Tensor): Labels of each anchors with shape
                (N, num_total_anchors).
            label_weights (Tensor): Label weights of each anchor with shape
                (N, num_total_anchors)
            bbox_targets (Tensor): BBox regression targets of each anchor
                with shape (N, num_total_anchors, 4).
            kps_targets (Tensor): Keypoint targets, flattened to
                ``(-1, self.NK * 2)``. Only read when ``self.use_kps``.
            kps_weights (Tensor): Keypoint weights, flattened to
                ``(-1, self.NK * 2)``. Only read when ``self.use_kps``.
            stride (tuple): Stride in this scale level; both entries must be
                equal.
            num_total_samples (int): Number of positive samples that is
                reduced over all GPUs.

        Returns:
            tuple: ``(loss_cls, loss_bbox, loss_dfl, loss_kps, weight_sum)``
            where ``weight_sum`` is the summed weight of the positives
            (0 when the level has no positive anchor).
        """
        assert stride[0] == stride[1], 'h stride is not equal to w stride!'
        # Local switch: use the overlap of the decoded prediction as the
        # classification target instead of a hard 1.0.
        use_qscore = True
        anchors = anchors.reshape(-1, 4)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(-1, self.cls_out_channels)
        if not self.use_dfl:
            bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
        else:
            bbox_pred = bbox_pred.permute(0, 2, 3,
                                          1).reshape(-1,
                                                     4 * (self.reg_max + 1))
        bbox_targets = bbox_targets.reshape(-1, 4)
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)

        if self.use_kps:
            kps_pred = kps_pred.permute(0, 2, 3, 1).reshape(-1, self.NK * 2)
            kps_targets = kps_targets.reshape((-1, self.NK * 2))
            kps_weights = kps_weights.reshape((-1, self.NK * 2))

        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes
        bg_class_ind = self.num_classes
        pos_inds = ((labels >= 0)
                    & (labels < bg_class_ind)).nonzero().squeeze(1)
        score = label_weights.new_zeros(labels.shape)

        if len(pos_inds) > 0:
            pos_bbox_targets = bbox_targets[pos_inds]
            pos_bbox_pred = bbox_pred[pos_inds]
            pos_anchors = anchors[pos_inds]
            pos_anchor_centers = self.anchor_center(pos_anchors) / stride[0]

            # Each positive is weighted by its highest (detached) class
            # probability.
            weight_targets = cls_score.detach().sigmoid()
            weight_targets = weight_targets.max(dim=1)[0][pos_inds]
            pos_decode_bbox_targets = pos_bbox_targets / stride[0]

            if self.use_dfl:
                pos_bbox_pred_corners = self.integral(pos_bbox_pred)
                pos_decode_bbox_pred = distance2bbox(pos_anchor_centers,
                                                     pos_bbox_pred_corners)
            else:
                pos_decode_bbox_pred = distance2bbox(pos_anchor_centers,
                                                     pos_bbox_pred)
            if self.use_kps:
                pos_kps_targets = kps_targets[pos_inds]
                pos_kps_pred = kps_pred[pos_inds]
                # One weight per positive: the largest keypoint weight of
                # the row, scaled by the classification-based weight.
                pos_kps_weights = kps_weights.max(
                    dim=1)[0][pos_inds] * weight_targets
                pos_kps_weights = pos_kps_weights.reshape((-1, 1))

                # Targets are encoded relative to the anchor centers; the
                # raw predictions are compared against them as-is.
                pos_decode_kps_targets = kps2distance(
                    pos_anchor_centers, pos_kps_targets / stride[0])
                pos_decode_kps_pred = pos_kps_pred

            if use_qscore:
                score[pos_inds] = bbox_overlaps(pos_decode_bbox_pred.detach(),
                                                pos_decode_bbox_targets,
                                                is_aligned=True)
            else:
                score[pos_inds] = 1.0

            # regression loss
            loss_bbox = self.loss_bbox(pos_decode_bbox_pred,
                                       pos_decode_bbox_targets,
                                       weight=weight_targets,
                                       avg_factor=1.0)

            if self.use_kps:
                loss_kps = self.loss_kps(
                    pos_decode_kps_pred * self.loss_kps_std,
                    pos_decode_kps_targets * self.loss_kps_std,
                    weight=pos_kps_weights,
                    avg_factor=1.0)
            else:
                loss_kps = kps_pred.sum() * 0

            # dfl loss
            if self.use_dfl:
                pred_corners = pos_bbox_pred.reshape(-1, self.reg_max + 1)
                target_corners = bbox2distance(pos_anchor_centers,
                                               pos_decode_bbox_targets,
                                               self.reg_max).reshape(-1)
                loss_dfl = self.loss_dfl(pred_corners,
                                         target_corners,
                                         weight=weight_targets[:, None].expand(
                                             -1, 4).reshape(-1),
                                         avg_factor=4.0)
            else:
                loss_dfl = bbox_pred.sum() * 0
        else:
            # Zero losses derived from the predictions rather than constants.
            loss_bbox = bbox_pred.sum() * 0
            loss_dfl = bbox_pred.sum() * 0
            loss_kps = kps_pred.sum() * 0
            # Allocate on bbox_pred's device instead of hard-coding CUDA so
            # the empty-level case also works on CPU.
            weight_targets = bbox_pred.new_tensor(0)

        loss_cls = self.loss_cls(cls_score, (labels, score),
                                 weight=label_weights,
                                 avg_factor=num_total_samples)

        return loss_cls, loss_bbox, loss_dfl, loss_kps, weight_targets.sum()