def losses(self, gt_classes, reg_targets, pred_class_logits, pred_box_reg):
        """Compute the focal classification loss and the IoU box-regression loss.

        Labels that are negative or equal to ``self.num_classes`` are treated
        as non-foreground; every other label marks a positive location.

        Args:
            gt_classes: integer class labels; flattened to 1-D here.
            reg_targets: box regression targets; viewed as ``(-1, 4)`` here.
            pred_class_logits: classification predictions, forwarded to
                ``permute_and_concat``.
            pred_box_reg: box regression predictions, forwarded to
                ``permute_and_concat``.

        Returns:
            dict with the keys ``cls_loss`` and ``reg_loss``.

        Side effects:
            Updates ``self.loss_normalizer`` as an exponential moving average
            (momentum ``self.loss_normalizer_momentum``) of the synced
            positive count.
        """
        pred_class_logits, pred_box_reg = permute_and_concat(
            pred_class_logits, pred_box_reg, self.num_classes)

        gt_classes = gt_classes.flatten()
        reg_targets = reg_targets.view(-1, 4)

        foreground_idxs = (gt_classes >= 0) & (gt_classes != self.num_classes)
        pos_inds = torch.nonzero(foreground_idxs).squeeze(1)

        num_gpus = get_num_gpus()
        # Sync the positive count across all GPUs; the per-GPU average is
        # clamped to at least 1 so it is always a safe divisor.
        total_num_pos = reduce_sum(
            pos_inds.new_tensor([pos_inds.numel()])).item()
        num_pos_avg_per_gpu = max(total_num_pos / float(num_gpus), 1.0)

        # One-hot classification target: a single 1 in the label's column for
        # every foreground row, all zeros elsewhere.
        gt_classes_target = torch.zeros_like(pred_class_logits)
        gt_classes_target[foreground_idxs, gt_classes[foreground_idxs]] = 1

        self.loss_normalizer = (
            self.loss_normalizer_momentum * self.loss_normalizer +
            (1 - self.loss_normalizer_momentum) * total_num_pos)

        # logits loss
        cls_loss = sigmoid_focal_loss_jit(
            pred_class_logits,
            gt_classes_target,
            alpha=self.focal_loss_alpha,
            gamma=self.focal_loss_gamma,
            reduction="sum",
        ) / num_pos_avg_per_gpu

        if pos_inds.numel() > 0:
            reg_loss = iou_loss(
                pred_box_reg[foreground_idxs],
                reg_targets[foreground_idxs],
                loss_type=self.iou_loss_type) / num_pos_avg_per_gpu
        else:
            # No positives on this process: summing the empty selection gives
            # a zero-valued loss that is still derived from pred_box_reg.
            # Deliberately no reduce_sum() here: the positive branch issues
            # none, so calling it only on processes without positives would
            # unbalance the collectives (assuming reduce_sum is an all-reduce
            # -- TODO confirm). There is also no centerness prediction in this
            # head to build such a tensor from.
            reg_loss = pred_box_reg[foreground_idxs].sum()

        return dict(cls_loss=cls_loss, reg_loss=reg_loss)
# Example #2
# 0
    def losses(self, labels, reg_targets, box_cls, box_regression, centerness):
        """Compute classification, box-regression and centerness losses.

        Every argument is indexed per level (``arg[level]``). The per-level
        tensors are flattened, concatenated over levels, and the losses are
        evaluated on the concatenated result.

        Args:
            labels: per-level class labels; negative values and
                ``self.num_classes`` are treated as non-foreground.
            reg_targets: per-level regression targets, reshaped to ``(-1, 4)``.
            box_cls: per-level class logits with the class axis at dim 1
                (moved last before flattening).
            box_regression: per-level box predictions with the 4 box values at
                dim 1 (moved last before flattening).
            centerness: per-level centerness logits, flattened to 1-D.

        Returns:
            dict with the keys ``cls_loss``, ``reg_loss`` and
            ``centerness_loss``.
        """
        _, num_classes = box_cls[0].shape[:2]

        # Flatten each level so that one row corresponds to one location.
        cls_per_level = []
        reg_per_level = []
        ctr_per_level = []
        label_per_level = []
        target_per_level = []
        for level, level_labels in enumerate(labels):
            level_cls = box_cls[level].permute(0, 2, 3, 1)
            cls_per_level.append(level_cls.reshape(-1, num_classes))
            level_reg = box_regression[level].permute(0, 2, 3, 1)
            reg_per_level.append(level_reg.reshape(-1, 4))
            label_per_level.append(level_labels.reshape(-1))
            target_per_level.append(reg_targets[level].reshape(-1, 4))
            ctr_per_level.append(centerness[level].reshape(-1))

        all_cls = torch.cat(cls_per_level, dim=0)
        all_reg = torch.cat(reg_per_level, dim=0)
        all_ctr = torch.cat(ctr_per_level, dim=0)
        all_labels = torch.cat(label_per_level, dim=0)
        all_targets = torch.cat(target_per_level, dim=0)

        # Foreground = valid label that is not the background index.
        fg_mask = (all_labels >= 0) & (all_labels != self.num_classes)
        pos_inds = torch.nonzero(fg_mask).squeeze(1)

        pos_reg = all_reg[pos_inds]
        pos_targets = all_targets[pos_inds]
        pos_ctr = all_ctr[pos_inds]

        # Sync the positive count across GPUs; clamp the per-GPU average to
        # at least 1 so it can be used as a divisor.
        num_gpus = get_num_gpus()
        local_count = pos_inds.new_tensor([pos_inds.numel()])
        total_num_pos = reduce_sum(local_count).item()
        num_pos_avg_per_gpu = max(total_num_pos / float(num_gpus), 1.0)

        # One-hot classification targets for the focal loss.
        one_hot_target = torch.zeros_like(all_cls)
        one_hot_target[fg_mask, all_labels[fg_mask]] = 1

        focal_sum = sigmoid_focal_loss_jit(
            all_cls,
            one_hot_target,
            alpha=self.focal_loss_alpha,
            gamma=self.focal_loss_gamma,
            reduction="sum",
        )
        cls_loss = focal_sum / num_pos_avg_per_gpu

        if pos_inds.numel() == 0:
            # No positives here: emit zero-valued sums over the empty
            # selections, and still take part in the reduce_sum that the
            # positive branch performs.
            reg_loss = pos_reg.sum()
            reduce_sum(pos_ctr.new_tensor([0.0]))
            centerness_loss = pos_ctr.sum()
        else:
            centerness_targets = compute_centerness_targets(pos_targets)
            # Average of the summed centerness targets over all GPUs; it
            # normalizes the centerness-weighted regression loss.
            synced_ctr_sum = reduce_sum(centerness_targets.sum()).item()
            sum_centerness_targets_avg_per_gpu = synced_ctr_sum / float(num_gpus)

            weighted_iou = iou_loss(pos_reg,
                                    pos_targets,
                                    centerness_targets,
                                    loss_type=self.iou_loss_type)
            reg_loss = weighted_iou / sum_centerness_targets_avg_per_gpu

            ctr_bce = F.binary_cross_entropy_with_logits(
                pos_ctr, centerness_targets, reduction='sum')
            centerness_loss = ctr_bce / num_pos_avg_per_gpu

        return {
            "cls_loss": cls_loss,
            "reg_loss": reg_loss,
            "centerness_loss": centerness_loss,
        }
    def losses(self, init_gt_classes, init_reg_targets, refine_gt_classes, refine_reg_targets,
               pred_class_logits, pred_box_reg_init, pred_box_reg, pred_center_score, strides, pred_ratio):
        """Compute the losses of a two-stage (init + refine) box head.

        Two label sets are used. The ``init_*`` labels select the locations
        for the initial box IoU loss and the centerness loss; the
        ``refine_*`` labels select the locations for the classification loss
        and the refined-box smooth-L1 loss. In both sets a label that is
        negative or equal to ``self.num_classes`` is non-foreground.

        Args:
            init_gt_classes: class labels of the init assignment (flattened).
            init_reg_targets: init regression targets, viewed as ``(-1, 4)``.
            refine_gt_classes: class labels of the refine assignment
                (flattened).
            refine_reg_targets: refine regression targets, viewed as
                ``(-1, 4)``.
            pred_class_logits, pred_box_reg_init, pred_box_reg,
            pred_center_score, pred_ratio: predictions, forwarded to
                ``permute_and_concat``.
            strides: per-location strides, tiled
                ``pred_class_logits[0].shape[0]`` times (presumably the batch
                size -- TODO confirm) to line up with the flattened
                predictions.

        Returns:
            dict with the keys ``cls_loss``, ``reg_loss_init``, ``reg_loss``
            and ``centerness_loss``.
        """
        strides = strides.repeat(pred_class_logits[0].shape[0])  # [N*X]
        (pred_class_logits, pred_box_reg_init, pred_box_reg,
         pred_center_score, pred_ratio) = permute_and_concat(
            pred_class_logits, pred_box_reg_init, pred_box_reg,
            pred_center_score, pred_ratio, self.num_classes)

        init_gt_classes = init_gt_classes.flatten()
        init_reg_targets = init_reg_targets.view(-1, 4)

        init_foreground_idxs = (init_gt_classes >= 0) & (init_gt_classes != self.num_classes)
        init_pos_inds = torch.nonzero(init_foreground_idxs).squeeze(1)

        num_gpus = get_num_gpus()
        # Sync the init positive count across GPUs; clamp the per-GPU average
        # to at least 1 so it is a safe divisor.
        init_total_num_pos = reduce_sum(
            init_pos_inds.new_tensor([init_pos_inds.numel()])).item()
        init_num_pos_avg_per_gpu = max(init_total_num_pos / float(num_gpus), 1.0)

        refine_gt_classes = refine_gt_classes.flatten()
        refine_reg_targets = refine_reg_targets.view(-1, 4)

        refine_foreground_idxs = (refine_gt_classes >= 0) & (refine_gt_classes != self.num_classes)
        refine_pos_inds = torch.nonzero(refine_foreground_idxs).squeeze(1)

        # Same synchronisation for the refine positive count.
        refine_total_num_pos = reduce_sum(
            refine_pos_inds.new_tensor([refine_pos_inds.numel()])).item()
        refine_num_pos_avg_per_gpu = max(refine_total_num_pos / float(num_gpus), 1.0)

        # One-hot classification targets, built from the refine assignment.
        gt_classes_target = torch.zeros_like(pred_class_logits)
        gt_classes_target[refine_foreground_idxs, refine_gt_classes[refine_foreground_idxs]] = 1

        # logits loss
        cls_loss = sigmoid_focal_loss_jit(
            pred_class_logits, gt_classes_target,
            alpha=self.focal_loss_alpha, gamma=self.focal_loss_gamma, reduction="sum",
        ) / refine_num_pos_avg_per_gpu

        # Ratio of (col 0 + col 2) to (col 1 + col 3) of the init targets,
        # folded with its reciprocal so the smaller of the two is used.
        init_foreground_targets = init_reg_targets[init_foreground_idxs]
        gt_ratio_1 = (init_foreground_targets[:, 0] + init_foreground_targets[:, 2]) \
            / (init_foreground_targets[:, 1] + init_foreground_targets[:, 3])
        gt_ratio_2 = 1 / gt_ratio_1
        gt_ratios = torch.stack((gt_ratio_1, gt_ratio_2), dim=1)
        gt_ratio = gt_ratios.min(dim=1)[0]
        gt_center_score = compute_centerness_targets(init_foreground_targets, gt_ratio)

        # Average of the summed centerness targets over all GPUs, used to
        # normalize the centerness-weighted init regression loss. Clamped
        # away from zero: when there are no init positives on any GPU the sum
        # is 0 and dividing by it would turn the loss into NaN/inf.
        sum_centerness_targets_avg_per_gpu = max(
            reduce_sum(gt_center_score.sum()).item() / float(num_gpus), 1e-6)
        reg_loss_init = iou_loss(
            pred_box_reg_init[init_foreground_idxs], init_foreground_targets, gt_center_score,
            loss_type=self.iou_loss_type
        ) / sum_centerness_targets_avg_per_gpu

        # Refined boxes: smooth-L1 on predictions and targets that are both
        # divided by 4x the stride of their location.
        coords_norm_refine = strides[refine_foreground_idxs].unsqueeze(-1) * 4
        # refine_num_pos_avg_per_gpu is already clamped to >= 1 above.
        reg_loss = smooth_l1_loss(
            pred_box_reg[refine_foreground_idxs] / coords_norm_refine,
            refine_reg_targets[refine_foreground_idxs] / coords_norm_refine,
            0.11, reduction="sum") / refine_num_pos_avg_per_gpu

        # Centerness: |predicted score| raised to the predicted ratio is fed
        # to BCE-with-logits against the ratio-aware centerness targets.
        centerness_loss = F.binary_cross_entropy_with_logits(
            torch.pow(torch.abs(pred_center_score[init_foreground_idxs]),
                      pred_ratio[init_foreground_idxs]),
            gt_center_score, reduction='sum'
        ) / init_num_pos_avg_per_gpu

        return dict(cls_loss=cls_loss, reg_loss_init=reg_loss_init, reg_loss=reg_loss, centerness_loss=centerness_loss)
    def losses(self, locations, class_logits, center_score, box_reg_init,
               box_reg, gt_instances):
        """Compute the weighted classification, centerness and box losses.

        Ground truth is obtained from ``self.get_ground_truth(locations,
        gt_instances)``, which yields class labels, localisation targets and
        a set of top-k location indices. Labels that are negative or equal to
        ``self.num_classes`` are non-foreground.

        Args:
            locations: forwarded to ``self.get_ground_truth``.
            class_logits, center_score, box_reg_init, box_reg: predictions,
                forwarded to ``permute_and_concat_v2``.
            gt_instances: forwarded to ``self.get_ground_truth``.

        Returns:
            dict with the keys ``loss_cls``, ``centerness_loss``,
            ``loss_loc_init`` and ``loss_loc_refine``, each already
            multiplied by its configured weight (the centerness loss uses
            ``self.loss_cls_weight``).
        """
        gt_classes, loc_targets, topk_locations = self.get_ground_truth(
            locations, gt_instances)

        class_logits, box_reg_init, box_reg, center_score = permute_and_concat_v2(
            class_logits, box_reg_init, box_reg, center_score,
            self.num_classes)

        gt_classes = gt_classes.flatten()
        loc_targets = loc_targets.view(-1, 4)

        foreground_idxs = (gt_classes >= 0) & (gt_classes != self.num_classes)
        pos_inds = torch.nonzero(foreground_idxs).squeeze(1)

        num_gpus = get_num_gpus()
        # Sync the positive count across GPUs; clamp the per-GPU average to
        # at least 1 so it is a safe divisor.
        total_num_pos = reduce_sum(
            pos_inds.new_tensor([pos_inds.numel()])).item()
        num_pos_avg_per_gpu = max(total_num_pos / float(num_gpus), 1.0)

        # One-hot classification targets for the focal loss.
        gt_classes_target = torch.zeros_like(class_logits)
        gt_classes_target[foreground_idxs, gt_classes[foreground_idxs]] = 1

        # logits loss
        cls_loss = sigmoid_focal_loss_jit(
            class_logits,
            gt_classes_target,
            alpha=self.focal_loss_alpha,
            gamma=self.focal_loss_gamma,
            reduction="sum",
        ) / num_pos_avg_per_gpu

        if pos_inds.numel() > 0:
            if self.slender_centerness:
                gt_center_score = compute_slender_centerness_targets(
                    loc_targets[foreground_idxs])
            else:
                gt_center_score = compute_centerness_targets(
                    loc_targets[foreground_idxs])
            # Average of the summed centerness targets over all GPUs, used to
            # normalize the centerness-weighted refine regression loss.
            sum_centerness_targets_avg_per_gpu = \
                reduce_sum(gt_center_score.sum()).item() / float(num_gpus)

            # Same normalizer for the top-k locations that supervise the
            # initial box prediction.
            topk_locations = topk_locations.view(-1)
            topk_gt_center_score = compute_centerness_targets(
                loc_targets[topk_locations])
            sum_topk_centerness_targets_avg_per_gpu = \
                reduce_sum(topk_gt_center_score.sum()).item() / float(num_gpus)

            loss_loc_init = iou_loss(
                box_reg_init[topk_locations],
                loc_targets[topk_locations],
                topk_gt_center_score,
                loss_type=self.iou_loss_type
            ) / sum_topk_centerness_targets_avg_per_gpu

            loss_loc_refine = iou_loss(box_reg[foreground_idxs],
                                       loc_targets[foreground_idxs],
                                       gt_center_score,
                                       loss_type=self.iou_loss_type
                                       ) / sum_centerness_targets_avg_per_gpu

            centerness_loss = F.binary_cross_entropy_with_logits(
                center_score[foreground_idxs],
                gt_center_score,
                reduction='sum') / num_pos_avg_per_gpu
        else:
            # No positives on this process: zero-valued sums over the empty
            # selections keep every loss a tensor derived from its prediction.
            loss_loc_init = box_reg_init[foreground_idxs].sum()
            loss_loc_refine = box_reg[foreground_idxs].sum()
            centerness_loss = center_score[foreground_idxs].sum()
            # The positive branch calls reduce_sum() twice (centerness sum and
            # top-k centerness sum). Match that count here so every process
            # issues the same number of collectives (assuming reduce_sum is
            # an all-reduce -- TODO confirm); the results are discarded.
            reduce_sum(center_score[foreground_idxs].new_tensor([0.0]))
            reduce_sum(center_score[foreground_idxs].new_tensor([0.0]))

        return dict(
            loss_cls=cls_loss * self.loss_cls_weight,
            centerness_loss=centerness_loss * self.loss_cls_weight,
            loss_loc_init=loss_loc_init * self.loss_loc_init_weight,
            loss_loc_refine=loss_loc_refine * self.loss_loc_refine_weight,
        )