def __init__(self, alpha=0.25, gamma=2.0, lambda_p=5.0, temperature=1. / 3, strides=None, iou_type='giou'):
    """Store the loss hyper-parameters and create the box / IoU helpers.

    :param alpha: kept verbatim as ``self.alpha``.
    :param gamma: kept verbatim as ``self.gamma``.
    :param lambda_p: kept verbatim as ``self.lambda_p``.
    :param temperature: kept verbatim as ``self.temperature``.
    :param strides: per-level strides; ``None`` selects ``[8, 16, 32, 64, 128]``.
    :param iou_type: forwarded to ``IOULoss`` as ``iou_type``.
    """
    # Only build the default stride list when the caller did not supply one.
    self.strides = [8, 16, 32, 64, 128] if strides is None else strides

    self.alpha, self.gamma = alpha, gamma
    self.lambda_p, self.temperature = lambda_p, temperature

    # Helper objects come from elsewhere in the project; the IoU loss is
    # configured for (l, t, r, b) style coordinates via coord_type='ltrb'.
    self.box_coder = BoxCoder()
    self.iou_loss_func = IOULoss(iou_type=iou_type, coord_type='ltrb')
Example #2
0
 def __init__(self, radius=1, strides=None, layer_limits=None):
     """Set up the matching radius, strides and per-level size limits.

     :param radius: stored as ``self.radius``; selects the matching method.
     :param strides: per-level strides; ``None`` selects ``[8, 16, 32, 64, 128]``.
     :param layer_limits: interior size limits between consecutive levels;
         ``None`` selects ``[64, 128, 256, 512]``.
     """
     if strides is None:
         strides = [8, 16, 32, 64, 128]
     if layer_limits is None:
         layer_limits = [64, 128, 256, 512]

     self.radius = radius  # decide matching method
     self.box_coder = BoxCoder()
     self.strides = torch.tensor(strides)

     # FCOS gives every level its own range of target sizes so that one
     # sample is not matched to several targets. Each interior limit is
     # duplicated (list of len=8 for the defaults) so that, once -1 and INF
     # are attached at the ends, the flat list reshapes into [min, max] rows:
     #   [[-1, 64], [64, 128], [128, 256], [256, 512], [512, INF]]  shape=[5, 2]
     doubled_limits = np.repeat(layer_limits, 2).tolist()
     level_bounds = [-1.] + doubled_limits + [INF]
     self.layer_limits = torch.tensor(level_bounds).view(-1, 2)
Example #3
0
 def __init__(self, alpha=0.25, gamma=2.0, lambda_p=5.0, temperature=1. / 3,
              strides=None, iou_type='giou',
              positive_weights=0.1, negative_weights=1.0):
     """Store the loss hyper-parameters and create the box / IoU helpers.

     :param alpha: kept verbatim as ``self.alpha``.
     :param gamma: kept verbatim as ``self.gamma``.
     :param lambda_p: balances the loss weight of positive and negative samples.
     :param temperature: emphasises locations with high confidence.
     :param strides: per-level strides; ``None`` selects ``[8, 16, 32, 64, 128]``.
     :param iou_type: forwarded to ``IOULoss`` as ``iou_type``.
     :param positive_weights: weight of the positive-sample loss.
     :param negative_weights: weight of the negative-sample loss.
     """
     # Only build the default stride list when the caller did not supply one.
     self.strides = [8, 16, 32, 64, 128] if strides is None else strides

     self.alpha, self.gamma = alpha, gamma
     self.lambda_p = lambda_p
     self.temperature = temperature

     # Weights of the positive / negative losses; the paper does not state
     # them explicitly, so these values are still to be settled.
     self.positive_weights = positive_weights
     self.negative_weights = negative_weights

     self.box_coder = BoxCoder()
     self.iou_loss_func = IOULoss(iou_type=iou_type, coord_type='ltrb')
Example #4
0
class FCOSLossBuilder(object):
    """Target builder: assigns every grid point a regression target and label."""

    def __init__(self, radius=1, strides=None, layer_limits=None):
        """Set up the matching radius, strides and per-level size limits.

        :param radius: stored as ``self.radius``; selects the matching method.
        :param strides: per-level strides; ``None`` selects ``[8, 16, 32, 64, 128]``.
        :param layer_limits: interior size limits between consecutive levels;
            ``None`` selects ``[64, 128, 256, 512]``.
        """
        if strides is None:
            strides = [8, 16, 32, 64, 128]
        if layer_limits is None:
            layer_limits = [64, 128, 256, 512]

        self.radius = radius  # decide matching method
        self.box_coder = BoxCoder()
        self.strides = torch.tensor(strides)

        # FCOS gives every level its own range of target sizes so that one
        # sample is not matched to several targets. Each interior limit is
        # duplicated (list of len=8 for the defaults) so that, once -1 and INF
        # are attached at the ends, the flat list reshapes into [min, max] rows:
        #   [[-1, 64], [64, 128], [128, 256], [256, 512], [512, INF]]  shape=[5, 2]
        doubled_limits = np.repeat(layer_limits, 2).tolist()
        level_bounds = [-1.] + doubled_limits + [INF]
        self.layer_limits = torch.tensor(level_bounds).view(-1, 2)

    @torch.no_grad()
    def __call__(self, bs, grids, targets):
        """Build per-image regression and label targets for all grid points.

        :param bs: batch size.
        :param grids: list with one entry per level (num_layer=5 by default),
            each of shape [h, w, 2] with 2 ==> (x, y) in original-image scale.
        :param targets: [gts, 7] rows of (batch_id, weights, label_id, x1, y1, x2, y2),
            box corners in original-image scale.
        :return: ``(batch_reg_targets, batch_label_targets)``, two lists of len=bs.

            * ``batch_reg_targets[i]``: [num_grids, 4] with 4 ==> (l*, t*, r*, b*).
              Negative samples may still carry positive values here, so the
              label targets are needed to tell negatives apart. For an image
              without any target the entry is an empty ``torch.Tensor()``.
            * ``batch_label_targets[i]``: one value per grid point: the label id
              of the matched gt box, or -1 for a negative sample.
        """
        device = grids[0].device
        # The cached tensors follow the device of the incoming grids.
        self.layer_limits = self.layer_limits.to(device)
        self.strides = self.strides.to(device)

        # One row per grid point over all feature maps:
        # [num_grids, 5] with 5 ==> (xc, yc, min_limit, max_limit, stride)
        rows_per_level = []
        for grid, layer_limit, stride in zip(grids, self.layer_limits,
                                             self.strides):
            level_rows = torch.cat([
                grid,
                layer_limit.expand_as(grid),
                stride.expand_as(grid[..., [0]]),
            ], dim=-1)
            rows_per_level.append(level_rows.view(-1, 5))
        expand_grid = torch.cat(rows_per_level, dim=0)
        num_grids = len(expand_grid)

        # build targets for each image
        batch_reg_targets = []
        batch_label_targets = []

        for bi in range(bs):
            belongs_to_image = targets[:, 0] == bi
            # [num_gts, 6] ==> (weights, label_id, x1, y1, x2, y2)
            batch_targets = targets[belongs_to_image, 1:]

            # no target in the image: every grid point is a negative sample
            if len(batch_targets) == 0:
                # NOTE(review): this placeholder is an empty default (CPU)
                # tensor rather than a [num_grids, 4] tensor on `device` -
                # confirm that the consumer expects exactly this.
                batch_reg_targets.append(torch.Tensor())
                batch_label_targets.append(
                    torch.full((num_grids, ), -1.,
                               device=device,
                               dtype=torch.float32))
                continue

            # encode grid point with all targets
            # shape=[num_grids, num_gts, 4] (l, t, r, b) in original-image scale
            # NOTE(review): the full 5-column grid is handed to `encoder` -
            # confirm the coder only reads the (xc, yc) columns.
            gt_boxes = batch_targets[:, 2:]
            reg_target_per_img = self.box_coder.encoder(expand_grid, gt_boxes)

            # Filter 1:
            #   radius == 0: drop points that are not inside the gt box.
            #   otherwise:   drop points that are not inside the window
            #                (gt_xc - radius*stride, gt_yc - radius*stride,
            #                 gt_xc + radius*stride, gt_yc + radius*stride)
            #                clipped to the gt box itself.
            if self.radius == 0:
                valid_in_box = reg_target_per_img.min(dim=2)[0] > 0
            else:
                gt_top_left = batch_targets[None, :, [2, 3]]
                gt_bottom_right = batch_targets[None, :, [4, 5]]
                # shape=[num_gts, 2], 2 ==> (xc, yc)
                gt_center = (batch_targets[:, [2, 3]] +
                             batch_targets[:, [4, 5]]) / 2.0
                # [num_grids, 1, 2]: stride of each grid point, once per axis
                grid_stride = expand_grid[:, None, [4, 4]]

                # [1, num_gts, 2] -/+ [num_grids, 1, 2] = [num_grids, num_gts, 2]
                window_min = gt_center[None, :, :] - grid_stride * self.radius
                window_max = gt_center[None, :, :] + grid_stride * self.radius
                # Clip the window so it never leaves the gt box.
                window_min = torch.where(window_min > gt_top_left,
                                         window_min, gt_top_left)
                window_max = torch.where(window_max < gt_bottom_right,
                                         window_max, gt_bottom_right)

                grid_xy = expand_grid[:, None, [0, 1]]
                left_top = grid_xy - window_min
                right_bottom = window_max - grid_xy
                window_distances = torch.cat([left_top, right_bottom], dim=2)
                valid_in_box = window_distances.min(dim=2)[0] > 0

            # Filter 2: drop pairs whose gt box violates the size limits of
            # the level the grid point lives on.
            # shape=[num_grids, num_gts]
            max_reg_targets_per_img = reg_target_per_img.max(dim=2)[0]
            above_min = max_reg_targets_per_img >= expand_grid[:, [2]]
            below_max = max_reg_targets_per_img <= expand_grid[:, [3]]
            is_card_in_level = above_min & below_max

            gt_width = batch_targets[:, 4] - batch_targets[:, 2]
            gt_height = batch_targets[:, 5] - batch_targets[:, 3]
            gt_area = gt_width * gt_height  # shape=[num_gts]
            # shape=[num_grids, num_gts]
            locations_to_gt_area = gt_area[None, :].repeat(num_grids, 1)
            # Pairs that failed either filter get an infinite area so that
            # they can never win the minimum below.
            rejected = ~(valid_in_box & is_card_in_level)
            locations_to_gt_area[rejected] = INF

            # Filter 3: when a grid point still matches several gt boxes,
            # keep the one with the smallest area.
            # shape=[num_grids] for both results
            min_area, gt_idx = locations_to_gt_area.min(dim=1)
            row_idx = torch.arange(len(reg_target_per_img),
                                   device=gt_idx.device)
            # shape=[num_grids, 4] (l, t, r, b) in original-image scale
            reg_target_per_img = reg_target_per_img[row_idx, gt_idx]

            labels_per_img = batch_targets[:, 1][gt_idx]
            # Grid points whose every candidate was rejected are negatives.
            labels_per_img[min_area == INF] = -1
            batch_reg_targets.append(reg_target_per_img)
            batch_label_targets.append(labels_per_img)

        return batch_reg_targets, batch_label_targets
class FCOSAutoAssignLoss(object):
    """Loss with learned positive/negative weighting of grid points per gt box."""

    def __init__(self,
                 alpha=0.25,
                 gamma=2.0,
                 lambda_p=5.0,
                 temperature=1. / 3,
                 strides=None,
                 iou_type='giou',
                 positive_weights=0.1,
                 negative_weights=1.0):
        """Store the loss hyper-parameters and create the box / IoU helpers.

        :param alpha: the negative loss is scaled by ``(1 - alpha)``.
        :param gamma: exponent applied to the (weighted) negative probability.
        :param lambda_p: scale inside ``exp(-lambda_p * iou_loss)``.
        :param temperature: divisor inside ``exp(joint_prob / temperature)``.
        :param strides: per-level strides; ``None`` selects ``[8, 16, 32, 64, 128]``.
        :param iou_type: forwarded to ``IOULoss`` as ``iou_type``.
        :param positive_weights: final multiplier of the positive loss.
        :param negative_weights: final multiplier of the negative loss.
        """
        self.alpha = alpha
        self.gamma = gamma
        self.lambda_p = lambda_p
        self.temperature = temperature
        self.positive_weights = positive_weights
        self.negative_weights = negative_weights
        if strides is None:
            strides = [8, 16, 32, 64, 128]
        self.strides = strides
        self.box_coder = BoxCoder()
        self.iou_loss_func = IOULoss(iou_type=iou_type, coord_type='ltrb')

    def __call__(self, cls_predicts, box_predicts, implicits, grids, gaussian, targets):
        """Compute the total loss for one batch.

        :param cls_predicts: list(cls_predict) cls_predict [bs, cls, h, w]
        :param box_predicts: list(box_predict) box_predict [bs, 4, h, w]
        :param implicits: list(implicit) implicit [bs, 1, h, w]
        :param grids: list(grid), one per level, grid [h, w, 2]
        :param gaussian: [cls, 4]; columns 0-1 are used as the mean and
            columns 2-3 as the spread of the per-class centre prior
        :param targets: [gt, 7] (bs, weights, label_id, x1, y1, x2, y2)
        :return: ``(total_loss, detached [negative, positive] loss pair, len(targets))``
        """
        device = cls_predicts[0].device
        bs = cls_predicts[0].shape[0]
        cls_num = cls_predicts[0].shape[1]
        # expand_grid [grid_num,3](xc,yc,stride)
        expand_grid = torch.cat([
            torch.cat([
                grid_item,
                torch.tensor(data=stride_item, device=device, dtype=torch.float).expand_as(grid_item[..., [0]])
            ], dim=-1).view(-1, 3) for stride_item, grid_item in zip(self.strides, grids)], dim=0)

        # Promote half-precision maps to float32. New lists are built so the
        # lists owned by the caller are not modified as a side effect.
        cls_predicts = [item.float() if item.dtype == torch.float16 else item for item in cls_predicts]
        implicits = [item.float() if item.dtype == torch.float16 else item for item in implicits]

        negative_loss_list = list()
        positive_loss_list = list()
        for bi in range(bs):
            # batch_cls_predicts [grid_num,cls_num]
            batch_cls_predicts = torch.cat(
                [cls_item[bi].permute(1, 2, 0).contiguous().view(-1, cls_num) for cls_item in cls_predicts],
                dim=0).sigmoid()
            # batch_implicit [grid_num,1]
            batch_implicit = torch.cat(
                [implicit_item[bi].permute(1, 2, 0).contiguous().view(-1, 1) for implicit_item in implicits],
                dim=0).sigmoid()

            # Clamped away from 0 and 1 so the log terms below stay finite.
            batch_join_predicts = (batch_cls_predicts * batch_implicit).clamp(1e-6, 1 - 1e-6)
            # batch_box_predicts [grid_num, 4]
            batch_box_predicts = torch.cat(
                [box_item[bi].permute(1, 2, 0).contiguous().view(-1, 4) for box_item in box_predicts], dim=0)
            # [gt_num,6] (weights,label_idx,x1,y1,x2,y2)
            batch_targets = targets[targets[:, 0] == bi, 1:]
            if len(batch_targets) == 0:
                # Image without targets: only the (unweighted) negative term.
                negative_loss = -(1 - self.alpha) * batch_join_predicts ** self.gamma * (
                        1 - batch_join_predicts).log()
                negative_loss = negative_loss.sum()
                negative_loss_list.append(negative_loss)
                continue

            # ---------------- positive loss ----------------
            gt_xy = (batch_targets[:, [2, 3]] + batch_targets[:, [4, 5]]) / 2
            # [grid_num,gt_num,2] offset of every grid point to every gt centre, in stride units
            xy_offset = (expand_grid[:, None, :2] - gt_xy[None, :, :]) / expand_grid[:, None, [2]]
            # [grid_num,gt_num,4]
            batch_reg_targets = self.box_coder.encode(expand_grid[..., :2], batch_targets[..., 2:])
            # Candidate (grid, gt) pairs: all four encoded distances are positive.
            grid_idx, gt_idx = (batch_reg_targets.min(dim=-1)[0] > 0).nonzero(as_tuple=False).t()
            # Class id of the gt in every candidate pair (looked up once, reused below).
            gt_labels = batch_targets[gt_idx, 1].long()

            cls_prob = batch_join_predicts[grid_idx, gt_labels]
            iou_loss = self.iou_loss_func(batch_box_predicts[grid_idx, :], batch_reg_targets[grid_idx, gt_idx, :])
            loc_prob = (-self.lambda_p * iou_loss).exp()
            joint_prob = cls_prob * loc_prob
            confidence = (joint_prob / self.temperature).exp()
            # Centre prior: exp(-sum((d - mu)^2) / (2 * sum(sigma^2)))
            gaussian_delta_mu = -(
                    (xy_offset[grid_idx, gt_idx, :] - gaussian[gt_labels, :2]) ** 2
            ).sum(-1)
            gaussian_delta_theta = 2 * ((gaussian[gt_labels, 2:]) ** 2).sum(-1)
            gaussian_weights = (gaussian_delta_mu / gaussian_delta_theta).exp()
            positive_weights = confidence * gaussian_weights
            positive_loss = torch.tensor(data=0., device=device)
            # Weights are normalised per gt box over its own candidates.
            for unique_gt_idx in gt_idx.unique():
                grid_idx_mask = gt_idx == unique_gt_idx
                instance_weights = positive_weights[grid_idx_mask] / positive_weights[grid_idx_mask].sum()
                instance_loss = -(instance_weights * joint_prob[grid_idx_mask]).sum().log()
                positive_loss += instance_loss
            positive_loss_list.append(positive_loss)

            # ---------------- negative loss ----------------
            decode_box = self.box_coder.decoder(expand_grid[..., :2], batch_box_predicts).detach()
            predict_targets_iou = box_iou(decode_box, batch_targets[..., 2:])
            max_iou, max_iou_gt_idx = predict_targets_iou.max(dim=-1)
            # The denominator is clamped: an IoU of exactly 1 would otherwise
            # give inf here and NaN after the normalisation on the next line.
            func_iou = 1 / (1 - max_iou).clamp(min=1e-12)
            func_iou = 1 - (func_iou - 1) / (func_iou.max() - 1 + 1e-10)
            # Weight 1 everywhere except at the candidate (grid, class) cells.
            negative_weights = torch.ones(size=(expand_grid.shape[0], cls_num), device=device).float()
            negative_weights[grid_idx, gt_labels] = func_iou[grid_idx]
            weighted_negative_prob = negative_weights * batch_join_predicts
            negative_loss = -(1 - self.alpha) * weighted_negative_prob ** self.gamma * (
                    1 - weighted_negative_prob).log()
            negative_loss = negative_loss.sum()
            negative_loss_list.append(negative_loss)

        # Both sums are normalised by the number of gt boxes in the batch.
        total_negative_loss = torch.stack(negative_loss_list).sum() / max(1, len(targets))
        if len(targets) == 0:
            return total_negative_loss, \
                   torch.stack([total_negative_loss, torch.tensor(0., device=device)]).detach(), \
                   len(targets)
        total_positive_loss = torch.stack(positive_loss_list).sum() / max(1, len(targets))
        total_negative_loss = total_negative_loss * self.negative_weights
        total_positive_loss = total_positive_loss * self.positive_weights
        total_loss = total_negative_loss + total_positive_loss
        return total_loss, torch.stack([total_negative_loss, total_positive_loss]).detach(), len(targets)
Example #6
0
class FCOSAutoAssignLoss(object):
    """Loss with learned positive/negative weighting of grid points per gt box."""

    def __init__(self,
                 alpha=0.25,
                 gamma=2.0,
                 lambda_p=5.0,
                 temperature=1. / 3,
                 strides=None,
                 iou_type='giou',
                 positive_weights=0.1,
                 negative_weights=1.0):
        """Store the loss hyper-parameters and create the box / IoU helpers.

        :param alpha: final mix: negative loss * (1 - alpha), positive loss * alpha.
        :param gamma: exponent applied to the (weighted) negative probability.
        :param lambda_p: balances the loss weight of positive and negative samples.
        :param temperature: emphasises locations with high confidence.
        :param strides: per-level strides; ``None`` selects ``[8, 16, 32, 64, 128]``.
        :param iou_type: forwarded to ``IOULoss`` as ``iou_type``.
        :param positive_weights: stored on the instance; not read by ``__call__``.
        :param negative_weights: stored on the instance; not read by ``__call__``.
        """
        self.alpha = alpha
        self.gamma = gamma
        self.lambda_p = lambda_p
        self.temperature = temperature
        # Weights of the positive / negative losses; the paper does not state
        # them explicitly, so these values are still to be settled.
        self.positive_weights = positive_weights
        self.negative_weights = negative_weights
        if strides is None:
            strides = [8, 16, 32, 64, 128]
        self.strides = strides
        self.box_coder = BoxCoder()
        self.iou_loss_func = IOULoss(iou_type=iou_type, coord_type='ltrb')

    def __call__(self, cls_predicts, box_predicts, implicits, grids, gaussian,
                 targets):
        """Compute the total loss for one batch.

        :param cls_predicts: list(cls_predict) cls_predict [bs, num_cls, h, w]
        :param box_predicts: list(box_predict) box_predict [bs, 4, h, w]
        :param implicits: list(implicit) implicit [bs, 1, h, w]
        :param grids: list(grid, len=5) grid [h, w, 2]  2 ==> (xc, yc) in original-image scale
        :param gaussian: [cls, 4]  4 ==> (ux, uy, theta_x, theta_y)
        :param targets: [gt, 7] (bs, weights, label_id, x1, y1, x2, y2)
        :return: ``(total_loss, detached [negative, positive] loss pair, len(targets))``
        """
        device = cls_predicts[0].device
        bs = cls_predicts[0].shape[0]
        cls_num = cls_predicts[0].shape[1]

        # expand_grid.shape=[grid_num,3]  3 ==> (xc, yc, stride)
        expand_grid = torch.cat([
            torch.cat([
                grid_item,
                torch.tensor(data=stride_item,
                             device=device,
                             dtype=torch.float).expand_as(grid_item[..., [0]])
            ],
                      dim=-1).view(-1, 3)
            for stride_item, grid_item in zip(self.strides, grids)
        ],
                                dim=0)

        # Promote half-precision maps to float32. New lists are built so the
        # lists owned by the caller are not modified as a side effect.
        cls_predicts = [
            item.float() if item.dtype == torch.float16 else item
            for item in cls_predicts
        ]
        implicits = [
            item.float() if item.dtype == torch.float16 else item
            for item in implicits
        ]

        negative_loss_list = list()
        positive_loss_list = list()

        for bi in range(bs):
            # batch_cls_predicts [grid_num,cls_num], after sigmoid
            batch_cls_predicts = torch.cat([
                cls_item[bi].permute(1, 2, 0).contiguous().view(-1, cls_num)
                for cls_item in cls_predicts
            ],
                                           dim=0).sigmoid()
            # batch_implicit [grid_num,1]
            batch_implicit = torch.cat([
                implicit_item[bi].permute(1, 2, 0).contiguous().view(-1, 1)
                for implicit_item in implicits
            ],
                                       dim=0).sigmoid()
            # join_predicts = cls_predicts * implicit_predicts (class * object)
            # [grid_num,cls_num]; clamped so the log terms below stay finite
            batch_join_predicts = (batch_cls_predicts * batch_implicit).clamp(
                1e-6, 1 - 1e-6)

            # batch_box_predicts [grid_num, 4]
            batch_box_predicts = torch.cat([
                box_item[bi].permute(1, 2, 0).contiguous().view(-1, 4)
                for box_item in box_predicts
            ],
                                           dim=0)
            # target  [gt_num,6]  6 ==> (weights, label_id, x1, y1, x2, y2)
            batch_targets = targets[targets[:, 0] == bi, 1:]

            # Without any target the loss is just the negative focal term.
            if len(batch_targets) == 0:
                negative_loss = -1 * (batch_join_predicts**self.gamma) * (
                    1 - batch_join_predicts).log()
                negative_loss = negative_loss.sum()
                negative_loss_list.append(negative_loss)
                continue

            # ---------------- positive loss ----------------
            gt_xy = (batch_targets[:, [2, 3]] + batch_targets[:, [4, 5]]) / 2.
            # d = (grid_xy - gt_xy) / stride, used for the centre weight
            # [grid_num,gt_num,2]
            xy_offset = (expand_grid[:, None, :2] -
                         gt_xy[None, :, :]) / expand_grid[:, None, [2]]
            # regression target of every grid point  [grid_num,gt_num,4]
            batch_reg_targets = self.box_coder.encode(expand_grid[..., :2],
                                                      batch_targets[..., 2:])
            # Two index vectors of length N (N = number of candidate pairs);
            # every point whose four encoded distances are positive is
            # treated as a positive candidate.
            grid_idx, gt_idx = (batch_reg_targets.min(dim=-1)[0] > 0).nonzero(
                as_tuple=False).t()
            # Class id of the gt in every candidate pair (looked up once).
            gt_labels = batch_targets[gt_idx, 1].long()

            cls_prob = batch_join_predicts[grid_idx, gt_labels]  # shape=[N]
            iou_loss = self.iou_loss_func(
                batch_box_predicts[grid_idx, :], batch_reg_targets[grid_idx,
                                                                   gt_idx, :])
            loc_prob = (-self.lambda_p * iou_loss).exp()  # P_loc
            # P_+ = cls_prob * loc_prob, confidence at the location
            joint_prob = cls_prob * loc_prob
            # C(P), the confidence weight function
            confidence = (joint_prob / self.temperature).exp()

            # G(d) = exp(-(d - u)**2 / (2 * theta**2))
            # d = xy_offset = grid_xy - gt_xy; u and theta are learnable.
            gaussian_delta_mu = -(
                (xy_offset[grid_idx, gt_idx, :] -
                 gaussian[gt_labels, :2])**2).sum(-1)
            gaussian_delta_theta = 2 * ((gaussian[gt_labels, 2:])**2).sum(-1)
            gaussian_weights = (gaussian_delta_mu /
                                gaussian_delta_theta).exp()  # shape=[N]

            # w+
            positive_weights = confidence * gaussian_weights
            positive_loss = torch.tensor(data=0., device=device)
            # Weights are normalised per gt box over its own candidates.
            for unique_gt_idx in gt_idx.unique():
                gt_idx_mask = gt_idx == unique_gt_idx
                instance_weights = positive_weights[
                    gt_idx_mask] / positive_weights[gt_idx_mask].sum()
                instance_loss = -(instance_weights *
                                  joint_prob[gt_idx_mask]).sum().log()
                positive_loss += instance_loss
            positive_loss_list.append(positive_loss)

            # ---------------- negative loss ----------------
            # shape=[grid_num,4]  4 ==> (x1, y1, x2, y2)
            decode_box = self.box_coder.decoder(
                expand_grid[..., :2], batch_box_predicts).detach()
            # shape=[grid_num,gt_num]
            predict_targets_iou = box_iou(decode_box, batch_targets[..., 2:])
            # max_iou = max{iou between the predicted box and all gt boxes}
            # shape=[grid_num]
            max_iou, max_iou_gt_idx = predict_targets_iou.max(dim=-1)
            # The denominator is clamped: an IoU of exactly 1 would otherwise
            # give inf here and NaN after the normalisation below.
            func_iou = 1 / (1 - max_iou).clamp(min=1e-12)
            # Maps the largest value to ~0 and max_iou=0 to 1.
            func_iou = 1 - (func_iou - 1) / (func_iou.max() - 1 + 1e-10)

            # w- = 1.0 for every point outside all gt boxes
            # shape=[grid_num, cls_num]
            negative_weights = torch.ones(
                size=(expand_grid.shape[0], cls_num),
                device=device).float()
            negative_weights[grid_idx, gt_labels] = func_iou[grid_idx]
            weighted_negative_prob = negative_weights * batch_join_predicts
            negative_loss = -1 * (weighted_negative_prob**self.gamma) * (
                1 - weighted_negative_prob).log()
            negative_loss = negative_loss.sum()
            negative_loss_list.append(negative_loss)

        # Both sums are normalised by the number of gt boxes in the batch.
        total_negative_loss = torch.stack(negative_loss_list).sum() / max(
            1, len(targets))
        if len(targets) == 0:
            return total_negative_loss, torch.stack(
                [total_negative_loss,
                 torch.tensor(0., device=device)]).detach(), len(targets)
        total_positive_loss = torch.stack(positive_loss_list).sum() / max(
            1, len(targets))
        total_negative_loss = total_negative_loss * (1 - self.alpha)
        total_positive_loss = total_positive_loss * self.alpha
        total_loss = total_negative_loss + total_positive_loss
        return total_loss, torch.stack(
            [total_negative_loss, total_positive_loss]).detach(), len(targets)