Example no. 1
    def __call__(self, bs, anchors, targets):
        """
        :param bs: batch_size
        :param anchors: list(anchor) anchor [all, 4] (x1,y1,x2,y2)
        :param targets: [gt_num, 7] (batch_id,weights,label_id,x1,y1,x2,y2)
        :return:
        """
        # [all,4] (x1,y1,x2,y2)
        all_anchors = torch.cat(anchors, dim=0)
        flag_list = list()
        targets_list = list()
        for bi in range(bs):
            flag = torch.ones(size=(len(all_anchors),), device=all_anchors.device)
            # flag = all_anchors.new_ones(size=(len(all_anchors),))
            # [gt_num, 6] (weights,label_idx,x1,y1,x2,y2)
            batch_targets = targets[targets[:, 0] == bi, 1:]
            if len(batch_targets) == 0:
                # no gt boxes in this image: mark every anchor as negative
                flag_list.append(flag * 0.)
                targets_list.append(torch.Tensor())
                continue
            flag *= -1.  # default flag: -1 = ignored
            batch_box = batch_targets[:, 2:]
            # [all,gt_num]
            anchor_gt_iou = box_iou(all_anchors, batch_box)

            iou_val, gt_idx = anchor_gt_iou.max(dim=1)
            pos_idx = iou_val >= self.iou_thresh
            neg_idx = iou_val < self.ignore_iou
            flag[pos_idx] = 1.  # positive
            flag[neg_idx] = 0.  # negative; anchors in between keep flag = -1 (ignored)
            flag_list.append(flag)
            gt_targets = batch_targets[gt_idx, :]
            targets_list.append(gt_targets)
        return flag_list, targets_list, all_anchors
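All of these snippets call a box_iou helper that is not shown. A minimal sketch for corner-format boxes, equivalent to torchvision.ops.box_iou (the 1e-12 epsilon is an assumption to avoid division by zero):

import torch

def box_iou(boxes_a, boxes_b):
    """Pairwise IoU: boxes_a [N,4], boxes_b [M,4], both (x1,y1,x2,y2); returns [N,M]."""
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    lt = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])  # intersection top-left  [N,M,2]
    rb = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])  # intersection bottom-right  [N,M,2]
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_a[:, None] + area_b[None, :] - inter + 1e-12)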
Example no. 2
    def select_train_sample(self, proposal, targets):
        '''
        :param proposal (list, len=bs): list(filter_box)  filter_box.shape=[N,4]  N=post_nms_num
        :param targets: [gt_num, 7]  7==>(bs_idx,weights,label_idx,x1,y1,x2,y2)
        :return:
        ret_proposal (list, len=bs): list(proposal)  shape=[n_p+n_n,4]
        ret_labels (list, len=bs): list(labels)  shape=[n_p+n_n,]  =0 neg, >0 pos
        ret_targets (list, len=bs): list(targets)  shape=[n_p+n_n,4]  =[0,0,0,0] neg, else pos
        '''
        bs = len(proposal)
        ret_proposal = list()
        ret_labels = list()
        ret_targets = list()
        # ret_mask=list()

        for i in range(bs):
            batch_targets = targets[targets[:, 0] == i, 1:]
            if len(batch_targets):
                # gt boxes are appended to the proposals so every target has at
                # least one high-IoU positive sample (a standard Faster R-CNN trick)
                batch_proposal = torch.cat(
                    [proposal[i], batch_targets[:, -4:]], dim=0)
                targets_proposal_iou = box_iou(batch_targets[:, -4:],
                                               batch_proposal)
                match_idx = self.matcher(targets_proposal_iou)
            else:
                batch_proposal = proposal[i]
                match_idx = torch.full((batch_proposal.shape[0], ),
                                       fill_value=-1,
                                       dtype=torch.long,
                                       device=batch_proposal.device)
            positive_negative_mask = self.sampler(
                match_idx)  # =1 pos, =0 neg, =255 other
            valid_mask = positive_negative_mask != 255  # pos neg =True, other =False
            ret_proposal.append(
                batch_proposal[valid_mask])  # shape=[n_p+n_n,4]
            compress_mask = positive_negative_mask[valid_mask].bool(
            )  # shape=[n_p+n_n,] =True pos,  =False neg
            # ret_mask.append(compress_mask)
            labels_idx = torch.zeros_like(compress_mask,
                                          dtype=torch.float,
                                          requires_grad=False)
            labels_idx[compress_mask] = batch_targets[
                match_idx[valid_mask][compress_mask].long(),
                1] + 1  # labels are shifted by 1: index 0 is background, so cls_output_size = num_cls + 1
            ret_labels.append(labels_idx)  # shape=[n_p+n_n,] =0 neg, >0 pos
            targets_box = torch.zeros_like(batch_proposal[valid_mask])
            targets_box[compress_mask, :] = batch_targets[
                match_idx[valid_mask][compress_mask].long(), -4:]
            ret_targets.append(targets_box)
        # ret_proposal = torch.stack(ret_proposal, dim=0)
        # ret_labels = torch.stack(ret_labels, dim=0)
        # ret_targets = torch.stack(ret_targets, dim=0)
        return ret_proposal, ret_labels, ret_targets
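self.matcher and self.sampler are defined elsewhere. Judging by the inline comments (=1 pos, =0 neg, =255 other), the sampler behaves like torchvision's balanced positive/negative sampler; a hedged sketch, where the class name, num_samples and positive_fraction are assumptions:

import torch

class RandomPosNegSampler(object):
    """Hypothetical sampler: keeps up to num_samples proposals at a fixed positive fraction."""
    def __init__(self, num_samples=512, positive_fraction=0.25):
        self.num_samples = num_samples
        self.positive_fraction = positive_fraction

    def __call__(self, match_idx):
        mask = torch.full_like(match_idx, 255)  # 255 = not selected
        pos = (match_idx >= 0).nonzero(as_tuple=False).view(-1)
        neg = (match_idx == -1).nonzero(as_tuple=False).view(-1)  # -2 (ignored) stays 255
        num_pos = min(len(pos), int(self.num_samples * self.positive_fraction))
        num_neg = min(len(neg), self.num_samples - num_pos)
        pos = pos[torch.randperm(len(pos), device=match_idx.device)[:num_pos]]
        neg = neg[torch.randperm(len(neg), device=match_idx.device)[:num_neg]]
        mask[pos] = 1
        mask[neg] = 0
        return mask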
Example no. 3
    def __call__(self, anchors, targets):
        '''
        :param anchors: shape=[N,4]
        :param targets: shape=[M,4]
        :return:
        matches_target_idx: shape=[N,]  matches_target_idx[i]=k (k>=0): positive sample, k is the index of the matched gt_box
                                        matches_target_idx[i]=-1: BELOW_LOW_THRESHOLD (negative)
                                        matches_target_idx[i]=-2: BETWEEN_THRESHOLDS (ignored)
        '''
        target_anchor_iou=box_iou(targets,anchors)  # shape=[M,N]
        matches_target_idx=self.matcher(target_anchor_iou)
        return matches_target_idx
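A hedged sketch of the self.matcher assumed by this wrapper, mirroring torchvision's Matcher semantics (-1 for BELOW_LOW_THRESHOLD, -2 for BETWEEN_THRESHOLDS); the class name and default thresholds are assumptions:

import torch

class IoUMatcher(object):
    """Hypothetical matcher following torchvision's Matcher semantics."""
    def __init__(self, high_thresh=0.5, low_thresh=0.4):
        self.high_thresh = high_thresh
        self.low_thresh = low_thresh

    def __call__(self, target_anchor_iou):
        # target_anchor_iou: [M, N] (gt boxes x anchors)
        max_iou, matches = target_anchor_iou.max(dim=0)  # best gt per anchor, shape [N]
        between = (max_iou >= self.low_thresh) & (max_iou < self.high_thresh)
        matches[max_iou < self.low_thresh] = -1  # BELOW_LOW_THRESHOLD -> negative
        matches[between] = -2                    # BETWEEN_THRESHOLDS -> ignored
        return matches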
Example no. 4
def coco_map(predicts_list, targets_list):
    """
    :param predicts_list: per_img predicts_shape [n,6] (x1,y1,x2,y2,score,cls_id)
    :param targets_list: per_img targets_shape [m, 5] (cls_id,x1,y1,x2,y2)
    :return:
    """
    device = targets_list[0].device
    iouv = torch.linspace(0.5, 0.95, 10).to(device)
    niou = iouv.numel()
    stats = list()
    for predicts, targets in zip(predicts_list, targets_list):
        nl = len(targets)
        tcls = targets[:, 0].tolist() if nl else []
        if predicts is None:
            if nl:
                stats.append((torch.zeros(0, niou, dtype=torch.bool),
                              torch.Tensor(), torch.Tensor(), tcls))
            continue
        correct = torch.zeros(predicts.shape[0],
                              niou,
                              dtype=torch.bool,
                              device=device)
        if nl:
            detected = list()
            tcls_tensor = targets[:, 0]
            tbox = targets[:, 1:5]

            for cls in torch.unique(tcls_tensor):
                ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)
                pi = (cls == predicts[:, 5]).nonzero(as_tuple=False).view(-1)
                if pi.shape[0]:
                    ious, i = box_iou(predicts[pi, :4], tbox[ti]).max(1)
                    for j in (ious > iouv[0]).nonzero(as_tuple=False):
                        d = ti[i[j]]
                        if d not in detected:
                            detected.append(d)
                            correct[pi[j]] = ious[j] > iouv
                        if len(detected) == nl:
                            break
        stats.append(
            (correct.cpu(), predicts[:, 4].cpu(), predicts[:, 5].cpu(), tcls))

    stats = [np.concatenate(x, 0) for x in zip(*stats)]
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(
            1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        return mp, mr, map50, map
    else:
        return 0., 0., 0., 0.
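coco_map delegates the precision/recall/AP computation to ap_per_class, which is not shown. Below is a compact sketch in the spirit of the YOLOv3/v5 implementation; the helper name compute_ap and the 101-point interpolation are assumptions:

import numpy as np

def compute_ap(recall, precision):
    """AP for one class at one IoU threshold via the interpolated PR curve (101-point, COCO-style)."""
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))  # precision envelope
    x = np.linspace(0, 1, 101)
    return np.trapz(np.interp(x, mrec, mpre), x)

def ap_per_class(tp, conf, pred_cls, target_cls):
    """tp: [n_pred, n_iou] bools, conf/pred_cls: [n_pred], target_cls: all gt class ids."""
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]
    classes = np.unique(target_cls)
    n_iou = tp.shape[1]
    p, r, ap = (np.zeros((len(classes), n_iou)) for _ in range(3))
    for ci, c in enumerate(classes):
        mask = pred_cls == c
        n_gt = (target_cls == c).sum()
        if mask.sum() == 0 or n_gt == 0:
            continue
        tpc = tp[mask].cumsum(0)        # cumulative true positives
        fpc = (1 - tp[mask]).cumsum(0)  # cumulative false positives
        recall = tpc / (n_gt + 1e-16)
        precision = tpc / (tpc + fpc + 1e-16)
        r[ci], p[ci] = recall[-1], precision[-1]
        for j in range(n_iou):
            ap[ci, j] = compute_ap(recall[:, j], precision[:, j])
    f1 = 2 * p * r / (p + r + 1e-16)
    return p, r, ap, f1, classes.astype(int)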
Example no. 5
    def __call__(self, cls_predicts, box_predicts, anchors, targets):
        """
        :param cls_predicts:
        :param box_predicts:
        :param anchors:
        :param targets:
        :return:
        """
        device = cls_predicts[0].device
        bs = cls_predicts[0].shape[0]
        cls_num = cls_predicts[0].shape[-1]
        expand_anchor = torch.cat(anchors, dim=0)

        negative_loss_list = list()
        positive_loss_list = list()

        for bi in range(bs):
            batch_cls_predicts = torch.cat([cls_item[bi] for cls_item in cls_predicts], dim=0) \
                .sigmoid() \
                .clamp(min=1e-6, max=1 - 1e-6)
            batch_targets = targets[targets[:, 0] == bi, 1:]

            if len(batch_targets) == 0:
                negative_loss = -(1 - self.alpha) * (
                    batch_cls_predicts**
                    self.gamma) * (1 - batch_cls_predicts).log()
                negative_loss_list.append(negative_loss.sum())
                continue

            batch_box_predicts = torch.cat(
                [box_item[bi] for box_item in box_predicts], dim=0)
            # calc_positive_loss
            targets_anchor_iou = box_iou(batch_targets[:, 2:], expand_anchor)
            _, top_k_anchor_idx = targets_anchor_iou.topk(k=self.top_k,
                                                          dim=1,
                                                          sorted=False)

            matched_cls_prob = batch_cls_predicts[top_k_anchor_idx].gather(
                dim=-1,
                index=(batch_targets[:, [1]][:, None, :]).long().repeat(
                    1, self.top_k, 1)).squeeze(-1)
            match_box_target = self.box_coder.encoder(
                expand_anchor[top_k_anchor_idx], batch_targets[:, None, 2:])
            matched_box_prob = (
                -self.box_reg_weight *
                smooth_l1_loss(batch_box_predicts[top_k_anchor_idx],
                               match_box_target, self.beta).sum(-1)).exp()
            positive_loss = self.alpha * mean_max(
                matched_cls_prob * matched_box_prob).sum()
            positive_loss_list.append(positive_loss)

            with torch.no_grad():
                box_localization = self.box_coder.decoder(
                    batch_box_predicts, expand_anchor)
            target_box_iou = box_iou(batch_targets[:, 2:], box_localization)
            t1 = self.box_iou_thresh
            t2 = target_box_iou.max(dim=1,
                                    keepdim=True)[0].clamp(min=t1 + 1e-6)
            target_box_prob = ((target_box_iou - t1) / (t2 - t1)).clamp(min=0.,
                                                                        max=1.)
            indices = torch.stack([
                torch.arange(len(batch_targets), device=device),
                batch_targets[:, 1]
            ],
                                  dim=0).long()
            object_cls_box_prob = torch.sparse_coo_tensor(indices,
                                                          target_box_prob,
                                                          device=device)

            cls_idx, anchor_idx = torch.sparse.sum(
                object_cls_box_prob,
                dim=0).to_dense().nonzero(as_tuple=False).t()
            if len(cls_idx) == 0:
                negative_loss = -(1 - self.alpha) * (
                    batch_cls_predicts**
                    self.gamma) * (1 - batch_cls_predicts).log()
                negative_loss_list.append(negative_loss.sum())
                continue
            anchor_positive_max_prob = torch.where(
                batch_targets[:, [1]].long() == cls_idx,
                target_box_prob[:, anchor_idx],
                torch.tensor(data=0., device=device)).max(dim=0)[0]

            anchor_cls_assign_prob = torch.zeros(size=(len(expand_anchor),
                                                       cls_num),
                                                 device=device)
            anchor_cls_assign_prob[anchor_idx,
                                   cls_idx] = anchor_positive_max_prob
            negative_prob = batch_cls_predicts * (1 - anchor_cls_assign_prob)
            negative_loss = -(1 - self.alpha) * (negative_prob**self.gamma) * (
                1 - negative_prob).log()
            negative_loss_list.append(negative_loss.sum())

        negative_losses = torch.stack(negative_loss_list).sum() / max(
            1, len(targets))
        if len(positive_loss_list) == 0:
            total_loss = negative_losses
            return total_loss, torch.stack(
                [negative_losses,
                 torch.tensor(data=0., device=device)]), len(targets)

        positive_losses = torch.stack(positive_loss_list).sum() / max(
            1, len(targets))
        total_loss = negative_losses + positive_losses
        return total_loss, torch.stack([negative_losses,
                                        positive_losses]), len(targets)
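mean_max is the Mean-max function from the FreeAnchor paper: a weighted average whose weights grow as the bag probabilities approach 1, so it interpolates between mean and max. A sketch consistent with how it is called above (input [gt_num, top_k], output [gt_num]):

import torch

def mean_max(x):
    """Mean-max over the last dim: weights w_i = (1 / (1 - x_i)) / sum_j(1 / (1 - x_j))."""
    weights = 1 / (1 - x).clamp(min=1e-12)
    weights = weights / weights.sum(dim=-1, keepdim=True)
    return (weights * x).sum(dim=-1)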
Example no. 6
    def __call__(self, cls_predicts, box_predicts, implicits, grids, gaussian, targets):
        """
        :param cls_predicts: list(cls_predict) cls_predict [bs, cls, h, w]
        :param box_predicts: list(box_predict) box_predict [bs, 4, h, w]
        :param implicits: list(implicit) implicit[bs, 1, h, w]
        :param grids: list(grid) grid [h, w, 2]
        :param gaussian: [cls, 4]
        :param targets: [gt, 7] (bs, weights, label_id, x1, y1, x2, y2)
        :return:
        """
        device = cls_predicts[0].device
        bs = cls_predicts[0].shape[0]
        cls_num = cls_predicts[0].shape[1]
        # expand_grid [grid_num,3](xc,yc,stride)
        expand_grid = torch.cat([
            torch.cat([
                grid_item,
                torch.tensor(data=stride_item, device=device, dtype=torch.float).expand_as(grid_item[..., [0]])
            ], dim=-1).view(-1, 3) for stride_item, grid_item in zip(self.strides, grids)], dim=0)
        for i in range(len(cls_predicts)):
            if cls_predicts[i].dtype == torch.float16:
                cls_predicts[i] = cls_predicts[i].float()
        for i in range(len(implicits)):
            if implicits[i].dtype == torch.float16:
                implicits[i] = implicits[i].float()
        negative_loss_list = list()
        positive_loss_list = list()
        for bi in range(bs):
            # batch_cls_predicts [grid_num,cls_num]
            batch_cls_predicts = torch.cat(
                [cls_item[bi].permute(1, 2, 0).contiguous().view(-1, cls_num) for cls_item in cls_predicts],
                dim=0).sigmoid()
            # batch_implicit [grid_num,1]
            batch_implicit = torch.cat(
                [implicit_item[bi].permute(1, 2, 0).contiguous().view(-1, 1) for implicit_item in implicits],
                dim=0).sigmoid()

            batch_join_predicts = (batch_cls_predicts * batch_implicit).clamp(1e-6, 1 - 1e-6)
            # batch_box_predicts [grid_num, 4]
            batch_box_predicts = torch.cat(
                [box_item[bi].permute(1, 2, 0).contiguous().view(-1, 4) for box_item in box_predicts], dim=0)
            batch_targets = targets[targets[:, 0] == bi, 1:]
            if len(batch_targets) == 0:
                negative_loss = -(1 - self.alpha) * batch_join_predicts ** self.gamma * (
                        1 - batch_join_predicts).log()
                negative_loss = negative_loss.sum()
                negative_loss_list.append(negative_loss)
                continue
            # [gt_num,6] (weights,label_idx,x1,y1,x2,y2)
            gt_xy = (batch_targets[:, [2, 3]] + batch_targets[:, [4, 5]]) / 2
            # [grid_num,gt_num,2]
            xy_offset = (expand_grid[:, None, :2] - gt_xy[None, :, :]) / expand_grid[:, None, [2]]
            # [grid_num,gt_num,4]
            batch_reg_targets = self.box_coder.encode(expand_grid[..., :2], batch_targets[..., 2:])
            grid_idx, gt_idx = (batch_reg_targets.min(dim=-1)[0] > 0).nonzero(as_tuple=False).t()

            cls_prob = batch_join_predicts[grid_idx, batch_targets[gt_idx, 1].long()]
            iou_loss = self.iou_loss_func(batch_box_predicts[grid_idx, :], batch_reg_targets[grid_idx, gt_idx, :])
            loc_prob = (-self.lambda_p * iou_loss).exp()
            joint_prob = cls_prob * loc_prob
            confidence = (joint_prob / self.temperature).exp()
            gaussian_delta_mu = -(
                    (xy_offset[grid_idx, gt_idx, :] - gaussian[batch_targets[gt_idx, 1].long(), :2]) ** 2
            ).sum(-1)
            gaussian_delta_theta = 2 * ((gaussian[batch_targets[gt_idx, 1].long(), 2:]) ** 2).sum(-1)
            gaussian_weights = (gaussian_delta_mu / gaussian_delta_theta).exp()
            positive_weights = confidence * gaussian_weights
            positive_loss = torch.tensor(data=0., device=device)
            for unique_gt_idx in gt_idx.unique():
                grid_idx_mask = gt_idx == unique_gt_idx
                instance_weights = positive_weights[grid_idx_mask] / positive_weights[grid_idx_mask].sum()
                instance_loss = -(instance_weights * joint_prob[grid_idx_mask]).sum().log()
                positive_loss += instance_loss
            positive_loss_list.append(positive_loss)

            decode_box = self.box_coder.decoder(expand_grid[..., :2], batch_box_predicts).detach()
            predict_targets_iou = box_iou(decode_box, batch_targets[..., 2:])
            max_iou, max_iou_gt_idx = predict_targets_iou.max(dim=-1)
            func_iou = 1 / (1 - max_iou)
            func_iou = 1 - (func_iou - 1) / (func_iou.max() - 1 + 1e-10)
            negative_weights = torch.ones(size=(expand_grid.shape[0], cls_num), device=device).float()
            negative_weights[grid_idx, batch_targets[gt_idx, 1].long()] = func_iou[grid_idx]
            weighted_negative_prob = negative_weights * batch_join_predicts
            negative_loss = -(1 - self.alpha) * weighted_negative_prob ** self.gamma * (
                    1 - weighted_negative_prob).log()
            negative_loss = negative_loss.sum()
            negative_loss_list.append(negative_loss)
        total_negative_loss = torch.stack(negative_loss_list).sum() / max(1, len(targets))
        if len(targets) == 0:
            return total_negative_loss, \
                   torch.stack([total_negative_loss, torch.tensor(0., device=device)]).detach(), \
                   len(targets)
        total_positive_loss = torch.stack(positive_loss_list).sum() / max(1, len(targets))
        total_negative_loss = total_negative_loss * self.negative_weights
        total_positive_loss = total_positive_loss * self.positive_weights
        total_loss = total_negative_loss + total_positive_loss
        return total_loss, torch.stack([total_negative_loss, total_positive_loss]).detach(), len(targets)
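self.box_coder here looks like an FCOS-style distance coder: encode returns the (left, top, right, bottom) distances from each grid point, so "all four components positive" is exactly the point-inside-box test used above. A hedged sketch that mirrors the asymmetric encode/decoder naming in the calls; the class name is an assumption:

import torch

class GridBoxCoder(object):
    """Hypothetical FCOS-style coder for grid points (xc, yc) and boxes (x1, y1, x2, y2)."""
    @staticmethod
    def encode(grid_xy, boxes):
        # grid_xy: [grid_num, 2], boxes: [gt_num, 4] -> [grid_num, gt_num, 4] (l, t, r, b)
        lt = grid_xy[:, None, :] - boxes[None, :, :2]  # (xc - x1, yc - y1)
        rb = boxes[None, :, 2:] - grid_xy[:, None, :]  # (x2 - xc, y2 - yc)
        return torch.cat([lt, rb], dim=-1)

    @staticmethod
    def decoder(grid_xy, predicts):
        # grid_xy: [grid_num, 2], predicts: [grid_num, 4] (l, t, r, b) -> (x1, y1, x2, y2)
        x1y1 = grid_xy - predicts[:, :2]
        x2y2 = grid_xy + predicts[:, 2:]
        return torch.cat([x1y1, x2y2], dim=-1)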
Example no. 7
def coco_map(predicts_list, targets_list,  ID_list, shape_list, net_input_size, save_json=True):
    """
    :param predicts_list(list, len=len(dataset)): per_img predicts_shape [n,6] (x1,y1,x2,y2,score,cls_id)
    :param targets_list(list, len=len(dataset)): per_img targets_shape [m, 5] (cls_id,x1,y1,x2,y2)
    :param ID_list(list, len=len(dataset)):  image path, shape=[w,h]
    :param shapes_list(list, len=len(dataset)): original image shape=[w0,h0], which is used for evaluate mAP
    :return:
    """
    device = targets_list[0].device
    # IoU thresholds from 0.5 to 0.95 in steps of 0.05
    iouv = torch.linspace(0.5, 0.95, 10).to(device)    # iou vector for mAP@0.5:0.95
    niou = iouv.numel()
    stats = list()
    jdict = []
    map, map50 = 0., 0.  # defaults in case the pycocotools evaluation below is skipped or fails

    for predicts, targets, path, original_shape in zip(predicts_list, targets_list, ID_list, shape_list):
        # ground truth for the current image: (cls_id, x1, y1, x2, y2)
        nl = len(targets)
        tcls = targets[:, 0].tolist() if nl else []

        # if there are no predictions, record empty stats
        if predicts is None:
            if nl:
                stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
            continue

        # Append to pycocotools JSON dictionary
        if save_json:
            # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
            ##-------------- change it --------------------------------------------------------------------
            # image_id = Path(path).stem  # for coco
            image_id = path  # for BDD
            ##-------------------------------------------------------------------------------------------
            box_json = predicts[:, :4].clone()  # x1y1x2y2
            ratio,pad=calculate_border(original_shape[::-1], net_input_size)  # note: shape=[w,h]
            scale_coords(None, box_json, original_shape[::-1], (ratio,pad))  # to original shape
            box_json = xyxy2xywh(box_json)  # xywh
            box_json[:, :2] -= box_json[:, 2:] / 2  # xy center to top-left corner
            for p, b in zip(predicts.tolist(), box_json.tolist()):
                jdict.append({#'image_id': int(image_id) if image_id.isnumeric() else image_id,  # coco
                              'image_id': image_id,  # BDD100
                              # 'category_id': coco_ids[int(p[5])],  # coco
                              'category_id': BDD100_ids[int(p[5])],   #BDD100
                              'bbox': [round(x, 3) for x in b],
                              'score': round(p[4], 5)})

        # Assign all predictions as incorrect
        # initialize the correctness matrix; niou = number of IoU thresholds
        correct = torch.zeros(predicts.shape[0], niou, dtype=torch.bool, device=device)
        if nl:
            detected = list()   # stores gt indices that have already been matched
            tcls_tensor = targets[:, 0]
            tbox = targets[:, 1:5]

            # Per target class
            # handle each class present in the image separately
            for cls in torch.unique(tcls_tensor):
                ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)     # target indices
                pi = (cls == predicts[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices
                # Search for detections
                if pi.shape[0]:
                    # Prediction to target ious
                    # box_iou gives the IoU between every prediction and every target;
                    # max(1) picks the best-matching target per prediction, i is its index
                    """
                    pred shape [N, 4]
                    tbox shape [M, 4]
                    box_iou shape [N, M]
                    ious shape [N,]
                    i shape [N,], values in [0, M)
                    """
                    ious, i = box_iou(predicts[pi, :4], tbox[ti]).max(1)
                    # Append detections
                    for j in (ious > iouv[0]).nonzero(as_tuple=False):   # prediction indices with IoU > 0.5
                        d = ti[i[j]]  # index of the target matched by this prediction
                        if d not in detected:
                            detected.append(d)
                            correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn (n=num of iou thresh)
                        if len(detected) == nl:              # all targets already located in image
                            break
        # Append statistics (correct, conf, pcls, tcls)
        stats.append((correct.cpu(), predicts[:, 4].cpu(), predicts[:, 5].cpu(), tcls))

    # Save JSON
    if save_json and len(jdict):
        f = 'detections_val2017_results.json'  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval
            import glob

            ###---------------------------------------------------------------------------------------------------------
            # note: the items below must be adapted per dataset
            # imgIds = [int(Path(x).stem) for x in ID_list]   # coco
            imgIds = ID_list  # BDD100
            # cocoGt = COCO('/home/wangchao/github_resposity/coco/annotations/instances_val2017.json')  # initialize COCO ground truth api
            cocoGt = COCO('/home/wangchao/public_dataset/BDD100/annotations/bdd100k_labels_images_det_coco_val.json')  # initialize BDD ground truth api
            ##----------------------------------------------------------------------------------------------------------


            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api
            # create the evaluator
            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            # run the evaluation
            cocoEval.evaluate()
            cocoEval.accumulate()
            # print the results
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print('ERROR: pycocotools unable to run: %s' % e)

    stats = [np.concatenate(x, 0) for x in zip(*stats)]
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        # per-class mAP vector: 13 classes for BDD100K, change per dataset
        maps = np.zeros(13) + map
        for i,c in enumerate(ap_class):
            maps[c]=ap[i]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        return mp, mr, map50, map, maps
    else:
        # change per dataset
        return 0., 0., 0., 0., np.zeros(13)
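The JSON export above relies on xyxy2xywh; COCO-style JSON expects top-left (x, y, w, h), hence the extra center-to-corner shift after the call. A sketch of the usual conversion:

import torch

def xyxy2xywh(boxes):
    """(x1, y1, x2, y2) -> (xc, yc, w, h); works on [..., 4] tensors."""
    out = boxes.clone()
    out[..., 0] = (boxes[..., 0] + boxes[..., 2]) / 2  # center x
    out[..., 1] = (boxes[..., 1] + boxes[..., 3]) / 2  # center y
    out[..., 2] = boxes[..., 2] - boxes[..., 0]        # width
    out[..., 3] = boxes[..., 3] - boxes[..., 1]        # height
    return out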
Example no. 8
    def __call__(self, cls_predicts, box_predicts, anchors, targets):
        '''
        :param cls_predicts: list(cls_predict) cls_predict [bs, n, num_cls]
        :param box_predicts: list(box_predict) box_predict [bs, n, 4]
        :param anchors: list(anchor) anchor [n, 4] (x1,y1,x2,y2)
        :param targets: [gt_num, 7] (batch_id,weights,label_id,x1,y1,x2,y2)
        :return:
        '''

        device = cls_predicts[0].device
        bs = cls_predicts[0].shape[0]
        cls_num = cls_predicts[0].shape[-1]
        expand_anchor = torch.cat(anchors, dim=0)  #shape=[num_anchors,4]

        positive_numels = 0  # number of gt boxes
        box_prob = list()  # store P_A+,  P_A-=1-P_A+
        positive_loss_list = list()
        negative_loss_list = list()
        cls_probs = list()

        for bi in range(bs):
            cls_prob = torch.cat(
                [cls_item[bi] for cls_item in cls_predicts],
                dim=0).sigmoid().clamp(
                    min=1e-6,
                    max=1 - 1e-6)  # cls_predict, shape=[num_anchors,80]
            target = targets[
                targets[:, 0] == bi,
                1:]  # gt_box, shape=[num_gts,6]  6==>conf_score,label_id,x1,y1,x2,y2

            # if no gt_box exist, just calc focal loss in negative condition
            if len(target) == 0:
                #                 negative_loss = -(cls_prob ** self.gamma) * (1 - cls_prob).log()
                negative_loss = -(cls_prob**self.gamma) * (
                    (1 - cls_prob).clamp(
                        min=1e-10, max=1.0 - 1e-10).log().clamp(min=-1000.,
                                                                max=1000.))
                negative_loss_list.append(negative_loss.sum())
                continue

            cls_probs.append(cls_prob)
            box_regression = torch.cat(
                [box_item[bi] for box_item in box_predicts],
                dim=0)  # box_predict , shape=[num_anchors,4]

            with torch.set_grad_enabled(False):

                # box_localization: a_{j}^{loc}, shape: [j, 4]
                box_localization = self.box_coder.decoder(
                    box_regression,
                    expand_anchor)  # shape=[num_anchors,4]  4==>x1,y1,x2,y2

                # object_box_iou: IoU_{ij}^{loc}, shape: [i, j]
                object_box_iou = box_iou(
                    target[:, 2:],
                    box_localization)  # shape=(num_gts,num_anchors)

                t1 = self.box_iou_thresh
                t2 = object_box_iou.max(dim=1, keepdim=True)[0].clamp(
                    min=t1 + 1e-12)  # shape=[num_gts,1]

                # object_box_prob: P{a_{j} -> b_{i}}, shape: [i, j]
                object_box_prob = ((object_box_iou - t1) / (t2 - t1)).clamp(
                    min=0, max=1.)
                '''
                indices.shape=[2,num_gts]
                row 0 holds the index of each gt_box; row 1 holds the class id of that gt_box
                '''
                indices = torch.stack(
                    [torch.arange(len(target), device=device), target[:, 1]],
                    dim=0).long()

                # object_cls_box_prob: P{a_{j} -> b_{i}}, shape: [i, c, j]
                '''
                object_cls_box_prob.shape=[num_gts, max_cls_id+1, num_anchors], filled per class
                note: if the gt_box with index gt_id has class label_id, then
                object_cls_box_prob[gt_id, label_id] = object_box_prob[gt_id]; all other entries are 0
                '''
                object_cls_box_prob = torch.sparse_coo_tensor(indices,
                                                              object_box_prob,
                                                              device=device)
                """
                image_box_prob: P{a_{j} \in A_{+}}, shape: [j, c] or [num_anchors,num_cls]
                image_box_prob是用来判断一个anchor是否可以匹配到某个目标(无论类别和匹配到gt box是什么)的置信度

                from "start" to "end" implement:
                image_box_prob = torch.sparse.max(object_cls_box_prob, dim=0).t()
                """
                # start

                # indices = torch.nonzero(torch.sparse.sum(object_cls_box_prob, dim=0).to_dense()).t_()  # shape=[2,N]
                indices = torch.sparse.sum(
                    object_cls_box_prob, dim=0).to_dense().nonzero(
                        as_tuple=False).t()  # shape=[2,N]

                if indices.numel() == 0:
                    image_box_prob = torch.zeros(
                        expand_anchor.shape[0],
                        cls_num).type_as(object_box_prob)
                else:
                    nonzero_box_prob = torch.where(
                        target[:, 1].unsqueeze(dim=-1) ==
                        indices[0],  # (num_gts,1)== (N) ===>(num_gts,N)
                        object_box_prob[:, indices[1]],
                        torch.tensor([
                            0
                        ]).type_as(object_box_prob)).max(dim=0)[0]  # ===> (N)

                    image_box_prob = torch.sparse_coo_tensor(
                        indices.flip([0]),
                        nonzero_box_prob,
                        size=(expand_anchor.shape[0],
                              cls_num),  # shape=[num_anchors,num_cls]
                        device=device).to_dense()
                # end
                box_prob.append(image_box_prob)

            # construct bags for objects
            match_quality_matrix = box_iou(target[:, 2:], expand_anchor)
            _, matched = torch.topk(
                match_quality_matrix, self.top_k, dim=1, sorted=False
            )  # shape=(num_gts,top_k), values in [0,num_anchors): indices of the top-k anchors matched to each gt
            del match_quality_matrix

            # matched_cls_prob: P_{ij}^{cls}
            # shape=(num_gts,top_k): predicted probability of each matched anchor for its gt's class
            matched_cls_prob = cls_prob[matched].gather(
                dim=-1,
                index=(target[:,
                              [1]][:,
                                   None, :]).long().repeat(1, self.top_k,
                                                           1)).squeeze(-1)

            # matched_box_prob: P_{ij}^{loc}
            matched_object_targets = self.box_coder.encoder(
                expand_anchor[matched],
                target[:, 2:].unsqueeze(dim=1))  # shape=[num_gts,topk,4]
            # P_loc
            retinanet_regression_loss = smooth_l1_loss(box_regression[matched],
                                                       matched_object_targets,
                                                       self.box_reg_weight,
                                                       self.beta)
            matched_box_prob = torch.exp(-retinanet_regression_loss)

            # positive_losses: { -log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) ) }
            positive_numels += len(target)
            positive_loss_list.append(
                self.positive_bag_loss_func(matched_cls_prob *
                                            matched_box_prob,
                                            dim=1))

        # positive_loss: \sum_{i}{ -log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) ) } / ||B||
        # positive_loss = torch.cat(positive_loss_list).sum() / max(1, positive_numels)
        item1 = torch.cat(positive_loss_list).sum()
        item2 = max(1, positive_numels)
        positive_loss = reduce_sum(item1) / reduce_sum(
            torch.tensor(data=item2, device=device).float()).item()

        # box_prob: P{a_{j} \in A_{+}}
        box_prob = torch.stack(box_prob, dim=0)
        cls_probs = torch.stack(cls_probs, dim=0)

        # negative_loss: \sum_{j}{ FL( (1 - P{a_{j} \in A_{+}}) * (1 - P_{j}^{bg}) ) } / n||B||
        '''
        (1-P_bg)<==>P_cls   shape=[num_anchors,num_cls]
        P{A-}<==>(1-P{box_cls})
        '''
        if len(negative_loss_list) != 0:
            neg_loss_empty = torch.stack(negative_loss_list, dim=0).sum()
        else:
            neg_loss_empty = 0

        # negative_loss = (neg_loss_empty + self.negative_bag_loss_func(cls_probs * (1 - box_prob), self.gamma)) / max(1, positive_numels * self.top_k)
        item3 = neg_loss_empty + self.negative_bag_loss_func(
            cls_probs * (1 - box_prob), self.gamma)
        item4 = max(1, positive_numels * self.top_k)
        negative_loss = reduce_sum(item3) / reduce_sum(
            torch.tensor(data=item4, device=device).float()).item()

        total_loss = positive_loss * self.alpha + negative_loss * (1 -
                                                                   self.alpha)
        # total_loss=reduce_sum(total_loss)/get_world_size()
        return total_loss, torch.stack([negative_loss,
                                        positive_loss]), positive_numels
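positive_bag_loss_func and negative_bag_loss_func are not shown. In the FreeAnchor reference implementation they are roughly as follows (the Mean-max weighting matches the mean_max sketch after Example no. 5; the summed scalar return of the negative loss is inferred from how item3 is formed above):

import torch
import torch.nn.functional as F

def positive_bag_loss(matched_prob, dim=1):
    """-log(Mean-max(P)) per gt bag; matched_prob: [num_gts, top_k] -> [num_gts]."""
    weights = 1 / (1 - matched_prob).clamp(min=1e-12)
    weights = weights / weights.sum(dim=dim, keepdim=True)
    bag_prob = (weights * matched_prob).sum(dim=dim)
    return F.binary_cross_entropy(bag_prob, torch.ones_like(bag_prob), reduction='none')

def negative_bag_loss(prob, gamma):
    """Focal-style BCE against zeros, summed to a scalar."""
    loss = prob ** gamma * F.binary_cross_entropy(prob, torch.zeros_like(prob), reduction='none')
    return loss.sum()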
Example no. 9
    def __call__(self, cls_predicts, box_predicts, implicits, grids, gaussian,
                 targets):
        '''
        params
        :param cls_predicts: list(cls_predict) cls_predict [bs, num_cls, h, w]
        :param box_predicts: list(box_predict) box_predict [bs, 4, h, w]
        :param implicits: list(implicit) implicit[bs, 1, h, w]
        :param grids: list(grid,len=5) grid [h, w, 2]    2==>(xc,yc) at input-image scale
        :param gaussian: [cls, 4]  4==>(ux,uy,theta_x,theta_y)
        :param targets: [gt, 7] (bs, weights, label_id, x1, y1, x2, y2)
        :return:
        '''
        device = cls_predicts[0].device
        bs = cls_predicts[0].shape[0]
        cls_num = cls_predicts[0].shape[1]

        # expand_grid.shape=[grid_num,3]  3==>(xc,yc,stride)
        expand_grid = torch.cat([
            torch.cat([
                grid_item,
                torch.tensor(data=stride_item,
                             device=device,
                             dtype=torch.float).expand_as(grid_item[..., [0]])
            ],
                      dim=-1).view(-1, 3)
            for stride_item, grid_item in zip(self.strides, grids)
        ],
                                dim=0)

        for i in range(len(cls_predicts)):
            if cls_predicts[i].dtype == torch.float16:
                cls_predicts[i] = cls_predicts[i].float()
        for i in range(len(implicits)):
            if implicits[i].dtype == torch.float16:
                implicits[i] = implicits[i].float()

        negative_loss_list = list()
        positive_loss_list = list()
        num_neg_grids = 0

        for bi in range(bs):
            # batch_cls_predicts [grid_num,cls_num]==>sigmoid
            batch_cls_predicts = torch.cat([
                cls_item[bi].permute(1, 2, 0).contiguous().view(-1, cls_num)
                for cls_item in cls_predicts
            ],
                                           dim=0).sigmoid()
            # batch_implicit [grid_num,1]
            batch_implicit = torch.cat([
                implicit_item[bi].permute(1, 2, 0).contiguous().view(-1, 1)
                for implicit_item in implicits
            ],
                                       dim=0).sigmoid()
            # join_predicts = cls_predicts * implicit_predicts (classification * objectness)   [grid_num,cls_num]
            batch_join_predicts = (batch_cls_predicts * batch_implicit).clamp(
                1e-6, 1 - 1e-6)

            # batch_box_predicts [grid_num, 4]
            batch_box_predicts = torch.cat([
                box_item[bi].permute(1, 2, 0).contiguous().view(-1, 4)
                for box_item in box_predicts
            ],
                                           dim=0)
            # target  [gt_num,6]  6==>(weights, label_id, x1, y1, x2, y2)
            batch_targets = targets[targets[:, 0] == bi, 1:]

            # if there are no targets, the loss reduces to the negative focal loss
            if len(batch_targets) == 0:
                negative_loss = -1 * (batch_join_predicts**self.gamma) * (
                    1 - batch_join_predicts).log()
                negative_loss = negative_loss.sum()
                negative_loss_list.append(negative_loss)
                continue

            ############################################################################################################
            ### calc positive loss ------------------------------------------------------------------------------------

            # [gt_num,6] (weights,label_idx,x1,y1,x2,y2)
            gt_xy = (batch_targets[:, [2, 3]] + batch_targets[:, [4, 5]]) / 2.
            # d = (grid_xy - gt_xy), used to compute the centerness weight  [grid_num,gt_num,2]
            xy_offset = (expand_grid[:, None, :2] -
                         gt_xy[None, :, :]) / expand_grid[:, None, [2]]
            # encode the regression target of every grid point  [grid_num,gt_num,4]
            batch_reg_targets = self.box_coder.encode(expand_grid[..., :2],
                                                      batch_targets[..., 2:])
            # shape=[1,N]  N = number of positive grids/locations; every point inside a gt_box is assumed positive
            grid_idx, gt_idx = (batch_reg_targets.min(dim=-1)[0] > 0).nonzero(
                as_tuple=False).t()

            # debug
            num_neg_grids += grid_idx.shape[0]

            cls_prob = batch_join_predicts[grid_idx, batch_targets[
                gt_idx, 1].long()]  # shape=[N,1]
            iou_loss = self.iou_loss_func(
                batch_box_predicts[grid_idx, :], batch_reg_targets[grid_idx,
                                                                   gt_idx, :])
            loc_prob = (-self.lambda_p * iou_loss).exp()  # P_loc, shape=[N,1]
            joint_prob = cls_prob * loc_prob  # P_+=cls_prob*obj_prob ,P(confidence at the location) shape=[N,1]
            confidence = (
                joint_prob /
                self.temperature).exp()  # C(P)  weight_function  shape=[N,1]
            '''
            G(d)=e{-1*(d-u)**2/(2*theta**2)}
            d=xy_offset=grid_xy-gt_xy
            u,theta are learnable parameters.
            '''
            gaussian_delta_mu = -(
                (xy_offset[grid_idx, gt_idx, :] -
                 gaussian[batch_targets[gt_idx, 1].long(), :2])**2).sum(-1)
            gaussian_delta_theta = 2 * (
                (gaussian[batch_targets[gt_idx, 1].long(), 2:])**2).sum(-1)
            gaussian_weights = (gaussian_delta_mu /
                                gaussian_delta_theta).exp()  # shape=[N,1]

            # w+
            positive_weights = confidence * gaussian_weights  # shape=[N,1]
            positive_loss = torch.tensor(data=0., device=device)
            for unique_gt_idx in gt_idx.unique():
                gt_idx_mask = gt_idx == unique_gt_idx
                instance_weights = positive_weights[
                    gt_idx_mask] / positive_weights[gt_idx_mask].sum()
                instance_loss = -(instance_weights *
                                  joint_prob[gt_idx_mask]).sum().log()
                positive_loss += instance_loss
            positive_loss_list.append(positive_loss)

            ##########################################################################################################################
            ## calc negative loss ----------------------------------------------------------------------------------------------------
            decode_box = self.box_coder.decoder(
                expand_grid[..., :2], batch_box_predicts).detach(
                )  # shape=[grid_num,4]  4==>(x1,y1,x2,y2)
            predict_targets_iou = box_iou(
                decode_box, batch_targets[..., 2:])  # shape=[grid_num,gt_num]
            '''
            max_iou=max{iou between the predicted_box and all gt_boxes}
            '''
            max_iou, max_iou_gt_idx = predict_targets_iou.max(
                dim=-1)  # shape=[grid_num]
            func_iou = 1 / (1 - max_iou)
            func_iou = 1 - (func_iou - 1) / (
                func_iou.max() - 1 + 1e-10
            )  # max_iou==>(0,1) if max_iou=1, func_iou=0.  if max_iou=0, func_iou=1.

            # points outside every gt_box keep w- = 1.0
            negative_weights = torch.ones(
                size=(expand_grid.shape[0], cls_num),
                device=device).float()  # shape=[grid_num, cls_num]
            negative_weights[grid_idx,
                             batch_targets[gt_idx,
                                           1].long()] = func_iou[grid_idx]
            weighted_negative_prob = negative_weights * batch_join_predicts
            negative_loss = -1 * (weighted_negative_prob**self.gamma) * (
                1 - weighted_negative_prob).log()
            negative_loss = negative_loss.sum()
            negative_loss_list.append(negative_loss)

        total_negative_loss = torch.stack(negative_loss_list).sum() / max(
            1, len(targets))
        # total_negative_loss = torch.stack(negative_loss_list).sum() / num_neg_grids
        if len(targets) == 0:
            return total_negative_loss, torch.stack(
                [total_negative_loss,
                 torch.tensor(0., device=device)]).detach(), len(targets)
        total_positive_loss = torch.stack(positive_loss_list).sum() / max(
            1, len(targets))
        total_negative_loss = total_negative_loss * (1 - self.alpha)
        total_positive_loss = total_positive_loss * self.alpha
        total_loss = total_negative_loss + total_positive_loss
        return total_loss, torch.stack(
            [total_negative_loss, total_positive_loss]).detach(), len(targets)
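self.iou_loss_func is not shown; given loc_prob = exp(-lambda_p * iou_loss), a -log(IoU) loss makes loc_prob a power of the IoU. A hedged sketch for boxes in the (l, t, r, b) encoding of the GridBoxCoder sketch after Example no. 6; the function name is an assumption:

import torch

def iou_loss_ltrb(predicts, targets, eps=1e-10):
    """-log(IoU) between boxes given as (l, t, r, b) distances from the same grid point."""
    p_area = (predicts[..., 0] + predicts[..., 2]).clamp(min=0) * (predicts[..., 1] + predicts[..., 3]).clamp(min=0)
    t_area = (targets[..., 0] + targets[..., 2]) * (targets[..., 1] + targets[..., 3])
    inter_w = torch.min(predicts[..., 0], targets[..., 0]) + torch.min(predicts[..., 2], targets[..., 2])
    inter_h = torch.min(predicts[..., 1], targets[..., 1]) + torch.min(predicts[..., 3], targets[..., 3])
    inter = inter_w.clamp(min=0) * inter_h.clamp(min=0)
    iou = inter / (p_area + t_area - inter + eps)
    return -iou.clamp(min=eps).log()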
Example no. 10
def non_max_suppression(prediction,
                        conf_thresh=0.1,
                        iou_thresh=0.6,
                        merge=False,
                        agnostic=False,
                        multi_label=True,
                        max_det=300):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Returns:
         detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
    """

    xc = prediction[..., 4] > conf_thresh  # candidates
    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    redundant = True  # require redundant detections
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thresh).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thresh]

        # Filter by class

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Sort by confidence
        # x = x[x[:, 4].argsort(descending=True)]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = nms(boxes, scores, iou_thresh)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thresh  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except Exception:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                print(x, i, x.shape, i.shape)
                pass

        output[xi] = x[i]

    return output
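xywh2xyxy converts the head's center-format boxes to corners before NMS; a sketch of the standard conversion (the inverse of the xyxy2xywh helper after Example no. 7):

import torch

def xywh2xyxy(boxes):
    """(xc, yc, w, h) -> (x1, y1, x2, y2); works on [..., 4] tensors."""
    out = boxes.clone()
    out[..., 0] = boxes[..., 0] - boxes[..., 2] / 2  # x1
    out[..., 1] = boxes[..., 1] - boxes[..., 3] / 2  # y1
    out[..., 2] = boxes[..., 0] + boxes[..., 2] / 2  # x2
    out[..., 3] = boxes[..., 1] + boxes[..., 3] / 2  # y2
    return out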
Example no. 11
def non_max_suppression(prediction,
                        conf_thresh=0.1,
                        iou_thresh=0.6,
                        merge=False,
                        agnostic=False,
                        multi_label=True,
                        max_det=300):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Args:
    prediction (torch.Tensor): shape=[bs, -1, no] with no = 5 + num_cls (e.g. 85 for COCO). note: box coords (x,y,w,h) have already been decoded to input-image scale.

    Returns:
         a list(len=bs) with element's shape: nx6 (x1, y1, x2, y2, conf, cls)
    """

    xc = prediction[..., 4] > conf_thresh  # candidates
    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    redundant = True  # require redundant detections
    output = [None] * prediction.shape[0]  # list len=bs

    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # drop boxes whose objectness score is below conf_thresh

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thresh).nonzero(
                as_tuple=False).T  # i = box index, j = class index
            # keep every (box, class) pair above the threshold; x[i, j + 5, None] is x[i, j + 5] with a trailing unit dim
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            # each box keeps only its single highest-scoring class
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat(
                (box, conf, j.float()),
                1)[conf.view(-1) >
                   conf_thresh]  # second filter: drop detections whose final class score < conf_thresh

        # Filter by class

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Sort by confidence
        # x = x[x[:, 4].argsort(descending=True)]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # per-class box offset
        '''
        offsetting boxes by class pushes different classes far apart, so the single NMS call
        (and the weighted merge below) effectively runs independently per class
        '''
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = nms(boxes, scores, iou_thresh)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n <
                      3E3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thresh  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(
                    1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except Exception:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                print(x, i, x.shape, i.shape)
                pass

        output[xi] = x[i]

    return output
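A minimal usage sketch, assuming nms is torchvision.ops.nms and xywh2xyxy is the helper sketched after Example no. 10; the shapes follow a YOLO-style head with 80 classes, and the input tensor here is random dummy data:

import torch
from torchvision.ops import nms  # the nms called inside non_max_suppression (assumption)

# dummy raw predictions for a batch of 2 images: [bs, num_boxes, 5 + num_cls]
prediction = torch.rand(2, 1000, 85)
prediction[..., :2] *= 640   # xc, yc at input-image scale
prediction[..., 2:4] *= 100  # w, h

detections = non_max_suppression(prediction, conf_thresh=0.3, iou_thresh=0.6)
for img_idx, det in enumerate(detections):
    if det is None:  # no boxes survived the confidence filter
        continue
    print(img_idx, det.shape)  # det: [n, 6] (x1, y1, x2, y2, conf, cls)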