def focalloss_simple(pcls, gcls, alpha=0.25, gamma=2, reduction='none'):
    '''
    Rarely used. Simplified version for G = 1: no positive/negative/ignore
    handling and no ignore mask.
    :param pcls: supports 2D or 3D
    :param gcls:
    :param alpha:
    :param gamma:
    :param reduction:
    :return:
    '''
    assert reduction in (None, 'none', 'mean', 'sum')
    eps = torch.finfo(torch.float16).eps  # eps = 1e-6
    pcls = pcls.clamp(min=eps, max=1 - eps)
    # positives weighted by 0.25, negatives by 0.75
    pt = (1 - pcls) * gcls + pcls * (1 - gcls)
    weight = (alpha * gcls + (1 - alpha) * (1 - gcls)) * pt.pow(gamma)
    loss_val = x_bce(pcls, gcls, weight)

    if reduction == 'mean':
        return loss_val.mean(-1)
    elif reduction == 'sum':
        return loss_val.sum()
    else:  # 'none'
        return loss_val
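# --- x_bce is used throughout this file but is not defined in this section. A minimal
# --- sketch of its assumed behaviour (element-wise BCE on probabilities that the caller
# --- has already sigmoided and clamped, with an optional weight); the real helper may differ.
def _x_bce_sketch(pconf, gconf, weight=1., reduction='none'):
    loss = -(gconf * torch.log(pconf) + (1. - gconf) * torch.log(1. - pconf)) * weight
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss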
def quality_focal_loss(pcls_sigmoid, gcls, score_iou, mask_pos, mash_ignore=None, beta=2.0, is_debug=False):
    '''
    For continuous labels (score_iou). Decoupled training tends to give a low cls
    score with a high iou score --- here the classification label and the
    regression quality are joined via the iou.
    :param pcls_sigmoid: torch.Size([5, 3614, 3])
    :param gcls: one-hot labels torch.Size([5, 3614, 3]), used to locate positives
    :param score_iou: torch.Size([5, 3614]) box iou score (regression score, 0~1)
    :param beta:
    :return:
    '''
    eps = torch.finfo(torch.float16).eps
    pcls_sigmoid = pcls_sigmoid.clamp(min=eps, max=1 - eps)

    # locate the concrete positives
    mask_pos4cls = gcls == 1  # same shape torch.Size([5, 3614, 3])
    # ([5, 3614] -> [5, 3614, 1] -> [5, 3614, 3]) AND [5, 3614, 3]
    _mask_pos = torch.logical_and(mask_pos.unsqueeze(-1).repeat(1, 1, gcls.shape[-1]), mask_pos4cls)

    '''----- rewritten from the inputs -----'''
    # match dims [5, 3614] -> [5, 3614, 1] -> [5, 3614, 3]
    _score_iou = score_iou.unsqueeze(-1).repeat(1, 1, gcls.shape[-1])
    # positive/negative handling
    scale_factor = torch.where(_mask_pos, torch.abs(_score_iou - pcls_sigmoid), pcls_sigmoid)
    scale_factor = scale_factor.pow(beta)  # hard-example weighting -> positive/negative imbalance
    _label = torch.where(_mask_pos, _score_iou,
                         torch.zeros_like(pcls_sigmoid, device=pcls_sigmoid.device))
    bce = x_bce(pcls_sigmoid, _label)
    '''----- rewritten from the inputs done -----'''

    if mash_ignore is None:
        loss_val = scale_factor * bce
    else:
        loss_val = scale_factor * bce * torch.logical_not(mash_ignore)

    if is_debug:
        if mash_ignore is None:
            _mask_neg = torch.logical_not(_mask_pos)
        else:
            mash_ignore = mash_ignore.unsqueeze(-1).repeat(1, 1, gcls.shape[-1])
            _mask_neg = torch.logical_not(torch.logical_or(_mask_pos, mash_ignore))
        l_pos = loss_val * _mask_pos
        l_neg = loss_val * _mask_neg
        return l_pos, l_neg

    return loss_val
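# --- Usage sketch for quality_focal_loss (made-up tensors; shapes follow the docstring).
def _demo_quality_focal_loss():
    pcls_sigmoid = torch.rand(5, 3614, 3)
    gcls = torch.zeros(5, 3614, 3)
    gcls[:, :10, 0] = 1                       # pretend the first 10 anchors are positives of class 0
    score_iou = torch.rand(5, 3614)           # iou of the predicted box with its GT, 0~1
    mask_pos = (gcls == 1).any(-1)            # 2D positive mask
    l_pos, l_neg = quality_focal_loss(pcls_sigmoid, gcls, score_iou, mask_pos, is_debug=True)
    return l_pos.sum(), l_neg.sum()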
def forward(self, pred, target, mask_calc=None, *args, **kwargs):
    """Calculate the GHM-C loss.

    Args:
        pred (float tensor of size [batch_num, class_num]):
            The direct prediction of classification fc layer.
        target (float tensor of size [batch_num, class_num]):
            Binary class target for each sample.
        mask_calc (float tensor of size [batch_num, class_num]):
            the value is 1 if the sample is valid and 0 if ignored.
    Returns:
        The gradient harmonized loss.
    """
    device = pred.device
    if mask_calc is None:  # by default every sample enters the calculation
        mask_calc = torch.ones_like(pred, dtype=torch.bool, device=device)

    # # the target should be binary class label
    # if pred.dim() != target.dim():
    #     target, mask_calc = _expand_binary_labels(
    #         target, mask_calc, pred.size(-1))

    target, mask_calc = target.float(), mask_calc.float()
    edges = self.edges
    mmt = self.momentum
    weights = torch.zeros_like(pred)

    # gradient length
    # g = torch.abs(pred.sigmoid().detach() - target)
    g = torch.abs(pred.detach() - target)

    valid = mask_calc > 0
    tot = max(valid.float().sum().item(), 1.0)
    n = 0  # n valid bins
    for i in range(self.bins):
        inds = (g >= edges[i]) & (g < edges[i + 1]) & valid
        num_in_bin = inds.sum().item()
        if num_in_bin > 0:
            if mmt > 0:
                self.acc_sum[i] = mmt * self.acc_sum[i] + (1 - mmt) * num_in_bin
                weights[inds] = tot / self.acc_sum[i]
            else:
                weights[inds] = tot / num_in_bin
            n += 1
    if n > 0:
        weights = weights / n

    # loss = F.binary_cross_entropy_with_logits(pred, target, weights, reduction='sum') / tot
    loss = x_bce(pred, target, weights) / tot
    return loss * self.loss_weight
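# --- The attributes read by the GHM-C forward above (self.bins, self.edges, self.acc_sum,
# --- self.momentum, self.loss_weight) are set in the owning class's __init__, which is not
# --- part of this section. A plausible sketch consistent with that forward; the real
# --- constructor may differ.
class _GHMC_Sketch(torch.nn.Module):
    def __init__(self, bins=10, momentum=0., loss_weight=1.):
        super().__init__()
        self.bins = bins
        self.momentum = momentum
        self.loss_weight = loss_weight
        edges = torch.arange(bins + 1).float() / bins
        edges[-1] += 1e-6                      # make sure g == 1 falls into the last bin
        self.register_buffer('edges', edges)
        self.acc_sum = [0.] * bins             # EMA of per-bin sample counts (used when momentum > 0)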
def forward(self, pcls_sigmoid, gcls, mask_pos, mash_ignore=None, is_debug=False):
    '''
    :param pcls_sigmoid: must be 3D, torch.Size([5, 3614, 3])
    :param gcls:
    :param mask_pos: must be 2D
    :param mash_ignore:
    :param is_debug:
    :return:
    '''
    eps = torch.finfo(torch.float16).eps
    pcls_sigmoid = pcls_sigmoid.clamp(min=eps, max=1 - eps)

    # locate the concrete positives
    mask_pos4cls = gcls == 1  # same shape torch.Size([5, 3614, 3])
    _mask_pos = torch.logical_and(mask_pos.unsqueeze(-1).repeat(1, 1, gcls.shape[-1]), mask_pos4cls)

    '''--- to be overridden by subclasses ---'''
    weight, _label = self.get_args(_mask_pos, pcls_sigmoid, gcls)

    bce = x_bce(pcls_sigmoid, _label)
    if mash_ignore is None:
        loss_val = weight * bce
    else:
        loss_val = weight * bce * torch.logical_not(mash_ignore)

    if is_debug:
        if mash_ignore is None:
            _mask_neg = torch.logical_not(_mask_pos)
        else:
            mash_ignore = mash_ignore.unsqueeze(-1).repeat(1, 1, gcls.shape[-1])
            _mask_neg = torch.logical_not(torch.logical_or(_mask_pos, mash_ignore))
        l_pos = loss_val * _mask_pos
        l_neg = loss_val * _mask_neg
        return l_pos, l_neg

    if self.reduction == 'mean':
        return loss_val.mean(-1)
    elif self.reduction == 'sum':
        return loss_val.sum()
    else:  # 'none'
        return loss_val
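# --- get_args is the hook subclasses override ("to be overridden" above) to supply the
# --- focal weight and the training label. A hypothetical implementation reproducing the
# --- standard hard-label focal weighting; it is not the repository's actual subclass.
def _get_args_sketch(_mask_pos, pcls_sigmoid, gcls, alpha=0.25, gamma=2.):
    alpha_ts = torch.tensor(alpha, device=pcls_sigmoid.device)
    _alpha_factor = torch.where(_mask_pos, alpha_ts, 1. - alpha_ts)    # pos get alpha, neg get 1-alpha
    pt = torch.where(_mask_pos, 1. - pcls_sigmoid, pcls_sigmoid)       # hard samples get a large factor
    weight = _alpha_factor * pt.pow(gamma)
    return weight, gcls                                                # hard labels are used unchanged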
def forward(self, outs, targets, imgs_ts=None):
    '''
    Feature-map sizes list [20, 10, 5, 3]
    :param outs: torch.Size([2, 534, 7]) for input 160
    :param targets:
        'image_id': 413,
        'size': tensor([500., 309.])
        'boxes': tensor([[0.31400, 0.31715, 0.71000, 0.60841]]),
        'labels': tensor([1.])
    :param imgs_ts:
    :return:
    '''
    cfg = self.cfg
    device = outs.device
    batch, dim_total, pdim = outs.shape

    # 1 + cfg.NUM_CLASSES + 1 + 4 + cfg.NUM_KEYPOINTS * 2
    # back cls centerness ltrb positivesample iou area
    gdim = 1 + cfg.NUM_CLASSES + 1 + 4 + 1 + 1 + 1
    gres = torch.empty((batch, dim_total, gdim), device=device)

    for i in range(batch):
        gboxes_ltrb_b = targets[i]['boxes']
        glabels_b = targets[i]['labels']
        gres[i] = match4fcos_v2(gboxes_ltrb_b=gboxes_ltrb_b,
                                glabels_b=glabels_b,
                                gdim=gdim,
                                pcos=outs,
                                img_ts=imgs_ts[i],
                                cfg=cfg, )

    s_ = 1 + cfg.NUM_CLASSES
    # outs = outs[:, :, :s_ + 1].sigmoid()
    mask_pos = gres[:, :, 0] == 0  # background flag is 0 -> positive
    nums_pos = torch.sum(mask_pos, dim=-1)
    nums_pos = torch.max(nums_pos, torch.ones_like(nums_pos, device=device))

    # back cls centerness ltrb positivesample iou(unused for now) area [2125, 12]
    ''' ---------------- cls loss: all samples (pos + neg); positives are in-box samples ---------------- '''
    # obj_cls_loss = BCE_focal_loss()  # the extra background channel is included here
    pcls_sigmoid = outs[:, :, :s_].sigmoid()
    gcls = gres[:, :, :s_]
    # l_cls = torch.mean(obj_cls_loss(pcls_sigmoid, gcls) / nums_pos)
    l_cls_pos, l_cls_neg = focalloss_fcos(pcls_sigmoid, gcls)
    l_cls_pos = torch.mean(torch.sum(torch.sum(l_cls_pos, -1), -1) / nums_pos)
    l_cls_neg = torch.mean(torch.sum(torch.sum(l_cls_neg, -1), -1) / nums_pos)

    ''' ---------------- conf loss: only radius positives, center-ness ---------------- '''
    # positives according to the "positive sample" flag
    mask_pp = gres[:, :, s_ + 1 + 4] == 1
    pconf_sigmoid = outs[:, :, s_].sigmoid()  # center-ness
    gcenterness = gres[:, :, s_]  # (nn, 1)

    # use centerness
    # _loss_val = x_bce(pconf_sigmoid, gcenterness, reduction="none")
    _loss_val = x_bce(pconf_sigmoid, torch.ones_like(pconf_sigmoid), reduction="none")  # target 1 within the radius
    # only radius positives are counted, improving precision
    l_conf = 5. * torch.mean(torch.sum(_loss_val * mask_pp.float(), dim=-1) / nums_pos)

    ''' ---------------- box loss: in-box positives ---------------- '''
    # conf1 + cls3 + reg4
    poff_ltrb = outs[:, :, s_:s_ + 4]  # all feature-map distances, raw/relu or exp
    # goff_ltrb = gres[:, :, s_ + 1:s_ + 1 + 4]
    g_ltrb = gres[:, :, s_ + 1:s_ + 1 + 4]

    # decoded in normalized image coords; the IOU matches the one computed on the feature map
    pboxes_ltrb = boxes_decode4fcos(self.cfg, poff_ltrb)
    p_ltrb_pos = pboxes_ltrb[mask_pos]
    g_ltrb_pos = g_ltrb[mask_pos]
    iou = bbox_iou4one(p_ltrb_pos, g_ltrb_pos, is_giou=True)
    # bce of iou against 1; debug: iou.isnan().any() or iou.isinf().any()
    l_reg = 5 * torch.mean((1 - iou) * gcenterness[mask_pos])

    l_total = l_cls_pos + l_cls_neg + l_conf + l_reg

    log_dict = {}
    log_dict['l_total'] = l_total.item()
    log_dict['l_cls_pos'] = l_cls_pos.item()
    log_dict['l_cls_neg'] = l_cls_neg.item()
    log_dict['l_conf'] = l_conf.item()
    log_dict['l_reg'] = l_reg.item()
    # log_dict['l_iou_max'] = iou.max().item()
    return l_total, log_dict
def forward(self, pyolos, targets, imgs_ts=None):
    '''
    :param pyolos: torch.Size([3, 40, 13, 13]) [conf-1, class-3, box-4] 5*8=40
        d19: torch.Size([2, 81, 13, 13]) 9 * (conf + type4 + box4)
    :param targets:
        target['boxes'] = target['boxes'].to(device)
        target['labels'] = target['labels'].to(device)
        target['size'] = target['size']
        target['image_id'] = int
    :param imgs_ts:
    :return:
    '''
    cfg = self.cfg
    device = pyolos.device
    batch, c, h, w = pyolos.shape
    s_ = 1 + cfg.NUM_CLASSES

    # [3, 40, 13, 13] -> [3, 8, 5, 13*13] -> [3, 169, 5, 8]
    pyolos = pyolos.view(batch, s_ + 4, cfg.NUM_ANC, -1).permute(0, 3, 2, 1).contiguous()
    # [3, 169, 5, 8] -> [3, 169*5, 8]
    pyolos = pyolos.view(batch, -1, s_ + 4)
    preg_pos = pyolos[..., s_:s_ + 4]

    '''-------------- GT matching ---------------'''
    # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
    if cfg.MODE_TRAIN == 4:
        gdim = 1 + cfg.NUM_CLASSES + 4 + 1 + 4 + 1  # torch.Size([3, 13, 13, 5, 13])
    else:
        gdim = 1 + cfg.NUM_CLASSES + 4 + 1 + 4  # torch.Size([3, 13, 13, 5, 13])
    gyolos = torch.empty((batch, h, w, cfg.NUM_ANC, gdim), device=device)

    # match GT
    for i, target in enumerate(targets):  # iterate over the batch
        gboxes_ltrb_b = target['boxes']  # ltrb
        glabels_b = target['labels']

        # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
        if cfg.MODE_TRAIN == 4:
            gyolos[i] = fmatch4yolov2_99(
                gboxes_ltrb_b=gboxes_ltrb_b,
                glabels_b=glabels_b,
                grid=h,  # 7, single level only
                gdim=gdim,
                device=device,
                cfg=cfg,
                preg_b=preg_pos[i],
                img_ts=imgs_ts[i],
            )
        else:
            gyolos[i] = fmatch4yolov2(
                gboxes_ltrb_b=gboxes_ltrb_b,
                glabels_b=glabels_b,
                grid=h,  # 7, single level only
                gdim=gdim,
                device=device,
                cfg=cfg,
                img_ts=imgs_ts[i],
            )

        '''visual check'''
        if cfg.IS_VISUAL:
            # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
            gyolo_test = gyolos[i].clone()  # torch.Size([32, 13, 13, 9])
            gyolo_test = gyolo_test.view(-1, gdim)
            gconf_one = gyolo_test[:, 0]
            # mask_pos = torch.logical_or(gconf_one == 1, gconf_one == -1)
            mask_pos_2d = gconf_one == 1

            gtxywh = gyolo_test[:, 1 + cfg.NUM_CLASSES:1 + cfg.NUM_CLASSES + 4]
            # recover the xy here
            _xy_grid = gtxywh[:, :2] + f_mershgrid(h, w, is_rowcol=False, num_repeat=cfg.NUM_ANC).to(device)
            hw_ts = torch.tensor((h, w), device=device)
            gtxywh[:, :2] = torch.true_divide(_xy_grid, hw_ts)
            gtxywh = gtxywh[mask_pos_2d]
            gtxywh[:, 2:4] = torch.exp(gtxywh[:, 2:]) / h  # normalized to the original image

            from f_tools.pic.enhance.f_data_pretreatment4pil import f_recover_normalization4ts
            img_ts = f_recover_normalization4ts(imgs_ts[i])
            from torchvision.transforms import functional as transformsF
            img_pil = transformsF.to_pil_image(img_ts).convert('RGB')
            import numpy as np
            img_np = np.array(img_pil)
            f_show_od_np4plt(img_np, gboxes_ltrb=gboxes_ltrb_b.cpu(),
                             pboxes_ltrb=xywh2ltrb(gtxywh.cpu()),
                             is_recover_size=True,
                             grids=(h, w))

    # torch.Size([32, 13, 13, 5, 13]) -> [32, 13*13*5, 13]
    gyolos = gyolos.view(batch, -1, gdim)
    gconf = gyolos[:, :, 0]  # positives are 1, torch.Size([32, 910])
    mask_pos_2d = gconf > 0
    mask_neg_2d = gconf == 0  # -1 is ignored
    nums_pos = (mask_pos_2d.sum(-1).to(torch.float)).clamp(min=torch.finfo(torch.float16).eps)
    nums_neg = (mask_neg_2d.sum(-1).to(torch.float)).clamp(min=torch.finfo(torch.float16).eps)
    pyolos_pos = pyolos[mask_pos_2d]  # torch.Size([32, 845, 8]) -> torch.Size([40, 8])
    gyolos_pos = gyolos[mask_pos_2d]  # torch.Size([32, 845, 13]) -> torch.Size([40, 8])

    ''' ---------------- cls loss ---------------- '''
    pcls_sigmoid_pos = pyolos_pos[:, 1:s_].sigmoid()
    gcls_pos = gyolos_pos[:, 1:s_]
    _loss_val = x_bce(pcls_sigmoid_pos, gcls_pos, reduction="none")  # torch.Size([46, 3])
    # torch.Size([46, 3]) -> scalar
    l_cls = _loss_val.sum(-1).mean() * cfg.LOSS_WEIGHT[2]

    ''' ---------------- conf loss ---------------- '''
    pconf_sigmoid = pyolos[:, :, 0].sigmoid()  # needs sigmoid normalization, torch.Size([3, 845])

    # ------------ conf-mse ------------
    # _loss_val = F.mse_loss(pconf_sigmoid, gconf, reduction="none")
    # l_conf_pos = ((_loss_val * mask_pos_2d).sum(-1) / nums_pos).mean() * 10
    # l_conf_neg = ((_loss_val * mask_neg_2d).sum(-1) / nums_neg).mean() * 30

    # ------------ focalloss ------------
    mash_ignore_2d = torch.logical_not(torch.logical_or(mask_pos_2d, mask_neg_2d))
    l_pos, l_neg = focalloss(pconf_sigmoid, gconf, mask_pos=mask_pos_2d,
                             mash_ignore=mash_ignore_2d, is_debug=True, alpha=0.5)
    l_conf_pos = (l_pos.sum(-1).sum(-1) / nums_pos).mean()
    l_conf_neg = (l_neg.sum(-1).sum(-1) / nums_neg).mean() * 3

    ''' ---------------- box loss ----------------- '''
    log_dict = {}
    if cfg.MODE_TRAIN == 4:
        # ------------ iou loss ------------
        # decode pxywh and compute the IOU between prediction and GT as gconf
        preg_pos = pyolos_pos[:, s_:s_ + 4]
        gltrb_pos_tx = gyolos_pos[:, s_ + 4 + 1:s_ + 4 + 1 + 4]
        match_anc_ids = gyolos_pos[:, s_ + 4 + 1 + 4]

        # decode yolo2 at feature-map scale
        pxy_pos_sigmoid = preg_pos[..., :2].sigmoid()  # differs from yolo1 here
        match_ancs = torch.tensor(cfg.ANCS_SCALE, device=device)[match_anc_ids.long()]
        pwh_pos_scale = torch.exp(preg_pos[..., 2:4]) * match_ancs * h  # back to feature-map scale
        pzxywh = torch.cat([pxy_pos_sigmoid, pwh_pos_scale], -1)

        iou_zg = bbox_iou4one_2d(xywh2ltrb(pzxywh), gltrb_pos_tx, is_giou=True)
        # iou_zg = bbox_iou4y(xywh2ltrb4ts(pzxywh), gltrb_pos_tx, GIoU=True)
        # print(iou_zg)
        l_reg = (1 - iou_zg).mean() * 2

        ''' ---------------- loss done ----------------- '''
        l_total = l_conf_pos + l_conf_neg + l_cls + l_reg
        log_dict['l_reg'] = l_reg.item()
    else:
        # ------------ mse + bce ------------ 666666
        # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
        pxy_pos_sigmoid = pyolos_pos[:, s_:s_ + 2].sigmoid()  # needs sigmoid normalization
        pwh_pos_scale = pyolos_pos[:, s_ + 2:s_ + 4]
        weight_pos = gyolos_pos[:, s_ + 4 + 1]  # torch.Size([32, 845])
        gtxy_pos = gyolos_pos[:, s_:s_ + 2]  # [nn]
        gtwh_pos = gyolos_pos[:, s_ + 2:s_ + 4]

        _loss_val = x_bce(pxy_pos_sigmoid, gtxy_pos, reduction="none")
        l_txty = (_loss_val.sum(-1) * weight_pos).mean()
        _loss_val = F.mse_loss(pwh_pos_scale, gtwh_pos, reduction="none")
        l_twth = (_loss_val.sum(-1) * weight_pos).mean()

        ''' ---------------- loss done ----------------- '''
        l_total = l_conf_pos + l_conf_neg + l_cls + l_txty + l_twth
        log_dict['l_xy'] = l_txty.item()
        log_dict['l_wh'] = l_twth.item()

    log_dict['l_total'] = l_total.item()
    log_dict['l_conf_pos'] = l_conf_pos.item()
    log_dict['l_conf_neg'] = l_conf_neg.item()
    log_dict['l_cls'] = l_cls.item()
    log_dict['p_max'] = pconf_sigmoid.max().item()
    log_dict['p_min'] = pconf_sigmoid.min().item()
    log_dict['p_mean'] = pconf_sigmoid.mean().item()
    return l_total, log_dict
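# --- Illustration (not repository code): the shape bookkeeping performed by the view/permute
# --- at the top of the forward above, assuming NUM_CLASSES=3 and NUM_ANC=5 so that
# --- 40 channels = 5 anchors * (1 conf + 3 cls + 4 box).
def _demo_head_reshape4yolo2():
    x = torch.randn(3, 40, 13, 13)                                   # raw head output
    x = x.view(3, 8, 5, 13 * 13).permute(0, 3, 2, 1).contiguous()    # [3, 169, 5, 8]
    x = x.view(3, -1, 8)                                             # [3, 845, 8] = [batch, h*w*anc, 1+cls+4]
    return x.shape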
def forward(self, pyolos, targets, imgs_ts=None):
    '''
    :param pyolos: torch.Size([2, 45, 13, 13])
    :param targets:
    :param imgs_ts:
    :return:
    '''
    cfg = self.cfg
    device = pyolos.device
    batch, c, h, w = pyolos.shape  # torch.Size([2, 45, 13, 13])

    # [3, 40, 13, 13] -> [3, 8, 5, 13*13] -> [3, 169, 5, 8]
    pyolos = pyolos.view(batch, 1 + cfg.NUM_CLASSES + 4, cfg.NUM_ANC, -1).permute(0, 3, 2, 1).contiguous()
    # [3, 169, 5, 8] -> [3, 169*5, 8]
    pyolos = pyolos.view(batch, h * w * cfg.NUM_ANC, -1)
    # pyolos = pyolos.view(batch, -1, s_ + 4)
    preg = pyolos[..., 1 + cfg.NUM_CLASSES:1 + cfg.NUM_CLASSES + 4]  # torch.Size([2, 169, 5, 4])
    pltrb = boxes_decode4yolo2_v2(preg, h, w, cfg)  # normalized to the original image, used to update conf [2, 845, 4]

    '''-------------- GT matching ---------------'''
    # conf-1, cls-1, txywh-4, weight-1, gltrb-4
    if cfg.MODE_TRAIN == 99 or cfg.MODE_TRAIN == 98:
        gdim = 1 + 1 + 4 + 1 + 4
    gyolos = torch.empty((batch, h, w, cfg.NUM_ANC, gdim), device=device)

    # match GT
    for i, target in enumerate(targets):  # iterate over the batch
        gboxes_ltrb_b = target['boxes']  # ltrb
        glabels_b = target['labels']

        # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
        if cfg.MODE_TRAIN == 99 or cfg.MODE_TRAIN == 98:
            gyolos[i] = fmatch4yolov2_99(
                gboxes_ltrb_b=gboxes_ltrb_b,
                glabels_b=glabels_b,
                grid=h,  # 7, single level only
                gdim=gdim,
                device=device,
                cfg=cfg,
                preg_b=preg[i],
                img_ts=imgs_ts[i],
            )

        '''visual check'''
        if cfg.IS_VISUAL:
            # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
            gyolo_test = gyolos[i].clone()  # torch.Size([32, 13, 13, 9])
            gyolo_test = gyolo_test.view(-1, gdim)
            gconf_one = gyolo_test[:, 0]
            # mask_pos = torch.logical_or(gconf_one == 1, gconf_one == -1)
            mask_pos_2d = gconf_one == 1

            gtxywh = gyolo_test[:, 1 + cfg.NUM_CLASSES:1 + cfg.NUM_CLASSES + 4]
            # recover the xy here
            _xy_grid = gtxywh[:, :2] + f_mershgrid(h, w, is_rowcol=False, num_repeat=cfg.NUM_ANC).to(device)
            hw_ts = torch.tensor((h, w), device=device)
            gtxywh[:, :2] = torch.true_divide(_xy_grid, hw_ts)
            gtxywh = gtxywh[mask_pos_2d]
            gtxywh[:, 2:4] = torch.exp(gtxywh[:, 2:]) / h  # normalized to the original image

            from f_tools.pic.enhance.f_data_pretreatment4pil import f_recover_normalization4ts
            img_ts = f_recover_normalization4ts(imgs_ts[i])
            from torchvision.transforms import functional as transformsF
            img_pil = transformsF.to_pil_image(img_ts).convert('RGB')
            import numpy as np
            img_np = np.array(img_pil)
            f_show_od_np4plt(img_np, gboxes_ltrb=gboxes_ltrb_b.cpu(),
                             pboxes_ltrb=xywh2ltrb(gtxywh.cpu()),
                             is_recover_size=True,
                             grids=(h, w))

    # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
    # torch.Size([32, 13, 13, 5, 11]) -> [32, 13*13*5, 11] -> [2, 845, 11]
    gyolos = gyolos.view(batch, -1, gdim)
    gconf = gyolos[:, :, 0]  # positives are 1, torch.Size([32, 910])
    # mask_pos_3d = gyolos[:, :, :1] > 0
    # mask_neg_3d = gyolos[:, :, :1] == 0
    mask_pos_2d = gconf > 0
    mask_neg_2d = gconf == 0  # -1 is ignored
    nums_pos = (mask_pos_2d.sum(-1).to(torch.float)).clamp(min=torch.finfo(torch.float16).eps)
    nums_neg = (mask_neg_2d.sum(-1).to(torch.float)).clamp(min=torch.finfo(torch.float16).eps)
    pyolos_pos = pyolos[mask_pos_2d]  # torch.Size([32, 845, 8]) -> torch.Size([40, 8])
    gyolos_pos = gyolos[mask_pos_2d]  # torch.Size([32, 845, 13]) -> torch.Size([40, 8])

    # [2, 845, 4] ->
    # iou_zg = bbox_iou4one(pltrb, gyolos[..., 1 + 1 + 4 + 1:1 + 1 + 4 + 1 + 4], is_giou=True)
    iou_zg = bbox_iou4one(pltrb, gyolos[..., 1 + 1 + 4 + 1:1 + 1 + 4 + 1 + 4], is_ciou=True)

    ''' ---------------- cls loss ---------------- '''
    # pcls_sigmoid_pos = pyolos_pos[:, 1:1 + cfg.NUM_CLASSES].sigmoid()
    pcls_pos = pyolos_pos[:, 1:1 + cfg.NUM_CLASSES]
    gcls_pos = gyolos_pos[:, 1].long()  # torch.Size([3, 4]) ^^ tensor([2., 2., 3.])
    _loss_val = F.cross_entropy(pcls_pos, gcls_pos, reduction="none")
    # _loss_val = x_bce(pcls_sigmoid_pos, gcls_pos, reduction="none")  # torch.Size([46, 3])
    # torch.Size([46, 3]) -> scalar
    l_cls = _loss_val.sum(-1).mean()

    ''' ---------------- conf loss ---------------- '''
    pconf_sigmoid = pyolos[:, :, 0].sigmoid()  # needs sigmoid normalization, torch.Size([3, 845])
    # ------------ conf-mse ------------
    _loss_val = F.mse_loss(pconf_sigmoid, iou_zg, reduction="none")  # the IOU is used here
    l_conf_pos = ((_loss_val * mask_pos_2d).sum(-1) / nums_pos).mean() * 5.
    l_conf_neg = ((_loss_val * mask_neg_2d).sum(-1) / nums_neg).mean() * 1.

    ''' ---------------- box loss ----------------- '''
    pxy_pos_sigmoid = pyolos_pos[:, 1 + cfg.NUM_CLASSES:1 + cfg.NUM_CLASSES + 2].sigmoid()  # needs sigmoid normalization
    pwh_pos_scale = pyolos_pos[:, 1 + cfg.NUM_CLASSES + 2:1 + cfg.NUM_CLASSES + 4]
    weight_pos = gyolos_pos[:, 1 + 1 + 4 + 1]  # torch.Size([32, 845])
    gtxy_pos = gyolos_pos[:, 1 + 1:1 + 1 + 2]  # [nn]
    gtwh_pos = gyolos_pos[:, 1 + 1 + 2:1 + 1 + 4]

    _loss_val = x_bce(pxy_pos_sigmoid, gtxy_pos, reduction="none")
    l_txty = (_loss_val.sum(-1) * weight_pos).mean()
    _loss_val = F.mse_loss(pwh_pos_scale, gtwh_pos, reduction="none")
    l_twth = (_loss_val.sum(-1) * weight_pos).mean()

    ''' ---------------- loss done ----------------- '''
    log_dict = {}
    l_total = l_conf_pos + l_conf_neg + l_cls + l_txty + l_twth

    log_dict['l_total'] = l_total.item()
    log_dict['l_xy'] = l_txty.item()
    log_dict['l_wh'] = l_twth.item()
    log_dict['l_conf_pos'] = l_conf_pos.item()
    log_dict['l_conf_neg'] = l_conf_neg.item()
    log_dict['l_cls'] = l_cls.item()
    return l_total, log_dict
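# --- Illustration (not repository code): unlike the other heads, this variant keeps a single
# --- class-id channel, because F.cross_entropy consumes raw logits plus integer class indices
# --- rather than one-hot rows. Sizes below are made up.
def _demo_cross_entropy_cls():
    logits = torch.randn(3, 4)                                # 3 positive cells, 4 classes
    labels = torch.tensor([2, 2, 3])                          # per-cell class ids, as stored in gyolos[..., 1]
    return F.cross_entropy(logits, labels, reduction='none')  # shape [3], one loss per positive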
def forward(self, outs, targets, imgs_ts=None):
    '''
    :param outs: tuple
        ptxywh, torch.Size([2, 32526, 4])
        pcls, torch.Size([2, 32526, 4])  already normalized, cls+1
    :param targets:
    :param imgs_ts:
    :return:
    '''
    cfg = self.cfg
    ptxywh, pcategory_sigmoid = outs
    pcategory_sigmoid = pcategory_sigmoid.sigmoid()  # normalize in one place
    pconf_sigmoid = pcategory_sigmoid[:, :, 0]  # torch.Size([2, 32526])
    pcls_sigmoid = pcategory_sigmoid[:, :, 1:]  # already sigmoided
    device = ptxywh.device
    batch, pdim1, c = ptxywh.shape

    # conf-1, cls-num_class, txywh-4, keypoint-nn = 8 + nn
    gdim = 1 + cfg.NUM_CLASSES + 4
    if cfg.NUM_KEYPOINTS > 0:
        gdim += cfg.NUM_KEYPOINTS
    gretinas = torch.zeros((batch, pdim1, gdim), device=device)

    s_ = 1 + cfg.NUM_CLASSES  # the leading entries are conf-1, cls-3
    for i in range(batch):
        # if cfg.IS_VISUAL:
        #     _img_ts = imgs_ts[i].clone()
        #     from f_tools.pic.enhance.f_data_pretreatment4pil import f_recover_normalization4ts
        #     _img_ts = f_recover_normalization4ts(_img_ts)
        #     f_show_od_ts4plt(_img_ts, gboxes_ltrb=boxes_ltrb_one.cpu(),
        #                      is_recover_size=True,
        #                      # grids=grids_ts.cpu().numpy(),
        #                      # plabels_text=pconf_b[index_match_dim].sigmoid(),
        #                      # glabels_text=colrow_index[None]
        #                      )
        gboxes_ltrb_b = targets[i]['boxes']
        glabels_b = targets[i]['labels']
        if cfg.NUM_KEYPOINTS > 0:
            gkeypoints_b = targets['keypoints']  # torch.Size([batch, 10])
        else:
            gkeypoints_b = None

        # per-image matching; mask_neg_b and mash_ignore_b may be None
        boxes_index, mask_pos_b, mask_neg_b, mash_ignore_b = matchs_gt_b(
            cfg,
            gboxes_ltrb_b=gboxes_ltrb_b,
            glabels_b=glabels_b,
            anc_obj=self.anc_obj,
            mode='iou',  # mode='atss',
            ptxywh_b=ptxywh[i],
            img_ts=imgs_ts[i],
            num_atss_topk=9)

        '''positive / negative assignment'''
        gretinas[i][mask_pos_b, 0] = torch.tensor(1., device=device)
        if mash_ignore_b is not None:
            gretinas[i][mash_ignore_b, 0] = torch.tensor(-1., device=device)
        labels_b = labels2onehot4ts(glabels_b - 1, cfg.NUM_CLASSES)
        gretinas[i][mask_pos_b, 1:s_] = labels_b[boxes_index][mask_pos_b].type(torch.float)  # only positives are matched
        # _gtxywh = boxes_encode4retina(cfg, self.anc_obj, gboxes_ltrb_b[boxes_index])
        # _gtxywh = boxes_encode4ssd(cfg, self.anc_obj, gboxes_ltrb_b[boxes_index])
        gretinas[i][mask_pos_b, s_:s_ + 4] = gboxes_ltrb_b[boxes_index][mask_pos_b]

        # gretinas[i] = pos_match_retina(cfg, dim=gdim, gkeypoints_b=None,
        #                                gboxes_ltrb_b=gboxes_ltrb_b, glabels_b=glabels_b, anc_obj=self.anc_obj,
        #                                ptxywh_b=ptxywh[i], img_ts=imgs_ts[i])

        # visualize the matched positives
        if cfg.IS_VISUAL:
            _mask_pos = gretinas[i, :, 0] > 0  # 3d -> 1d
            _img_ts = imgs_ts[i].clone()
            anc_ltrb = xywh2ltrb(self.anc_obj.ancs_xywh)[_mask_pos]
            from f_tools.pic.enhance.f_data_pretreatment4pil import f_recover_normalization4ts
            _img_ts = f_recover_normalization4ts(_img_ts)
            flog.debug('number of GT %s , number of positives %s' % (gboxes_ltrb_b.shape[0], anc_ltrb.shape[0]))
            f_show_od_ts4plt(_img_ts, gboxes_ltrb=gboxes_ltrb_b.cpu(),
                             pboxes_ltrb=anc_ltrb.cpu(),
                             is_recover_size=True,
                             # grids=grids_ts.cpu().numpy(),
                             # plabels_text=pconf_b[index_match_dim].sigmoid(),
                             # glabels_text=colrow_index[None]
                             )

    mask_pos_2d = gretinas[:, :, 0] > 0  # torch.Size([2, 32526])
    nums_pos = (mask_pos_2d.sum(-1).to(torch.float)).clamp(min=torch.finfo(torch.float16).eps)
    # mask_neg_2d = gretinas[:, :, 0] == 0
    mask_ignore_2d = gretinas[:, :, 0] == -1
    # s_ = 1 + cfg.NUM_CLASSES

    ''' ---------------- cls loss ---------------- '''
    # pcls_sigmoid already normalized
    gcls = gretinas[:, :, 1:s_]
    _loss_val = x_bce(pcls_sigmoid, gcls, reduction="none")
    l_cls = ((_loss_val.sum(-1) * mask_pos_2d).sum(-1) / nums_pos).mean() * cfg.LOSS_WEIGHT[2]

    ''' ---------------- conf loss ---------------- '''
    # pconf_sigmoid already normalized
    gconf = gretinas[:, :, 0]  # already normalized
    _loss_val = x_bce(pconf_sigmoid, gconf, reduction="none")
    mask_neg_hard = f_ohem(_loss_val, nums_pos * 3, mask_pos=mask_pos_2d, mash_ignore=mask_ignore_2d)
    l_conf_pos = ((_loss_val * mask_pos_2d).sum(-1) / nums_pos).mean() * cfg.LOSS_WEIGHT[0]
    l_conf_neg = ((_loss_val * mask_neg_hard).sum(-1) / nums_pos).mean() * cfg.LOSS_WEIGHT[1]

    # l_pos, l_neg = focalloss(pconf_sigmoid, gconf, mask_pos=mask_pos_2d, mash_ignore=mask_ignore_2d,
    #                          is_debug=True, alpha=0.5)
    # l_conf_pos = (l_pos.sum(-1).sum(-1) / nums_pos).mean() * 7
    # l_conf_neg = (l_neg.sum(-1).sum(-1) / nums_pos).mean() * 7

    # l_pos, l_neg = focalloss(pconf, gconf, mask_pos=mask_pos, mash_ignore=mash_ignore,
    #                          alpha=0.25, gamma=2,
    #                          reduction='none', is_debug=True)
    # loss_conf_pos = (l_pos.sum(-1) / nums_pos).mean() * cfg.LOSS_WEIGHT[0]
    # loss_conf_neg = l_neg.sum(-1).mean() * cfg.LOSS_WEIGHT[1]

    ''' ---------------- regression loss ----------------- '''
    # computed on the filtered positives
    gboxes_ltrb_m_pos = gretinas[:, :, s_:s_ + 4][mask_pos_2d]
    ancs_xywh_m_pos = self.anc_obj.ancs_xywh.unsqueeze(0).repeat(batch, 1, 1)[mask_pos_2d]
    gtxywh_pos = boxes_encode4ssd(cfg, ancs_xywh_m_pos, ltrb2xywh(gboxes_ltrb_m_pos))
    _loss_val = F.smooth_l1_loss(ptxywh[mask_pos_2d], gtxywh_pos, reduction="none")
    l_box = _loss_val.sum(-1).mean()

    log_dict = OrderedDict()
    loss_total = l_conf_pos + l_conf_neg + l_cls + l_box

    log_dict['l_total'] = loss_total.item()
    log_dict['l_conf_pos'] = l_conf_pos.item()
    log_dict['l_conf_neg'] = l_conf_neg.item()
    log_dict['loss_cls'] = l_cls.item()
    log_dict['l_box'] = l_box.item()

    log_dict['cls_max'] = pcls_sigmoid.max().item()
    log_dict['conf_max'] = pconf_sigmoid.max().item()
    log_dict['cls_mean'] = pcls_sigmoid.mean().item()
    log_dict['conf_mean'] = pconf_sigmoid.mean().item()
    log_dict['cls_min'] = pcls_sigmoid.min().item()
    log_dict['conf_min'] = pconf_sigmoid.min().item()
    return loss_total, log_dict
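# --- f_ohem is not defined in this section. A minimal sketch of its assumed behaviour
# --- (per image, keep the `num_neg` hardest negatives of an element-wise loss and return
# --- them as a bool mask); the real implementation may differ.
def _f_ohem_sketch(loss_val, num_neg, mask_pos, mash_ignore=None):
    loss_neg = loss_val.clone().detach()
    loss_neg[mask_pos] = 0.                                    # positives are never selected
    if mash_ignore is not None:
        loss_neg[mash_ignore] = 0.                             # ignored samples are never selected
    _, idx = loss_neg.sort(dim=-1, descending=True)            # rank candidates per image
    _, rank = idx.sort(dim=-1)
    mask_neg_hard = rank < num_neg.long().unsqueeze(-1)        # top-k hardest per image
    return torch.logical_and(mask_neg_hard, torch.logical_not(mask_pos))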
def focalloss(pcls_sigmoid, gcls, mask_pos, mash_ignore=None, alpha=0.25, gamma=2, is_debug=False):
    '''
    For discrete labels.
    :param pcls_sigmoid:
    :param gcls:
    :param mask_pos: 2D mask
    :param mash_ignore: 2D mask
    :param alpha:
    :param gamma:
    :param reduction:
    :param is_debug:
    :return:
    '''
    eps = torch.finfo(torch.float16).eps
    pcls_sigmoid = pcls_sigmoid.clamp(min=eps, max=1 - eps)

    # locate the concrete positives
    if gcls.dim() == 3:
        mask_pos4cls = gcls == 1  # same shape torch.Size([5, 3614, 3])
        _mask_pos = torch.logical_and(mask_pos.unsqueeze(-1).repeat(1, 1, gcls.shape[-1]), mask_pos4cls)
    else:
        _mask_pos = mask_pos

    '''----- rewritten from the inputs -----'''
    # positives weighted 0.25, negatives 0.75
    alpha_ts = torch.tensor(alpha, device=pcls_sigmoid.device)
    _alpha_factor = torch.where(_mask_pos, alpha_ts, 1. - alpha_ts)
    # easy samples get a low weight, hard samples a high one
    focal_weight = torch.where(_mask_pos, 1. - pcls_sigmoid, pcls_sigmoid)
    focal_weight = _alpha_factor * torch.pow(focal_weight, gamma)
    # bce = -(gcls * torch.log(pcls) + (1.0 - gcls) * torch.log(1.0 - pcls))
    bce = x_bce(pcls_sigmoid, gcls)
    '''----- rewritten from the inputs done -----'''

    if mash_ignore is None:
        loss_val = focal_weight * bce
    else:
        if gcls.dim() == 3:
            _mash_ignore = mash_ignore.unsqueeze(-1).repeat(1, 1, gcls.shape[-1])
        else:
            _mash_ignore = mash_ignore
        loss_val = focal_weight * bce * torch.logical_not(_mash_ignore)

    if is_debug:
        if mash_ignore is None:
            _mask_neg = torch.logical_not(_mask_pos)
        else:
            if gcls.dim() == 3:
                mash_ignore = mash_ignore.unsqueeze(-1).repeat(1, 1, gcls.shape[-1])
            _mask_neg = torch.logical_not(torch.logical_or(_mask_pos, mash_ignore))
        l_pos = loss_val * _mask_pos
        l_neg = loss_val * _mask_neg
        return l_pos, l_neg

    return loss_val
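# --- Usage sketch for focalloss with a 2D conf map (made-up tensors; the positive and
# --- ignore masks normally come from the GT matching step).
def _demo_focalloss_conf():
    pconf = torch.rand(2, 845)
    gconf = torch.zeros(2, 845)
    gconf[:, :5] = 1                                       # pretend 5 positives per image
    mask_pos = gconf == 1
    mask_ignore = torch.zeros_like(gconf, dtype=torch.bool)
    l_pos, l_neg = focalloss(pconf, gconf, mask_pos=mask_pos, mash_ignore=mask_ignore, is_debug=True)
    return l_pos.sum(), l_neg.sum()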
def forward(self, p_center, targets, imgs_ts=None):
    '''
    :param p_center:
    :param targets: list
        target['boxes'] = target['boxes'].to(device)
        target['labels'] = target['labels'].to(device)
        target['size'] = target['size']
        target['image_id'] = int
    :param imgs_ts:
    :return:
    '''
    cfg = self.cfg
    pcls, ptxy, ptwh = p_center
    device = pcls.device
    batch, c, h, w = pcls.shape

    # b,c,h,w -> b,h,w,c -> b,h*w,c
    pcls = pcls.permute(0, 2, 3, 1).contiguous().view(batch, -1, self.cfg.NUM_CLASSES)
    ptxy = ptxy.permute(0, 2, 3, 1).contiguous().view(batch, -1, 2)
    ptwh = ptwh.permute(0, 2, 3, 1).contiguous().view(batch, -1, 2)

    fsize_wh = torch.tensor([h, w], device=device)

    # num_class + txywh + weight + gt4; conf is generated by a Gaussian, the heat-map channel index encodes the class
    if cfg.NUM_KEYPOINTS > 0:
        gdim = cfg.NUM_CLASSES + cfg.NUM_KEYPOINTS * 2 + 4 + 1 + 4
    else:
        gdim = cfg.NUM_CLASSES + 4 + 1 + 4
    gres = torch.empty((batch, h, w, gdim), device=device)

    # match GT
    for i, target in enumerate(targets):  # iterate over every image in the batch
        gboxes_ltrb_b = targets[i]['boxes']
        glabels_b = targets[i]['labels']

        # process all labels of this image
        gres[i] = match4center(gboxes_ltrb_b=gboxes_ltrb_b,
                               glabels_b=glabels_b,
                               fsize_wh=fsize_wh,
                               dim=gdim,
                               cfg=cfg,
                               )

        if cfg.IS_VISUAL:
            from f_tools.pic.enhance.f_data_pretreatment4pil import f_recover_normalization4ts
            _img_ts = f_recover_normalization4ts(imgs_ts[i].clone())
            from torchvision.transforms import functional as transformsF
            img_pil = transformsF.to_pil_image(_img_ts).convert('RGB')
            import numpy as np
            # img_np = np.array(img_pil)

            '''matplotlib part'''
            from matplotlib import pyplot as plt
            plt.rcParams['font.sans-serif'] = ['SimHei']  # show CJK labels
            plt.rcParams['axes.unicode_minus'] = False
            # the heat map here certainly has an offset [128, 128]
            data_hot = torch.zeros_like(gres[i, :, :, 0])  # one channel is enough
            for label in glabels_b.unique():
                # print(ids2classes[str(int(label))])
                # merge the classes for output
                flog.debug(' %s', gres[i, :, :, 3:7][gres[i, :, :, (label - 1).long()] == 1])
                torch.max(data_hot, gres[i, :, :, (label - 1).long()], out=data_hot)  # classes merged here
            plt.imshow(data_hot.cpu())
            plt.imshow(img_pil.resize(fsize_wh), alpha=0.7)
            plt.colorbar()

            # x,y are the coordinates; color 'r'/'b'...; marker, edgecolors and s control the style
            boxes_xywh_cpu = ltrb2xywh(gboxes_ltrb_b).cpu()
            fsize_cpu = fsize_wh.cpu()
            xys_f = boxes_xywh_cpu[:, :2] * fsize_cpu
            plt.scatter(xys_f[:, 0], xys_f[:, 1], color='r', s=5)  # red

            boxes_ltrb_cpu = gboxes_ltrb_b.cpu()
            boxes_ltrb_f = boxes_ltrb_cpu * fsize_cpu.repeat(2)
            current_axis = plt.gca()
            for i, box_ltrb_f in enumerate(boxes_ltrb_f):
                l, t, r, b = box_ltrb_f
                # ltwh
                current_axis.add_patch(plt.Rectangle((l, t), r - l, b - t, color='green', fill=False, linewidth=2))
                # current_axis.text(l, t - 2, ids2classes[int(glabels[i])], size=8, color='white',
                #                   bbox={'facecolor': 'green', 'alpha': 0.6})
            plt.show()

    gres = gres.reshape(batch, -1, gdim)

    ''' ---------------- cls loss: positives only ---------------- '''
    gcls = gres[:, :, :cfg.NUM_CLASSES]
    # mask_pos_3d = gcls > 0  # torch.Size([3, 16384, 3])
    # mask_neg_3d = gcls == 0
    mask_pos_3d = gcls == 1  # only the centre point (== 1) is positive, torch.Size([3, 16384, 3])
    mask_neg_3d = gcls != 1
    nums_pos = torch.sum(torch.sum(mask_pos_3d, dim=-1), dim=-1)
    # mask_pos_2d = torch.any(mask_pos_3d, -1)

    # focal loss
    pcls_sigmoid = pcls.sigmoid()
    l_cls_pos, l_cls_neg = focalloss_center(pcls_sigmoid, gcls)
    l_cls_pos = torch.mean(torch.sum(torch.sum(l_cls_pos, -1), -1) / nums_pos)
    l_cls_neg = torch.mean(torch.sum(torch.sum(l_cls_neg, -1), -1) / nums_pos)
    # l_cls_neg = l_cls_neg.sum(-1).sum(-1).mean()  # equivalent

    ''' ---------------- box loss ----------------- '''
    log_dict = {}
    # num_class + txywh + weight + gt4
    if cfg.MODE_TRAIN == 2:  # iou
        ptxywh = torch.cat([ptxy, ptwh], dim=-1)
        pboxes_ltrb = boxes_decode4center(self.cfg, fsize_wh, ptxywh)
        mask_pos_2d = torch.any(mask_pos_3d, -1)  # torch.Size([16, 16384])
        # torch.Size([16, 16384, 4]) -> torch.Size([19, 4])
        p_ltrb_pos = pboxes_ltrb[mask_pos_2d]
        g_ltrb_pos = gres[..., cfg.NUM_CLASSES + 4 + 1:cfg.NUM_CLASSES + 4 + 1 + 4][mask_pos_2d]
        iou = bbox_iou4one(p_ltrb_pos, g_ltrb_pos, is_giou=True)
        l_reg = 5 * torch.mean(1 - iou)

        l_total = l_cls_pos + l_cls_neg + l_reg
        log_dict['l_total'] = l_total.item()
        log_dict['l_cls_pos'] = l_cls_pos.item()
        log_dict['l_cls_neg'] = l_cls_neg.item()
        log_dict['l_reg'] = l_reg.item()
    elif cfg.MODE_TRAIN == 1:
        weight = gres[:, :, cfg.NUM_CLASSES + 4]  # also identifies positives, torch.Size([32, 845])
        gtxy = gres[:, :, cfg.NUM_CLASSES:cfg.NUM_CLASSES + 2]
        gtwh = gres[:, :, cfg.NUM_CLASSES + 2:cfg.NUM_CLASSES + 4]

        ptxy_sigmoid = ptxy.sigmoid()  # needs sigmoid normalization
        _loss_val = x_bce(ptxy_sigmoid, gtxy, reduction="none")
        # _loss_val = F.binary_cross_entropy_with_logits(ptxy, gtxy, reduction="none")
        # equivalent to _loss_val[mask_pos_2d].sum()
        l_txty = torch.mean(torch.sum(torch.sum(_loss_val * weight.unsqueeze(-1), -1), -1) / nums_pos)
        _loss_val = F.smooth_l1_loss(ptwh, gtwh, reduction="none")
        l_twth = torch.mean(torch.sum(torch.sum(_loss_val * weight.unsqueeze(-1), -1), -1) / nums_pos)

        l_total = l_cls_pos + l_cls_neg + l_txty + l_twth
        log_dict['l_total'] = l_total.item()
        log_dict['l_cls_pos'] = l_cls_pos.item()
        log_dict['l_cls_neg'] = l_cls_neg.item()
        log_dict['l_xy'] = l_txty.item()
        log_dict['l_wh'] = l_twth.item()
    else:
        raise Exception('cfg.MODE_TRAIN = %s does not exist' % cfg.MODE_TRAIN)

    return l_total, log_dict
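# --- match4center is not shown in this section. As a rough illustration of the note that
# --- "conf is generated by a Gaussian", this sketch splats a single Gaussian peak onto one
# --- class channel of a heat map; the real matcher also fills txywh, weight and the gt box,
# --- and may derive the radius from the box size.
def _draw_gaussian_peak_sketch(heatmap_hw, cx_f, cy_f, sigma=1.0):
    h, w = heatmap_hw.shape
    ys = torch.arange(h, dtype=torch.float32, device=heatmap_hw.device).view(-1, 1)
    xs = torch.arange(w, dtype=torch.float32, device=heatmap_hw.device).view(1, -1)
    g = torch.exp(-((xs - cx_f) ** 2 + (ys - cy_f) ** 2) / (2 * sigma ** 2))
    torch.max(heatmap_hw, g, out=heatmap_hw)   # keep the stronger peak where objects overlap
    return heatmap_hw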
def forward(self, p_yolo_tuple, targets, imgs_ts=None):
    ''' Only identical anchor counts per level are supported.
    :param p_yolo_tuple: pconf pcls ptxywh
        pconf: torch.Size([3, 10647, 1])
        pcls: torch.Size([3, 10647, 3])
        ptxywh: torch.Size([3, 10647, 4])
    :param targets: list
        target['boxes'] = target['boxes'].to(device)
        target['labels'] = target['labels'].to(device)
        target['size'] = target['size']
        target['image_id'] = int
    :return:
    '''
    cfg = self.cfg
    pconf, pcls, ptxywh = p_yolo_tuple
    device = ptxywh.device
    batch, hwa, c = ptxywh.shape  # [3, 10647, 4]

    if cfg.MODE_TRAIN == 5:
        # yolo5: conf1 + label1 + goff_xywh4 + gxywh4 = 10
        gdim = 12
    else:
        # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
        gdim = 1 + cfg.NUM_CLASSES + 4 + 1 + 4

    # h*w*anc
    gyolos = torch.empty((batch, hwa, gdim), device=device)

    # match GT
    for i, target in enumerate(targets):  # iterate over the batch
        gboxes_ltrb_b = target['boxes']  # ltrb
        glabels_b = target['labels']

        '''visualization happens inside; each feature level differs'''
        if cfg.MODE_TRAIN == 5:
            gyolos[i] = fmatch4yolov5(gboxes_ltrb_b=gboxes_ltrb_b,
                                      glabels_b=glabels_b,
                                      dim=gdim,
                                      ptxywh_b=ptxywh[i],
                                      device=device, cfg=cfg,
                                      img_ts=imgs_ts[i],
                                      pconf_b=pconf[i])
        else:
            # gyolos[i] = fmatch4yolov3(gboxes_ltrb_b=gboxes_ltrb_b,
            #                           glabels_b=glabels_b,
            #                           dim=gdim,
            #                           ptxywh_b=ptxywh[i],
            #                           device=device, cfg=cfg,
            #                           img_ts=imgs_ts[i],
            #                           pconf_b=pconf[i])
            gyolos[i] = fmatch4yolov3_iou(gboxes_ltrb_b=gboxes_ltrb_b,
                                          glabels_b=glabels_b,
                                          dim=gdim,
                                          ptxywh_b=ptxywh[i],
                                          device=device, cfg=cfg,
                                          img_ts=imgs_ts[i],
                                          pconf_b=pconf[i],
                                          val_iou=0.3)

    # gyolos [3, 10647, 13]  conf-1, cls-3, tbox-4, weight-1, gltrb-4 = 13
    s_ = 1 + cfg.NUM_CLASSES
    gconf = gyolos[:, :, 0]  # positives are 1
    mask_pos_2d = gconf > 0  # same-shape bool index; ignored -1 entries are not counted
    mask_neg_2d = gconf == 0  # -1 is ignored
    nums_pos = (mask_pos_2d.sum(-1).to(torch.float)).clamp(min=torch.finfo(torch.float16).eps)

    # # use the IOU as conf: decode pxywh and compute the IOU between prediction and GT as gconf
    # with torch.no_grad():  # torch.Size([3, 40, 13, 13])
    #     gyolos = gyolos.view(batch, -1, gdim)  # 4d -> 3d [3, 13, 13, 5, 13] -> [3, 169*5, 13]
    #     # mask_pos_2d = gyolos[:, :, 0] == 1  # already matched above; reduce dims [3, xx, 13] -> [3, xx]
    #
    #     gltrb = gyolos[:, :, -4:]  # [3, 169*5, 13] -> [3, 169*5, 4]
    #     pltrb = boxes_decode4yolo3(ptxywh, cfg)
    #
    #     _pltrb = pltrb.view(-1, 4)
    #     _gltrb = gltrb.view(-1, 4)  # iou only supports 2D
    #     iou_p = bbox_iou4one(_pltrb, _gltrb, is_ciou=True)  # one-to-one IOU
    #     iou_p = iou_p.view(batch, -1)  # per-batch IOU [nn,1] -> [batch, nn/batch]
    #
    #     '''visualize the matched predicted boxes'''
    #     debug = False  # torch.isnan(loss_conf_pos)
    #     if debug:  # debug
    #         # d0, d1 = torch.where(mask_pos_2d)  # [3,845]
    #         for i in range(batch):
    #             from f_tools.pic.enhance.f_data_pretreatment4pil import f_recover_normalization4ts
    #             # img_ts = f_recover_normalization4ts(imgs_ts[i])
    #             # mask_ = d0 == i
    #             # _pltrb = _pltrb[d0[mask_], d1[mask_]].cpu()
    #             # _gbox_p = _gltrb[d0[mask_], d1[mask_]].cpu()
    #             _img_ts = imgs_ts[i].clone()
    #             _pltrb_show = pltrb[i][mask_pos[i]]
    #             _gltrb_show = gltrb[i][mask_pos[i]]
    #
    #             iou = bbox_iou4one(_pltrb_show, _gltrb_show)
    #             flog.debug('predicted iou %s', iou)
    #             _img_ts = f_recover_normalization4ts(_img_ts)
    #             f_show_od_ts4plt(_img_ts, gboxes_ltrb=_gltrb_show.detach().cpu(),
    #                              pboxes_ltrb=_pltrb_show.detach().cpu(), is_recover_size=True,
    #                              )
    #
    # gconf = iou_p  # assign via the iou

    gyolos_pos = gyolos[mask_pos_2d]
    log_dict = {}

    if cfg.MODE_TRAIN == 5:
        # yolo5: conf1 + label1 + goff_xywh4 + gxywh4 + ancwh2 = 12
        ''' ---------------- cls loss: positives only ---------------- '''
        pcls_sigmoid_pos = pcls[mask_pos_2d].sigmoid()  # normalized
        gcls_pos = gyolos_pos[:, 1:2]
        gcls_pos = labels2onehot4ts(gcls_pos - 1, cfg.NUM_CLASSES)
        _loss_val = x_bce(pcls_sigmoid_pos, gcls_pos, reduction="none")
        l_cls = _loss_val.sum(-1).mean()

        ''' ---------------- box loss: iou ----------------- '''
        ptxty_sigmoid_pos = ptxywh[mask_pos_2d][:, :2].sigmoid() * 2. - 0.5  # cell offset in [-0.5, 1.5]
        gxywh_pos = gyolos_pos[:, 6:10]
        _ancwh_pos = gyolos_pos[:, 10:12]
        ptwth_sigmoid_pos = (ptxywh[mask_pos_2d][:, 2:4].sigmoid() * 2) ** 2 * _ancwh_pos  # [0 ~ 4]
        pxywh_pos = torch.cat([ptxty_sigmoid_pos, ptwth_sigmoid_pos], -1)
        iou_zg = bbox_iou4one_2d(xywh2ltrb4ts(pxywh_pos), xywh2ltrb4ts(gxywh_pos), is_giou=True)
        # iou_zg = bbox_iou4y(xywh2ltrb4ts(pzxywh), gltrb_pos_tx, GIoU=True)
        # print(iou_zg)
        l_reg = (1 - iou_zg).mean()

        ''' ---------------- conf loss ---------------- '''
        pconf_sigmoid = pconf.sigmoid().view(batch, -1)  # [3, 10647, 1] -> [3, 10647]
        gconf[mask_pos_2d] = iou_zg.detach().clamp(0)  # correct the positive targets with the IOU

        # ------------ conf-mse ------------
        # _loss_val = F.mse_loss(pconf_sigmoid, gconf, reduction="none")
        # # _loss_val = F.binary_cross_entropy_with_logits(pconf_sigmoid, gconf, reduction="none")
        # l_conf_pos = ((_loss_val * mask_pos_2d).sum(-1) / nums_pos).mean() * 12
        # l_conf_neg = ((_loss_val * mask_neg_2d).sum(-1) / nums_pos).mean() * 2.5
        # pos_ = _loss_val[mask_pos_2d]
        # l_conf_pos = pos_.mean()
        # l_conf_neg = _loss_val[mask_neg_2d].mean()

        # ------------ conf-focalloss ------------
        mask_ignore_2d = torch.logical_not(torch.logical_or(mask_pos_2d, mask_neg_2d))
        # l_pos, l_neg = focalloss(pconf_sigmoid, gconf, mask_pos=mask_pos_2d, mash_ignore=mask_ignore_2d,
        #                          is_debug=True)
        # l_conf_pos = (l_pos.sum(-1) / nums_pos).mean()
        # l_conf_neg = (l_neg.sum(-1) / nums_pos).mean()

        # ------------ conf-ohem ------------
        _loss_val = x_bce(pconf_sigmoid, gconf, reduction="none")
        mask_neg_hard = f_ohem(_loss_val, nums_pos * 3, mask_pos=mask_pos_2d, mash_ignore=mask_ignore_2d)
        l_conf_pos = ((_loss_val * mask_pos_2d).sum(-1) / nums_pos).mean()
        l_conf_neg = ((_loss_val * mask_neg_hard).sum(-1) / nums_pos).mean()

        ''' ---------------- loss done ----------------- '''
        l_total = l_conf_pos + l_conf_neg + l_cls + l_reg
        log_dict['l_total'] = l_total.item()
        log_dict['l_conf_pos'] = l_conf_pos.item()
        log_dict['l_conf_neg'] = l_conf_neg.item()
        log_dict['l_cls'] = l_cls.item()
        log_dict['l_reg'] = l_reg.item()
    else:
        ''' ---------------- cls loss ---------------- '''
        pcls_sigmoid = pcls.sigmoid()  # normalized
        gcls = gyolos[:, :, 1:s_]
        _loss_val = x_bce(pcls_sigmoid, gcls, reduction="none")
        l_cls = ((_loss_val.sum(-1) * mask_pos_2d).sum(-1) / nums_pos).mean()

        ''' ---------------- conf loss ---------------- '''
        pconf_sigmoid = pconf.sigmoid().view(batch, -1)  # [3, 10647, 1] -> [3, 10647]

        # ------------ conf-mse ------------
        # _loss_val = F.mse_loss(pconf_sigmoid, gconf, reduction="none")
        # _loss_val = F.binary_cross_entropy_with_logits(pconf_sigmoid, gconf, reduction="none")
        # l_conf_pos = ((_loss_val * mask_pos_2d).mean(-1)).mean() * 5.
        # l_conf_neg = ((_loss_val * mask_neg_2d).mean(-1)).mean() * 1.
        # l_conf_pos = ((_loss_val * mask_pos_2d).sum(-1) / nums_pos).mean() * 5.
        # l_conf_neg = ((_loss_val * mask_neg_2d).sum(-1) / nums_pos).mean() * 1.
        # l_conf_pos = _loss_val[mask_pos].mean() * 5
        # l_conf_neg = _loss_val[mask_neg].mean()

        # ------------ conf-focalloss ------------
        mask_ignore_2d = torch.logical_not(torch.logical_or(mask_pos_2d, mask_neg_2d))
        # l_pos, l_neg = focalloss(pconf_sigmoid, gconf, mask_pos=mask_pos_2d, mash_ignore=mask_ignore_2d,
        #                          is_debug=True)
        # l_conf_pos = (l_pos.sum(-1) / nums_pos).mean()
        # l_conf_neg = (l_neg.sum(-1) / nums_pos).mean()

        # ------------ conf-ohem ------------
        _loss_val = x_bce(pconf_sigmoid, gconf, reduction="none")
        mask_neg_hard = f_ohem(_loss_val, nums_pos * 3, mask_pos=mask_pos_2d, mash_ignore=mask_ignore_2d)
        l_conf_pos = ((_loss_val * mask_pos_2d).sum(-1) / nums_pos).mean()
        l_conf_neg = ((_loss_val * mask_neg_hard).sum(-1) / nums_pos).mean()

        ''' ---------------- box loss: bce for xy, mse for wh ----------------- '''
        # conf-1, cls-3, tbox-4, weight-1, gltrb-4 = 13
        weight = gyolos[:, :, s_ + 4]  # torch.Size([32, 845])
        ptxty_sigmoid = ptxywh[:, :, :2].sigmoid()  # needs sigmoid normalization
        ptwth = ptxywh[:, :, 2:4]
        gtxty = gyolos[:, :, s_:s_ + 2]
        gtwth = gyolos[:, :, s_ + 2:s_ + 4]

        _loss_val = x_bce(ptxty_sigmoid, gtxty, reduction="none")
        l_txty = ((_loss_val.sum(-1) * mask_pos_2d * weight).sum(-1) / nums_pos).mean()
        _loss_val = F.mse_loss(ptwth, gtwth, reduction="none")
        l_twth = ((_loss_val.sum(-1) * mask_pos_2d * weight).sum(-1) / nums_pos).mean()

        l_total = l_conf_pos + l_conf_neg + l_cls + l_txty + l_twth
        log_dict['l_total'] = l_total.item()
        log_dict['l_conf_pos'] = l_conf_pos.item()
        log_dict['l_conf_neg'] = l_conf_neg.item()
        log_dict['l_cls'] = l_cls.item()
        log_dict['l_xy'] = l_txty.item()
        log_dict['l_wh'] = l_twth.item()

    # log_dict['p_max'] = pconf.max().item()
    # log_dict['p_min'] = pconf.min().item()
    # log_dict['p_mean'] = pconf.mean().item()
    return l_total, log_dict
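# --- Illustration (not repository code): the value ranges produced by the yolo5-style
# --- decode used in the MODE_TRAIN == 5 branch above.
def _demo_yolo5_decode_range():
    t = torch.linspace(-6., 6., steps=5)
    xy_off = t.sigmoid() * 2. - 0.5          # cell offset, bounded to (-0.5, 1.5)
    wh_scale = (t.sigmoid() * 2.) ** 2       # anchor multiplier, bounded to (0, 4)
    return xy_off, wh_scale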
def forward(self, pyolos, targets, imgs_ts=None):
    '''
    :param pyolos: torch.Size([32, 6, 14, 14]) [conf-1, class-20, box4]
    :param targets:
    :param imgs_ts:
    :return:
    '''
    cfg = self.cfg
    device = pyolos.device
    batch, c, h, w = pyolos.shape  # torch.Size([32, 13, 13, 8])
    # b,c,h,w -> b,c,hw -> b,hw,c  torch.Size([32, 169, 8])
    pyolos = pyolos.view(batch, c, -1).permute(0, 2, 1)

    s_ = 1 + cfg.NUM_CLASSES
    ptxywh = pyolos[..., s_:s_ + 4]  # torch.Size([32, 169, 4])

    # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
    gdim = 1 + cfg.NUM_CLASSES + 4 + 1 + 4
    gyolos = torch.empty((batch, h, w, gdim), device=device)  # fully overwritten per batch, no need to zero

    for i, target in enumerate(targets):  # iterate over the batch
        gboxes_ltrb_b = target['boxes']  # ltrb
        glabels_b = target['labels']

        ''' yolo4
        1. one match per level: the anchor with the largest IOU against the GT;
           trick: the gt xy can be turned into cell offsets to match pxy
        2. everything else with IOU > 0.4 is ignored, except the positives
        3. reg loss: decode the prediction pxy.sigmoid, exp(pwh * anc) -> IOU loss,
           averaged over the positives, weight 0.05
        4. cls loss: label-smoothing regularization, onehot * (1 - 0.01) + 0.01 / num_class,
           pos_weight = 0.5, loss_weight = 0.5 * num_classes / 80 = 0.01875
        5. conf loss: overall weight 0.4, with ignores
        6. the losses of all levels are summed
        '''
        gyolos[i] = fmatch4yolov1(
            gboxes_ltrb_b=gboxes_ltrb_b,
            glabels_b=glabels_b,
            grid=h,  # 7
            gdim=gdim,
            device=device,
            img_ts=imgs_ts[i],
            cfg=cfg,
            use_conf=True)

        '''visual check'''
        if cfg.IS_VISUAL:
            # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
            gyolo_test = gyolos[i].clone()  # torch.Size([32, 13, 13, 9])
            gyolo_test = gyolo_test.view(-1, gdim)
            gconf_one = gyolo_test[:, 0]
            mask_pos = gconf_one == 1  # [169]

            # torch.Size([169, 4])
            txywh_t = gyolo_test[:, 1 + cfg.NUM_CLASSES:1 + cfg.NUM_CLASSES + 4]

            # recover all the xy here
            zpxy_t = txywh_t[:, :2] + f_mershgrid(h, w, is_rowcol=False).to(device)
            hw_ts = torch.tensor((h, w), device=device)
            zpxy = torch.true_divide(zpxy_t, hw_ts)
            zpwh = torch.exp(txywh_t[:, 2:]) / hw_ts
            zpxywh_pos = torch.cat([zpxy, zpwh], dim=-1)[mask_pos]

            from f_tools.pic.enhance.f_data_pretreatment4pil import f_recover_normalization4ts
            img_ts = f_recover_normalization4ts(imgs_ts[i])
            from torchvision.transforms import functional as transformsF
            img_pil = transformsF.to_pil_image(img_ts).convert('RGB')
            import numpy as np
            img_np = np.array(img_pil)
            f_show_od_np4plt(img_np, gboxes_ltrb=gboxes_ltrb_b.cpu(),
                             pboxes_ltrb=xywh2ltrb(zpxywh_pos.cpu()),
                             is_recover_size=True,
                             grids=(h, w))

    gyolos = gyolos.view(batch, -1, gdim)  # b,hw,7
    gconf = gyolos[:, :, 0]  # torch.Size([5, 169])
    mask_pos = gconf > 0  # torch.Size([32, 169])
    # mask_pos = gconf == 1  # yolo1 gt hard-codes 1
    mask_neg = gconf == 0
    nums_pos = (mask_pos.sum(-1).to(torch.float)).clamp(min=torch.finfo(torch.float16).eps)
    nums_neg = (mask_neg.sum(-1).to(torch.float)).clamp(min=torch.finfo(torch.float16).eps)
    pyolos_pos = pyolos[mask_pos]  # torch.Size([32, 169, 13]) -> [nn, 13]
    gyolos_pos = gyolos[mask_pos]  # torch.Size([32, 169, 13]) -> [nn, 13]

    ''' ---------------- cls loss ---------------- '''
    # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
    pcls_sigmoid = pyolos[:, :, 1:s_].sigmoid()  # torch.Size([32, 169, 8])
    gcls = gyolos[:, :, 1:s_]  # torch.Size([32, 169, 13])
    _loss_val = x_bce(pcls_sigmoid, gcls, reduction="none")
    l_cls = ((_loss_val.sum(-1) * mask_pos).sum(-1) / nums_pos).mean()

    # pcls_sigmoid_pos = pyolos_pos[:, 1:s_].sigmoid()
    # gcls_pos = gyolos_pos[:, 1:s_]
    # _loss_val = x_bce(pcls_sigmoid_pos, gcls_pos, reduction="none")  # torch.Size([46, 3])
    # # torch.Size([46, 3]) -> val
    # l_cls = _loss_val.sum(-1).mean()

    ''' ---------------- conf loss ---------------- '''
    # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
    pconf_sigmoid = pyolos[:, :, 0].sigmoid()

    # ------------ conf-mse ------------ 666666
    _loss_val = F.mse_loss(pconf_sigmoid, gconf, reduction="none")  # MSE works better here
    l_conf_pos = ((_loss_val * mask_pos).sum(-1) / nums_pos).mean() * 5.
    l_conf_neg = ((_loss_val * mask_neg).sum(-1) / nums_pos).mean() * 1.

    # same effect, 169:1
    # pos_ = _loss_val[mask_pos]
    # l_conf_pos = pos_.mean() * 1
    # l_conf_neg = _loss_val[mask_neg].mean() * 3

    # ------------ conf-ohem ap26_26 ------------
    # _loss_val = x_bce(pconf_sigmoid, gconf)
    # mask_ignore = torch.logical_not(torch.logical_or(mask_pos, mask_neg))
    # mask_neg_hard = f_ohem(_loss_val, nums_pos * 3, mask_pos=mask_pos, mash_ignore=mask_ignore)
    # l_conf_pos = ((_loss_val * mask_pos).sum(-1) / nums_pos).mean() * 3  # more positives -> more negatives
    # l_conf_neg = ((_loss_val * mask_neg_hard).sum(-1) / nums_pos).mean() * 3

    # ------------ focalloss ------------
    # l_pos, l_neg = focalloss(pconf_sigmoid, gconf, mask_pos=mask_pos, is_debug=True, alpha=0.5)
    # l_conf_pos = (l_pos.sum(-1).sum(-1) / nums_pos).mean()
    # l_conf_neg = (l_neg.sum(-1).sum(-1) / nums_neg).mean() * 3

    log_dict = {}
    ''' ---------------- regression loss: bce for xy, mse for wh ----------------- '''
    if cfg.MODE_TRAIN == 4:
        # ------------ iou loss ------------
        # decode pxywh and compute the IOU between prediction and GT as gconf
        # preg_pos = pyolos_pos[:, s_:s_ + 4]
        # # decode yolo1
        # pxy_pos_toff = preg_pos[..., :2].sigmoid()
        # pwh_pos = torch.exp(preg_pos[..., 2:])
        # pzxywh = torch.cat([pxy_pos_toff, pwh_pos], -1)

        # the gt here is normalized
        gltrb_pos = gyolos_pos[:, s_ + 4 + 1:s_ + 4 + 1 + 4]

        ptxywh = pyolos[..., s_:s_ + 4]
        pltrb_pos = boxes_decode4yolo1(ptxywh, h, w, cfg)[mask_pos]

        iou_zg = bbox_iou4one(pltrb_pos, gltrb_pos, is_giou=True)
        # iou_zg = bbox_iou4y(xywh2ltrb4ts(pzxywh), gltrb_pos_tx, GIoU=True)
        # print(iou_zg)
        l_reg = (1 - iou_zg).mean() * 5

        ''' ---------------- loss done ----------------- '''
        l_total = l_conf_pos + l_conf_neg + l_cls + l_reg
        log_dict['l_reg'] = l_reg.item()
    else:
        # ------------ mse + bce ------------ 666666
        # conf-1, cls-num_class, txywh-4, weight-1, gltrb-4
        # torch.Size([32, 169, 13]) 9 -> actually 8
        ptxty_sigmoid = pyolos[:, :, s_:s_ + 2].sigmoid()  # 4:6
        ptwth = pyolos[:, :, s_ + 2:s_ + 4]  # no normalization needed here

        weight = gyolos[:, :, s_ + 4]  # scale weight for small/large objects
        gtxty = gyolos[:, :, s_:s_ + 2]  # torch.Size([5, 169, 2])
        gtwth = gyolos[:, :, s_ + 2:s_ + 4]

        # _loss_val = x_bce(ptxty_sigmoid, gtxty, reduction="none")
        _loss_val = F.mse_loss(ptxty_sigmoid, gtxty, reduction="none")
        l_txty = ((_loss_val.sum(-1) * mask_pos * weight).sum(-1) / nums_pos).mean()
        _loss_val = F.mse_loss(ptwth, gtwth, reduction="none")
        l_twth = ((_loss_val.sum(-1) * mask_pos * weight).sum(-1) / nums_pos).mean()

        ''' ---------------- loss done ----------------- '''
        l_total = l_conf_pos + l_conf_neg + l_cls + l_txty + l_twth
        log_dict['l_xy'] = l_txty.item()
        log_dict['l_wh'] = l_twth.item()

    log_dict['l_total'] = l_total.item()
    log_dict['l_conf_pos'] = l_conf_pos.item()
    log_dict['l_conf_neg'] = l_conf_neg.item()
    log_dict['l_cls'] = l_cls.item()
    log_dict['p_max'] = pconf_sigmoid.max().item()
    log_dict['p_min'] = pconf_sigmoid.min().item()
    log_dict['p_mean'] = pconf_sigmoid.mean().item()
    return l_total, log_dict
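# --- f_mershgrid is not defined in this section. A sketch of its assumed behaviour, based on
# --- how it is used in the visual checks above: return the (x, y) [or (row, col)] index of
# --- every grid cell, flattened to [h*w*num_repeat, 2] so txy offsets can be added to it.
def _f_mershgrid_sketch(h, w, is_rowcol=True, num_repeat=1):
    ys, xs = torch.meshgrid(torch.arange(h), torch.arange(w))       # 'ij' indexing
    pair = [ys, xs] if is_rowcol else [xs, ys]
    grid = torch.stack(pair, dim=-1).view(-1, 2).float()            # [h*w, 2]
    return grid.repeat_interleave(num_repeat, dim=0)                 # one copy per anchor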
def forward(self, outs, targets, imgs_ts=None):
    '''
    :param outs: torch.Size([2, 2125, 9])
    :param targets:
        'image_id': 413,
        'size': tensor([500., 309.])
        'boxes': tensor([[0.31400, 0.31715, 0.71000, 0.60841]]),
        'labels': tensor([1.])
    :param imgs_ts:
    :return:
    '''
    cfg = self.cfg
    device = outs.device
    batch, dim_total, pdim = outs.shape

    # back cls centerness ltrb positivesample iou area
    gdim = 1 + cfg.NUM_CLASSES + 1 + 4 + 1 + 1 + 1
    gres = torch.empty((batch, dim_total, gdim), device=device)

    for i in range(batch):
        gboxes_ltrb_b = targets[i]['boxes']
        glabels_b = targets[i]['labels']

        import time
        # start = time.time()
        gres[i] = match4fcos_v2(gboxes_ltrb_b=gboxes_ltrb_b,
                                glabels_b=glabels_b,
                                gdim=gdim,
                                pcos=outs,
                                img_ts=imgs_ts[i],
                                cfg=cfg, )
        # gres[i] = match4fcos(gboxes_ltrb_b=gboxes_ltrb_b,
        #                      glabels_b=glabels_b,
        #                      gdim=gdim,
        #                      pcos=outs,
        #                      img_ts=imgs_ts[i],
        #                      cfg=cfg, )
        # flog.debug('show_time --- done --- %s --' % (time.time() - start))

    s_ = 1 + cfg.NUM_CLASSES
    # outs = outs[:, :, :s_ + 1].sigmoid()
    mask_pos = gres[:, :, 0] == 0  # background flag is 0 -> positive
    nums_pos = torch.sum(mask_pos, dim=-1)
    nums_pos = torch.max(nums_pos, torch.ones_like(nums_pos, device=device))

    # back cls centerness ltrb positivesample iou(unused for now) area [2125, 12]
    ''' ---------------- cls loss: all samples (pos + neg); positives are in-box samples ---------------- '''
    # obj_cls_loss = BCE_focal_loss()  # the extra background channel is included here
    pcls_sigmoid = outs[:, :, :s_].sigmoid()
    gcls = gres[:, :, :s_]
    # l_cls = torch.mean(obj_cls_loss(pcls_sigmoid, gcls) / nums_pos)
    l_cls_pos, l_cls_neg = focalloss_fcos(pcls_sigmoid, gcls)
    l_cls_pos = torch.mean(torch.sum(torch.sum(l_cls_pos, -1), -1) / nums_pos)
    l_cls_neg = torch.mean(torch.sum(torch.sum(l_cls_neg, -1), -1) / nums_pos)

    ''' ---------------- conf loss: only radius positives, center-ness ---------------- '''
    # positives according to the "positive sample" flag
    mask_pp = gres[:, :, s_ + 1 + 4] == 1
    pconf_sigmoid = outs[:, :, s_].sigmoid()  # center-ness
    gcenterness = gres[:, :, s_]  # (nn, 1)

    # use centerness
    # _loss_val = x_bce(pconf_sigmoid, gcenterness, reduction="none")
    _loss_val = x_bce(pconf_sigmoid, torch.ones_like(pconf_sigmoid), reduction="none")  # target 1 within the radius
    # only radius positives are counted, improving precision
    l_conf = 5. * torch.mean(torch.sum(_loss_val * mask_pp.float(), dim=-1) / nums_pos)

    ''' ---------------- box loss: in-box positives ---------------- '''
    # conf1 + cls3 + reg4
    # poff_ltrb_exp = torch.exp(outs[:, :, s_:s_ + 4])
    poff_ltrb = outs[:, :, s_:s_ + 4]  # all feature-map distances, raw/relu or exp
    # goff_ltrb = gres[:, :, s_ + 1:s_ + 1 + 4]
    g_ltrb = gres[:, :, s_ + 1:s_ + 1 + 4]

    # _loss_val = F.smooth_l1_loss(poff_ltrb, goff_ltrb, reduction='none')
    # _loss_val = F.mse_loss(poff_ltrb_exp, goff_ltrb, reduction='none')
    # l_reg = torch.sum(torch.sum(_loss_val, -1) * gconf * mask_pos.float(), -1)
    # l_reg = torch.mean(l_reg / nums_pos)

    # decode in feature-map coords
    # pboxes_ltrb = boxes_decode4fcos(self.cfg, poff_ltrb, is_t=True)
    # p_ltrb_t_pos = pboxes_ltrb[mask_pos]
    # image_size_ts = torch.tensor(cfg.IMAGE_SIZE, device=device)
    # g_ltrb_t_pos = g_ltrb[mask_pos] * image_size_ts.repeat(2).view(1, -1)
    # iou = bbox_iou4one(p_ltrb_t_pos, g_ltrb_t_pos, is_giou=True)

    # decoded in normalized image coords; the IOU matches the one computed on the feature map
    pboxes_ltrb = boxes_decode4fcos(self.cfg, poff_ltrb)
    p_ltrb_pos = pboxes_ltrb[mask_pos]
    g_ltrb_pos = g_ltrb[mask_pos]
    # iou = bbox_iou4one_2d(p_ltrb_pos, g_ltrb_pos, is_giou=True)
    iou = bbox_iou4one(p_ltrb_pos, g_ltrb_pos, is_giou=True)
    # bce of iou against 1; debug: iou.isnan().any() or iou.isinf().any()
    l_reg = 5 * torch.mean((1 - iou) * gcenterness[mask_pos])

    # iou2 = bbox_iou4one_3d(pboxes_ltrb, g_ltrb, is_giou=True)  # 2D and 3D give the same result
    # l_reg2 = torch.mean(torch.sum((1 - iou2) * gcenterness * mask_pos.float(), -1) / nums_pos)

    # _loss_val = x_bce(iou, giou, reduction="none")
    # l_iou = torch.mean(torch.sum(_loss_val * gconf * mask_pos.float(), dim=-1) / nums_pos)

    l_total = l_cls_pos + l_cls_neg + l_conf + l_reg

    log_dict = {}
    log_dict['l_total'] = l_total.item()
    log_dict['l_cls_pos'] = l_cls_pos.item()
    log_dict['l_cls_neg'] = l_cls_neg.item()
    log_dict['l_conf'] = l_conf.item()
    log_dict['l_reg'] = l_reg.item()
    # log_dict['l_iou_max'] = iou.max().item()
    return l_total, log_dict
def forward(self, outs, targets, imgs_ts=None):
    '''
    :param outs: cls1+conf1+ltrb4 torch.Size([2, 2125, 9])
    :param targets:
        'image_id': 413,
        'size': tensor([500., 309.])
        'boxes': tensor([[0.31400, 0.31715, 0.71000, 0.60841]]),
        'labels': tensor([1.])
    :param imgs_ts:
    :return:
    '''
    cfg = self.cfg
    device = outs.device
    batch, dim_total, pdim = outs.shape

    # cls3 centerness1 ltrb4 positive_radius1 positive_ingt1 area1  3+1+4+1+1+1 = 11
    gdim = cfg.NUM_CLASSES + 1 + 4 + 1 + 1 + 1
    gres = torch.empty((batch, dim_total, gdim), device=device)

    nums_pos = []
    for i in range(batch):
        gboxes_ltrb_b = targets[i]['boxes']
        glabels_b = targets[i]['labels']
        nums_pos.append(gboxes_ltrb_b.shape[0])

        # import time
        # start = time.time()
        gres[i] = match4fcos_v3_noback(gboxes_ltrb_b=gboxes_ltrb_b,
                                       glabels_b=glabels_b,
                                       gdim=gdim,
                                       pcos=outs,
                                       img_ts=imgs_ts[i],
                                       cfg=cfg, )
        # flog.debug('show_time --- done --- %s --' % (time.time() - start))

    # cls3 centerness1 ltrb4 positive_radius1 positive_ingt1 area1
    mask_pos = gres[:, :, cfg.NUM_CLASSES + 1 + 4 + 1] == 1  # in-box positives
    nums_pos = torch.tensor(nums_pos, device=device)

    ''' ---------------- cls loss: all samples (pos + neg); positives are in-box samples ---------------- '''
    # 3D in-box positives could also use mask_pos_3d = gcls == 1
    pcls_sigmoid = outs[:, :, :cfg.NUM_CLASSES].sigmoid()
    gcls = gres[:, :, :cfg.NUM_CLASSES]
    l_cls_pos, l_cls_neg = focalloss_fcos(pcls_sigmoid, gcls)
    l_cls_pos = torch.mean(torch.sum(torch.sum(l_cls_pos, -1), -1) / nums_pos)
    l_cls_neg = torch.mean(torch.sum(torch.sum(l_cls_neg, -1), -1) / nums_pos)

    ''' ---------------- conf loss: only radius positives, center-ness ---------------- '''
    # radius positives
    mask_pp = gres[:, :, cfg.NUM_CLASSES + 1 + 4] == 1  # radius positives
    pconf_sigmoid = outs[:, :, cfg.NUM_CLASSES].sigmoid()  # center-ness
    gcenterness = gres[:, :, cfg.NUM_CLASSES]  # (nn, 1) centerness

    # target 1 rather than gcenterness
    # _loss_val = x_bce(pconf_sigmoid, gcenterness, reduction="none")
    _loss_val = x_bce(pconf_sigmoid, torch.ones_like(pconf_sigmoid), reduction="none")  # target 1 within the radius
    # only radius positives are counted, improving precision
    l_conf = 5. * torch.mean(torch.sum(_loss_val * mask_pp.float(), dim=-1) / nums_pos)

    ''' ---------------- box loss: in-box positives ---------------- '''
    # cls3 + conf1 + reg4
    poff_ltrb = outs[:, :, cfg.NUM_CLASSES + 1:cfg.NUM_CLASSES + 1 + 4]  # all feature-map distances, raw/relu or exp
    # goff_ltrb = gres[:, :, s_ + 1:s_ + 1 + 4]
    g_ltrb = gres[:, :, cfg.NUM_CLASSES + 1:cfg.NUM_CLASSES + 1 + 4]

    # decoded in normalized image coords; the IOU matches the one computed on the feature map
    pboxes_ltrb = boxes_decode4fcos(self.cfg, poff_ltrb)
    # positives only, averaged directly
    p_ltrb_pos = pboxes_ltrb[mask_pos]
    g_ltrb_pos = g_ltrb[mask_pos]
    iou = bbox_iou4one(p_ltrb_pos, g_ltrb_pos, is_giou=True)
    # bce of iou against 1; debug: iou.isnan().any() or iou.isinf().any()
    l_reg = 5 * torch.mean((1 - iou) * gcenterness[mask_pos])

    l_total = l_cls_pos + l_cls_neg + l_conf + l_reg

    log_dict = {}
    log_dict['l_total'] = l_total.item()
    log_dict['l_cls_pos'] = l_cls_pos.item()
    log_dict['l_cls_neg'] = l_cls_neg.item()
    log_dict['l_conf'] = l_conf.item()
    log_dict['l_reg'] = l_reg.item()
    # log_dict['l_iou_max'] = iou.max().item()
    return l_total, log_dict
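# --- bbox_iou4one(..., is_giou=True) is not defined in this section. A minimal sketch of a
# --- paired (1-to-1) GIoU for ltrb boxes, only to clarify what the regression terms above are
# --- assumed to consume; the repository implementation may differ (e.g. ciou/diou variants).
def _giou_1to1_sketch(p_ltrb, g_ltrb, eps=1e-7):
    tl = torch.max(p_ltrb[:, :2], g_ltrb[:, :2])
    br = torch.min(p_ltrb[:, 2:], g_ltrb[:, 2:])
    inter = (br - tl).clamp(min=0).prod(-1)
    area_p = (p_ltrb[:, 2:] - p_ltrb[:, :2]).clamp(min=0).prod(-1)
    area_g = (g_ltrb[:, 2:] - g_ltrb[:, :2]).clamp(min=0).prod(-1)
    union = area_p + area_g - inter
    iou = inter / (union + eps)
    tl_c = torch.min(p_ltrb[:, :2], g_ltrb[:, :2])        # smallest enclosing box
    br_c = torch.max(p_ltrb[:, 2:], g_ltrb[:, 2:])
    area_c = (br_c - tl_c).clamp(min=0).prod(-1)
    return iou - (area_c - union) / (area_c + eps)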
def i_loss_fun(self, pconf, gconf, weight):
    # return F.binary_cross_entropy(pconf, gconf, weight=weight, reduction='none')
    return x_bce(pconf, gconf, weight=weight)
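# --- Usage sketch for i_loss_fun (made-up tensors; `loss_obj` stands for whatever instance
# --- owns this method, which this section does not show).
# pconf = torch.rand(4, 100)                         # already-sigmoided confidences
# gconf = (torch.rand(4, 100) > 0.9).float()         # 0/1 targets
# weight = torch.ones_like(gconf)                    # per-element weighting
# loss = loss_obj.i_loss_fun(pconf, gconf, weight)   # element-wise, same shape as pconf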