def __call__(self, img_pil, target, cfg):
    '''
    Resize keeping aspect ratio (letterbox-style), rescaling GT by the same ratio.

    :param img_pil: PIL image (RGB)
    :param target: dict with 'boxes' (and optionally 'keypoints', 'labels') in
        pixel coordinates of the original image; may be falsy when there is no GT
    :param cfg: config providing IS_VISUAL / IS_VISUAL_PRETREATMENT debug flags
    :return: (resized PIL image, updated target)
    '''
    img_np = np.array(img_pil)
    # old_size is (h, w); resize_np_keep uses one ratio for both axes and pads
    img_np, ratio, old_size, (left, top, right, bottom) = resize_np_keep(img_np, self.newsize)
    img_pil = Image.fromarray(img_np, mode="RGB")
    if target:
        target['boxes'] = target['boxes'] * ratio
        if 'keypoints' in target:
            target['keypoints'] = target['keypoints'] * ratio
            if cfg.IS_VISUAL and cfg.IS_VISUAL_PRETREATMENT:
                # BUGFIX: the original passed a '%s' placeholder with no argument,
                # so the literal '%s' was logged; log the new size instead
                flog.debug('ResizeKeep 后%s', img_pil.size)
                show_bbox_keypoints4pil(img_pil, target['boxes'], target['keypoints'], target['labels'])
        elif cfg.IS_VISUAL and cfg.IS_VISUAL_PRETREATMENT:
            show_bbox4pil(img_pil, target['boxes'], target['labels'])
    return img_pil, target
def __call__(self, img_pil, target, cfg):
    # Resize to a fixed (h, w) and rescale boxes/keypoints accordingly.
    # ratio = old / new per axis, so coordinates are DIVIDED by it below.
    if cfg.IS_VISUAL and cfg.IS_VISUAL_PRETREATMENT:
        flog.debug('显示原图 %s %s', img_pil.size, target['boxes'].shape)
        img_pil.show()
    w, h = img_pil.size  # PIL wh
    # assumes self.size_hw_resize.size is the target (h, w) — TODO confirm
    h_ratio, w_ratio = np.array([h, w]) / self.size_hw_resize.size  # hw
    img_pil = self.size_hw_resize(img_pil)
    if target:
        # NOTE: bbox aliases target['boxes'] — the target is updated in place
        bbox = target['boxes']
        bbox[:, [0, 2]] = bbox[:, [0, 2]] / w_ratio  # x columns
        bbox[:, [1, 3]] = bbox[:, [1, 3]] / h_ratio  # y columns
        if 'keypoints' in target:
            keypoints = target['keypoints']
            keypoints[:, ::2] = keypoints[:, ::2] / w_ratio   # x at even indices
            keypoints[:, 1::2] = keypoints[:, 1::2] / h_ratio  # y at odd indices
            if cfg.IS_VISUAL and cfg.IS_VISUAL_PRETREATMENT:
                flog.debug('缩放后%s', img_pil.size)
                show_bbox_keypoints4pil(img_pil, bbox, keypoints, target['labels'])
        elif cfg.IS_VISUAL and cfg.IS_VISUAL_PRETREATMENT:
            show_bbox4pil(img_pil, bbox, target['labels'])
    return img_pil, target
def __init__(self, cfg, fun_train_eval_set, fun_init_model, device) -> None:
    # Prediction-only setup: configure paths, build the eval dataset on
    # original-size images, prepare the val transform, the label list and
    # an eval-mode model.
    super(Predicted_Base, self).__init__(cfg, device)
    fun_train_eval_set(cfg)
    cfg.PATH_SAVE_WEIGHT = cfg.PATH_HOST + '/AI/weights/feadre'
    cfg.FILE_FIT_WEIGHT = os.path.join(cfg.PATH_SAVE_WEIGHT, cfg.FILE_NAME_WEIGHT)
    # Original-size images here: no transform, no mosaic augmentation
    self.dataset_test = CustomCocoDataset(file_json=cfg.FILE_JSON_TEST,
                                          path_img=cfg.PATH_IMG_EVAL,
                                          mode=cfg.MODE_COCO_EVAL,
                                          transform=None,
                                          is_mosaic=False,
                                          is_mosaic_keep_wh=False,
                                          is_mosaic_fill=False,
                                          is_debug=cfg.DEBUG,
                                          cfg=cfg)
    self.data_transform = cre_transform_resize4np(cfg)['val']
    # Initialise labels
    ids_classes = self.dataset_test.ids_classes
    flog.debug('ids_classes %s', ids_classes)
    self.labels_lsit = list(ids_classes.values())  # class index starts at 1
    self.labels_lsit.insert(0, None)  # pad slot 0 so list index == class id
    flog.debug('测试类型 %s', self.labels_lsit)
    '''------------------模型定义---------------------'''
    self.model, _, _, _ = fun_init_model(cfg, device, id_gpu=None)  # model, optimizer, lr_scheduler, start_epoch
    self.model.eval()
def show_time(f, *args, **kwargs):
    """Run ``f(*args, **kwargs)`` and log its name and wall-clock duration.

    Generalized: the original only accepted positional arguments; keyword
    arguments are now forwarded too (backward-compatible).

    :param f: callable to time
    :return: whatever ``f`` returns
    """
    import time
    from f_tools.GLOBAL_LOG import flog
    start = time.time()
    flog.debug('show_time---开始---%s-------' % (f.__name__))
    ret = f(*args, **kwargs)
    flog.debug('show_time---完成---%s---%s' % (f.__name__, time.time() - start))
    return ret
def _polt_keypoints(img_pil, p_boxes_ltrb, szie_scale4bbox, p_keypoints, szie_scale4landmarks, p_scores, p_labels, labels_lsit):
    """Scale predicted boxes/keypoints to pixel space and draw them.

    Returns the annotated PIL image, or the image unchanged when there are
    no predicted boxes.
    """
    if p_boxes_ltrb is None:
        return img_pil
    flog.debug('一共有 %s 个目标', p_boxes_ltrb.shape[0])
    boxes_scaled = p_boxes_ltrb * szie_scale4bbox
    keypoints_scaled = p_keypoints * szie_scale4landmarks
    return f_plot_od4pil_keypoints(img_pil, boxes_scaled, keypoints_scaled,
                                   p_scores, p_labels, labels_lsit)
def load_data4voc(data_transform, path_data_root, batch_size, bbox2one=False, isdebug=False, data_num_workers=0):
    '''
    Build the VOC train/val DataLoaders.

    :param data_transform: dict holding the 'train' and 'val' transforms
    :param path_data_root: root directory containing a 'trainval' folder
    :param batch_size:
    :param bbox2one: whether GT boxes are normalised to [0, 1]
    :param isdebug:
    :param data_num_workers: DataLoader workers (must stay 0 on Windows)
    :return: (train_data_loader, val_data_set_loader)
    '''
    num_workers = data_num_workers
    file_name = ['train.txt', 'val.txt']
    VOC_root = os.path.join(path_data_root, 'trainval')

    def _make_loader(dataset, shuffle):
        # collate keeps variable-length targets: a batch becomes a tuple of tuples
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,  # Windows only supports 0
            collate_fn=lambda batch: tuple(zip(*batch)),
            pin_memory=True,
        )

    # ---------------------build datasets---------------------------
    train_data_set = VOCDataSet(
        VOC_root, file_name[0],  # change here for real training
        data_transform["train"], bbox2one=bbox2one, isdebug=isdebug)
    # iter(train_data_set).__next__()  # quick smoke test of the dataset
    class_dict = train_data_set.class_to_ids
    flog.debug('class_dict %s', class_dict)
    # two images per batch use about 3504 MB of GPU memory
    train_data_loader = _make_loader(train_data_set, shuffle=True)

    val_data_set = VOCDataSet(
        VOC_root, file_name[1],
        data_transform["val"], bbox2one=bbox2one, isdebug=isdebug)
    val_data_set_loader = _make_loader(val_data_set, shuffle=False)

    # images, targets = iter(train_data_loader).__next__()
    # show_pic_ts(images[0], targets[0]['labels'], classes=class_dict)
    return train_data_loader, val_data_set_loader
def show_train_info(cfg, loader_train, loader_val_coco):
    """Log dataset sizes, class maps and the batch size for the given loaders.

    Either loader may be None and is then skipped.
    """
    if loader_train is not None:
        flog.debug('%s dataset_train 数量: %s', cfg.PATH_TENSORBOARD, len(loader_train.dataset))
        flog.debug('loader_train 类型 %s', loader_train.dataset.ids_classes)
    if loader_val_coco is not None:
        flog.debug('%s dataset_val 数量: %s', cfg.PATH_TENSORBOARD, len(loader_val_coco.dataset))
        flog.debug('loader_val_coco 类型 %s', loader_val_coco.dataset.ids_classes)
    flog.debug('cfg.BATCH_SIZE---%s', cfg.BATCH_SIZE)
def forward(self, pconf, gconf, mask_calc=None, is_debug=False):
    '''
    GHM-style loss: the more samples fall into a gradient-norm bin, the
    smaller the weight of each sample in that bin.

    :param pconf: predicted confidences
    :param gconf: ground-truth confidences
    :param mask_calc: mask of samples to include (ignores already removed);
        defaults to all samples
    :param is_debug: log per-bin counts and weights
    :return: weighted loss from ``self.i_loss_fun``
    '''
    float_eps = torch.finfo(torch.float16).eps
    device = pconf.device
    if mask_calc is None:  # by default every sample takes part
        mask_calc = torch.ones_like(pconf, dtype=torch.bool, device=device)
    num_calc = mask_calc.sum().item()  # total number of samples
    # gradient norm: the larger, the harder the sample (also the reverse gradient)
    g = self.i_calc_gradient_length(pconf, gconf)
    # same shape as pconf: which bin each sample falls into
    inds_bins = torch.floor(g * (self.num_bins - float_eps)).long().to(device)
    # count the samples in each bin
    nums_bins = torch.zeros(self.num_bins, device=device)
    for i in range(self.num_bins):
        nums_bins[i] = (torch.logical_and(inds_bins == i, mask_calc)).sum().item()
    # exponential moving average over batches keeps the density estimate stable
    if self.nums_bins_last is None:
        self.nums_bins_last = nums_bins
    else:
        nums_bins = self.momentum * self.nums_bins_last + (1 - self.momentum) * nums_bins
        self.nums_bins_last = nums_bins
    # number of non-empty bins
    num_bins_valid = (nums_bins > 0).sum().item()
    # BUGFIX: weight_bins was used below but its computation had been commented
    # out, so this method raised NameError at runtime.  Restore the GHM
    # weighting w_i = N / (count_i * valid_bins); empty bins get weight 0
    # (covers inf from count 0 and nan when everything is empty).
    weight_bins = num_calc / (nums_bins * num_bins_valid)
    weight_bins[~torch.isfinite(weight_bins)] = 0
    if is_debug:
        flog.debug('GHM_Loss 梯度模长区间数量:%s', [round(d.item(), 2) for d in nums_bins])  # bin counts
        flog.debug('GHM_Loss 区间权重:%s', [round(d.item(), 2) for d in weight_bins])  # weights
        # show_distribution(pconf)  # distribution of pconf
        pass
    loss = self.i_loss_fun(pconf, gconf, weight_bins[inds_bins])
    return loss
def show_pic_ts(img_ts, labels=None):
    '''
    Display a processed CHW image tensor with matplotlib.

    :param img_ts: CHW image tensor (CPU)
    :param labels: optional labels; only logged, not drawn
    :return:
    '''
    # example: show_pic_ts(images[0], targets[0], classes=train_data_set.class_dict)
    # plt.figure(figsize=(12.80, 7.20))
    flog.debug('labels %s', labels)
    chw = img_ts.numpy()
    hwc = np.transpose(chw, (1, 2, 0))  # CHW -> HWC for imshow
    plt.imshow(hwc)
    plt.show()
def t_other():
    # Ad-hoc exploration of the COCO API: iterate the dataset, open one image,
    # query categories / image ids / annotations.  Relies on module-level
    # globals `dataset` and `path_img`.
    global coco_obj, target, img_np_tensor
    coco_obj = dataset.coco_obj
    '''检测dataset'''
    dataset_ = dataset[1]
    for img, target in dataset:
        # print(img, target['boxes'], target['labels'])
        # f_plt_show_cv(img, target['boxes'])
        glabels_text = []
        for i in target['labels'].long():
            glabels_text.append(dataset.ids_classes[i.item()])
        f_show_od_ts4plt(img, target['boxes'], is_recover_size=True, glabels_text=glabels_text)
        pass
    '''打开某一个图'''
    img_id = coco_obj.getImgIds()[0]
    img_np_tensor = f_open_cocoimg(path_img, coco_obj, img_id=img_id)
    img_np_tensor.show()
    '''------------------- 获取指定类别名的id ---------------------'''
    ids_cat = coco_obj.getCatIds()
    print(ids_cat)
    infos_cat = coco_obj.loadCats(ids_cat)
    for info_cat in infos_cat:  # e.g. {'id': 1, 'name': 'aeroplane'}
        ids = coco_obj.getImgIds(catIds=info_cat['id'])
        print('类型对应有多少个图片', info_cat['id'], info_cat['name'], len(ids))
    # ids_cat = coco.getCatIds(catNms=['aeroplane', 'bottle'])
    # ids_cat = coco.getCatIds(catIds=[1, 3])
    # ids_cat = coco_obj.getCatIds(catIds=[1])
    # print(ids_cat)
    # infos_cat = coco.loadCats(ids=[1, 5])
    # print(infos_cat)  # detailed category info [{'id': 1, 'name': 'aeroplane'}, {'id': 2, 'name': 'bicycle'}]
    '''获取指定类别id的图片id'''
    ids_img = []
    for idc in ids_cat:
        ids_ = coco_obj.getImgIds(catIds=idc)
        ids_img += ids_
        # print(ids_)  # getImgIds only supports a single category element here
    ids_img = list(set(ids_img))  # de-duplicate
    '''查看图片信息 '''
    infos_img = coco_obj.loadImgs(ids_img[0])
    print(infos_img)  # [{'height': 281, 'width': 500, 'id': 1, 'file_name': '2007_000032.jpg'}]
    ids_ann = coco_obj.getAnnIds(imgIds=infos_img[0]['id'])
    info_ann = coco_obj.loadAnns(ids_ann)  # annotation objects
    '''获取数据集类别数'''
    flog.debug(coco_obj.loadCats(coco_obj.getCatIds()))
    '''显示标注'''
def f_show_coco_net_pic():
    """Fetch a sample COCO image (bus, id 233727) via its URL and show it with
    its annotations drawn on top.  Uses the module-level ``coco`` object."""
    sample_id = 233727
    img_info = coco.loadImgs(sample_id)[0]
    img = io.imread(img_info['coco_url'])
    flog.debug('加载图片成功 %s', img_info)
    # all annotation ids for this image -> annotation objects
    ann_ids = coco.getAnnIds(imgIds=img_info['id'])
    anns = coco.loadAnns(ann_ids)
    flog.debug('anns %s', anns)
    plt.axis('off')
    plt.imshow(img)
    coco.showAnns(anns)  # overlay the annotations
    plt.show()
def _show(img_ts, target, cfg, name):
    """Visualise a normalised sample: scale coordinates back up by
    cfg.IMAGE_SIZE and draw boxes (plus keypoints when present) over the
    tensor image.  ``name`` only labels the log line."""
    flog.debug('%s 后', name)
    img_pil = transforms.ToPILImage('RGB')(img_ts)
    if target is None:
        return
    # presumably IMAGE_SIZE is (w, h): x columns use [0], y columns use [1]
    w_scale = cfg.IMAGE_SIZE[0]
    h_scale = cfg.IMAGE_SIZE[1]
    if 'keypoints' in target:
        # boxes (4 cols) and keypoints (10 cols) side by side; x even, y odd
        merged = np.concatenate([target['boxes'], target['keypoints']], axis=1)
        merged[:, ::2] = merged[:, ::2] * w_scale
        merged[:, 1::2] = merged[:, 1::2] * h_scale
        show_bbox_keypoints4pil(img_pil, merged[:, :4], merged[:, 4:14], target['labels'])
    else:
        boxes = target['boxes']
        # copy so the caller's target is left untouched
        if isinstance(boxes, np.ndarray):
            scaled = np.copy(boxes)
        elif isinstance(boxes, torch.Tensor):
            scaled = torch.clone(boxes)
        else:
            raise Exception('类型错误', type(boxes))
        scaled[:, ::2] = scaled[:, ::2] * w_scale
        scaled[:, 1::2] = scaled[:, 1::2] * h_scale
        show_bbox4pil(img_pil, scaled, target['labels'])
def spilt_voc2txt(path_files, val_rate=0.3, overlay=False):
    '''
    Split a VOC-style dataset into train/val name lists (all data in one folder).

    :param path_files: path of the 'trainval' folder (must contain Annotations/)
    :param val_rate: validation ratio when <= 1, absolute count when > 1;
        0 means no validation set
    :param overlay: unused; kept for interface compatibility
    :return: (train txt file name, val txt file name)
    '''
    if not os.path.exists(path_files):
        flog.debug('文件夹不存在 %s', path_files)
        exit(1)
    _file_name_train = 'train.txt'
    _file_name_val = 'val.txt'
    path_train = os.path.join(path_files, _file_name_train)
    path_val = os.path.join(path_files, _file_name_val)
    if os.path.exists(path_train):
        print('文件已存在 : ', path_train)
    else:
        path_xml = os.path.join(path_files, 'Annotations')
        # listdir yields file names only; keep the stem before the extension
        files_name = sorted([file.split('.')[0] for file in os.listdir(path_xml)])
        files_num = len(files_name)  # number of files
        flog.debug('总文件数 %s', files_num)
        # pick val indices at random: val_rate > 1 is an absolute count
        if val_rate > 1.:
            k = int(val_rate)
        else:
            k = int(files_num * val_rate)
        # set gives O(1) membership in the loop below (was a list: O(n) per test)
        val_index = set(random.sample(range(0, files_num), k=k))
        flog.debug('测试集数量 %s', len(val_index))
        train_files = []
        val_files = []
        for index, file_name in enumerate(files_name):
            if index in val_index:
                val_files.append(file_name)
            else:
                train_files.append(file_name)
        try:
            # BUGFIX: the original never closed these handles; 'with' guarantees
            # the data is flushed and the files closed even on error
            with open(path_train, 'x') as train_f, open(path_val, 'x') as eval_f:
                train_f.write('\n'.join(train_files))  # newline-separated names
                eval_f.write('\n'.join(val_files))
        except FileExistsError as e:
            print(e)
            exit(1)
    return _file_name_train, _file_name_val,
def is_float(str):
    """Return True when *str* looks like a plain decimal such as '-12.5':
    exactly one dot, all digits on both sides, at most one leading minus.
    Logs the verdict either way."""
    from f_tools.GLOBAL_LOG import flog
    if str.count('.') != 1:  # a decimal has exactly one dot
        flog.debug('%s 不是小数' % str)
        return False
    left, right = str.split('.')  # integer part / fractional part
    # strip the sign off the integer part (only a single leading '-' allowed)
    if str.count('-') == 1 and str[0] == '-':
        lright = left.split('-')[1]
    elif str.count('-') == 0:
        lright = left
    else:
        flog.debug('%s 不是小数' % str)
        return False
    # both the unsigned integer part and the fraction must be all digits
    if right.isdigit() and lright.isdigit():
        flog.debug('%s 是小数' % str)
        return True
    flog.debug('%s 不是小数' % str)
    return False
def get_prediction(image_bytes):
    """Classify *image_bytes* with the module-level model.

    Returns {"result": [...]} holding per-class "class:name probability:p"
    strings sorted by probability descending; on any failure the list holds
    the error message instead (deliberate best-effort for the web endpoint).
    """
    try:
        tensor = transform_image(image_bytes=image_bytes)
        flog.debug('预处理完成 %s', tensor.shape)
        # forward pass
        __outs = model.forward(tensor).squeeze()
        flog.debug('正向完成 %s, %s', __outs.shape, __outs)
        prediction = torch.softmax(__outs, dim=0).detach().cpu().numpy()
        flog.debug('输出 %s', prediction)
        template = "class:{:<15} probability:{:.3f}"
        # pair each class name with its probability, highest first
        index_pre = sorted(
            ((class_indices[str(idx)], float(p)) for idx, p in enumerate(prediction)),
            key=lambda kv: kv[1], reverse=True)
        text = [template.format(k, v) for k, v in index_pre]
        return_info = {"result": text}
    except Exception as e:
        return_info = {"result": [str(e)]}
    return return_info
def forward(self, p_center, targets, imgs_ts=None):
    '''
    CenterNet-style loss: match GTs onto the feature grid, then compute the
    focal cls loss plus either an IoU box loss (MODE_TRAIN == 2) or a
    txty/twth regression loss (MODE_TRAIN == 1).

    :param p_center: (pcls, ptxy, ptwh) heads, each b,c,h,w
    :param targets: list
        target['boxes'] = target['boxes'].to(device)
        target['labels'] = target['labels'].to(device)
        target['size'] = target['size']
        target['image_id'] = int
    :param imgs_ts: batch images, only used for the debug visualisation
    :return: (l_total, log_dict)
    '''
    cfg = self.cfg
    pcls, ptxy, ptwh = p_center
    device = pcls.device
    batch, c, h, w = pcls.shape
    # b,c,h,w -> b,h,w,c -> b,h*w,c
    pcls = pcls.permute(0, 2, 3, 1).contiguous().view(batch, -1, self.cfg.NUM_CLASSES)
    ptxy = ptxy.permute(0, 2, 3, 1).contiguous().view(batch, -1, 2)
    ptwh = ptwh.permute(0, 2, 3, 1).contiguous().view(batch, -1, 2)
    fsize_wh = torch.tensor([h, w], device=device)
    # per-cell GT layout: num_class + txywh + weight + gt4; conf comes from a
    # gaussian, the heat-map layer index encodes the class
    if cfg.NUM_KEYPOINTS > 0:
        gdim = cfg.NUM_CLASSES + cfg.NUM_KEYPOINTS * 2 + 4 + 1 + 4
    else:
        gdim = cfg.NUM_CLASSES + 4 + 1 + 4
    gres = torch.empty((batch, h, w, gdim), device=device)
    # match GT, one image of the batch at a time
    for i, target in enumerate(targets):
        gboxes_ltrb_b = targets[i]['boxes']
        glabels_b = targets[i]['labels']
        # encode every label of this image onto the grid
        gres[i] = match4center(gboxes_ltrb_b=gboxes_ltrb_b,
                               glabels_b=glabels_b,
                               fsize_wh=fsize_wh,
                               dim=gdim,
                               cfg=cfg,
                               )
        if cfg.IS_VISUAL:
            from f_tools.pic.enhance.f_data_pretreatment4pil import f_recover_normalization4ts
            _img_ts = f_recover_normalization4ts(imgs_ts[i].clone())
            from torchvision.transforms import functional as transformsF
            img_pil = transformsF.to_pil_image(_img_ts).convert('RGB')
            import numpy as np
            # img_np = np.array(img_pil)
            '''plt画图部分'''
            from matplotlib import pyplot as plt
            plt.rcParams['font.sans-serif'] = ['SimHei']  # show Chinese labels
            plt.rcParams['axes.unicode_minus'] = False
            # the heat-map here carries an inherent offset [128,128]
            data_hot = torch.zeros_like(gres[i, :, :, 0])  # one layer is enough
            for label in glabels_b.unique():
                # print(ids2classes[str(int(label))])
                # merged per-class output
                flog.debug(' %s', gres[i, :, :, 3:7][gres[i, :, :, (label - 1).long()] == 1])
                # merge the class layers into a single map
                torch.max(data_hot, gres[i, :, :, (label - 1).long()], out=data_hot)
            plt.imshow(data_hot.cpu())
            plt.imshow(img_pil.resize(fsize_wh), alpha=0.7)
            plt.colorbar()
            # scatter: x,y coordinates; color 'r'=red, 'b'=blue etc.; s=dot size
            boxes_xywh_cpu = ltrb2xywh(gboxes_ltrb_b).cpu()
            fsize_cpu = fsize_wh.cpu()
            xys_f = boxes_xywh_cpu[:, :2] * fsize_cpu
            plt.scatter(xys_f[:, 0], xys_f[:, 1], color='r', s=5)  # red
            boxes_ltrb_cpu = gboxes_ltrb_b.cpu()
            boxes_ltrb_f = boxes_ltrb_cpu * fsize_cpu.repeat(2)
            current_axis = plt.gca()
            # NOTE(review): this inner loop shadows the outer batch index `i`;
            # harmless today because nothing after it reads `i` this iteration
            for i, box_ltrb_f in enumerate(boxes_ltrb_f):
                l, t, r, b = box_ltrb_f  # ltwh
                current_axis.add_patch(plt.Rectangle((l, t), r - l, b - t, color='green', fill=False, linewidth=2))
                # current_axis.text(l, t - 2, ids2classes[int(glabels[i])], size=8, color='white',
                #                   bbox={'facecolor': 'green', 'alpha': 0.6})
            plt.show()
    gres = gres.reshape(batch, -1, gdim)
    ''' ---------------- cls损失 只计算正例---------------- '''
    gcls = gres[:, :, :cfg.NUM_CLASSES]
    # mask_pos_3d = gcls > 0  # torch.Size([3, 16384, 3])
    # mask_neg_3d = gcls == 0
    mask_pos_3d = gcls == 1  # only the exact centre cell is positive torch.Size([3, 16384, 3])
    mask_neg_3d = gcls != 1
    nums_pos = torch.sum(torch.sum(mask_pos_3d, dim=-1), dim=-1)
    # mask_pos_2d = torch.any(mask_pos_3d, -1)
    # focal loss
    pcls_sigmoid = pcls.sigmoid()
    l_cls_pos, l_cls_neg = focalloss_center(pcls_sigmoid, gcls)
    l_cls_pos = torch.mean(torch.sum(torch.sum(l_cls_pos, -1), -1) / nums_pos)
    l_cls_neg = torch.mean(torch.sum(torch.sum(l_cls_neg, -1), -1) / nums_pos)
    # l_cls_neg = l_cls_neg.sum(-1).sum(-1).mean()  # equivalent
    ''' ---------------- box损失 ----------------- '''
    log_dict = {}
    # num_class + txywh + weight + gt4
    if cfg.MODE_TRAIN == 2:  # iou
        ptxywh = torch.cat([ptxy, ptwh], dim=-1)
        pboxes_ltrb = boxes_decode4center(self.cfg, fsize_wh, ptxywh)
        mask_pos_2d = torch.any(mask_pos_3d, -1)  # torch.Size([16, 16384])
        # torch.Size([16, 16384, 4]) -> torch.Size([19, 4])
        p_ltrb_pos = pboxes_ltrb[mask_pos_2d]
        g_ltrb_pos = gres[..., cfg.NUM_CLASSES + 4 + 1:cfg.NUM_CLASSES + 4 + 1 + 4][mask_pos_2d]
        iou = bbox_iou4one(p_ltrb_pos, g_ltrb_pos, is_giou=True)
        l_reg = 5 * torch.mean(1 - iou)
        l_total = l_cls_pos + l_cls_neg + l_reg
        log_dict['l_total'] = l_total.item()
        log_dict['l_cls_pos'] = l_cls_pos.item()
        log_dict['l_cls_neg'] = l_cls_neg.item()
        log_dict['l_reg'] = l_reg.item()
    elif cfg.MODE_TRAIN == 1:
        weight = gres[:, :, cfg.NUM_CLASSES + 4]  # doubles as positive indicator torch.Size([32, 845])
        gtxy = gres[:, :, cfg.NUM_CLASSES:cfg.NUM_CLASSES + 2]
        gtwh = gres[:, :, cfg.NUM_CLASSES + 2:cfg.NUM_CLASSES + 4]
        ptxy_sigmoid = ptxy.sigmoid()  # must be normalised
        _loss_val = x_bce(ptxy_sigmoid, gtxy, reduction="none")
        # _loss_val = F.binary_cross_entropy_with_logits(ptxy, gtxy, reduction="none")
        # equivalent to _loss_val[mask_pos_2d].sum()
        l_txty = torch.mean(torch.sum(torch.sum(_loss_val * weight.unsqueeze(-1), -1), -1) / nums_pos)
        _loss_val = F.smooth_l1_loss(ptwh, gtwh, reduction="none")
        l_twth = torch.mean(torch.sum(torch.sum(_loss_val * weight.unsqueeze(-1), -1), -1) / nums_pos)
        l_total = l_cls_pos + l_cls_neg + l_txty + l_twth
        log_dict['l_total'] = l_total.item()
        log_dict['l_cls_pos'] = l_cls_pos.item()
        log_dict['l_cls_neg'] = l_cls_neg.item()
        log_dict['l_xy'] = l_txty.item()
        log_dict['l_wh'] = l_twth.item()
    else:
        raise Exception('cfg.MODE_TRAIN = %s 不存在' % cfg.MODE_TRAIN)
    return l_total, log_dict
def f_evaluate4coco3(model, data_loader, epoch, fun_datas_l2=None, res_eval=None, tb_writer=None, ann_type='bbox', device=None, eval_sampler=None, is_keep=False):
    '''
    :param ann_type: ['segm', 'bbox', 'keypoints']  (only 'bbox' is supported)
    '''
    # Runs one evaluation epoch: predicts every batch, collects ltwh results,
    # merges them across GPUs, runs COCOeval and returns [stats[1], stats[7]].
    if eval_sampler is not None:
        eval_sampler.set_epoch(epoch)  # so each epoch's multi-GPU shards differ
    # true -> does not trigger
    assert ann_type == 'bbox', 'f_evaluate4coco3 不支持ann_type=%s' % ann_type
    cfg = model.cfg
    res_ = {}  # image_id -> {boxes, labels, scores}
    ids_coco = []  # every image id seen
    num_no_pos = 0  # images/batches without any detection
    if fis_mgpu():
        # flog.debug('get_rank %s 这里等待', get_rank())
        torch.distributed.barrier()
    pbar = tqdm(data_loader, desc='%s' % epoch, postfix=dict, mininterval=0.1)
    for batch_data in pbar:
        # torch.Size([5, 3, 416, 416])
        img_ts4, g_targets = fun_datas_l2(batch_data, device, cfg, epoch, model)
        # sizes and ids are needed for coco
        images, targets = batch_data
        '''提取真实ID及尺寸'''
        _sizes = []  # used to restore boxes to original pixels
        _ids = []
        for target in targets:  # pull id and size from each target
            ids_coco.append(target['image_id'])  # append to the global id list
            _ids.append(target['image_id'])
            _s = target['size']
            if is_keep:  # letterbox fix: the effective canvas is square
                max1 = max(_s)
                _s = [max1, max1]
            if isinstance(_s, torch.Tensor):
                _sizes.append(_s.clone().detach())  # tensor
            else:
                _sizes.append(torch.tensor(_s))  # tensor
            # if cfg.IS_VISUAL:
            #     coco_gt = data_loader.dataset.coco
            #     f_show_coco_pics(coco_gt, data_loader.dataset.path_img, ids_img=[target['image_id']])
        if cfg.CUSTOM_EVEL:
            # net_center must be adapted accordingly
            model(img_ts4, g_targets, _ids, _sizes, data_loader.dataset.coco_obj)
            maps_val = [0, 0]
            return maps_val
        else:
            ids_batch, p_boxes_ltrb, p_keypoints, p_labels, p_scores = model(img_ts4, g_targets)
            if p_labels is None or len(p_labels) == 0:
                num_no_pos += len(data_loader)
                flog.info('本批没有目标 num_no_pos 3次后出 当前: %s', num_no_pos)
                # if num_no_pos > 3:  # give up after three empty batches
                #     return
                # else:  # no detections: just go to the next batch
                #     num_no_pos += 1
                pbar.set_description("未-%s" % num_no_pos)
                continue
            _res_t = {}  # this batch's results
            # image ids follow batch order; select the matching prediction rows
            for i, (size, image_id) in enumerate(zip(_sizes, _ids)):
                mask = ids_batch == i  # rows of this image in the batch output
                if torch.any(mask):  # detections exist -> write them out
                    if cfg.IS_VISUAL or cfg.tcfg_show_pic < cfg.NUM_EVAL_SHOW_PIC:
                        cfg.tcfg_show_pic += 1
                        # earlier PIL-based preview kept for reference:
                        # img_ts = img_ts4[i]
                        # flog.debug('nms后 预测共有多少个目标: %s' % p_boxes_ltrb[mask].shape[0])
                        # from f_tools.pic.enhance.f_data_pretreatment import f_recover_normalization4ts
                        # img_ts = f_recover_normalization4ts(img_ts)
                        # from torchvision.transforms import functional as transformsF
                        # img_pil = transformsF.to_pil_image(img_ts).convert('RGB')
                        # # size after preprocessing
                        # _size = torch.tensor(cfg.IMAGE_SIZE * 2)
                        # p_boxes_ltrb_f = p_boxes_ltrb[mask].cpu() * _size
                        # f_plt_od(img_pil, p_boxes_ltrb_f,
                        #          g_boxes_ltrb=targets[i]['boxes'].cpu(),  # gbox not normalised by default
                        #          ids2classes=data_loader.dataset.ids_classes,
                        #          labels=p_labels[mask],
                        #          scores=p_scores[mask].tolist(),
                        #          is_recover_size=False
                        #          )
                        _size = torch.tensor(cfg.IMAGE_SIZE * 2)
                        coco = data_loader.dataset.coco_obj
                        img_info = coco.loadImgs([image_id])
                        file_img = os.path.join(data_loader.dataset.path_img, img_info[0]['file_name'])
                        img_np = cv2.imread(file_img)
                        img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB)
                        # import skimage.io as io  # h,w,c
                        # img_np = io.imread(file_img)
                        whwh = np.tile(np.array(img_np.shape[:2][::-1]), 2)
                        p_boxes_ltrb_f = p_boxes_ltrb[mask].cpu() * whwh
                        f_plt_od_np(
                            img_np, p_boxes_ltrb_f,
                            g_boxes_ltrb=targets[i]['boxes'].cpu() / _size * whwh,  # gbox not normalised by default
                            ids2classes=data_loader.dataset.ids_classes,
                            labels=p_labels[mask],
                            scores=p_scores[mask].tolist(),
                            is_recover_size=False)
                    # restore to the raw (unprocessed) size; coco wants ltwh
                    boxes_ltwh = ltrb2ltwh(p_boxes_ltrb[mask] * size.repeat(2)[None])
                    _res_t[image_id] = {
                        'boxes': boxes_ltwh,  # coco loadRes converts ltwh -> ltrb
                        'labels': p_labels[mask],
                        'scores': p_scores[mask],
                    }
                    # progress-bar stats
                    d = {
                        'pos': len(boxes_ltwh),
                        'max': round(p_scores[mask].max().item() * 100, 1),
                        # 'min': round(p_scores.min().item(), 1),
                        'mean': round(p_scores[mask].mean().item() * 100, 1),
                    }
                    pbar.set_postfix(**d)
                else:
                    # flog.warning('没有预测出框 %s', files_txt)
                    num_no_pos += 1
                    pbar.set_description("未-%s" % num_no_pos)
            if len(_res_t) > 0:
                res_.update(_res_t)  # merge this batch into the epoch results
    cfg.tcfg_show_pic = 0  # reset the preview counter
    # if len(res_)
    # reshape the results for coco
    res_coco = []
    '''这里处理多 GPU 数据同步 '''
    if fis_mgpu():
        # (an empty res_ is not special-cased yet)
        d = {}
        d['res_'] = res_
        d['ids_coco'] = ids_coco
        d['num_no_pos'] = num_no_pos
        data_list = dict_all_gather(d)  # synchronise across ranks
        if not is_main_process():  # the other GPU processes leave here
            flog.debug('get_rank %s 已退出', get_rank())
            return None
        res_.clear()  # rebuild from all GPUs' contributions
        ids_coco.clear()
        num_no_pos = 0
        for d in data_list:
            res_.update(d['res_'])
            ids_coco.extend(d['ids_coco'])
            num_no_pos += d['num_no_pos']
    for i, (image_id, target) in enumerate(res_.items()):
        labels = target['labels'].type(torch.int).tolist()
        boxes_ltwh = target['boxes'].tolist()
        score = target['scores'].tolist()
        for i in range(len(labels)):
            res_coco.append({"image_id": image_id, "category_id": labels[i], "bbox": boxes_ltwh[i], "score": score[i]})
    maps_val = []
    if len(res_coco) > 0:  # there are coco results
        coco_gt = data_loader.dataset.coco_obj
        # mkstemp returns (handle with the safety level, path) of a fresh temp file
        _, tmp = tempfile.mkstemp()  # create a temp file
        json.dump(res_coco, open(tmp, 'w'))
        coco_dt = coco_gt.loadRes(tmp)
        '''
        _summarizeDets()->_summarize()
        _summarizeDets calls _summarize 12 times; the results land in
        self.eval['precision'] and self.eval['recall']
        '''
        if ann_type == 'bbox':
            coco_eval_obj = FCOCOeval(coco_gt, coco_dt, ann_type)  # adds per-class AP
        else:
            coco_eval_obj = COCOeval(coco_gt, coco_dt, ann_type)
        coco_eval_obj.params.imgIds = ids_coco  # ids merged across GPUs
        coco_eval_obj.evaluate()
        coco_eval_obj.accumulate()
        if ann_type == 'bbox':
            coco_stats, print_coco = coco_eval_obj.summarize()
            print(print_coco)
            coco_eval_obj.stats = coco_stats
        else:
            coco_eval_obj.summarize()  # stock version assigns coco_eval_obj.stats itself
        clses_name = list(data_loader.dataset.classes_ids)
        coco_eval_obj.print_clses(clses_name)
        maps_val.append(coco_eval_obj.stats[1])
        maps_val.append(coco_eval_obj.stats[7])
        if tb_writer is not None:
            # Precision_iou
            _d = {
                'IoU=0.50:0.95': coco_eval_obj.stats[0],
                'IoU=0.50': coco_eval_obj.stats[1],
                'IoU=0.75': coco_eval_obj.stats[2],
            }
            tb_writer.add_scalars('mAP/Precision_iou', _d, epoch + 1)
            # Recall_iou
            _d = {
                'maxDets= 1': coco_eval_obj.stats[6],
                'maxDets= 10': coco_eval_obj.stats[7],
                'maxDets=100': coco_eval_obj.stats[8],
            }
            tb_writer.add_scalars('mAP/Recall_iou', _d, epoch + 1)
            # small / medium / large
            _d = {
                'p_large': coco_eval_obj.stats[5],
                'r_large': coco_eval_obj.stats[11],
            }
            tb_writer.add_scalars('mAP/large', _d, epoch + 1)
            _d = {
                'p_medium': coco_eval_obj.stats[4],
                'r_medium': coco_eval_obj.stats[10],
            }
            tb_writer.add_scalars('mAP/medium', _d, epoch + 1)
            _d = {
                'p_small': coco_eval_obj.stats[3],
                'r_small': coco_eval_obj.stats[9],
            }
            tb_writer.add_scalars('mAP/small', _d, epoch + 1)
            # a single scalar per graph
            tb_writer.add_scalar('mAP/num_no_pos', num_no_pos, epoch + 1)  # images with no detections
    else:  # no coco results at all
        if tb_writer is not None:
            tb_writer.add_scalar('mAP/num_no_pos', num_no_pos, epoch + 1)  # images with no detections
        maps_val = [0, 0]
    return maps_val
import torchvision
import torch
import os

from f_tools.GLOBAL_LOG import flog

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# BUGFIX: the value had a stray leading space (" 1,2"); use a clean id list
os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"  # set globally, before CUDA init

if __name__ == '__main__':
    # Smoke test: run one forward/backward of resnet50 across the visible GPUs.
    for i in range(torch.cuda.device_count()):
        print(torch.cuda.get_device_name(i))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # model = torchvision.models.vgg16(pretrained=True)
    # model = torchvision.models.MobileNetV2()
    # model = torchvision.models.ShuffleNetV2()
    # model = torchvision.models.SqueezeNet()
    model = torchvision.models.resnet50(num_classes=10)
    # model.to(device)
    # BUGFIX: nn.Module has no cuda_idx(); .cuda() moves the model to the GPU
    rnn = model.cuda()
    par = torch.nn.DataParallel(rnn)
    inp = torch.randn(100, 3, 32, 32).cuda()
    par(inp).sum().backward()
    print('完成')
    flog.debug('12 %s', 123)
def __getitem__(self, index):
    '''
    :param index:
    :return:
        tensor or np.array depending on `out` (ts by default, otherwise np)
        img: h,w,c
        target: stock coco is ltwh
            dict{
                image_id: int,
                bboxs: ts n4 original image ltwh -> ltrb
                labels: ts n,
                keypoints: ts n,10
                size: wh
                }
    '''
    # produces the original-size target and img_np_uint8, e.g. (375, 500, 3)
    if self.is_mosaic and self.mode == 'bbox':
        res = self.do_mosaic(index)
    else:
        res = self.open_img_tar(index)

    if res is None:
        flog.error('这个图片没有标注信息 id为%s', index)
        # no annotation for this image: retry with the next index
        return self.__getitem__(index + 1)

    img, target = res
    if len(target['boxes']) != len(target['labels']):
        flog.warning('!!! 数据有问题 1111 %s %s %s ', target, len(target['boxes']), len(target['labels']))

    '''---------------cocoAPI测试 查看图片在归一化前------------------'''
    # debugging helper only
    # if self.cfg.IS_VISUAL_PRETREATMENT:  # visual flag; unusable with is_mosaic
    #     f_show_coco_pics(self.coco_obj, self.path_img, ids_img=[index])

    if target['boxes'].shape[0] == 0:
        flog.warning('数据有问题 重新加载 %s', index)
        return self.__getitem__(index + 1)

    if self.transform is not None:
        img, target = self.transform(img, target)
    # if self.is_img_np:
    #     # input ltrb original image
    #     # f_plt_show_cv(img, gboxes_ltrb=target['boxes'])
    #     # img, boxes, labels = self.transform(img, target['boxes'], target['labels'])
    #     img, target = self.transform(img, target)
    #     # boxes and labels are refreshed here
    #     # f_plt_show_cv(img, gboxes_ltrb=boxes)
    # else:
    #     # preprocessing takes a PIL img plus an np target
    #     img, target = self.transform(img, target)

    if len(target['boxes']) != len(target['labels']):
        flog.warning('!!! 数据有问题 ttttttttt %s %s %s ', target, len(target['boxes']), len(target['labels']))

    # target['boxes'] = torch.tensor(target['boxes'], dtype=torch.float)
    # target['labels'] = torch.tensor(target['labels'], dtype=torch.int64)
    target['size'] = torch.tensor(target['size'], dtype=torch.float)  # used later to recover the real size
    # if self.mode == 'keypoints':
    #     target['keypoints'] = torch.tensor(target['keypoints'], dtype=torch.float)

    if target['boxes'].shape[0] == 0:
        flog.debug('二次检查出错 %s', index)
        # transform may have dropped all boxes: retry with the next index
        return self.__getitem__(index + 1)
    if len(target['boxes']) != len(target['labels']):
        flog.warning('!!! 数据有问题 22222 %s %s %s ', target, len(target['boxes']), len(target['labels']))
        # flog.warning('数据debug 有问题 %s %s %s ', target, len(target['boxes']), len(target['labels']))
    return img, target
'''------------------系统配置---------------------'''
# Prediction driver: configure, load the test dataset, init the model in eval
# mode and run single-image prediction from index_start onward.
cfg = CFG
train_eval_set(cfg)
cfg_raccoon(cfg)  # bundled dataset
index_start = 40  # first sample index to predict from
device = torch.device('cpu')
flog.info('模型当前设备 ------ %s', device)
# load the data
data_loader = DataLoader(cfg)
dataset_val = data_loader.get_test_dataset()
ids_classes = dataset_val.ids_classes
labels_lsit = list(ids_classes.values())  # class index starts at 1
labels_lsit.insert(0, None)  # pad slot 0 so list index == class id
flog.debug('测试类型 %s', labels_lsit)
'''------------------模型定义---------------------'''
model, optimizer, lr_scheduler, start_epoch = init_model(cfg, device, id_gpu=None)
model.eval()
data_transform = cre_transform_resize4pil(cfg)  # images here are original size
for i in range(index_start, len(dataset_val), 1):
    img, _ = dataset_val[i]
    f_prod_pic4one(img, model, labels_lsit, data_transform)
# for name in file_names:
#     '''---------------数据加载及处理--------------'''
# 切换gpu device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") torch.Tensor(2, 3).to(torch.device('cuda:0')) net = torch.nn.DataParallel(model, device_ids=[0]) print('输入索引,返回gpu名字', torch.cuda.get_device_name(0)) print('返回当前设备索引', torch.cuda.current_device()) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") torch.cuda.set_device(0) t = t.to(device) print(t.device) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print(device) def tf_set(): # 配置GPU以实际内存消耗占用 gpus = tf.config.experimental.list_physical_devices("GPU") if gpus: try: for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True) except RuntimeError as e: print(e) exit(-1) if __name__ == '__main__': flog.debug(' %s', os.cpu_count()) # 显示CPU进程数
def f_plot_od4pil_keypoints(img_pil, boxes_ltrb, keypoints, scores, labels, id_to_class=None, font_size=10, text_fill=True):
    '''
    Draw boxes, 5-point keypoints and "label:score" captions onto a PIL image.

    :param img_pil: image drawn on in place and returned
    :param boxes_ltrb: n,4 pixel coordinates
    :param keypoints: n,10 pixel coordinates, x/y interleaved — assumes 5 points, TODO confirm
    :param scores: n, confidences in [0, 1]
    :param labels: list(int) torch.tensor
    :param id_to_class: supports dict + list ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',]
    :param font_size:
    :param text_fill: caption on a filled colour background vs plain coloured text
    :return: the annotated image
    '''
    if isinstance(labels, torch.Tensor):
        labels = labels.type(torch.int).tolist()
    boxes_confs = torch.cat([boxes_ltrb, scores[:, None]], dim=1)
    try:
        # arg1: font file path, arg2: font size
        font = ImageFont.truetype('simhei.ttf', font_size, encoding='utf-8')
    except IOError:
        font = ImageFont.load_default()
    # print(len(STANDARD_COLORS))
    # color = random.randint(0, len(STANDARD_COLORS))
    cw = 3  # keypoint dot radius in pixels
    for box, k, conf, label in zip(boxes_confs[:, :4], keypoints, boxes_confs[:, 4], labels):
        left, top, right, bottom = box
        _s_text = '{}:{:.1%}'
        if id_to_class:
            show_text = _s_text.format(id_to_class[label], conf)
        else:
            show_text = _s_text.format(label, conf)
        flog.debug(show_text)
        text_width, text_height = font.getsize(show_text)
        margin = np.ceil(0.05 * text_height)
        # keep the caption on screen: above the box when it fits, else below
        if top > text_height:
            text_bottom = top
        else:
            text_bottom = bottom + text_height
        color = COLORS_ImageDraw[label]
        draw = ImageDraw.Draw(img_pil)
        draw.rectangle([left, top, right, bottom], outline=color, width=2)
        # the five keypoints: small filled circles (a 0..360 chord is a full disc)
        draw.chord((k[0] - cw, k[1] - cw, k[0] + cw, k[1] + cw), 0, 360, fill=(255, 0, 0), outline=(0, 255, 0))
        draw.chord((k[2] - cw, k[3] - cw, k[2] + cw, k[3] + cw), 0, 360, fill=(255, 0, 0), outline=(0, 255, 0))
        draw.chord((k[4] - cw, k[5] - cw, k[4] + cw, k[5] + cw), 0, 360, fill=(0, 0, 255), outline=(0, 255, 0))
        draw.chord((k[6] - cw, k[7] - cw, k[6] + cw, k[7] + cw), 0, 360, fill=(255, 0, 0), outline=(0, 255, 0))
        draw.chord((k[8] - cw, k[9] - cw, k[8] + cw, k[9] + cw), 0, 360, fill=(255, 0, 0), outline=(0, 255, 0))
        if text_fill:
            draw.rectangle([(left, text_bottom - text_height - 2 * margin), (left + text_width + 2 * margin, text_bottom)], fill=color)
            draw.text((left + margin, text_bottom - text_height - margin), show_text, fill='black', font=font)
        else:
            draw.text((left + margin, text_bottom - text_height - margin), show_text, fill=color, font=font)
    # font = ImageFont.truetype('simhei.ttf', 30, encoding='utf-8')
    # draw.text((100, 100), '优秀, 哈哈', (0, 255, 255), font=font)
    return img_pil
def forward(self, outs, targets, imgs_ts=None):
    '''
    Compute classification + box regression loss for the retina-style head.

    :param outs: tuple
        preg_32d, torch.Size([batch, num_anchors, 4*8]) raw regression output
        pcls, torch.Size([batch, num_anchors, NUM_CLASSES]) raw class logits
    :param targets: list (length batch) of dicts with 'boxes' (ltrb), 'labels'
        and, when cfg.NUM_KEYPOINTS > 0, 'keypoints'
    :param imgs_ts: batch image tensor, used for matcher debug / visualization
    :return: (loss_total, log_dict) — scalar loss and an OrderedDict of floats
    '''
    cfg = self.cfg
    preg_32d, pcls = outs  # torch.Size([5, 3614, 4])
    device = preg_32d.device
    batch, pdim1, c = preg_32d.shape
    # GT layout per anchor: conf-1, cls-NUM_CLASSES, matched gt_ltrb-4, ious_zg-1
    gdim = 1 + cfg.NUM_CLASSES + 4 + 1
    if cfg.NUM_KEYPOINTS > 0:
        gdim += cfg.NUM_KEYPOINTS
    # empty is fine: every row is fully overwritten by the matcher below.
    gretinas = torch.empty((batch, pdim1, gdim), device=device)
    for i in range(batch):
        gboxes_ltrb_b = targets[i]['boxes']
        glabels_b = targets[i]['labels']
        if cfg.NUM_KEYPOINTS > 0:
            # FIX: was targets['keypoints'] — targets is a list, indexing it with a
            # string raised TypeError whenever NUM_KEYPOINTS > 0.
            gkeypoints_b = targets[i]['keypoints']  # torch.Size([batch, 10])
        else:
            gkeypoints_b = None
        # NOTE(review): gkeypoints_b is currently not forwarded to the matcher
        # (gkeypoints_b=None below) — keypoint matching looks unfinished.
        gretinas[i] = pos_match_retina4cls(cfg, dim=gdim, gkeypoints_b=None,
                                           gboxes_ltrb_b=gboxes_ltrb_b, glabels_b=glabels_b,
                                           anc_obj=self.anc_obj,
                                           ptxywh_b=preg_32d[i], img_ts=imgs_ts[i])
        # alternative matcher kept for reference:
        # gretinas[i] = matchs_gfl(cfg, dim=gdim, gboxes_ltrb_b=gboxes_ltrb_b,
        #                          glabels_b=glabels_b, anc_obj=self.anc_obj,
        #                          mode='atss', preg_32d_b=preg_32d[i], img_ts=imgs_ts[i])

        # Visualize matched positive anchors for this image.
        if cfg.IS_VISUAL:
            _mask_pos = gretinas[i, :, 0] > 0  # 3d -> 1d
            _img_ts = imgs_ts[i].clone()
            anc_ltrb = xywh2ltrb(self.anc_obj.ancs_xywh)[_mask_pos]
            from f_tools.pic.enhance.f_data_pretreatment4pil import f_recover_normalization4ts
            _img_ts = f_recover_normalization4ts(_img_ts)
            flog.debug('gt数 %s , 正例数量 %s' % (gboxes_ltrb_b.shape[0], anc_ltrb.shape[0]))
            f_show_od_ts4plt(
                _img_ts,
                gboxes_ltrb=gboxes_ltrb_b.cpu(),
                pboxes_ltrb=anc_ltrb.cpu(),
                is_recover_size=True,
            )
    mask_pos_2d = gretinas[:, :, 0] > 0  # torch.Size([2, 32526])
    # clamp avoids div-by-zero when an image has no positive anchors
    nums_pos = (mask_pos_2d.sum(-1).to(
        torch.float)).clamp(min=torch.finfo(torch.float16).eps)
    # explicit negatives are not needed here (focalloss handles them);
    # removed the unused mask_neg_2d local.
    mash_ignore_2d = gretinas[:, :, 0] == -1  # anchors excluded from the loss
    ''' GT layout: conf-1, cls-NUM_CLASSES, matched gt_ltrb-4, ious_zg-1 '''
    s_ = 1 + cfg.NUM_CLASSES

    ''' ---------------- classification loss ---------------- '''
    pcls_sigmoid = pcls.sigmoid()  # normalize logits  torch.Size([5, 3614, 3])
    gcls = gretinas[:, :, 1:s_]  # torch.Size([5, 3614, 3])
    l_pos, l_neg = focalloss(pcls_sigmoid, gcls, mask_pos=mask_pos_2d,
                             mash_ignore=mash_ignore_2d, is_debug=True, alpha=0.5)
    # per-image normalization by positive count, then fixed weight 7
    l_cls_pos = (l_pos.sum(-1).sum(-1) / nums_pos).mean() * 7
    l_cls_neg = (l_neg.sum(-1).sum(-1) / nums_pos).mean() * 7

    ''' ---------------- box loss ----------------- '''
    # only positive anchors contribute (mask broadcast over the 4 coords)
    gtxywh = gretinas[:, :, s_:s_ + 4]
    _loss_val = F.smooth_l1_loss(
        preg_32d, gtxywh, reduction="none") * mask_pos_2d.unsqueeze(-1)
    l_box = (_loss_val.sum(-1).sum(-1) / nums_pos).mean()

    loss_total = l_cls_pos + l_cls_neg + l_box
    log_dict = OrderedDict()
    log_dict['l_total'] = loss_total.item()
    log_dict['l_cls_pos'] = l_cls_pos.item()
    log_dict['l_cls_neg'] = l_cls_neg.item()
    log_dict['l_box'] = l_box.item()
    # prediction statistics for monitoring training health
    log_dict['cls_max'] = pcls_sigmoid.max().item()
    log_dict['cls_mean'] = pcls_sigmoid.mean().item()
    log_dict['cls_min'] = pcls_sigmoid.min().item()
    return loss_total, log_dict
def forward(self, outs, targets, imgs_ts=None):
    '''
    Compute conf + cls + box losses for the anchor head (conf uses OHEM negatives).

    :param outs: tuple
        ptxywh, torch.Size([2, 32526, 4]) raw regression output
        pcategory, torch.Size([2, 32526, 1+NUM_CLASSES]) raw logits (conf first)
    :param targets: list (length batch) of dicts with 'boxes' (ltrb), 'labels'
        and, when cfg.NUM_KEYPOINTS > 0, 'keypoints'
    :param imgs_ts: batch image tensor, used for matcher debug / visualization
    :return: (loss_total, log_dict) — scalar loss and an OrderedDict of floats
    '''
    cfg = self.cfg
    ptxywh, pcategory_sigmoid = outs
    pcategory_sigmoid = pcategory_sigmoid.sigmoid()  # normalize all logits at once
    pconf_sigmoid = pcategory_sigmoid[:, :, 0]  # torch.Size([2, 32526])
    pcls_sigmoid = pcategory_sigmoid[:, :, 1:]  # already sigmoid
    device = ptxywh.device
    batch, pdim1, c = ptxywh.shape
    # GT layout per anchor: conf-1, cls-NUM_CLASSES, gt ltrb-4 (+ keypoints)
    gdim = 1 + cfg.NUM_CLASSES + 4
    if cfg.NUM_KEYPOINTS > 0:
        gdim += cfg.NUM_KEYPOINTS
    # zeros: unmatched anchors must stay background (conf=0)
    gretinas = torch.zeros((batch, pdim1, gdim), device=device)
    s_ = 1 + cfg.NUM_CLASSES  # first two fields are conf-1, cls-NUM_CLASSES
    for i in range(batch):
        gboxes_ltrb_b = targets[i]['boxes']
        glabels_b = targets[i]['labels']
        if cfg.NUM_KEYPOINTS > 0:
            # FIX: was targets['keypoints'] — targets is a list, indexing it with a
            # string raised TypeError whenever NUM_KEYPOINTS > 0.
            gkeypoints_b = targets[i]['keypoints']  # torch.Size([batch, 10])
        else:
            gkeypoints_b = None
        # NOTE(review): gkeypoints_b is not consumed below — keypoint support
        # looks unfinished here.
        # per-image matching; mask_neg_b / mash_ignore_b may be None
        boxes_index, mask_pos_b, mask_neg_b, mash_ignore_b = matchs_gt_b(
            cfg,
            gboxes_ltrb_b=gboxes_ltrb_b,
            glabels_b=glabels_b,
            anc_obj=self.anc_obj,
            mode='iou',  # mode='atss',
            ptxywh_b=ptxywh[i],
            img_ts=imgs_ts[i],
            num_atss_topk=9)
        '''positive / ignore flags'''
        gretinas[i][mask_pos_b, 0] = torch.tensor(1., device=device)
        if mash_ignore_b is not None:
            gretinas[i][mash_ignore_b, 0] = torch.tensor(-1., device=device)
        # one-hot class targets, positives only (labels are 1-based)
        labels_b = labels2onehot4ts(glabels_b - 1, cfg.NUM_CLASSES)
        gretinas[i][mask_pos_b, 1:s_] = labels_b[boxes_index][mask_pos_b].type(
            torch.float)
        # store raw gt ltrb; encoding to txywh happens later on positives only.
        # Removed dead code: `_gtxywh = boxes_encode4ssd(...)` was computed over
        # every anchor and never used.
        gretinas[i][mask_pos_b, s_:s_ + 4] = gboxes_ltrb_b[boxes_index][mask_pos_b]

        # Visualize matched positive anchors for this image.
        if cfg.IS_VISUAL:
            _mask_pos = gretinas[i, :, 0] > 0  # 3d -> 1d
            _img_ts = imgs_ts[i].clone()
            anc_ltrb = xywh2ltrb(self.anc_obj.ancs_xywh)[_mask_pos]
            from f_tools.pic.enhance.f_data_pretreatment4pil import f_recover_normalization4ts
            _img_ts = f_recover_normalization4ts(_img_ts)
            flog.debug('gt数 %s , 正例数量 %s' % (gboxes_ltrb_b.shape[0], anc_ltrb.shape[0]))
            f_show_od_ts4plt(
                _img_ts,
                gboxes_ltrb=gboxes_ltrb_b.cpu(),
                pboxes_ltrb=anc_ltrb.cpu(),
                is_recover_size=True,
            )
    mask_pos_2d = gretinas[:, :, 0] > 0  # torch.Size([2, 32526])
    # clamp avoids div-by-zero when an image has no positive anchors
    nums_pos = (mask_pos_2d.sum(-1).to(
        torch.float)).clamp(min=torch.finfo(torch.float16).eps)
    mask_ignore_2d = gretinas[:, :, 0] == -1

    ''' ---------------- cls loss ---------------- '''
    # pcls_sigmoid already normalized; positives only
    gcls = gretinas[:, :, 1:s_]
    _loss_val = x_bce(pcls_sigmoid, gcls, reduction="none")
    l_cls = ((_loss_val.sum(-1) * mask_pos_2d).sum(-1) / nums_pos).mean() * cfg.LOSS_WEIGHT[2]

    ''' ---------------- conf loss ---------------- '''
    gconf = gretinas[:, :, 0]  # 1 positive / 0 background / -1 ignore
    _loss_val = x_bce(pconf_sigmoid, gconf, reduction="none")
    # hard-negative mining at a 3:1 neg:pos ratio
    mask_neg_hard = f_ohem(_loss_val, nums_pos * 3, mask_pos=mask_pos_2d,
                           mash_ignore=mask_ignore_2d)
    l_conf_pos = ((_loss_val * mask_pos_2d).sum(-1) / nums_pos).mean() * cfg.LOSS_WEIGHT[0]
    l_conf_neg = ((_loss_val * mask_neg_hard).sum(-1) / nums_pos).mean() * cfg.LOSS_WEIGHT[1]

    ''' ---------------- regression loss ----------------- '''
    # gather positives, then encode gt boxes against their anchors
    gboxes_ltrb_m_pos = gretinas[:, :, s_:s_ + 4][mask_pos_2d]
    ancs_xywh_m_pos = self.anc_obj.ancs_xywh.unsqueeze(0).repeat(
        batch, 1, 1)[mask_pos_2d]
    gtxywh_pos = boxes_encode4ssd(cfg, ancs_xywh_m_pos,
                                  ltrb2xywh(gboxes_ltrb_m_pos))
    _loss_val = F.smooth_l1_loss(ptxywh[mask_pos_2d], gtxywh_pos,
                                 reduction="none")
    l_box = _loss_val.sum(-1).mean()

    log_dict = OrderedDict()
    loss_total = l_conf_pos + l_conf_neg + l_cls + l_box
    log_dict['l_total'] = loss_total.item()
    log_dict['l_conf_pos'] = l_conf_pos.item()
    log_dict['l_conf_neg'] = l_conf_neg.item()
    log_dict['loss_cls'] = l_cls.item()
    log_dict['l_box'] = l_box.item()
    # prediction statistics for monitoring training health
    log_dict['cls_max'] = pcls_sigmoid.max().item()
    log_dict['conf_max'] = pconf_sigmoid.max().item()
    log_dict['cls_mean'] = pcls_sigmoid.mean().item()
    log_dict['conf_mean'] = pconf_sigmoid.mean().item()
    log_dict['cls_min'] = pcls_sigmoid.min().item()
    log_dict['conf_min'] = pconf_sigmoid.min().item()
    return loss_total, log_dict
def forward(self, pssd, targets, imgs_ts=None):
    '''
    SSD-style loss: smooth-L1 box regression on positives plus cross-entropy
    classification with online hard-negative mining (3:1 neg:pos).

    :param pssd: preg, pcls = class+1 [2, 4, 8732] — pcls has the class
        dimension at axis 1 (as F.cross_entropy expects)
    :param targets: list (length batch) of dicts with 'boxes' (ltrb) and
        'labels' — labels presumably 1-based, 0 reserved for background; TODO confirm
    :param imgs_ts: batch image tensor, used only for matcher debug / visualization
    :return: (l_total, log_dict)
    '''
    cfg = self.cfg
    # pcls classes+1 [b, 4, 8732]
    preg, pcls = pssd
    preg = preg.permute(0, 2, 1)  # [b, 4, 8732] -> [b, 8732, 4]
    device = preg.device
    batch, hw, c = preg.shape
    # GT layout per anchor: cls_val-1, gltrb-4
    gdim = 1 + 4
    gssd = torch.empty((batch, hw, gdim), device=device)  # fully rewritten each batch, no zero-fill needed
    for i, target in enumerate(targets):  # iterate over the batch
        gboxes_ltrb_b = target['boxes']  # ltrb
        glabels_b = target['labels']
        boxes_index, mask_pos_b, mask_neg_b, mash_ignore_b = matchs_gt_b(
            cfg,
            gboxes_ltrb_b=gboxes_ltrb_b,
            glabels_b=glabels_b,
            anc_obj=self.anc_obj,
            mode='iou',
            ptxywh_b=preg[i],
            img_ts=imgs_ts[i],
            num_atss_topk=9)
        '''positive / negative flags — only positives get matched gt'''
        # conf column must be fully reset to 0 (background) since gssd is uninitialized
        gssd[i][:, 0] = 0
        gssd[i][mask_pos_b, 0] = glabels_b[boxes_index][mask_pos_b]
        gssd[i][mask_pos_b, 1:1 + 4] = gboxes_ltrb_b[boxes_index][mask_pos_b]  # gltrb-4

        '''visual sanity check of the matching'''
        if cfg.IS_VISUAL:
            gssd_test = gssd[i].clone()
            # gssd_test = gssd_test.view(-1, gdim)
            gconf_one = gssd_test[:, 0]
            mask_pos_2d = gconf_one > 0
            flog.debug('mask_pos_2d 个数%s', mask_pos_2d.sum())
            # torch.Size([169, 4])
            anc_ltrb_pos = xywh2ltrb(self.anc_obj.ancs_xywh[mask_pos_2d])
            from f_tools.pic.enhance.f_data_pretreatment4pil import f_recover_normalization4ts
            img_ts = f_recover_normalization4ts(imgs_ts[i])
            from torchvision.transforms import functional as transformsF
            img_pil = transformsF.to_pil_image(img_ts).convert('RGB')
            import numpy as np
            img_np = np.array(img_pil)
            f_show_od_np4plt(
                img_np,
                gboxes_ltrb=gboxes_ltrb_b.cpu(),
                pboxes_ltrb=anc_ltrb_pos.cpu(),  # ltrb
                # other_ltrb=xywh2ltrb(self.anc_obj.ancs_xywh)[:100],
                is_recover_size=True,
                # grids=(h, w)
            )
    # cls_val-1, gltrb-4
    glabel = gssd[:, :, 0]  # 0 means background
    mask_pos_2d = glabel > 0
    # clamp avoids div-by-zero when an image has no positive anchors
    nums_pos = (mask_pos_2d.sum(-1).to(
        torch.float)).clamp(min=torch.finfo(torch.float16).eps)
    # negatives come from OHEM below, so no explicit mask_neg_2d here
    # nums_neg = (mask_neg.sum(-1).to(torch.float)).clamp(min=torch.finfo(torch.float16).eps)

    ''' ---------------- regression loss ----------------- '''
    # gather positives, then encode gt boxes against their anchors
    gboxes_ltrb_m_pos = gssd[:, :, 1:1 + 4][mask_pos_2d]
    ancs_xywh_m_pos = self.anc_obj.ancs_xywh.unsqueeze(0).repeat(
        batch, 1, 1)[mask_pos_2d]
    gtxywh_pos = boxes_encode4ssd(cfg, ancs_xywh_m_pos,
                                  ltrb2xywh(gboxes_ltrb_m_pos))
    _loss_val = F.smooth_l1_loss(preg[mask_pos_2d], gtxywh_pos,
                                 reduction="none")
    l_box = _loss_val.sum(-1).mean()

    ''' ---------------- classification loss ---------------- '''
    # cross_entropy applies softmax internally; class dim of pcls is axis 1
    _loss_val = F.cross_entropy(pcls, glabel.long(), reduction="none")
    # hard-negative mining keeps 3 negatives per positive
    mask_neg_hard = f_ohem(_loss_val, nums_pos * 3, mask_pos=mask_pos_2d)
    l_conf_pos = ((_loss_val * mask_pos_2d).sum(-1) / nums_pos).mean()  # more positives -> more negatives mined
    l_conf_neg = ((_loss_val * mask_neg_hard).sum(-1) / nums_pos).mean()

    log_dict = {}
    ''' ---------------- total ----------------- '''
    l_total = l_box + l_conf_pos + l_conf_neg
    log_dict['l_total'] = l_total.item()
    log_dict['l_conf_pos'] = l_conf_pos.item()
    log_dict['l_conf_neg'] = l_conf_neg.item()
    log_dict['l_box'] = l_box.item()
    # log_dict['p_max'] = pcls.max().item()
    # log_dict['p_min'] = pcls.min().item()
    # log_dict['p_mean'] = pcls.mean().item()
    return l_total, log_dict
def hadler_voc():
    '''
    Convert VOC-style XML annotations to an intermediate CSV, then to COCO JSON.

    Reads a VOC split file (one image id per line), parses each Annotations/*.xml
    with xmltodict, writes rows of "filename,xmin,ymin,xmax,ymax,class" to a CSV,
    dumps a classes_name.txt if missing, and finally calls to_coco() to emit the
    COCO-format JSON. All paths are hard-coded for the local dataset layout.

    :raises Exception: when the split txt yields no XML files
    '''
    mode = 'bbox'  # 'keypoints' # 'bbox':
    # path_root = r'M:/AI/datas/VOC2012'
    path_root = r'M:/AI/datas/VOC2007'
    path_data = path_root + '/val'  # VOC folder name
    train_type = 'test'  # output JSON name
    path_file_txt = 'train.txt'  # split file name
    file_classes_ids = path_root + '/classes_ids.json'
    path_coco_save = path_root  # output root; must already exist
    path_img = path_data + '/JPEGImages'  # real image path
    path_txt = os.path.join(path_data, path_file_txt)
    path_xml = os.path.join(path_data, 'Annotations')
    with open(path_txt) as read:
        # each line -> full xml path (add dir + extension)
        xml_list = [
            os.path.join(path_xml, line.strip() + ".xml")
            for line in read.readlines()
        ]

    if not xml_list:
        raise Exception('未读到数据')

    rets = []
    # for file_xml in tqdm(xml_list[:1000]):  # limit for testing
    for file_xml in tqdm(xml_list, desc='组装CSV标签'):
        with open(file_xml) as file:
            str_xml = file.read()
        doc = xmltodict.parse(str_xml)
        filename = doc['annotation']['filename']
        ret = []
        objs = doc['annotation']['object']
        if isinstance(objs, dict):
            # single object: xmltodict returns a dict instead of a list
            xmin = str(float(objs['bndbox']['xmin']))
            ymin = str(float(objs['bndbox']['ymin']))
            xmax = str(float(objs['bndbox']['xmax']))
            ymax = str(float(objs['bndbox']['ymax']))
            ret.append(filename)
            ret.extend([xmin, ymin, xmax, ymax])
            # FIX: was ret.extend(objs['name']) which iterated the class-name
            # string and appended one CSV field per character.
            ret.append(objs['name'])
            rets.append(ret)
        else:
            for obj in objs:  # possibly several objects
                xmin = str(float(obj['bndbox']['xmin']))
                ymin = str(float(obj['bndbox']['ymin']))
                xmax = str(float(obj['bndbox']['xmax']))
                ymax = str(float(obj['bndbox']['ymax']))
                ret.append(filename)
                ret.extend([xmin, ymin, xmax, ymax])
                # FIX: append the whole class name, not its characters
                ret.append(obj['name'])
                rets.append(ret.copy())
                ret.clear()
    # print(rets)  # ['2007_000027.jpg', '174.0', '101.0', '349.0', '351.0', 'person']

    infos = rets
    file_csv = '../_file/csv_labels_' + 'voc_' + path_file_txt.split('.')[0] + '.csv'
    with open(file_csv, "w") as csv_labels:
        for info in infos:
            s_ = ','.join(info) + '\n'  # filename,l,t,r,b,class
            csv_labels.write(s_)
    # (redundant csv_labels.close() removed — `with` already closes the file)
    flog.debug('file_csv : %s', file_csv)

    '''conversion to COCO'''
    with open(file_classes_ids, 'r', encoding='utf-8') as f:
        classes_ids = json.load(f)  # file -> dict or list

    # dump the class names once if the txt does not exist yet
    classes_name = []
    file = os.path.join(path_root, 'classes_name.txt')
    if not os.path.exists(file):
        for k, v in classes_ids.items():
            classes_name.append(k)
        with open(file, 'w') as f:
            f.write(' '.join(classes_name))

    to_coco(file_csv, classes_ids, path_img, path_coco_save, mode,
            is_copy=False, is_move=False,
            file_coco=train_type + '_' + str(len(xml_list)))