def get_yolov4_target(self, pred, img_metas, batch_size, gt_bbox, gt_class,
                      gt_score):
    device = pred[0].device
    h, w = img_metas[0]['img_shape'][:2]
    tcls, tbox, indices, ignore_mask, anch = [], [], [], [], []
    ft = torch.cuda.FloatTensor if pred[0].is_cuda else torch.Tensor
    lt = torch.cuda.LongTensor if pred[0].is_cuda else torch.LongTensor
    for index, (mask, downsample_ratio) in enumerate(
            zip(self.anchor_masks, self.downsample_ratios)):
        b, a, gj, gi = (lt([]).to(device), lt([]).to(device),
                        lt([]).to(device), lt([]).to(device))
        gxywh = ft([]).to(device)
        cls = lt([]).to(device)
        anchors = np.array(
            self.anchors, dtype=np.float32)[mask] / downsample_ratio  # scale to grid units
        batch_ignore_mask = torch.ones(
            (batch_size, len(mask), int(h / downsample_ratio),
             int(w / downsample_ratio), 1)).to(device)
        for bs in range(batch_size):
            xywh = xyxy2xywh(gt_bbox[bs]) if isinstance(
                gt_bbox[bs], torch.Tensor) else xyxy2xywh(
                    torch.from_numpy(gt_bbox[bs]).to(device))
            if len(xywh) == 0:
                continue
            grid_h, grid_w = int(h / downsample_ratio), int(
                w / downsample_ratio)
            all_anchors_grid = np.array(
                self.anchors, dtype=np.float32) / downsample_ratio  # scale to grid units
            ref_anchors = np.zeros((len(all_anchors_grid), 4),
                                   dtype=np.float32)
            ref_anchors[:, 2:] = all_anchors_grid
            ref_anchors = torch.from_numpy(
                ref_anchors)  # [0, 0, anchor_w, anchor_h]
            gt = xywh * torch.tensor(
                [grid_w, grid_h, grid_w, grid_h]).to(
                    device).float()  # x, y, w, h in grid units
            score, _cls = gt_score[bs], gt_class[bs]
            cx_grid = gt[:, 0].floor().cpu().numpy()  # grid x index
            cy_grid = gt[:, 1].floor().cpu().numpy()  # grid y index
            n = len(gt)
            truth_box = torch.zeros(n, 4)
            truth_box[:n, 2:4] = gt[:n, 2:4]
            anchor_ious = box_iou(truth_box, ref_anchors)
            best_n_all = anchor_ious.argmax(
                dim=1)  # index of the best-IoU anchor per gt (row-wise argmax)
            best_n = best_n_all % 3
            # 1 if the gt's best anchor belongs to this scale's mask, else 0
            best_n_mask = ((best_n_all == mask[0]) | (best_n_all == mask[1])
                           | (best_n_all == mask[2]))
            if best_n_mask.sum() == 0:  # no gt has its best anchor at this scale
                continue
            # cx/cy: integer part is the cell index, fractional part the within-cell offset
            truth_box[:n, 0:2] = gt[:n, 0:2]
            single_ignore_mask = np.zeros((len(mask), grid_h, grid_w, 1),
                                          dtype=np.float32)
            # IoU between every prediction at this scale and the gt boxes (location included)
            pred_ious = box_iou(pred[index][bs, ..., :4].reshape(-1, 4),
                                truth_box.reshape(-1, 4).to(device),
                                xyxy=False)
            pred_best_iou, _ = pred_ious.max(dim=1)  # (values, indices)
            pred_best_iou = (pred_best_iou > self.ignore_thre
                             )  # True above the ignore threshold, else False
            pred_best_iou = pred_best_iou.view(
                single_ignore_mask.shape)  # reshape onto the grid: 1 where a prediction overlaps a gt
            # set mask to zero (ignore) if pred matches truth
            single_ignore_mask = ~pred_best_iou  # invert: 1 = no matching object, 0 = object present
            a_i = best_n[best_n_mask].to(device).long()
            gi_i = torch.from_numpy(cx_grid)[best_n_mask].to(device).long()
            gj_i = torch.from_numpy(cy_grid)[best_n_mask].to(device).long()
            b = torch.cat(
                (b,
                 torch.ones(len(truth_box))[best_n_mask].long().to(device) *
                 bs))
            a = torch.cat((a, a_i))
            gi = torch.cat((gi, gi_i))
            gj = torch.cat((gj, gj_i))
            gxywh = torch.cat((gxywh, truth_box[best_n_mask].to(device)))
            cls = torch.cat(
                (cls, torch.from_numpy(_cls)[best_n_mask].to(device).long()))
            # never ignore cells that carry a target of this image
            # (index with the per-image tensors, not the batch-accumulated ones)
            single_ignore_mask[a_i, gj_i, gi_i] = 0
            batch_ignore_mask[bs, :] = single_ignore_mask
        indices.append((b, a, gj, gi))
        gxywh[..., :2] = gxywh[..., :2] - gxywh[..., :2].long()  # keep only the fractional offset
        tbox.append(gxywh)
        tcls.append(cls)
        anch.append(anchors[a.cpu().numpy()])  # anchors
        ignore_mask.append(batch_ignore_mask)
    return indices, tbox, tcls, anch, ignore_mask
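# ---------------------------------------------------------------------------
# Illustration (standalone sketch, not part of the class above): the
# anchor-matching rule get_yolov4_target relies on. Ground-truth boxes and
# anchors are compared as width/height-only boxes anchored at the origin
# ([0, 0, w, h]), and a gt is kept at the current scale only when its best
# anchor falls inside that scale's mask. The helper name `wh_iou` and the toy
# anchors/stride below are assumptions for the demo, not values read from
# this repo.
# ---------------------------------------------------------------------------
import torch


def wh_iou(wh1, wh2):
    # IoU of wh-only boxes: inter = prod(min(wh)), union = a1 + a2 - inter
    wh1, wh2 = wh1[:, None], wh2[None]   # (n,1,2), (1,m,2)
    inter = torch.min(wh1, wh2).prod(2)  # (n,m)
    return inter / (wh1.prod(2) + wh2.prod(2) - inter)


# Toy setup: 9 COCO-style anchors, mask [6, 7, 8] for the stride-32 head,
# and one 100x80 px gt box on a 416x416 input.
anchors = torch.tensor([[10, 13], [16, 30], [33, 23],
                        [30, 61], [62, 45], [59, 119],
                        [116, 90], [156, 198], [373, 326]]).float()
mask, stride = [6, 7, 8], 32
gt_wh = torch.tensor([[100., 80.]]) / stride  # gt wh in grid units
ious = wh_iou(gt_wh, anchors / stride)        # (1, 9)
best = int(ious.argmax(1))                    # best anchor over all scales
print(best, best in mask)                     # 6 True -> assigned at this scale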
    return img


file = '/disk2/project/pytorch-YOLOv4/cfg/dataset_test.py'
cfg = Config.fromfile(file)
dataset = build_from_dict(cfg.data.train, DATASET)
dataloader = build_dataloader(dataset, data=cfg.data)
for i, data_batch in enumerate(dataloader):
    if i > 30:
        break
    for idx, data in enumerate(data_batch['img']):
        gt = data_batch['gt_bboxes'][idx]
        gt_xywh = xyxy2xywh(gt)  # x, y, w, h
        n_gt = (gt.sum(dim=-1) > 0).sum(dim=-1)
        n = int(n_gt)
        if n == 0:
            continue
        gt = gt[:n].cpu().numpy()
        gt_xywh = gt_xywh[:n].cpu().numpy()
        data = data.cpu().numpy() * 255
        data = data.transpose(1, 2, 0)
        h, w = data.shape[:2]
        a = draw_box(data.copy(), gt_xywh, (h, w))
        cv2.imwrite(
            os.path.join('/disk2/project/test/v2.0/yolov5/dataset/123/test',
                         str(i) + '+' + str(idx) + '.jpg'), a)

# for i in range(30):
#     result = dataset.__getitem__(i)
#     img = result['img']
#     gt_bboxes = result['gt_bboxes']
def get_yolov5_target(self, pred, img_metas, batch_size, gt_bbox, gt_class,
                      gt_score):
    device = pred[0].device
    gain = torch.ones(6, device=device)  # normalized-to-gridspace gain
    targets = torch.tensor([], dtype=torch.float32).to(device)
    for i, gtb in enumerate(gt_bbox):
        gtc = torch.from_numpy(gt_class[i]).to(device)
        img_idx = torch.ones(len(gtb), 1, device=device) * i
        targets = torch.cat(
            (targets,
             torch.cat((img_idx, gtc, torch.from_numpy(gtb).to(device)),
                       dim=-1)))
    na, nt = len(self.anchor_masks), len(targets)
    tcls, tbox, indices, anch, ignore_mask = [], [], [], [], []
    targets[..., 2:] = xyxy2xywh(targets[..., 2:])
    g = 0.5  # offset from the cell center
    off = torch.tensor(
        [[1, 0], [0, 1], [-1, 0], [0, -1]],
        device=device).float()  # offsets to the 4 neighbouring cells (right, down, left, up)
    at = torch.arange(na, device=device).view(na, 1).repeat(
        1, nt)  # anchor tensor, same as .repeat_interleave(nt)
    for idx, (mask, downsample_ratio) in enumerate(
            zip(self.anchor_masks, self.downsample_ratios)):
        anchors = np.array(
            self.anchors, dtype=np.float32)[mask] / downsample_ratio  # scale to grid units
        gain[2:] = torch.tensor(pred[idx].shape)[[3, 2, 3, 2]]  # xyxy gain

        # Match targets to anchors
        a, t, offsets = [], targets * gain, 0
        if nt:
            r = t[None, :, 4:6] / torch.from_numpy(anchors[:, None]).to(
                device)  # wh ratio
            j = torch.max(r, 1. / r).max(2)[0] < self.anchor_t  # compare
            # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2))
            # t: gt boxes whose wh ratio to the anchor is within anchor_t; a: matching anchor indices
            a, t = at[j], t.repeat(na, 1, 1)[j]

            # overlaps
            gxy = t[:, 2:4]  # grid xy
            z = torch.zeros_like(gxy)
            # j, k: fractional offsets below 0.5; l, m: above 0.5
            j, k = ((gxy % 1. < g) & (gxy > 1.)).T
            l, m = ((gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T
            # t: original targets; t[j]/t[k]: duplicates added when the x/y offset < .5;
            # t[l]/t[m]: duplicates added when the x/y offset > .5
            a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat(
                (t, t[j], t[k], t[l], t[m]), 0)
            # z: no shift; +0.5 when the x/y fraction < .5, -0.5 when it is > .5
            offsets = torch.cat(
                (z, z[j] + off[0], z[k] + off[1], z[l] + off[2],
                 z[m] + off[3]), 0) * g

        # Define
        b, c = t[:, :2].long().T  # image, class
        gxy = t[:, 2:4]  # grid xy
        gwh = t[:, 4:6]  # grid wh
        gij = (gxy - offsets).long()  # final grid cell per (possibly shifted) target, -0.5 < offsets < 0.5
        gi, gj = gij.T  # grid xy indices

        # Append
        indices.append((b, a, gj, gi))  # image, anchor, grid indices
        tbox.append(torch.cat((gxy - gij, gwh), 1))  # box; x, y offsets lie in [-0.5, 1.5]
        anch.append(anchors[a.cpu().numpy()])  # anchors
        tcls.append(c)  # class
        ignore_mask.append([])
    return indices, tbox, tcls, anch, ignore_mask
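# ---------------------------------------------------------------------------
# Illustration (standalone sketch): the neighbour-cell expansion used by
# get_yolov5_target. Each target is assigned to its own cell plus up to two
# neighbouring cells, chosen by whether the fractional part of its center is
# below or above g = 0.5. Grid size and target coordinates are toy values.
# ---------------------------------------------------------------------------
import torch

g = 0.5
off = torch.tensor([[1, 0], [0, 1], [-1, 0], [0, -1]]).float()
grid = 13.  # toy 13x13 grid

gxy = torch.tensor([[4.3, 7.8],    # x frac < .5 -> left cell too; y frac > .5 -> cell below too
                    [9.6, 2.2]])   # x frac > .5 -> right cell too; y frac < .5 -> cell above too
z = torch.zeros_like(gxy)
j, k = ((gxy % 1. < g) & (gxy > 1.)).T
l, m = ((gxy % 1. > (1 - g)) & (gxy < grid - 1.)).T
gxy_all = torch.cat((gxy, gxy[j], gxy[k], gxy[l], gxy[m]), 0)
offsets = torch.cat((z, z[j] + off[0], z[k] + off[1],
                     z[l] + off[2], z[m] + off[3]), 0) * g
gij = (gxy_all - offsets).long()
print(gij.tolist())
# [[4, 7], [9, 2], [3, 7], [9, 1], [10, 2], [4, 8]]
# 2 targets -> 6 positive cells: the home cell of each plus two neighbours.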
def single_gpu_test(model,
                    data_loader,
                    half=False,
                    conf_thres=0.001,
                    iou_thres=0.6,
                    merge=False,
                    save_json=False,
                    augment=False,
                    verbose=False,
                    coco_val_path=''):
    device = next(model.parameters()).device  # get model device

    # Half
    half = device.type != 'cpu' and half  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    seen = 0
    nc = model.head.num_classes
    names = model.CLASSES if hasattr(
        model, 'CLASSES') else data_loader.dataset.CLASSES
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, batch in enumerate(tqdm(data_loader, desc=s)):
        img = batch['img'].to(device, non_blocking=True)
        batch['img'] = img.half() if half else img.float()  # uint8 to fp16/32
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)
        ft = torch.cuda.FloatTensor if half else torch.Tensor
        gt_bbox = batch['gt_bboxes']
        gt_class = batch['gt_class']
        img_metas = batch['img_metas']
        targets = ft([]).to(device)
        for i, gtb in enumerate(gt_bbox):
            gtc = torch.from_numpy(gt_class[i]).to(device)
            img_idx = torch.ones(len(gtb), 1, device=device) * i
            targets = torch.cat([
                targets,
                torch.cat((img_idx, gtc, torch.from_numpy(gtb).to(device)),
                          dim=-1)
            ])

        # Disable gradients
        with torch.no_grad():
            # Run model
            batch['eval'] = True
            if augment:
                batch['augment'] = True
            t = torch_utils.time_synchronized()
            inf_out, train_out = model(
                return_loss=False, **batch)  # inference and training outputs
            t0 += torch_utils.time_synchronized() - t

            # Run NMS
            t = torch_utils.time_synchronized()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres,
                                         merge=merge)
            t1 += torch_utils.time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # if save_txt:
            #     filename = img_metas[si]['filename']
            #     ori_shape = img_metas[si]['ori_shape']
            #     # img_shape = img_metas[si]['img_shape']
            #     # gn = torch.tensor(ori_shape[:2])[[0, 1, 0, 1]]  # normalization gain whwh
            #     txt_path = str(out / Path(filename).stem)
            #     pred[:, :4] = scale_coords(img[si].shape[1:], pred[:, :4], ori_shape[:2])  # to original
            #     for *xyxy, conf, cls in pred:
            #         # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
            #         with open(txt_path + '.txt', 'a') as f:
            #             f.write(('%g ' * 5 + '\n') % (cls, *xyxy))  # label format

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # if save:
            #     _pd = pred.cpu().numpy()
            #     for _p in _pd:
            #         left_top = (int(_p[0]), int(_p[1]))
            #         right_bottom = (int(_p[2]), int(_p[3]))
            #         cv2.rectangle(img, left_top, right_bottom, color=(0, 0, 255), thickness=2)
            #         label_text = str(_p[5])
            #         label_text += '|{:.02f}'.format(_p[4])
            #         cv2.putText(img, label_text, (int(_p[0]), int(_p[1]) - 2),
            #                     cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5, color=(0, 0, 255))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...]
                filename = img_metas[si]['filename']
                ori_shape = img_metas[si]['ori_shape']
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box,
                             ori_shape[:2])  # to original shape
                image_id = str(Path(filename).stem)
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id':
                        int(image_id) if image_id.isnumeric() else image_id,
                        'category_id': coco91class[int(p[5])],
                        'bbox': [round(x, 3) for x in b],
                        'score': round(p[4], 5)
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0],
                                  niou,
                                  dtype=torch.bool,
                                  device=device)
            if nl:
                detected = []  # detected target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                # tbox = xywh2xyxy(labels[:, 1:5]) * whwh
                tbox = labels[:, 1:5] * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction-to-target ious
                        ious, i = box_iou(pred[pi, :4],
                                          tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        for j in (ious > iouv[0]).nonzero():
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        # if batch_i < 1:
        #     f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i)  # filename
        #     plot_images(img, targets, paths, str(f), names)  # ground truth
        #     f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
        #     plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(
            1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (
        height, width, data_loader.batch_size)  # tuple

    # Save JSON
    if save_json and len(jdict):
        filename = model.cfg.filename
        basename = os.path.basename(filename)
        bname = os.path.splitext(basename)[0]
        f = 'detections_val2017_%s_results.json' % bname  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)
        print('\nCOCO mAP with pycocotools... saving %s finished' % f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            imgIds = [int(Path(x).stem) for x in data_loader.dataset.imgs]
            cocoGt = COCO(
                glob.glob(coco_val_path + '/instances_val*.json')
                [0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api
            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print('ERROR: pycocotools unable to run: %s' % e)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map,
            *(loss.cpu() / len(data_loader)).tolist()), maps, t
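# ---------------------------------------------------------------------------
# Hypothetical invocation of single_gpu_test, reusing the Config/build helpers
# seen in the dataset-test script above. `cfg.data.val` and the DETECTOR
# registry key are assumptions, not confirmed by this file.
# ---------------------------------------------------------------------------
cfg = Config.fromfile('/disk2/project/pytorch-YOLOv4/cfg/dataset_test.py')
dataset = build_from_dict(cfg.data.val, DATASET)     # assumed val split
dataloader = build_dataloader(dataset, data=cfg.data)
model = build_from_dict(cfg.model, DETECTOR).cuda()  # assumed registry key

results, maps, times = single_gpu_test(
    model,
    dataloader,
    half=True,          # fp16 inference (CUDA only)
    conf_thres=0.001,   # low threshold, standard for mAP evaluation
    iou_thres=0.6,      # NMS IoU threshold
    save_json=False)
mp, mr, map50, map_ = results[:4]
print('P %.3f  R %.3f  mAP@.5 %.3f  mAP@.5:.95 %.3f' % (mp, mr, map50, map_))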