def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-16):
    """Return the IoU (optionally GIoU / DIoU / CIoU) of box1 against box2.

    Args:
        box1: Box coordinates indexed along dim 0 (a length-4 tensor for a
            single box in the corner-format path).
            NOTE(review): the xywh path calls ``box1.t()``, so it presumably
            expects a 4xn tensor there - confirm against callers.
        box2: nx4 tensor of boxes; it is transposed internally so that
            coordinates are indexed along dim 0.
        x1y1x2y2: True when boxes are (x1, y1, x2, y2); otherwise both inputs
            are converted with ``data_utils.xywh2xyxy``.
        GIoU: Return Generalized IoU (https://arxiv.org/pdf/1902.09630.pdf).
            Takes precedence over DIoU and CIoU.
        DIoU: Return Distance IoU (https://arxiv.org/abs/1911.08287v1).
            Takes precedence over CIoU.
        CIoU: Return Complete IoU.
        eps: Small constant added to denominators to avoid division by zero.

    Returns:
        Tensor of overlap scores, broadcast over box1 and box2.
    """
    box2 = box2.t()

    if not x1y1x2y2:
        # Transform from xywh to xyxy; the helper is fed the un-transposed
        # layout and the result is transposed back.
        box1 = data_utils.xywh2xyxy(box1.t()).t()
        box2 = data_utils.xywh2xyxy(box2.t()).t()

    # Both input formats are in corner form at this point.
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

    # Intersection area
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union area
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1
    union = (w1 * h1 + eps) + w2 * h2 - inter

    iou = inter / union
    if not (GIoU or DIoU or CIoU):
        return iou

    # Smallest enclosing (convex) box width and height
    cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
    ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)

    if GIoU:
        c_area = cw * ch + eps  # convex area
        return iou - (c_area - union) / c_area

    # Distance or Complete IoU
    c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
    # Centre-point distance squared
    rho2 = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2)) ** 2 / 4 + \
           ((b2_y1 + b2_y2) - (b1_y1 + b1_y2)) ** 2 / 4
    if DIoU:
        return iou - rho2 / c2

    # CIoU: aspect-ratio consistency term.
    # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
    # eps keeps the ratios finite for zero-height boxes.
    v = (4 / math.pi ** 2) * torch.pow(
        torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2)
    with torch.no_grad():
        # Without eps this is 0/0 (NaN) when the boxes coincide (iou == 1, v == 0).
        alpha = v / (1 - iou + v + eps)
    return iou - (rho2 / c2 + v * alpha)
def apply_nms_onnx(prediction, num_classes=8, conf_thres=0.1, iou_thres=0.6, agnostic=False):
    """Run per-class NMS on raw predictions and return all kept rows as one tensor.

    Each prediction row is read as (cx, cy, w, h, obj_conf,
    ``num_classes`` class scores, any further columns).

    Returns:
        A single tensor concatenating the surviving detections of every
        image. Columns are (batch index, x1, y1, x2, y2, conf, cls) followed
        by whatever columns came after the class scores in ``prediction``.
    """
    max_wh = 4096  # (pixels) per-class offset that keeps classes apart in NMS
    max_det = 300  # maximum number of detections per image
    cls_end = 5 + num_classes

    candidates = prediction[..., 4] > conf_thres
    kept = []
    for img_idx, det in enumerate(prediction):
        # Keep only rows whose objectness passes the threshold
        det = det[candidates[img_idx]]

        # conf = obj_conf * cls_conf
        det[:, 5:cls_end] = det[:, 5:cls_end] * det[:, 4:5]

        # (center x, center y, width, height) -> (x1, y1, x2, y2)
        corners = data_utils.xywh2xyxy(det[:, :4])

        # One output row per (box, class) pair above the threshold
        rows, cls_ids = torch.nonzero(det[:, 5:cls_end] > conf_thres, as_tuple=False).t()
        batch_col = torch.zeros_like(cls_ids) + img_idx
        det = torch.cat(
            (batch_col[:, None].float(),
             corners[rows],
             det[rows, cls_ids + 5, None],
             cls_ids[:, None].float(),
             det[rows, cls_end:]),
            1)

        # Batched NMS: shift boxes by class so different classes never overlap
        class_shift = det[:, 6:7] * (0 if agnostic else max_wh)
        shifted_boxes = det[:, 1:5] + class_shift
        confidences = det[:, 5]
        keep = torchvision.ops.boxes.nms(shifted_boxes, confidences, iou_thres)

        kept.append(det[keep[:max_det]])  # limit detections
    return torch.cat(kept, dim=0)
def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16):
    """Tile a batch of images into a square mosaic and draw their boxes on it.

    Args:
        images: Batch indexed as (batch, channel, height, width); a torch
            tensor or numpy array. Values are scaled by 255 when the first
            image's maximum is <= 1. The caller's data is never modified.
        targets: 2-D tensor/array with one row per box: column 0 is the image
            index, column 1 the class, columns 2-5 an xywh box that is scaled
            by the tile width/height. A row width of 6 is treated as ground
            truth; otherwise column 6 is read as the confidence.
        paths: Optional image paths; their basenames label each tile.
        fname: Output file. Nothing is drawn if it already exists. ``None``
            skips writing.
        names: Optional class-index -> name lookup for labels.
        max_size: Tiles are shrunk so their longer side does not exceed this.
        max_subplots: Maximum number of images placed in the mosaic.

    Returns:
        The mosaic array (down-scaled by half when ``fname`` is given), or
        ``None`` if ``fname`` already exists.
    """
    tl = 3  # line thickness
    tf = max(tl - 1, 1)  # font thickness
    # fname=None means "do not save", so there is nothing to protect from overwrite
    if fname is not None and os.path.isfile(fname):  # do not overwrite
        return None

    if isinstance(images, torch.Tensor):
        images = images.cpu().float().numpy()

    if isinstance(targets, torch.Tensor):
        targets = targets.cpu().numpy()

    # un-normalise (out of place: the array may share memory with the caller's tensor)
    if np.max(images[0]) <= 1:
        images = images * 255

    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = np.ceil(bs ** 0.5)  # number of subplots per side (square)

    # Check if we should resize
    scale_factor = max_size / max(h, w)
    if scale_factor < 1:
        h = math.ceil(scale_factor * h)
        w = math.ceil(scale_factor * w)

    # Empty (white) array for output
    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)

    # Fix class - colour map
    prop_cycle = plt.rcParams['axes.prop_cycle']

    # https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb
    def hex2rgb(hex_str):
        return tuple(int(hex_str[1 + k:1 + k + 2], 16) for k in (0, 2, 4))

    color_lut = [hex2rgb(c) for c in prop_cycle.by_key()['color']]

    for i, img in enumerate(images):
        if i == max_subplots:  # batch holds more images than the mosaic has tiles
            break

        block_x = int(w * (i // ns))
        block_y = int(h * (i % ns))

        img = img.transpose(1, 2, 0)
        if scale_factor < 1:
            img = cv2.resize(img, (w, h))

        mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
        if len(targets) > 0:
            image_targets = targets[targets[:, 0] == i]
            boxes = data_utils.xywh2xyxy(image_targets[:, 2:6]).T
            classes = image_targets[:, 1].astype('int')
            gt = image_targets.shape[1] == 6  # ground truth if no conf column
            # check for confidence presence (gt vs pred)
            conf = None if gt else image_targets[:, 6]

            # Scale to tile size and move into this tile's position
            boxes[[0, 2]] *= w
            boxes[[0, 2]] += block_x
            boxes[[1, 3]] *= h
            boxes[[1, 3]] += block_y
            for j, box in enumerate(boxes.T):
                cls = int(classes[j])
                color = color_lut[cls % len(color_lut)]
                cls = names[cls] if names else cls
                if gt or conf[j] > 0.3:  # 0.3 conf thresh
                    label = '%s' % cls if gt else '%s %.1f' % (cls, conf[j])
                    plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)

        # Draw image filename labels
        if paths is not None:
            label = os.path.basename(paths[i])[:40]  # trim to 40 char
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220],
                        thickness=tf, lineType=cv2.LINE_AA)

        # Image border
        cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3)

    if fname is not None:
        mosaic = cv2.resize(mosaic, (int(ns * w * 0.5), int(ns * h * 0.5)), interpolation=cv2.INTER_AREA)
        cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB))

    return mosaic
def __init__(self, path, img_size=640, batch_size=16, augment=False, config=None, rect=False, image_weights=False,
             cache_images=False, single_cls=False, stride=32, pad=0.0, is_training=False):
    """Index the images/labels of one split under ``path`` and cache the labels.

    Expected layout under ``path``: ``ImageSets/<split>.txt`` (one sample id
    per line), ``training/image_2/<id>.png``, ``training/label_2/<id>.txt``.
    Image shapes and parsed labels are cached under ``<path>/cache``.

    Args:
        path: Dataset root directory.
        img_size: Target image size; also sets the mosaic border.
        batch_size: Used to assign every image a batch index.
        augment: Stored; enables mosaic loading when ``rect`` is off.
        config: Stored unchanged on the instance.
        rect: Sort images by aspect ratio and compute per-batch shapes.
            Forced off when ``image_weights`` is set.
        image_weights: Stored on the instance.
        cache_images: Load every image into memory via ``load_image``.
        single_cls: Overwrite every label class with 0.
        stride: Batch shapes are rounded up to a multiple of this.
        pad: Added (in stride units) before rounding batch shapes.
        is_training: Selects the ``train`` split, otherwise ``test``.

    Raises:
        AssertionError: If no images are listed, a label row is malformed,
            or no labels are found.
    """
    root_path = path  # os-agnostic
    split = 'train' if is_training else 'test'
    split_file = os.path.join(root_path, 'ImageSets', split + '.txt')
    image_path = os.path.join(root_path, 'training', 'image_2')
    label_path = os.path.join(root_path, 'training', 'label_2')
    with open(split_file, 'r') as f:
        sample_ids = f.read().splitlines()
    self.img_files = sorted(os.path.join(image_path, i + '.png') for i in sample_ids)
    self.label_files = sorted(os.path.join(label_path, i + '.txt') for i in sample_ids)
    print('img num: {}, label num: {}'.format(len(self.img_files), len(self.label_files)))

    n = len(self.img_files)
    assert n > 0, 'No images found in %s. See %s' % (path, help_url)
    # np.int was removed in NumPy 1.24; the builtin int is the same dtype
    bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
    nb = bi[-1] + 1  # number of batches

    self.n = n  # number of images
    self.batch = bi  # batch index of image
    self.img_size = img_size
    self.augment = augment
    self.config = config
    self.image_weights = image_weights
    self.rect = False if image_weights else rect
    # load 4 images at a time into a mosaic (only during training)
    self.mosaic = self.augment and not self.rect
    self.mosaic_border = [-img_size // 2, -img_size // 2]
    self.stride = stride

    # Read image shapes (wh)
    sp = os.path.join(root_path, 'cache', split + '.shapes')  # shapefile path
    try:
        with open(sp, 'r') as f:  # read existing shapefile
            s = [x.split() for x in f.read().splitlines()]
            assert len(s) == n, 'Shapefile out of sync'
    except Exception:
        # Missing, unreadable or stale shapefile: rebuild it from the images
        s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')]
        os.makedirs(os.path.dirname(sp), exist_ok=True)  # cache dir may not exist yet
        np.savetxt(sp, s, fmt='%g')  # overwrites existing (if any)

    self.shapes = np.array(s, dtype=np.float64)

    # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
    if self.rect:
        # Sort by aspect ratio
        s = self.shapes  # wh
        ar = s[:, 1] / s[:, 0]  # aspect ratio
        irect = ar.argsort()
        self.img_files = [self.img_files[i] for i in irect]
        self.label_files = [self.label_files[i] for i in irect]
        self.shapes = s[irect]  # wh
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]

        self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride

    # Cache labels
    self.imgs = [None] * n
    self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
    # Debug switches, all disabled
    create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
    nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
    np_labels_path = os.path.join(root_path, 'cache', split + '_labels.npy')  # saved labels in *.npy file
    if os.path.isfile(np_labels_path):
        s = np_labels_path  # print string
        # NOTE(review): allow_pickle=True executes pickled content; only load trusted cache files
        x = np.load(np_labels_path, allow_pickle=True)
        if len(x) == n:
            self.labels = x
            labels_loaded = True
    else:
        s = path.replace('images', 'labels')

    pbar = tqdm(self.label_files)
    for i, file in enumerate(pbar):
        if labels_loaded:
            l = self.labels[i]
        else:
            try:
                with open(file, 'r') as f:
                    shape = self.shapes[i]
                    objs = []
                    for x in f.read().splitlines():
                        line = x.split()
                        if line[0] not in classes:
                            continue
                        cls = classes.index(line[0])
                        # Fields 4-7 are the corner coordinates, normalised
                        # by the image shape (presumably width, height)
                        xmin = float(line[4]) / shape[0]
                        ymin = float(line[5]) / shape[1]
                        xmax = float(line[6]) / shape[0]
                        ymax = float(line[7]) / shape[1]
                        objs.append([cls,
                                     (xmin + xmax) / 2,
                                     (ymin + ymax) / 2,
                                     (xmax - xmin),
                                     (ymax - ymin)])
                    l = np.array(objs, dtype=np.float32)
            except Exception:
                nm += 1  # label file missing or unparsable
                continue

        if l.shape[0]:
            assert l.shape[1] == 5, '> 5 label columns: %s' % file
            assert (l >= 0).all(), 'negative labels: %s' % file
            assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
            if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                nd += 1
            if single_cls:
                l[:, 0] = 0  # force dataset into single-class mode
            self.labels[i] = l
            nf += 1  # file found

            # Create subdataset (a smaller dataset)
            if create_datasubset and ns < 1E4:
                if ns == 0:
                    create_folder(path='./datasubset')
                    os.makedirs('./datasubset/images')
                exclude_classes = 43
                if exclude_classes not in l[:, 0]:
                    ns += 1
                    with open('./datasubset/images.txt', 'a') as f:
                        f.write(self.img_files[i] + '\n')

            # Extract object detection boxes for a second stage classifier
            if extract_bounding_boxes:
                p = Path(self.img_files[i])
                img = cv2.imread(str(p))
                h, w = img.shape[:2]
                for j, x in enumerate(l):
                    f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                    if not os.path.exists(Path(f).parent):
                        os.makedirs(Path(f).parent)  # make new output folder

                    b = x[1:] * [w, h, w, h]  # box
                    b[2:] = b[2:].max()  # rectangle to square
                    b[2:] = b[2:] * 1.3 + 30  # pad
                    b = data_utils.xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                    assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
        else:
            ne += 1  # label file empty

        pbar.desc = 'Caching labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
            s, nf, nm, ne, nd, n)
    assert nf > 0 or n == 20288, 'No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
    if not labels_loaded and n > 1000:
        print('Saving labels to %s for faster future loading' % np_labels_path)
        # Per-image label arrays differ in row count. Recent NumPy refuses to
        # build such ragged arrays implicitly, so fill an object array by hand.
        label_store = np.empty(n, dtype=object)
        for k, lab in enumerate(self.labels):
            label_store[k] = lab
        np.save(np_labels_path, label_store)  # save for next time

    # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
    if cache_images:
        gb = 0  # bytes of cached images, reported in GB
        pbar = tqdm(range(len(self.img_files)), desc='Caching images')
        self.img_hw0, self.img_hw = [None] * n, [None] * n
        for i in pbar:
            self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized
            gb += self.imgs[i].nbytes
            pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)

    # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
    detect_corrupted_images = False
    if detect_corrupted_images:
        from skimage import io  # conda install -c conda-forge scikit-image
        for file in tqdm(self.img_files, desc='Detecting corrupted images'):
            try:
                _ = io.imread(file)
            except Exception:
                print('Corrupted image detected: %s' % file)
def __call__(self, image, targets=None, **kwargs):
    """Convert the ``'bbox'`` field of ``targets`` with ``data_utils.xywh2xyxy``.

    Args:
        image: Passed through untouched.
        targets: Optional container exposing ``has_field`` / ``get_field`` /
            ``update_field``. It is updated in place when it has a ``'bbox'``
            field. ``None`` is passed through as-is.
        **kwargs: Accepted and ignored.

    Returns:
        The ``(image, targets)`` pair.
    """
    # targets defaults to None, so it must be checked before it is dereferenced
    if targets is not None and targets.has_field('bbox'):
        bboxes = targets.get_field("bbox")
        targets.update_field('bbox', data_utils.xywh2xyxy(bboxes))
    return image, targets
def test(data,
         weights=None,
         batch_size=16,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         verbose=False,
         model=None,
         dataloader=None,
         logdir='./runs',
         merge=False):
    """Evaluate a detector on a dataset and report precision / recall / mAP.

    Args:
        data: Path to a YAML file providing ``num_classes``, ``names`` and the
            ``val`` / ``test`` dataset paths.
        weights: Checkpoint path, loaded only when ``model`` is None.
        batch_size: Batch size for the internally created dataloader.
        imgsz: Inference image size.
        conf_thres: Confidence threshold passed to NMS.
        iou_thres: IoU threshold passed to NMS.
        save_json: Collect detections and run a pycocotools evaluation.
        verbose: Also print per-class results.
        model: Already-built model. When given, the call is treated as coming
            from training and the loss is accumulated.
        dataloader: Existing dataloader; one is created when None.
        logdir: Directory for the ``test_batch*`` preview images.
        merge: Use merge-NMS. Overridden by the module-level ``opt.merge``
            when the dataloader is created here.

    Returns:
        ``((mp, mr, map50, map, *loss), maps, t)``: mean precision, mean
        recall, mAP@0.5, mAP@0.5:0.95 and the three loss terms averaged over
        batches; the per-class mAP array; and the timing tuple
        (inference ms, NMS ms, total ms, imgsz, imgsz, batch_size).
    """
    # Initialize/load model and set device
    if model is None:
        training = False
        device = torch_utils.select_device(opt.device, batch_size=batch_size)

        # Remove previous
        for f in glob.glob(os.path.join(logdir, 'test_batch*.jpg')):
            os.remove(f)

        # Load model
        model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
        torch_utils.model_info(model)
        model.fuse()
        model.to(device)

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
    else:  # called by train.py
        training = True
        device = next(model.parameters()).device  # get model device

    # Half precision is only supported on single-GPU
    half = device.type != 'cpu' and torch.cuda.device_count() == 1
    half = False  # half precision is deliberately forced off
    if half:
        model.half()  # to FP16

    # Configure
    model.eval()
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    nc = int(data['num_classes'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()
    losser = YoloLoss(model)

    # Dataloader
    if dataloader is None:  # not training
        merge = opt.merge  # use Merge NMS
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = kitti.create_dataloader(path, imgsz, batch_size, int(max(model.stride)), config=None,
                                             augment=False, cache=False, pad=0.5, rect=True)[0]

    seen = 0
    names = data['names']
    kitti8class = data_utils.kitti8_classes()
    # Column headers (the '@' labels had been mangled into '[email protected]')
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map_all, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm.tqdm(dataloader, desc=s)):
        targets.delete_by_mask()
        targets.to_float32()
        # Keep a structured copy for the loss; the flat tensor below feeds the metrics
        targ = ParamList(targets.size, True)
        targ.copy_from(targets)
        img_id = targets.get_field('img_id')
        classes = targets.get_field('class')
        bboxes = targets.get_field('bbox')
        # One row per target: (image index, class, box)
        targets = torch.cat([img_id.unsqueeze(-1), classes.unsqueeze(-1), bboxes], dim=-1)
        img = img.to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = torch_utils.time_synchronized()
            inf_out, train_out = model(img)  # inference and training outputs
            t0 += torch_utils.time_synchronized() - t

            # Compute loss
            if training:
                loss += losser([x.float() for x in train_out], targ)[1][:3]

            # Run NMS
            t = torch_utils.time_synchronized()
            output = postprocess.apply_nms(inf_out, nc, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge)
            t1 += torch_utils.time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds
            utils.clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                utils.scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1])  # to original shape
                box = data_utils.xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                # 'det' rather than 'p', so the precision variable is not clobbered
                for det, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id': image_id,
                        'category_id': kitti8class[int(det[5])],
                        'bbox': [round(x, 3) for x in b],
                        'score': round(det[4], 5)
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = data_utils.xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = metrics_utils.box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if batch_i < 1:
            f = os.path.join(logdir, 'test_batch%g_gt.jpg' % batch_i)  # filename
            visual_utils.plot_images(img, targets, paths, f, names)  # ground truth
            f = os.path.join(logdir, 'test_batch%g_pred.jpg' % batch_i)
            visual_utils.plot_images(img, utils.output_to_target(output, width, height), paths, f, names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = metrics_utils.ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        mp, mr, map50, map_all = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map_all))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Save JSON
    if save_json and map50 and len(jdict):
        imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataloader.dataset.img_files]
        f = 'detections_val2017_%s_results.json' % \
            (weights.split(os.sep)[-1].replace('.pt', '') if weights else '')  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)

        try:
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api

            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map_all, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            # Best-effort step: keep going, but report the real cause (for
            # example a missing annotation file) alongside the generic hint.
            print('WARNING: pycocotools must be installed with numpy==1.17 to run correctly. '
                  'See https://github.com/cocodataset/cocoapi/issues/356')
            print('pycocotools evaluation failed: %r' % (e,))

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map_all
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map_all, *(loss.cpu() / len(dataloader)).tolist()), maps, t
def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, merge=False, classes=None, agnostic=False):
    """Perform Non-Maximum Suppression (NMS) on inference results.

    Args:
        prediction: Tensor indexed as (image, box, 5 + number of classes),
            each row being (cx, cy, w, h, obj_conf, class scores...).
        conf_thres: Minimum objectness and minimum final confidence.
        iou_thres: IoU threshold for suppression.
        merge: Replace kept boxes by the confidence-weighted mean of the
            boxes overlapping them (merge-NMS).
        classes: Optional list of class indices to keep.
        agnostic: Suppress across classes instead of per class.

    Returns:
        A list with one entry per image: an nx6 tensor
        (x1, y1, x2, y2, conf, cls), or ``None`` when nothing survived or the
        time limit was reached before that image was processed.
    """
    if prediction.dtype is torch.float16:
        prediction = prediction.float()  # to FP32

    # Read from the batch tensor's own shape so an empty batch does not raise
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    max_wh = 4096  # (pixels) per-class box offset used for batched NMS
    max_det = 300  # maximum number of detections per image
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)

    t = time.time()
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # confidence

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = data_utils.xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = torch.nonzero(x[:, 5:] > conf_thres, as_tuple=False).t()
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.boxes.nms(boxes, scores, iou_thres)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = metrics_utils.box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except Exception:
                # Possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                # Merging is best-effort: fall back to the plain NMS result.
                print(x, i, x.shape, i.shape)

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output