help='update all models') parser.add_argument('--project', default='runs/detect', help='save results to project/name') parser.add_argument('--name', default='exp', help='save results to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') opt = parser.parse_args() print(opt) check_requirements(exclude=('tensorboard', 'thop')) detect(**vars(opt))
def main(opt): print(colorstr('detect: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items())) check_requirements(exclude=('tensorboard', 'thop')) run(**vars(opt))
type=str, default=os.environ["SM_CHANNEL_DATA"]) parser.add_argument("--num_gpus", type=int, default=os.environ["SM_NUM_GPUS"]) opt = parser.parse_args() # Set DDP variables opt.world_size = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 opt.global_rank = int(os.environ["RANK"]) if "RANK" in os.environ else -1 set_logging(opt.global_rank) if opt.global_rank in [-1, 0]: check_git_status() check_requirements(exclude=("pycocotools", "thop")) # Resume wandb_run = check_wandb_resume(opt) if opt.resume and not wandb_run: # resume an interrupted run ckpt = ( opt.resume if isinstance(opt.resume, str) else get_latest_run() ) # specified or most recent path assert os.path.isfile( ckpt), "ERROR: --resume checkpoint does not exist" apriori = opt.global_rank, opt.local_rank with open(Path(ckpt).parent.parent / "opt.yaml") as f: opt = argparse.Namespace(**yaml.safe_load(f)) # replace ( opt.cfg, opt.weights,
def main(opt): set_logging(RANK) if RANK in [-1, 0]: print( colorstr('train: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items())) check_git_status() check_requirements(exclude=['thop']) # Resume wandb_run = check_wandb_resume(opt) if opt.resume and not wandb_run: # resume an interrupted run ckpt = opt.resume if isinstance( opt.resume, str) else get_latest_run() # specified or most recent path assert os.path.isfile( ckpt), 'ERROR: --resume checkpoint does not exist' with open(Path(ckpt).parent.parent / 'opt.yaml') as f: opt = argparse.Namespace(**yaml.safe_load(f)) # replace opt.cfg, opt.weights, opt.resume, opt.batch_size = '', ckpt, True, opt.total_batch_size # reinstate logger.info('Resuming training from %s' % ckpt) else: # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml') opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file( opt.cfg), check_file(opt.hyp) # check files assert len(opt.cfg) or len( opt.weights), 'either --cfg or --weights must be specified' opt.img_size.extend( [opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) opt.name = 'evolve' if opt.evolve else opt.name opt.save_dir = str( increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve)) # DDP mode opt.total_batch_size = opt.batch_size device = select_device(opt.device, batch_size=opt.batch_size) if LOCAL_RANK != -1: from datetime import timedelta assert torch.cuda.device_count( ) > LOCAL_RANK, 'too few GPUS for DDP command' torch.cuda.set_device(LOCAL_RANK) device = torch.device('cuda', LOCAL_RANK) dist.init_process_group(backend="gloo", timeout=timedelta(seconds=60)) assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count' assert not opt.image_weights, '--image-weights argument is not compatible with DDP training' opt.batch_size = opt.total_batch_size // WORLD_SIZE # Train if not opt.evolve: train(opt.hyp, opt, device) if WORLD_SIZE > 1 and RANK == 0: _ = [ print('Destroying process group... ', end=''), dist.destroy_process_group(), print('Done.') ] # Evolve hyperparameters (optional) else: # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) meta = { 'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr 'box': (1, 0.02, 0.2), # box loss gain 'cls': (1, 0.2, 4.0), # cls loss gain 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight 'iou_t': (0, 0.1, 0.7), # IoU training threshold 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) 'scale': (1, 0.0, 0.9), # image scale (+/- gain) 'shear': (1, 0.0, 10.0), # image shear (+/- deg) 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) 'mosaic': (1, 0.0, 1.0), # image mixup (probability) 'mixup': (1, 0.0, 1.0) } # image mixup (probability) with open(opt.hyp) as f: hyp = yaml.safe_load(f) # load hyps dict assert LOCAL_RANK == -1, 'DDP mode not implemented for --evolve' opt.notest, opt.nosave = True, True # only test/save final epoch # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices yaml_file = Path( opt.save_dir) / 'hyp_evolved.yaml' # save best result here if opt.bucket: os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists for _ in range(300): # generations to evolve if Path('evolve.txt').exists( ): # if evolve.txt exists: select best hyps and mutate # Select parent(s) parent = 'single' # parent selection method: 'single' or 'weighted' x = np.loadtxt('evolve.txt', ndmin=2) n = min(5, len(x)) # number of previous results to consider x = x[np.argsort(-fitness(x))][:n] # top n mutations w = fitness(x) - fitness(x).min() # weights if parent == 'single' or len(x) == 1: # x = x[random.randint(0, n - 1)] # random selection x = x[random.choices(range(n), weights=w)[0]] # weighted selection elif parent == 'weighted': x = (x * w.reshape( n, 1)).sum(0) / w.sum() # weighted combination # Mutate mp, s = 0.8, 0.2 # mutation probability, sigma npr = np.random npr.seed(int(time.time())) g = np.array([x[0] for x in meta.values()]) # gains 0-1 ng = len(meta) v = np.ones(ng) while all( v == 1 ): # mutate until a change occurs (prevent duplicates) v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) hyp[k] = float(x[i + 7] * v[i]) # mutate # Constrain to limits for k, v in meta.items(): hyp[k] = max(hyp[k], v[1]) # lower limit hyp[k] = min(hyp[k], v[2]) # upper limit hyp[k] = round(hyp[k], 5) # significant digits # Train mutation results = train(hyp.copy(), opt, device) # Write mutation results print_mutation(hyp.copy(), results, yaml_file, opt.bucket) # Plot results plot_evolution(yaml_file) print( f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n' f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}' )
def run(weights='yolov5s.pt', # model.pt path(s) source='data/images', # file/dir/URL/glob, 0 for webcam imgsz=640, # inference size (pixels) conf_thres=0.25, # confidence threshold iou_thres=0.45, # NMS IOU threshold max_det=1000, # maximum detections per image device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu view_img=False, # show results save_txt=False, # save results to *.txt save_conf=False, # save confidences in --save-txt labels save_crop=False, # save cropped prediction boxes nosave=False, # do not save images/videos classes=None, # filter by class: --class 0, or --class 0 2 3 agnostic_nms=False, # class-agnostic NMS augment=False, # augmented inference visualize=False, # visualize features update=False, # update all models project='runs/detect', # save results to project/name name='exp', # save results to project/name exist_ok=False, # existing project/name ok, do not increment line_thickness=3, # bounding box thickness (pixels) hide_labels=False, # hide labels hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference tfl_int8=False, # INT8 quantized TFLite model ): save_img = not nosave and not source.endswith('.txt') # save inference images webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(device) half &= device.type != 'cpu' # half precision only supported on CUDA # Load model w = weights[0] if isinstance(weights, list) else weights classify, suffix = False, Path(w).suffix.lower() pt, onnx, tflite, pb, saved_model = (suffix == x for x in ['.pt', '.onnx', '.tflite', '.pb', '']) # backend stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults if pt: model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names if half: model.half() # to FP16 if classify: # second-stage classifier modelc = load_classifier(name='resnet50', n=2) # initialize modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval() elif onnx: check_requirements(('onnx', 'onnxruntime')) import onnxruntime session = onnxruntime.InferenceSession(w, None) else: # TensorFlow models check_requirements(('tensorflow>=2.4.1',)) import tensorflow as tf if pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt def wrap_frozen_graph(gd, inputs, outputs): x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped import return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs), tf.nest.map_structure(x.graph.as_graph_element, outputs)) graph_def = tf.Graph().as_graph_def() graph_def.ParseFromString(open(w, 'rb').read()) frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") elif saved_model: model = tf.keras.models.load_model(w) elif tflite: interpreter = tf.lite.Interpreter(model_path=w) # load TFLite model interpreter.allocate_tensors() # allocate input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs imgsz = check_img_size(imgsz, s=stride) # check image size # Dataloader if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) bs = len(dataset) # batch_size else: dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) bs = 1 # batch_size vid_path, vid_writer = [None] * bs, [None] * bs # Run inference if pt and device.type != 'cpu': model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: if onnx: img = img.astype('float32') else: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img = img / 255.0 # 0 - 255 to 0.0 - 1.0 if len(img.shape) == 3: img = img[None] # expand for batch dim # Inference t1 = time_sync() if pt: visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False pred = model(img, augment=augment, visualize=visualize)[0] elif onnx: pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img})) else: # tensorflow model (tflite, pb, saved_model) imn = img.permute(0, 2, 3, 1).cpu().numpy() # image in numpy if pb: pred = frozen_func(x=tf.constant(imn)).numpy() elif saved_model: pred = model(imn, training=False).numpy() elif tflite: if tfl_int8: scale, zero_point = input_details[0]['quantization'] imn = (imn / scale + zero_point).astype(np.uint8) interpreter.set_tensor(input_details[0]['index'], imn) interpreter.invoke() pred = interpreter.get_tensor(output_details[0]['index']) if tfl_int8: scale, zero_point = output_details[0]['quantization'] pred = (pred.astype(np.float32) - zero_point) * scale pred[..., 0] *= imgsz[1] # x pred[..., 1] *= imgsz[0] # y pred[..., 2] *= imgsz[1] # w pred[..., 3] *= imgsz[0] # h pred = torch.tensor(pred) # NMS pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) t2 = time_sync() # Second-stage classifier (optional) if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process predictions for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count else: p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') im0 = plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_width=line_thickness) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) # Print time (inference + NMS) print(f'{s}Done. ({t2 - t1:.3f}s)') # Stream results if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' or 'stream' if vid_path[i] != save_path: # new video vid_path[i] = save_path if isinstance(vid_writer[i], cv2.VideoWriter): vid_writer[i].release() # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path += '.mp4' vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) vid_writer[i].write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {colorstr('bold', save_dir)}{s}") if update: strip_optimizer(weights) # update model (to fix SourceChangeWarning) print(f'Done. ({time.time() - t0:.3f}s)')
def export( weights='./yolov5s.pt', # weights path img_size=(640, 640), # image (height, width) batch_size=1, # batch size device='cpu', # cuda device, i.e. 0 or 0,1,2,3 or cpu include=('torchscript', 'onnx', 'coreml'), # include formats half=False, # FP16 half-precision export inplace=False, # set YOLOv5 Detect() inplace=True train=False, # model.train() mode optimize=False, # TorchScript: optimize for mobile dynamic=False, # ONNX: dynamic axes simplify=False, # ONNX: simplify model opset_version=12, # ONNX: opset version ): t = time.time() include = [x.lower() for x in include] img_size *= 2 if len(img_size) == 1 else 1 # expand # Load PyTorch model device = select_device(device) assert not ( device.type == 'cpu' and opt.half ), '--half only compatible with GPU export, i.e. use --device 0' model = attempt_load(weights, map_location=device) # load FP32 model labels = model.names # Input gs = int(max(model.stride)) # grid size (max stride) img_size = [check_img_size(x, gs) for x in img_size] # verify img_size are gs-multiples img = torch.zeros(batch_size, 3, *img_size).to( device) # image size(1,3,320,192) iDetection # Update model if half: img, model = img.half(), model.half() # to FP16 model.train() if train else model.eval( ) # training mode = no Detect() layer grid construction for k, m in model.named_modules(): m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility if isinstance(m, Conv): # assign export-friendly activations if isinstance(m.act, nn.Hardswish): m.act = Hardswish() elif isinstance(m.act, nn.SiLU): m.act = SiLU() elif isinstance(m, Detect): m.inplace = inplace m.onnx_dynamic = dynamic # m.forward = m.forward_export # assign forward (optional) for _ in range(2): y = model(img) # dry runs print( f"\n{colorstr('PyTorch:')} starting from {weights} ({file_size(weights):.1f} MB)" ) # TorchScript export ----------------------------------------------------------------------------------------------- if 'torchscript' in include or 'coreml' in include: prefix = colorstr('TorchScript:') try: print( f'\n{prefix} starting export with torch {torch.__version__}...' ) f = weights.replace('.pt', '.torchscript.pt') # filename ts = torch.jit.trace(model, img, strict=False) (optimize_for_mobile(ts) if optimize else ts).save(f) print( f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)' ) except Exception as e: print(f'{prefix} export failure: {e}') # ONNX export ------------------------------------------------------------------------------------------------------ if 'onnx' in include: prefix = colorstr('ONNX:') try: import onnx print(f'{prefix} starting export with onnx {onnx.__version__}...') f = weights.replace('.pt', '.onnx') # filename torch.onnx.export( model, img, f, verbose=False, opset_version=opset_version, training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL, do_constant_folding=not train, input_names=['images'], output_names=['output'], dynamic_axes={ 'images': { 0: 'batch', 2: 'height', 3: 'width' }, # shape(1,3,640,640) 'output': { 0: 'batch', 1: 'anchors' } # shape(1,25200,85) } if dynamic else None) # Checks model_onnx = onnx.load(f) # load onnx model onnx.checker.check_model(model_onnx) # check onnx model # print(onnx.helper.printable_graph(model_onnx.graph)) # print # Simplify if simplify: try: check_requirements(['onnx-simplifier']) import onnxsim print( f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...' ) model_onnx, check = onnxsim.simplify( model_onnx, dynamic_input_shape=dynamic, input_shapes={'images': list(img.shape)} if dynamic else None) assert check, 'assert check failed' onnx.save(model_onnx, f) except Exception as e: print(f'{prefix} simplifier failure: {e}') print( f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)' ) except Exception as e: print(f'{prefix} export failure: {e}') # CoreML export ---------------------------------------------------------------------------------------------------- if 'coreml' in include: prefix = colorstr('CoreML:') try: import coremltools as ct print( f'{prefix} starting export with coremltools {ct.__version__}...' ) assert train, 'CoreML exports should be placed in model.train() mode with `python export.py --train`' model = ct.convert(ts, inputs=[ ct.ImageType('image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0]) ]) f = weights.replace('.pt', '.mlmodel') # filename model.save(f) print( f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)' ) except Exception as e: print(f'{prefix} export failure: {e}') # Finish print( f'\nExport complete ({time.time() - t:.2f}s). Visualize with https://github.com/lutzroeder/netron.' )
def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None): """Creates or loads a YOLOv5 model Arguments: name (str): model name 'yolov5s' or path 'path/to/best.pt' pretrained (bool): load pretrained weights into the model channels (int): number of input channels classes (int): number of model classes autoshape (bool): apply YOLOv5 .autoshape() wrapper to model verbose (bool): print all information to screen device (str, torch.device, None): device to use for model parameters Returns: YOLOv5 model """ from pathlib import Path from models.common import AutoShape, DetectMultiBackend from models.yolo import Model from utils.downloads import attempt_download from utils.general import LOGGER, check_requirements, intersect_dicts, logging from utils.torch_utils import select_device if not verbose: LOGGER.setLevel(logging.WARNING) check_requirements(exclude=('tensorboard', 'thop', 'opencv-python')) name = Path(name) path = name.with_suffix( '.pt') if name.suffix == '' else name # checkpoint path try: device = select_device(('0' if torch.cuda.is_available() else 'cpu' ) if device is None else device) if pretrained and channels == 3 and classes == 80: model = DetectMultiBackend( path, device=device) # download/load FP32 model # model = models.experimental.attempt_load(path, map_location=device) # download/load FP32 model else: cfg = list( (Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0] # model.yaml path model = Model(cfg, channels, classes) # create model if pretrained: ckpt = torch.load(attempt_download(path), map_location=device) # load csd = ckpt['model'].float().state_dict( ) # checkpoint state_dict as FP32 csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors']) # intersect model.load_state_dict(csd, strict=False) # load if len(ckpt['model'].names) == classes: model.names = ckpt[ 'model'].names # set class names attribute if autoshape: model = AutoShape(model) # for file/URI/PIL/cv2/np inputs and NMS return model.to(device) except Exception as e: help_url = 'https://github.com/ultralytics/yolov5/issues/36' s = f'{e}. Cache may be out of date, try `force_reload=True` or see {help_url} for help.' raise Exception(s) from e
def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')): # YOLOv5 TensorRT export https://developer.nvidia.com/tensorrt try: check_requirements(('tensorrt', )) import tensorrt as trt if trt.__version__[ 0] == '7': # TensorRT 7 handling https://github.com/ultralytics/yolov5/issues/6012 grid = model.model[-1].anchor_grid model.model[-1].anchor_grid = [a[..., :1, :1, :] for a in grid] export_onnx(model, im, file, 12, train, False, simplify) # opset 12 model.model[-1].anchor_grid = grid else: # TensorRT >= 8 check_version(trt.__version__, '8.0.0', hard=True) # require tensorrt>=8.0.0 export_onnx(model, im, file, 13, train, False, simplify) # opset 13 onnx = file.with_suffix('.onnx') LOGGER.info( f'\n{prefix} starting export with TensorRT {trt.__version__}...') assert im.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. `python export.py --device 0`' assert onnx.exists(), f'failed to export ONNX file: {onnx}' f = file.with_suffix('.engine') # TensorRT engine file logger = trt.Logger(trt.Logger.INFO) if verbose: logger.min_severity = trt.Logger.Severity.VERBOSE builder = trt.Builder(logger) config = builder.create_builder_config() config.max_workspace_size = workspace * 1 << 30 # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30) # fix TRT 8.4 deprecation notice flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) network = builder.create_network(flag) parser = trt.OnnxParser(network, logger) if not parser.parse_from_file(str(onnx)): raise RuntimeError(f'failed to load ONNX file: {onnx}') inputs = [network.get_input(i) for i in range(network.num_inputs)] outputs = [network.get_output(i) for i in range(network.num_outputs)] LOGGER.info(f'{prefix} Network Description:') for inp in inputs: LOGGER.info( f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}' ) for out in outputs: LOGGER.info( f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}' ) LOGGER.info( f'{prefix} building FP{16 if builder.platform_has_fast_fp16 else 32} engine in {f}' ) if builder.platform_has_fast_fp16: config.set_flag(trt.BuilderFlag.FP16) with builder.build_engine(network, config) as engine, open(f, 'wb') as t: t.write(engine.serialize()) LOGGER.info( f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') return f except Exception as e: LOGGER.info(f'\n{prefix} export failure: {e}')
def run( data=ROOT / 'data/coco128.yaml', # 'dataset.yaml path' weights=ROOT / 'yolov5s.pt', # weights path imgsz=(640, 640), # image (height, width) batch_size=1, # batch size device='cpu', # cuda device, i.e. 0 or 0,1,2,3 or cpu include=('torchscript', 'onnx'), # include formats half=False, # FP16 half-precision export inplace=False, # set YOLOv5 Detect() inplace=True train=False, # model.train() mode optimize=False, # TorchScript: optimize for mobile int8=False, # CoreML/TF INT8 quantization dynamic=False, # ONNX/TF: dynamic axes simplify=False, # ONNX: simplify model opset=12, # ONNX: opset version verbose=False, # TensorRT: verbose log workspace=4, # TensorRT: workspace size (GB) nms=False, # TF: add NMS to model agnostic_nms=False, # TF: add agnostic NMS to model topk_per_class=100, # TF.js NMS: topk per class to keep topk_all=100, # TF.js NMS: topk for all classes to keep iou_thres=0.45, # TF.js NMS: IoU threshold conf_thres=0.25, # TF.js NMS: confidence threshold ): t = time.time() include = [x.lower() for x in include] # to lowercase formats = tuple(export_formats()['Argument'][1:]) # --include arguments flags = [x in include for x in formats] assert sum(flags) == len( include ), f'ERROR: Invalid --include {include}, valid --include arguments are {formats}' jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = flags # export booleans file = Path( url2file(weights) if str(weights).startswith( ('http:/', 'https:/')) else weights) # PyTorch weights # Load PyTorch model device = select_device(device) assert not ( device.type == 'cpu' and half), '--half only compatible with GPU export, i.e. use --device 0' model = attempt_load(weights, map_location=device, inplace=True, fuse=True) # load FP32 model nc, names = model.nc, model.names # number of classes, class names # Checks imgsz *= 2 if len(imgsz) == 1 else 1 # expand opset = 12 if ('openvino' in include) else opset # OpenVINO requires opset <= 12 assert nc == len( names), f'Model class count {nc} != len(names) {len(names)}' # Input gs = int(max(model.stride)) # grid size (max stride) imgsz = [check_img_size(x, gs) for x in imgsz] # verify img_size are gs-multiples im = torch.zeros(batch_size, 3, *imgsz).to( device) # image size(1,3,320,192) BCHW iDetection # Update model if half: im, model = im.half(), model.half() # to FP16 model.train() if train else model.eval( ) # training mode = no Detect() layer grid construction for k, m in model.named_modules(): if isinstance(m, Detect): m.inplace = inplace m.onnx_dynamic = dynamic m.export = True for _ in range(2): y = model(im) # dry runs shape = tuple(y[0].shape) # model output shape LOGGER.info( f"\n{colorstr('PyTorch:')} starting from {file} with output shape {shape} ({file_size(file):.1f} MB)" ) # Exports f = [''] * 10 # exported filenames warnings.filterwarnings( action='ignore', category=torch.jit.TracerWarning) # suppress TracerWarning if jit: f[0] = export_torchscript(model, im, file, optimize) if engine: # TensorRT required before ONNX f[1] = export_engine(model, im, file, train, half, simplify, workspace, verbose) if onnx or xml: # OpenVINO requires ONNX f[2] = export_onnx(model, im, file, opset, train, dynamic, simplify) if xml: # OpenVINO f[3] = export_openvino(model, im, file) if coreml: _, f[4] = export_coreml(model, im, file) # TensorFlow Exports if any((saved_model, pb, tflite, edgetpu, tfjs)): if int8 or edgetpu: # TFLite --int8 bug https://github.com/ultralytics/yolov5/issues/5707 check_requirements( ('flatbuffers==1.12', )) # required before `import tensorflow` assert not ( tflite and tfjs ), 'TFLite and TF.js models must be exported separately, please pass only one type.' model, f[5] = export_saved_model(model.cpu(), im, file, dynamic, tf_nms=nms or agnostic_nms or tfjs, agnostic_nms=agnostic_nms or tfjs, topk_per_class=topk_per_class, topk_all=topk_all, conf_thres=conf_thres, iou_thres=iou_thres) # keras model if pb or tfjs: # pb prerequisite to tfjs f[6] = export_pb(model, im, file) if tflite or edgetpu: f[7] = export_tflite(model, im, file, int8=int8 or edgetpu, data=data, nms=nms, agnostic_nms=agnostic_nms) if edgetpu: f[8] = export_edgetpu(model, im, file) if tfjs: f[9] = export_tfjs(model, im, file) # Finish f = [str(x) for x in f if x] # filter out '' and None if any(f): LOGGER.info( f'\nExport complete ({time.time() - t:.2f}s)' f"\nResults saved to {colorstr('bold', file.parent.resolve())}" f"\nDetect: python detect.py --weights {f[-1]}" f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', '{f[-1]}')" f"\nValidate: python val.py --weights {f[-1]}" f"\nVisualize: https://netron.app") return f # return list of exported files/dirs
def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None): """Creates a specified YOLOv5 model Arguments: name (str): name of model, i.e. 'yolov5s' pretrained (bool): load pretrained weights into the model channels (int): number of input channels classes (int): number of model classes autoshape (bool): apply YOLOv5 .autoshape() wrapper to model verbose (bool): print all information to screen device (str, torch.device, None): device to use for model parameters Returns: YOLOv5 pytorch model """ from pathlib import Path from models.yolo import Model, attempt_load from utils.general import check_requirements, set_logging from utils.google_utils import attempt_download from utils.torch_utils import select_device check_requirements(requirements=Path(__file__).parent / 'requirements.txt', exclude=('tensorboard', 'thop', 'opencv-python')) set_logging(verbose=verbose) fname = Path(name).with_suffix('.pt') # checkpoint filename try: device = select_device(('0' if torch.cuda.is_available() else 'cpu' ) if device is None else device) if pretrained and channels == 3 and classes == 80: model = attempt_load( fname, map_location=device) # download/load FP32 model else: cfg = list((Path(__file__).parent / 'models').rglob(f'{name}.yaml'))[0] # model.yaml path model = Model(cfg, channels, classes) # create model if pretrained: ckpt = torch.load(attempt_download(fname), map_location=device) # load msd = model.state_dict() # model state_dict csd = ckpt['model'].float().state_dict( ) # checkpoint state_dict as FP32 csd = { k: v for k, v in csd.items() if msd[k].shape == v.shape } # filter model.load_state_dict(csd, strict=False) # load if len(ckpt['model'].names) == classes: model.names = ckpt[ 'model'].names # set class names attribute if autoshape: model = model.autoshape() # for file/URI/PIL/cv2/np inputs and NMS return model.to(device) except Exception as e: help_url = 'https://github.com/ultralytics/yolov5/issues/36' s = 'Cache may be out of date, try `force_reload=True`. See %s for help.' % help_url raise Exception(s) from e
def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorstr('ONNX:')): # YOLOv5 ONNX export try: check_requirements(('onnx', )) import onnx LOGGER.info( f'\n{prefix} starting export with onnx {onnx.__version__}...') f = file.with_suffix('.onnx') torch.onnx.export( model, im, f, verbose=False, opset_version=opset, training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL, do_constant_folding=not train, input_names=['images'], output_names=['output'], dynamic_axes={ 'images': { 0: 'batch', 2: 'height', 3: 'width' }, # shape(1,3,640,640) 'output': { 0: 'batch', 1: 'anchors' } # shape(1,25200,85) } if dynamic else None) # Checks model_onnx = onnx.load(f) # load onnx model onnx.checker.check_model(model_onnx) # check onnx model # Metadata d = {'stride': int(max(model.stride)), 'names': model.names} for k, v in d.items(): meta = model_onnx.metadata_props.add() meta.key, meta.value = k, str(v) onnx.save(model_onnx, f) # Simplify if simplify: try: check_requirements(('onnx-simplifier', )) import onnxsim LOGGER.info( f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...' ) model_onnx, check = onnxsim.simplify( model_onnx, dynamic_input_shape=dynamic, input_shapes={'images': list(im.shape)} if dynamic else None) assert check, 'assert check failed' onnx.save(model_onnx, f) except Exception as e: LOGGER.info(f'{prefix} simplifier failure: {e}') LOGGER.info( f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') return f except Exception as e: LOGGER.info(f'{prefix} export failure: {e}')
def test(source=' ', weights=' ', imlist=True): print('this is detect_inhwa.py') parser = argparse.ArgumentParser() parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') parser.add_argument('--source', type=str, default='data/images', help='source') # file/folder, 0 for webcam parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--view-img', action='store_true', help='display results') parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3') parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--update', action='store_true', help='update all models') parser.add_argument('--project', default='runs/detect', help='save results to project/name') parser.add_argument('--name', default='exp', help='save results to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--imlist', action='store_true', help='existing project/name ok, do not increment') global opt opt = parser.parse_args() opt.source = source opt.imlist = True opt.weights = [weights] print('this is image list: ', opt.imlist) check_requirements() detect_start = time.time() * 1000 print("detect start : ", detect_start) with torch.no_grad(): detect() detect_end = time.time() * 1000 - detect_start logging.info("hfgf" + str(detect_end)) print("detect time : ", time.time() * 1000 - detect_start)
def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None): # Usage: # PyTorch: weights = *.pt # TorchScript: *.torchscript # ONNX Runtime: *.onnx # ONNX OpenCV DNN: *.onnx with --dnn # OpenVINO: *.xml # CoreML: *.mlmodel # TensorRT: *.engine # TensorFlow SavedModel: *_saved_model # TensorFlow GraphDef: *.pb # TensorFlow Lite: *.tflite # TensorFlow Edge TPU: *_edgetpu.tflite from models.experimental import attempt_download, attempt_load # scoped to avoid circular import super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type( w) # get backend stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults w = attempt_download(w) # download if not local if data: # data.yaml path (optional) with open(data, errors='ignore') as f: names = yaml.safe_load(f)['names'] # class names if pt: # PyTorch model = attempt_load(weights if isinstance(weights, list) else w, map_location=device) stride = max(int(model.stride.max()), 32) # model stride names = model.module.names if hasattr( model, 'module') else model.names # get class names self.model = model # explicitly assign for to(), cpu(), cuda(), half() elif jit: # TorchScript LOGGER.info(f'Loading {w} for TorchScript inference...') extra_files = {'config.txt': ''} # model metadata model = torch.jit.load(w, _extra_files=extra_files) if extra_files['config.txt']: d = json.loads(extra_files['config.txt']) # extra_files dict stride, names = int(d['stride']), d['names'] elif dnn: # ONNX OpenCV DNN LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') check_requirements(('opencv-python>=4.5.4', )) net = cv2.dnn.readNetFromONNX(w) elif onnx: # ONNX Runtime LOGGER.info(f'Loading {w} for ONNX Runtime inference...') cuda = torch.cuda.is_available() check_requirements( ('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime')) import onnxruntime providers = ['CUDAExecutionProvider', 'CPUExecutionProvider' ] if cuda else ['CPUExecutionProvider'] session = onnxruntime.InferenceSession(w, providers=providers) elif xml: # OpenVINO LOGGER.info(f'Loading {w} for OpenVINO inference...') check_requirements( ('openvino-dev', ) ) # requires openvino-dev: https://pypi.org/project/openvino-dev/ import openvino.inference_engine as ie core = ie.IECore() if not Path(w).is_file(): # if not *.xml w = next(Path(w).glob( '*.xml')) # get *.xml file from *_openvino_model dir network = core.read_network( model=w, weights=Path(w).with_suffix('.bin')) # *.xml, *.bin paths executable_network = core.load_network(network, device_name='CPU', num_requests=1) elif engine: # TensorRT LOGGER.info(f'Loading {w} for TensorRT inference...') import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0 Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) trt_fp16_input = False logger = trt.Logger(trt.Logger.INFO) with open(w, 'rb') as f, trt.Runtime(logger) as runtime: model = runtime.deserialize_cuda_engine(f.read()) bindings = OrderedDict() for index in range(model.num_bindings): name = model.get_binding_name(index) dtype = trt.nptype(model.get_binding_dtype(index)) shape = tuple(model.get_binding_shape(index)) data = torch.from_numpy(np.empty( shape, dtype=np.dtype(dtype))).to(device) bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr())) if model.binding_is_input(index) and dtype == np.float16: trt_fp16_input = True binding_addrs = OrderedDict( (n, d.ptr) for n, d in bindings.items()) context = model.create_execution_context() batch_size = bindings['images'].shape[0] elif coreml: # CoreML LOGGER.info(f'Loading {w} for CoreML inference...') import coremltools as ct model = ct.models.MLModel(w) else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU) if saved_model: # SavedModel LOGGER.info( f'Loading {w} for TensorFlow SavedModel inference...') import tensorflow as tf keras = False # assume TF1 saved_model model = tf.keras.models.load_model( w) if keras else tf.saved_model.load(w) elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt LOGGER.info( f'Loading {w} for TensorFlow GraphDef inference...') import tensorflow as tf def wrap_frozen_graph(gd, inputs, outputs): x = tf.compat.v1.wrap_function( lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped ge = x.graph.as_graph_element return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs)) gd = tf.Graph().as_graph_def() # graph_def gd.ParseFromString(open(w, 'rb').read()) frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs="Identity:0") elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu from tflite_runtime.interpreter import Interpreter, load_delegate except ImportError: import tensorflow as tf Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate, if edgetpu: # Edge TPU https://coral.ai/software/#edgetpu-runtime LOGGER.info( f'Loading {w} for TensorFlow Lite Edge TPU inference...' ) delegate = { 'Linux': 'libedgetpu.so.1', 'Darwin': 'libedgetpu.1.dylib', 'Windows': 'edgetpu.dll' }[platform.system()] interpreter = Interpreter( model_path=w, experimental_delegates=[load_delegate(delegate)]) else: # Lite LOGGER.info( f'Loading {w} for TensorFlow Lite inference...') interpreter = Interpreter( model_path=w) # load TFLite model interpreter.allocate_tensors() # allocate input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs elif tfjs: raise Exception( 'ERROR: YOLOv5 TF.js inference is not supported') self.__dict__.update(locals()) # assign all variables to self
Usage: import torch model = torch.hub.load('ultralytics/yolov5', 'yolov5s') """ from pathlib import Path import torch from models.yolo import Model from utils.general import check_requirements, set_logging from utils.google_utils import attempt_download from utils.torch_utils import select_device dependencies = ['torch', 'yaml'] check_requirements(Path(__file__).parent / 'requirements.txt', exclude=('pycocotools', 'thop')) set_logging() def create(name, pretrained, channels, classes, autoshape): """Creates a specified YOLOv5 model Arguments: name (str): name of model, i.e. 'yolov5s' pretrained (bool): load pretrained weights into the model channels (int): number of input channels classes (int): number of model classes Returns: pytorch model """
def run( data, weights=None, # model.pt path(s) batch_size=32, # batch size imgsz=640, # inference size (pixels) conf_thres=0.001, # confidence threshold iou_thres=0.6, # NMS IoU threshold task='val', # train, val, test, speed or study device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu workers=8, # max dataloader workers (per RANK in DDP mode) single_cls=False, # treat as single-class dataset augment=False, # augmented inference verbose=False, # verbose output save_txt=False, # save results to *.txt save_hybrid=False, # save label+prediction hybrid results to *.txt save_conf=False, # save confidences in --save-txt labels save_json=False, # save a COCO-JSON results file project=ROOT / 'runs/val', # save to project/name name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference model=None, dataloader=None, save_dir=Path(''), plots=True, callbacks=Callbacks(), compute_loss=None, ): # Initialize/load model and set device training = model is not None if training: # called by train.py device, pt, jit, engine = next(model.parameters( )).device, True, False, False # get model device, PyTorch model half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() else: # called directly device = select_device(device, batch_size=batch_size) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Load model model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size half = model.fp16 # FP16 supported on limited backends with CUDA if engine: batch_size = model.batch_size else: device = model.device if not (pt or jit): batch_size = 1 # export.py models default to batch-size 1 LOGGER.info( f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models' ) # Data data = check_dataset(data) # check # Configure model.eval() cuda = device.type != 'cpu' is_coco = isinstance(data.get('val'), str) and data['val'].endswith( f'coco{os.sep}val2017.txt') # COCO dataset nc = 1 if single_cls else int(data['nc']) # number of classes # iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for [email protected]:0.95 iouv = torch.linspace(0.1, 0.5, 10).to(device) # iou vector for [email protected]:0.95 niou = iouv.numel() # Dataloader if not training: if pt and not single_cls: # check --weights are trained on --data ncm = model.model.nc assert ncm == nc, f'{weights[0]} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ f'classes). Pass correct combination of --weights and --data that are trained together.' model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup pad = 0.0 if task in ('speed', 'benchmark') else 0.5 rect = False if task == 'benchmark' else pt # square inference for benchmarks task = task if task in ( 'train', 'val', 'test') else 'val' # path to train/val/test images dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=rect, workers=workers, prefix=colorstr(f'{task}: '))[0] seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) names = { k: v for k, v in enumerate( model.names if hasattr(model, 'names') else model.module.names) } class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', '[email protected]', '[email protected]:.95') dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class = [], [], [], [] callbacks.run('on_val_start') pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar for batch_i, (im, targets, paths, shapes) in enumerate(pbar): callbacks.run('on_val_batch_start') t1 = time_sync() if cuda: im = im.to(device, non_blocking=True) targets = targets.to(device) im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width t2 = time_sync() dt[0] += t2 - t1 # Inference out, train_out = model(im) if training else model( im, augment=augment, val=True) # inference, loss outputs dt[1] += time_sync() - t2 # Loss if compute_loss: loss += compute_loss([x.float() for x in train_out], targets)[1] # box, obj, cls # NMS targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t3 = time_sync() out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls) dt[2] += time_sync() - t3 # Metrics for si, pred in enumerate(out): labels = targets[targets[:, 0] == si, 1:] nl, npr = labels.shape[0], pred.shape[ 0] # number of labels, predictions path, shape = Path(paths[si]), shapes[si][0] correct = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init seen += 1 if npr == 0: if nl: stats.append((correct, *torch.zeros( (3, 0), device=device))) continue # Predictions if single_cls: pred[:, 5] = 0 predn = pred.clone() scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred # Evaluate if nl: tbox = xywh2xyxy(labels[:, 1:5]) # target boxes scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels correct = process_batch(predn, labelsn, iouv) if plots: confusion_matrix.process_batch(predn, labelsn) stats.append((correct, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) # Save/log if save_txt: save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) if save_json: save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) # Plot images if plots and batch_i < 3: f = save_dir / f'val_batch{batch_i}_labels.jpg' # labels Thread(target=plot_images, args=(im, targets, paths, f, names), daemon=True).start() f = save_dir / f'val_batch{batch_i}_pred.jpg' # predictions Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start() callbacks.run('on_val_batch_end') # Compute metrics stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names) ap50, ap = ap[:, 0], ap.mean(1) # [email protected], [email protected]:0.95 mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%11i' * 2 + '%11.3g' * 4 # print format LOGGER.info(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) # Print results per class if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): for i, c in enumerate(ap_class): LOGGER.info(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x / seen * 1E3 for x in dt) # speeds per image if not training: shape = (batch_size, 3, imgsz, imgsz) LOGGER.info( f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) callbacks.run('on_val_end') # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights ).stem if weights is not None else '' # weights anno_json = str( Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json pred_json = str(save_dir / f"{w}_predictions.json") # predictions json LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...') with open(pred_json, 'w') as f: json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb check_requirements(['pycocotools']) from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api eval = COCOeval(anno, pred, 'bbox') if is_coco: eval.params.imgIds = [ int(Path(x).stem) for x in dataloader.dataset.im_files ] # image IDs to evaluate eval.evaluate() eval.accumulate() eval.summarize() map, map50 = eval.stats[: 2] # update results ([email protected]:0.95, [email protected]) except Exception as e: LOGGER.info(f'pycocotools unable to run: {e}') # Return results model.float() # for training if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
parser.add_argument('--quad', action='store_true', help='quad dataloader') parser.add_argument('--linear-lr', action='store_true', help='linear LR') parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') parser.add_argument('--upload_dataset', action='store_true', help='Upload dataset as W&B artifact table') parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B') parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch') parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used') opt = parser.parse_args() # Set DDP variables opt.world_size = int(getattr(os.environ, 'WORLD_SIZE', 1)) opt.global_rank = int(getattr(os.environ, 'RANK', -1)) set_logging(opt.global_rank) if opt.global_rank in [-1, 0]: check_git_status() check_requirements(exclude=['thop']) # Resume wandb_run = check_wandb_resume(opt) if opt.resume and not wandb_run: # resume an interrupted run ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' apriori = opt.global_rank, opt.local_rank with open(Path(ckpt).parent.parent / 'opt.yaml') as f: opt = argparse.Namespace(**yaml.safe_load(f)) # replace opt.cfg, opt.weights, opt.resume, opt.batch_size, opt.global_rank, opt.local_rank = \ '', ckpt, True, opt.total_batch_size, *apriori # reinstate logger.info('Resuming training from %s' % ckpt) else: # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml') opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files
def run( data, weights=None, # model.pt path(s) batch_size=32, # batch size imgsz=640, # inference size (pixels) conf_thres=0.001, # confidence threshold iou_thres=0.6, # NMS IoU threshold task='val', # train, val, test, speed or study device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu single_cls=False, # treat as single-class dataset augment=False, # augmented inference verbose=False, # verbose output save_txt=False, # save results to *.txt save_hybrid=False, # save label+prediction hybrid results to *.txt save_conf=False, # save confidences in --save-txt labels save_json=False, # save a COCO-JSON results file classes=None, # filter by class: --class 0, or --class 0 2 3 project=ROOT / 'runs/val', # save to project/name name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference model=None, dataloader=None, save_dir=Path(''), plots=True, callbacks=Callbacks(), compute_loss=None, ): # Initialize/load model and set device training = model is not None if training: # called by train.py device = next(model.parameters()).device # get model device else: # called directly device = select_device(device, batch_size=batch_size) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Load model check_suffix(weights, '.pt') model = attempt_load(weights, map_location=device) # load FP32 model gs = max(int(model.stride.max()), 32) # grid size (max stride) imgsz = check_img_size(imgsz, s=gs) # check image size # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: # model = nn.DataParallel(model) # Data data = check_dataset(data) # check # Half half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() # Configure model.eval() is_coco = isinstance(data.get('val'), str) and data['val'].endswith( 'coco/val2017.txt') # COCO dataset nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for [email protected]:0.95 niou = iouv.numel() # Dataloader if not training: if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once task = task if task in ( 'train', 'val', 'test') else 'val' # path to train/val/test images dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=0.5, rect=True, prefix=colorstr(f'{task}: '))[0] seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) names = { k: v for k, v in enumerate( model.names if hasattr(model, 'names') else model.module.names) } # # names = {(k - 1): v for k, v in names.items() if v != "head"} # class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', '[email protected]', '[email protected]:.95') dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class = [], [], [], [] for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): t1 = time_sync() img = img.to(device, non_blocking=True) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) nb, _, height, width = img.shape # batch size, channels, height, width t2 = time_sync() dt[0] += t2 - t1 # Run model out, train_out = model( img, augment=augment) # inference and training outputs dt[1] += time_sync() - t2 # Compute loss if compute_loss: loss += compute_loss([x.float() for x in train_out], targets)[1] # box, obj, cls # Run NMS targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t3 = time_sync() out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls) dt[2] += time_sync() - t3 # if out: # for i, pred in enumerate(out): # if len(pred) == 0: # continue # only_person_pred = pred[pred[:, 5] != 0] # only_person_pred[:, 5] = 0 # out[i] = only_person_pred # # Statistics per image for si, pred in enumerate(out): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class path, shape = Path(paths[si]), shapes[si][0] seen += 1 if len(pred) == 0: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) continue # Predictions if single_cls: pred[:, 5] = 0 predn = pred.clone() scale_coords(img[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred # Evaluate if nl: tbox = xywh2xyxy(labels[:, 1:5]) # target boxes scale_coords(img[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels correct = process_batch(predn, labelsn, iouv) if plots: confusion_matrix.process_batch(predn, labelsn) else: correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool) stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # (correct, conf, pcls, tcls) # Save/log if save_txt: save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) if save_json: save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary callbacks.run('on_val_image_end', pred, predn, path, names, img[si]) # Plot images if plots and batch_i < 3: f = save_dir / f'val_batch{batch_i}_labels.jpg' # labels Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start() f = save_dir / f'val_batch{batch_i}_pred.jpg' # predictions Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start() # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names) ap50, ap = ap[:, 0], ap.mean(1) # [email protected], [email protected]:0.95 mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%11i' * 2 + '%11.3g' * 4 # print format print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) # Print results per class if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): for i, c in enumerate(ap_class): print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x / seen * 1E3 for x in dt) # speeds per image if not training: shape = (batch_size, 3, imgsz, imgsz) print( f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) callbacks.run('on_val_end') # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights ).stem if weights is not None else '' # weights anno_json = str( Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json pred_json = str(save_dir / f"{w}_predictions.json") # predictions json print(f'\nEvaluating pycocotools mAP... saving {pred_json}...') with open(pred_json, 'w') as f: json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb check_requirements(['pycocotools']) from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api eval = COCOeval(anno, pred, 'bbox') if is_coco: eval.params.imgIds = [ int(Path(x).stem) for x in dataloader.dataset.img_files ] # image IDs to evaluate eval.evaluate() eval.accumulate() eval.summarize() map, map50 = eval.stats[: 2] # update results ([email protected]:0.95, [email protected]) except Exception as e: print(f'pycocotools unable to run: {e}') # Return results model.float() # for training if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {colorstr('bold', save_dir)}{s}") maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
def __init__(self, weights='yolov5s.pt', device=None, dnn=False): # Usage: # PyTorch: weights = *.pt # TorchScript: *.torchscript # CoreML: *.mlmodel # TensorFlow: *_saved_model # TensorFlow: *.pb # TensorFlow Lite: *.tflite # ONNX Runtime: *.onnx # OpenCV DNN: *.onnx with dnn=True # TensorRT: *.engine from models.experimental import attempt_download, attempt_load # scoped to avoid circular import super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) suffix = Path(w).suffix.lower() suffixes = ['.pt', '.torchscript', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel'] check_suffix(w, suffixes) # check weights have acceptable suffix pt, jit, onnx, engine, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes) # backend booleans stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults w = attempt_download(w) # download if not local if jit: # TorchScript LOGGER.info(f'Loading {w} for TorchScript inference...') extra_files = {'config.txt': ''} # model metadata model = torch.jit.load(w, _extra_files=extra_files) if extra_files['config.txt']: d = json.loads(extra_files['config.txt']) # extra_files dict stride, names = int(d['stride']), d['names'] elif pt: # PyTorch model = attempt_load(weights if isinstance(weights, list) else w, map_location=device) stride = int(model.stride.max()) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names self.model = model # explicitly assign for to(), cpu(), cuda(), half() elif coreml: # CoreML LOGGER.info(f'Loading {w} for CoreML inference...') import coremltools as ct model = ct.models.MLModel(w) elif dnn: # ONNX OpenCV DNN LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') check_requirements(('opencv-python>=4.5.4',)) net = cv2.dnn.readNetFromONNX(w) elif onnx: # ONNX Runtime LOGGER.info(f'Loading {w} for ONNX Runtime inference...') cuda = torch.cuda.is_available() check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime')) import onnxruntime providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider'] session = onnxruntime.InferenceSession(w, providers=providers) elif engine: # TensorRT LOGGER.info(f'Loading {w} for TensorRT inference...') import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) logger = trt.Logger(trt.Logger.INFO) with open(w, 'rb') as f, trt.Runtime(logger) as runtime: model = runtime.deserialize_cuda_engine(f.read()) bindings = OrderedDict() for index in range(model.num_bindings): name = model.get_binding_name(index) dtype = trt.nptype(model.get_binding_dtype(index)) shape = tuple(model.get_binding_shape(index)) data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device) bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr())) binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) context = model.create_execution_context() batch_size = bindings['images'].shape[0] else: # TensorFlow model (TFLite, pb, saved_model) if pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...') import tensorflow as tf def wrap_frozen_graph(gd, inputs, outputs): x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs), tf.nest.map_structure(x.graph.as_graph_element, outputs)) graph_def = tf.Graph().as_graph_def() graph_def.ParseFromString(open(w, 'rb').read()) frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") elif saved_model: LOGGER.info(f'Loading {w} for TensorFlow saved_model inference...') import tensorflow as tf model = tf.keras.models.load_model(w) elif tflite: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python if 'edgetpu' in w.lower(): LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...') import tflite_runtime.interpreter as tfli delegate = {'Linux': 'libedgetpu.so.1', # install https://coral.ai/software/#edgetpu-runtime 'Darwin': 'libedgetpu.1.dylib', 'Windows': 'edgetpu.dll'}[platform.system()] interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)]) else: LOGGER.info(f'Loading {w} for TensorFlow Lite inference...') import tensorflow as tf interpreter = tf.lite.Interpreter(model_path=w) # load TFLite model interpreter.allocate_tensors() # allocate input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs self.__dict__.update(locals()) # assign all variables to self
def main(opt, callbacks=Callbacks()): # Checks if RANK in {-1, 0}: print_args(vars(opt)) check_git_status() check_requirements(exclude=['thop']) # Resume if opt.resume and not check_wandb_resume( opt) and not opt.evolve: # resume an interrupted run ckpt = opt.resume if isinstance( opt.resume, str) else get_latest_run() # specified or most recent path assert os.path.isfile( ckpt), 'ERROR: --resume checkpoint does not exist' with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f: opt = argparse.Namespace(**yaml.safe_load(f)) # replace opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate LOGGER.info(f'Resuming training from {ckpt}') else: opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \ check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks assert len(opt.cfg) or len( opt.weights), 'either --cfg or --weights must be specified' if opt.evolve: if opt.project == str( ROOT / 'runs/train' ): # if default project name, rename to runs/evolve opt.project = str(ROOT / 'runs/evolve') opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume if opt.name == 'cfg': opt.name = Path(opt.cfg).stem # use model.yaml as name opt.save_dir = str( increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # DDP mode device = select_device(opt.device, batch_size=opt.batch_size) if LOCAL_RANK != -1: msg = 'is not compatible with YOLOv5 Multi-GPU DDP training' assert not opt.image_weights, f'--image-weights {msg}' assert not opt.evolve, f'--evolve {msg}' assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size' assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE' assert torch.cuda.device_count( ) > LOCAL_RANK, 'insufficient CUDA devices for DDP command' torch.cuda.set_device(LOCAL_RANK) device = torch.device('cuda', LOCAL_RANK) dist.init_process_group( backend="nccl" if dist.is_nccl_available() else "gloo") # Train if not opt.evolve: train(opt.hyp, opt, device, callbacks) if WORLD_SIZE > 1 and RANK == 0: LOGGER.info('Destroying process group... ') dist.destroy_process_group() # Evolve hyperparameters (optional) else: # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) meta = { 'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr 'box': (1, 0.02, 0.2), # box loss gain 'cls': (1, 0.2, 4.0), # cls loss gain 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight 'iou_t': (0, 0.1, 0.7), # IoU training threshold 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) 'scale': (1, 0.0, 0.9), # image scale (+/- gain) 'shear': (1, 0.0, 10.0), # image shear (+/- deg) 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) 'mosaic': (1, 0.0, 1.0), # image mixup (probability) 'mixup': (1, 0.0, 1.0), # image mixup (probability) 'copy_paste': (1, 0.0, 1.0) } # segment copy-paste (probability) with open(opt.hyp, errors='ignore') as f: hyp = yaml.safe_load(f) # load hyps dict if 'anchors' not in hyp: # anchors commented in hyp.yaml hyp['anchors'] = 3 opt.noval, opt.nosave, save_dir = True, True, Path( opt.save_dir) # only val/save final epoch # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv' if opt.bucket: os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}' ) # download evolve.csv if exists for _ in range(opt.evolve): # generations to evolve if evolve_csv.exists( ): # if evolve.csv exists: select best hyps and mutate # Select parent(s) parent = 'single' # parent selection method: 'single' or 'weighted' x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1) n = min(5, len(x)) # number of previous results to consider x = x[np.argsort(-fitness(x))][:n] # top n mutations w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0) if parent == 'single' or len(x) == 1: # x = x[random.randint(0, n - 1)] # random selection x = x[random.choices(range(n), weights=w)[0]] # weighted selection elif parent == 'weighted': x = (x * w.reshape( n, 1)).sum(0) / w.sum() # weighted combination # Mutate mp, s = 0.8, 0.2 # mutation probability, sigma npr = np.random npr.seed(int(time.time())) g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1 ng = len(meta) v = np.ones(ng) while all( v == 1 ): # mutate until a change occurs (prevent duplicates) v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) hyp[k] = float(x[i + 7] * v[i]) # mutate # Constrain to limits for k, v in meta.items(): hyp[k] = max(hyp[k], v[1]) # lower limit hyp[k] = min(hyp[k], v[2]) # upper limit hyp[k] = round(hyp[k], 5) # significant digits # Train mutation results = train(hyp.copy(), opt, device, callbacks) callbacks = Callbacks() # Write mutation results print_mutation(results, hyp.copy(), save_dir, opt.bucket) # Plot results plot_evolve(evolve_csv) LOGGER.info( f'Hyperparameter evolution finished {opt.evolve} generations\n' f"Results saved to {colorstr('bold', save_dir)}\n" f'Usage example: $ python train.py --hyp {evolve_yaml}')
'output': { 0: 'batch', 2: 'y', 3: 'x' } } if opt.dynamic else None) # Checks model_onnx = onnx.load(f) # load onnx model onnx.checker.check_model(model_onnx) # check onnx model # print(onnx.helper.printable_graph(model_onnx.graph)) # print # Simplify if opt.simplify: try: check_requirements(['onnx-simplifier']) import onnxsim print( f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...' ) model_onnx, check = onnxsim.simplify( model_onnx, dynamic_input_shape=opt.dynamic, input_shapes={'images': list(img.shape)} if opt.dynamic else None) assert check, 'assert check failed' onnx.save(model_onnx, f) except Exception as e: print(f'{prefix} simplifier failure: {e}') print(
def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True): self.mode = 'stream' self.img_size = img_size self.stride = stride if os.path.isfile(sources): with open(sources) as f: sources = [ x.strip() for x in f.read().strip().splitlines() if len(x.strip()) ] else: sources = [sources] n = len(sources) self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [ 0 ] * n, [None] * n self.sources = [clean_str(x) for x in sources] # clean source names for later self.auto = auto for i, s in enumerate(sources): # index, source # Start thread to read frames from video stream st = f'{i + 1}/{n}: {s}... ' if urlparse(s).hostname in ( 'www.youtube.com', 'youtube.com', 'youtu.be'): # if source is YouTube video check_requirements(('pafy', 'youtube_dl==2020.12.2')) import pafy s = pafy.new(s).getbest(preftype="mp4").url # YouTube URL s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam cap = cv2.VideoCapture(s) assert cap.isOpened(), f'{st}Failed to open {s}' w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = cap.get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback _, self.imgs[i] = cap.read() # guarantee first frame self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True) LOGGER.info( f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)" ) self.threads[i].start() LOGGER.info('') # newline # check for common shapes s = np.stack([ letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0].shape for x in self.imgs ]) self.rect = np.unique( s, axis=0).shape[0] == 1 # rect inference if all shapes equal if not self.rect: LOGGER.warning( 'WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.' )
def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorstr('ONNX:')): # YOLOv5 ONNX export try: check_requirements(('onnx', )) import onnx print(f'\n{prefix} starting export with onnx {onnx.__version__}...') f = file.with_suffix('.onnx') torch.onnx.export( model, im, f, verbose=False, opset_version=opset, training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL, do_constant_folding=not train, input_names=['images'], output_names=['output'], dynamic_axes={ 'images': { 0: 'batch', 2: 'height', 3: 'width' }, # shape(1,3,640,640) 'output': { 0: 'batch', 1: 'anchors' } # shape(1,25200,85) } if dynamic else None) # Checks model_onnx = onnx.load(f) # load onnx model onnx.checker.check_model(model_onnx) # check onnx model # print(onnx.helper.printable_graph(model_onnx.graph)) # print # Simplify if simplify: try: check_requirements(('onnx-simplifier', )) import onnxsim print( f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...' ) model_onnx, check = onnxsim.simplify( model_onnx, dynamic_input_shape=dynamic, input_shapes={'images': list(im.shape)} if dynamic else None) assert check, 'assert check failed' onnx.save(model_onnx, f) except Exception as e: print(f'{prefix} simplifier failure: {e}') print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') print( f"{prefix} run --dynamic ONNX model inference with: 'python detect.py --weights {f}'" ) except Exception as e: print(f'{prefix} export failure: {e}')
def main(opt): check_requirements(exclude=('tensorboard', 'thop')) run(**vars(opt))
def run( data, weights=None, # model.pt path(s) batch_size=32, # batch size imgsz=640, # inference size (pixels) conf_thres=0.001, # confidence threshold iou_thres=0.6, # NMS IoU threshold task='val', # train, val, test, speed or study device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu single_cls=False, # treat as single-class dataset augment=False, # augmented inference verbose=False, # verbose output save_txt=False, # save results to *.txt save_hybrid=False, # save label+prediction hybrid results to *.txt save_conf=False, # save confidences in --save-txt labels save_json=False, # save a cocoapi-compatible JSON results file project='runs/test', # save to project/name name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference model=None, dataloader=None, save_dir=Path(''), plots=True, wandb_logger=None, compute_loss=None, ): # Initialize/load model and set device training = model is not None if training: # called by train.py device = next(model.parameters()).device # get model device else: # called directly device = select_device(device, batch_size=batch_size) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Load model model = attempt_load(weights, map_location=device) # load FP32 model gs = max(int(model.stride.max()), 32) # grid size (max stride) imgsz = check_img_size(imgsz, s=gs) # check image size # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: # model = nn.DataParallel(model) # Half half &= device.type != 'cpu' # half precision only supported on CUDA if half: model.half() # Configure model.eval() if isinstance(data, str): with open(data) as f: data = yaml.safe_load(f) check_dataset(data) # check is_coco = type(data['val']) is str and data['val'].endswith( 'coco/val2017.txt') # COCO dataset nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for [email protected]:0.95 niou = iouv.numel() # Logging log_imgs = 0 if wandb_logger and wandb_logger.wandb: log_imgs = min(wandb_logger.log_imgs, 100) # Dataloader if not training: if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once task = task if task in ( 'train', 'val', 'test') else 'val' # path to train/val/test images dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=0.5, rect=True, prefix=colorstr(f'{task}: '))[0] seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) names = { k: v for k, v in enumerate( model.names if hasattr(model, 'names') else model.module.names) } coco91class = coco80_to_coco91_class() s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', '[email protected]', '[email protected]:.95') p, r, f1, mp, mr, map50, map, t0, t1, t2 = 0., 0., 0., 0., 0., 0., 0., 0., 0., 0. loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class, wandb_images = [], [], [], [], [] for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): t_ = time_synchronized() img = img.to(device, non_blocking=True) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) nb, _, height, width = img.shape # batch size, channels, height, width t = time_synchronized() t0 += t - t_ # Run model out, train_out = model( img, augment=augment) # inference and training outputs t1 += time_synchronized() - t # Compute loss if compute_loss: loss += compute_loss([x.float() for x in train_out], targets)[1][:3] # box, obj, cls # Run NMS targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t = time_synchronized() out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls) t2 += time_synchronized() - t # Statistics per image for si, pred in enumerate(out): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class path = Path(paths[si]) seen += 1 if len(pred) == 0: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) continue # Predictions if single_cls: pred[:, 5] = 0 predn = pred.clone() scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred # Append to text file if save_txt: gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0 ]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') # W&B logging - Media Panel plots if len( wandb_images ) < log_imgs and wandb_logger.current_epoch > 0: # Check for test operation if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0: box_data = [{ "position": { "minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3] }, "class_id": int(cls), "box_caption": "%s %.3f" % (names[cls], conf), "scores": { "class_score": conf }, "domain": "pixel" } for *xyxy, conf, cls in pred.tolist()] boxes = { "predictions": { "box_data": box_data, "class_labels": names } } # inference-space wandb_images.append( wandb_logger.wandb.Image(img[si], boxes=boxes, caption=path.name)) wandb_logger.log_training_progress( predn, path, names) if wandb_logger and wandb_logger.wandb_run else None # Append to pycocotools JSON dictionary if save_json: # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... image_id = int( path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for p, b in zip(pred.tolist(), box.tolist()): jdict.append({ 'image_id': image_id, 'category_id': coco91class[int(p[5])] if is_coco else int(p[5]), 'bbox': [round(x, 3) for x in b], 'score': round(p[4], 5) }) # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels if plots: confusion_matrix.process_batch( predn, torch.cat((labels[:, 0:1], tbox), 1)) # Per target class for cls in torch.unique(tcls_tensor): ti = (cls == tcls_tensor).nonzero(as_tuple=False).view( -1) # target indices pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view( -1) # prediction indices # Search for detections if pi.shape[0]: # Prediction to target ious ious, i = box_iou(predn[pi, :4], tbox[ti]).max( 1) # best ious, indices # Append detections detected_set = set() for j in (ious > iouv[0]).nonzero(as_tuple=False): d = ti[i[j]] # detected target if d.item() not in detected_set: detected_set.add(d.item()) detected.append(d) correct[ pi[j]] = ious[j] > iouv # iou_thres is 1xn if len( detected ) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append( (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if plots and batch_i < 3: f = save_dir / f'test_batch{batch_i}_labels.jpg' # labels Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start() f = save_dir / f'test_batch{batch_i}_pred.jpg' # predictions Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start() # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names) ap50, ap = ap[:, 0], ap.mean(1) # [email protected], [email protected]:0.95 mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%11i' * 2 + '%11.3g' * 4 # print format print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) # Print results per class if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): for i, c in enumerate(ap_class): print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x / seen * 1E3 for x in (t0, t1, t2)) # speeds per image if not training: shape = (batch_size, 3, imgsz, imgsz) print( f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) if wandb_logger and wandb_logger.wandb: val_batches = [ wandb_logger.wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg')) ] wandb_logger.log({"Validation": val_batches}) if wandb_images: wandb_logger.log({"Bounding Box Debugger/Images": wandb_images}) # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights ).stem if weights is not None else '' # weights anno_json = '../coco/annotations/instances_val2017.json' # annotations json pred_json = str(save_dir / f"{w}_predictions.json") # predictions json print('\nEvaluating pycocotools mAP... saving %s...' % pred_json) with open(pred_json, 'w') as f: json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb check_requirements(['pycocotools']) from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api eval = COCOeval(anno, pred, 'bbox') if is_coco: eval.params.imgIds = [ int(Path(x).stem) for x in dataloader.dataset.img_files ] # image IDs to evaluate eval.evaluate() eval.accumulate() eval.summarize() map, map50 = eval.stats[: 2] # update results ([email protected]:0.95, [email protected]) except Exception as e: print(f'pycocotools unable to run: {e}') # Return results model.float() # for training if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
help='save results to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') opt = parser.parse_args() print(opt) check_requirements(exclude=('torch', 'torchvision', 'tensorboard', 'pycocotools', 'thop')) with torch.no_grad(): if opt.update: # update all models (to fix SourceChangeWarning) for opt.weights in [ 'yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt' ]: detect(opt=opt) strip_optimizer(opt.weights) else: detect(opt=opt)
def __init__(self, weights='yolov5s.pt', device=None, dnn=True): # Usage: # PyTorch: weights = *.pt # TorchScript: *.torchscript.pt # CoreML: *.mlmodel # TensorFlow: *_saved_model # TensorFlow: *.pb # TensorFlow Lite: *.tflite # ONNX Runtime: *.onnx # OpenCV DNN: *.onnx with dnn=True super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) suffix, suffixes = Path(w).suffix.lower(), [ '.pt', '.onnx', '.tflite', '.pb', '', '.mlmodel' ] check_suffix(w, suffixes) # check weights have acceptable suffix pt, onnx, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes ) # backend booleans jit = pt and 'torchscript' in w.lower() stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults if jit: # TorchScript LOGGER.info(f'Loading {w} for TorchScript inference...') extra_files = {'config.txt': ''} # model metadata model = torch.jit.load(w, _extra_files=extra_files) if extra_files['config.txt']: d = json.loads(extra_files['config.txt']) # extra_files dict stride, names = int(d['stride']), d['names'] elif pt: # PyTorch from models.experimental import attempt_load # scoped to avoid circular import model = torch.jit.load(w) if 'torchscript' in w else attempt_load( weights, map_location=device) stride = int(model.stride.max()) # model stride names = model.module.names if hasattr( model, 'module') else model.names # get class names elif coreml: # CoreML *.mlmodel import coremltools as ct model = ct.models.MLModel(w) elif dnn: # ONNX OpenCV DNN LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') check_requirements(('opencv-python>=4.5.4', )) net = cv2.dnn.readNetFromONNX(w) elif onnx: # ONNX Runtime LOGGER.info(f'Loading {w} for ONNX Runtime inference...') check_requirements( ('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime')) import onnxruntime session = onnxruntime.InferenceSession(w, None) else: # TensorFlow model (TFLite, pb, saved_model) import tensorflow as tf if pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt def wrap_frozen_graph(gd, inputs, outputs): x = tf.compat.v1.wrap_function( lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped return x.prune( tf.nest.map_structure(x.graph.as_graph_element, inputs), tf.nest.map_structure(x.graph.as_graph_element, outputs)) LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...') graph_def = tf.Graph().as_graph_def() graph_def.ParseFromString(open(w, 'rb').read()) frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") elif saved_model: LOGGER.info( f'Loading {w} for TensorFlow saved_model inference...') model = tf.keras.models.load_model(w) elif tflite: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python if 'edgetpu' in w.lower(): LOGGER.info( f'Loading {w} for TensorFlow Edge TPU inference...') import tflite_runtime.interpreter as tfli delegate = { 'Linux': 'libedgetpu.so.1', # install https://coral.ai/software/#edgetpu-runtime 'Darwin': 'libedgetpu.1.dylib', 'Windows': 'edgetpu.dll' }[platform.system()] interpreter = tfli.Interpreter( model_path=w, experimental_delegates=[tfli.load_delegate(delegate)]) else: LOGGER.info( f'Loading {w} for TensorFlow Lite inference...') interpreter = tf.lite.Interpreter( model_path=w) # load TFLite model interpreter.allocate_tensors() # allocate input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs self.__dict__.update(locals()) # assign all variables to self
def run(weights='yolov5s.pt', # model.pt path(s) source='data/images', # file/dir/URL/glob, 0 for webcam imgsz=640, # inference size (pixels) conf_thres=0.25, # confidence threshold iou_thres=0.45, # NMS IOU threshold max_det=1000, # maximum detections per image device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu view_img=False, # show results save_txt=False, # save results to *.txt save_conf=False, # save confidences in --save-txt labels save_crop=False, # save cropped prediction boxes nosave=False, # do not save images/videos classes=None, # filter by class: --class 0, or --class 0 2 3 agnostic_nms=False, # class-agnostic NMS augment=False, # augmented inference visualize=False, # visualize features update=False, # update all models project='runs/detect', # save results to project/name name='exp', # save results to project/name exist_ok=False, # existing project/name ok, do not increment line_thickness=3, # bounding box thickness (pixels) hide_labels=False, # hide labels hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference ): save_img = not nosave and not source.endswith('.txt') # save inference images webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) sheet = pd.DataFrame() # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(device) half &= device.type != 'cpu' # half precision only supported on CUDA # Load model w = weights[0] if isinstance(weights, list) else weights classify, pt, onnx = False, w.endswith('.pt'), w.endswith('.onnx') # inference type stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults if pt: model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names if half: model.half() # to FP16 if classify: # second-stage classifier modelc = load_classifier(name='resnet50', n=2) # initialize modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval() elif onnx: check_requirements(('onnx', 'onnxruntime')) import onnxruntime session = onnxruntime.InferenceSession(w, None) imgsz = check_img_size(imgsz, s=stride) # check image size # Dataloader if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) bs = len(dataset) # batch_size else: dataset = LoadImages(source, img_size=imgsz, stride=stride) bs = 1 # batch_size vid_path, vid_writer = [None] * bs, [None] * bs # Run inference if pt and device.type != 'cpu': model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: # print(path) # sys.exit(0) if pt: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 elif onnx: img = img.astype('float32') img /= 255.0 # 0 - 255 to 0.0 - 1.0 if len(img.shape) == 3: img = img[None] # expand for batch dim # Inference t1 = time_sync() if pt: visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False pred = model(img, augment=augment, visualize=visualize)[0] elif onnx: pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img})) # NMS pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) t2 = time_sync() # Second-stage classifier (optional) if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process predictions # print(len(pred)) for i, det in enumerate(pred): # detections per image # print(det.shape) # sys.exit(0) if webcam: # batch_size >= 1 p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count else: p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh # line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format # with open(txt_path + '.txt', 'a') as f: # f.write(('%g ' * len(line)).rstrip() % line + '\n') lst = torch.tensor(xyxy).tolist() # print(i) dic = dict(name=names[int(cls)], image_id=str(path.split('\\')[-1].split('.')[0]), confidence=float(conf), xmin=int(lst[0]), ymin=int(lst[1]), xmax=int(lst[2]), ymax=int(lst[3])) sheet = sheet.append(dic, ignore_index=True) # print(dic) # sys.exit(0) # print(dic) # sys.exit(0) if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) # print(sheet) # sys.exit(0) # Print time (inference + NMS) print(f'{s}Done. ({t2 - t1:.3f}s)') # Stream results if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' or 'stream' if vid_path[i] != save_path: # new video vid_path[i] = save_path if isinstance(vid_writer[i], cv2.VideoWriter): vid_writer[i].release() # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path += '.mp4' vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) vid_writer[i].write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") if update: strip_optimizer(weights) # update model (to fix SourceChangeWarning) cols=['name', 'image_id', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax'] sheet = sheet.loc[:, cols] # sheet = sheet.set_index('name') sheet.to_csv('result.csv', index=False) print(f'Done. ({time.time() - t0:.3f}s)')
default=-1, help='Log model after every "save_period" epoch') parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used') opt = parser.parse_args() # Set DDP variables opt.world_size = int( os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1 set_logging(opt.global_rank) if opt.global_rank in [-1, 0]: check_git_status() check_requirements(exclude=('pycocotools', 'thop')) # Resume wandb_run = check_wandb_resume(opt) if opt.resume and not wandb_run: # resume an interrupted run ckpt = opt.resume if isinstance( opt.resume, str) else get_latest_run() # specified or most recent path assert os.path.isfile( ckpt), 'ERROR: --resume checkpoint does not exist' apriori = opt.global_rank, opt.local_rank with open(Path(ckpt).parent.parent / 'opt.yaml') as f: opt = argparse.Namespace(**yaml.safe_load(f)) # replace opt.cfg, opt.weights, opt.resume, opt.batch_size, opt.global_rank, opt.local_rank = \ '', ckpt, True, opt.total_batch_size, *apriori # reinstate logger.info('Resuming training from %s' % ckpt)
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--view-img', action='store_true', help='display results') parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes') parser.add_argument('--nosave', action='store_true', help='do not save images/videos') parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3') parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--update', action='store_true', help='update all models') parser.add_argument('--project', default='runs/detect', help='save results to project/name') parser.add_argument('--name', default='exp', help='save results to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') opt = parser.parse_args() print(opt) check_requirements() with torch.no_grad(): if opt.update: # update all models (to fix SourceChangeWarning) for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']: detect(opt=opt) strip_optimizer(opt.weights) else: detect(opt=opt)
def run( weights=ROOT / 'yolov5s.pt', # model.pt path(s) 训练的权重 source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam 测试数据,图片/视频路径,'0'摄像头,rtsp视频流 imgsz=640, # inference size (pixels) 网络输入图片大小 conf_thres=0.25, # confidence threshold 置信度阈值 iou_thres=0.45, # NMS IOU threshold nms的iou阈值 max_det=1000, # maximum detections per image 分类数 device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu 设备 view_img=True, # show results 是否展示预测之后的图片/视频 save_txt=False, # save results to *.txt 是否将预测的框坐标保持txt格式,默认false # save_conf=False, # save confidences in --save-txt labels 置信度保存 save_crop=False, # save cropped prediction boxes nosave=False, # do not save images/videos 不保存 classes=None, # filter by class: --class 0, or --class 0 2 3 设置只保留某一部分类别 agnostic_nms=False, # class-agnostic NMS 进行nms是否也去除不同类别之间的框 augment=False, # augmented inference 图像增强 visualize=False, # visualize features 可视化 # update=False, # update all models 若ture,则对所有模型进行strip_optimizer操作,去除pt文件中的优化器等信息,默认false project=ROOT / 'runs/detect', # save results to project/name name='exp', # save results to project/name exist_ok=False, # existing project/name ok, do not increment line_thickness=3, # bounding box thickness (pixels) hide_labels=False, # hide labels hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference ): source = str(source) save_img = not nosave and not source.endswith( '.txt') # save inference images webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(device) half &= device.type != 'cpu' # half precision only supported on CUDA # Load model w = weights[0] if isinstance(weights, list) else weights classify, suffix, suffixes = False, Path(w).suffix.lower(), [ '.pt', '.onnx', '.tflite', '.pb', '' ] check_suffix(w, suffixes) # check weights have acceptable suffix pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes) # backend booleans stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults if pt: model = attempt_load( weights, map_location=device) # load FP32 model 加载float32模型,确保图片分辨率能整除32 stride = int(model.stride.max()) # model stride names = model.module.names if hasattr( model, 'module') else model.names # get class names #设置Float16 if half: model.half() # to FP16 # 设置2次分类 if classify: # second-stage classifier modelc = load_classifier(name='resnet50', n=2) # initialize modelc.load_state_dict( torch.load('resnet50.pt', map_location=device)['model']).to(device).eval() # elif onnx: # check_requirements(('onnx', 'onnxruntime')) # import onnxruntime # session = onnxruntime.InferenceSession(w, None) else: # TensorFlow models check_requirements(('tensorflow>=2.4.1', )) import tensorflow as tf if pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt def wrap_frozen_graph(gd, inputs, outputs): x = tf.compat.v1.wrap_function( lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped import return x.prune( tf.nest.map_structure(x.graph.as_graph_element, inputs), tf.nest.map_structure(x.graph.as_graph_element, outputs)) graph_def = tf.Graph().as_graph_def() graph_def.ParseFromString(open(w, 'rb').read()) frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") elif saved_model: model = tf.keras.models.load_model(w) elif tflite: interpreter = tf.lite.Interpreter( model_path=w) # load TFLite model interpreter.allocate_tensors() # allocate input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs int8 = input_details[0][ 'dtype'] == np.uint8 # is TFLite quantized uint8 model imgsz = check_img_size(imgsz, s=stride) # check image size # Dataloader # 通过不同的输入源来设置不同的数据加载方式 # 摄像头 if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) bs = len(dataset) # batch_size # 图片或视频 else: dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) bs = 1 # batch_size vid_path, vid_writer = [None] * bs, [None] * bs # Run inference if pt and device.type != 'cpu': # 进行一次前向推理,测试程序是否正常 model( torch.zeros(1, 3, *imgsz).to(device).type_as( next(model.parameters()))) # run once dt, seen = [0.0, 0.0, 0.0], 0 ''' path 图片/视频路径 img 进行resize+pad之后的图片,如(3,640,512) 格式(c,h,w) img0s 原size图片,如(1080,810,3) cap 当读取图片时为None,读取视频时为视频源 ''' for path, img, im0s, vid_cap in dataset: t1 = time_sync() if onnx: img = img.astype('float32') else: img = torch.from_numpy(img).to(device) # 图片也设置为Float16或者32 img = img.half() if half else img.float() # uint8 to fp16/32 img = img / 255.0 # 0 - 255 to 0.0 - 1.0 # 没有batch_size时,在最前面添加一个轴 if len(img.shape) == 3: img = img[None] # expand for batch dim t2 = time_sync() dt[0] += t2 - t1 # Inference if pt: visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False ''' 前向传播,返回pred的shape是(1,num_boxes,5+num_class) h,w为传入网络图片的高和宽,注意dataset在检测时使用了矩形推理,所以h不一定等于w num_boxes = (h/32*w/32+h/16*w/16+h/8*w/8)*3 例如:图片大小720,1280 -> 15120个boxes = (20*12 + 40*24 + 80*48 = 5040)*3 pred[...,0:4]为预测框坐标;预测框坐标为xywh pred[...,4]为objectness置信度 pred[...,5:-1]为分类结果 ''' pred = model(img, augment=augment, visualize=visualize)[0] # elif onnx: # pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img})) else: # tensorflow model (tflite, pb, saved_model) imn = img.permute(0, 2, 3, 1).cpu().numpy() # image in numpy if pb: pred = frozen_func(x=tf.constant(imn)).numpy() elif saved_model: pred = model(imn, training=False).numpy() elif tflite: if int8: scale, zero_point = input_details[0]['quantization'] imn = (imn / scale + zero_point).astype( np.uint8) # de-scale interpreter.set_tensor(input_details[0]['index'], imn) interpreter.invoke() pred = interpreter.get_tensor(output_details[0]['index']) if int8: scale, zero_point = output_details[0]['quantization'] pred = (pred.astype(np.float32) - zero_point) * scale # re-scale pred[..., 0] *= imgsz[1] # x pred[..., 1] *= imgsz[0] # y pred[..., 2] *= imgsz[1] # w pred[..., 3] *= imgsz[0] # h pred = torch.tensor(pred) t3 = time_sync() dt[1] += t3 - t2 # NMS ''' pred:前向传播的输出 conf_thres:置信度阈值 iou_thres:iou阈值 classes:是否只保留特定的类别 agnostic_nmsL进行nms是否也去除不同类别之间的框 经过nms后预测框格式,xywh->xyxy(左上角右上角) pred是一个列表list[torch.tensor],长度为nms后目标框个数 每一个torch.tensor的shape为(num_boxes,6),内容为box(4个值)+cunf+cls ''' pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) dt[2] += time_sync() - t3 # Second-stage classifier (optional) # 添加二级分类,默认false if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process predictions # 对每一张图片处理 for i, det in enumerate(pred): # per image seen += 1 # 如果输入源是webcam,则batch_size不为1,取出dataset中的一张图片 if webcam: # batch_size >= 1 p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s.copy(), getattr( dataset, 'frame', 0) p = Path(p) # to Path # 设置保存图片或视频的路径 # p是原图片路径 save_path = str(save_dir / p.name) # img.jpg #设置保存框坐标txt文件的路径 txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt # 设置打印信息(图片宽高),s如'640*512' s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): # Rescale boxes from img_size to im0 size # 调整预测框坐标,基于resize+pad的图片坐标->基于原size图片坐标 # 此时坐标格式为xyxy det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results # 打印检测到的类别数量 for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results # 保存预测结果 for *xyxy, conf, cls in reversed(det): # if save_txt: # Write to file # # 将xyxy格式转为xywh格式,并除上我w,h作归一化,转化为列表再保存 # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh # line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format # with open(txt_path + '.txt', 'a') as f: # f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else ( names[c] if hide_conf else f'{names[c]} {conf:.2f}') annotator.box_label(xyxy, label, color=colors(c, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) # Print time (inference-only) # print(f'{pred[0][0][0].tolist()} {pred[0][0][1].tolist()} {s}Done. ({t3 - t2:.3f}s)') # Stream results im0 = annotator.result() # xxx = (pred[0][0][0].tolist()+pred[0][0][2].tolist())/2 # yyy = (pred[0][0][1].tolist()+pred[0][0][3].tolist())/2 if view_img: # + / 2 + cv2.imshow(str(p), im0) cv2.moveWindow(str(p), 0, 0) # pyautogui.moveTo(xxx, yyy) cv2.waitKey(1000) # 1 millisecond # Save results (image with detections) # if save_img: # if dataset.mode == 'image': # cv2.imwrite(save_path, im0) # else: # 'video' or 'stream' # if vid_path[i] != save_path: # new video # vid_path[i] = save_path # if isinstance(vid_writer[i], cv2.VideoWriter): # vid_writer[i].release() # release previous video writer # if vid_cap: # video # fps = vid_cap.get(cv2.CAP_PROP_FPS) # w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) # h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) # else: # stream # fps, w, h = 30, im0.shape[1], im0.shape[0] # save_path += '.mp4' # vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) # vid_writer[i].write(im0) # Print results t = tuple(x / seen * 1E3 for x in dt) # speeds per image print( f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)