def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
    for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
        s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # print string
        if pred.shape[0]:
            for c in pred[:, -1].unique():
                n = (pred[:, -1] == c).sum()  # detections per class
                s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
            if show or save or render or crop:
                annotator = Annotator(im, pil=not self.ascii)
                for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                    label = f'{self.names[int(cls)]} {conf:.2f}'
                    if crop:
                        save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i])
                    else:  # all others
                        annotator.box_label(box, label, color=colors(cls))
                im = annotator.im
        else:
            s += '(no detections)'

        im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
        if pprint:
            LOGGER.info(s.rstrip(', '))
        if show:
            im.show(self.files[i])  # show
        if save:
            f = self.files[i]
            im.save(save_dir / f)  # save
            if i == self.n - 1:
                LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
        if render:
            self.imgs[i] = np.asarray(im)
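# Usage sketch (assumption: the standard torch.hub entry point for ultralytics/yolov5 is
# available): results.print(), results.show() and results.save() on the hub Detections object
# are thin wrappers around the display() method above.
import torch

hub_model = torch.hub.load('ultralytics/yolov5', 'yolov5s')        # pretrained model
results = hub_model('https://ultralytics.com/images/zidane.jpg')   # run inference on one image
results.print()  # display(pprint=True): log per-class counts, e.g. "2 persons, 2 ties"
results.save()   # display(save=True): write annotated images and log the save directory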
def plot(self, im, det):
    # Previous plot_one_box-based implementation, kept commented for reference:
    # names = self.names; colors = self.colors
    # for *xyxy, conf, c in reversed(det):  # (x1,y1,x2,y2,conf,cls)
    #     c = int(c); label = f'{names[c]} {conf:.2f}'
    #     # Add bbox to image
    #     plot_one_box(xyxy, im, label=label, color=colors[c], line_width=2)
    names = self.names
    from utils.plots import colors
    annotator = Annotator(im, line_width=2, example=str(names))
    for *xyxy, conf, c in reversed(det):  # (x1,y1,x2,y2,conf,cls)
        c = int(c)
        label = f'{names[c]} {conf:.2f}'
        # Add bbox to image
        annotator.box_label(xyxy, label, color=colors(c, True))
    im[:] = annotator.result()
    return {names[int(c)]: int((det[:, -1] == c).sum()) for c in det[:, -1].unique()}
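# Toy illustration of the per-class count dict returned by plot() above: det is an (N, 6)
# tensor of (x1, y1, x2, y2, conf, cls) rows, so counting unique values of the last column
# gives detections per class. Standalone example; names and det values are made up.
import torch

names = ['person', 'car', 'dog']
det = torch.tensor([[0., 0., 10., 10., 0.9, 0.],
                    [5., 5., 20., 20., 0.8, 0.],
                    [1., 1., 30., 30., 0.7, 2.]])
counts = {names[int(c)]: int((det[:, -1] == c).sum()) for c in det[:, -1].unique()}
print(counts)  # {'person': 2, 'dog': 1}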
def run(
        weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
        source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        ):
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = str(weights[0] if isinstance(weights, list) else weights)
    classify, suffix, suffixes = False, Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
    check_suffix(w, suffixes)  # check weights have acceptable suffix
    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device, fuse=False)
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        # Debug helpers, kept commented out:
        # for _, param in enumerate(model.named_parameters()):
        #     print("====>", param[0], param[1].shape)
        # torch.save(model.state_dict(), 'new_params.pt')
        # for k, v in model.state_dict().items():
        #     print(k, v.shape)
        # exit()
        if half:
            model.half()  # to FP16
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model'])
            modelc.to(device).eval()
    elif onnx:
        if dnn:
            # check_requirements(('opencv-python>=4.5.4',))
            net = cv2.dnn.readNetFromONNX(w)
        else:
            check_requirements(('onnx', 'onnxruntime'))
            import onnxruntime
            session = onnxruntime.InferenceSession(w, None)
    else:  # TensorFlow models
        check_requirements(('tensorflow>=2.4.1',))
        import tensorflow as tf
        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped import
                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                               tf.nest.map_structure(x.graph.as_graph_element, outputs))

            graph_def = tf.Graph().as_graph_def()
            graph_def.ParseFromString(open(w, 'rb').read())
            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
        elif saved_model:
            model = tf.keras.models.load_model(w)
        elif tflite:
            interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            int8 = input_details[0]['dtype'] == np.uint8  # is TFLite quantized uint8 model
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    if pt and device.type != 'cpu':
        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
    dt, seen = [0.0, 0.0, 0.0], 0
    for path, img, im0s, vid_cap in dataset:
        t1 = time_sync()
        if onnx:
            img = img.astype('float32')
        else:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
        img = img / 255.0  # 0 - 255 to 0.0 - 1.0
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        if pt:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(img, augment=augment, visualize=visualize)[0]
            # Export model weights to a .wts text file (e.g. for a TensorRT conversion tool), then exit
            anchor_grid = model.model[-1].anchors * model.model[-1].stride[..., None, None]
            delattr(model.model[-1], 'anchor_grid')  # model.model[-1] is detect layer
            model.model[-1].register_buffer("anchor_grid", anchor_grid)
            model.to(device).eval()
            wts_file = "generated.wts"
            with open(wts_file, 'w') as f:
                f.write('{}\n'.format(len(model.state_dict().keys())))
                for k, v in model.state_dict().items():
                    if len(v.shape) == 0:
                        continue
                    print(k, v.shape)
                    vr = v.reshape(-1).cpu().numpy()
                    f.write('{} {} {} {}'.format(k, len(vr), v.shape[0], v.shape[1] if len(v.shape) > 1 else 0))
                    for vv in vr:
                        f.write(' ')
                        f.write(struct.pack('>f', float(vv)).hex())
                    f.write('\n')
            exit()
        elif onnx:
            if dnn:
                net.setInput(img)
                pred = torch.tensor(net.forward())
            else:
                pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
        else:  # tensorflow model (tflite, pb, saved_model)
            imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
            if pb:
                pred = frozen_func(x=tf.constant(imn)).numpy()
            elif saved_model:
                pred = model(imn, training=False).numpy()
            elif tflite:
                if int8:
                    scale, zero_point = input_details[0]['quantization']
                    imn = (imn / scale + zero_point).astype(np.uint8)  # de-scale
                interpreter.set_tensor(input_details[0]['index'], imn)
                interpreter.invoke()
                pred = interpreter.get_tensor(output_details[0]['index'])
                if int8:
                    scale, zero_point = output_details[0]['quantization']
                    pred = (pred.astype(np.float32) - zero_point) * scale  # re-scale
            pred[..., 0] *= imgsz[1]  # x
            pred[..., 1] *= imgsz[0]  # y
            pred[..., 2] *= imgsz[1]  # w
            pred[..., 3] *= imgsz[0]  # h
            pred = torch.tensor(pred)
        t3 = time_sync()
        dt[1] += t3 - t2

        # NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        dt[2] += time_sync() - t3

        # Second-stage classifier (optional)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference-only)
            print(f'{s}Done. ({t3 - t2:.3f}s)')

            # Stream results
            im0 = annotator.result()
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path += '.mp4'
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
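# Sketch of the text format produced by the .wts export block above (standalone, hedged example;
# the tensor and the name 'toy.weight' are made up): the file starts with the number of entries,
# then one line per tensor: "<name> <count> <dim0> <dim1>" followed by each weight as a
# big-endian IEEE-754 float in hex, exactly as struct.pack('>f', x).hex() emits it.
import struct
import torch

v = torch.tensor([[1.0, -2.5], [0.0, 3.25]])
vr = v.reshape(-1).numpy()
line = 'toy.weight {} {} {}'.format(len(vr), v.shape[0], v.shape[1])
for x in vr:
    line += ' ' + struct.pack('>f', float(x)).hex()
print(line)  # toy.weight 4 2 2 3f800000 c0200000 00000000 40500000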
def display(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
    crops = []
    for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
        s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
        if pred.shape[0]:
            for c in pred[:, -1].unique():
                n = (pred[:, -1] == c).sum()  # detections per class
                s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
            if show or save or render or crop:
                annotator = Annotator(im, example=str(self.names))
                for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                    label = f'{self.names[int(cls)]} {conf:.2f}'
                    if crop:
                        file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
                        crops.append({
                            'box': box,
                            'conf': conf,
                            'cls': cls,
                            'label': label,
                            'im': save_one_box(box, im, file=file, save=save)})
                    else:  # all others
                        annotator.box_label(box, label if labels else '', color=colors(cls))
                im = annotator.im
        else:
            s += '(no detections)'

        im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
        if pprint:
            print(s.rstrip(', '))
        if show:
            im.show(self.files[i])  # show
        if save:
            f = self.files[i]
            im.save(save_dir / f)  # save
            if i == self.n - 1:
                LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
        if render:
            self.imgs[i] = np.asarray(im)
    if crop:
        if save:
            LOGGER.info(f'Saved results to {save_dir}\n')
        return crops
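# Usage sketch for the crop path above (assumption: the standard hub Detections API):
# results.crop(save=False) calls display(crop=True, save=False) and returns the crops list
# built above, one dict per detection, without writing any files.
import torch

hub_model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
results = hub_model('https://ultralytics.com/images/zidane.jpg')
crops = results.crop(save=False)
for c in crops:
    print(c['label'], c['im'].shape)  # e.g. "person 0.88" followed by the crop's (h, w, 3)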
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
imc = im0.copy()  # for save_crop
annotator = Annotator(im0, line_width=line_thickness)
if len(det):
    # Rescale boxes from img_size to im0 size
    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

    # Print results
    for c in det[:, -1].unique():
        n = (det[:, -1] == c).sum()  # detections per class
        # s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

    # Write results
    for *xyxy, conf, cls in reversed(det):
        # Add bbox to image
        c = int(cls)  # integer class
        # label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
        label = None
        annotator.box_label(xyxy, label, color=colors(c, True))

# Stream results
im0 = annotator.result()
if True:
    cv2.imwrite("pred.jpg", im0)
    cv2.waitKey(0)  # wait for a key press
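# Hedged re-implementation of what scale_coords() does above, for a single box: boxes predicted
# on the letterboxed (resized + padded) image are mapped back to original-image pixels by
# undoing the resize gain and the padding. The function name and sample values are illustrative only.
import numpy as np

def unletterbox(box_xyxy, resized_shape, orig_shape):
    gain = min(resized_shape[0] / orig_shape[0], resized_shape[1] / orig_shape[1])  # resize gain
    pad_w = (resized_shape[1] - orig_shape[1] * gain) / 2  # horizontal padding
    pad_h = (resized_shape[0] - orig_shape[0] * gain) / 2  # vertical padding
    box = np.array(box_xyxy, dtype=float)
    box[[0, 2]] = (box[[0, 2]] - pad_w) / gain
    box[[1, 3]] = (box[[1, 3]] - pad_h) / gain
    return box.round()

# 640x640 network input, 720x1280 original frame -> [200., 0., 600., 520.]
print(unletterbox([100, 140, 300, 400], resized_shape=(640, 640), orig_shape=(720, 1280)))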
def run(
        weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
        source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        ):
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
    dt, seen = [0.0, 0.0, 0.0], 0
    for path, im, im0s, vid_cap, s in dataset:
        t1 = time_sync()
        im = torch.from_numpy(im).to(device)
        im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
        pred = model(im, augment=augment, visualize=visualize)
        t3 = time_sync()
        dt[1] += t3 - t2

        # NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        dt[2] += time_sync() - t3

        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(f'{txt_path}.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Stream results
            im0 = annotator.result()
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
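# Sketch of the normalized label line written when save_txt is enabled above: pixel xyxy boxes
# are converted to xywh (centre, width, height), divided by the whwh gain, and written as
# "cls x y w h [conf]". Standalone toy values; xyxy2xywh is inlined here for illustration.
import torch

im0_shape = (720, 1280, 3)                    # original image (h, w, c)
gn = torch.tensor(im0_shape)[[1, 0, 1, 0]]    # normalization gain whwh
xyxy = torch.tensor([200., 0., 600., 520.])
xywh = torch.cat(((xyxy[:2] + xyxy[2:]) / 2,  # box centre
                  xyxy[2:] - xyxy[:2]))       # box width, height
line = (0, *(xywh / gn).tolist(), 0.87)       # cls, x, y, w, h, conf
print(('%g ' * len(line)).rstrip() % line)    # 0 0.3125 0.361111 0.3125 0.722222 0.87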
def run(
        weights=ROOT / 'yolov5s.pt',  # model.pt path(s): trained weights
        source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam; image/video path, '0' for webcam, or an rtsp stream
        imgsz=640,  # inference size (pixels): network input image size
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IoU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=True,  # show results: display the annotated images/videos
        save_txt=False,  # save results to *.txt: save predicted box coordinates as txt, default False
        # save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3; keep only a subset of classes
        agnostic_nms=False,  # class-agnostic NMS: also suppress overlapping boxes of different classes
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        # update=False,  # update all models: if True, strip_optimizer() removes the optimizer state from the .pt files, default False
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        ):
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = weights[0] if isinstance(weights, list) else weights
    classify, suffix, suffixes = False, Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
    check_suffix(w, suffixes)  # check weights have acceptable suffix
    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = attempt_load(weights, map_location=device)  # load FP32 model; image size must be divisible by the stride
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        # Set Float16
        if half:
            model.half()  # to FP16
        # Set up a second-stage classifier
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model'])
            modelc.to(device).eval()
    # elif onnx:
    #     check_requirements(('onnx', 'onnxruntime'))
    #     import onnxruntime
    #     session = onnxruntime.InferenceSession(w, None)
    else:  # TensorFlow models
        check_requirements(('tensorflow>=2.4.1',))
        import tensorflow as tf
        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped import
                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                               tf.nest.map_structure(x.graph.as_graph_element, outputs))

            graph_def = tf.Graph().as_graph_def()
            graph_def.ParseFromString(open(w, 'rb').read())
            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
        elif saved_model:
            model = tf.keras.models.load_model(w)
        elif tflite:
            interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            int8 = input_details[0]['dtype'] == np.uint8  # is TFLite quantized uint8 model
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    # Choose the dataloader according to the input source
    # Webcam
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    # Image or video
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    if pt and device.type != 'cpu':
        # Run one forward pass to check that inference works
        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
    dt, seen = [0.0, 0.0, 0.0], 0
    '''
    path     image/video path
    img      image after resize + pad, e.g. (3, 640, 512), layout (c, h, w)
    im0s     original-size image, e.g. (1080, 810, 3)
    vid_cap  None when reading images, the video capture object when reading video
    '''
    for path, img, im0s, vid_cap in dataset:
        t1 = time_sync()
        if onnx:
            img = img.astype('float32')
        else:
            img = torch.from_numpy(img).to(device)
            # Cast the image to FP16 or FP32 as well
            img = img.half() if half else img.float()  # uint8 to fp16/32
        img = img / 255.0  # 0 - 255 to 0.0 - 1.0
        # Add a leading batch axis when it is missing
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        if pt:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            '''
            Forward pass; pred has shape (1, num_boxes, 5 + num_classes).
            h, w are the height and width of the network input; rectangular inference is used, so h is not necessarily equal to w.
            num_boxes = (h/32 * w/32 + h/16 * w/16 + h/8 * w/8) * 3
            e.g. a 720x1280 image -> 15120 boxes = (20*12 + 40*24 + 80*48 = 5040) * 3
            pred[..., 0:4]  box coordinates in xywh format
            pred[..., 4]    objectness confidence
            pred[..., 5:]   class scores
            '''
            pred = model(img, augment=augment, visualize=visualize)[0]
        # elif onnx:
        #     pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
        else:  # tensorflow model (tflite, pb, saved_model)
            imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
            if pb:
                pred = frozen_func(x=tf.constant(imn)).numpy()
            elif saved_model:
                pred = model(imn, training=False).numpy()
            elif tflite:
                if int8:
                    scale, zero_point = input_details[0]['quantization']
                    imn = (imn / scale + zero_point).astype(np.uint8)  # de-scale
                interpreter.set_tensor(input_details[0]['index'], imn)
                interpreter.invoke()
                pred = interpreter.get_tensor(output_details[0]['index'])
                if int8:
                    scale, zero_point = output_details[0]['quantization']
                    pred = (pred.astype(np.float32) - zero_point) * scale  # re-scale
            pred[..., 0] *= imgsz[1]  # x
            pred[..., 1] *= imgsz[0]  # y
            pred[..., 2] *= imgsz[1]  # w
            pred[..., 3] *= imgsz[0]  # h
            pred = torch.tensor(pred)
        t3 = time_sync()
        dt[1] += t3 - t2

        # NMS
        '''
        pred:          output of the forward pass
        conf_thres:    confidence threshold
        iou_thres:     IoU threshold
        classes:       keep only specific classes (or None for all)
        agnostic_nms:  whether NMS also suppresses overlapping boxes of different classes
        After NMS the box format changes from xywh to xyxy (top-left, bottom-right).
        pred becomes a list[torch.Tensor] with one tensor per image;
        each tensor has shape (num_boxes, 6): box (4 values) + conf + cls.
        '''
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        dt[2] += time_sync() - t3

        # Second-stage classifier (optional, default False)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process predictions, one image at a time
        for i, det in enumerate(pred):  # per image
            seen += 1
            # If the source is a webcam, batch_size may be > 1, so take one image out of the batch
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            # Paths for saving images/videos; p is the original image path
            save_path = str(save_dir / p.name)  # img.jpg
            # Path for the txt file of box coordinates
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            # Print string with the image width/height, e.g. '640x512'
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                # Map box coordinates from the resized + padded image back to the original image; format is xyxy here
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results: number of detections per class
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # if save_txt:  # Write to file
                    #     # Convert xyxy to xywh, normalize by w and h, convert to a list and save
                    #     xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    #     line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    #     with open(txt_path + '.txt', 'a') as f:
                    #         f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference-only)
            # print(f'{pred[0][0][0].tolist()} {pred[0][0][1].tolist()} {s}Done. ({t3 - t2:.3f}s)')

            # Stream results
            im0 = annotator.result()
            # xxx = (pred[0][0][0].tolist() + pred[0][0][2].tolist()) / 2
            # yyy = (pred[0][0][1].tolist() + pred[0][0][3].tolist()) / 2
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.moveWindow(str(p), 0, 0)
                # pyautogui.moveTo(xxx, yyy)
                cv2.waitKey(1000)  # 1000 milliseconds

            # Save results (image with detections)
            # if save_img:
            #     if dataset.mode == 'image':
            #         cv2.imwrite(save_path, im0)
            #     else:  # 'video' or 'stream'
            #         if vid_path[i] != save_path:  # new video
            #             vid_path[i] = save_path
            #             if isinstance(vid_writer[i], cv2.VideoWriter):
            #                 vid_writer[i].release()  # release previous video writer
            #             if vid_cap:  # video
            #                 fps = vid_cap.get(cv2.CAP_PROP_FPS)
            #                 w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            #                 h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #             else:  # stream
            #                 fps, w, h = 30, im0.shape[1], im0.shape[0]
            #             save_path += '.mp4'
            #             vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
            #         vid_writer[i].write(im0)

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
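# Worked check of the num_boxes formula quoted in the inference comment above, assuming a
# 640x384 letterboxed input (a 1280x720 frame scaled to width 640): the three strides give
# (w/8)*(h/8) + (w/16)*(h/16) + (w/32)*(h/32) grid cells, times 3 anchors per cell.
h, w = 384, 640
num_boxes = sum((h // s) * (w // s) for s in (8, 16, 32)) * 3
print(num_boxes)  # 15120 -> pred has shape (1, 15120, 5 + num_classes) before NMS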
def run(
        weights=ROOT / 'yolov5s.pt',  # model.pt path(s): trained weights
        imgsz=[640, 640],  # inference size (pixels): network input image size
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IoU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=True,  # show results: display the annotated images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3; keep only a subset of classes
        agnostic_nms=False,  # class-agnostic NMS: also suppress overlapping boxes of different classes
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        ):
    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = weights[0] if isinstance(weights, list) else weights
    classify, suffix, suffixes = False, Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
    check_suffix(w, suffixes)  # check weights have acceptable suffix
    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = attempt_load(weights, map_location=device)  # load FP32 model; image size must be divisible by the stride
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        # Set Float16
        if half:
            model.half()  # to FP16
        # Set up a second-stage classifier
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model'])
            modelc.to(device).eval()
    else:  # TensorFlow models
        check_requirements(('tensorflow>=2.4.1',))
        import tensorflow as tf
        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped import
                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                               tf.nest.map_structure(x.graph.as_graph_element, outputs))

            graph_def = tf.Graph().as_graph_def()
            graph_def.ParseFromString(open(w, 'rb').read())
            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
        elif saved_model:
            model = tf.keras.models.load_model(w)
        elif tflite:
            interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            int8 = input_details[0]['dtype'] == np.uint8  # is TFLite quantized uint8 model
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader: grab a region of the screen on every iteration instead of reading files
    tmp = False   # set when 'q' is pressed, breaks the per-image loop
    tmp2 = False  # propagated flag, breaks the outer while loop
    mon = {'top': 0, 'left': 0, 'width': 960, 'height': 960}
    while True:
        im = np.array(mss().grab(mon))
        screen = cv2.cvtColor(im, cv2.COLOR_BGRA2BGR)
        dataset = LoadImages(screen, img_size=imgsz, stride=stride, auto=pt)
        dt, seen = [0.0, 0.0, 0.0], 0
        '''
        path     image/video path
        img      image after resize + pad, e.g. (3, 640, 512), layout (c, h, w)
        im0s     original-size image, e.g. (1080, 810, 3)
        vid_cap  None when reading images, the video capture object when reading video
        '''
        for img, im0s, vid_cap in dataset:
            t1 = time_sync()
            if onnx:
                img = img.astype('float32')
            else:
                img = torch.from_numpy(img).to(device)
                # print(img)
                # Cast the image to FP16 or FP32 as well
                img = img.half() if half else img.float()  # uint8 to fp16/32
            img = img / 255.0  # 0 - 255 to 0.0 - 1.0
            # Add a leading batch axis when it is missing
            if len(img.shape) == 3:
                img = img[None]  # expand for batch dim
            t2 = time_sync()
            dt[0] += t2 - t1

            # Inference
            if pt:
                '''
                Forward pass; pred has shape (1, num_boxes, 5 + num_classes).
                h, w are the height and width of the network input; rectangular inference is used, so h is not necessarily equal to w.
                num_boxes = (h/32 * w/32 + h/16 * w/16 + h/8 * w/8) * 3
                e.g. a 720x1280 image -> 15120 boxes = (20*12 + 40*24 + 80*48 = 5040) * 3
                pred[..., 0:4]  box coordinates in xywh format
                pred[..., 4]    objectness confidence
                pred[..., 5:]   class scores
                '''
                pred = model(img, augment=augment, visualize=visualize)[0]
            else:  # tensorflow model (tflite, pb, saved_model)
                imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
                if pb:
                    pred = frozen_func(x=tf.constant(imn)).numpy()
                elif saved_model:
                    pred = model(imn, training=False).numpy()
                elif tflite:
                    if int8:
                        scale, zero_point = input_details[0]['quantization']
                        imn = (imn / scale + zero_point).astype(np.uint8)  # de-scale
                    interpreter.set_tensor(input_details[0]['index'], imn)
                    interpreter.invoke()
                    pred = interpreter.get_tensor(output_details[0]['index'])
                    if int8:
                        scale, zero_point = output_details[0]['quantization']
                        pred = (pred.astype(np.float32) - zero_point) * scale  # re-scale
                pred[..., 0] *= imgsz[1]  # x
                pred[..., 1] *= imgsz[0]  # y
                pred[..., 2] *= imgsz[1]  # w
                pred[..., 3] *= imgsz[0]  # h
                pred = torch.tensor(pred)
            t3 = time_sync()
            dt[1] += t3 - t2

            # NMS
            '''
            pred:          output of the forward pass
            conf_thres:    confidence threshold
            iou_thres:     IoU threshold
            classes:       keep only specific classes (or None for all)
            agnostic_nms:  whether NMS also suppresses overlapping boxes of different classes
            After NMS the box format changes from xywh to xyxy (top-left, bottom-right).
            pred becomes a list[torch.Tensor] with one tensor per image;
            each tensor has shape (num_boxes, 6): box (4 values) + conf + cls.
            '''
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
            dt[2] += time_sync() - t3

            # Second-stage classifier (optional, default False)
            # if classify:
            #     pred = apply_classifier(pred, modelc, img, im0s)

            # Process predictions, one image at a time
            for i, det in enumerate(pred):  # per image
                seen += 1
                s, im0 = '', im0s.copy()
                # Print string with the image width/height, e.g. '640x512'
                s += '%gx%g ' % img.shape[2:]  # print string
                annotator = Annotator(im0, line_width=line_thickness, example=str(names))
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    # Map box coordinates from the resized + padded image back to the original image; format is xyxy here
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results: number of detections per class
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    # Write results
                    for *xyxy, conf, cls in reversed(det):
                        if view_img:  # Add bbox to image
                            c = int(cls)  # integer class
                            label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                            annotator.box_label(xyxy, label, color=colors(c, True))

                # Stream results
                im0 = annotator.result()
                cv2.imshow('a crop of the screen', im0)
                cv2.moveWindow('a crop of the screen', 960, 0)
                if cv2.waitKey(1) & 0xff == ord('q'):
                    tmp = True
                    break
            if tmp:
                tmp2 = True
                break
        if tmp2:
            break
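# Minimal standalone sketch of the screen-capture loop above (assumption: the mss package and a
# display are available): grab the same 960x960 region, convert BGRA -> BGR, and show it until
# 'q' is pressed. The model loading and inference are omitted here.
import cv2
import numpy as np
from mss import mss

mon = {'top': 0, 'left': 0, 'width': 960, 'height': 960}
with mss() as sct:
    while True:
        frame = cv2.cvtColor(np.array(sct.grab(mon)), cv2.COLOR_BGRA2BGR)
        cv2.imshow('a crop of the screen', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
cv2.destroyAllWindows()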