def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
    for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
        s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # renamed from `str` to avoid shadowing the builtin
        if pred is not None:
            for c in pred[:, -1].unique():
                n = (pred[:, -1] == c).sum()  # detections per class
                s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
            if show or save or render or crop:
                for *box, conf, cls in pred:  # xyxy, confidence, class
                    label = f'{self.names[int(cls)]} {conf:.2f}'
                    if crop:
                        save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i])
                    else:  # all others
                        plot_one_box(box, im, label=label, color=colors(cls))
        im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
        if pprint:
            print(s.rstrip(', '))
        if show:
            im.show(self.files[i])  # show
        if save:
            f = self.files[i]
            im.save(save_dir / f)  # save
            print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n')
        if render:
            self.imgs[i] = np.asarray(im)
def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
    for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
        s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # renamed from `str` to avoid shadowing the builtin
        if pred.shape[0]:
            for c in pred[:, -1].unique():
                n = (pred[:, -1] == c).sum()  # detections per class
                s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
            if show or save or render or crop:
                annotator = Annotator(im, pil=not self.ascii)
                for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                    label = f'{self.names[int(cls)]} {conf:.2f}'
                    if crop:
                        save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i])
                    else:  # all others
                        annotator.box_label(box, label, color=colors(cls))
                im = annotator.im
        else:
            s += '(no detections)'
        im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
        if pprint:
            LOGGER.info(s.rstrip(', '))
        if show:
            im.show(self.files[i])  # show
        if save:
            f = self.files[i]
            im.save(save_dir / f)  # save
            if i == self.n - 1:
                LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
        if render:
            self.imgs[i] = np.asarray(im)
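# A minimal usage sketch for the two `display` variants above, assuming they live on
# the Detections object returned by a YOLOv5-style torch.hub model; the image path is
# a hypothetical placeholder.
import torch

model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # pretrained model
results = model('data/images/zidane.jpg')                # inference returns a Detections object
results.print()  # wraps display(pprint=True)
results.save()   # wraps display(save=True), writes annotated images under save_dir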
def plot(self, im, det):
    # Superseded plot_one_box version, kept commented for reference:
    # names = self.names; colors = self.colors
    # for *xyxy, conf, c in reversed(det):  # (x1,y1,x2,y2,conf,cls)
    #     c = int(c); label = f'{names[c]} {conf:.2f}'
    #     plot_one_box(xyxy, im, label=label, color=colors[c], line_width=2)  # add bbox to image
    names = self.names
    from utils.plots import colors
    annotator = Annotator(im, line_width=2, example=str(names))
    for *xyxy, conf, c in reversed(det):  # (x1,y1,x2,y2,conf,cls)
        c = int(c)
        label = f'{names[c]} {conf:.2f}'
        annotator.box_label(xyxy, label, color=colors(c, True))  # add bbox to image
    im[:] = annotator.result()
    return {names[int(c)]: int((det[:, -1] == c).sum()) for c in det[:, -1].unique()}
def __call__(self, image, *args, **kwargs):
    img = self.image_preprocess(image)

    # Inference
    pred = self.model(img)[0]

    # Apply NMS
    det = non_max_suppression(pred, self.conf_thres, self.iou_thres)[0]

    # Process detections
    im0 = image.copy()
    s = ''
    bbox_container = []
    if len(det):
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

        # Print results: detections per class
        for c in det[:, -1].unique():
            n = (det[:, -1] == c).sum()
            s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string

        # Write results
        for *xyxy, conf, cls in reversed(det):
            c = int(cls)
            label = f'{self.names[c]} {conf:.2f}'
            # xyxy: (x1, y1) top-left --> (x2, y2) bottom-right
            plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=2)
            bbox = {'class': self.names[c],
                    'confidence': round(conf.item(), 2),
                    'box': [int(v.item()) for v in xyxy]}
            bbox_container.append(bbox)
    print(s)
    return bbox_container
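# A minimal usage sketch for the __call__ wrapper above; the class name `Detector`
# and its constructor arguments are hypothetical placeholders for whatever object
# hosts this method (it needs image_preprocess, model, conf_thres, iou_thres, names).
import cv2

detector = Detector(weights='yolov5s.pt', conf_thres=0.25, iou_thres=0.45)  # hypothetical
image = cv2.imread('bus.jpg')
boxes = detector(image)
# Each entry is a dict like {'class': 'person', 'confidence': 0.87, 'box': [x1, y1, x2, y2]}
for b in boxes:
    print(b['class'], b['confidence'], b['box'])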
def detect_and_annotate(self, img_in):
    detections, img_out = self.get_detections(img_in)
    # Loop over detections; show the class name only on the first box of each class
    first = self.names.copy() if self.names else None
    for xywh, xyxy, conf, cls in detections:
        if isinstance(conf, tuple):
            conf = conf[0]
        img_lab = f'{conf:.2f}'
        # Add bbox to image
        c = int(cls)  # integer class
        label = True
        if first and first[c]:
            first[c] = 0
        else:
            label = False
        name = f'{self.names[c]} ' if label else ''
        label = f'{name}{img_lab}'
        plot_one_box(xyxy, img_out, label=label, color=colors(c, True), line_thickness=6)
    return img_out
def run_detections(self, opt):
    image_files = FileIterator(opt.source)
    for k, img_file in enumerate(image_files):
        p = Path(img_file)  # to Path
        if opt.save_dir is None:
            print('save_dir is set to None!')
            sys.exit()
        t1 = time_synchronized()
        detections, img0 = self.get_detections(img_file)  # , opt)
        t2 = time_synchronized()
        if k % 10 == 0:
            print(k)
        if opt.skip_empty and len(detections) == 0:
            continue

        # Print results to screen
        s = '%gx%g ' % img0.shape[:2]  # print string
        cls_idx = np.array([det[-1].cpu().numpy() for det in detections])
        for c in np.unique(cls_idx):
            n = (cls_idx == c).sum()  # detections per class
            s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string

        save_path = str(opt.save_dir / p.name)
        txt_path = str(opt.save_dir / 'labels' / p.stem)
        first = self.names.copy() if self.names else None

        # Loop over detections
        imc = img0.copy() if opt.save_crop else img0  # for opt.save_crop
        for xywh, xyxy, conf, cls in detections:
            # Write label to txt file
            txt_lab, img_lab = self.format_labels(xywh, cls, conf, opt)
            with open(txt_path + '.txt', 'a') as f:
                f.write(('%g ' * len(txt_lab)).rstrip() % txt_lab + '\n')

            # Add bbox to image; show the class name only on the first box of each class
            if opt.save_img or opt.save_crop:
                c = int(cls)  # integer class
                label = True
                if first and first[c]:
                    first[c] = 0
                else:
                    label = False
                name = f'{self.names[c]} ' if label else ''
                label = None if opt.hide_labels else f'{name}{img_lab}'
                plot_one_box(xyxy, img0, label=label, color=colors(c, True), line_thickness=opt.line_thickness)
                if opt.save_crop:
                    save_one_box(xyxy, imc, file=opt.save_dir / 'crops' / self.names[c] / f'{p.stem}.jpg', BGR=True)

        print(f'{s}Done. ({t2 - t1:.3f}s)')
        if opt.save_img and len(detections) > 0:
            cv2.imwrite(save_path, img0)
        if len(detections) == 0:
            empty_file(txt_path + '.txt')
def detect(opt):
    source, weights, view_img, imgsz = opt.source, opt.weights, opt.view_img, opt.img_size
    save_dir = 'output'
    file_name = Path(source).name
    source = cv2.VideoCapture(source)
    width = int(source.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(source.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_writer = cv2.VideoWriter('video_detection.avi', cv2.VideoWriter_fourcc(*'MJPG'), 20, (width, height))

    # Load model once, before the frame loop (the original reloaded it on every frame)
    device = select_device(opt.device)
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once

    while True:
        validation, frame = source.read()
        if not validation:
            break
        img = frame
        original_image = img.copy()
        t0 = time.time()
        # img = cv2.resize(img, (416, 416))
        img = letterbox(img)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(device)
        img = img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        img = img.unsqueeze(0)

        # Inference
        pred = model(img, augment=False)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        save_path = os.path.join(save_dir, file_name)

        # Process detections
        for det in pred:  # detections per image
            if len(det):
                # Rescale boxes from img size to original_image size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], original_image.shape).round()

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    c = int(cls)  # integer class
                    label = names[c] if opt.hide_conf else f'{names[c]} {conf:.2f}'
                    plot_one_box(xyxy, original_image, label=label, color=colors(c, True), line_thickness=2)

        if view_img:
            cv2.imshow('result', original_image)
            cv2.waitKey(1)  # 1 millisecond

        # Save results (image with detections)
        # cv2.imwrite(save_path, original_image)
        # video_writer.write(original_image)

    # video_writer.release()
    print(f'Done. ({time.time() - t0:.3f}s)')
def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s), i.e. trained weights
        source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam; image/video path, '0' for camera, or an rtsp stream
        imgsz=640,  # inference size (pixels); network input size -- note: the body below indexes this as an (h, w) pair
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IoU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=True,  # show results: display the predicted images/video
        save_txt=False,  # save results to *.txt: save predicted box coordinates in txt format, default False
        # save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3; keep only the given classes
        agnostic_nms=False,  # class-agnostic NMS: also suppress overlapping boxes of different classes
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        # update=False,  # update all models: if True, run strip_optimizer on all models to remove optimizer state from the .pt file, default False
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        ):
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = weights[0] if isinstance(weights, list) else weights
    classify, suffix, suffixes = False, Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
    check_suffix(w, suffixes)  # check weights have acceptable suffix
    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = attempt_load(weights, map_location=device)  # load FP32 model; image size must be divisible by the stride (32)
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        # Set float16
        if half:
            model.half()  # to FP16
        # Set up second-stage classification
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()
    # elif onnx:
    #     check_requirements(('onnx', 'onnxruntime'))
    #     import onnxruntime
    #     session = onnxruntime.InferenceSession(w, None)
    else:  # TensorFlow models
        check_requirements(('tensorflow>=2.4.1',))
        import tensorflow as tf
        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped import
                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                               tf.nest.map_structure(x.graph.as_graph_element, outputs))

            graph_def = tf.Graph().as_graph_def()
            graph_def.ParseFromString(open(w, 'rb').read())
            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
        elif saved_model:
            model = tf.keras.models.load_model(w)
        elif tflite:
            interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            int8 = input_details[0]['dtype'] == np.uint8  # is TFLite quantized uint8 model
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader: choose the data loading path for the given input source
    if webcam:  # camera
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:  # image or video
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    if pt and device.type != 'cpu':
        # One forward pass to warm up and sanity-check the model
        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
    dt, seen = [0.0, 0.0, 0.0], 0
    # path: image/video path
    # img: resized + padded image, e.g. (3, 640, 512), in (c, h, w) format
    # im0s: original-size image, e.g. (1080, 810, 3)
    # vid_cap: None when reading images, the video source when reading video
    for path, img, im0s, vid_cap in dataset:
        t1 = time_sync()
        if onnx:
            img = img.astype('float32')
        else:
            img = torch.from_numpy(img).to(device)
            # Cast the image to float16 or float32 as well
            img = img.half() if half else img.float()  # uint8 to fp16/32
        img = img / 255.0  # 0 - 255 to 0.0 - 1.0
        # Add a leading axis when there is no batch dimension
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        if pt:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            # Forward pass; pred has shape (1, num_boxes, 5 + num_classes).
            # With h, w the network input size (rectangular inference, so h need not equal w):
            #   num_boxes = (h/32 * w/32 + h/16 * w/16 + h/8 * w/8) * 3
            #   e.g. a 720x1280 image -> (20*12 + 40*24 + 80*48) * 3 = 5040 * 3 = 15120 boxes
            # pred[..., 0:4] are box coordinates in xywh format
            # pred[..., 4] is the objectness confidence
            # pred[..., 5:] are the per-class scores
            pred = model(img, augment=augment, visualize=visualize)[0]
        # elif onnx:
        #     pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
        else:  # tensorflow model (tflite, pb, saved_model)
            imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
            if pb:
                pred = frozen_func(x=tf.constant(imn)).numpy()
            elif saved_model:
                pred = model(imn, training=False).numpy()
            elif tflite:
                if int8:
                    scale, zero_point = input_details[0]['quantization']
                    imn = (imn / scale + zero_point).astype(np.uint8)  # de-scale
                interpreter.set_tensor(input_details[0]['index'], imn)
                interpreter.invoke()
                pred = interpreter.get_tensor(output_details[0]['index'])
                if int8:
                    scale, zero_point = output_details[0]['quantization']
                    pred = (pred.astype(np.float32) - zero_point) * scale  # re-scale
            pred[..., 0] *= imgsz[1]  # x
            pred[..., 1] *= imgsz[0]  # y
            pred[..., 2] *= imgsz[1]  # w
            pred[..., 3] *= imgsz[0]  # h
            pred = torch.tensor(pred)
        t3 = time_sync()
        dt[1] += t3 - t2

        # NMS
        # pred: raw forward-pass output
        # conf_thres: confidence threshold; iou_thres: IoU threshold
        # classes: optionally keep only specific classes
        # agnostic_nms: also suppress overlapping boxes of different classes
        # After NMS the box format changes from xywh to xyxy (top-left, bottom-right).
        # pred becomes a list[torch.Tensor] with one tensor per image; each tensor has
        # shape (num_boxes, 6): box (4 values) + conf + cls.
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        dt[2] += time_sync() - t3

        # Second-stage classifier (optional), default False
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process predictions: handle each image in turn
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1; for webcam sources take image i out of the batch
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            # Output paths (p is the original image path)
            save_path = str(save_dir / p.name)  # img.jpg
            # Path for the txt file with box coordinates
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            # Print string with the image size, e.g. '640x512 '
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size: map coordinates from the
                # resized+padded image back to the original image; format is xyxy
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results: count detections per class
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results: save predictions
                for *xyxy, conf, cls in reversed(det):
                    # if save_txt:  # Write to file
                    #     # Convert xyxy to xywh, normalize by w, h, convert to a list, then save
                    #     xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    #     line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    #     with open(txt_path + '.txt', 'a') as f:
                    #         f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference-only)
            # print(f'{pred[0][0][0].tolist()} {pred[0][0][1].tolist()} {s}Done. ({t3 - t2:.3f}s)')

            # Stream results
            im0 = annotator.result()
            # xxx = (pred[0][0][0].tolist() + pred[0][0][2].tolist()) / 2
            # yyy = (pred[0][0][1].tolist() + pred[0][0][3].tolist()) / 2
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.moveWindow(str(p), 0, 0)
                # pyautogui.moveTo(xxx, yyy)
                cv2.waitKey(1000)  # 1 second

            # Save results (image with detections)
            # if save_img:
            #     if dataset.mode == 'image':
            #         cv2.imwrite(save_path, im0)
            #     else:  # 'video' or 'stream'
            #         if vid_path[i] != save_path:  # new video
            #             vid_path[i] = save_path
            #             if isinstance(vid_writer[i], cv2.VideoWriter):
            #                 vid_writer[i].release()  # release previous video writer
            #             if vid_cap:  # video
            #                 fps = vid_cap.get(cv2.CAP_PROP_FPS)
            #                 w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            #                 h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #             else:  # stream
            #                 fps, w, h = 30, im0.shape[1], im0.shape[0]
            #             save_path += '.mp4'
            #             vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
            #         vid_writer[i].write(im0)

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
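# A minimal call sketch for the annotated run() above, assuming the surrounding
# YOLOv5-style repo layout (ROOT defined at module level). Note the body indexes
# imgsz as an (h, w) pair (torch.zeros(1, 3, *imgsz), imgsz[0], imgsz[1]), so a
# tuple is passed here rather than the scalar default in the signature.
if __name__ == '__main__':
    run(weights=ROOT / 'yolov5s.pt',
        source=ROOT / 'data/images',
        imgsz=(640, 640),
        conf_thres=0.25,
        view_img=False)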
def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
        source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IoU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        ):
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = str(weights[0] if isinstance(weights, list) else weights)
    classify, suffix, suffixes = False, Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
    check_suffix(w, suffixes)  # check weights have acceptable suffix
    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device, fuse=False)
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        # Debug helpers kept for reference:
        # for _, param in enumerate(model.named_parameters()):
        #     print("====>", param[0], param[1].shape)
        # torch.save(model.state_dict(), 'new_params.pt')
        # for k, v in model.state_dict().items():
        #     print(k, v.shape)
        # exit()
        if half:
            model.half()  # to FP16
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()
    elif onnx:
        if dnn:
            # check_requirements(('opencv-python>=4.5.4',))
            net = cv2.dnn.readNetFromONNX(w)
        else:
            check_requirements(('onnx', 'onnxruntime'))
            import onnxruntime
            session = onnxruntime.InferenceSession(w, None)
    else:  # TensorFlow models
        check_requirements(('tensorflow>=2.4.1',))
        import tensorflow as tf
        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped import
                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                               tf.nest.map_structure(x.graph.as_graph_element, outputs))

            graph_def = tf.Graph().as_graph_def()
            graph_def.ParseFromString(open(w, 'rb').read())
            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
        elif saved_model:
            model = tf.keras.models.load_model(w)
        elif tflite:
            interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            int8 = input_details[0]['dtype'] == np.uint8  # is TFLite quantized uint8 model
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    if pt and device.type != 'cpu':
        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
    dt, seen = [0.0, 0.0, 0.0], 0
    for path, img, im0s, vid_cap in dataset:
        t1 = time_sync()
        if onnx:
            img = img.astype('float32')
        else:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
        img = img / 255.0  # 0 - 255 to 0.0 - 1.0
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        if pt:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(img, augment=augment, visualize=visualize)[0]
            # Export weights to a TensorRT-style .wts file, then stop. Note this block
            # runs on the first pt inference and calls exit(), so it turns this function
            # into a one-shot weight converter. The header counts all state_dict keys,
            # but zero-dim tensors are skipped below, so the declared count may overshoot.
            anchor_grid = model.model[-1].anchors * model.model[-1].stride[..., None, None]
            delattr(model.model[-1], 'anchor_grid')  # model.model[-1] is the Detect layer
            model.model[-1].register_buffer("anchor_grid", anchor_grid)
            model.to(device).eval()
            wts_file = "generated.wts"
            with open(wts_file, 'w') as f:
                f.write('{}\n'.format(len(model.state_dict().keys())))
                for k, v in model.state_dict().items():
                    if len(v.shape) == 0:
                        continue
                    print(k, v.shape)
                    vr = v.reshape(-1).cpu().numpy()
                    f.write('{} {} {} {}'.format(k, len(vr), v.shape[0], v.shape[1] if len(v.shape) > 1 else 0))
                    for vv in vr:
                        f.write(' ')
                        f.write(struct.pack('>f', float(vv)).hex())
                    f.write('\n')
            exit()
        elif onnx:
            if dnn:
                net.setInput(img)
                pred = torch.tensor(net.forward())
            else:
                pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
        else:  # tensorflow model (tflite, pb, saved_model)
            imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
            if pb:
                pred = frozen_func(x=tf.constant(imn)).numpy()
            elif saved_model:
                pred = model(imn, training=False).numpy()
            elif tflite:
                if int8:
                    scale, zero_point = input_details[0]['quantization']
                    imn = (imn / scale + zero_point).astype(np.uint8)  # de-scale
                interpreter.set_tensor(input_details[0]['index'], imn)
                interpreter.invoke()
                pred = interpreter.get_tensor(output_details[0]['index'])
                if int8:
                    scale, zero_point = output_details[0]['quantization']
                    pred = (pred.astype(np.float32) - zero_point) * scale  # re-scale
            pred[..., 0] *= imgsz[1]  # x
            pred[..., 1] *= imgsz[0]  # y
            pred[..., 2] *= imgsz[1]  # w
            pred[..., 3] *= imgsz[0]  # h
            pred = torch.tensor(pred)
        t3 = time_sync()
        dt[1] += t3 - t2

        # NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        dt[2] += time_sync() - t3

        # Second-stage classifier (optional)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference-only)
            print(f'{s}Done. ({t3 - t2:.3f}s)')

            # Stream results
            im0 = annotator.result()
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path += '.mp4'
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
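# A minimal reader sketch for the generated.wts layout written above: a count line,
# then one line per tensor of the form "name length dim0 dim1 <hex ...>", where each
# value is a big-endian float32 produced by struct.pack('>f', v).hex(). Because the
# writer skips zero-dim tensors while still counting them in the header, this reader
# iterates over the remaining lines instead of trusting the declared count.
import struct

import numpy as np


def read_wts(path='generated.wts'):
    weights = {}
    with open(path) as f:
        f.readline()  # header: declared tensor count (may overcount, see note above)
        for line in f:
            name, length, _dim0, _dim1, *hex_vals = line.split()
            vals = [struct.unpack('>f', bytes.fromhex(h))[0] for h in hex_vals]
            weights[name] = np.array(vals[:int(length)], dtype=np.float32)
    return weights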
def detect(opt):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or opt.save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if opt.hide_labels else (names[c] if opt.hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=opt.line_thickness)
                        if opt.save_crop:
                            save_one_box(xyxy, im0s, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    print(f'Done. ({time.time() - t0:.3f}s)')
def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
        source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IoU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        ):
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
    dt, seen = [0.0, 0.0, 0.0], 0
    for path, im, im0s, vid_cap, s in dataset:
        t1 = time_sync()
        im = torch.from_numpy(im).to(device)
        im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
        pred = model(im, augment=augment, visualize=visualize)
        t3 = time_sync()
        dt[1] += t3 - t2

        # NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        dt[2] += time_sync() - t3

        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(f'{txt_path}.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Stream results
            im0 = annotator.result()
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
def detect(weights='yolov5s.pt',  # model.pt path(s)
           source='data/images',  # file/dir/URL/glob, 0 for webcam
           imgsz=640,  # inference size (pixels)
           conf_thres=0.25,  # confidence threshold
           iou_thres=0.45,  # NMS IoU threshold
           max_det=1000,  # maximum detections per image
           device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
           view_img=False,  # show results
           save_txt=False,  # save results to *.txt
           save_conf=False,  # save confidences in --save-txt labels
           save_crop=False,  # save cropped prediction boxes
           nosave=False,  # do not save images/videos
           classes=None,  # filter by class: --class 0, or --class 0 2 3
           agnostic_nms=False,  # class-agnostic NMS
           augment=False,  # augmented inference
           update=False,  # update all models
           project='runs/detect',  # save results to project/name
           name='exp',  # save results to project/name
           exist_ok=False,  # existing project/name ok, do not increment
           line_thickness=3,  # bounding box thickness (pixels)
           hide_labels=False,  # hide labels
           hide_conf=False,  # hide confidences
           half=False,  # use FP16 half-precision inference
           ):
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))
    realsense = "realsense" in source

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check image size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet50', n=2)  # initialize
        modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    elif realsense:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadRealSense2(width=640, height=480, fps=15, img_size=imgsz)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, depth, distance, depth_scale, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # Split xyxy (measurement)
                    xmin = int(xyxy[0])
                    ymin = int(xyxy[1])
                    xmax = int(xyxy[2])
                    ymax = int(xyxy[3])
                    # Depth at the centroid of the object (pixel coordinates)
                    xc = int(round((xmax + xmin) / 2, 0))
                    yc = int(round((ymax + ymin) / 2, 0))
                    # Note: NumPy images index as [row, col] = [y, x], so depth[yc, xc]
                    # may be intended here unless the depth map is transposed.
                    object_depth = depth[xc, yc]
                    object_distance = distance[xc, yc]
                    print('object depth:', object_depth)
                    print('object distance:', object_distance)
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
    print(f'Done. ({time.time() - t0:.3f}s)')
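# A hedged sketch of the centroid-depth lookup used above. NumPy images index as
# [row, col] = [y, x], so if `depth` is an HxW array aligned with the color frame the
# lookup is depth[yc, xc]; the code above uses depth[xc, yc], which only matches if
# the depth map is transposed. Assumptions: xyxy is a pixel-space box, and depth_scale
# converts raw depth units to meters (as in librealsense).
import numpy as np

def centroid_depth(depth, xyxy, depth_scale=1.0):
    xmin, ymin, xmax, ymax = (int(v) for v in xyxy)
    xc = (xmin + xmax) // 2  # centroid column (x)
    yc = (ymin + ymax) // 2  # centroid row (y)
    return float(depth[yc, xc]) * depth_scale  # meters, under the assumptions above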
def apply(opt):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images

    # Directories
    save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
    (save_dir / 'data' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)
    with (save_dir / f"params_{Path(opt.source).name}.json").open("w") as f:
        f.write(json.dumps(opt.__dict__, indent=4))

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    dataset = LoadRiceImages(source, img_size=imgsz, stride=stride, dshape=opt.dshape, ishape=opt.ishape)

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    my = None
    for path, imgs, imgs0, _, big_img in dataset:
        path = Path(path)
        ori_img = cv2.imread(str(path))
        save_path = str(save_dir / path.name)
        txt_path = str(save_dir / "labels" / f"{path.stem}.csv")
        data_path = str(save_dir / "data" / f"{path.stem}.csv")
        coords = []
        img_type = str(path.name)[0].lower()
        for r in range(imgs.shape[0]):  # tile rows
            for c in range(imgs.shape[1]):  # tile columns
                conf_thres = opt.i_conf_thres if img_type == "i" else opt.d_conf_thres
                img = imgs[r, c]
                im0s = imgs0[r, c]
                img = torch.from_numpy(img).to(device)
                img = img.half() if half else img.float()  # uint8 to fp16/32
                img /= 255.0  # 0 - 255 to 0.0 - 1.0
                if img.ndimension() == 3:
                    img = img.unsqueeze(0)

                # Inference
                t1 = time_synchronized()
                pred = model(img, augment=opt.augment)[0]

                # Apply NMS
                pred = non_max_suppression(pred, conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
                t2 = time_synchronized()

                # Process detections
                for i, det in enumerate(pred):  # detections per image
                    p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)
                    p = Path(p)  # to Path
                    s += '%gx%g ' % img.shape[2:]  # print string
                    gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                    if len(det):
                        # Rescale boxes from img_size to im0 size
                        # det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                        # Print results
                        for cl in det[:, -1].unique():
                            n = (det[:, -1] == cl).sum()  # detections per class
                            s += f"{n} {names[int(cl)]}{'s' * (n > 1)}, "  # add to string

                        # Write results
                        for *xyxy, conf, cl in reversed(det):
                            print(xyxy)
                            # sys.exit(0)
                            if save_txt:  # Write to file
                                xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                                x, y = xywh[:2]
                                x, y = x * im0.shape[1], y * im0.shape[0]
                                # Shift tile-local coordinates into the full-image frame
                                x += c * im0.shape[1]
                                y += r * im0.shape[0]
                                cl = cl.cpu()
                                # Only append if the predicted class matches the img_type
                                if (cl == 0 and img_type == "i") or (cl == 1 and img_type == "d"):
                                    coords.append(np.array((conf.cpu().item() * 100, x, y, cl)))
                            if save_img or view_img:  # Add bbox to image
                                cls_id = int(cl)  # integer class (renamed: `c` already holds the tile column index)
                                label = None if opt.hide_labels else (names[cls_id] if opt.hide_conf else f'{names[cls_id]} {conf:.2f}')
                                plot_one_box(xyxy, im0, label=label, color=colors(cls_id, True), line_thickness=opt.line_thickness)

                    # Print time (inference + NMS)
                    print(f'{s}Done. ({t2 - t1:.3f}s)')

                    # Save results (image with detections)
                    if save_img:
                        if dataset.mode == 'image':
                            cv2.imwrite('out_old.jpg', im0)
                            sys.exit(0)  # debug early exit: stops after writing the first annotated tile

        v_grid_starts, h_grid_starts = [], []
        if opt.grid:
            x = imgs[0, 0].shape[2]
            while x < ori_img.shape[1]:
                v_grid_starts.append(x)
                x += imgs[0, 0].shape[2]
            y = imgs[0, 0].shape[1]
            while y < ori_img.shape[0]:
                h_grid_starts.append(y)
                y += imgs[0, 0].shape[1]
        v_grid_starts, h_grid_starts = np.array(v_grid_starts, dtype=float), np.array(h_grid_starts, dtype=float)
        # imgs[0, 0].shape is (c, h, w)
        scale_x = ori_img.shape[1] / (imgs.shape[1] * imgs[0, 0].shape[2])
        scale_y = ori_img.shape[0] / (imgs.shape[0] * imgs[0, 0].shape[1])
        coords = np.array(coords)
        coords[:, 1] *= scale_x
        coords[:, 2] *= scale_y
        coords = np.around(coords).astype(int)
        close_tol = opt.i_close if img_type == "i" else opt.d_close
        v_grid_starts *= scale_x
        h_grid_starts *= scale_y
        v_grid_starts, h_grid_starts = np.around(v_grid_starts).astype(int), np.around(h_grid_starts).astype(int)
        axis_expand = opt.i_axis_expand if img_type == "i" else opt.d_axis_expand
        coords = filter_too_close(coords, tolerance=close_tol, h_axis=h_grid_starts, v_axis=v_grid_starts, axis_expand=axis_expand)
        coords = filter_border(coords, ori_img.shape, tolerance=opt.border)
        gt_path = path.parent / f"{path.stem}.csv"
        if save_txt:
            with open(txt_path, "w") as f:
                np.savetxt(f, coords[:, 1:3], fmt="%d", delimiter=",")
            with open(data_path, "w") as f:
                np.savetxt(f, coords[:, 0:3], fmt="%d", delimiter=",")
        if save_img:
            if "border" in vars(opt) and opt.border > 0:
                ori_img = draw_border(ori_img, opt.border)
            if opt.grid:
                ori_img = draw_grid(ori_img, v_grid_starts, h_grid_starts)
            if opt.with_gt:
                gts = np.loadtxt(gt_path, dtype=int, delimiter=",", ndmin=2)
                for x, y in gts:
                    ori_img = cv2.circle(ori_img, (x, y), 9, (255, 255, 255), 2)
            for conf, x, y, cl in coords:
                if cl == 0:
                    circle_color = (255, 0, 0)
                elif cl == 1:
                    circle_color = (0, 0, 255)
                if not opt.hide_conf:
                    # print(conf)
                    ori_img = cv2.putText(ori_img, f"{conf}%", (x, y - 3), 0, 1, (255, 255, 0), 2)
                ori_img = cv2.circle(ori_img, (x, y), 4, circle_color, -1)
            cv2.imwrite(save_path, ori_img)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    print(f'Done. ({time.time() - t0:.3f}s)')
def detect_image(self,
                 img_cv,
                 conf_thres=0.25,  # confidence threshold
                 iou_thres=0.45,  # NMS IoU threshold
                 max_det=1000,  # maximum detections per image
                 classes=None,
                 agnostic_nms=False,  # class-agnostic NMS
                 line_thickness=3,  # bounding box thickness (pixels)
                 hide_labels=False,  # hide labels
                 hide_conf=False,  # hide confidences
                 ):
    device = select_device('')

    # Preprocess, mirroring what LoadImages does
    img = letterbox(img_cv, 640, stride=32)[0]  # padded resize
    img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.float()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if len(img.shape) == 3:
        img = img[None]  # expand for batch dim

    # Inference
    pred = self.model(img, augment=False, visualize=False)[0]

    # NMS
    pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

    # Process predictions
    bbox_pred = []
    s, im0 = '', img_cv.copy()
    for i, det in enumerate(pred):  # detections per image
        s += '%gx%g ' % img.shape[2:]  # print string
        if len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

            # Visualize results
            for *xyxy, conf, cls in reversed(det):
                c = int(cls)  # integer class
                label = None if hide_labels else (self.names[c] if hide_conf else f'{self.names[c]} {conf:.2f}')
                plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)
        bbox_pred.append(det.cpu().numpy()[:, :-1])  # boxes rescaled to the original image size; class column dropped
    return bbox_pred[0], im0
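# A minimal usage sketch for detect_image above; `detector` stands in for whatever
# object hosts this method (it only needs self.model and self.names). The returned
# array has one row per box: x1, y1, x2, y2, conf (the class column is stripped).
import cv2

frame = cv2.imread('bus.jpg')  # any BGR image
boxes, annotated = detector.detect_image(frame, conf_thres=0.4)
print(boxes)  # (N, 5) ndarray of xyxy + confidence
cv2.imwrite('annotated.jpg', annotated)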
# (fragment: the start of this snippet is truncated in the source; the line below is
#  reconstructed from the identical pattern used in the other snippets)
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
for i, det in enumerate(pred):
    if len(det):
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], out_img.shape).round()

        # Write results
        for *xyxy, conf, cls in reversed(det):
            if opt.show:  # Add bbox to image
                c = int(cls)  # integer class
                label = f'{names[c]} {conf:.2f}'
                plot_one_box(xyxy, out_img, label=label, color=colors(c, True), line_thickness=opt.line_thickness)
t1 = time.time()
t2 = time.time()
times_infer.append(t1 - t0)
times_pipe.append(t2 - t0)
times_infer = times_infer[-20:]  # rolling window: keep the last 20 samples
times_pipe = times_pipe[-20:]
ms = sum(times_infer) / len(times_infer) * 1000
fps_infer = 1000 / (ms + 0.00001)  # small epsilon avoids division by zero
fps_pipe = 1000 / (sum(times_pipe) / len(times_pipe) * 1000)
def run(weights='yolov5s.pt',  # model.pt path(s)
        source='data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project='runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        ):
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))
    sheet = pd.DataFrame()  # accumulates one row per detection for the CSV export

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = weights[0] if isinstance(weights, list) else weights
    classify, pt, onnx = False, w.endswith('.pt'), w.endswith('.onnx')  # inference type
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = attempt_load(weights, map_location=device)  # load FP32 model
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        if half:
            model.half()  # to FP16
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model'])
            modelc.to(device).eval()  # load_state_dict() returns key info, not the module, so move/eval separately
    elif onnx:
        check_requirements(('onnx', 'onnxruntime'))
        import onnxruntime
        session = onnxruntime.InferenceSession(w, None)
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    if pt and device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        if pt:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
        elif onnx:
            img = img.astype('float32')
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim

        # Inference
        t1 = time_sync()
        if pt:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(img, augment=augment, visualize=visualize)[0]
        elif onnx:
            pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))

        # NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        t2 = time_sync()

        # Second-stage classifier (optional)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        # line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        # with open(txt_path + '.txt', 'a') as f:
                        #     f.write(('%g ' * len(line)).rstrip() % line + '\n')
                        lst = torch.tensor(xyxy).tolist()
                        dic = dict(name=names[int(cls)],
                                   image_id=str(path.split('\\')[-1].split('.')[0]),  # Windows path separator assumed
                                   confidence=float(conf),
                                   xmin=int(lst[0]), ymin=int(lst[1]),
                                   xmax=int(lst[2]), ymax=int(lst[3]))
                        sheet = sheet.append(dic, ignore_index=True)  # DataFrame.append is deprecated in pandas >= 1.4
                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    cols = ['name', 'image_id', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax']
    sheet = sheet.loc[:, cols]
    sheet.to_csv('result.csv', index=False)
    print(f'Done. ({time.time() - t0:.3f}s)')
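Appending to a DataFrame one row at a time, as above, is quadratic, and DataFrame.append was removed outright in pandas 2.0. A sketch of the usual replacement, collecting plain dicts and building the frame once at the end (the example row values are placeholders; the column names follow the snippet's CSV schema):

import pandas as pd

rows = []  # plain Python list; appending is O(1)
# inside the detection loop, append one dict per box:
rows.append({'name': 'person', 'image_id': 'img_0001', 'confidence': 0.91,
             'xmin': 10, 'ymin': 20, 'xmax': 110, 'ymax': 220})  # example values
# after the loop, build the frame once:
sheet = pd.DataFrame(rows, columns=['name', 'image_id', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax'])
sheet.to_csv('result.csv', index=False)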
def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s) — trained weights
        imgsz=[640, 640],  # inference size (pixels) — network input image size
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=True,  # show results — whether to display the annotated image/video
        classes=None,  # filter by class: --class 0, or --class 0 2 3 — keep only the given classes
        agnostic_nms=False,  # class-agnostic NMS — also suppress overlapping boxes of different classes
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        ):
    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = weights[0] if isinstance(weights, list) else weights
    classify, suffix, suffixes = False, Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
    check_suffix(w, suffixes)  # check weights have acceptable suffix
    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = attempt_load(weights, map_location=device)  # load FP32 model; input size must be divisible by the stride (32)
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        # set Float16
        if half:
            model.half()  # to FP16
        # set up second-stage classification
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model'])
            modelc.to(device).eval()  # load_state_dict() does not return the module, so move/eval separately
    else:  # TensorFlow models
        check_requirements(('tensorflow>=2.4.1',))
        import tensorflow as tf
        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped import
                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                               tf.nest.map_structure(x.graph.as_graph_element, outputs))

            graph_def = tf.Graph().as_graph_def()
            graph_def.ParseFromString(open(w, 'rb').read())
            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
        elif saved_model:
            model = tf.keras.models.load_model(w)
        elif tflite:
            interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            int8 = input_details[0]['dtype'] == np.uint8  # is TFLite quantized uint8 model
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader — grab a crop of the screen instead of reading image/video files
    tmp = False
    tmp2 = False
    mon = {'top': 0, 'left': 0, 'width': 960, 'height': 960}
    while True:
        im = np.array(mss().grab(mon))
        screen = cv2.cvtColor(im, cv2.COLOR_BGRA2BGR)
        dataset = LoadImages(screen, img_size=imgsz, stride=stride, auto=pt)
        dt, seen = [0.0, 0.0, 0.0], 0
        # path: image/video path
        # img: image after resize + pad, e.g. (3, 640, 512), layout (c, h, w)
        # img0s: original-size image, e.g. (1080, 810, 3)
        # cap: None when reading an image, the video source when reading video
        for img, im0s, vid_cap in dataset:
            t1 = time_sync()
            if onnx:
                img = img.astype('float32')
            else:
                img = torch.from_numpy(img).to(device)
                # cast the image to Float16 or Float32 to match the model
                img = img.half() if half else img.float()  # uint8 to fp16/32
            img = img / 255.0  # 0 - 255 to 0.0 - 1.0
            # when there is no batch dimension, add an axis at the front
            if len(img.shape) == 3:
                img = img[None]  # expand for batch dim
            t2 = time_sync()
            dt[0] += t2 - t1

            # Inference
            if pt:
                # Forward pass; pred has shape (1, num_boxes, 5 + num_classes).
                # h, w are the height and width of the network input; rectangular inference
                # is used at detection time, so h need not equal w.
                # num_boxes = (h/32 * w/32 + h/16 * w/16 + h/8 * w/8) * 3
                # e.g. a 720x1280 image -> (20*12 + 40*24 + 80*48 = 5040) * 3 = 15120 boxes
                # pred[..., 0:4] are the predicted box coordinates, in xywh format
                # pred[..., 4] is the objectness confidence
                # pred[..., 5:] are the class scores
                pred = model(img, augment=augment, visualize=visualize)[0]
            else:  # tensorflow model (tflite, pb, saved_model)
                imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
                if pb:
                    pred = frozen_func(x=tf.constant(imn)).numpy()
                elif saved_model:
                    pred = model(imn, training=False).numpy()
                elif tflite:
                    if int8:
                        scale, zero_point = input_details[0]['quantization']
                        imn = (imn / scale + zero_point).astype(np.uint8)  # de-scale
                    interpreter.set_tensor(input_details[0]['index'], imn)
                    interpreter.invoke()
                    pred = interpreter.get_tensor(output_details[0]['index'])
                    if int8:
                        scale, zero_point = output_details[0]['quantization']
                        pred = (pred.astype(np.float32) - zero_point) * scale  # re-scale
                pred[..., 0] *= imgsz[1]  # x
                pred[..., 1] *= imgsz[0]  # y
                pred[..., 2] *= imgsz[1]  # w
                pred[..., 3] *= imgsz[0]  # h
                pred = torch.tensor(pred)
            t3 = time_sync()
            dt[1] += t3 - t2

            # NMS
            # pred: forward-pass output; conf_thres: confidence threshold; iou_thres: IoU threshold
            # classes: optionally keep only specific classes
            # agnostic_nms: whether NMS also suppresses overlapping boxes of different classes
            # After NMS the box format changes from xywh to xyxy (top-left, bottom-right corners).
            # pred becomes a list[torch.Tensor], one tensor per image; each tensor has shape
            # (num_boxes, 6): box (4 values) + conf + cls
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
            dt[2] += time_sync() - t3

            # Second-stage classifier (optional), disabled by default
            # if classify:
            #     pred = apply_classifier(pred, modelc, img, im0s)

            # Process predictions, one image at a time
            for i, det in enumerate(pred):  # per image
                seen += 1
                s, im0 = '', im0s.copy()
                s += '%gx%g ' % img.shape[2:]  # print string (image width x height), e.g. '640x512 '
                annotator = Annotator(im0, line_width=line_thickness, example=str(names))
                if len(det):
                    # Rescale boxes from img_size to im0 size:
                    # map coordinates from the resized+padded image back to the original image.
                    # Coordinates are in xyxy format at this point.
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results — count detections per class
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    # Write results
                    for *xyxy, conf, cls in reversed(det):
                        if view_img:  # Add bbox to image
                            c = int(cls)  # integer class
                            label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                            annotator.box_label(xyxy, label, color=colors(c, True))

                # Stream results
                im0 = annotator.result()
                cv2.imshow('a crop of the screen', im0)
                cv2.moveWindow('a crop of the screen', 960, 0)
                if cv2.waitKey(1) & 0xff == ord('q'):
                    tmp = True
                    break
            if tmp:
                tmp2 = True
                break
        if tmp2:
            break
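The comments above give the anchor-count formula for YOLOv5's three output strides (8, 16, 32) with 3 anchors per grid cell. A quick sanity check of that arithmetic for a square 640x640 input:

h, w = 640, 640
num_boxes = (h // 32 * (w // 32) + h // 16 * (w // 16) + h // 8 * (w // 8)) * 3
print(num_boxes)  # (20*20 + 40*40 + 80*80) * 3 = 25200

For the 720x1280 example in the comment, the network actually sees a letterboxed input of roughly 384x640 after resize+pad, which is where the 20*12 + 40*24 + 80*48 = 5040 grid cells (15120 boxes) come from.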
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
imc = im0.copy()  # for save_crop (the original read `img0.copy()`, but only `im0` is defined in this snippet)
annotator = Annotator(im0, line_width=line_thickness)
if len(det):
    # Rescale boxes from img_size to im0 size
    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

    # Print results
    for c in det[:, -1].unique():
        n = (det[:, -1] == c).sum()  # detections per class
        # s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

    # Write results
    for *xyxy, conf, cls in reversed(det):
        # Add bbox to image
        c = int(cls)  # integer class
        # label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
        label = None
        annotator.box_label(xyxy, label, color=colors(c, True))

# Stream results
im0 = annotator.result()
cv2.imwrite("pred.jpg", im0)
cv2.waitKey(0)
def run(weights='yolov5s.pt',  # model.pt path(s)
        source='./test_1',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        update=False,  # update all models
        project='runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        ):
    desire_param = []
    coor = []
    all_info = []
    save_img = not nosave and not source.endswith('.txt')  # save inference images

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check image size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet50', n=2)  # initialize
        modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                pr = ' '
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
                    pr += f"{n} {names[int(c)]}{'s' * (n > 1)},"
                desire_param.append({"image_id": p.name, "prediction": pr})
                # for img_name in enumerate(p.name):
                #     if img_name not in desire_param:
                #         add_image = (img_name, "prediction 0")
                #         desire_param.append(add_image)

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference + NMS)
            result = [f'{s}Done. ({t2 - t1:.3f}s)']
            print(result)

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img and dataset.mode == 'image':
                cv2.imwrite(save_path, im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')

    # lst = []
    # for child in desire_param:
    #     info = ["img_name", "prediction"]
    #     lst1 = {k: v for k, v in zip(info, child)}
    #     lst.append(lst1)
    with open('result.json', 'w') as f:
        json.dump(desire_param, f)
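The save_txt branch above converts absolute xyxy corners to the normalized xywh format that YOLO label files use, via a torch round-trip. A minimal sketch of the same conversion in pure Python (arguments are pixel coordinates plus the image width and height, matching the `gn` gain vector in the snippet):

def xyxy_to_norm_xywh(x1, y1, x2, y2, img_w, img_h):
    """Absolute corner coords -> normalized (cx, cy, w, h), as in YOLO .txt labels."""
    cx = (x1 + x2) / 2 / img_w  # box center x, as a fraction of image width
    cy = (y1 + y2) / 2 / img_h  # box center y, as a fraction of image height
    w = (x2 - x1) / img_w
    h = (y2 - y1) / img_h
    return cx, cy, w, h

print(xyxy_to_norm_xywh(100, 200, 300, 400, 640, 480))
# (0.3125, 0.625, 0.3125, 0.4166...)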
def display(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
    crops = []
    for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
        s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
        if pred.shape[0]:
            for c in pred[:, -1].unique():
                n = (pred[:, -1] == c).sum()  # detections per class
                s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
            if show or save or render or crop:
                annotator = Annotator(im, example=str(self.names))
                for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                    label = f'{self.names[int(cls)]} {conf:.2f}'
                    if crop:
                        file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
                        crops.append({'box': box, 'conf': conf, 'cls': cls, 'label': label,
                                      'im': save_one_box(box, im, file=file, save=save)})
                    else:  # all others
                        annotator.box_label(box, label if labels else '', color=colors(cls))
                im = annotator.im
        else:
            s += '(no detections)'
        im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
        if pprint:
            print(s.rstrip(', '))
        if show:
            im.show(self.files[i])  # show
        if save:
            f = self.files[i]
            im.save(save_dir / f)  # save
            if i == self.n - 1:
                LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
        if render:
            self.imgs[i] = np.asarray(im)
    if crop:
        if save:
            LOGGER.info(f'Saved results to {save_dir}\n')
        return crops
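This display() variant returns a list of crop dicts when crop=True. A sketch of consuming that return value through the public wrapper that recent upstream YOLOv5 exposes as Detections.crop() (which calls display(crop=True) internally); the hub model and image URL here are illustrative, not from the snippet:

import torch

model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # pretrained hub model
results = model('https://ultralytics.com/images/zidane.jpg')
crops = results.crop(save=False)  # list of dicts: box, conf, cls, label, im
for c in crops:
    print(c['label'], [int(v) for v in c['box']])  # 'person 0.87'-style label + xyxy box
    # c['im'] is the cropped numpy image, ready to save or feed to a second-stage model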
def detect(path_request):
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='last_ufpr.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='data/images', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    opt = parser.parse_args()
    # print(opt)
    check_requirements(exclude=('tensorboard', 'pycocotools', 'thop'))

    source, weights, view_img, save_txt, imgsz = path_request, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    # NOTE: stride, device, half, model and names are not defined in this function;
    # they must exist at module level (e.g. loaded once at import time) for this to run.
    dataset = LoadImages(source, img_size=imgsz, stride=stride)
    webcam = False

    # Directories
    save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        # print("Predicted", pred)

        # Process detections
        dict_prediction = {}
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if opt.save_crop else im0  # for opt.save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                ans = ""
                class_cat = []
                class_conf = []
                for i, c in enumerate(det[:, -1]):
                    if names[int(c)] in ("car", "plate", "motorcycle"):
                        ans += f"{names[int(c)]}, "
                for i, c in enumerate(det[:, -1]):
                    class_cat.append(int(c))
                for i, c in enumerate(det[:, -2]):  # column -2 holds the confidence
                    class_conf.append(float(c))
                print("all", class_cat, class_conf)

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
                # print("results:", s)

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or opt.save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if opt.hide_labels else (names[c] if opt.hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=opt.line_thickness)
                        if opt.save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

        # dict_prediction["class_cat"] = "".join(str(value) for value in class_cat)
        print("ans", ans)
        dict_prediction["class_cat"] = ans
        dict_prediction["class_conf"] = "".join(str(value) for value in class_conf)
        json_data = json.dumps(dict_prediction)
        return json_data


# if __name__ == '__main__':
#     parser = argparse.ArgumentParser()
#     parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
#     parser.add_argument('--source', type=str, default='data/images', help='source')  # file/folder, 0 for webcam
#     parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
#     parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
#     parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
#     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
#     parser.add_argument('--view-img', action='store_true', help='display results')
#     parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
#     parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
#     parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
#     parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
#     parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
#     parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
#     parser.add_argument('--augment', action='store_true', help='augmented inference')
#     parser.add_argument('--update', action='store_true', help='update all models')
#     parser.add_argument('--project', default='runs/detect', help='save results to project/name')
#     parser.add_argument('--name', default='exp', help='save results to project/name')
#     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
#     parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
#     parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
#     parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
#     opt = parser.parse_args()
#     print(opt)
#     check_requirements(exclude=('tensorboard', 'pycocotools', 'thop'))
#     with torch.no_grad():
#         if opt.update:  # update all models (to fix SourceChangeWarning)
#             for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
#                 detect(opt=opt)
#                 strip_optimizer(opt.weights)
#         else:
#             detect(opt=opt)

# model.load_state_dict(model['state_dict'])
# now you can evaluate it
# model.eval()
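A sketch of how this endpoint-style detect() is presumably consumed, parsing the JSON string it returns (the input path is hypothetical; the field names follow the dict built above; note that detect() calls parse_args(), so stray CLI flags in sys.argv would leak into it):

import json

json_data = detect('data/images/test.jpg')  # hypothetical input path
result = json.loads(json_data)
print(result['class_cat'])   # e.g. 'car, plate, '
print(result['class_conf'])  # e.g. '0.910.87'

Since class_conf joins the confidence floats with no separator, '0.91' and '0.87' come back as the ambiguous string '0.910.87'; a delimiter, or simply serializing the list itself, would be safer.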
def detect(model="mobilenet_thin",  # TfPoseEstimator model option
           weights='yolov5s.pt',  # model.pt path(s)
           source='data/images',  # file/dir/URL/glob, 0 for webcam
           imgsz=640,  # inference size (pixels)
           conf_thres=0.25,  # confidence threshold
           iou_thres=0.45,  # NMS IOU threshold
           max_det=1000,  # maximum detections per image
           device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
           view_img=False,  # show results
           save_txt=False,  # save results to *.txt
           save_conf=False,  # save confidences in --save-txt labels
           save_crop=False,  # save cropped prediction boxes
           nosave=False,  # do not save images/videos
           classes=None,  # filter by class: --class 0, or --class 0 2 3
           agnostic_nms=False,  # class-agnostic NMS
           augment=False,  # augmented inference
           update=False,  # update all models
           project='runs/detect',  # save results to project/name
           name='exp',  # save results to project/name
           exist_ok=False,  # existing project/name ok, do not increment
           line_thickness=3,  # bounding box thickness (pixels)
           hide_labels=False,  # hide labels
           hide_conf=False,  # hide confidences
           half=False,  # use FP16 half-precision inference
           ):
    w, h = 432, 368
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = Path(project)
    # save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check image size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    breakCond = False
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Openpose: get keypoints and individual crops
        print("\n")
        myImg = im0s.copy()
        keypoints, humans = getKeyPoints(myImg, e, w, h)
        crops = [getCrop(point[0], myImg, 10, device, point[1] / 2) for point in keypoints]

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        t2 = time_synchronized()

        # Need to adjust bboxes to full image
        if len(pred) > 0:  # NOTE: pred is a per-image list, so this is true even with zero boxes
            breakCond = True

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Check if any overlap between keypoint and det (handheld weapon)
                for detection in det:
                    for crop in crops:
                        if bbox_iou(detection, crop) > 0:
                            cv2.putText(im0, "Spider-Sense Tingling!", (30, 90),
                                        cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 5)
                            break

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Write keypoint boxes
            for *xyxy, conf, cls in reversed(crops):
                plot_one_box(xyxy, imc, label="keyP", color=colors(c, True), line_thickness=line_thickness)

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            im0 = TfPoseEstimator.draw_humans(im0, humans, imgcopy=False)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')
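The overlap test above relies on a `bbox_iou` helper from the snippet's codebase. For readers who want to reproduce the check without that dependency, a minimal IoU for two boxes (the (x1, y1, x2, y2) layout is an assumption matching the surrounding xyxy code):

def iou_xyxy(a, b):
    """IoU of two boxes given as (x1, y1, x2, y2)."""
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])  # intersection top-left
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])  # intersection bottom-right
    iw, ih = max(0.0, ix2 - ix1), max(0.0, iy2 - iy1)
    inter = iw * ih
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0

print(iou_xyxy((0, 0, 10, 10), (5, 5, 15, 15)))  # 25 / 175 ~= 0.143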
def detect(save_img=False):
    source, weights, weights2, view_img, save_txt, imgsz, thres = (
        opt.source, opt.weights, opt.weights2, opt.view_img, opt.save_txt, opt.img_size, opt.headThres)
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = Path(opt.project)
    # save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load models
    model1 = attempt_load(weights, map_location=device)
    model2 = attempt_load(weights2, map_location=device)
    stride1 = int(model1.stride.max())  # model 1 stride
    stride2 = int(model2.stride.max())  # model 2 stride
    names1 = model1.module.names if hasattr(model1, 'module') else model1.names
    names2 = model2.module.names if hasattr(model2, 'module') else model2.names
    imgsz = check_img_size(imgsz, s=stride1)  # check img_size
    if half:
        model1.half()  # to FP16
        model2.half()  # to FP16 too

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        if opt.saveWebcam:
            save_img = True
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride1)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride1)

    # Run inference
    numFrames = 1
    t0 = time.time()
    numWeapons = 0
    headDet = []
    weapDet = []
    frames = []
    mask = None
    for path, img, im0s, vid_cap in dataset:
        print("\nFrame:", numFrames)
        if webcam:
            print("FPS", dataset.fps)
        t1 = time_synchronized()

        # Adding to frame
        if len(img.shape) >= 4:
            myImg = np.dstack((img[0, 0], img[0, 1], img[0, 2]))
        else:
            myImg = np.dstack((img[0], img[1], img[2]))

        # Creating mask
        if mask is None:
            mask = np.zeros_like(myImg)

        # Appending frames (rolling window of the last opt.filterLen frames)
        frames.append(myImg)
        if len(frames) > opt.filterLen:
            frames.pop(0)

        # Starting with the actual detections
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Do first round of predictions
        model = model1  # set pointer to model1
        names = names1

        # Inference
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, opt.classes, opt.agnostic_nms, max_det=opt.max_det)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            weapDet.append(det.clone())
            if len(weapDet) > opt.filterLen:
                weapDet.pop(0)
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i], dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if opt.save_crop else im0  # for opt.save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string (original indexed names2 here; this round uses model1's names)

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if opt.hide_labels else (names[c] if opt.hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=opt.line_thickness)
                        if opt.save_crop:
                            save_one_box(xyxy, imc, file=Path(save_path + '_crops') / names[c] / f'{p.stem}.jpg', BGR=True)

        print("2nd Round")
        model = model2
        names = names2

        # Inference
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, opt.classes, opt.agnostic_nms, max_det=opt.max_det)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            numWeapons += len(det)
            headDet.append(det.clone())
            if len(headDet) > opt.filterLen:
                headDet.pop(0)
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i], dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if opt.save_crop else im0  # for opt.save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if opt.hide_labels else (names[c] if opt.hide_conf else f'{names[c]} {conf:.2f}')
                        width = round(float((xyxy[2] - xyxy[0]) / im0.shape[1]), 2)  # box width as a fraction of image width
                        plot_one_box(xyxy, im0, label=label + " " + str(width), color=colors(c, True),
                                     line_thickness=opt.line_thickness)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Checking for Spider-Sense
            sense = spider_sense(headDet, weapDet, frames, im0, thres, mask, device)
            if sense[0] or sense[1]:
                cv2.putText(im0, "Spider-Sense Tingling!", (30, 90), cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 5)

            # Stream results
            if view_img:
                if opt.flowShow:
                    thisMask = cv2.resize(mask, (im0.shape[1], im0.shape[0]))
                    im0 = cv2.add(im0, thisMask)
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                elif webcam:
                    if vid_path != save_path + ".mp4":
                        vid_path = save_path + ".mp4"
                        print("Save Path: ", save_path)
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'
                        fps = dataset.fps
                        w = dataset.w
                        h = dataset.h
                        vid_writer = cv2.VideoWriter(vid_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    if opt.flowShow:
                        thisMask = cv2.resize(mask, (im0.shape[1], im0.shape[0]))
                        im0 = cv2.add(im0, thisMask)
                    vid_writer.write(im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        print("Save Path: ", save_path)
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    if opt.flowShow:
                        thisMask = cv2.resize(mask, (im0.shape[1], im0.shape[0]))
                        im0 = cv2.add(im0, thisMask)
                    vid_writer.write(im0)

        # Checking break condition
        if numFrames == opt.maxFrames:
            break
        numFrames += 1

    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()  # release previous video writer
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
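The rolling headDet/weapDet/frames lists above act as a temporal filter: an alert should only fire if detections persist across the last opt.filterLen frames, which suppresses single-frame false positives. The spider_sense() function itself is external to this snippet; a minimal sketch of the persistence idea it appears to rely on (the window length and vote threshold here are assumptions, not values from the source):

from collections import deque

class PersistenceFilter:
    """Fire only when a condition holds in most of the last `window` frames."""
    def __init__(self, window=10, min_hits=6):
        self.hits = deque(maxlen=window)  # automatically drops frames older than `window`
        self.min_hits = min_hits

    def update(self, detected: bool) -> bool:
        self.hits.append(detected)
        return sum(self.hits) >= self.min_hits  # majority vote over the window

# usage, once per frame: alert = filt.update(len(det) > 0)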