def detect():
    out, source, weights, view_img, save_img, save_txt, img_cfg_sz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    print("source: ", source)
    print("out: ", out)
    print("view_img: ", view_img)
    print("save_img: ", save_img)
    print("save_txt: ", save_txt)
    print("weights: ", weights)
    print("img_cfg_sz: ", img_cfg_sz)

    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize
    set_logging()  # print logs at INFO level and above
    device = select_device(opt.device)
    print("device: ", device)
    half = device.type != 'cpu'  # half precision only supported on CUDA
    print("half: ", half)

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    # print("type(model): ", type(model))  # an instance of the Model class
    # print("model: ", model)
    # print("model.yaml: ", model.yaml)
    # print("model.model: ", model.model)
    # print("model.save: ", model.save)
    if half:
        model.half()  # to FP16

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    img_cfg_sz = check_img_size(img_cfg_sz, s=model.stride.max())  # check img_size

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_writer = None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=img_cfg_sz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=img_cfg_sz)
    # print("dataset: ", dataset)
    # print("dataset[0]: ", dataset[0])  # no __getitem__

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, img_cfg_sz, img_cfg_sz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, img_ori, vid_cap in dataset:
        print("img_ori.shape: ", img_ori.shape)
        print("img.shape: ", img.shape)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        # print("1pred.shape: ", pred.shape)

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   merge=opt.merge,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        # print("2pred[0].shape: ", pred[0].shape)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, img_ori)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s_log, im_ori = path[i], '%g: ' % i, img_ori[i].copy()
            else:
                p, s_log, im_ori = path, '', img_ori

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '') + '.txt'
            # print("save_path: ", save_path)
            # print("txt_path: ", txt_path)
            s_log += '%gx%g ' % img.shape[2:]  # print string
            print("det: ", det)
            if det is not None and len(det):
                # Rescale boxes from img_size to im_ori size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im_ori.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s_log += '%g %ss, ' % (n, names[int(c)])  # add count of each detected class to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # Add bbox to image
                    label = '%s %.2f' % (names[int(cls)], conf)  # string
                    plot_one_box(xyxy, im_ori, label=label, color=colors[int(cls)])
                    if save_txt:  # Write to file
                        gn = torch.tensor(im_ori.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

            # Print time (inference + NMS)
            print('s_log: %sDone. (%.3fs)\n' % (s_log, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im_ori)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im_ori)
                else:
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    fourcc = 'mp4v'  # output video codec
                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter(save_path,
                                                 cv2.VideoWriter_fourcc(*fourcc),
                                                 fps, (w, h))
                    vid_writer.write(im_ori)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    t3 = time.time()
    print('All Done. (%.3fs)' % (t3 - t0))
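
# This detect() variant reads its settings from a global `opt` namespace. A hedged
# sketch of the argparse setup it implies follows; the flag names mirror the opt.*
# attributes referenced above, but the defaults shown here are illustrative
# assumptions, not values taken from the source.
def make_opt_sketch():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='yolov5s.pt')
    parser.add_argument('--source', type=str, default='inference/images')
    parser.add_argument('--output', type=str, default='inference/output')
    parser.add_argument('--img-size', type=int, default=640)
    parser.add_argument('--conf-thres', type=float, default=0.4)
    parser.add_argument('--iou-thres', type=float, default=0.5)
    parser.add_argument('--device', default='')
    parser.add_argument('--view-img', action='store_true')
    parser.add_argument('--save-img', action='store_true')
    parser.add_argument('--save-txt', action='store_true')
    parser.add_argument('--classes', nargs='+', type=int)
    parser.add_argument('--agnostic-nms', action='store_true')
    parser.add_argument('--augment', action='store_true')
    parser.add_argument('--merge', action='store_true')  # merge NMS
    parser.add_argument('--update', action='store_true')
    return parser.parse_args()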
def test(data,
         weights=None,
         batch_size=16,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         save_dir='',
         merge=False,
         save_txt=False):
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)
        merge, save_txt = opt.merge, opt.save_txt  # use Merge NMS, save *.txt labels
        if save_txt:
            out = Path('inference/output')
            if os.path.exists(out):
                shutil.rmtree(out)  # delete output folder
            os.makedirs(out)  # make new output folder

        # Remove previous
        for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')):
            os.remove(f)

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = create_dataloader(path,
                                       imgsz,
                                       batch_size,
                                       model.stride.max(),
                                       opt,
                                       hyp=None,
                                       augment=False,
                                       cache=True,
                                       pad=0.5,
                                       rect=True)[0]

    seen = 0
    names = model.names if hasattr(model, 'names') else model.module.names
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    evaluator = COCOEvaluator(root=DATA_ROOT,
                              model_name=opt.weights.replace('.pt', ''))
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                loss += compute_loss([x.float() for x in train_out], targets,
                                     model)[1][:3]  # GIoU, obj, cls

            # Run NMS
            t = time_synchronized()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres,
                                         merge=merge)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                x = pred.clone()
                x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4],
                                        shapes[si][0], shapes[si][1])  # to original
                for *xyxy, conf, cls in x:
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                            gn).view(-1).tolist()  # normalized xywh
                    with open(str(out / Path(paths[si]).stem) + '.txt', 'a') as f:
                        f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = Path(paths[si]).stem
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box, shapes[si][0],
                             shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    result = {
                        'image_id': int(image_id) if image_id.isnumeric() else image_id,
                        'category_id': coco91class[int(p[5])],
                        'bbox': [round(x, 3) for x in b],
                        'score': round(p[4], 5)
                    }
                    jdict.append(result)
                # evaluator.add([result])
                # if evaluator.cache_exists:
                #     break

            # # Assign all predictions as incorrect
            # correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            # if nl:
            #     detected = []  # target indices
            #     tcls_tensor = labels[:, 0]
            #
            #     # target boxes
            #     tbox = xywh2xyxy(labels[:, 1:5]) * whwh
            #
            #     # Per target class
            #     for cls in torch.unique(tcls_tensor):
            #         ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
            #         pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices
            #
            #         # Search for detections
            #         if pi.shape[0]:
            #             # Prediction to target ious
            #             ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices
            #
            #             # Append detections
            #             detected_set = set()
            #             for j in (ious > iouv[0]).nonzero(as_tuple=False):
            #                 d = ti[i[j]]  # detected target
            #                 if d.item() not in detected_set:
            #                     detected_set.add(d.item())
            #                     detected.append(d)
            #                     correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
            #                     if len(detected) == nl:  # all targets already located in image
            #                         break
            #
            # # Append statistics (correct, conf, pcls, tcls)
            # stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # # Plot images
        # if batch_i < 1:
        #     f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i)  # filename
        #     plot_images(img, targets, paths, str(f), names)  # ground truth
        #     f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
        #     plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions

    evaluator.add(jdict)
    evaluator.save()
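
# A minimal sketch of calling test() standalone. It assumes the surrounding
# script defines a global `opt` (with opt.task, opt.merge, opt.save_txt and
# opt.weights) the way yolov5-style test scripts do; the dataset yaml, weights
# path, and thresholds below are illustrative only.
if __name__ == '__main__':
    with torch.no_grad():
        test(data='data/coco128.yaml',
             weights='yolov5s.pt',
             batch_size=16,
             imgsz=640,
             conf_thres=0.001,
             iou_thres=0.6,
             save_json=False)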
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        isAlert = False
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            # s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
                t = s
                s += "\n"

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

                # Print time (inference + NMS)
                isAlert = True
                s += "\n"
                # print(f'{s}ALERT' + "\n")
            else:
                print("No Detections")

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                    send_picture_alert(save_path, isAlert)
                    if isAlert:
                        text = t[0:-2]
                        send_alert(client, text)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'ALERT. ({time.time() - t0:.3f}s)')
    print()
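
# For reference, a minimal equivalent of the scale_coords() helper that these
# detect() variants call to map boxes from the letterboxed network input back
# to the original image (modeled on the ultralytics yolov5 utility; the input
# is assumed to be a torch tensor of xyxy boxes).
import torch

def scale_coords_sketch(img1_shape, coords, img0_shape, ratio_pad=None):
    if ratio_pad is None:  # derive gain and padding from the two shapes
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad = ((img1_shape[1] - img0_shape[1] * gain) / 2,
               (img1_shape[0] - img0_shape[0] * gain) / 2)  # wh padding
    else:
        gain, pad = ratio_pad[0][0], ratio_pad[1]
    coords[:, [0, 2]] -= pad[0]  # remove x padding
    coords[:, [1, 3]] -= pad[1]  # remove y padding
    coords[:, :4] /= gain  # undo the resize gain
    coords[:, [0, 2]] = coords[:, [0, 2]].clamp(0, img0_shape[1])  # clip x to width
    coords[:, [1, 3]] = coords[:, [1, 3]].clamp(0, img0_shape[0])  # clip y to height
    return coords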
def forward(self, imgs, size=640, augment=False, profile=False):
    # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
    #   filename:   imgs = 'data/images/zidane.jpg'
    #   URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
    #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
    #   PIL:             = Image.open('image.jpg')  # HWC x(640,1280,3)
    #   numpy:           = np.zeros((640,1280,3))  # HWC
    #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
    #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

    t = [time_synchronized()]
    p = next(self.model.parameters())  # for device and type
    if isinstance(imgs, torch.Tensor):  # torch
        with amp.autocast(enabled=p.device.type != 'cpu'):
            return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

    # Pre-process
    n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
    shape0, shape1, files = [], [], []  # image and inference shapes, filenames
    for i, im in enumerate(imgs):
        f = f'image{i}'  # filename
        if isinstance(im, str):  # filename or uri
            im, f = np.asarray(
                Image.open(
                    requests.get(im, stream=True).raw
                    if im.startswith('http') else im)), im
        elif isinstance(im, Image.Image):  # PIL Image
            im, f = np.asarray(im), getattr(im, 'filename', f) or f
        files.append(Path(f).with_suffix('.jpg').name)
        if im.shape[0] < 5:  # image in CHW
            im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
        im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
        s = im.shape[:2]  # HWC
        shape0.append(s)  # image shape
        g = (size / max(s))  # gain
        shape1.append([y * g for y in s])
        imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
    shape1 = [
        make_divisible(x, int(self.stride.max()))
        for x in np.stack(shape1, 0).max(0)
    ]  # inference shape
    x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
    x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
    x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
    x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
    t.append(time_synchronized())

    with amp.autocast(enabled=p.device.type != 'cpu'):
        # Inference
        y = self.model(x, augment, profile)[0]  # forward
        t.append(time_synchronized())

        # Post-process
        y = non_max_suppression(y,
                                conf_thres=self.conf,
                                iou_thres=self.iou,
                                classes=self.classes)  # NMS
        for i in range(n):
            scale_coords(shape1, y[i][:, :4], shape0[i])

        t.append(time_synchronized())
        return Detections(imgs, y, files, t, self.names, x.shape)
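
# Example usage of the autoshaped forward() above, following its own docstring.
# A hedged sketch: it assumes `model` is the autoshape wrapper instance whose
# forward() is shown above and that a local file 'image.jpg' exists.
from PIL import Image
import cv2

im_pil = Image.open('image.jpg')              # PIL image, HWC
im_cv = cv2.imread('image.jpg')[:, :, ::-1]   # OpenCV BGR converted to RGB, HWC
results = model([im_pil, im_cv], size=640)    # batched inference over mixed inputs
# `results` is a Detections object; results.xyxy[i] holds the boxes for image i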
def detect(self, data):
    """
    The returned bboxes use absolute coordinates: x1, y1, x2, y2 are the
    top-left and bottom-right corners.
    :param data: list of images [image1, image2]
    :return: [[image, bboxes, confidences, labels], [image, bboxes, confidences, labels], ...]
             bboxes holds all detected boxes, confidences their scores, and
             labels the label of each bbox.
    """
    # check that the configured image size is divisible by the model stride
    imgsz = check_img_size(self.img_size, s=self.stride)  # check img_size

    # download the inputs; images stores the local paths of the image files
    images = []
    for url in data:
        if url.startswith('http'):
            image = self.download_file(url, self.image_dir)
        else:
            # not an http URL, so it is a local file and can be used directly
            image = url
        images.append(image)

    # set up the dataset
    dataset = LoadImages(path=self.image_dir, img_size=imgsz, stride=self.stride)
    # reset the dataset file list to just the images we need; dataset.nf
    # (number of files) has to be updated accordingly
    dataset.files = images
    dataset.nf = len(images)
    dataset.video_flag = [False] * len(images)

    # set up the model
    predict_model = self.predict_model
    # warm-up inference
    if self.device.type != 'cpu':
        predict_model(
            torch.zeros(1, 3, imgsz, imgsz).to(self.device).type_as(
                next(predict_model.parameters())))  # run once

    # measure elapsed time
    t0 = time.time()
    # path is the image path; img is the resized image as a numpy array
    # [channel, new_height, new_width]; im0s is the original image
    # [height, width, channel], e.g. (2200, 1700, 3); vid_cap is None for
    # images and only used for video
    results = []
    for idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        # moved to the GPU when one is available
        img = torch.from_numpy(img).to(self.device)
        # convert to float
        img = img.float()
        # normalize
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        # add a batch dimension: [batch_size, channel, new_height, new_width],
        # e.g. torch.Size([1, 3, 640, 512])
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # start inference; time_synchronized synchronizes the GPU
        t1 = time_synchronized()
        # raw prediction [batch_size, num_boxes, other], e.g. torch.Size([1, 20160, 8]);
        # 8 = (x1, y1, x2, y2, conf, cls1, cls2, cls3, ...): the first 4 are the bbox
        # coordinates, conf is the confidence, and clsN is the probability of class N
        pred = predict_model(img, augment=False)[0]

        # apply NMS; pred becomes a list with one element per image, each of shape
        # [bbox_num, other] where other = (x1, y1, x2, y2, conf, max_cls_prob),
        # e.g. torch.Size([5, 6])
        pred = non_max_suppression(pred,
                                   self.conf_thres,
                                   self.iou_thres,
                                   classes=self.classes,
                                   agnostic=self.agnostic_nms)
        t2 = time_synchronized()

        # process detections; i is the index and det holds all bboxes, e.g.
        # torch.Size([3, 6]): 3 bboxes, 6 = (x1, y1, x2, y2, confidence, class id)
        for i, det in enumerate(pred):  # detections per image
            # s is an empty string used for printing results; im0 is the original
            # image; frame only matters for video. pred always has exactly one
            # element here because we iterate one image at a time
            p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            # p is the absolute path of the original image
            p = Path(p)  # to Path
            save_path = os.path.join(self.predict_dir, p.name)  # path of the saved annotated image
            s += ' inference size (HxW) %gx%g, ' % img.shape[2:]  # print string, e.g. '640x480'
            s += ' original size (HxW) %sx%s, ' % im0.shape[:2]  # print string, e.g. '640x480'
            # width, height, width, height, e.g. tensor([1700, 2200, 1700, 2200]),
            # used for normalization below
            # if det is not empty, at least one bbox (i.e. a target) was detected
            if len(det):
                # scale bboxes from img_size back to the original im0 size;
                # x1, y1 is the top-left corner and x2, y2 the bottom-right
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # e.g.: [[832.0, 160.0, 1495.0, 610.0, 0.9033942818641663], [849.0, 1918.0, 1467.0, 2016.0, 0.8640206456184387], [204.0, 0.0, 798.0, 142.0, 0.2842876613140106]]
                bboxes = det[:, :4].tolist()
                confidences = det[:, 4].tolist()
                # e.g.: ['figure', 'equation', 'figure']
                labels = [
                    self.label_list_cn[i]
                    for i in map(int, det[:, -1].tolist())
                ]
                # append image name, bboxes, confidences, and labels to the results
                results.append([images[idx], bboxes, confidences, labels])
                # the last entry of the final dimension is the predicted class;
                # unique() groups repeated classes so they can be counted
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.label_list[int(c)]}{'s' * (n > 1)} bbox, "  # e.g. '2 figures bbox, '

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if self.save_img or self.view_img:
                        # draw the bbox on the image
                        label = f'{self.label_list[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=self.colors[int(cls)],
                                     line_thickness=3)

                # Print time (inference + NMS)
                print(f'{s}Done. ({t2 - t1:.3f}s)')
            else:
                # no detections: append empty results
                results.append([images[idx], [], [], []])
                print(f'{s}Done. No targets found. ({t2 - t1:.3f}s)')

            # save the annotated image to the output directory
            if self.save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                    print(f"Saved detection result to {save_path}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
    return results
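
# Example of calling the detect() method above, per its docstring: it accepts a
# mixed list of URLs and local paths and returns one [image, bboxes, confidences,
# labels] entry per input. The `Detector` wrapper name and the paths are hedged
# assumptions for illustration; only the call signature comes from the code above.
detector = Detector()  # hypothetical class exposing the detect() method above
results = detector.detect([
    'https://example.com/page1.jpg',  # downloaded via self.download_file
    '/data/local/page2.jpg',          # local file, used directly
])
for image, bboxes, confidences, labels in results:
    print(image, len(bboxes), labels)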
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    # get device
    device = select_device(opt.device)
    # use FP16 if the device is a GPU
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    # load the FP32 model; check_img_size ensures the requested input resolution
    # is divisible by 32 (it is adjusted to the nearest valid size if not)
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    # switch to FP16
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    # optional second-stage classification, disabled by default
    classify = False
    if classify:
        modelc = load_classifier(name='yolov5s', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/yolov5s.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    # choose the dataloader based on the input source
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        # to display detections on video as well, enable view_img here
        view_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    # get class names
    names = model.module.names if hasattr(model, 'module') else model.names
    # set bbox colors
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    # run one forward pass to check that everything works
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    """
    path     image/video path
    img      image after resize + pad
    img0     original-size image
    cap      None when reading images, the video source when reading video
    """
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        # cast the image to FP16 as well
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        # add a batch axis in front if it is missing
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        """
        Forward pass: pred has shape (1, num_boxes, 5 + num_classes).
        h, w are the height and width fed to the network; the dataset uses
        rectangular inference at detection time, so h does not have to equal w.
        num_boxes = h/32 * w/32 + h/16 * w/16 + h/8 * w/8
        pred[..., 0:4]  box coordinates in xywh (center + width/height) format
        pred[..., 4]    objectness confidence
        pred[..., 5:]   classification scores
        """
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        """
        pred:       output of the forward pass
        conf_thres: confidence threshold
        iou_thres:  IoU threshold
        classes:    keep only specific classes, if set
        agnostic:   whether NMS also suppresses boxes across different classes
        After NMS the box format changes from xywh to xyxy (top-left + bottom-right).
        pred is a list[torch.Tensor] of length batch_size; each tensor has shape
        (num_boxes, 6) holding box + conf + cls.
        """
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        # second-stage classification, unused by default
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        # handle each image in turn
        for i, det in enumerate(pred):  # detections per image
            # for webcam input batch_size >= 1, so take the i-th image of the batch
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            # path for saving the image/video
            save_path = str(save_dir / p.name)  # img.jpg
            # path for the txt file of saved boxes
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            # build the print string (image dimensions)
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                # map box coordinates from the resized+padded image back to the
                # original-size image; coordinates are in xyxy format here
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                # print the number of detections per class
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f'{n} {names[int(c)]}s, '  # add to string

                # Write results
                # save the predictions
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        # convert xyxy (top-left + bottom-right) to xywh
                        # (center + width/height), normalize by w and h,
                        # and convert to a list before saving
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    # draw the box on the original image
                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            # print forward pass + NMS time
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            # show the image/video if requested
            if view_img:
                cv2.imshow(str(p), im0)

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    # print total time
    print(f'Done. ({time.time() - t0:.3f}s)')
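
# For reference, a minimal equivalent of the xyxy2xywh() conversion the save_txt
# branches above rely on (modeled on the ultralytics yolov5 utility): xyxy corners
# in, xywh center + width/height out. Dividing the result by gn = [w, h, w, h]
# then yields the normalized label format written to disk.
import numpy as np
import torch

def xyxy2xywh_sketch(x):
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
    y[:, 2] = x[:, 2] - x[:, 0]  # width
    y[:, 3] = x[:, 3] - x[:, 1]  # height
    return y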
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz, conn = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.conn
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Parameter definition
    print(conn)
    angle = 0
    rho = 0
    saw_ball = 0
    theta = 0
    theta_increment = 3 * math.pi / 4
    increment = math.pi / 48

    if conn:
        serial_port_1 = serial.Serial(
            port="/dev/ttyACM0",
            baudrate=115200,
            bytesize=serial.EIGHTBITS,
            parity=serial.PARITY_NONE,
            stopbits=serial.STOPBITS_ONE,
        )
        # Wait a second to let the port initialize
        serial_port = utils.openContinuous(timeout=0.01)
        time.sleep(1)

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        hasBall = 0
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            # Add deadzone to the image
            cv2.rectangle(im0, (round(im0.shape[1] * 0.45), 0),
                          (round(im0.shape[1] * 0.55), im0.shape[0]),
                          (0, 0, 255),
                          thickness=3,
                          lineType=cv2.LINE_AA)

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                            gn).view(-1).tolist()  # normalized xywh

                    # Get object coordinates
                    mid_x = xywh[0]
                    mid_y = xywh[1]
                    box_w = xywh[2]
                    box_h = xywh[3]
                    # print mid coordinates of the box
                    print('Mid coordinates of box, w, h: %.3f,%.3f,%.3f,%.3f' %
                          (mid_x, mid_y, box_w, box_h))

                    if mid_x < 0.45:  # ROBOT TURN LEFT
                        angle = angle + increment  # rad/s
                        print('LEFT %.4f' % angle)
                        rho = 0
                        angularControl = 0  # angular velocity control
                    elif mid_x > 0.55:  # ROBOT TURN RIGHT
                        angle = angle - increment  # rad/s
                        print('RIGHT %.4f' % angle)
                        rho = 0
                        angularControl = 0  # angular velocity control
                    else:  # DRIVE STRAIGHT TO THE BALL
                        angle = angle + 0  # rad/s
                        print('STRAIGHT %.4f' % angle)
                        rho = rho + 0.1  # m/s
                        # if saw_ball:
                        #     theta = theta  # ----> INSERT VALUE FOR THE ROBOT TO MOVE IN THE DIRECTION OF THE CAMERA
                        angularControl = 0  # angular velocity control
                    saw_ball = 1
            else:
                if not hasBall:
                    angle = angle + increment  # rad/s
                    print('NOBALL %.4f' % angle)
                    rho = 0
                    angularControl = 0  # angular velocity control
                else:
                    # STOP THE ROBOT
                    angle = angle + 0  # rad/s
                    rho = 0
                    angularControl = 0  # angular velocity control

            if conn:
                # Constructing the packet
                cmd = rem.ffi.new("RobotCommand*")
                cmd.header = rem.lib.PACKET_TYPE_ROBOT_COMMAND
                cmd.id = 3

                # check value of angle
                if angle > math.pi * 2:
                    angle = angle - math.pi * 2
                if angle < -math.pi * 2:
                    angle = angle + math.pi * 2
                cmd.angle = angle
                # cmd.rho = rho
                # cmd.theta = theta
                # cmd.angularControl = angularControl

                packet = rem.ffi.new("RobotCommandPayload*")
                rem.lib.encodeRobotCommand(packet, cmd)

                # Sending the packet
                try:
                    serial_port.write(packet.payload)
                    print(" angle : %.4f" % cmd.angle)
                except KeyboardInterrupt:
                    print("Exiting Program")
                except Exception as exception_error:
                    print("Error occurred. Exiting Program")
                    print("Error: " + str(exception_error))

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    cv2.destroyAllWindows()
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    serial_port.close()
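
# The steering logic above reduces to a small pure function: the normalized box
# center mid_x is compared against the 0.45-0.55 deadzone drawn on the frame.
# A hedged sketch of that decision, reusing the same increments as the code
# above; the function name and return convention are illustrative, not the
# source API.
import math

def steer(mid_x, angle, rho, increment=math.pi / 48):
    """Return updated (angle, rho) for one detection; a sketch, not the source API."""
    if mid_x < 0.45:  # ball left of the deadzone: turn left
        return angle + increment, 0
    if mid_x > 0.55:  # ball right of the deadzone: turn right
        return angle - increment, 0
    return angle, rho + 0.1  # ball centered: drive straight toward it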
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadWholeImages(source)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    results_json = []
    json_path = str(save_dir / 'results') + '.json'
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment, wbf=opt.wbf)[0]

        # Apply NMS
        if opt.augment and opt.wbf:
            boxes_list = []
            scores_list = []
            labels_list = []
            for predi in pred:
                predi = non_max_suppression(predi,
                                            opt.conf_thres,
                                            opt.iou_thres,
                                            classes=opt.classes,
                                            agnostic=opt.agnostic_nms,
                                            softnms=False)
                # predi = scale_coords(img.shape[2:], predi[:, :4], im0.shape).round()
                boxes_list.append(predi[0][:, :4].cpu().numpy())
                scores_list.append(predi[0][:, 4].cpu().numpy())
                labels_list.append(predi[0][:, 5].cpu().numpy())
            boxes, scores, labels = run_wbf(boxes_list, scores_list,
                                            labels_list, img.shape[2:])
            pred = [
                torch.from_numpy(np.concatenate([boxes, scores, labels], -1))
            ]
        else:
            pred = non_max_suppression(pred,
                                       opt.conf_thres,
                                       opt.iou_thres,
                                       classes=opt.classes,
                                       agnostic=opt.agnostic_nms,
                                       softnms=False)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        # discard background
        ex1, ey1, ex2, ey2 = get_tile_edge(im0s)
        discard = (ex1, ey1, ex2, ey2)
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f'{n} {names[int(c)]}s, '  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if xyxy[0] < discard[0] or xyxy[1] < discard[1] or \
                            xyxy[2] > discard[2] or xyxy[3] > discard[3]:
                        continue
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

                    results_dict = {
                        "name": p.name,
                        "category": int(cls) + 1,
                        "bbox": list(np.array(xyxy, dtype=np.float64)),
                        "score": np.float64(conf)
                    }
                    results_json.append(results_dict)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)

            # # Save results (image with detections)
            # if save_img:
            #     if dataset.mode == 'image':
            #         cv2.imwrite(save_path, im0)
            #     else:  # 'video'
            #         if vid_path != save_path:  # new video
            #             vid_path = save_path
            #             if isinstance(vid_writer, cv2.VideoWriter):
            #                 vid_writer.release()  # release previous video writer
            #             fourcc = 'mp4v'  # output video codec
            #             fps = vid_cap.get(cv2.CAP_PROP_FPS)
            #             w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            #             h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #             vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
            #         vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    with open(json_path, 'w') as f:
        json.dump(results_json, f)

    print(f'Done. ({time.time() - t0:.3f}s)')
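
# run_wbf() is not shown in this snippet. A hedged sketch of what it might look
# like, built on the ensemble_boxes package's weighted_boxes_fusion (a real API);
# it assumes boxes arrive in pixel coordinates of the network input and must be
# normalized to [0, 1] first, and it returns scores/labels as column vectors so
# the caller's np.concatenate([boxes, scores, labels], -1) produces (N, 6) rows.
import numpy as np
from ensemble_boxes import weighted_boxes_fusion

def run_wbf_sketch(boxes_list, scores_list, labels_list, image_shape,
                   iou_thr=0.55, skip_box_thr=0.0):
    h, w = image_shape
    norm = np.array([w, h, w, h], dtype=np.float32)
    boxes_list = [b / norm for b in boxes_list]  # pixel xyxy to normalized xyxy
    boxes, scores, labels = weighted_boxes_fusion(
        boxes_list, scores_list, labels_list,
        iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    boxes = boxes * norm  # back to pixel coordinates
    return boxes, scores[:, None], labels[:, None]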
def detect(weights='mdp/weights/weights.pt',
           img_size=416,
           conf_thres=0.7,
           iou_thres=0.5,
           device='',
           classes=None,
           agnostic_nms=False,
           augment=False,
           update=False):
    # Initialize
    set_logging()
    device = select_device(device)
    model = attempt_load(weights, map_location=device)
    imgsz = check_img_size(img_size, s=model.stride.max())
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # HOST = ''
    # PORT = 8485
    # s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # print('Socket created')
    #
    # s.bind((HOST, PORT))
    # print('Socket bind complete')
    # s.listen(10)
    # print('Socket now listening')

    client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    client_socket.connect(('192.168.15.1', 9000))
    conn = client_socket
    # conn, addr = s.accept()

    data = b""
    payload_size = struct.calcsize(">L")
    print("payload_size: {}".format(payload_size))

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img

    frame, data = receive_frame(data, payload_size, conn)
    img = [frame]
    s = np.stack([letterbox(x, new_shape=img_size)[0].shape for x in img], 0)  # inference shapes
    rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal

    while True:
        frame, data = receive_frame(data, payload_size, conn)
        img = [frame]
        img0 = img.copy()

        # Letterbox
        img = [letterbox(x, new_shape=img_size, auto=rect)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device)
        img = img.float()
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes=classes,
                                   agnostic=agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            s, im0 = '', img0[i].copy()
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].detach().unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    predicted_label = names[int(cls)]
                    label_id = label_id_mapping.get(predicted_label)
                    rpi_message = convert_to_message(label_id)
                    conn.sendall(rpi_message)  # send result to rpi

                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                            gn).view(-1).tolist()  # normalized xywh
                    print(('%s ' * 5 + '\n') % (label_id, *xywh))  # label format
                    label = '%s %.2f' % (label_id, conf)
                    plot_one_box(xyxy,
                                 im0,
                                 label=label,
                                 color=colors[int(cls)],
                                 line_thickness=3)
                    break

        cv2.imshow('ImageWindow', im0)
        if cv2.waitKey(1) == ord('q'):  # q to quit
            raise StopIteration
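
# receive_frame() is not shown in this snippet. A hedged sketch of the usual
# length-prefixed pattern implied by payload_size = struct.calcsize(">L") above:
# read a 4-byte big-endian length, then that many bytes, and decode the frame.
# Whether the sender transmits JPEG bytes (assumed here) or a pickled array is
# an assumption.
import struct
import cv2
import numpy as np

def receive_frame_sketch(data, payload_size, conn):
    while len(data) < payload_size:  # accumulate until the length header arrives
        data += conn.recv(4096)
    msg_size = struct.unpack(">L", data[:payload_size])[0]
    data = data[payload_size:]
    while len(data) < msg_size:  # accumulate the frame body
        data += conn.recv(4096)
    frame_bytes, data = data[:msg_size], data[msg_size:]
    frame = cv2.imdecode(np.frombuffer(frame_bytes, np.uint8),
                         cv2.IMREAD_COLOR)  # assumes JPEG-encoded frames
    return frame, data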
def main():
    img_size = 512  # must be a multiple of 32 [416, 512, 608]
    cfg = "cfg/yolov3-spp.cfg"
    weights = "weights/yolov3-spp-ultralytics-{}.pt".format(img_size)
    img_path = "test.jpg"
    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)
    model.eval()

    # init
    img = torch.zeros((1, 3, img_size, img_size), device=device)
    model(img)

    img_o = cv2.imread(img_path)  # BGR
    assert img_o is not None, "Image Not Found " + img_path

    img = img_utils.letterbox(img_o, new_shape=input_size, auto=True, color=(0, 0, 0))[0]
    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img)

    img = torch.from_numpy(img).to(device).float()
    img /= 255.0  # scale (0, 255) to (0, 1)
    img = img.unsqueeze(0)  # add batch dimension

    t1 = torch_utils.time_synchronized()
    pred = model(img)[0]  # only get inference result
    t2 = torch_utils.time_synchronized()
    print(t2 - t1)

    pred = utils.non_max_suppression(pred, conf_thres=0.3, iou_thres=0.6, multi_label=True)[0]
    t3 = time.time()
    print(t3 - t2)

    # process detections
    pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4], img_o.shape).round()
    print(pred.shape)

    bboxes = pred[:, :4].detach().cpu().numpy()
    scores = pred[:, 4].detach().cpu().numpy()
    classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1

    category_index = dict([(i + 1, str(i + 1)) for i in range(90)])
    img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores, category_index)
    plt.imshow(img_o)
    plt.show()

    img_o.save("test_result.jpg")
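
# For reference, a minimal equivalent of the letterbox() resize-and-pad step
# that main() and the detect() variants depend on (modeled on the ultralytics
# implementations): scale by the limiting ratio, then pad to the target shape,
# or only up to the nearest stride multiple when auto=True.
import cv2

def letterbox_sketch(im, new_shape=(640, 640), color=(114, 114, 114),
                     auto=True, stride=32):
    shape = im.shape[:2]  # current [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])  # resize gain
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
    if auto:  # minimal rectangular padding to a stride multiple
        dw, dh = dw % stride, dh % stride
    dw, dh = dw / 2, dh / 2  # split padding between both sides
    if shape[::-1] != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right,
                            cv2.BORDER_CONSTANT, value=color)
    return im, r, (dw, dh)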
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, im0 = path, '', im0s
            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                max_size = 0
                maxlabel = ""
                xyxymax = []
                for *xyxy, conf, cls in reversed(det):
                    # find the largest vehicle (the one that is parking)
                    if names[int(cls)] == 'car' or names[int(cls)] == 'truck' \
                            or names[int(cls)] == 'bus':
                        # squared box diagonal as a size measure
                        size = (int(xyxy[2]) - int(xyxy[0]))**2 + \
                               (int(xyxy[3]) - int(xyxy[1]))**2
                        if size > max_size:
                            max_size = size
                            maxlabel = '%s %.2f' % (names[int(cls)], conf)
                            xyxymax = xyxy[0], xyxy[1], xyxy[2], xyxy[3]
                            crop_image = im0[int(xyxy[1]):int(xyxy[3]),
                                             int(xyxy[0]):int(xyxy[2])]

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if names[int(cls)] == 'bus' or names[int(cls)] == 'car' \
                            or names[int(cls)] == 'truck':
                        pass

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Stream results
            if False:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    if not os.path.exists(opt.smooth_txt):
        os.makedirs(opt.smooth_txt)
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # # Second-stage classifier
    # classify = False
    # if classify:
    #     modelc = load_classifier(name='resnet101', n=2)  # initialize
    #     modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
    #     modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[np.random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    # dataset contains all the frames (or images) of the video
    crds_crop = np.empty((0, 4))  # contains coordinates of a single bbox with the highest conf
    np_nan = np.empty([1, 4])  # for tracking
    np_nan[:] = np.nan  # for tracking
    frame_no = 0
    for path, img, im0s, vid_cap in dataset:
        # im0s, img - initial, resized and padded (img)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # # Apply Classifier
        # if classify:
        #     pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            # gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):  # only when obj is in the frame
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # these will be used for the deepsort input
                xywhs = xyxy2xywh(det[:, :4].cpu())
                confs = det[:, 4].cpu()

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confs, im0)  # this is numpy array

                ###########################################################
                # FOR NOW, WE WILL ONLY BE KEEPING THE MOST CONFIDENT VALUE
                ###########################################################
                max_conf_id = confs.argmax()
                # keeping the coordinates row with max conf (det now only keeps one row and four columns)
                det = det[max_conf_id, :].reshape(1, 6)
                to_append = xyxy2xywh(det[:, :4].cpu().numpy().reshape(1, 4).astype(int))
                if len(crds_crop) == 0:
                    crds_crop = np.append(crds_crop, to_append).reshape(-1, 4)
                else:
                    crds_crop = np.append(crds_crop, to_append, axis=0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    # draw_boxes(im0, bbox_xyxy, identities)  # no tracking boxes for now

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (*xyxy, conf))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s' % (names[int(cls)])
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=2)
            else:
                deepsort.increment_ages()
                if len(crds_crop) == 0:
                    crds_crop = np.append(crds_crop, np_nan).reshape(-1, 4)
                else:
                    crds_crop = np.append(crds_crop, np_nan, axis=0)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

        frame_no += 1

    ##############################################################################
    # this part should be temporary as online filtering will be implemented
    crds_crop = linear_interp(crds_crop)
    max_side_bbox = crds_crop[:, 2:].max(axis=1) * 1.2  # 20% relaxation
    # making sure that the window size does not exceed frame size
    max_side_bbox = np.where(max_side_bbox < min(w, h), max_side_bbox, min(w, h))
    crds_crop = np.c_[crds_crop, max_side_bbox]
    crds_crop = smoothing(crds_crop, fps)
    np.savetxt(os.path.join(opt.smooth_txt,
                            os.path.basename(path)[:-4] + '_savgol_' + '.txt'),
               crds_crop,
               delimiter=' ')

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform == 'darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
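
# smoothing() is not shown in this snippet. Given the '_savgol_' suffix in the
# saved filename above, a hedged sketch of a Savitzky-Golay pass over the
# per-frame crop coordinates using scipy.signal.savgol_filter; scaling the
# window to roughly one second of frames is an assumption, and it requires at
# least `window` frames of data.
from scipy.signal import savgol_filter

def smoothing_sketch(crds, fps, polyorder=3):
    window = max(int(fps) // 2 * 2 + 1, 5)  # odd window, at least 5 for polyorder=3
    out = crds.copy()
    for col in range(crds.shape[1]):  # filter each coordinate column independently
        out[:, col] = savgol_filter(crds[:, col], window, polyorder)
    return out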
def detect(image, weights): # Final Declaration final_det = None final_class = [] final_text = [] source, weights, save_txt, imgsz, ocr, save_img = image, weights, True, 1280, True, True webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) # Directories save_dir = Path(increment_path(Path('runs/detect') / 'exp', exist_ok=False)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device('') half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: save_img = True dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=False)[0] # Apply NMS pred = non_max_suppression(pred, 0.6, 0.45, classes=None, agnostic=False) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size print() print(img.shape[2:], " ---------------------- ", im0.shape) det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() final_det = det[:, :4].tolist() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # python detect.py --source test\ --weights weights\model.pt --save-txt --save-conf final_class_reverse = [] for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (conf, cls, *xywh) if True else (cls, *xywh) # label format # print(names[int(cls)], str(line[0].squeeze().tolist()), 'box(xywh) = ' + str(line[2:])) 
final_class_reverse.append( names[int(cls)] + '(' + str(round((line[0].squeeze().tolist()), 2)) + ')') with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or view_img: # Add bbox to image label = f'{names[int(cls)]} {conf:.2f}' plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=2) for i in final_class_reverse: final_class.append(i) # Print time (inference + NMS) print(f'{s}Done. ({t2 - t1:.3f}s)') if ocr: # Open the image in RGB mode im = Image.open(path) newlist = [] # pytesseract.pytesseract.tesseract_cmd = "/app/.apt/usr/bin/tesseract" for I in final_det: # [27.0, 14.0, 177.0, 91.0] left, top, right, bottom = I[0], I[1], I[2], I[3] im1 = im.crop((left, top, right, bottom)) text = pytesseract.image_to_string(im1) # print(text.split('\n')) newlist = text.split('\n') newlist.pop(-1) final_text.append(' '.join(map(str, newlist))) # im1.show() print('LENGTHS', len(final_class), len(final_text)) final_class.reverse() res = dict(zip(final_class, final_text)) print("Resultant dictionary is: " + str(res)) # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") return res
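# Hedged sketch of the ocr branch above as a standalone helper: crop each
# detected xyxy box out of the source image with PIL and OCR it with
# pytesseract. The helper name and arguments are illustrative, not from the
# original code.
from PIL import Image
import pytesseract

def ocr_boxes(image_path, boxes_xyxy):
    im = Image.open(image_path)
    texts = []
    for left, top, right, bottom in boxes_xyxy:
        crop = im.crop((left, top, right, bottom))
        texts.append(pytesseract.image_to_string(crop).strip())
    return texts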
def detect(save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.startswith( 'rtsp') or source.startswith('http') or source.endswith('.txt') # Initialize set_logging() device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) print(pred) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() handles_ymax = [] #handles_xmid = [] handles_ymid = [] handle_mids = [] tailgates_ymin = [] tailgates_ymax = [] tailgate_ythird_coord = [] # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in reversed( det): #coords, confidence, classes if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format if save_img or view_img: # Add bbox to image #label = '%s %.2f' % (names[int(cls)], conf) #confidence not needed label = '%s ' % (names[int(cls)]) coord1, coord2, dim_label = plot_one_box( xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # 
get important points for line drawing if int(cls) == 1: #handles ymax = max(coord1[1], coord2[1]) handles_ymax.append(ymax) xmid = int((coord1[0] + coord2[0]) / 2) ymid = int((coord1[1] + coord2[1]) / 2) handle_mids.append([xmid, ymid]) #cv2.circle(im0, (xmid,ymax), 8, (255,0,0), -1) elif int(cls) == 0: #tailgates tailgate_xmin = min(coord1[0], coord2[0]) ymax = max(coord1[1], coord2[1]) tailgates_ymax.append(ymax) ymin = min(coord1[1], coord2[1]) tailgates_ymin.append(ymin) tailgate_ythird = int( abs(coord1[1] - coord2[1]) / 3 + ymin) tailgate_ythird_coord.append( [tailgate_xmin, tailgate_ythird]) # added ability to measure between bottom of handle and bottom of tailgate if handle in top 1/3 for i, (handle_mid, max_point) in enumerate(zip(handle_mids, handles_ymax)): hyps = [ hypotenuse(handle_mid, b) for b in tailgate_ythird_coord ] closest_index = np.argmin(hyps) if handle_mid[1] < tailgate_ythird_coord[closest_index][1]: min_dist_tg = min( [int(abs(max_point - x)) for x in tailgates_ymax]) start_point = (handle_mid[0], handles_ymax[i]) end_point = (handle_mid[0], handles_ymax[i] + min_dist_tg) cv2.line(im0, start_point, end_point, (100, 100, 0), 4) line_mid = int((start_point[1] + end_point[1]) / 2) cv2.putText(im0, label, (start_point[0], line_mid), 0, 1, [0, 0, 0], thickness=2, lineType=cv2.LINE_AA) ### Previous ability to measure between bottom of handle and tailgate --- was not robust. ### Keeping until determined not needed # for i, (mid_point, max_point) in enumerate(zip(handles_ymid, handles_ymax)): # print(f'\nmidpoint: {mid_point}') # min_y_dist = min([int(abs(mid_point - x)) for x in tailgate_ythird]) # gets min distance from handle midpoint to tailgate third # print(f'min y dist: {min_y_dist}') # print(f'tailgate third: {tailgate_ythird}') # min_dist_third = min([x for x in tailgate_ythird if abs(x - min_y_dist) in handles_ymid]) # print(f'min_dist_third: {min_dist_third}') # if mid_point < min_dist_third: #handle mid point in top 1/3 of truck # min_dist_tg = min([int(abs(max_point - x)) for x in tailgates_ymax]) # print(f'min_dist_tg {min_dist_tg}') # start_point = (handles_xmid[i], handles_ymax[i]) # print(f'start point: {start_point}') # end_point = (handles_xmid[i], handles_ymax[i] + min_dist_tg) # print(f'end point: {end_point}') # cv2.line(im0, start_point, end_point, (100,100,0), 4) # label = f'Distance: {min_dist_tg/300:.4f}"L' # line_mid = int((start_point[1] + end_point[1])/2) # cv2.putText(im0, label, (start_point[0], line_mid), 0, 1, [0, 0, 0], # thickness=2, lineType=cv2.LINE_AA) # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % Path(out)) if platform.system() == 'Darwin' and not opt.update: # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
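# hypotenuse is called above but not defined in this file; presumably it is
# the Euclidean distance between two (x, y) points, e.g. (an assumption):
import math

def hypotenuse(p, q):
    return math.hypot(p[0] - q[0], p[1] - q[1])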
def detect(model, source, out, img_size, iouThrs, conf_thres, agnostic_nms, names=None): # Initialize set_logging() device = select_device('') # '' picks CUDA when available; select_device expects '' or a GPU index such as '0', not the string 'cuda' half = device.type != 'cpu' # half precision only supported on CUDA imgsz = check_img_size(img_size, s=model.stride.max()) # check img_size if half: model.half() # to FP16 save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors if names is None: names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=False)[0] # Apply NMS pred = non_max_suppression(pred, conf_thres, iouThrs, classes=None, agnostic=agnostic_nms) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=1) # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1)) # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) print('Done. (%.3fs)' % (time.time() - t0)) return save_path
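# Hypothetical usage of the wrapper above; the checkpoint and folder names
# are placeholders, and the model is loaded once and reused across calls.
model = attempt_load('yolov5s.pt', map_location='cuda:0')  # load FP32 model
result_path = detect(model, source='data/images', out='inference/output',
                     img_size=640, iouThrs=0.45, conf_thres=0.25,
                     agnostic_nms=False)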
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x640x640 img = np.ascontiguousarray(img) # torch tensor operations img = torch.from_numpy(img).to(device) img = img.float() # img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # nb, _, height, width = img.shape # batch size, channels, height, width # whwh = torch.Tensor([width, height, width, height]).to(device) # Inference t1 = time_synchronized() pred = model(img, augment=True)[0] # use TTA (test-time augmentation) # Apply NMS pred = non_max_suppression(pred, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image # if webcam: # batch_size >= 1 # p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() # else:
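# The fragment above repeats yolov5's standard preprocessing; the same steps
# as a self-contained sketch (assumes a BGR uint8 frame as read by cv2):
import numpy as np
import torch

def preprocess(frame_bgr, device='cpu'):
    img = frame_bgr[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device).float() / 255.0  # uint8 to [0, 1]
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add batch dimension
    return img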
def detect(config, save_img=False): out, source, weights, view_img, save_txt, imgsz = ( config["output"], config["source"], config["weights"], config["view_img"], config["save_txt"], config["img_size"], ) webcam = ( source.isnumeric() or source.endswith(".txt") or source.lower().startswith(("rtsp://", "rtmp://", "http://")) ) # Initialize set_logging() device = select_device(config["device"]) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) half = device.type != "cpu" # half precision only supported on CUDA # # Load model # model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name="resnet101", n=2) # initialize modelc.load_state_dict( torch.load("weights/resnet101.pt", map_location=device)["model"] ).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: save_img = True dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, "module") else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference if device.type != "cpu": model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())) ) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=config["augment"])[0] # Apply NMS pred = non_max_suppression( pred, config["conf_thres"], config["iou_thres"], classes=config["classes"], agnostic=config["agnostic_nms"], ) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], "%g: " % i, im0s[i].copy() else: p, s, im0 = path, "", im0s save_path = str(Path(out) / Path(p).name) # img.jpg txt_path = str(Path(out) / Path(p).stem) + ( "_%g" % dataset.frame if dataset.mode == "video" else "" ) s += "%gx%g " % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = ( (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn) .view(-1) .tolist() ) # normalized xywh line = (cls, *xywh) # label format with open(txt_path + ".txt", "a") as f: f.write(("%g " * len(line)).rstrip() % line + "\n") if save_img or view_img: # Add bbox to image label = f"{names[int(cls)]} {conf:.2f}" plot_one_box( xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3, ) # Print time (inference + NMS) print(f"{s}Done. 
({t2 - t1:.3f}s)") # Stream results if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: if dataset.mode == "image": cv2.imwrite(save_path, im0) else: # 'video' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fourcc = "mp4v" # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h) ) vid_writer.write(im0) if save_txt or save_img: print("Results saved to %s" % Path(out)) # if platform.system() == "Darwin" and not config["update"]: # MacOS # os.system("open " + save_path) print(f"Done. ({time.time() - t0:.3f}s)")
def detect(save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.startswith( 'rtsp') or source.startswith('http') or source.endswith('.txt') # Initialize set_logging() device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model if weights[0].split('.')[-1] == 'pt': backend = 'pytorch' elif weights[0].split('.')[-1] == 'pb': backend = 'graph_def' elif weights[0].split('.')[-1] == 'tflite': backend = 'tflite' else: backend = 'saved_model' if backend == 'saved_model' or backend == 'graph_def' or backend == 'tflite': import tensorflow as tf from tensorflow import keras if backend == 'pytorch': model = attempt_load(weights, map_location=device) # load FP32 model elif backend == 'saved_model': if tf.__version__.startswith('1'): config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) loaded = tf.saved_model.load( sess, [tf.saved_model.tag_constants.SERVING], weights[0]) tf_input = loaded.signature_def['serving_default'].inputs[ 'input_1'] tf_output = loaded.signature_def['serving_default'].outputs[ 'tf__detect'] else: model = keras.models.load_model(weights[0]) elif backend == 'graph_def': if tf.__version__.startswith('1'): config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) graph = tf.Graph() graph_def = graph.as_graph_def() graph_def.ParseFromString(open(weights[0], 'rb').read()) tf.import_graph_def(graph_def, name='') default_graph = tf.get_default_graph() tf_input = default_graph.get_tensor_by_name('x:0') tf_output = default_graph.get_tensor_by_name('Identity:0') else: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt # https://github.com/leimao/Frozen_Graph_TensorFlow def wrap_frozen_graph(graph_def, inputs, outputs, print_graph=False): def _imports_graph_def(): tf.compat.v1.import_graph_def(graph_def, name="") wrapped_import = tf.compat.v1.wrap_function( _imports_graph_def, []) import_graph = wrapped_import.graph if print_graph == True: print("-" * 50) print("Frozen model layers: ") layers = [op.name for op in import_graph.get_operations()] for layer in layers: print(layer) print("-" * 50) return wrapped_import.prune( tf.nest.map_structure(import_graph.as_graph_element, inputs), tf.nest.map_structure(import_graph.as_graph_element, outputs)) graph = tf.Graph() graph_def = graph.as_graph_def() graph_def.ParseFromString(open(weights[0], 'rb').read()) frozen_func = wrap_frozen_graph(graph_def=graph_def, inputs="x:0", outputs="Identity:0", print_graph=False) elif backend == 'tflite': # Load TFLite model and allocate tensors. interpreter = tf.lite.Interpreter(model_path=opt.weights[0]) interpreter.allocate_tensors() # Get input and output tensors. 
input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() if backend == 'pytorch': imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half and backend == 'pytorch': model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, auto=True if backend == 'pytorch' else False) else: save_img = True dataset = LoadImages(source, img_size=imgsz, auto=True if backend == 'pytorch' else False) # Get names and colors if backend == 'pytorch': names = model.module.names if hasattr(model, 'module') else model.names # Assume using COCO labels else: names = [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' ] colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img if backend == 'pytorch': _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once elif backend == 'saved_model': if tf.__version__.startswith('1'): _ = sess.run(tf_output.name, feed_dict={ tf_input.name: img.permute(0, 2, 3, 1).cpu().numpy() }) else: _ = model(img.permute(0, 2, 3, 1).cpu().numpy(), training=False) elif backend == 'graph_def': if tf.__version__.startswith('1'): _ = sess.run(tf_output.name, feed_dict={ tf_input.name: img.permute(0, 2, 3, 1).cpu().numpy() }) else: _ = frozen_func( x=tf.constant(img.permute(0, 2, 3, 1).cpu().numpy())) elif backend == 'tflite': input_data = img.permute(0, 2, 3, 1).cpu().numpy() interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() output_data = interpreter.get_tensor(output_details[0]['index']) for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half and backend == 'pytorch' else img.float( ) # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() if backend == 'pytorch': pred = model(img, augment=opt.augment)[0] elif backend == 'saved_model': if tf.__version__.startswith('1'): pred = sess.run(tf_output.name, feed_dict={ tf_input.name: img.permute(0, 2, 3, 1).cpu().numpy() }) pred = torch.tensor(pred) else: res = model(img.permute(0, 2, 3, 1).cpu().numpy(), training=False) pred = res[0].numpy() pred = torch.tensor(pred) elif backend == 
'graph_def': if tf.__version__.startswith('1'): pred = sess.run(tf_output.name, feed_dict={ tf_input.name: img.permute(0, 2, 3, 1).cpu().numpy() }) pred = torch.tensor(pred) else: pred = frozen_func( x=tf.constant(img.permute(0, 2, 3, 1).cpu().numpy())) pred = torch.tensor(pred.numpy()) elif backend == 'tflite': input_data = img.permute(0, 2, 3, 1).cpu().numpy() interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() output_data = interpreter.get_tensor(output_details[0]['index']) pred = torch.tensor(output_data) # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format if save_img or view_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % Path(out)) if platform.system() == 'Darwin' and not opt.update: # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
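# Minimal TFLite round trip matching the tflite branch above; the model path
# is a placeholder, and yolov5-style TFLite exports take NHWC float input.
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='yolov5s-fp16.tflite')
interpreter.allocate_tensors()
inp = interpreter.get_input_details()[0]
out = interpreter.get_output_details()[0]
x = np.zeros(inp['shape'], dtype=inp['dtype'])  # dummy NHWC input
interpreter.set_tensor(inp['index'], x)
interpreter.invoke()
pred = interpreter.get_tensor(out['index'])  # raw predictions, before NMS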
def detect(save_img=False,weights='yolov5s.pt', source='data/images',imgsz=512,conf_thres=0.25,iou_thres=0.45,myAnnFileName='estimated_ann'): webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) #default_parameters: exist_ok=False project='runs/detect' name='exp' save_txt=False view_img=False vid_path, vid_writer = None, None save_dir=Path(increment_path(Path(project) / name, exist_ok)) (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # Initialize set_logging() device = select_device('') half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names #annotation_file script_dir = os.path.dirname(__file__) rel_path = myAnnFileName abs_file_path = os.path.join(script_dir, rel_path) file_annot=open(abs_file_path,'w') #changing labels from net to proper form cls_dict={0:5,1:4,2:1,3:2,4:6,5:3} # # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, False)[0] # Apply NMS pred = non_max_suppression(pred, conf_thres, iou_thres, None, False) t2 = time_synchronized() p, s, im0 = Path(path), '', im0s # uncommented: p is needed by file_annot.write below file_annot.write(p.stem.split('_')[0]+'.JPG:') xywh_2print=[] # # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy() else: p, s, im0 = Path(path), '', im0s save_path = str(save_dir / p.name) txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in reversed(det): xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh xyxy_0=[int(elem.item()) for elem in xyxy] xywh_0=[xyxy_0[0],xyxy_0[1],int(xyxy[2]-xyxy[0]),int(xyxy[3]-xyxy[1])] xywh_0.append(cls_dict[int(cls.item())]) xywh_2print.append(xywh_0) #xywh_2print.append([int(elem.item()) for elem in xyxy]) #xyxy_2print[-1].append(cls_dict[int(cls.item())]) # Print time (inference + NMS) print('%sDone. 
(%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(str(p), im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) xywh_2print=str(xywh_2print) xywh_2print=xywh_2print[1:-1] file_annot.write(xywh_2print) file_annot.write('\n') file_annot.close() print('Done. (%.3fs)' % (time.time() - t0))
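# For reference, one annotation line written above has the shape below (the
# filename is a hedged illustration); boxes are pixel top-left xywh and
# class ids are remapped through cls_dict = {0: 5, 1: 4, 2: 1, 3: 2, 4: 6, 5: 3}:
#   DSC0001.JPG:[x, y, w, h, cls], [x, y, w, h, cls]
# The per-box conversion reduces to:
def xyxy_to_xywh_row(xyxy, cls, cls_dict):
    x1, y1, x2, y2 = (int(v) for v in xyxy)
    return [x1, y1, x2 - x1, y2 - y1, cls_dict[int(cls)]]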
def detect(save_img=False): source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) ##### Inicializar DEEPSORT cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Directories save_dir = Path( increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(opt.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f'{n} {names[int(c)]}s, ' # add to string ###### Agregar deepSort bbox_xywh = [] confs = [] # Adapt detections to deep sort input format for *xyxy, conf, cls in det: x_c, y_c, bbox_w, bbox_h = 
bbox_rel(*xyxy) obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = deepsort.update(xywhs, confss, im0) # draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] draw_boxes(im0, bbox_xyxy, identities) ###### # # Write results # for *xyxy, conf, cls in reversed(det): # if save_txt: # Write to file # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh # line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format # with open(txt_path + '.txt', 'a') as f: # f.write(('%g ' * len(line)).rstrip() % line + '\n') # if save_img or view_img: # Add bbox to image # label = f'{names[int(cls)]} {conf:.2f}' # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) else: deepsort.increment_ages() # Print time (inference + NMS) print(f'{s}Done. ({t2 - t1:.3f}s)') # Stream results if view_img: cv2.imshow(str(p), im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") print(f'Done. ({time.time() - t0:.3f}s)')
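# bbox_rel is used above but not defined in this file; a plausible sketch
# converting xyxy corners into the center-based xywh that deepsort.update
# expects (an assumption based on how its outputs are consumed):
def bbox_rel(x1, y1, x2, y2):
    w = x2 - x1
    h = y2 - y1
    return x1 + w / 2, y1 + h / 2, w, h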
def streamRecognition(sourceImage): # Initialize source = sourceImage set_logging() weights = 'yolov3-tiny.pt' device = 'cpu' imgsz = 640 # webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( # ('rtsp://', 'rtmp://', 'http://', 'https://')) # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader # vid_path, vid_writer = None, None # if webcam: # view_img = check_imshow() # cudnn.benchmark = True # set True to speed up constant image size inference # dataset = LoadStreams(source, img_size=imgsz, stride=stride) # else: # dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference # if device.type != 'cpu': # model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once t0 = time.time() # for path, img, im0s, vid_cap in dataset: # Padded resize # img = letterbox(source, self.img_size, stride=self.stride)[0] # Convert # img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 # img = np.ascontiguousarray(img) img = torch.from_numpy(source).to(device) img = img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=False)[0] # Apply NMS pred = non_max_suppression(pred, 0.25, 0.45) # restored: without NMS the loop below would iterate raw predictions; opt is unavailable in this function, so thresholds are hard-coded t2 = time_synchronized() # Apply Classifier # if classify: # pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image # if webcam: # batch_size >= 1 # p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count # else: im0 = source # fixed: im0s is never defined in this stripped-down variant s = '' # p = Path(p) # to Path # save_path = str(save_dir / p.name) # img.jpg # txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results # for *xyxy, conf, cls in reversed(det): # if save_txt: # Write to file # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh # line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format # with open(txt_path + '.txt', 'a') as f: # f.write(('%g ' * len(line)).rstrip() % line + '\n') # if save_img or view_img: # Add bbox to image # label = f'{names[int(cls)]} {conf:.2f}' # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # Print time (inference + NMS) # print(f'{s}Done. ({t2 - t1:.3f}s)') # Stream results # if view_img: # cv2.imshow(str(p), im0) # cv2.waitKey(1) # 1 millisecond # Return the image (box drawing above is commented out). return im0
def detect( weights='yolov5s.pt', # model.pt path(s) source='data/images', # file/dir/URL/glob, 0 for webcam imgsz=640, # inference size (pixels) conf_thres=0.25, # confidence threshold iou_thres=0.45, # NMS IOU threshold max_det=1000, # maximum detections per image device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu view_img=False, # show results save_txt=False, # save results to *.txt save_conf=False, # save confidences in --save-txt labels save_crop=False, # save cropped prediction boxes nosave=False, # do not save images/videos classes=None, # filter by class: --class 0, or --class 0 2 3 agnostic_nms=False, # class-agnostic NMS augment=False, # augmented inference update=False, # update all models project='runs/detect', # save results to project/name name='exp', # save results to project/name exist_ok=False, # existing project/name ok, do not increment line_thickness=3, # bounding box thickness (pixels) hide_labels=False, # hide labels hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference ): save_img = not nosave and not source.endswith( '.txt') # save inference images webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(device) half &= device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check image size names = model.module.names if hasattr( model, 'module') else model.names # get class names if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet50', n=2) # initialize modelc.load_state_dict( torch.load('resnet50.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: dataset = LoadImages(source, img_size=imgsz, stride=stride) # Run inference if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=augment)[0] # Apply NMS pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s.copy(), getattr( dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = 
torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else ( names[c] if hide_conf else f'{names[c]} {conf:.2f}') plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) # Print time (inference + NMS) print(f'{s}Done. ({t2 - t1:.3f}s)') # Stream results if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' or 'stream' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path += '.mp4' vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") if update: strip_optimizer(weights) # update model (to fix SourceChangeWarning) print(f'Done. ({time.time() - t0:.3f}s)')
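# save_one_box above is yolov5's crop-saving helper; a minimal stand-in for
# readers without that utils module (the real helper also pads the box,
# optionally squares it, and applies a small gain):
import cv2

def save_one_box_min(xyxy, im, file='crop.jpg', BGR=True):
    x1, y1, x2, y2 = (int(v) for v in xyxy)
    crop = im[max(y1, 0):y2, max(x1, 0):x2]
    cv2.imwrite(str(file), crop if BGR else crop[:, :, ::-1])
    return crop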
def detect(save_img=False): global cur_pic global last_pic source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) # Directories save_dir = Path( increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(opt.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: print("load from webcam") view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: print("load from picture") save_img = True dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections # process each image: for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) # keep a copy of the image for cropping target bboxes later im0_copy = im0.copy() # on every loop, move the contents of cur_pic into last_pic and clear cur_pic if cur_pic: last_pic = cur_pic cur_pic = [] # lists holding all classes detected in one image, uploaded to the frontend together abnormal_list = [] abnormal_list2 = [] # comparison counter: the current image is compared against every entry in last_pic; if compare_num equals len(last_pic) when the check finishes, this is a new object, otherwise a duplicate compare_num = 0 new_object_flag = False # if any objects were detected in the image if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results # print the number of detections per class for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results # save predictions; this runs once per detection box on the image for *xyxy, conf, cls in reversed(det): compare_num = 0 # cur_pic1 is a dict holding the info of one bbox in this image cur_pic1 = dict() cur_pic1['cls'] = torch.tensor(cls).view(-1).tolist()[0] cur_pic1['xyxy'] = torch.tensor(xyxy).view( 1, 4).view(-1).tolist() cur_pic1['conf'] = torch.tensor(conf).view(-1).tolist()[0] cur_pic.append(cur_pic1) # coordinates of one bbox in the current frame:
x1, y1, x2, y2 = cur_pic1['xyxy'][0], cur_pic1['xyxy'][1], cur_pic1['xyxy'][2], cur_pic1['xyxy'][3] label = f'{names[int(cls)]} {conf:.2f}' plot_one_box(xyxy, im0, label=label, color=(255, 0, 0), line_thickness=3) # de-duplication: compare with the previous frame to see whether a new object appears; if so, send this frame's detection results, otherwise send nothing # steps: # 1. check whether the previous frame is empty: # 1.1. if empty, send all objects of the current frame # 1.2. if not empty, set new_object_flag=False # 1.2.1. compare each result in this frame against each result in the previous frame, using IoU. # if a bbox in the current frame has an IoU below the threshold against every previous result, it is a new object: set new_object_flag=True and send all results of this frame to the frontend # otherwise, break if last_pic: for last_pic1 in last_pic: # coordinates of one bbox in the previous frame x1_hat, y1_hat, x2_hat, y2_hat = last_pic1['xyxy'][0], last_pic1['xyxy'][1], last_pic1['xyxy'][2], last_pic1['xyxy'][3] if not compare_iou(x1, y1, x2, y2, x1_hat, y1_hat, x2_hat, y2_hat): print("IoU of the two frames is below the threshold, keeping: {}".format( cur_pic1['conf'])) compare_num += 1 else: print("IoU of the two frames is too large, compare_num: {}".format( compare_num)) continue if compare_num == len(last_pic): new_object_flag = True break else: new_object_flag = True break if new_object_flag: cur_pic.clear() for *xyxy, conf, cls in reversed(det): # draw the box on the original image label = f'{names[int(cls)]} {conf:.2f}' plot_one_box(xyxy, im0, label=label, color=(255, 0, 0), line_thickness=3) cur_pic1 = dict() cur_pic1['cls'] = torch.tensor(cls).view( -1).tolist()[0] cur_pic1['xyxy'] = torch.tensor(xyxy).view( 1, 4).view(-1).tolist() cur_pic1['conf'] = torch.tensor(conf).view( -1).tolist()[0] cur_pic.append(cur_pic1) label = f'{names[int(cls)]} {conf:.2f}' label_name = label.split(' ')[0] cropped_image = im0_copy[int(xyxy[1]):int(xyxy[3]), int(xyxy[0]):int(xyxy[2])] if cropped_image.shape[0] and cropped_image.shape[1]: cropped_image = base64.b64encode( cv2.imencode('.jpg', cropped_image) [1].tostring()).decode("utf-8") abnormal_list.append((label_name, cropped_image), ) abnormal_list2.append(label_name, ) s = json.dumps(abnormal_list) dangerousClient.publish("abnormal", payload=s) print("------> published one abnormal_list message") dangerousClient.publish( topic="warning", payload="Hazardous material detected in security area") abnormal_list.clear() abnormal_list2.clear() flow_danger = base64.b64encode( cv2.imencode('.jpg', im0)[1].tostring()).decode("utf-8") dangerousClient.publish(topic="flowDangerous", payload=flow_danger) # Stream results if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond
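# compare_iou is called above but not defined here; a plausible sketch that
# returns True when the IoU of the two boxes exceeds a threshold (the 0.5
# default is an assumption), matching how the de-duplication logic uses it:
def compare_iou(x1, y1, x2, y2, x1b, y1b, x2b, y2b, thr=0.5):
    ix1, iy1 = max(x1, x1b), max(y1, y1b)
    ix2, iy2 = min(x2, x2b), min(y2, y2b)
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    union = (x2 - x1) * (y2 - y1) + (x2b - x1b) * (y2b - y1b) - inter
    return union > 0 and inter / union > thr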
def person_detection(model, img0, imgsz, device, half, img_size=640, conf_thres=0.25, iou_thres=0.45, view_img=False, save_txt=False, \ save_conf=False, classes=[0], agnostic_nms=False, augment=False, update=False, exist_ok=False, save_img=False): # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Preprocessing assert img0 is not None, 'Image Not Valid' # Padded resize img = letterbox(img0, new_shape=imgsz)[0] # Convert img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 img = np.ascontiguousarray(img) img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=augment)[0] # Apply NMS pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms) t2 = time_synchronized() # Process detections output = [] for i, det in enumerate(pred): # detections per image s, im0 = '', img0 s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in reversed(det): tmp = [i.cpu() for i in xyxy] tmp.append(conf.cpu()) output.append(tmp) # if save_txt: # Write to file # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh # line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format # with open(txt_path + '.txt', 'a') as f: # f.write(('%g ' * len(line)).rstrip() % line + '\n') # if save_img or view_img: # Add bbox to image # label = '%s %.2f' % (names[int(cls)], conf) # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1)) return np.array(output)
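# Hedged usage of person_detection above: it returns an array with one row
# per person box, xyxy corners plus confidence. File names are placeholders.
import cv2
model = attempt_load('yolov5s.pt', map_location='cpu')  # load FP32 model
boxes = person_detection(model, cv2.imread('people.jpg'), imgsz=640,
                         device='cpu', half=False)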
def test( data, weights=None, batch_size=16, imgsz=640, conf_thres=0.001, iou_thres=0.6, # for NMS save_json=False, single_cls=False, augment=False, verbose=False, model=None, dataloader=None, save_dir='', merge=False, save_txt=False): # Initialize/load model and set device training = model is not None if training: # called by train.py device = next(model.parameters()).device # get model device else: # called directly device = select_device(opt.device, batch_size=batch_size) merge, save_txt = opt.merge, opt.save_txt # use Merge NMS, save *.txt labels if save_txt: out = Path('inference/output') if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder # Remove previous for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')): os.remove(f) # Load model model = Darknet(opt.cfg).to(device) # load model try: ckpt = torch.load(weights, map_location=device) # load checkpoint ckpt['model'] = { k: v for k, v in ckpt['model'].items() if model.state_dict()[k].numel() == v.numel() } model.load_state_dict(ckpt['model'], strict=False) except: load_darknet_weights(model, weights) imgsz = check_img_size(imgsz, s=32) # check img_size # Half half = device.type != 'cpu' # half precision only supported on CUDA if half: model.half() # Configure model.eval() with open(data) as f: data = yaml.load(f, Loader=yaml.FullLoader) # model dict nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for [email protected]:0.95 niou = iouv.numel() # Dataloader if not training: img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once path = data['test'] if opt.task == 'test' else data[ 'val'] # path to val/test images dataloader = create_dataloader(path, imgsz, batch_size, 32, opt, hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0] seen = 0 try: names = model.names if hasattr(model, 'names') else model.module.names except: names = load_classes(opt.names) coco91class = coco80_to_coco91_class() s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', '[email protected]', '[email protected]:.95') p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. 
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    torch.save(model, "v4-csp.pkl")
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # GIoU, obj, cls

            # Run NMS
            t = time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                txt_path = str(out / Path(paths[si]).stem)
                pred[:, :4] = scale_coords(img[si].shape[1:], pred[:, :4], shapes[si][0], shapes[si][1])  # to original
                for *xyxy, conf, cls in pred:
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    with open(txt_path + '.txt', 'a') as f:
                        f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = Path(paths[si]).stem
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': int(image_id) if image_id.isnumeric() else image_id,
                                  'category_id': coco91class[int(p[5])],
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if batch_i < 1:
            f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i)  # filename
            plot_images(img, targets, paths, str(f), names)  # ground truth
            f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
            plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Save JSON
    if save_json and len(jdict):
        f = 'detections_val2017_%s_results.json' % \
            (weights.split(os.sep)[-1].replace('.pt', '') if isinstance(weights, str) else '')  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]
            cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api
            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print('ERROR: pycocotools unable to run: %s' % e)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
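This test() returns a (metrics, maps, times) triple. Below is a minimal standalone-call sketch; the function's full signature is not shown in this excerpt, so the keyword names are assumptions inferred from the variables the body reads (weights, conf_thres, iou_thres, save_json):

    # Hedged usage sketch -- keyword names inferred from the body above, not confirmed.
    results, maps, t = test(weights='v4-csp.pt',
                            batch_size=16,
                            imgsz=640,
                            conf_thres=0.001,  # low threshold: mAP integrates over nearly all predictions
                            iou_thres=0.65,    # NMS IoU threshold
                            save_json=True)    # also dump detections_val2017_*_results.json for pycocotools
    mp, mr, map50, map = results[:4]           # mean P, mean R, mAP@0.5, mAP@0.5:0.95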
def detect(weights='yolov3.pt', source='0', img_size=640, conf_thres=0.25, iou_thres=0.45, device='',
           view_img=False, save_txt=False, save_conf=False, classes=[0], agnostic_nms=False, augment=False,
           update=False, project='runs/detect', name='exp', exist_ok=False, save_img=False):
    imgsz = img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(project) / name, exist_ok=exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()  # load_state_dict() does not return the module, so chain separately

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print('Done. (%.3fs)' % (time.time() - t0))
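Note that this version defaults classes=[0], which silently filters detections to class 0 only. A minimal invocation sketch (paths are placeholders; the torch.no_grad() wrapper mirrors how upstream detect.py calls this function, since the body itself does not disable gradients):

    if __name__ == '__main__':
        with torch.no_grad():
            detect(weights='yolov3.pt',
                   source='data/images',  # a folder of images; '0' would open the webcam
                   img_size=640,
                   classes=None,          # pass None to keep all classes instead of the [0] default
                   save_txt=True,
                   save_conf=True)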
def test(valDataDict,
         weights=None,
         batch_size=32,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         save_dir=Path(''),  # for saving images
         save_txt=False,  # for auto-labelling
         save_conf=False,
         plots=True,
         log_imgs=0):  # number of logged images

    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)
        save_txt = opt.save_txt  # save *.txt labels

        # Directories
        save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    is_coco = False  # is COCO dataset
    nc = 1 if single_cls else int(valDataDict['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs, wandb = min(log_imgs, 100), None  # ceil
    try:
        import wandb  # Weights & Biases
    except ImportError:
        log_imgs = 0

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = valDataDict['imagePathList']
        dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, pad=0.5, rect=True)[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width

        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # box, obj, cls

            # Run NMS
            targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
            lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_txt else []  # for autolabelling
            t = time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Predictions
            predn = pred.clone()
            scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
                        f.write(('%g ' * len(line)).rstrip() % line + '\n')

            # W&B logging
            if plots and len(wandb_images) < log_imgs:
                box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                             "class_id": int(cls),
                             "box_caption": "%s %.3f" % (names[cls], conf),
                             "scores": {"class_score": conf},
                             "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
                boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
                wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': image_id,
                                  'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(pred, torch.cat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f'test_batch{batch_i}_labels.jpg'  # filename
            plot_images(img, targets, paths, f, names)  # labels
            f = save_dir / f'test_batch{batch_i}_pred.jpg'
            plot_images(img, output_to_target(output), paths, f, names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb and wandb.run:
            wandb.log({"Images": wandb_images})
            wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]})

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        anno_json = glob.glob('../coco/annotations/instances_val*.json')[0]  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, 'bbox')
            if is_coco:
                eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print('ERROR: pycocotools unable to run: %s' % e)

    # Return results
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
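When called from a training loop, model and dataloader are passed in so device setup and model loading are skipped. A hedged sketch of that call path, mirroring how the upstream train.py invokes test() -- ema, testloader, and the surrounding variables are assumptions, not shown in this excerpt:

    # Hedged sketch of the in-training call path; variable names are assumptions.
    results, maps, times = test(valDataDict,
                                batch_size=batch_size * 2,
                                imgsz=imgsz_test,
                                model=ema.ema,          # EMA weights, as upstream train.py does
                                dataloader=testloader,  # reuse the validation dataloader
                                save_dir=save_dir,
                                plots=False)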
def run(data,
        weights=None,  # model.pt path(s)
        batch_size=32,  # batch size
        imgsz=640,  # inference size (pixels)
        conf_thres=0.001,  # confidence threshold
        iou_thres=0.6,  # NMS IoU threshold
        task='val',  # train, val, test, speed or study
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        single_cls=False,  # treat as single-class dataset
        augment=False,  # augmented inference
        verbose=False,  # verbose output
        save_txt=False,  # save results to *.txt
        save_hybrid=False,  # save label+prediction hybrid results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_json=False,  # save a cocoapi-compatible JSON results file
        project='runs/val',  # save to project/name
        name='exp',  # save to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        half=True,  # use FP16 half-precision inference
        model=None,
        dataloader=None,
        save_dir=Path(''),
        plots=True,
        wandb_logger=None,
        compute_loss=None,
        ):
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
        imgsz = check_img_size(imgsz, s=gs)  # check image size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

        # Data
        with open(data) as f:
            data = yaml.safe_load(f)
        check_dataset(data)  # check

    # Half
    half &= device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    is_coco = type(data['val']) is str and data['val'].endswith('coco/val2017.txt')  # COCO dataset
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs = 0
    if wandb_logger and wandb_logger.wandb:
        log_imgs = min(wandb_logger.log_imgs, 100)

    # Dataloader
    if not training:
        if device.type != 'cpu':
            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
        task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
        dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=0.5, rect=True,
                                       prefix=colorstr(f'{task}: '))[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1, t2 = 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        t_ = time_synchronized()
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        t = time_synchronized()
        t0 += t - t_

        # Run model
        out, train_out = model(img, augment=augment)  # inference and training outputs
        t1 += time_synchronized() - t

        # Compute loss
        if compute_loss:
            loss += compute_loss([x.float() for x in train_out], targets)[1][:3]  # box, obj, cls

        # Run NMS
        targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
        t = time_synchronized()
        out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
        t2 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(out):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Predictions
            if single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
                        f.write(('%g ' * len(line)).rstrip() % line + '\n')

            # W&B logging - Media Panel plots
            if len(wandb_images) < log_imgs and wandb_logger.current_epoch > 0:  # Check for test operation
                if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0:
                    box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                                 "class_id": int(cls),
                                 "box_caption": "%s %.3f" % (names[cls], conf),
                                 "scores": {"class_score": conf},
                                 "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
                    boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
                    wandb_images.append(wandb_logger.wandb.Image(img[si], boxes=boxes, caption=path.name))
            wandb_logger.log_training_progress(predn, path, names) if wandb_logger and wandb_logger.wandb_run else None

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': image_id,
                                  'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(predn, torch.cat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f'val_batch{batch_i}_labels.jpg'  # labels
            Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
            f = save_dir / f'val_batch{batch_i}_pred.jpg'  # predictions
            Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # mAP@0.5, mAP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%11i' * 2 + '%11.3g' * 4  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t2))  # speeds per image
    if not training:
        shape = (batch_size, 3, imgsz, imgsz)
        print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb_logger and wandb_logger.wandb:
            val_batches = [wandb_logger.wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('val*.jpg'))]
            wandb_logger.log({"Validation": val_batches})
    if wandb_images:
        wandb_logger.log({"Bounding Box Debugger/Images": wandb_images})

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json')  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            check_requirements(['pycocotools'])
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, 'bbox')
            if is_coco:
                eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print(f'pycocotools unable to run: {e}')

    # Return results
    model.float()  # for training
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
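A direct-call sketch for run(); the upstream val.py drives this through argparse, and the dataset yaml must provide the 'val' path and 'nc' keys the body reads. The yaml path below is a placeholder:

    # Hedged usage sketch; 'data/coco128.yaml' and 'yolov5s.pt' are placeholders.
    results, maps, times = run(data='data/coco128.yaml',
                               weights='yolov5s.pt',
                               batch_size=32,
                               imgsz=640,
                               conf_thres=0.001,
                               iou_thres=0.6,
                               task='val',
                               half=True)  # silently disabled on CPU by half &= device.type != 'cpu'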
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.save_dir, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):  # output dir
        shutil.rmtree(out)  # delete dir
    os.makedirs(out)  # make new dir
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device, resave=opt.resave)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    # names = model.module.names if hasattr(model, 'module') else model.names
    names = ["gec_object", "bad_gec_object", "screw_hole_box", "screw_bolt"]
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        print("---\nPred before NMS:\n", pred)

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        print("---\nPred after NMS:\n", pred)
        print("OPT:", opt)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, conf, *xywh) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line) + '\n') % line)

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))

    print('Done. (%.3fs)' % (time.time() - t0))
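This variant reads a module-level opt rather than taking arguments. A hedged reconstruction of the parser it expects follows, inferred only from the attributes the body accesses; note the non-standard opt.resave, which feeds the custom attempt_load(..., resave=...) overload, so its default here is an assumption:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='best.pt')
    parser.add_argument('--source', type=str, default='inference/images')
    parser.add_argument('--save-dir', type=str, default='inference/output')
    parser.add_argument('--img-size', type=int, default=640)
    parser.add_argument('--conf-thres', type=float, default=0.25)
    parser.add_argument('--iou-thres', type=float, default=0.45)
    parser.add_argument('--device', default='')
    parser.add_argument('--view-img', action='store_true')
    parser.add_argument('--save-txt', action='store_true')
    parser.add_argument('--save-conf', action='store_true')
    parser.add_argument('--classes', nargs='+', type=int)
    parser.add_argument('--agnostic-nms', action='store_true')
    parser.add_argument('--augment', action='store_true')
    parser.add_argument('--resave', action='store_true')  # assumption: custom flag for the resave kwarg
    opt = parser.parse_args()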
def detect(self, _img, save_img=False):
    model = self.model
    save_txt = False
    save_img = True
    imgsz = 640
    conf_thres = 0.25
    iou_thres = 0.45
    classes = None
    agnostic_nms = False
    view_img = False

    # Initialize
    set_logging()
    device = select_device('')
    half = device.type != 'cpu'  # half precision only supported on CUDA
    bbox_list = []
    label_list = []
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()  # load_state_dict() does not return the module, so chain separately

    # Set Dataloader
    vid_path, vid_writer = None, None
    dataset = LoadImages(_img, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for img, im0s, vid_cap in dataset:  # note: assumes a LoadImages variant yielding 3 items (stock LoadImages also yields path)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=False)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            s, im0 = '', im0s
            # save_path = str(save_dir / p.name)
            # txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # if save_img or view_img:
                    # Add bbox to image
                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
                    for item in xyxy:
                        # print(item)
                        bbox_list.append(item.cpu().numpy())
                    label_list.append(label)

            bbox_array = np.array(bbox_list)
            bbox_array = bbox_array.reshape(-1, 4)

            # Print time (inference + NMS)
            # print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow("QQ", im0)
                if cv2.waitKey(0) == ord('q'):  # q to quit
                    raise StopIteration

    # Save results (image with detections)
    confidence = []
    label_name = []
    for label in label_list:
        split_label = label.split(' ')
        confidence.append(float(split_label[-1]))
        label_name.append(split_label[0])

    return im0, bbox_array, confidence, label_name
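The method above expects self.model to already hold a loaded network and returns the annotated image plus parallel box/confidence/label lists. A hedged wrapper sketch follows; the Detector class is not shown in the source and is assumed here only to make the call self-contained:

    # Hedged wrapper sketch; class name and weight path are assumptions.
    class Detector:
        def __init__(self, weights='best.pt'):
            self.model = attempt_load(weights, map_location=select_device(''))  # FP32 model

        detect = detect  # bind the method defined above

    im0, bbox_array, confidence, label_name = Detector().detect('test.jpg')
    print(bbox_array.shape)  # (N, 4) xyxy boxes; confidence and label_name are parallel lists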