def __call__(self, image, *args, **kwargs):
    """Detect objects in ``image`` and return an annotated copy plus box records.

    The image is preprocessed with ``self.image_preprocess``, run through
    ``self.model`` and filtered by ``non_max_suppression`` using
    ``self.conf_thres`` and ``self.iou_thres``.  Only the first entry of the
    NMS result is used.  ``*args`` and ``**kwargs`` are accepted but ignored.

    Returns:
        ``(annotated, records)`` where ``annotated`` is ``image.copy()`` with
        the detections drawn on it and ``records`` is a list of dicts with the
        keys ``'class'``, ``'confidence'`` and ``'box'``.
    """
    tensor = self.image_preprocess(image)
    raw = self.model(tensor)[0]
    dets = non_max_suppression(raw, self.conf_thres, self.iou_thres)[0]

    annotated = image.copy()
    summary = ''
    records = []
    if len(dets):
        # Map the boxes from the network input size back onto the image.
        dets[:, :4] = scale_coords(tensor.shape[2:], dets[:, :4], annotated.shape).round()

        # One "<count> <name>[s], " fragment per detected class.
        class_column = dets[:, -1]
        for class_id in class_column.unique():
            count = (class_column == class_id).sum()
            summary += f"{count} {self.names[int(class_id)]}{'s' * (count > 1)}, "

        for *corners, score, class_id in reversed(dets):
            idx = int(class_id)
            name = self.names[idx]
            plot_one_box(corners, annotated, label=f'{name} {score:.2f}',
                         color=colors(idx, True), line_thickness=2)
            records.append({
                'class': name,
                'confidence': round(score.item(), 2),
                'box': [int(v.item()) for v in corners],
            })
    print(summary)
    return annotated, records
def draw_bbox(self, pred, img, save_path):
    """Draw the detections in ``pred`` onto ``img`` and write the result to disk.

    Args:
        pred: Iterable of per-image detection tensors; every row is unpacked
            as ``(*xyxy, conf, cls)``.
        img: Image array; boxes are drawn on it in place.
        save_path: Directory the annotated image is written into.  The file
            for the i-th entry of ``pred`` is named ``test_{i+1:03d}.jpg``,
            i.e. ``test_001.jpg`` for the first one.
    """
    started = time.time()
    palette = [[random.randint(0, 255) for _ in range(3)] for _ in self.names]
    for index, det in enumerate(pred):
        canvas = img
        # Build the output file from the directory argument on every pass.
        # Re-joining onto the previous result nested the file name
        # (".../test_001.jpg/test_001.jpg") from the second image onwards.
        out_file = os.path.join(save_path, f'test_{index + 1:03d}.jpg')
        summary = '%gx%g ' % img.shape[:-1]
        if len(det):
            det[:, :4] = scale_coords(img.shape[:-1], det[:, :4], canvas.shape).round()

            # Per-class detection counts for the log line.
            for class_id in det[:, -1].unique():
                count = (det[:, -1] == class_id).sum()
                summary += f'{count} {self.names[int(class_id)]}s, '

            for *xyxy, conf, cls in reversed(det):
                label = f'{self.names[int(cls)]} {conf:.2f}'
                plot_one_box(xyxy, canvas, label=label, color=palette[int(cls)], line_thickness=1)

        print(f'{summary}Done. ({time.time() - started:.3f}s)')
        cv2.imwrite(out_file, canvas)
def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
    """Summarise, draw, show, save and/or render each image/prediction pair.

    Args:
        pprint: Print a one-line summary per image.
        show: Call ``show`` on the image, titled with its file name.
        save: Save the image as ``Path(save_dir) / self.files[i]``.
        render: Store the (possibly annotated) image back into ``self.imgs``.
        save_dir: Directory used when ``save`` is true.
    """
    palette = color_list()
    for idx, (img, pred) in enumerate(zip(self.imgs, self.pred)):
        summary = f'image {idx + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
        if pred is not None:
            class_column = pred[:, -1]
            for class_id in class_column.unique():
                count = (class_column == class_id).sum()  # detections of this class
                summary += f"{count} {self.names[int(class_id)]}{'s' * (count > 1)}, "
            if show or save or render:
                for *box, conf, cls in pred:
                    plot_one_box(box, img, label=f'{self.names[int(cls)]} {conf:.2f}',
                                 color=palette[int(cls) % 10])

        # Downstream steps use the PIL API, so convert arrays first.
        if isinstance(img, np.ndarray):
            img = Image.fromarray(img.astype(np.uint8))

        if pprint:
            print(summary.rstrip(', '))
        if show:
            img.show(self.files[idx])
        if save:
            fname = self.files[idx]
            img.save(Path(save_dir) / fname)
            terminator = ',' if idx < self.n - 1 else f' to {save_dir}\n'
            print(f"{'Saved' * (idx == 0)} {fname}", end=terminator)
        if render:
            self.imgs[idx] = np.asarray(img)
def mk_image(self):
    """Compose the display frame from ``self.cv_image``.

    When ``self.collision_state`` is set the frame is blended 50/50 with a
    solid red overlay and given a red border.  Every entry of
    ``self.detections`` is drawn with ``plot_one_box``; ``self.target_detection``
    (if any) is outlined in green while tracking and in white otherwise.

    Returns:
        The composed frame, resized to twice the original width and height.
    """
    frame = self.cv_image.copy()
    height, width, _ = frame.shape
    far_corner = (width, height)
    red = (0, 0, 255)

    if self.collision_state:
        overlay = np.full((height, width, 3), 128, dtype=np.uint8)
        cv2.rectangle(overlay, (0, 0), far_corner, red, thickness=-1)
        frame = cv2.addWeighted(frame, 0.5, overlay, 0.5, 2.2)
        cv2.rectangle(frame, (0, 0), far_corner, red, thickness=10)

    for det in self.detections:
        plot_one_box(det['bbox'], frame, label=det['label_name'],
                     color=det['color'], line_thickness=3)

    target = self.target_detection
    if target is not None:
        if self.is_tracking:
            outline, thickness = (0, 255, 0), 5
        else:
            outline, thickness = (255, 255, 255), 10
        cv2.rectangle(frame, target['bbox'], outline, thickness)

    return cv2.resize(frame, (int(width * 2), int(height * 2)))
def get_detect(imgf, save_dir = './'):
    """Run the detector on ``imgf`` and write annotated images into ``save_dir``.

    Relies on the module-level ``model``, ``device``, ``half``, ``imgsz``,
    ``names``, ``colors`` and ``yolo_conf`` objects.

    Args:
        imgf: Source handed to ``LoadImages``.
        save_dir: Output directory; created if missing.

    Returns:
        ``save_dir`` unchanged.
    """
    os.makedirs(save_dir, exist_ok=True)
    dataset = LoadImages(imgf, img_size=imgsz)
    for path, img, im0s, _ in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            pred = model(img, augment=yolo_conf.augment)[0]
        pred = non_max_suppression(pred, yolo_conf.conf_thres, yolo_conf.iou_thres,
                                   classes=yolo_conf.classes, agnostic=yolo_conf.agnostic_nms)

        save_path = os.path.join(save_dir, Path(path).name)
        for det in pred:  # detections per image
            im0 = im0s
            if len(det):
                # Rescale boxes from the network input size to the original image size.
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
            cv2.imwrite(save_path, im0)
    return save_dir
def plot_label(xyxy, im0, text, color, line_thickness, label):
    """Draw a labelled box on ``im0`` only when ``label`` is listed in ``LABELS``.

    ``text`` is accepted but not used by this function.
    """
    if label not in LABELS:
        return
    plot_one_box(xyxy, im0, label=label, color=color, line_thickness=line_thickness)
def get_detect_one(img_bytes):
    """Decode an encoded image, run the detector on it and return it annotated.

    Relies on the module-level ``model``, ``device``, ``half``, ``names``,
    ``colors`` and ``yolo_conf`` objects.

    Args:
        img_bytes: Encoded image data that ``cv2.imdecode`` can decode.

    Returns:
        The decoded image (after ``to_rgb``) with one labelled box drawn per
        detection.
    """
    # ndarray.tostring() is a deprecated alias of tobytes(); use the latter.
    raw = np.array(img_bytes).tobytes()
    encoded = np.asarray(bytearray(raw), dtype="uint8")
    image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    img0 = to_rgb(image)

    img = letterbox(img0, new_shape=yolo_conf.img_size)[0]
    # NOTE(review): the channel axis is reversed here after to_rgb() above;
    # confirm the model receives the channel order it expects.
    img = img[:, :, ::-1].transpose(2, 0, 1)  # reverse channels, HWC -> CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        pred = model(img, augment=yolo_conf.augment)[0]
    pred = non_max_suppression(pred, yolo_conf.conf_thres, yolo_conf.iou_thres,
                               classes=yolo_conf.classes, agnostic=yolo_conf.agnostic_nms)

    for det in pred:  # detections per image
        if not len(det):
            continue
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
        for *xyxy, conf, cls in reversed(det):
            label = '%s %.2f' % (names[int(cls)], conf)
            plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)
    return img0
def draw_img(self, objs, img):
    """Draw every detection dict in ``objs`` onto ``img``.

    Each dict supplies ``"bbox"``, ``"class"`` (a class name) and ``"score"``.
    The box colour is ``self.colors`` indexed by the position of the class
    name in ``self.names``; an unknown name raises ``IndexError``.
    """
    for obj in objs:
        class_name = obj["class"]
        caption = f"{class_name!s} {float(obj['score']):.2f}"
        matches = [pos for pos, known in enumerate(self.names) if known == class_name]
        plot_one_box(obj["bbox"], img, label=caption,
                     color=self.colors[int(matches[0])], line_thickness=3)
def make_box(det, frame, label, color):
    """Draw every detection in ``det`` onto ``frame`` and return the box corners.

    Args:
        det: Reversible sequence of detections; each row is unpacked as
            ``(*xyxy, conf, score, cls)``.
        frame: Image that is drawn on in place.
        label: Text drawn next to every box.
        color: Colour passed to ``plot_one_box``.

    Returns:
        List of the ``xyxy`` parts, in the reverse of the order given in
        ``det``.
    """
    coords = []
    # The normalised xywh value and its gain tensor used to be computed for
    # every row here but were never read, so they are no longer built.
    for *xyxy, _conf, _score, _cls in reversed(det):
        coords.append(xyxy)
        plot_one_box(xyxy, frame, label=label, color=color, line_thickness=3)
    return coords
def recog2(det, im0, device, img_lp, imgsz_recog, half, model_recog, all_t2_t1, classify, modelc, names_recog, save_txt, gn, txt_path, save_img, view_img, colors):
    """Run character recognition on every detected plate and record the results.

    For each row of ``det`` the plate region is extracted, passed through
    ``model_recog`` and NMS, optionally through the second-stage classifier,
    and turned into a string by ``sort_characters``.  Rows that yield an empty
    string are skipped.  Depending on the flags the result is appended to
    ``txt_path + '.txt'`` and/or drawn onto ``im0``.

    Args:
        all_t2_t1: Running total of recognition time; the time spent in this
            call (inference + NMS) is added to it.

    Returns:
        The updated ``all_t2_t1``.
    """
    for *xyxy, conf, cls in reversed(det):
        # Recognition comes first: crop the plate and run the second model.
        img_lp, img_lp0 = extract_img_lp(im0, xyxy, img_lp, device, imgsz_recog, half)

        t1 = time_synchronized()
        pred_lp = model_recog(img_lp, augment=opt.augment)[0]
        pred_lp = non_max_suppression(pred_lp, opt.conf_thres_recog, opt.iou_thres_recog,
                                      classes=opt.classes_recog, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        all_t2_t1 = all_t2_t1 + t2 - t1

        if classify:
            pred_lp = apply_classifier(pred_lp, modelc, img_lp, img_lp0)

        cls = check_lp_lines_type(pred_lp[0], cls, img_lp, img_lp0)
        license_str = sort_characters(pred_lp[0], cls, img_lp, img_lp0, names_recog)
        if len(license_str) == 0:
            continue

        if save_txt:  # Write to file
            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
            line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
            with open(txt_path + '.txt', 'a') as f:
                f.write(license_str + ' ' + ('%g ' * len(line)).rstrip() % line + '\n')

        if save_img or view_img:  # Add bbox to image
            label = '%s %.2f' % (license_str, conf)
            line_thickness = 3 if im0.shape[0] < 500 else 4
            # Use the thickness chosen above; a literal 3 was passed before,
            # which left the computed value unused.
            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=line_thickness)
    return all_t2_t1
def show_inference_and_return(self, im0s):
    """Run the detector on ``im0s`` and report whether a cat was found.

    When ``self.view_img`` is set, the detections are also drawn on a copy of
    the image and shown in an OpenCV window.

    Returns:
        ``["there's a cat!!"]`` if any detection has the class name ``'cat'``,
        otherwise an empty list.
    """
    print("search for a cat")
    img = letterbox(im0s)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # reverse channels, HWC -> CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(self.device)
    img = img.float()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        pred = self.model(img)[0]
    pred = non_max_suppression(pred)

    cat = False
    for det in pred:  # detections per image
        im0 = im0s.copy()
        if len(det):
            # Rescale boxes from the network input size to the original image size.
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                name = self.names[int(cls)]
                # The class check must not depend on view_img: it used to sit
                # inside the drawing branch, so no cat was ever reported when
                # display was switched off.
                if name == 'cat':
                    cat = True
                if self.view_img:
                    plot_one_box(xyxy, im0, label=f'{name} {conf:.2f}')
        if self.view_img:
            cv2.imshow("", im0)
            cv2.waitKey(1)

    return ['there\'s a cat!!'] if cat else []
# display: walk the (image, prediction) pairs in self.imgs / self.pred and,
# depending on the flags, print a summary, draw boxes, show, save or render
# each image.
#
#   pprint   - print the per-image summary string.
#   show     - call img.show() titled with self.files[i].
#   save     - save the image to Path(save_dir) / self.files[i].
#   render   - write the (possibly annotated) image back into self.imgs[i].
#   save_txt - after the loop, write the accumulated "index: count" lines in
#              `txt` to "output.txt" in the current working directory.
#
# `txt` gains an "<i>: <n>" line per detected class, and "<count>: 0" lines
# are inserted whenever the running `count` has fallen behind the image index.
# NOTE(review): this definition is collapsed onto a single physical line, so
# the nesting of the `count += 1` statements and of the `nlines` check cannot
# be recovered from here; confirm against the formatted original before
# changing the bookkeeping.
# NOTE(review): the local name `str` shadows the builtin, and "output.txt" is
# opened without a context manager.
def display(self, pprint=False, show=False, save=False, render=False, save_txt=False, save_dir=''): colors = color_list() txt = "" count = 0 for i, (img, pred) in enumerate(zip(self.imgs, self.pred)): str = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]}, ' if pred is not None: for c in pred[:, -1].unique(): n = (pred[:, -1] == c).sum() # detections per class str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " + "\n" if i == count: txt += f"{i}: {n}" + "\n" else: txt += f"{count}: 0" + "\n" txt += f"{i}: {n}" + "\n" count += 1 count += 1 nlines = txt.count("\n") if nlines != len(self.pred) and count >= len(self.pred): txt += f"{count}: 0" + "\n" if show or save or render: for *box, conf, cls in pred: # xyxy, confidence, class label = f'{self.names[int(cls)]} {conf:.2f}' plot_one_box(box, img, label=label, color=colors[int(cls) % 10]) img = Image.fromarray(img.astype(np.uint8)) if isinstance( img, np.ndarray) else img # from np if pprint: print(str.rstrip(', ')) if show: img.show(self.files[i]) # show if save: f = Path(save_dir) / self.files[i] img.save(f) # save print(f"{'Saving' * (i == 0)} {f},", end='' if i < self.n - 1 else ' done.\n') if render: self.imgs[i] = np.asarray(img) if save_txt: # nlines = txt.count("\n") # if nlines != len(self.pred): # txt += f"{i}: 0" + "\n" text_file = open("output.txt", "w") text_file.write(txt) text_file.close()
def hub(run_inference=False):
    """Preview the default webcam, optionally annotated with YOLOv5x detections.

    Frames are read from capture device 0, cropped by 60 px top/bottom and
    140 px left/right, and shown in a window named ``"img"`` until ``q`` is
    pressed or a frame cannot be read.

    Args:
        run_inference: When False (the default) the cropped frame is shown as
            is.  That is what this function did before, because an
            unconditional ``continue`` made the detection code unreachable.
            When True, every frame is resized to 640x640, run through the
            model and NMS, and the detections are drawn before display.
    """
    model = torch.hub.load('ultralytics/yolov5', 'yolov5x', pretrained=True)
    cudnn.benchmark = True  # set True to speed up constant image size inference
    names = model.module.names if hasattr(model, 'module') else model.names
    np.random.seed(2)
    colors = [[np.random.randint(0, 255) for _ in range(3)] for _ in names]

    cap = cv2.VideoCapture(0)
    try:
        while True:
            ok, img = cap.read()
            if not ok:  # camera unavailable or stream ended
                break
            img = img[60:-60, 140:-140, :]
            print(img.shape)

            if run_inference:
                # Drawing needs a contiguous buffer; the crop above is a view.
                img = np.ascontiguousarray(img)
                img_infer = np.asarray([cv2.resize(img, (640, 640))]).astype(np.float32)
                img_infer /= 255.0
                img_infer = img_infer.transpose((0, 3, 1, 2))  # NHWC -> NCHW
                tensor = torch.from_numpy(img_infer)
                with torch.no_grad():
                    pred = model(tensor)[0]
                pred = non_max_suppression(prediction=pred, conf_thres=0.25, iou_thres=0.45,
                                           classes=None, agnostic=False)
                det = pred[0]  # batch of one
                # NOTE(review): the frame is resized directly, not letterboxed;
                # confirm scale_coords maps boxes back correctly in that case.
                det[:, :4] = scale_coords(img_infer.shape[2:], det[:, :4], img.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    label = f'{names[int(cls)]} {conf:.2f}'
                    plot_one_box(xyxy, img, label=label, color=colors[int(cls)], line_thickness=2)

            cv2.imshow("img", img)
            if cv2.waitKey(1) == ord("q"):
                break
    finally:
        # Always hand the camera back and close the preview window.
        cap.release()
        cv2.destroyAllWindows()
def analyze(self, img, im0s):
    """Run the detector on a preprocessed image and annotate the original.

    Args:
        img: Preprocessed image as a NumPy array accepted by
            ``torch.from_numpy``.
        im0s: Original image; boxes are drawn on it in place.

    Returns:
        ``(im0, objs_to_publish)`` where ``im0`` is ``im0s`` and
        ``objs_to_publish`` is a list of ``"<label> <normalized xywh>"``
        strings.  If NMS returns several entries, only the last entry's
        strings are returned.
    """
    img = torch.from_numpy(img).to(self.device)
    img = img.half() if self.half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        pred = self.model(img, augment=opt.augment)[0]
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                               classes=opt.classes, agnostic=opt.agnostic_nms)

    # Bound before the loop so the return below is valid even if NMS yields
    # no entries at all.
    im0 = im0s
    objs_to_publish = []
    gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

    for det in pred:  # detections per image
        print(pred)
        objs_to_publish = []
        if not len(det):
            print("Found nothing")
            continue

        # Rescale boxes from the network input size to the original image size.
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
        for *xyxy, conf, cls in reversed(det):
            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
            label = '%s %.2f' % (self.names[int(cls)], conf)
            objs_to_publish.append(f"{label} {xywh}")
            plot_one_box(xyxy, im0, label=label, color=self.colors[int(cls)], line_thickness=3)
    return im0, objs_to_publish
def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
    """Summarise, crop, draw, show, save and/or render each image/prediction pair.

    Args:
        pprint: Log a one-line summary per image.
        show: Call ``show`` on the image, titled with its file name.
        save: Save the image as ``save_dir / self.files[i]``.
        crop: Save every detection as a crop under
            ``save_dir / 'crops' / <class name>`` instead of drawing it.
        render: Store the (possibly annotated) image back into ``self.imgs``.
        save_dir: Base output directory.
    """
    needs_boxes = show or save or render or crop
    for idx, (im, pred) in enumerate(zip(self.imgs, self.pred)):
        summary = f'image {idx + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '
        if not pred.shape[0]:
            summary += '(no detections)'
        else:
            for class_id in pred[:, -1].unique():
                count = (pred[:, -1] == class_id).sum()  # detections of this class
                summary += f"{count} {self.names[int(class_id)]}{'s' * (count > 1)}, "
            if needs_boxes:
                for *box, conf, cls in reversed(pred):
                    label = f'{self.names[int(cls)]} {conf:.2f}'
                    if crop:
                        save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[idx])
                    else:
                        im = plot_one_box(box, im, label=label, color=colors(cls))

        # Downstream steps use the PIL API, so convert arrays first.
        if isinstance(im, np.ndarray):
            im = Image.fromarray(im.astype(np.uint8))

        if pprint:
            LOGGER.info(summary.rstrip(', '))
        if show:
            im.show(self.files[idx])
        if save:
            fname = self.files[idx]
            im.save(save_dir / fname)
            if idx == self.n - 1:
                LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to '{save_dir}'")
        if render:
            self.imgs[idx] = np.asarray(im)
def detect(self, img0):
    """Resize ``img0`` to 640x360, run the detector and return an annotated copy.

    Boxes are only drawn when ``self.view_img`` is set; otherwise the returned
    image is an unmodified copy of the resized input.
    """
    img0 = cv2.resize(img0, (640, 360))

    # Letterbox, reverse the channel axis, HWC -> CHW.
    padded = letterbox(img0, new_shape=self.img_size)[0]
    chw = np.ascontiguousarray(padded[:, :, ::-1].transpose(2, 0, 1))
    batch = torch.from_numpy(chw).to(self.device)
    batch = batch.half() if self.half else batch.float()
    batch /= 255.0
    if batch.ndimension() == 3:
        batch = batch.unsqueeze(0)

    with torch.no_grad():
        raw = self.model(batch, augment=self.augment)[0]
    detections = non_max_suppression(raw, self.conf_thres, self.iou_thres,
                                     classes=self.classes, agnostic=self.agnostic_nms)

    img_with_detections = np.copy(img0)
    for det in detections:
        if not len(det):
            continue
        # Map boxes from the network input size back onto the resized frame.
        det[:, :4] = scale_coords(batch.shape[2:], det[:, :4], img_with_detections.shape).round()
        if not self.view_img:
            continue
        for *xyxy, conf, cls in reversed(det):
            caption = '%s %.2f' % (self.names[int(cls)], conf)
            plot_one_box(xyxy, img_with_detections, label=caption,
                         color=self.colors[int(cls)], line_thickness=3)
    return img_with_detections
def callback(self, ros_data):
    """Decode the incoming image, overlay all queued detections and show it.

    Every entry popped from ``self.yolo_q`` is a comma-separated string whose
    fields are read as: label, four box coordinates, confidence.

    Args:
        ros_data: Message whose ``data`` attribute holds the encoded image
            bytes.
    """
    # np.fromstring in binary mode is deprecated; frombuffer is the
    # replacement and avoids copying the payload.
    np_arr = np.frombuffer(ros_data.data, np.uint8)
    image = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)

    while len(self.yolo_q):
        det = self.yolo_q.pop().split(',')
        label = f'{det[0]} {float(det[5]):.2f}'
        xyxy = np.asarray(det[1:5], dtype=np.float64, order='C')
        plot_one_box(xyxy, image, label=label, color=(255, 0, 0), line_thickness=2)

    cv2.imshow('cv_img', image)
    cv2.waitKey(2)
def build_results(boxes: torch.Tensor, labs: torch.Tensor, logos: dict, implot: torch.Tensor, colors: list, confs: torch.Tensor, stem: Path.stem, save_path: str, jlist: list) -> None:
    """Record every box as JSON and/or draw it, then optionally write the image.

    With ``opt.save_img`` set, each box is drawn on ``implot`` and the image is
    written to ``save_path`` once all boxes are processed.  With
    ``opt.save_json`` set, each box is passed to ``jsonify_logo`` together with
    its confidence, label, matching ``logos`` entry, distance, ``stem`` and
    ``jlist``.
    """
    for idx, xyxy in enumerate(boxes):
        label, nearest_label, distance = labs[idx]
        if opt.save_img:
            plot_one_box(xyxy, implot, label=label,
                         color=colors[int(nearest_label)], line_thickness=3)
        if opt.save_json:
            jsonify_logo(xyxy, confs[idx], label, logos[nearest_label], distance, stem, jlist)

    # Write the annotated image once, after all boxes are drawn.
    if opt.save_img:
        cv2.imwrite(save_path, implot)
def detect_and_annotate(self, img_in):
    """Detect objects in ``img_in`` and return the image with boxes drawn.

    Every box is labelled with its confidence.  The class name is prepended
    only to the first box of each class, and only when ``self.names`` is
    non-empty.
    """
    detections, img_out = self.get_detections(img_in)

    # Working copy of the names; an entry is zeroed once its class has been
    # labelled so later boxes of that class show the confidence only.
    unlabelled = self.names.copy() if self.names else None
    for _xywh, xyxy, conf, cls in detections:
        if isinstance(conf, tuple):
            conf = conf[0]
        c = int(cls)  # integer class
        show_name = bool(unlabelled and unlabelled[c])
        if show_name:
            unlabelled[c] = 0
        prefix = f'{self.names[c]} ' if show_name else ''
        plot_one_box(xyxy, img_out, label=f'{prefix}{conf:.2f}',
                     color=colors(c, True), line_thickness=6)
    return img_out
def multithreading(cap, device, half, names, colors, model, q):
    """Read frames from ``cap``, annotate detections and push each frame to ``q``.

    The loop ends when ``cap`` is closed or a frame cannot be read.  Each
    frame gets one labelled box per detection and a status banner with
    ``opt.weights[0]``, the inference time and the frame rate.

    Args:
        cap: Capture object providing ``isOpened()`` and ``read()``.
        device: Device the input tensor is moved to.
        half: Convert the input to half precision when true.
        names: Class names indexed by class id.
        colors: Box colours indexed by class id.
        model: Detection model.
        q: Queue receiving the annotated frames via ``put``.
    """
    while cap.isOpened():
        ret, frame = cap.read()
        t0 = time.time()
        if not ret:
            print_div('No Frame')
            break

        img, img0 = img_preprocess(frame)  # img: resized, img0: original
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference only: no autograd graph is needed.
        t1 = time_synchronized()
        with torch.no_grad():
            pred = model(img, augment=opt.augment)[0]
        # Apply NMS: obtain the values of each prediction.
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Draw boxes.
        for det in pred:
            if det is None or not len(det):
                continue
            # Rescale boxes from the network input size to the original frame size.
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                label = '%s %.2f' % (names[int(cls)], conf)
                plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)

        # Status banner: black bar across the middle half of the top edge.
        x, y, w, h = (img0.shape[1] // 4), 25, (img0.shape[1] // 2), 30
        cv2.rectangle(img0, (x, 10), (x + w, y + h), (0, 0, 0), -1)
        cv2.putText(
            img0,
            '{} | inference: {:.4f}s | fps: {:.4f}'.format(opt.weights[0], t2 - t1, 1 / (time.time() - t0)),
            (x + 20, y + 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        q.put(img0)
def detect(save_img=False):
    """Run inference on ``opt.source`` and show and/or save the annotated results.

    All settings are read from the module-level ``opt``.  Numeric sources,
    ``.txt`` files and rtsp/rtmp/http URLs are loaded with ``LoadStreams`` and
    displayed; anything else is loaded with ``LoadImages`` and saved under an
    incremented run directory.  With ``opt.save_txt`` the normalised boxes are
    also appended to one text file per image under ``labels``.

    Args:
        save_img: Initial value of the save flag; it is forced to True for
            non-stream sources.
    """
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    stream_prefixes = ('rtsp://', 'rtmp://', 'http://')
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(stream_prefixes)

    # Output directories (incremented run folder).
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
    label_dir = save_dir / 'labels'
    (label_dir if save_txt else save_dir).mkdir(parents=True, exist_ok=True)

    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())
    if half:
        model.half()

    # Optional second-stage classifier (disabled).
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)
    if device.type != 'cpu':
        model(img.half() if half else img)  # warm-up pass

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            frame_tag = '_%g' % dataset.frame if dataset.mode == 'video' else ''
            txt_path = str(label_dir / p.stem) + frame_tag
            s += '%gx%g ' % img.shape[2:]
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if len(det):
                # Rescale boxes from the network input size to the original image size.
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])

                for *xyxy, conf, cls in reversed(det):
                    if save_txt:
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=1)

            # Inference + NMS time for this image.
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # a new video started
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(save_img=False):
    """Run inference on ``opt.source``, save annotated media and dump feature vectors.

    All settings are read from the module-level ``opt``.  The source is always
    loaded with ``LoadImages`` and results are always saved.  The third element
    of the model output is treated as a feature tensor; whenever its second
    dimension exceeds 1000 it is collected, and all collected rows are written
    to ``feat_vectors.txt`` in the run directory (one row per line, two
    decimals).

    Args:
        save_img: Ignored; saving is always enabled.
    """
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    # NOTE(review): the dataset below is always LoadImages, so the webcam
    # branch in the per-image loop looks unreachable in practice - confirm.
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    save_img = True
    dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Warm-up pass on accelerators.
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    if device.type != 'cpu':
        model(img.half() if half else img)

    # Feature rows are collected in a list and concatenated once at the end.
    # Each row is detached and moved to the CPU right away so no autograd
    # graph or device memory is held for the whole run.
    feature_rows = []
    for path, img, im0s, vid_cap in dataset:
        print('img size in dataset', img.shape)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        pred_total = model(img, augment=opt.augment)  # a tuple
        pred = pred_total[0]  # author's note: tensor [1, 6552, 9]
        feat_tensor = pred_total[2]  # author's note: tensor [1, 2808]
        if feat_tensor.shape[1] > 1000:
            feature_rows.append(feat_tensor.detach().float().cpu())

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, im0, frame = Path(path[i]), im0s[i].copy(), dataset.count
            else:
                p, im0, frame = Path(path), im0s, getattr(dataset, 'frame', 0)

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if len(det):
                # Rescale boxes from the network input size to the original image size.
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    # Save the collected feature vectors, one row per line.
    import numpy as np
    try:
        vec_path = save_dir / 'feat_vectors.txt'
        if feature_rows:
            fvs_array = torch.cat(feature_rows, 0).numpy()
        else:
            # Nothing collected: keep writing a single empty row, as before.
            fvs_array = np.empty((1, 0), dtype=np.float32)
        with open(vec_path, 'wb') as f:
            np.savetxt(f, np.atleast_2d(fvs_array), fmt='%.2f')
    except Exception as e:
        # Best effort: a failed dump is reported but must not fail the run.
        print(e)
def detect(save_img=False):
    """Run inference on ``opt.source``, save annotated results and per-box crops.

    All settings are read from the module-level ``opt``.  Numeric sources,
    ``.txt`` files and rtsp/rtmp/http URLs are loaded with ``LoadStreams`` and
    displayed; anything else is loaded with ``LoadImages`` and saved under an
    incremented run directory.  When boxes are drawn, the region of every
    detection is also written as ``<stem>_<n>.jpg`` next to the annotated
    output, where ``n`` counts the detections of the current batch.

    Args:
        save_img: Initial value of the save flag; it is forced to True for
            non-stream sources.
    """
    # Read the input source, weights and other settings.
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load the FP32 model and validate the requested input size against the
    # model stride (author's note: it is adjusted if not divisible).
    model = attempt_load(weights, map_location=device)
    imgsz = check_img_size(imgsz, s=model.stride.max())
    if half:
        model.half()  # to FP16

    # Second-stage classifier, off by default.
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Pick the data loader that matches the input source.
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        # To display video while it is processed, set view_img = True here.
        dataset = LoadImages(source, img_size=imgsz)

    # Class names and one random box colour per class.
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    # One forward pass up front to check that the model runs.
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    if device.type != 'cpu':
        model(img.half() if half else img)

    # Author's notes on the loop variables:
    #   path    - image/video path
    #   img     - image after resize + pad
    #   im0s    - image at its original size
    #   vid_cap - None for images, the video source for videos
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        # Add a leading batch axis when there is none.
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference.  Author's notes: pred has shape
        # (1, num_boxes, 5 + num_class); h and w are the network input height
        # and width and need not be equal because the dataset uses rectangular
        # inference; num_boxes = h/32 * w/32 + h/16 * w/16 + h/8 * w/8;
        # pred[..., 0:4] are xywh boxes (centre + width/height), pred[..., 4]
        # is the objectness confidence and pred[..., 5:-1] the class scores.
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS.  Author's notes: conf_thres / iou_thres are the
        # confidence and IoU thresholds, classes optionally restricts the kept
        # classes, agnostic also suppresses across classes.  Afterwards the
        # boxes are xyxy (top-left, bottom-right) and pred is a list with one
        # (num_boxes, 6) tensor per image holding box + conf + cls.
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Second-stage classification, off by default.
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Running index for the saved crops (local addition).
        roi_num = 0

        # Process detections, one entry per image.
        for i, det in enumerate(pred):
            # Stream sources deliver batches, so pick the i-th image.
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            # Output paths for the annotated image/video and the label file.
            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            # Start the log line with the network input size.
            s += '%gx%g ' % img.shape[2:]
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if len(det):
                # Map boxes from the resized + padded image back to the
                # original image; coordinates stay in xyxy format.
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Number of detections per class for the log line.
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    roi_num += 1
                    if save_txt:  # Write to file
                        # xyxy -> xywh (centre + width/height), normalised by
                        # the image width/height, flattened to a list.
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    # Draw the box on the original image.
                    if save_img or view_img:
                        label = '%s %.2f' % (names[int(cls)], conf)
                        # Save the detected region as its own image (local
                        # addition).  The crop is taken before this box is
                        # drawn, though earlier boxes may already overlap it.
                        roi_l, roi_u, roi_r, roi_d = (int(v) for v in xyxy[:4])
                        roi = im0[roi_u:roi_d, roi_l:roi_r]
                        # Derive the crop name from the path stem instead of
                        # chopping four characters off save_path, which broke
                        # for names without a three-letter extension
                        # ("x.jpeg", stream sources such as "0").
                        crop_path = str(save_dir / f'{p.stem}_{roi_num}.jpg')
                        cv2.imwrite(crop_path, roi)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Forward pass + NMS time.
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Show the image/video when display is enabled.
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image or video with detections).
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                    print(save_path)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    # Total run time.
    print('Done. (%.3fs)' % (time.time() - t0))
def detect(file_name):
    """Run detection on one uploaded file and return the labels that were drawn.

    The input is read from ``uploads/<file_name>``. The annotated image and a
    text file with one ``"<class> <conf>"`` line per detection are written
    into ``static/result_img``.

    Args:
        file_name: Name of the file inside the ``uploads/`` directory.

    Returns:
        list[str]: One ``"<class>-<conf>"`` string per detection, in the order
        the boxes were drawn.
    """
    # Fixed inference settings (this entry point takes no options object).
    agnostic_nms = False
    augment = False
    classes = None
    conf_thres = 0.25
    device = ''
    imgsz = 640
    iou_thres = 0.45
    name = 'result_img'
    project = 'static'
    save_txt = True
    # NOTE(review): file_name is joined verbatim; if it can come from an
    # untrusted upload, the caller should sanitise it first.
    source = 'uploads/' + file_name
    view_img = True
    weights = 'best.pt'
    save_img = True

    # Directories
    save_dir = Path(increment_path(Path(project) / name, exist_ok=True))
    # Both open() and cv2.imwrite() below need the directory to exist.
    save_dir.mkdir(parents=True, exist_ok=True)

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    print('-->Second-stage classifier')
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it cannot be chained.
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    print('-->Set Dataloader')
    dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    print('-->Get names and colors')
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    print('-->Run inference')
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    labels = []
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        print('-->Inference')
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        print('-->Apply NMS')
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes=classes,
                                   agnostic=agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        print('-->Apply Classifier')
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        print('-->Process detections')
        for i, det in enumerate(pred):  # detections per image
            p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string

            # Start from a clean label file: lines are appended below.
            if os.path.isfile(txt_path + '.txt'):
                os.remove(txt_path + '.txt')

            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                print('-->Print results')
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                print('-->Write results')
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write the label name to file
                        label = f'{names[int(cls)]} {conf:.2f}'
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(label + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]}-{conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)
                        print('-->label :', label)
                        labels.append(label)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Save results (image with detections)
            print('-->Save results (image with detections)')
            if save_img:
                cv2.imwrite(save_path, im0)

    print('labels=', labels)
    if save_txt or save_img:
        # Label files are written directly into save_dir (see txt_path), so
        # count them there rather than in a 'labels' sub-directory.
        s = f"\n{len(list(save_dir.glob('*.txt')))} labels saved to {save_dir}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    return labels
def detect(
        model="mobilenet_thin",  # pose-estimator graph name for get_graph_path
        weights='yolov5s.pt',  # model.pt path(s)
        source='data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        update=False,  # update all models
        project='runs/detect',  # save results to this directory
        name='exp',  # accepted for compatibility; not used
        exist_ok=False,  # accepted for compatibility; not used
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
):
    """Run object detection together with pose estimation on a source.

    For every frame the pose estimator yields keypoints, from which crops are
    derived; the detector yields boxes. When any detection overlaps any crop a
    warning banner is drawn on the frame. Annotated frames are optionally
    shown, and saved under ``project``.
    """
    # Target size of the pose estimator. Kept under dedicated names so the
    # video-writer dimensions further down cannot overwrite them.
    pose_w, pose_h = 432, 368
    e = TfPoseEstimator(get_graph_path(model), target_size=(pose_w, pose_h))

    save_img = not nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = Path(project)
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model (rebinds `model` from the pose graph name to the detector)
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check image size
    names = model.module.names if hasattr(
        model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it cannot be chained.
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Openpose getting keypoints and individual crops
        print("\n")
        myImg = im0s.copy()
        keypoints, humans = getKeyPoints(myImg, e, pose_w, pose_h)
        crops = [
            getCrop(point[0], myImg, 10, device, point[1] / 2)
            for point in keypoints
        ]

        # Inference
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes,
                                   agnostic_nms,
                                   max_det=max_det)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
            else:
                p, im0, frame = path, im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Check if any overlap between keypoint and det (handheld
                # weapon). The banner is identical for every overlapping
                # pair, so drawing it once is enough.
                if any(
                        bbox_iou(detection, crop) > 0 for detection in det
                        for crop in crops):
                    cv2.putText(im0, "Spider-Sense Tingling!", (30, 90),
                                cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 5)

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    c = int(cls)  # integer class
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        label = None if hide_labels else (
                            names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors(c, True),
                                     line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy,
                                         imc,
                                         file=save_dir / 'crops' / names[c] /
                                         f'{p.stem}.jpg',
                                         BGR=True)

                # Write keypoint boxes, coloured like the last detection drawn.
                # NOTE(review): assumes each crop unpacks as (*xyxy, conf, cls)
                # -- confirm against getCrop.
                for *crop_xyxy, _crop_conf, _crop_cls in reversed(crops):
                    plot_one_box(crop_xyxy,
                                 imc,
                                 label="keyP",
                                 color=colors(c, True),
                                 line_thickness=line_thickness)

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            im0 = TfPoseEstimator.draw_humans(im0, humans, imgcopy=False)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            vid_w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            vid_h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, vid_w, vid_h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (vid_w, vid_h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(save_img=False):
    """Run detection on ``opt.source`` and show and/or save annotated frames.

    All settings are read from the module-level ``opt`` namespace. Boxes are
    labelled with the class name only.

    Args:
        save_img: Whether to save annotated output; forced to True for
            non-stream sources.
    """
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it cannot be chained.
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Constructed as in the original; nothing below uses the tracker yet.
    ct = CentroidTracker()

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                # HighGUI only repaints the window while waitKey runs.
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(self, detetConfig=None):
    """Run detection on the configured source and collect per-file results.

    Args:
        detetConfig: Mapping of settings. Keys read here: ``source``,
            ``view_img``, ``save_txt``, ``saveDir``, ``imgsz``, ``augment``,
            ``conf_thres``, ``iou_thres``, ``classes``, ``agnostic_nms`` and
            ``save_conf``.

    Returns:
        list[dict]: One entry per image/frame that had detections, shaped as
        ``{"file": <name>, "detectObject": [<item>, ...]}``. In each item the
        keys ``x``/``y`` hold the first corner of the box and ``w``/``h`` hold
        the opposite corner (``xyxy[2]``/``xyxy[3]``), not a width/height.
    """
    detectResult = []
    print("检测参数:" + str(detetConfig))
    model = self.model
    save_img = False
    vw = None  # stream recorder; only created for stream sources

    # Unpack the basic settings
    source, view_img, save_txt = detetConfig["source"], detetConfig[
        "view_img"], detetConfig["save_txt"]
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(detetConfig['saveDir'])
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    half = self.half
    device = self.device

    # Check the requested image size against the model stride
    imgsz = check_img_size(detetConfig["imgsz"],
                           s=model.stride.max())  # check img_size

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it cannot be chained.
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        self.dataset = LoadStreams(source, img_size=imgsz)
        self.cap = self.dataset.getCap()
        self.isStream = True
        vw = videoRecordUtils.createVideoWriter(self.cap)
    else:
        save_img = True
        self.isStream = False
        self.dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    if device.type != 'cpu':
        model(img.half() if half else img)  # run once
    for path, img, im0s, vid_cap in self.dataset:
        # Cooperative stop flag; the comparison is kept exactly as written so
        # that values other than False/0 do not stop the loop.
        if self.isDetect == False:  # noqa: E712
            break
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=detetConfig["augment"])[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   detetConfig["conf_thres"],
                                   detetConfig["iou_thres"],
                                   classes=detetConfig["classes"],
                                   agnostic=detetConfig["agnostic_nms"])
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % self.dataset.frame
                if self.dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                detectObjectItems = []
                for *xyxy, conf, cls in reversed(det):
                    # Built unconditionally because the result dict needs it.
                    label = '%s %.2f' % (names[int(cls)], conf)

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh,
                                conf) if detetConfig["save_conf"] else (
                                    cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

                    detectObjectItems.append({
                        "x": int(xyxy[0]),
                        "y": int(xyxy[1]),
                        "w": int(xyxy[2]),
                        "h": int(xyxy[3]),
                        "label": label,
                        "class": int(cls.int()),
                        "conf": float(conf.float()),
                        "color": colors[int(cls)]
                    })
                detectResult.append({
                    "file": p.name,
                    "detectObject": detectObjectItems
                })

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                _, buffer = cv2.imencode('.jpg', im0)
                frame = buffer.tobytes()
                # Record video; the recorder exists only for stream sources,
                # but view_img can also be enabled from the config.
                if vw is not None:
                    print("record video....")
                    vw.write(im0)
                if self.Broardcast:
                    # To save memory, drop the backlog once more than 30
                    # frames are queued.
                    if self.q.qsize() > 30:
                        self.q.queue.clear()
                    self.q.put(frame)

            # Save results (image with detections)
            if save_img:
                if self.dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    # Release the last output video so the file is finalised.
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()
    if vw is not None:
        videoRecordUtils.closeVideoWrite(vw)
    print("detect finished.......")
    return detectResult
def detect(self):
    """Detect objects in ``self.source`` and record their box coordinates.

    Weights are loaded from ``self.weights``. Annotated output is written to
    ``C:\\detect``. ``self.coordinates`` is reset and then filled with one
    dict per detection.

    Returns:
        list[dict]: ``self.coordinates``. Each entry has ``raw_coordinates``
        (the four box values as yielded by the detector), ``label`` (the
        confidence formatted with two decimals), and ``top_left`` /
        ``bottom_right`` as integer ``(x, y)`` tuples.
    """
    self.coordinates = []
    view_img = False
    save_txt = False
    imgsz = 640
    webcam = False

    # Directories
    save_dir = Path(r"C:\detect")
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device('')
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(self.weights, map_location=device)
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it cannot be chained.
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    save_img = True
    dataset = LoadImages(self.source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=False)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   0.4,
                                   0.45,
                                   classes=None,
                                   agnostic=False)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(
                    dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]),
                                                                int(xyxy[3]))
                        self.coordinates.append({
                            'raw_coordinates': xyxy,
                            'label': f'{conf:.2f}',
                            'top_left': c1,
                            'bottom_right': c2
                        })
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    # The return value is deliberately not stored in `img`:
                    # that name still holds the input tensor, whose shape is
                    # read at the top of this per-detection loop.
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
    return self.coordinates
def detect(opt):
    """Run detection on ``opt.source`` and show and/or save annotated results.

    Args:
        opt: Options namespace. Attributes read here: ``source``, ``weights``,
            ``view_img``, ``save_txt``, ``img_size``, ``nosave``, ``project``,
            ``name``, ``exist_ok``, ``device``, ``augment``, ``conf_thres``,
            ``iou_thres``, ``classes``, ``agnostic_nms``, ``save_conf``,
            ``save_crop``, ``hide_labels``, ``hide_conf`` and
            ``line_thickness``.
    """
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(opt.project) / opt.name,
                              exist_ok=opt.exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(
        model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so it cannot be chained.
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(
                    dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            # Un-annotated copy of this image for crops. In webcam mode im0s
            # is indexed per image above, so it cannot itself be handed to
            # save_one_box.
            imc = im0.copy() if opt.save_crop else im0
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or opt.save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if opt.hide_labels else (
                            names[c]
                            if opt.hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors(c, True),
                                     line_thickness=opt.line_thickness)
                        if opt.save_crop:
                            save_one_box(xyxy,
                                         imc,
                                         file=save_dir / 'crops' / names[c] /
                                         f'{p.stem}.jpg',
                                         BGR=True)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
def inference(self, path, image_name_list):
    """Run batched detection on a list of images and return raw results.

    Args:
        path: Prefix that is string-concatenated with each image name to
            build the file path given to ``cv2.imread``. No separator is
            inserted, so a directory prefix must already end with one.
        image_name_list: Sequence of image file names, one per batch element.

    Returns:
        A tuple ``(boxes_total, confes_total, clses_total)`` of three lists,
        each holding one entry per input image:

        * ``boxes_total[i]``: list of ``[x1, y1, x2, y2]`` integer boxes,
          rescaled to the size of the image read from disk.
        * ``confes_total[i]``: matching confidences as Python floats.
        * ``clses_total[i]``: matching class indices as Python ints.

        Per image, detections are listed in the reverse of the order
        returned by ``non_max_suppression``. An empty ``image_name_list``
        yields ``([], [], [])``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read one of the images.
    """
    if len(image_name_list) == 0:
        # Nothing to stack into a batch, so skip the model call entirely.
        return [], [], []

    with torch.no_grad():
        originals = []  # images exactly as read from disk (HxWxC)
        batch = []  # letterboxed, channel-reversed, CxHxW model inputs
        for image_name in image_name_list:
            image_path = path + image_name
            original = cv2.imread(image_path)
            if original is None:
                # cv2.imread signals failure with None instead of raising.
                raise FileNotFoundError(
                    f'cv2.imread could not read image: {image_path}')
            originals.append(original)

            resized = letterbox(original, new_shape=self.imgsz)[0]
            # Reverse the channel axis (BGR -> RGB) and move it first
            # (HWC -> CHW).
            resized = resized[:, :, ::-1].transpose(2, 0, 1)
            batch.append(np.ascontiguousarray(resized))

        # NOTE(review): stacking assumes letterbox returns the same shape
        # for every image in the list -- confirm against letterbox defaults.
        img = torch.from_numpy(np.array(batch)).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference followed by NMS; one detection tensor per image.
        pred = self.model(img, augment=self.opt.augment)[0]
        pred = non_max_suppression(pred, self.opt.conf_thres,
                                   self.opt.iou_thres)

        draw_boxes = self.opt.save_img != ''
        boxes_total, confes_total, clses_total = [], [], []
        for original, det in zip(originals, pred):
            boxes, confes, clses = [], [], []
            if det is not None and len(det):
                # NOTE(review): the annotated copy is neither saved nor
                # returned here; the step that wrote it to disk is disabled.
                annotated = original.copy() if draw_boxes else None

                # Rescale boxes from the network input size (img) to the
                # size of the image read from disk.
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          original.shape).round()

                for *xyxy, conf, cls in reversed(det):
                    class_id = int(cls)
                    boxes.append([int(v) for v in xyxy])
                    confes.append(float(conf))
                    clses.append(class_id)
                    if draw_boxes:
                        label = f'{self.names[class_id]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     annotated,
                                     label=label,
                                     color=self.colors[class_id],
                                     line_thickness=3)

            boxes_total.append(boxes)
            confes_total.append(confes)
            clses_total.append(clses)

        return boxes_total, confes_total, clses_total