def forward(self, imgs, size=640, augment=False, profile=False): # Inference from various sources. For height=720, width=1280, RGB images example inputs are: # filename: imgs = 'data/samples/zidane.jpg' # URI: = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg' # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(720,1280,3) # PIL: = Image.open('image.jpg') # HWC x(720,1280,3) # numpy: = np.zeros((720,1280,3)) # HWC # torch: = torch.zeros(16,3,720,1280) # BCHW # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images p = next(self.model.parameters()) # for device and type if isinstance(imgs, torch.Tensor): # torch return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference # Pre-process n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else ( 1, [imgs]) # number of images, list of images shape0, shape1 = [], [] # image and inference shapes for i, im in enumerate(imgs): if isinstance(im, str): # filename or uri im = Image.open( requests.get(im, stream=True).raw if im.startswith('http') else im) # open im = np.array(im) # to numpy if im.shape[0] < 5: # image in CHW im = im.transpose( (1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) im = im[:, :, :3] if im.ndim == 3 else np.tile( im[:, :, None], 3) # enforce 3ch input s = im.shape[:2] # HWC shape0.append(s) # image shape g = (size / max(s)) # gain shape1.append([y * g for y in s]) imgs[i] = im # update shape1 = [ make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0) ] # inference shape x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad x = np.stack(x, 0) if n > 1 else x[0][None] # stack x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW x = torch.from_numpy(x).to( p.device).type_as(p) / 255. # uint8 to fp16/32 # Inference with torch.no_grad(): y = self.model(x, augment, profile)[0] # forward y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS # Post-process for i in range(n): scale_coords(shape1, y[i][:, :4], shape0[i]) return Detections(imgs, y, self.names)
def forward(self, imgs, size=640, augment=False, profile=False): # supports inference from various sources. For height=720, width=1280, RGB images example inputs are: # opencv: x = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(720,1280,3) # PIL: x = Image.open('image.jpg') # HWC x(720,1280,3) # numpy: x = np.zeros((720,1280,3)) # HWC # torch: x = torch.zeros(16,3,720,1280) # BCHW # multiple: x = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images p = next(self.model.parameters()) # for device and type if isinstance(imgs, torch.Tensor): # torch return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference # Pre-process n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) shape0, shape1 = [], [] # image and inference shapes for i, img in enumerate(imgs): if isinstance(img, str): img = Image.open(img) img = np.array(img) if img.shape[0] < 5: img = img.transpose((1, 2, 0)) img = img[:, :, :3] if img.ndim == 3 else np.tile( img[:, :, None], 3) s = img.shape[:2] # HWC shape0.append(s) # image shape g = (size / max(s)) # gain shape1.append([y * g for y in s]) imgs[i] = img shape1 = [ make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0) ] # inference shape x = [letterbox(img, new_shape=shape1, auto=False)[0] for img in imgs] # pad x = np.stack(x, 0) if n > 1 else x[0][None] # stack x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW x = torch.from_numpy(x).to( p.device).type_as(p) / 255. # uint8 to fp16/32 # Inference with torch.no_grad(): y = self.model(x, augment, profile)[0] # y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) y = non_max_suppression_torch_ops(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # Post-process for i in range(n): scale_coords(shape1, y[i][:, :4], shape0[i]) return Detections(imgs, y, self.names)
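# Example (illustrative, hedged): driving the autoshape-style forward() above with different
# input types. The torch.hub line is just one way to obtain a YOLOv5 model wrapped with a
# forward() of this shape, and the file names are placeholders; none of this is part of the
# original snippet.
import cv2
import torch
from PIL import Image

model = torch.hub.load('ultralytics/yolov5', 'yolov5s')      # model exposing forward(imgs, size=...)
img_rgb = cv2.imread('image.jpg')[:, :, ::-1]                # HWC BGR -> RGB, as the docstring expects
results = model(img_rgb, size=640)                           # single image
results = model([Image.open('image1.jpg'), img_rgb])         # list of images -> batched inference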
def detect_bbox(self, img: np.ndarray, img_size: int = 640, stride: int = 32, min_accuracy: float = 0.5) -> List:
    """
    TODO: input img in BGR format, not RGB; To Be Implemented in release 2.2
    """
    # normalize
    img_shape = img.shape
    img = letterbox(img, img_size, stride=stride)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(self.device)
    img = img.half() if self.half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)
    pred = self.model(img)[0]
    # Apply NMS
    pred = non_max_suppression(pred)
    res = []
    for i, det in enumerate(pred):
        if len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img_shape).round()
            res.append(det.cpu().detach().numpy())
    if len(res):
        return [[x1, y1, x2, y2, acc, b]
                for x1, y1, x2, y2, acc, b in res[0]
                if acc > min_accuracy]
    else:
        return []
def detect_pedestrians(self, camera_frame: np.ndarray) -> List[BoundingBox]:
    # Pre-process
    im = letterbox(camera_frame, self.IMG_SIZE, stride=self.model.stride)[0]
    im = np.ascontiguousarray(im.transpose(2, 0, 1))
    im = torch.from_numpy(np.expand_dims(im, 0)).to(self.device)
    im = im.half()
    im /= 255

    # Inference
    pred = self.model(im)

    # NMS
    pred = non_max_suppression(
        pred, conf_thres=self.conf, iou_thres=self.NMS_IOU_THRESHOLD
    )[0]

    # Scale predictions to original image coordinates
    pred_boxes = (
        scale_coords(im.shape[2:], pred[:, :4], camera_frame.shape)
        .round()
        .cpu()
        .numpy()
        .astype(int)
    )
    return [self.detection_to_bounding_box(det) for det in pred_boxes]
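# Example (illustrative, hedged): how detect_pedestrians() above would typically be driven
# from a camera loop. The PedestrianDetector class name and its constructor are assumptions
# made for illustration; only the detect_pedestrians() call itself comes from the snippet above.
import cv2

detector = PedestrianDetector()            # hypothetical wrapper owning self.model, self.device, etc.
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ok, frame = cap.read()                 # BGR HWC frame from OpenCV
    if not ok:
        break
    for box in detector.detect_pedestrians(frame):
        print(box)                         # one BoundingBox per detected person
cap.release()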
def image_track(self, im0): """ :param im0: original image, BGR format :return: """ # preprocess ************************************************************ # Padded resize img = letterbox(im0, new_shape=self.img_size)[0] # Convert img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 img = np.ascontiguousarray(img) # numpy to tensor img = torch.from_numpy(img).to(self.device) img = img.half() if self.half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) s = '%gx%g ' % img.shape[2:] # print string # Detection time ********************************************************* # Inference t1 = time_synchronized() with torch.no_grad(): pred = self.detector( img, augment=self.args.augment)[0] # list: bz * [ (#obj, 6)] # Apply NMS and filter object other than person (cls:0) pred = non_max_suppression(pred, self.args.conf_thres, self.args.iou_thres, classes=self.args.classes, agnostic=self.args.agnostic_nms) t2 = time_synchronized() # get all obj ************************************************************ det = pred[0] # for video, bz is 1 if det is not None and len( det): # det: (#obj, 6) x1 y1 x2 y2 conf cls # Rescale boxes from img_size to original im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results. statistics of number of each obj for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, self.names[int(c)]) # add to string bbox_xywh = xyxy2xywh(det[:, :4]).cpu() confs = det[:, 4:5].cpu() # ****************************** deepsort **************************** outputs = self.deepsort.update(bbox_xywh, confs, im0) # (#ID, 5) x1,y1,x2,y2,track_ID else: outputs = torch.zeros((0, 5)) t3 = time.time() return outputs, t2 - t1, t3 - t2
def detect_displacement(self, img_path, img_size=None):
    """
    Adapted from the run() function in YOLOv5's detect.py.
    Option 1: call detect.run() directly, write the results to a txt file and parse that file
              (the model is reloaded on every call, so this suits one-off batch processing).
    Option 2: reuse the code from detect.run: move model loading into self._load_models and
              keep the detection code here in detect_displacement.
    :param img_path: image path
    :param img_size: image size (height, width)
    :return: class, confidence, box (x, y, w, h)
    """
    stride = max(int(self.model.stride.max()), 32)
    imgsz = general.check_img_size(img_size, s=stride)
    dataset = datasets.LoadImages(img_path, img_size=imgsz, stride=stride, auto=True)
    for path, im, im0s, vid_cap, s in dataset:
        im = torch.from_numpy(im).to(self.device)  # uint8 to fp16/32
        im = im.float()  # 0 - 255 to 0.0 - 1.0
        im /= 255
        if len(im.shape) == 3:  # expand for batch 4-dim
            im = im[None]
        pred = self.model(im, augment=False, visualize=False)
        # non-maximum suppression
        pred = general.non_max_suppression(pred[0], 0.25, 0.4, None, False, max_det=1000)
        # Process predictions
        # per image
        for _, det in enumerate(pred):
            if len(det):
                # process result
                det[:, :4] = general.scale_coords(im.shape[2:], det[:, :4], im0s.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    box = torch.tensor(xyxy).view(1, 4).view(-1).tolist()
                    box[2] = box[2] - box[0]  # width
                    box[3] = box[3] - box[1]  # height
                    confidence_value = conf.item()
                    class_index = cls.item()
                    return class_index, confidence_value, box
                    # print('conf %s, class %s, box: %s' % (confidence_value, class_index, box))
                    """
                    ann = plots.Annotator(im0s.copy())
                    ann.box_label(xyxy, 'dis')
                    im0 = ann.result()
                    cv2.imshow('dis', im0)
                    cv2.waitKey(5000)
                    """
    return None, None, None
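# Example (hedged sketch): invoking detect_displacement() above. The Detector class name and
# its construction are placeholders; the method itself returns (class_index, confidence, box)
# for the first detection, or (None, None, None) when nothing is found.
detector = Detector()                                   # hypothetical: loads self.model in _load_models
cls_idx, conf, box = detector.detect_displacement('test.jpg', img_size=(640, 640))
if cls_idx is not None:
    x, y, w, h = box                                    # top-left corner plus width/height
    print(f'class {cls_idx} conf {conf:.2f} at x={x:.0f} y={y:.0f} w={w:.0f} h={h:.0f}')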
def detect(self, source, img_size=640, conf=None, iou=None): conf = self.model.conf if not conf else conf iou = self.model.iou if not iou else iou img_size = check_img_size(img_size, s=self.model.stride.max()) # check img_size # Set Dataloader cudnn.benchmark = True dataset = LoadImages(source, img_size=img_size) names = self.model.module.names if hasattr(self.model, 'module') else self.model.names img = torch.zeros((1, 3, img_size, img_size), device=self.device) # init img _ = self.model(img.half() if self.half else img) if self.device.type != 'cpu' else None # run once detections = [] for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(self.device) img = img.half() if self.half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) pred = self.model(img, augment=False)[0] pred = non_max_suppression_torch_ops(pred, conf, iou, classes=None) # Process detections for i, det in enumerate(pred): # detections per image p, s, im0 = path, '', im0s detection_result = {"entities": [], "detections": [], "src": path} gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results # Calling detach is necessary for c in det[:, -1].detach().unique(): n = (det[:, -1] == c).sum() # detections per class detection_result['entities'].append((names[int(c)], int(n))) # Write results for *xyxy, conf, cls in reversed(det): t_xyxy = torch.tensor(xyxy).view(1, 4) xywh = (xyxy2xywh(t_xyxy) / gn).view(-1).tolist() # normalized xywh detection_result['detections'].append(dict(xyxy=t_xyxy.view(-1).tolist(), xywh=xywh, cls=names[int(cls)], confidence="{:.2%}".format(float(conf)))) detections.append(detection_result) return detections
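# Worked example (hedged, values are illustrative): the gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
# line above builds a [w, h, w, h] gain vector, so dividing an (x_center, y_center, w, h) box
# by it yields coordinates normalized to 0-1, i.e. the YOLO label format.
import torch

im0_shape = (720, 1280, 3)                                  # H, W, C of the original image
gn = torch.tensor(im0_shape)[[1, 0, 1, 0]]                  # -> tensor([1280, 720, 1280, 720])
xywh_pixels = torch.tensor([640.0, 360.0, 128.0, 256.0])    # center-format box in pixels
xywh_norm = (xywh_pixels / gn).tolist()                     # -> [0.5, 0.5, 0.1, ~0.356]
print(xywh_norm)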
def postprocess(predictions, imgsz0, imgsz1):
    """Rescale boxes from the inference size to the original image size and map class IDs to names."""
    predictions[:, :4] = scale_coords(imgsz1, predictions[:, :4], imgsz0).round()
    predictions = predictions.cpu().numpy().tolist()
    return [{
        "box": row[:4],
        "confidence": row[4],
        "class": CLASS_MAP[int(row[5])]
    } for row in predictions]
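# Hedged sketch of what scale_coords() (used throughout these snippets) does: undo the
# letterbox resize-and-pad so xyxy boxes predicted at the inference shape map back onto the
# original image. This mirrors the YOLOv5 utility's default path but is a simplified
# reconstruction; `coords` is assumed to be a float tensor of xyxy boxes.
def scale_coords_sketch(img1_shape, coords, img0_shape):
    # img1_shape: (h, w) used for inference; img0_shape: (h, w) of the original image
    gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # resize gain
    pad_w = (img1_shape[1] - img0_shape[1] * gain) / 2                        # width padding
    pad_h = (img1_shape[0] - img0_shape[0] * gain) / 2                        # height padding
    coords[:, [0, 2]] -= pad_w   # remove x padding
    coords[:, [1, 3]] -= pad_h   # remove y padding
    coords[:, :4] /= gain        # undo the resize
    coords[:, [0, 2]] = coords[:, [0, 2]].clamp(0, img0_shape[1])  # clip x to image width
    coords[:, [1, 3]] = coords[:, [1, 3]].clamp(0, img0_shape[0])  # clip y to image height
    return coords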
def detect(self, source: List[np.array], img_size=640, conf=None, iou=None):
    stacked, sizes, div_sizes = self.preprocess(source)
    result = self.infer(stacked, conf, iou)
    detections = []
    for i, det in enumerate(result):
        scale_coords(div_sizes, result[i][:, :4], sizes[i].original)
        detection_result = {"entities": [], "detections": []}
        gn = torch.tensor(sizes[i].original)[[1, 0, 1, 0]]
        for c in det[:, -1].detach().unique():
            n = (det[:, -1] == c).sum()  # detections per class
            detection_result['entities'].append((self.names[int(c)], int(n)))
        for *xyxy, conf, cls in reversed(det):
            t_xyxy = torch.tensor(xyxy).view(1, 4)
            xywh = (xyxy2xywh(t_xyxy) / gn).view(-1).tolist()  # normalized xywh
            detection_result['detections'].append(dict(xyxy=t_xyxy.view(-1).tolist(),
                                                       xywh=xywh,
                                                       cls=self.names[int(cls)],
                                                       confidence="{:.2%}".format(float(conf))))
        detections.append(detection_result)
    return detections
def get_yolo_roi(img_path, model, device, dataset_name):
    # keep only boxes whose area exceeds a per-dataset threshold
    if dataset_name == "ped2":
        min_area_thr = 10 * 10
    elif dataset_name == "avenue":
        min_area_thr = 30 * 30
    elif dataset_name == "shanghaiTech":
        min_area_thr = 8 * 8
    else:
        raise NotImplementedError

    dataset = LoadImages(img_path, img_size=640)
    for path, img, im0s, vid_cap in dataset:
        p, s, im0 = Path(path), '', im0s
        # print(device)
        img = torch.from_numpy(img).to(device)
        img = img.float()
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img)[0]

        # Apply NMS
        pred = non_max_suppression(pred, 0.25, 0.45)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class

                # results
                bboxs = []
                for *xyxy, conf, cls in reversed(det):
                    box = [int(x.cpu().item()) for x in xyxy]
                    if (box[3] - box[1] + 1) * (box[2] - box[0] + 1) > min_area_thr:
                        bboxs.append(tuple(box))
                return bboxs
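# Example (illustrative): calling get_yolo_roi() above for one frame of the ped2 dataset.
# The import paths follow the ultralytics/yolov5 repo layout and the weight/image paths are
# placeholders; only the get_yolo_roi() call itself comes from the snippet above.
from models.experimental import attempt_load
from utils.torch_utils import select_device

device = select_device('')
model = attempt_load('yolov5s.pt', map_location=device).eval()
rois = get_yolo_roi('frames/ped2/frame_0001.jpg', model, device, 'ped2')
for x1, y1, x2, y2 in rois:
    print('ROI:', x1, y1, x2, y2)   # boxes larger than the per-dataset area threshold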
def forward(self, x, size=640, augment=False, profile=False): # supports inference from various sources. For height=720, width=1280, RGB images example inputs are: # opencv: x = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(720,1280,3) # PIL: x = Image.open('image.jpg') # HWC x(720,1280,3) # numpy: x = np.zeros((720,1280,3)) # HWC # torch: x = torch.zeros(16,3,720,1280) # BCHW # multiple: x = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images p = next(self.model.parameters()) # for device and type if isinstance(x, torch.Tensor): # torch return self.model(x.to(p.device).type_as(p), augment, profile) # inference # Pre-process if not isinstance(x, list): x = [x] shape0, shape1 = [], [] # image and inference shapes batch = range(len(x)) # batch size for i in batch: x[i] = np.array(x[i])[:, :, :3] # up to 3 channels if png s = x[i].shape[:2] # HWC shape0.append(s) # image shape g = (size / max(s)) # gain shape1.append([y * g for y in s]) shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape x = [letterbox(x[i], new_shape=shape1, auto=False)[0] for i in batch] # pad x = np.stack(x, 0) if batch[-1] else x[0][None] # stack x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32 # Inference x = self.model(x, augment, profile) # forward x = non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS # Post-process for i in batch: if x[i] is not None: x[i][:, :4] = scale_coords(shape1, x[i][:, :4], shape0[i]) return x
def predict(self, img, im0s):
    img = torch.from_numpy(img).to(self._device)
    img = img.half() if self._half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference
    t1 = time_synchronized()
    pred = self._model(img)[0]

    # Apply NMS
    det = non_max_suppression(pred, self._conf_thres, self._iou_thres, classes=self._classes)[0]
    t2 = time_synchronized()
    logging.info('Inference time: {:.3f}s'.format(t2 - t1))

    if det is not None and len(det):
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0s.shape).round()
    return det
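# Example (hedged): predict() above expects a letterboxed CHW array (img) together with the
# original frame (im0s). A typical caller, assuming a detector instance named `detector`
# wrapping this method, prepares the pair roughly like this:
import cv2
import numpy as np
# from utils.augmentations import letterbox   # module path varies across YOLOv5 versions

im0s = cv2.imread('frame.jpg')                    # original BGR image
img = letterbox(im0s, 640)[0]                     # resize + pad to the inference size
img = img[:, :, ::-1].transpose(2, 0, 1)          # BGR -> RGB, HWC -> CHW
img = np.ascontiguousarray(img)
det = detector.predict(img, im0s)                 # boxes come back rescaled to im0s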
def detect(self, img, model, stride, device, imgsz):
    names = model.module.names if hasattr(model, 'module') else model.names
    # t0 = time.time()
    im0s = img.copy()
    img = letterbox(im0s, imgsz, stride=stride)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    half = device.type != "cpu"  # half precision only supported on CUDA
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference
    # t1 = time_synchronized()
    pred = model(img, augment=True)[0]
    # print(pred)

    # Apply NMS
    pred = non_max_suppression(pred, 0.60, 0.5, classes=[0, 2, 3, 5, 7], agnostic=True)
    t2 = time_synchronized()

    xywhs, labels, xyxys, confs = [], [], [], []
    for i, det in enumerate(pred):
        im0 = im0s.copy()
        if len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                label = f'{names[int(cls)]}'
                xywh = self.bbox_rel(*xyxy)
                xyxys.append(xyxy)
                xywhs.append(xywh)
                labels.append(label)
                confs.append([conf.item()])
    # print(labels)
    return xyxys, xywhs, labels, confs, im0
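# Hedged reconstruction of the bbox_rel() helper used above and in the DeepSORT snippets
# below: it converts a corner-format (x1, y1, x2, y2) box into the center-based
# (x_center, y_center, width, height) format that deepsort.update() consumes. Some snippets
# also pass image width/height first; the original helper may accept and ignore those. The
# exact implementation may differ, so treat this as the commonly used form, not the source.
def bbox_rel(*xyxy):
    x1, y1, x2, y2 = (float(v) for v in xyxy)
    bbox_w = abs(x2 - x1)
    bbox_h = abs(y2 - y1)
    x_c = x1 + bbox_w / 2
    y_c = y1 + bbox_h / 2
    return x_c, y_c, bbox_w, bbox_h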
def detect(opt, save_img=False): global bird_image out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') # initialize the ROI frame cv2.namedWindow("image") cv2.setMouseCallback("image", get_mouse_points) # Initialize device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = torch.load(weights, map_location=device)['model'].float() # load to FP32 model.to(device).eval() if half: model.half() # to FP16 # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: view_img = True save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names #initialize moving average window movingAverageUpdater = movingAverage.movingAverage(5) # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once save_path = str(Path(out)) txt_path = str(Path(out)) + '/results.txt' d = DynamicUpdate() d.on_launch() risk_factors = [] frame_nums = [] count = 0 for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): if (frame_idx == 0): while True: image = im0s cv2.imshow("image", image) cv2.waitKey(1) if len(mouse_pts) == 7: cv2.destroyWindow("image") break four_points = mouse_pts # Get perspective, M is the transformation matrix for bird's eye view M, Minv = get_camera_perspective(image, four_points[0:4]) # Last two points in getMousePoints... 
this will be the threshold distance between points threshold_pts = src = np.float32(np.array([four_points[4:]])) # Convert distance to bird's eye view warped_threshold_pts = cv2.perspectiveTransform(threshold_pts, M)[0] # Get distance in pixels threshold_pixel_dist = np.sqrt( (warped_threshold_pts[0][0] - warped_threshold_pts[1][0])**2 + (warped_threshold_pts[0][1] - warped_threshold_pts[1][1])**2) # Draw the ROI on the output images ROI_pts = np.array([ four_points[0], four_points[1], four_points[3], four_points[2] ], np.int32) # initialize birdeye view video writer frame_h, frame_w, _ = image.shape bevw = birdeye_video_writer.birdeye_video_writer( frame_h, frame_w, M, threshold_pixel_dist) else: break t = time.time() for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): print("Loop time: ", time.time() - t) t = time.time() img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) cv2.polylines(im0s, [ROI_pts], True, (0, 255, 255), thickness=4) # Inferenc tOther = time.time() t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() print("Non max suppression and inference: ", time.time() - tOther) print("Pre detection time: ", time.time() - t) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string save_path = str(Path(out) / Path(p).name) if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() bbox_xywh = [] bbox_xyxy = [] confs = [] ROI_polygon = Polygon(ROI_pts) # Adapt detections to deep sort input format for *xyxy, conf, cls in det: img_h, img_w, _ = im0.shape x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy) obj = [x_c, y_c, bbox_w, bbox_h] confs.append([conf.item()]) bbox_xyxy.append(xyxy) bbox_xywh.append(obj) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort deepsortTime = time.time() #outputs = deepsort.update(xywhs, confss, im0) print("Deepsort function call: ", (time.time() - deepsortTime)) outputs = bbox_xyxy # draw boxes for visualization if len(outputs) > 0: # filter deepsort output outputs_in_ROI, ids_in_ROI = remove_points_outside_ROI( bbox_xyxy, ROI_polygon) center_coords_in_ROI = xywh_to_center_coords( outputs_in_ROI) warped_pts = birdeye_transformer.transform_center_coords_to_birdeye( center_coords_in_ROI, M) clusters = DBSCAN(eps=threshold_pixel_dist, min_samples=1).fit(warped_pts) print(clusters.labels_) draw_boxes(im0, outputs_in_ROI, clusters.labels_) risk_dict = Counter(clusters.labels_) bird_image = bevw.create_birdeye_frame( warped_pts, clusters.labels_, risk_dict) # movingAverageUpdater.updatePoints(warped_pts, ids_in_ROI) # # gettingAvgTime = time.time() # movingAveragePairs = movingAverageUpdater.getCurrentAverage() # # movingAverageIds = [id for id, x_coord, y_coord in movingAveragePairs] # movingAveragePts = [(x_coord, y_coord) for id, x_coord, y_coord in movingAveragePairs] # embded the bird image to the video # otherStuff = time.time() # if(len(movingAveragePairs) > 0): # movingAvgClusters = DBSCAN(eps=threshold_pixel_dist, 
min_samples=1).fit(movingAveragePts) # movingAvgClustersLables = movingAvgClusters.labels_ # risk_dict = Counter(movingAvgClustersLables) # bird_image = bevw.create_birdeye_frame(movingAveragePts, movingAvgClustersLables, risk_dict) # bird_image = resize(bird_image, 20) # bv_height, bv_width, _ = bird_image.shape # frame_x_center, frame_y_center = frame_w //2, frame_h//2 # x_offset = 20 # # im0[ frame_y_center-bv_height//2:frame_y_center+bv_height//2, \ # x_offset:bv_width+x_offset ] = bird_image # else: # risk_dict = Counter(clusters.labels_) # bird_image = bevw.create_birdeye_frame(warped_pts, clusters.labels_, risk_dict) bird_image = resize(bird_image, 20) bv_height, bv_width, _ = bird_image.shape frame_x_center, frame_y_center = frame_w // 2, frame_h // 2 x_offset = 20 im0[frame_y_center - bv_height // 2:frame_y_center + bv_height // 2, \ x_offset:bv_width + x_offset] = bird_image # print("Other stuff: ", time.time() - otherStuff) #write the risk graph risk_factors += [compute_frame_rf(risk_dict)] frame_nums += [frame_idx] graphTime = time.time() if (frame_idx > 100): count += 1 frame_nums.pop(0) risk_factors.pop(0) if frame_idx % 10 == 0: d.on_running(frame_nums, risk_factors, count, count + 100) print("Graph Time: ", time.time() - graphTime) # Write MOT compliant results to file if save_txt and len(outputs_in_ROI) != 0: for j, output in enumerate(outputs_in_ROI): bbox_left = output[0] bbox_top = output[1] bbox_w = output[2] bbox_h = output[3] identity = output[-1] with open(txt_path, 'a') as f: f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left, bbox_top, bbox_w, bbox_h, -1, -1, -1, -1)) # label format # Stream results if view_img: # cv2.imshow("bird_image", bird_image) cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, bird_image) cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % os.getcwd() + os.sep + out) if platform == 'darwin': # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
def detect(opt, save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size names = model.module.names if hasattr( model, 'module') else model.names # get class names if half: model.half() # to FP16 # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: view_img = True save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img # run once _ = model(img.half() if half else img) if device.type != 'cpu' else None save_path = str(Path(out)) txt_path = str(Path(out)) + '/results.txt' for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string save_path = str(Path(out) / Path(p).name) if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string bbox_xywh = [] confs = [] # Adapt detections to deep sort input format for *xyxy, conf, cls in det: x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy) obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = deepsort.update(xywhs, confss, im0) # draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] draw_boxes(im0, bbox_xyxy, identities) # Write MOT compliant results to file if save_txt and len(outputs) != 0: for j, output in enumerate(outputs): bbox_left = output[0] bbox_top = output[1] 
bbox_w = output[2] bbox_h = output[3] identity = output[-1] with open(txt_path, 'a') as f: f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left, bbox_top, bbox_w, bbox_h, -1, -1, -1, -1)) # label format else: deepsort.increment_ages() # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: print('saving img!') if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: print('saving video!') if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % os.getcwd() + os.sep + out) if platform == 'darwin': # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
def test( data, weights=None, # model.pt path(s) batch_size=32, # batch size imgsz=640, # inference size (pixels) conf_thres=0.001, # confidence threshold iou_thres=0.6, # NMS IoU threshold task='val', # train, val, test, speed or study device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu single_cls=False, # treat as single-class dataset augment=False, # augmented inference verbose=False, # verbose output save_txt=False, # save results to *.txt save_hybrid=False, # save label+prediction hybrid results to *.txt save_conf=False, # save confidences in --save-txt labels save_json=False, # save a cocoapi-compatible JSON results file project='runs/test', # save to project/name name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference model=None, dataloader=None, save_dir=Path(''), plots=True, wandb_logger=None, compute_loss=None, ): # Initialize/load model and set device training = model is not None if training: # called by train.py device = next(model.parameters()).device # get model device else: # called directly set_logging() device = select_device(device, batch_size=batch_size) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Load model model = attempt_load(weights, map_location=device) # load FP32 model gs = max(int(model.stride.max()), 32) # grid size (max stride) imgsz = check_img_size(imgsz, s=gs) # check image size # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: # model = nn.DataParallel(model) # Half half &= device.type != 'cpu' # half precision only supported on CUDA if half: model.half() # Configure model.eval() if isinstance(data, str): with open(data) as f: data = yaml.safe_load(f) check_dataset(data) # check is_coco = data['val'].endswith('coco/val2017.txt') # COCO dataset nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for [email protected]:0.95 niou = iouv.numel() # Logging log_imgs = 0 if wandb_logger and wandb_logger.wandb: log_imgs = min(wandb_logger.log_imgs, 100) # Dataloader if not training: if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once task = task if task in ( 'train', 'val', 'test') else 'val' # path to train/val/test images dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=0.5, rect=True, prefix=colorstr(f'{task}: '))[0] seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) names = { k: v for k, v in enumerate( model.names if hasattr(model, 'names') else model.module.names) } coco91class = coco80_to_coco91_class() s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', '[email protected]', '[email protected]:.95') p, r, f1, mp, mr, map50, map, t0, t1, t2 = 0., 0., 0., 0., 0., 0., 0., 0., 0., 0. 
loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class, wandb_images = [], [], [], [], [] for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): t_ = time_synchronized() img = img.to(device, non_blocking=True) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) nb, _, height, width = img.shape # batch size, channels, height, width t = time_synchronized() t0 += t - t_ # Run model out, train_out = model( img, augment=augment) # inference and training outputs t1 += time_synchronized() - t # Compute loss if compute_loss: loss += compute_loss([x.float() for x in train_out], targets)[1][:3] # box, obj, cls # Run NMS targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t = time_synchronized() out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls) t2 += time_synchronized() - t # Statistics per image for si, pred in enumerate(out): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class path = Path(paths[si]) seen += 1 if len(pred) == 0: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) continue # Predictions if single_cls: pred[:, 5] = 0 predn = pred.clone() scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred # Append to text file if save_txt: gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0 ]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') # W&B logging - Media Panel plots if len( wandb_images ) < log_imgs and wandb_logger.current_epoch > 0: # Check for test operation if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0: box_data = [{ "position": { "minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3] }, "class_id": int(cls), "box_caption": "%s %.3f" % (names[cls], conf), "scores": { "class_score": conf }, "domain": "pixel" } for *xyxy, conf, cls in pred.tolist()] boxes = { "predictions": { "box_data": box_data, "class_labels": names } } # inference-space wandb_images.append( wandb_logger.wandb.Image(img[si], boxes=boxes, caption=path.name)) wandb_logger.log_training_progress( predn, path, names) if wandb_logger and wandb_logger.wandb_run else None # Append to pycocotools JSON dictionary if save_json: # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
image_id = int( path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for p, b in zip(pred.tolist(), box.tolist()): jdict.append({ 'image_id': image_id, 'category_id': coco91class[int(p[5])] if is_coco else int(p[5]), 'bbox': [round(x, 3) for x in b], 'score': round(p[4], 5) }) # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels if plots: confusion_matrix.process_batch( predn, torch.cat((labels[:, 0:1], tbox), 1)) # Per target class for cls in torch.unique(tcls_tensor): ti = (cls == tcls_tensor).nonzero(as_tuple=False).view( -1) # target indices pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view( -1) # prediction indices # Search for detections if pi.shape[0]: # Prediction to target ious ious, i = box_iou(predn[pi, :4], tbox[ti]).max( 1) # best ious, indices # Append detections detected_set = set() for j in (ious > iouv[0]).nonzero(as_tuple=False): d = ti[i[j]] # detected target if d.item() not in detected_set: detected_set.add(d.item()) detected.append(d) correct[ pi[j]] = ious[j] > iouv # iou_thres is 1xn if len( detected ) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append( (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if plots and batch_i < 3: f = save_dir / f'test_batch{batch_i}_labels.jpg' # labels Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start() f = save_dir / f'test_batch{batch_i}_pred.jpg' # predictions Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start() # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names) ap50, ap = ap[:, 0], ap.mean(1) # [email protected], [email protected]:0.95 mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%11i' * 2 + '%11.3g' * 4 # print format print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) # Print results per class if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): for i, c in enumerate(ap_class): print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x / seen * 1E3 for x in (t0, t1, t2)) # speeds per image if not training: shape = (batch_size, 3, imgsz, imgsz) print( f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) if wandb_logger and wandb_logger.wandb: val_batches = [ wandb_logger.wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg')) ] wandb_logger.log({"Validation": val_batches}) if wandb_images: wandb_logger.log({"Bounding Box Debugger/Images": wandb_images}) # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights ).stem if weights is not None else '' # weights anno_json = '../coco/annotations/instances_val2017.json' # annotations json pred_json = 
str(save_dir / f"{w}_predictions.json") # predictions json print('\nEvaluating pycocotools mAP... saving %s...' % pred_json) with open(pred_json, 'w') as f: json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb check_requirements(['pycocotools']) from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api eval = COCOeval(anno, pred, 'bbox') if is_coco: eval.params.imgIds = [ int(Path(x).stem) for x in dataloader.dataset.img_files ] # image IDs to evaluate eval.evaluate() eval.accumulate() eval.summarize() map, map50 = eval.stats[: 2] # update results ([email protected]:0.95, [email protected]) except Exception as e: print(f'pycocotools unable to run: {e}') # Return results model.float() # for training if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
def detect(opt, save_img=False): out, source, weights, view_img, save_txt, imgsz = opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') global counter global features # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA. Make faster cmputation with lower precision. #Write headers into csv file with open(str(Path(args.output)) + '/results.csv', 'a') as f: f.write("Time,People Count Changed,TotalCount,ActivePerson,\n") #Initialize the scheduler for every 2 secs scheduler = BackgroundScheduler() scheduler.start() scheduler.add_job(func=write_csv, args=[opt.output], trigger=IntervalTrigger(seconds=2)) # Load model model = torch.load(weights, map_location=device)['model'].float() # load to FP32 model.to(device).eval() if half: model.half() # to FP16 # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: view_img = True save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once save_path = str(Path(out)) txt_path = str(Path(out)) + '/results.csv' for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string save_path = str(Path(out) / Path(p).name) if det is not None and len(det): # Rescale boxes from img_size to im0(640) size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string bbox_xywh = [] confs = [] # Adapt detections to deep sort input format for *xyxy, conf, cls in det: img_h, img_w, _ = im0.shape x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy) obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = deepsort.update(xywhs, confss, im0) # 
draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] draw_boxes(im0, bbox_xyxy, identities) features['identities'] = identities if save_txt and len(outputs) != 0: for j, output in enumerate(outputs): bbox_left = output[0] bbox_top = output[1] bbox_w = output[2] bbox_h = output[3] identity = output[-1] # with open(txt_path, 'a') as f: # f.write(f"{datetime.now()},{changes if changes != counter else 0},{counter},{len(identities)},\n") # label format # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1), end='\r') # Write Counter on img cv2.putText(im0, "Counter : " + str(counter), (10, 20), cv2.FONT_HERSHEY_PLAIN, 2, [1, 190, 200], 2) # Stream results # if view_img: # cv2.imshow(p, im0) # if cv2.waitKey(1) == ord('q'): # q to quit # raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': print('saving img!') cv2.imwrite(save_path, im0) else: # print('saving video!') if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer. Issues with video writer. Fix later fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % os.getcwd() + os.sep + out) print('Done. (%.3fs)' % (time.time() - t0))
def forward(self, imgs, size=640, augment=False, profile=False): # Inference from various sources. For height=640, width=1280, RGB images example inputs are: # file: imgs = 'data/images/zidane.jpg' # str or PosixPath # URI: = 'https://ultralytics.com/images/zidane.jpg' # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3) # numpy: = np.zeros((640,1280,3)) # HWC # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images t = [time_sync()] p = next(self.model.parameters()) if self.pt else torch.zeros( 1) # for device and type autocast = self.amp and (p.device.type != 'cpu' ) # Automatic Mixed Precision (AMP) inference if isinstance(imgs, torch.Tensor): # torch with amp.autocast(enabled=autocast): return self.model( imgs.to(p.device).type_as(p), augment, profile) # inference # Pre-process n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else ( 1, [imgs]) # number of images, list of images shape0, shape1, files = [], [], [ ] # image and inference shapes, filenames for i, im in enumerate(imgs): f = f'image{i}' # filename if isinstance(im, (str, Path)): # filename or uri im, f = Image.open( requests.get(im, stream=True).raw if str(im). startswith('http') else im), im im = np.asarray(exif_transpose(im)) elif isinstance(im, Image.Image): # PIL Image im, f = np.asarray( exif_transpose(im)), getattr(im, 'filename', f) or f files.append(Path(f).with_suffix('.jpg').name) if im.shape[0] < 5: # image in CHW im = im.transpose( (1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) im = im[..., :3] if im.ndim == 3 else np.tile( im[..., None], 3) # enforce 3ch input s = im.shape[:2] # HWC shape0.append(s) # image shape g = (size / max(s)) # gain shape1.append([y * g for y in s]) imgs[i] = im if im.data.contiguous else np.ascontiguousarray( im) # update shape1 = [ make_divisible(x, self.stride) for x in np.stack(shape1, 0).max(0) ] # inference shape x = [ letterbox(im, new_shape=shape1 if self.pt else size, auto=False)[0] for im in imgs ] # pad x = np.stack(x, 0) if n > 1 else x[0][None] # stack x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW x = torch.from_numpy(x).to( p.device).type_as(p) / 255 # uint8 to fp16/32 t.append(time_sync()) with amp.autocast(enabled=autocast): # Inference y = self.model(x, augment, profile) # forward t.append(time_sync()) # Post-process y = non_max_suppression(y if self.dmb else y[0], self.conf, iou_thres=self.iou, classes=self.classes, agnostic=self.agnostic, multi_label=self.multi_label, max_det=self.max_det) # NMS for i in range(n): scale_coords(shape1, y[i][:, :4], shape0[i]) t.append(time_sync()) return Detections(imgs, y, files, t, self.names, x.shape)
def detect(opt, save_img=False): ct = CentroidTracker() out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source == '0' or source.startswith( 'rtsp') or source.startswith('http') or source.endswith('.txt') # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA now = datetime.datetime.now().strftime("%Y/%m/%d/%H:%M:%S") # current time # Load model model = torch.load(weights, map_location=device)[ 'model'].float() # load to FP32 model.to(device).eval() # ============================================================================= filepath_mask = 'D:/Internship Crime Detection/YOLOv5 person detection/AjnaTask/Mytracker/yolov5/weights/mask.pt' model_mask = torch.load(filepath_mask, map_location = device)['model'].float() model_mask.to(device).eval() if half: model_mask.half() names_m = model_mask.module.names if hasattr(model_mask, 'module') else model_mask.names # ============================================================================= if half: model.half() # to FP16 # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: view_img = False save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img # run once _ = model(img.half() if half else img) if device.type != 'cpu' else None save_path = str(Path(out)) txt_path = str(Path(out)) + '/results.txt' memory = {} people_counter = 0 in_people = 0 out_people = 0 people_mask = 0 people_none = 0 time_sum = 0 # now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S') colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # ============================================================================= pred_mask = model_mask(img)[0] # ============================================================================= # Apply NMS pred = non_max_suppression( pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) # ============================================================================= pred_mask = non_max_suppression(pred_mask, 0.4, 0.5, classes = [0, 1, 2], agnostic = None) if pred_mask is None: continue classification = torch.cat(pred_mask)[:, -1] if len(classification) == 0: print("----",None) continue index = int(classification[0]) mask_class = names_m[index] print("MASK CLASS>>>>>>> 
\n", mask_class) # ============================================================================= # Create the haar cascade # cascPath = "D:/Internship Crime Detection/YOLOv5 person detection/AjnaTask/Mytracker/haarcascade_frontalface_alt2.xml" # faceCascade = cv2.CascadeClassifier(cascPath) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string save_path = str(Path(out) / Path(p).name) img_center_y = int(im0.shape[0]//2) # line = [(int(im0.shape[1]*0.258),int(img_center_y*1.3)),(int(im0.shape[1]*0.55),int(img_center_y*1.3))] # print("LINE>>>>>>>>>", line,"------------") # line = [(990, 672), (1072, 24)] line = [(1272, 892), (1800, 203)] # [(330, 468), (704, 468)] print("LINE>>>>>>>>>", line,"------------") cv2.line(im0,line[0],line[1],(0,0,255),5) # ============================================================================= # gray = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY) # # Detect faces in the image # faces = faceCascade.detectMultiScale( # gray, # scaleFactor=1.1, # minNeighbors=5, # minSize=(30, 30) # ) # # Draw a rectangle around the faces # for (x, y, w, h) in faces: # cv2.rectangle(im0, (x, y), (x+w, y+h), (0, 255, 0), 2) # text_x = x # text_y = y+h # cv2.putText(im0, mask_class, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, # 1, (0, 0, 255), thickness=1, lineType=2) # ============================================================================= if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords( img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string bbox_xywh = [] confs = [] bbox_xyxy = [] rects = [] # Is it correct? 
# Adapt detections to deep sort input format for *xyxy, conf, cls in det: x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy) # label = f'{names[int(cls)]}' xyxy_list = torch.tensor(xyxy).view(1,4).view(-1).tolist() plot_one_box(xyxy, im0, label='person', color=colors[int(cls)], line_thickness=3) rects.append(xyxy_list) obj = [x_c, y_c, bbox_w, bbox_h,int(cls)] #cv2.circle(im0,(int(x_c),int(y_c)),color=(0,255,255),radius=12,thickness = 10) bbox_xywh.append(obj) # bbox_xyxy.append(rec) confs.append([conf.item()]) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = ct.update(rects) # xyxy # outputs = deepsort.update(xywhs, confss, im0) # deepsort index_id = [] previous = memory.copy() memory = {} boxes = [] names_ls = [] # draw boxes for visualization if len(outputs) > 0: # print('output len',len(outputs)) for id_,centroid in outputs.items(): # boxes.append([output[0],output[1],output[2],output[3]]) # index_id.append('{}-{}'.format(names_ls[-1],output[-2])) index_id.append(id_) boxes.append(centroid) memory[index_id[-1]] = boxes[-1] i = int(0) print(">>>>>>>",boxes) for box in boxes: # extract the bounding box coordinates # (x, y) = (int(box[0]), int(box[1])) # (w, h) = (int(box[2]), int(box[3])) x = int(box[0]) y = int(box[1]) # GGG if index_id[i] in previous: previous_box = previous[index_id[i]] (x2, y2) = (int(previous_box[0]), int(previous_box[1])) # (w2, h2) = (int(previous_box[2]), int(previous_box[3])) p0 = (x,y) p1 = (x2,y2) cv2.line(im0, p0, p1, (0,255,0), 3) # current frame obj center point - before frame obj center point if intersect(p0, p1, line[0], line[1]): people_counter += 1 print('==============================') print(p0,"---------------------------",p0[1]) print('==============================') print(line[1][1],'------------------',line[0][0],'-----------------', line[1][0],'-------------',line[0][1]) # if p0[1] <= line[1][1]: # in_people +=1 # else: # # if mask_class == 'mask': # # print("COUNTING MASK..", mask_class) # # people_mask += 1 # # if mask_class == 'none': # # people_none += 1 # out_people +=1 if p0[1] >= line[1][1]: in_people += 1 if mask_class == 'mask': people_mask += 1 else: people_none += 1 else: out_people += 1 i += 1 # Write MOT compliant results to file if save_txt and len(outputs) != 0: for j, output in enumerate(outputs): bbox_left = output[0] bbox_top = output[1] bbox_w = output[2] bbox_h = output[3] identity = output[-1] with open(txt_path, 'a') as f: f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left, bbox_top, bbox_w, bbox_h, -1, -1, -1, -1)) # label format else: deepsort.increment_ages() cv2.putText(im0, 'Person [down][up] : [{}][{}]'.format(out_people,in_people),(130,50),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3) cv2.putText(im0, 'Person [mask][no_mask] : [{}][{}]'.format(people_mask, people_none), (130,100),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3) # Print time (inference + NMS) print('%sDone. 
(%.3fs)' % (s, t2 - t1)) time_sum += t2-t1 # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': # im0= cv2.resize(im0,(0,0),fx=0.5,fy=0.5,interpolation=cv2.INTER_LINEAR) cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % os.getcwd() + os.sep + out) if platform == 'darwin': # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
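# The counting logic above decides an up/down crossing by testing whether the segment from a track's
# previous centre point to its current one crosses the counting line via intersect(), which is not
# defined in this snippet. A minimal sketch of the ccw-based segment-intersection test these person
# counters typically use (function and argument names are assumptions; collinear edge cases ignored):
def ccw(A, B, C):
    # True if points A, B, C are arranged counter-clockwise
    return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0])

def intersect(A, B, C, D):
    # Segments AB and CD intersect iff A and B lie on opposite sides of CD
    # and C and D lie on opposite sides of AB
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)

# e.g. intersect((0, 0), (4, 4), (0, 4), (4, 0)) -> True; intersect((0, 0), (1, 1), (3, 0), (3, 4)) -> False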
def detect( weights="yolov5s.pt", source="yolov5/data/images", img_size=640, conf_thres=0.75, iou_thres=0.45, device="", view_img=False, save_txt=False, save_conf=False, classes=None, agnostic_nms=False, augment=False, update=False, project="runs/detect", name="exp", exist_ok=False, save_img=False, ): """ Args: weights: str model.pt path(s) source: str file/folder, 0 for webcam img_size: int inference size (pixels) conf_thres: float object confidence threshold iou_thres: float IOU threshold for NMS device: str cuda device, i.e. 0 or 0,1,2,3 or cpu view_img: bool display results save_txt: bool save results to *.txt save_conf: bool save confidences in save_txt labels classes: int filter by class: [0], or [0, 2, 3] agnostic-nms: bool class-agnostic NMS augment: bool augmented inference update: bool update all models project: str save results to project/name name: str save results to project/name exist_ok: bool existing project/name ok, do not increment """ source, weights, view_img, save_txt, imgsz = ( source, weights, view_img, save_txt, img_size, ) webcam = ( source.isnumeric() or source.endswith(".txt") or source.lower().startswith(("rtsp://", "rtmp://", "http://")) ) # Directories save_dir = Path( increment_path(Path(project) / name, exist_ok=exist_ok) ) # increment run (save_dir / "labels" if save_txt else save_dir).mkdir( parents=True, exist_ok=True ) # make dir # Initialize set_logging() device = select_device(device) half = device.type != "cpu" # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name="resnet101", n=2) # initialize modelc.load_state_dict( torch.load("weights/resnet101.pt", map_location=device)["model"] ).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, "module") else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img) if device.type != "cpu" else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=augment)[0] # Apply NMS pred = non_max_suppression( pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms, ) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], "%g: " % i, im0s[i].copy(), dataset.count else: p, s, im0, frame = path, "", im0s, getattr(dataset, "frame", 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / "labels" / p.stem) + ( "" if dataset.mode == "image" else f"_{frame}" ) # img.txt s += "%gx%g " % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 
1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}s, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = ( (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn) .view(-1) .tolist() ) # normalized xywh line = ( (cls, *xywh, conf) if save_conf else (cls, *xywh) ) # label format with open(txt_path + ".txt", "a") as f: f.write(("%g " * len(line)).rstrip() % line + "\n") if save_img or view_img: # Add bbox to image label = f"{names[int(cls)]} {conf:.2f}" plot_one_box( xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3, ) # Print time (inference + NMS) print(f"{s}Done. ({t2 - t1:.3f}s)") # Stream results if view_img: cv2.imshow(str(p), im0) # Save results (image with detections) if save_img: if dataset.mode == "image": cv2.imwrite(save_path, im0) else: # 'video' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fourcc = "mp4v" # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h) ) vid_writer.write(im0) if save_txt or save_img: s = ( f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else "" ) print(f"Results saved to {save_dir}{s}") print(f"Done. ({time.time() - t0:.3f}s)")
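# In the save_txt branch above, each predicted box is converted from pixel-corner coordinates to the
# normalized YOLO label format via xyxy2xywh(...) / gn, where gn holds [width, height, width, height]
# of the original image. A small self-contained equivalent of that step, useful for checking the
# arithmetic (the helper name is made up for illustration):
def xyxy_to_norm_xywh(xyxy, im_h, im_w):
    # [x1, y1, x2, y2] in pixels -> normalized [x_center, y_center, width, height]
    x1, y1, x2, y2 = xyxy
    return [(x1 + x2) / 2 / im_w, (y1 + y2) / 2 / im_h, (x2 - x1) / im_w, (y2 - y1) / im_h]

# e.g. a 100x200 box in the top-left corner of a 720x1280 frame:
# xyxy_to_norm_xywh([0, 0, 100, 200], 720, 1280) -> [0.0390625, 0.1389, 0.078125, 0.2778] (rounded)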
def run( weights=ROOT / 'yolov5s.pt', # model.pt path(s) source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam data=ROOT / 'data/coco128.yaml', # dataset.yaml path imgsz=(640, 640), # inference size (height, width) conf_thres=0.25, # confidence threshold iou_thres=0.45, # NMS IOU threshold max_det=1000, # maximum detections per image device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu view_img=False, # show results save_txt=False, # save results to *.txt save_conf=False, # save confidences in --save-txt labels save_crop=False, # save cropped prediction boxes nosave=False, # do not save images/videos classes=None, # filter by class: --class 0, or --class 0 2 3 agnostic_nms=False, # class-agnostic NMS augment=False, # augmented inference visualize=False, # visualize features update=False, # update all models project=ROOT / 'runs/detect', # save results to project/name name='exp', # save results to project/name exist_ok=False, # existing project/name ok, do not increment line_thickness=3, # bounding box thickness (pixels) hide_labels=False, # hide labels hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference ): source = str(source) save_img = not nosave and not source.endswith( '.txt') # save inference images is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) is_url = source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file) if is_url and is_file: source = check_file(source) # download # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Load model device = select_device(device) model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data) stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size # Half half &= ( pt or jit or onnx or engine ) and device.type != 'cpu' # FP16 supported on limited backends with CUDA if pt or jit: model.model.half() if half else model.model.float() # Dataloader if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) bs = len(dataset) # batch_size else: dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) bs = 1 # batch_size vid_path, vid_writer = [None] * bs, [None] * bs # Run inference model.warmup(imgsz=(1 if pt else bs, 3, *imgsz), half=half) # warmup dt, seen = [0.0, 0.0, 0.0], 0 for path, im, im0s, vid_cap, s in dataset: t1 = time_sync() im = torch.from_numpy(im).to(device) im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 if len(im.shape) == 3: im = im[None] # expand for batch dim t2 = time_sync() dt[0] += t2 - t1 # Inference visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False pred = model(im, augment=augment, visualize=visualize) t3 = time_sync() dt[1] += t3 - t2 # NMS pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) dt[2] += time_sync() - t3 # Second-stage classifier (optional) # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) # Process predictions for i, det in enumerate(pred): # per image seen += 1 if webcam: # 
batch_size >= 1 p, im0, frame = path[i], im0s[i].copy(), dataset.count s += f'{i}: ' else: p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # im.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # im.txt s += '%gx%g ' % im.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else ( names[c] if hide_conf else f'{names[c]} {conf:.2f}') annotator.box_label(xyxy, label, color=colors(c, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) # Stream results im0 = annotator.result() if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' or 'stream' if vid_path[i] != save_path: # new video vid_path[i] = save_path if isinstance(vid_writer[i], cv2.VideoWriter): vid_writer[i].release( ) # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path = str(Path(save_path).with_suffix( '.mp4')) # force *.mp4 suffix on results videos vid_writer[i] = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) vid_writer[i].write(im0) # Print time (inference-only) LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)') # Print results t = tuple(x / seen * 1E3 for x in dt) # speeds per image LOGGER.info( f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") if update: strip_optimizer(weights) # update model (to fix SourceChangeWarning)
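# The run() entry point above is normally driven by an argparse wrapper; a minimal hypothetical
# invocation with a handful of its keyword arguments (paths are placeholders) could look like:
if __name__ == '__main__':
    run(weights='yolov5s.pt',    # model checkpoint
        source='data/images',    # image folder, video file, glob, URL, or '0' for webcam
        imgsz=(640, 640),        # inference size (height, width)
        conf_thres=0.25,         # confidence threshold
        save_txt=True,           # also write normalized *.txt label files
        view_img=False)          # set True to display annotated frames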
def predict(self, src_image): param = self.getParam() # Initialize init_logging() half = self.device.type != 'cpu' # half precision only supported on CUDA # Load model if self.model is None or param.update: self.model = attempt_load(param.model_path, map_location=self.device) # load FP32 model stride = int(self.model.stride.max()) # model stride param.input_size = check_img_size(param.input_size, s=stride) # check img_size if half: self.model.half() # to FP16 # Get names and colors self.names = self.model.module.names if hasattr(self.model, 'module') else self.model.names self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.names] param.update = False else: stride = int(self.model.stride.max()) # model stride # Resize image image = letterbox(src_image, param.input_size, stride)[0] image = image.transpose(2, 0, 1) image = np.ascontiguousarray(image) self.emitStepProgress() # Run inference image = torch.from_numpy(image).to(self.device) image = image.half() if half else image.float() # uint8 to fp16/32 image /= 255.0 # 0 - 255 to 0.0 - 1.0 if image.ndimension() == 3: image = image.unsqueeze(0) self.emitStepProgress() # Inference pred = self.model(image, augment=param.augment)[0] self.emitStepProgress() # Apply NMS pred = non_max_suppression(pred, param.conf_thres, param.iou_thres, agnostic=param.agnostic_nms) self.emitStepProgress() graphics_output = self.getOutput(1) graphics_output.setNewLayer("YoloV5") graphics_output.setImageIndex(0) detected_names = [] detected_conf = [] # Process detections for i, det in enumerate(pred): # detections per image if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(image.shape[2:], det[:, :4], src_image.shape).round() # Results for *xyxy, conf, cls in reversed(det): # Box w = float(xyxy[2] - xyxy[0]) h = float(xyxy[3] - xyxy[1]) prop_rect = core.GraphicsRectProperty() prop_rect.pen_color = self.colors[int(cls)] graphics_box = graphics_output.addRectangle(float(xyxy[0]), float(xyxy[1]), w, h, prop_rect) graphics_box.setCategory(self.names[int(cls)]) # Label name = self.names[int(cls)] prop_text = core.GraphicsTextProperty() prop_text.font_size = 8 prop_text.color = self.colors[int(cls)] graphics_output.addText(name, float(xyxy[0]), float(xyxy[1]), prop_text) detected_names.append(name) detected_conf.append(conf.item()) # Init numeric output numeric_output = self.getOutput(2) numeric_output.clearData() numeric_output.setOutputType(dataprocess.NumericOutputType.TABLE) numeric_output.addValueList(detected_conf, "Confidence", detected_names) self.emitStepProgress()
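# Several of these snippets preprocess frames with letterbox() before inference. A rough sketch of
# what that call does, assuming the usual YOLOv5 behaviour (aspect-preserving resize plus constant
# grey padding); the real helper also returns the scale ratio and padding offsets and can pad to a
# stride multiple, which scale_coords() later relies on:
import numpy as np
import cv2

def letterbox_sketch(im, new_shape=640, color=(114, 114, 114)):
    h, w = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    r = min(new_shape[0] / h, new_shape[1] / w)        # scale ratio
    nh, nw = int(round(h * r)), int(round(w * r))      # resized, unpadded size
    resized = cv2.resize(im, (nw, nh), interpolation=cv2.INTER_LINEAR)
    top, left = (new_shape[0] - nh) // 2, (new_shape[1] - nw) // 2
    out = np.full((new_shape[0], new_shape[1], 3), color, dtype=im.dtype)
    out[top:top + nh, left:left + nw] = resized        # paste the resized image onto the padded canvas
    return out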
def detect(opt, save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') array_detected_object = [] # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = torch.load(weights, map_location=device)['model'].float() # load to FP32 model.to(device).eval() if half: model.half() # to FP16 # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: view_img = True save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once save_path = str(Path(out)) txt_path = str(Path(out)) + '/results.txt' peopleIn = 0 peopleOut = 0 detectedIds = [] for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string save_path = str(Path(out) / Path(p).name) if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string bbox_xywh = [] confs = [] # Adapt detections to deep sort input format for *xyxy, conf, cls in det: img_h, img_w, _ = im0.shape x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy) obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = deepsort.update(xywhs, confss, im0) # draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] draw_boxes(im0, bbox_xyxy, identities) # Write MOT compliant results to file if save_txt and len(outputs) != 0: for j, output in enumerate(outputs): bbox_left = output[0] bbox_top = output[1] bbox_w = output[2] bbox_h = output[3] identity = 
output[-1] array_detected_object.append(identity) array_detected_object = list( dict.fromkeys(array_detected_object)) xas = 0 yas = 0 if identity >= 0: xas = bbox_xyxy[0][0] yas = bbox_xyxy[0][1] if identity not in detectedIds and int(bbox_top) >= 10: detectedIds.append(identity) if int(bbox_top) >= 500 or (int(bbox_left) >= 800 and int(bbox_top) >= 80): peopleOut += 1 if int(bbox_top) <= 100: peopleIn += 1 # with open(txt_path, 'a') as f: # f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left, # bbox_top, bbox_w, bbox_h, -1, -1, -1, -1)) # label format # f.write(('%g ' * 3 + '\n') % (identity, bbox_left, bbox_top)) # label format # resultText = str(identity) + '-' + str(bbox_top) # f.write(resultText + '\n') # label format # f.write(('%g ' * 4 + '\n') % (-1, frame_idx, -1, -1, str(xas), str(yas))) # label format # f.write(str(identity)) # f.write(('%g ' * 1 + '\n') % (identity)) # f.write('\n') # f.write(str(bbox_xyxy)) # f.write("Number people counted: " + str(len(array_detected_object))) # with open(txt_path, 'r') as fp: # line = fp.readline() # cnt = 1 # while line: # identity = line.split("-")[0] # infoCheck = line.split("-")[1] # if identity not in detectedIds: # detectedIds.append(identity) # if int(infoCheck) > 680 : # peopleOut += 1 # else: # peopleIn +=1 # print("Line {}: {}".format(cnt, line.strip().split("-")[0])) # line = fp.readline() # cnt += 1 # print("All people counted: " + str(peopleIn + peopleOut)) # print("Number people in: " + str(peopleIn)) # print("Number people out: " + str(peopleOut)) font = cv2.FONT_HERSHEY_DUPLEX cv2.putText(im0, "People in out counted: " + str(peopleIn + peopleOut), (50, 100), font, 0.8, (0, 255, 255), 2, font) cv2.putText(im0, "Number people in: " + str(peopleIn), (50, 135), font, 0.8, (0, 255, 255), 2, font) cv2.putText(im0, "Number people out: " + str(peopleOut), (50, 170), font, 0.8, (0, 255, 255), 2, font) # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit txt_result = str(Path(out)) + '/result-counted.txt' print("All people counted: " + str(peopleIn + peopleOut)) print("Number people in: " + str(peopleIn)) print("Number people out: " + str(peopleOut)) with open(txt_path, 'a') as f: f.write("All people counted: " + str(peopleIn + peopleOut)) f.write("Number people in: " + str(peopleIn)) f.write("Number people out: " + str(peopleOut)) raise StopIteration # Save results (image with detections) if save_img: print('saving img!') if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: print('saving video!') if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % os.getcwd() + os.sep + out) if platform == 'darwin': # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
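# The DeepSORT-based counters above convert each detection with bbox_rel(...) before tracking, but the
# helper itself is never shown (one snippet calls it as bbox_rel(*xyxy), another as
# bbox_rel(img_w, img_h, *xyxy)). A sketch mirroring the inline computation used by the traffic-counting
# snippet further below; despite the "_rel" name the values stay in pixels here, the image-size
# arguments are unused, and the project's own helper may differ:
def bbox_rel_sketch(image_width, image_height, *xyxy):
    bbox_left = min(xyxy[0].item(), xyxy[2].item())
    bbox_top = min(xyxy[1].item(), xyxy[3].item())
    bbox_w = abs(xyxy[0].item() - xyxy[2].item())
    bbox_h = abs(xyxy[1].item() - xyxy[3].item())
    x_c = bbox_left + bbox_w / 2    # box centre x
    y_c = bbox_top + bbox_h / 2     # box centre y
    return x_c, y_c, bbox_w, bbox_h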
def mainFunc(args): # Set the main function flag print("Main Function Start...") # Check the GPU device print("Number of available GPUs: {}".format(torch.cuda.device_count())) # Check whether using the distributed runing for the network is_distributed = initDistributed(args) master = True if is_distributed and os.environ["RANK"]: master = int( os.environ["RANK"]) == 0 # check whether this node is master node # Configuration for device setting set_logging() if is_distributed: device = torch.device('cuda:{}'.format(args.local_rank)) else: device = select_device(args.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load the configuration config = loadConfig(args.config) # CuDNN related setting if torch.cuda.is_available(): cudnn.benchmark = config.DEVICE.CUDNN.BENCHMARK cudnn.deterministic = config.DEVICE.CUDNN.DETERMINISTIC cudnn.enabled = config.DEVICE.CUDNN.ENABLED # Configurations for dirctories save_img, save_dir, source, yolov5_weights, view_img, save_txt, imgsz = \ False, Path(args.save_dir), args.source, args.weights, args.view_img, args.save_txt, args.img_size webcam = source.isnumeric() or source.startswith( ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt') if save_dir == Path('runs/detect'): # if default os.makedirs('runs/detect', exist_ok=True) # make base save_dir = Path(increment_dir(save_dir / 'exp', args.name)) # increment run os.makedirs(save_dir / 'labels' if save_txt else save_dir, exist_ok=True) # make new dir # Load yolov5 model for human detection model_yolov5 = attempt_load(config.MODEL.PRETRAINED.YOLOV5, map_location=device) imgsz = check_img_size(imgsz, s=model_yolov5.stride.max()) # check img_size if half: model_yolov5.half() # to FP16 # Second-stage classifier classify = False if classify: model_classifier = load_classifier(name='resnet101', n=2) # initialize model_classifier.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights model_classifier.to(device).eval() # Load resnet model for human keypoints estimation model_resnet = eval('pose_models.' 
+ config.MODEL.NAME.RESNET + '.get_pose_net')(config, is_train=False) if config.EVAL.RESNET.MODEL_FILE: print('=> loading model from {}'.format(config.EVAL.RESNET.MODEL_FILE)) model_resnet.load_state_dict(torch.load(config.EVAL.RESNET.MODEL_FILE), strict=False) else: print('expected model defined in config at EVAL.RESNET.MODEL_FILE') model_resnet.to(device) model_resnet.eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) pose_transform = transforms.Compose( [ # input transformation for 2d human pose estimation transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) # Get names and colors names = model_yolov5.module.names if hasattr( model_yolov5, 'module') else model_yolov5.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Construt filters for filtering 2D/3D human keypoints # filters_2d = constructFilters((1,16,2), freq=25, mincutoff=1, beta=0.01) # for test # filters_3d = constructFilters((1,16,3), freq=25, mincutoff=1, beta=0.01) # Run the yolov5 and resnet for 2d human pose estimation # with torch.no_grad(): # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model_yolov5(img.half() if half else img ) if device.type != 'cpu' else None # run once # Process every video frame for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred_boxes = model_yolov5(img, augment=args.augment)[0] # Apply NMS pred_boxes = non_max_suppression(pred_boxes, args.conf_thres, args.iou_thres, classes=args.classes, agnostic=args.agnostic_nms) t2 = time_synchronized() # Can not find people and move to next frame if pred_boxes[0] is None: # show the frame with no human detected cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL) cv2.imshow("2D Human Pose Estimation", im0s[0].copy()) # wait manual operations # with kb.Listener(on_press=on_press) as listener: # listener.join() # return # if kb.is_pressed('t'): # return print("No Human Detected and Move on.") print("-" * 30) continue # Print time (inference + NMS) detect_time = t2 - t1 detect_fps = 1.0 / detect_time print("Human Detection Time: {}, Human Detection FPS: {}".format( detect_time, detect_fps)) # Apply Classifier if classify: # false pred_boxes = apply_classifier(pred_boxes, model_classifier, img, im0s) # Estimate 2d human pose(multiple person) centers = [] scales = [] for id, boxes in enumerate(pred_boxes): if boxes is not None and len(boxes): boxes[:, :4] = scale_coords(img.shape[2:], boxes[:, :4], im0s[id].copy().shape).round() # convert tensor to list format boxes = np.delete(boxes.cpu().numpy(), [-2, -1], axis=1).tolist() for l in range(len(boxes)): boxes[l] = [tuple(boxes[l][0:2]), tuple(boxes[l][2:4])] # convert box to center and scale for box in boxes: center, scale = box_to_center_scale(box, imgsz, imgsz) centers.append(center) scales.append(scale) t3 = time_synchronized() pred_pose_2d = get_pose_estimation_prediction(config, model_resnet, im0s[0], centers, scales, transform=pose_transform, device=device) t4 = time_synchronized() # Print time (2d human pose estimation) estimate_time = t4 - t3 estimate_fps = 1.0 / estimate_time print("Pose Estimation 
Time: {}, Pose Estimation FPS: {}".format( estimate_time, estimate_fps)) # Filter the predicted 2d human pose(multiple person) t5 = time_synchronized() # if False: # for test if config.EVAL.RESNET.USE_FILTERS_2D: # construct filters for every keypoints of every person in 2D filters_2d = constructFilters(pred_pose_2d.shape, freq=1, mincutoff=1, beta=0.01) print("Shape of filters_2d: ({}, {}, {})".format( len(filters_2d), len(filters_2d[0]), len(filters_2d[0][0]))) # for test for per in range(pred_pose_2d.shape[0]): for kp in range(pred_pose_2d.shape[1]): for coord in range(pred_pose_2d.shape[2]): pred_pose_2d[per][kp][coord] = filters_2d[per][kp][ coord](pred_pose_2d[per][kp][coord]) t6 = time_synchronized() # Print time (filter 2d human pose) filter_time_2d = t6 - t5 filter_fps_2d = 1.0 / filter_time_2d print("Filter 2D Pose Time: {}, Filter 2D Pose FPS: {}".format( filter_time_2d, filter_fps_2d)) # Process detections and estimations in 2D for i, box in enumerate(pred_boxes): if webcam: # batch_size >= 1 p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy() else: p, s, im0 = Path(path), '', im0s save_path = str(save_dir / p.name) txt_path = str(save_dir / 'labels' / p.stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if box is not None and len(box): # Rescale boxes from img_size to im0 size box[:, :4] = scale_coords(img.shape[2:], box[:, :4], im0.shape).round() # Print results for c in box[:, -1].unique(): n = (box[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in reversed(box): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if args.save_conf else ( cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line) + '\n') % line) # Add bbox to image if save_img or view_img: label = '%s %.2f' % (names[int(cls)], conf) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # Draw joint keypoints, number orders and human skeletons for every detected people in 2D for person in pred_pose_2d: # draw the human keypoints for idx, coord in enumerate(person): x_coord, y_coord = int(coord[0]), int(coord[1]) cv2.circle(im0, (x_coord, y_coord), 1, (0, 0, 255), 5) cv2.putText(im0, str(idx), (x_coord, y_coord), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2, cv2.LINE_AA) # draw the human skeletons in PACIFIC mode for skeleton in PACIFIC_SKELETON_INDEXES: cv2.line(im0, (int(person[skeleton[0]][0]), int(person[skeleton[0]][1])), (int(person[skeleton[1]][0]), int(person[skeleton[1]][1])), skeleton[2], 2) # Print time (inference + NMS + estimation) print('%sDone. 
(%.3fs)' % (s, t4 - t1)) # Stream results if view_img: detect_text = "Detect FPS:{0:0>5.2f}/{1:0>6.2f}ms".format( detect_fps, detect_time * 1000) estimate_text = "Estimate FPS:{0:0>5.2f}/{1:0>6.2f}ms".format( estimate_fps, estimate_time * 1000) cv2.putText(im0, detect_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA) cv2.putText(im0, estimate_text, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA) cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL) cv2.imshow("2D Human Pose Estimation", im0) if cv2.waitKey(1) & 0xFF == ord('q'): # q to quit return # goto .mainFunc # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) # Print time (inference + NMS + estimation + 2d filtering) all_process_time = t6 - t1 all_process_fps = 1.0 / all_process_time print("All Process Time: {}, All Process FPS: {}".format( all_process_time, all_process_fps)) print("-" * 30) # Goto label # label .mainFunc # Print saving results if save_txt or save_img: print('Results saved to %s' % save_dir) # Release video reader and writer, then destory all opencv windows dataset.vid_cap.release() vid_writer.release() cv2.destroyAllWindows() print('Present 2D Human Pose Inference Done. Total Time:(%.3f seconds)' % (time.time() - t0))
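# The pose-estimation pipeline above turns every person box into a (center, scale) pair with
# box_to_center_scale() before running the 2D keypoint model. A sketch of one common implementation
# (adapted from public SimpleBaseline/HRNet demo code, with the conventional pixel_std of 200);
# the project's own version may differ:
import numpy as np

def box_to_center_scale_sketch(box, model_image_width, model_image_height):
    (x1, y1), (x2, y2) = box                       # box as ((top-left), (bottom-right))
    box_w, box_h = x2 - x1, y2 - y1
    center = np.array([x1 + box_w * 0.5, y1 + box_h * 0.5], dtype=np.float32)
    aspect_ratio = model_image_width * 1.0 / model_image_height
    if box_w > aspect_ratio * box_h:               # pad the box to the model's aspect ratio
        box_h = box_w * 1.0 / aspect_ratio
    elif box_w < aspect_ratio * box_h:
        box_w = box_h * aspect_ratio
    pixel_std = 200
    scale = np.array([box_w / pixel_std, box_h / pixel_std], dtype=np.float32) * 1.25
    return center, scale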
def detect(self): # pylint: disable=too-many-locals,too-many-branches,too-many-statements """ Start main code for object detection and distance calculations """ start_time = time.time() logger.info('Start detecting') logger.debug('Device: %s', self.device) window_name = 'Stream' if self.webcam: # Full screen cv2.namedWindow(window_name, cv2.WND_PROP_FULLSCREEN) cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN) # Run inference img = torch.zeros((1, 3, self.imgsz, self.imgsz), device=self.device) # init img _ = self.model(img.half() if self.half else img ) if self.half else None # run once frame_id = 0 for path, img, im0s, vid_cap in self.dataset: img, pred, prediction_time = self.get_predictions(img) objects_base = [] # Process detections for idx_image, det in enumerate(pred): # detections per image if self.webcam: # batch_size >= 1 path_frame, im0 = path[idx_image], im0s[idx_image].copy() print_details = '%g: ' % idx_image else: path_frame, im0 = path, im0s print_details = '' # Must be inside the for loop so code can be used with multiple files (e.g. images) save_path = str(Path(self.out) / Path(path_frame).name) if self.save_txt or self.debug: # normalization gain whwh gn_whwh = torch.tensor(im0.shape)[[1, 0, 1, 0]] # pylint: disable=not-callable print_details += '%gx%g ' % img.shape[2:] if det is not None and len(det) > 0: # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results if self.save_txt or self.debug: classes_cnt = Counter(det[:, -1].tolist()) for class_idx, class_cnt in classes_cnt.items(): print_details += '%g %ss, ' % ( class_cnt, self.class_names[int(class_idx)]) # Write results for *xyxy, conf, cls in det: if self.save_txt: # Write to file # normalized xywh xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn_whwh).view(-1).tolist() # pylint: disable=not-callable with open( save_path[:save_path.rfind('.')] + '.txt', 'a') as file: file.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format if self.save_img or self.view_img: # Add bbox to image label = '%s %.2f' % (self.class_names[int(cls)], conf) if label is not None: if (label.split())[0] == 'person': # Save bbox and initialize it with zero, the "safe" label objects_base.append([xyxy, 0]) # Plot lines connecting people and get highest label per person objects_base = self.monitor_distance_current_boxes( objects_base, im0, 1) # Plot box with highest label on person plot.draw_boxes(objects_base, im0, self.overlay_images, 1, True) # Count label occurrences per frame risk_count = self.label_occurrences(objects_base) if self.view_img: # Flip screen in horizontal direction im0 = im0[:, ::-1, :] # Plot legend if self.opt.add_legend: im0 = plot.add_risk_counts(im0, risk_count, LEGEND_HEIGHT, self.opt.lang) # Plot banner if self.opt.add_banner: im0 = plot.add_banner(im0, self.banner_icon, BANNER_WIDTH) if self.debug: # Print frames per second running_time = time.time() - start_time frame_id = frame_id + 1 logger.debug('Frame rate: %s', round(frame_id / running_time, 2)) # Print time (inference + NMS) logger.debug('%sDone. 
(%.3fs)', print_details, prediction_time) # Stream results if self.view_img: if self.resolution: # Interpolation INTER_AREA is better, INTER_LINEAR (default) is faster im0 = cv2.resize(im0, self.resolution) cv2.imshow(window_name, im0) # im0[:, ::-1, :] if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results if self.save_img: self.save_results(im0, vid_cap, save_path) logger.info('Results saved to %s', Path(self.out)) logger.info('Done. (%.3fs)', (time.time() - start_time))
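# monitor_distance_current_boxes() and label_occurrences() are not defined in this snippet. Purely as an
# illustration of the idea (not the class's actual method), a pairwise pixel-distance check that raises
# a per-person risk label when two detections come too close; the thresholds and the 0/1/2 risk encoding
# are arbitrary placeholders:
import math

def pairwise_risk(boxes, warn_px=250, danger_px=150):
    labels = [0] * len(boxes)                                        # 0 = safe, 1 = warning, 2 = high risk
    centers = [((x1 + x2) / 2, y2) for x1, y1, x2, y2 in boxes]      # bottom-centre of each person box
    for i in range(len(centers)):
        for j in range(i + 1, len(centers)):
            d = math.dist(centers[i], centers[j])
            if d < danger_px:
                labels[i] = labels[j] = 2
            elif d < warn_px:
                labels[i] = max(labels[i], 1)
                labels[j] = max(labels[j], 1)
    return labels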
def get_detector_results(self, request): """ Args: request (GetDetectorResultsRequest): Returns: GetDetectorResultsResponse """ try: import torch from yolov5.utils.general import non_max_suppression from yolov5.utils.general import scale_coords from yolov5.utils.datasets import letterbox import numpy as np except ImportError: raise if self.currently_busy.is_set(): return GetDetectorResultsResponse(status=ServiceStatus(BUSY=True)) self.currently_busy.set() detections = Detections() try: image = ros_numpy.numpify(request.image) if request.image.encoding == "rgb8": image = image[..., ::-1] original_shape = image.shape img = letterbox(image, new_shape=self.image_size)[0] img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB img = np.ascontiguousarray(img) img = torch.from_numpy(img).to(self.device) img = img.half() if self.half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) with torch.no_grad(): pred = self.model(img, augment=False)[0] pred = non_max_suppression(pred, self.conf_thresh, self.iou_thresh, agnostic=False) for i, det in enumerate(pred): if det is not None and len(det): det[:, :4] = scale_coords(img.shape[2:], det[:, :4], original_shape).round() for x1, y1, x2, y2, conf, cls in reversed(det): x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) confidence = float(conf) class_name = self.names[int(cls)] roi = RegionOfInterest(x1=x1, y1=y1, x2=x2, y2=y2) seg_roi = SegmentOfInterest(x=[], y=[]) detections.objects.append(Detection(roi=roi, seg_roi=seg_roi, id=self._new_id(), track_id=-1, confidence=confidence, class_name=class_name)) self.currently_busy.clear() except Exception as e: print("FruitCastServer error: ", e) return GetDetectorResultsResponse(status=ServiceStatus(ERROR=True), results=detections) return GetDetectorResultsResponse(status=ServiceStatus(OKAY=True), results=detections)
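# The channel/axis handling above (BGR->RGB flip, HWC->CHW transpose, ascontiguousarray) is easy to get
# wrong; a tiny standalone check of just that bookkeeping, with placeholder pixel values:
import numpy as np

hwc_bgr = np.zeros((480, 640, 3), dtype=np.uint8)                        # height, width, channels (BGR)
chw_rgb = np.ascontiguousarray(hwc_bgr[:, :, ::-1].transpose(2, 0, 1))   # channels, height, width (RGB)
assert chw_rgb.shape == (3, 480, 640) and chw_rgb.flags['C_CONTIGUOUS']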
def detect(self, opt): print("before detect lock") self.qmut_1.lock() print("after detect lock") out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = torch.load(weights, map_location=device)['model'].float() # load to FP32 model.to(device).eval() if half: model.half() # to FP16 # Set Dataloader if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: view_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names # Run inference self.t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): if not self.Consuming: # dataset.stop_cap() raise StopIteration img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if not self.Consuming: # dataset.stop_cap() raise StopIteration if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() n = 0 # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # send the total number of people in the current frame to the PyQt front-end UI self.sum_person.emit(n) self.msleep(30) bbox_xywh = [] confs = [] # Adapt detections to deep sort input format for *xyxy, conf, cls in det: img_h, img_w, _ = im0.shape x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy) obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = deepsort.update(xywhs, confss, im0) # draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] self.bbox_id.emit([bbox_xyxy, identities]) self.msleep(30) draw_boxes(im0, bbox_xyxy, identities) # Print time (inference + NMS) print('%sDone. 
(%.3fs)' % (s, t2 - t1)) # Stream results if view_img: # self.detOut.emit(im0) self.queue.put(im0) # if self.queue.qsize() > 3: self.qmut_1.unlock() if self.queue.qsize() > 1: self.queue.get(False) self.queue.task_done() else: self.msleep(30) print('Done. (%.3fs)' % (time.time() - self.t0))
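# The worker thread above only pushes annotated frames onto self.queue (and emits sum_person / bbox_id
# signals); how the UI consumes them is not shown. A hypothetical consumer-side helper, assuming a PyQt5
# front end, that turns one BGR frame pulled from the queue into a QPixmap for a QLabel:
import cv2
from PyQt5.QtGui import QImage, QPixmap

def frame_to_pixmap(im0):
    rgb = cv2.cvtColor(im0, cv2.COLOR_BGR2RGB)       # OpenCV frames are BGR; Qt expects RGB
    h, w, ch = rgb.shape
    qimg = QImage(rgb.data, w, h, ch * w, QImage.Format_RGB888)
    return QPixmap.fromImage(qimg)                   # e.g. label.setPixmap(frame_to_pixmap(queue.get()))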
def test_model(self, config, testset): # pylint: disable=unused-argument """The testing loop for YOLOv5. Arguments: config: Configuration parameters as a dictionary. testset: The test dataset. """ assert Config().data.datasource == 'YOLO' test_loader = yolo.DataSource.get_test_loader(config['batch_size'], testset) device = next(self.model.parameters()).device # get model device # Configure self.model.eval() with open(Config().data.data_params) as f: data = yaml.load(f, Loader=yaml.SafeLoader) # model dict check_dataset(data) # check nc = Config().data.num_classes # number of classes iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 niou = iouv.numel() seen = 0 names = { k: v for k, v in enumerate(self.model.names if hasattr( self.model, 'names') else self.model.module.names) } s = ('%20s' + '%12s' * 6) % \ ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') p, r, f1, mp, mr, map50, map, = 0., 0., 0., 0., 0., 0., 0. stats, ap, ap_class = [], [], [] for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(test_loader, desc=s)): img = img.to(device, non_blocking=True).float() img /= 255.0 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) nb, _, height, width = img.shape # batch size, channels, height, width with torch.no_grad(): # Run model if Config().algorithm.type == 'mistnet': logits = self.model.forward_to(img) logits = logits.cpu().detach().numpy() logits = unary_encoding.encode(logits) logits = torch.from_numpy(logits.astype('float32')) out, train_out = self.model.forward_from(logits.to(device)) else: out, train_out = self.model(img) # Run NMS targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels lb = [] # for autolabelling out = non_max_suppression(out, conf_thres=0.001, iou_thres=0.6, labels=lb, multi_label=True) # Statistics per image for si, pred in enumerate(out): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class seen += 1 if len(pred) == 0: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) continue # Predictions predn = pred.clone() scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels # Per target class for cls in torch.unique(tcls_tensor): ti = (cls == tcls_tensor).nonzero(as_tuple=False).view( -1) # target indices pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view( -1) # prediction indices # Search for detections if pi.shape[0]: # Prediction to target ious ious, i = box_iou(predn[pi, :4], tbox[ti]).max( 1) # best ious, indices # Append detections detected_set = set() for j in (ious > iouv[0]).nonzero(as_tuple=False): d = ti[i[j]] # detected target if d.item() not in detected_set: detected_set.add(d.item()) detected.append(d) correct[pi[j]] = ious[ j] > iouv # iou_thres is 1xn if len( detected ) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append( (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = 
ap_per_class(*stats, plot=False, save_dir='', names=names) ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95 mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%12.3g' * 6 # print format print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) return map50
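# The matching step above relies on box_iou() to compare every prediction against every ground-truth box
# at the ten IoU thresholds in iouv. A minimal sketch of that pairwise IoU (same semantics as the
# torchvision/YOLOv5 utility, without their numerical-safety epsilon):
import torch

def box_iou_sketch(box1, box2):
    # box1: (N, 4), box2: (M, 4), both as (x1, y1, x2, y2); returns an (N, M) IoU matrix
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
    lt = torch.max(box1[:, None, :2], box2[:, :2])    # (N, M, 2) top-left corner of the overlap
    rb = torch.min(box1[:, None, 2:], box2[:, 2:])    # (N, M, 2) bottom-right corner of the overlap
    wh = (rb - lt).clamp(min=0)                       # zero width/height for non-overlapping pairs
    inter = wh[:, :, 0] * wh[:, :, 1]
    return inter / (area1[:, None] + area2 - inter)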
def update(self): f = 0 start_time = datetime.datetime.now() today = datetime.date.today() # dd/mm/YY date = today.strftime("%d/%m/%Y") current_time = start_time.strftime("%H:%M:%S") trackIds, position, speed_e, fps = [], {}, 0, 0.0 two_w, three_w, four_w, truck, bus, total = 0, 0, 0, 0, 0, 0 img = torch.zeros((1, 3, self.imgsz, self.imgsz), device=self.device) # init img (grabbed, frame) = self.vs.read() path = "traffic3.mp4" img0 = frame names = self.model.module.names if hasattr( self.model, "module") else self.model.names if grabbed == True: img = letterbox(img0, new_shape=640)[0] # Convert img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 img = np.ascontiguousarray(img) f = f + 1 # count = self.count+1 img = torch.from_numpy(img).to(self.device) img = img.half() if self.half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = self.model(img, augment=self.augment)[0] # Apply NMS pred = non_max_suppression( pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms, ) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if self.webcam: # batch_size >= 1 p, s, im0 = path[i], "%g: " % i, img0[i].copy() else: p, s, im0 = path, "", img0 s += "%gx%g " % img.shape[2:] # print string # save_path = str(Path(self.out) / Path(p).name) if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() bbox_xywh = [] confs = [] labels = [] # Adapt detections to deep sort input format for *xyxy, conf, cls in det: label = f"{names[int(cls)]}" bbox_left = min([xyxy[0].item(), xyxy[2].item()]) bbox_top = min([xyxy[1].item(), xyxy[3].item()]) bbox_w = abs(xyxy[0].item() - xyxy[2].item()) bbox_h = abs(xyxy[1].item() - xyxy[3].item()) x_c = bbox_left + bbox_w / 2 y_c = bbox_top + bbox_h / 2 bbox_w = bbox_w bbox_h = bbox_h # x_c, y_c, bbox_w, bbox_h = bbox_rel(self, *xyxy) obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) labels.append(label) confss, labelss = [], [] for conf, label in zip(confs, labels): confss.append(conf) labelss.append(label) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = self.deepsort.update(xywhs, confss, im0) # draw line cv2.polylines(im0, [self.pts_arr], self.isClosed, (255, 0, 0), 2) cv2.rectangle(img0, (650, 0), (850, 170), color=(0, 0, 0), thickness=-1) if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] offset = (0, 0) counter = 0 for i, box in enumerate(bbox_xyxy): if i < (len(labels[::-1]) - 1): x1, y1, x2, y2 = [int(i) for i in box] x1 += offset[0] x2 += offset[0] y1 += offset[1] y2 += offset[1] # box text and bar id = int(identities[i] ) if identities is not None else 0 label = "{}{:d}".format("", id) cls = labels[::-1][i] # Object counting if cls == "motorcycle": two_w, total = self.Obj_counting( id, label, trackIds, two_w, total) elif cls == "auto": three_w, total = self.Obj_counting( id, label, trackIds, three_w, total) elif cls == "car": four_w, total = self.Obj_counting( id, label, trackIds, four_w, total) elif cls == "truck": truck, total = self.Obj_counting( id, label, trackIds, truck, total) elif cls == "bus": bus, total = self.Obj_counting( id, label, trackIds, bus, total) fps = self.calculate_fps(start_time, f) # check if center points of object is inside the polygon point 
= Point((int(x1 + (x2 - x1) / 2), int(y1 + (y2 - y1) / 2))) polygon = Polygon(self.points) if (polygon.contains(point)) == True: counter = counter + 1 t_size = cv2.getTextSize( label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] cv2.rectangle(im0, (x1, y1), (x2, y2), (0, 255, 0), 3) if counter > 5: flow = "High" elif counter >= 2 and counter < 5: flow = "Medium" else: flow = "Low" cv2.putText( im0, "Occupancy - " + str(counter), (650, 30), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1, ) cv2.putText( im0, "Date - " + str(date), (650, 60), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1, ) cv2.putText( im0, "Time - " + str(current_time), (650, 90), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1, ) cv2.putText( im0, "Speed - " + "N A", (650, 120), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1, ) cv2.putText( im0, "Flow - " + str(flow), (650, 150), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1, ) # img = cv2.resize(img, (650, 360)) # image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) image = PIL.Image.fromarray(img0) image = PIL.ImageTk.PhotoImage(image) font = ("Arial", 12) self.canvas.configure(image=image) self.canvas.image = image result = tk.Label( self.counting_result, text=f"Counting Results", width=12, font=font, anchor="center", fg="blue", ) result.grid(row=0, column=2, padx=2) # result.pack(padx=10, pady=10) if self.two_w is None: self.two_w = tk.Label( self.counting_result, text=f"Two Wheeler \n\n{two_w}", width=13, font=font, anchor="center", bg="#8080c0", fg="white", ) self.two_w.grid(row=1, column=0, padx=2) else: self.two_w.configure(text=f"Two Wheeler\n\n{two_w}") if self.three_w is None: self.three_w = tk.Label( self.counting_result, text=f"Three Wheeler\n\n{three_w}", font=font, width=13, anchor="center", bg="#8080c0", fg="white", ) self.three_w.grid(row=1, column=1, padx=2) else: self.three_w.configure( text=f"Three Wheeler\n\n{three_w}") if self.four_w is None: self.four_w = tk.Label( self.counting_result, text=f"Four Wheeler\n\n{four_w}", width=13, font=font, anchor="center", bg="#8080c0", fg="white", ) self.four_w.grid(row=1, column=2, padx=2) else: self.four_w.configure(text=f"Four Wheeler\n\n{four_w}") if self.truck is None: self.truck = tk.Label( self.counting_result, text=f"Truck\n\n{truck}", font=font, width=10, anchor="center", bg="#8080c0", fg="white", ) self.truck.grid(row=1, column=3, padx=1) else: self.truck.configure(text=f"Truck\n\n{truck}") if self.bus is None: self.bus = tk.Label( self.counting_result, text=f"Bus\n\n{bus}", font=font, width=10, anchor="center", bg="#8080c0", fg="white", ) self.bus.grid(row=1, column=4, padx=2) else: self.bus.configure(text=f"Bus\n\n{bus}") if self.total is None: self.total = tk.Label( self.counting_result, text=f"Total Vehicle\n\n{total}", font=font, width=10, anchor="center", bg="#8080c0", fg="white", ) self.total.grid(row=1, column=5, pady=2) else: self.total.configure(text=f"Total Vehicle\n\n{total}") if self.fps is None: self.fps = tk.Label( self.counting_result, text=f"FPS\n\n{fps:.2f}", font=font, width=13, anchor="center", bg="#8080c0", fg="white", ) self.fps.grid(row=2, column=0, pady=2) else: self.fps.configure(text=f"FPS\n\n{fps:.2f}") else: self.deepsort.increment_ages() self.root.after(self.delay, self.update) # Print time (inference + NMS) print("%sDone. (%.3fs)" % (s, t2 - t1)) else: self.root.quit() print( "***********************************************FINSHED***********************************************" )
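# Obj_counting() is called above for every tracked vehicle but is not defined in the snippet. A
# hypothetical sketch consistent with how it is used: each DeepSORT track id is counted once, and both
# the per-class counter and the running total are returned (the label argument is accepted only to match
# the call signature):
def obj_counting_sketch(track_id, label, track_ids, class_count, total):
    if track_id not in track_ids:
        track_ids.append(track_id)
        class_count += 1
        total += 1
    return class_count, total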
def detect(save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.startswith( 'rtsp') or source.startswith('http') or source.endswith('.txt') # Initialize set_logging() device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format if save_img or view_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # Print time (inference + NMS) print('%sDone. 
(%.3fs)' % (s, t2 - t1)) # Stream results if view_img: # cv2.imshow(p, im0) cv2.imwrite("C:/Users/lenovo/Desktop/server/output/camera.jpg", im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) # vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc('X', '2', '6', '4'), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % Path(out)) if platform.system() == 'Darwin' and not opt.update: # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
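# The video-writer setup repeated across these detect() variants can be factored into one helper; a
# sketch that copies FPS and frame size from the source capture and falls back to assumed defaults for
# live streams where vid_cap is None:
import cv2

def make_video_writer(save_path, vid_cap, fourcc='mp4v'):
    if vid_cap is not None:                                   # video file: reuse its properties
        fps = vid_cap.get(cv2.CAP_PROP_FPS)
        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    else:                                                     # stream: placeholder defaults
        fps, w, h = 30, 1280, 720
    return cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))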