def preprocess_image(img0, image_size):
    """Letterbox an image and lay it out channel-first for the network.

    Mirrors the preprocessing found in datasets.py.

    Args:
        img0: Source image array indexed as (row, column, channel).
        image_size: Target shape forwarded to ``letterbox`` as ``new_shape``.

    Returns:
        A contiguous array with the channel axis reversed (BGR to RGB) and
        moved to the front.
    """
    padded = letterbox(img0, new_shape=image_size)[0]
    # Reverse the channel axis (BGR to RGB), then go from HWC to CHW.
    channel_first = padded[:, :, ::-1].transpose(2, 0, 1)
    return np.ascontiguousarray(channel_first)
def image_track(self, im0):
    """Detect objects in one frame and feed them to the DeepSORT tracker.

    :param im0: original image, BGR format
    :return: ``(outputs, detect_time, track_time)``. ``outputs`` is whatever
        ``self.deepsort.update`` returns (per the original comment: rows of
        x1, y1, x2, y2, track_ID), or an empty ``(0, 5)`` tensor when there
        are no detections. ``detect_time`` covers inference + NMS and
        ``track_time`` covers box rescaling + tracking.
    """
    # preprocess ************************************************************
    # Padded resize, BGR to RGB, HWC to CHW
    img = letterbox(im0, new_shape=self.img_size)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)

    # numpy to tensor
    img = torch.from_numpy(img).to(self.device)
    img = img.half() if self.half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Detection time *********************************************************
    t1 = time_synchronized()
    with torch.no_grad():
        pred = self.detector(img, augment=self.args.augment)[0]
        # Apply NMS and keep only the classes requested in args.classes
        pred = non_max_suppression(pred,
                                   self.args.conf_thres,
                                   self.args.iou_thres,
                                   classes=self.args.classes,
                                   agnostic=self.args.agnostic_nms)
    t2 = time_synchronized()

    # get all obj ************************************************************
    det = pred[0]  # for video, batch size is 1
    if det is not None and len(det):  # det: (#obj, 6) x1 y1 x2 y2 conf cls
        # Rescale boxes from the network input size to the original im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                  im0.shape).round()

        # NOTE: the per-class summary string the original built here was
        # never printed or returned, so it has been dropped.
        bbox_xywh = xyxy2xywh(det[:, :4]).cpu()
        confs = det[:, 4:5].cpu()

        # ****************************** deepsort ****************************
        outputs = self.deepsort.update(bbox_xywh, confs, im0)
    else:
        outputs = torch.zeros((0, 5))

    t3 = time.time()
    return outputs, t2 - t1, t3 - t2
def detect_bbox(self,
                img: np.ndarray,
                img_size: int = 640,
                stride: int = 32,
                min_accuracy: float = 0.5) -> List:
    """Run the detector on one image and return the boxes above a threshold.

    TODO: input img in BGR format, not RGB; To Be Implemented in release 2.2

    Args:
        img: Image array indexed as (row, column, channel); the channel axis
            is reversed before inference.
        img_size: Target size forwarded to ``letterbox``.
        stride: Stride forwarded to ``letterbox``.
        min_accuracy: Detections whose confidence is not strictly greater
            than this value are dropped.

    Returns:
        A list of ``[x1, y1, x2, y2, confidence, class]`` rows in original
        image coordinates, taken from the first non-empty NMS result, or
        ``[]`` when nothing was detected.
    """
    # normalize
    img_shape = img.shape
    img = letterbox(img, img_size, stride=stride)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # reverse channels, HWC to CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(self.device)
    img = img.half() if self.half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        pred = non_max_suppression(self.model(img)[0])

    for det in pred:
        if len(det):
            # Rescale boxes from the network input size to the source size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      img_shape).round()
            rows = det.cpu().detach().numpy()
            return [[x1, y1, x2, y2, acc, b]
                    for x1, y1, x2, y2, acc, b in rows
                    if acc > min_accuracy]
    return []
def forward(self, imgs, size=640, augment=False, profile=False):
    """Run inference on tensors, arrays, PIL images, file names or URLs.

    Example inputs for height=720, width=1280 RGB images:
        filename: imgs = 'data/samples/zidane.jpg'
        URI:           = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        OpenCV:        = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
        PIL:           = Image.open('image.jpg')  # HWC x(720,1280,3)
        numpy:         = np.zeros((720,1280,3))  # HWC
        torch:         = torch.zeros(16,3,720,1280)  # BCHW
        multiple:      = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]

    A tensor input is passed straight to the model and the raw model output
    is returned. Any other input is letterboxed, batched, run through the
    model and NMS, and returned as a ``Detections`` object. When ``imgs`` is
    a list its elements are replaced in place by the decoded arrays.
    """
    ref = next(self.model.parameters())  # for device and type
    if isinstance(imgs, torch.Tensor):  # torch
        return self.model(imgs.to(ref.device).type_as(ref), augment, profile)

    # Pre-process
    if isinstance(imgs, list):
        n = len(imgs)
    else:
        n, imgs = 1, [imgs]
    shape0, shape1 = [], []  # image and inference shapes
    for i, im in enumerate(imgs):
        if isinstance(im, str):  # filename or uri
            source = requests.get(
                im, stream=True).raw if im.startswith('http') else im
            im = Image.open(source)
        im = np.array(im)  # to numpy
        if im.shape[0] < 5:  # image in CHW
            im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
        # enforce 3ch input
        if im.ndim == 3:
            im = im[:, :, :3]
        else:
            im = np.tile(im[:, :, None], 3)
        hw = im.shape[:2]  # HWC
        shape0.append(hw)
        gain = size / max(hw)
        shape1.append([side * gain for side in hw])
        imgs[i] = im  # update

    max_stride = int(self.stride.max())
    shape1 = [
        make_divisible(side, max_stride)
        for side in np.stack(shape1, 0).max(0)
    ]  # inference shape
    padded = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]
    batch = np.stack(padded, 0) if n > 1 else padded[0][None]
    batch = np.ascontiguousarray(batch.transpose((0, 3, 1, 2)))  # BHWC to BCHW
    batch = torch.from_numpy(batch).to(ref.device).type_as(ref) / 255.  # uint8 to fp16/32

    # Inference
    with torch.no_grad():
        y = self.model(batch, augment, profile)[0]  # forward
    y = non_max_suppression(y,
                            conf_thres=self.conf,
                            iou_thres=self.iou,
                            classes=self.classes)  # NMS

    # Post-process: scale_coords is called for its in-place effect on y
    for i in range(n):
        scale_coords(shape1, y[i][:, :4], shape0[i])

    return Detections(imgs, y, self.names)
def normalize_img(self, img, img_size, stride, auto=True):
    """Letterbox an image and return it as a contiguous channel-first array.

    TODO: auto=False if pipeline batch size > 1

    Unlike the other preprocessing helpers, the channel order is left
    untouched; only the axes are moved from HWC to CHW.
    """
    padded = letterbox(img, img_size, stride=stride, auto=auto)[0]
    return np.ascontiguousarray(padded.transpose(2, 0, 1))
def preprocess(self, images: List[np.ndarray]):
    """Letterbox a batch of images to one common shape and stack them.

    Args:
        images: Images to prepare for inference.

    Returns:
        ``(img_stacked, sizes, div_sizes)``: the output of
        ``self.stack_to_torch`` on the letterboxed images, the per-image
        results of ``self.get_image_size``, and the common letterbox shape
        (the per-axis maximum of every ``size.scaled``, passed through
        ``cached_divisible`` with ``self._stride_max``).
    """
    sizes = [self.get_image_size(img) for img in images]
    scaled = np.array([size.scaled for size in sizes])
    # noinspection PyArgumentList
    div_sizes = [
        cached_divisible(side, self._stride_max)
        for side in scaled.max(axis=0)
    ]
    # Yolov5 sends list
    img_sized = [letterbox(img, new_shape=div_sizes)[0] for img in images]
    img_stacked = self.stack_to_torch(img_sized)
    return img_stacked, sizes, div_sizes
def forward(self, imgs, size=640, augment=False, profile=False):
    """Run inference on tensors, arrays, PIL images or file names.

    Example inputs for height=720, width=1280 RGB images:
        opencv:   x = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
        PIL:      x = Image.open('image.jpg')  # HWC x(720,1280,3)
        numpy:    x = np.zeros((720,1280,3))  # HWC
        torch:    x = torch.zeros(16,3,720,1280)  # BCHW
        multiple: x = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]

    A tensor input is passed straight to the model and the raw output is
    returned. Anything else is letterboxed, batched, run through the model
    and ``non_max_suppression_torch_ops``, and returned as ``Detections``.
    When ``imgs`` is a list its elements are replaced in place by the
    decoded arrays.
    """
    ref = next(self.model.parameters())  # for device and type
    if isinstance(imgs, torch.Tensor):  # torch
        return self.model(imgs.to(ref.device).type_as(ref), augment, profile)

    # Pre-process
    if isinstance(imgs, list):
        n = len(imgs)
    else:
        n, imgs = 1, [imgs]
    shape0, shape1 = [], []  # image and inference shapes
    for i, img in enumerate(imgs):
        if isinstance(img, str):
            img = Image.open(img)
        img = np.array(img)
        if img.shape[0] < 5:  # treated as CHW, so move channels last
            img = img.transpose((1, 2, 0))
        # keep at most 3 channels; replicate a single-channel image 3 times
        if img.ndim == 3:
            img = img[:, :, :3]
        else:
            img = np.tile(img[:, :, None], 3)
        hw = img.shape[:2]  # HWC
        shape0.append(hw)
        gain = size / max(hw)
        shape1.append([side * gain for side in hw])
        imgs[i] = img

    max_stride = int(self.stride.max())
    shape1 = [
        make_divisible(side, max_stride)
        for side in np.stack(shape1, 0).max(0)
    ]  # inference shape
    padded = [letterbox(img, new_shape=shape1, auto=False)[0] for img in imgs]
    batch = np.stack(padded, 0) if n > 1 else padded[0][None]
    batch = np.ascontiguousarray(batch.transpose((0, 3, 1, 2)))  # BHWC to BCHW
    batch = torch.from_numpy(batch).to(ref.device).type_as(ref) / 255.  # uint8 to fp16/32

    # Inference
    with torch.no_grad():
        y = self.model(batch, augment, profile)[0]
    y = non_max_suppression_torch_ops(y,
                                      conf_thres=self.conf,
                                      iou_thres=self.iou,
                                      classes=self.classes)

    # Post-process: scale_coords is called for its in-place effect on y
    for i in range(n):
        scale_coords(shape1, y[i][:, :4], shape0[i])

    return Detections(imgs, y, self.names)
def run_lpr(event, context):
    """Triggered by a change to a Cloud Storage bucket.

    Downloads the changed object, decodes it as an image, runs ``proc`` on
    it and uploads the JSON-encoded result to the ``yolov5-output`` bucket
    as ``<object name>.txt``.

    Args:
        event (dict): Event payload.
        context (google.cloud.functions.Context): Metadata for the event.

    Raises:
        ValueError: If the downloaded object cannot be decoded as an image.
    """
    filename = event["name"]

    client = storage.Client()
    source_bucket = client.get_bucket(event["bucket"])
    source_blob = source_bucket.get_blob(filename)

    # Decode
    raw = np.asarray(bytearray(source_blob.download_as_string()),
                     dtype="uint8")
    image = cv2.imdecode(raw, cv2.IMREAD_UNCHANGED)
    if image is None:
        # imdecode signals failure by returning None rather than raising.
        raise ValueError(f"could not decode {filename!r} as an image")

    # Letterbox (the original np.stack over this single array was a no-op)
    img = letterbox(image, new_shape=imgsz_detect)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)

    im0s = [image]  # source size image
    res = proc(img, im0s, view_img=False)

    with NamedTemporaryFile() as temp:
        # Only the JSON payload is written; the stray b"hello" debug prefix
        # made the uploaded file invalid JSON.
        temp.write(json.dumps(res).encode())
        temp.flush()

        dest_filename = filename + ".txt"
        dest_bucket_name = "yolov5-output"
        dest_bucket = client.get_bucket(dest_bucket_name)
        dest_blob = dest_bucket.blob(dest_filename)
        dest_blob.upload_from_filename(temp.name)
def lpr():
    """Handle an image upload and return the ``proc`` results as JSON.

    Reads the ``file`` part of the current request, decodes it, runs
    ``proc`` on the letterboxed image and responds with
    ``{"results": ...}``.

    Returns:
        A 200 JSON response on success, a 403 response when no file name was
        supplied, or a 400 response when the upload is not a decodable
        image.
    """
    uploaded_file = request.files['file']
    if uploaded_file.filename == '':
        return Response(response="no image uploaded", status=403)

    # np.fromstring is deprecated for binary input; frombuffer is the
    # supported equivalent.
    nparr = np.frombuffer(uploaded_file.read(), np.uint8)
    image = cv2.imdecode(nparr, IMREAD_UNCHANGED)
    if image is None:
        # imdecode signals failure by returning None rather than raising.
        return Response(response="uploaded file is not a decodable image",
                        status=400)

    # Letterbox (the original np.stack over this single array was a no-op)
    img = letterbox(image, new_shape=imgsz_detect)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)

    im0s = [image]  # source size image
    res = proc(img, im0s, view_img=False)
    response = {"results": res}
    return Response(response=json.dumps(response),
                    status=200,
                    mimetype="application/json")
def forward(self, x, size=640, augment=False, profile=False):
    """Run inference on a tensor, or on one or more HWC images.

    Example inputs for height=720, width=1280 RGB images:
        opencv:   x = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
        PIL:      x = Image.open('image.jpg')  # HWC x(720,1280,3)
        numpy:    x = np.zeros((720,1280,3))  # HWC
        torch:    x = torch.zeros(16,3,720,1280)  # BCHW
        multiple: x = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]

    Args:
        x: A tensor (passed straight to the model), a single image, or a
            list of images. A list passed in is not modified.
        size: Length the longest image side is scaled to before padding.
        augment: Forwarded to the model.
        profile: Forwarded to the model.

    Returns:
        The raw model output for tensor input; otherwise the NMS result
        list with box coordinates rescaled to each source image.
    """
    p = next(self.model.parameters())  # for device and type
    if isinstance(x, torch.Tensor):  # torch
        return self.model(x.to(p.device).type_as(p), augment, profile)

    # Pre-process. Work on a copy so the caller's list keeps its elements.
    images = list(x) if isinstance(x, list) else [x]
    shape0, shape1 = [], []  # image and inference shapes
    for i, im in enumerate(images):
        im = np.array(im)[:, :, :3]  # up to 3 channels if png
        images[i] = im
        hw = im.shape[:2]  # HWC
        shape0.append(hw)
        gain = size / max(hw)
        shape1.append([side * gain for side in hw])

    max_stride = int(self.stride.max())
    shape1 = [
        make_divisible(side, max_stride)
        for side in np.stack(shape1, 0).max(0)
    ]  # inference shape
    padded = [letterbox(im, new_shape=shape1, auto=False)[0] for im in images]
    batch = np.stack(padded, 0) if len(padded) > 1 else padded[0][None]
    batch = np.ascontiguousarray(batch.transpose((0, 3, 1, 2)))  # BHWC to BCHW
    batch = torch.from_numpy(batch).to(p.device).type_as(p) / 255.  # uint8 to fp16/32

    # Inference only: skip building an autograd graph.
    with torch.no_grad():
        out = self.model(batch, augment, profile)
    pred = non_max_suppression(out[0],
                               conf_thres=self.conf,
                               iou_thres=self.iou,
                               classes=self.classes)  # NMS

    # Post-process
    for det, source_shape in zip(pred, shape0):
        if det is not None:
            det[:, :4] = scale_coords(shape1, det[:, :4], source_shape)
    return pred
def preprocess(image_file, stride, imgsz):
    """Prepare the input for inferencing.

    Args:
        image_file: Encoded image bytes.
        stride: Stride forwarded to ``letterbox``.
        imgsz: Target size forwarded to ``letterbox``.

    Returns:
        ``(img, imgsz0)``: ``img`` is a float tensor on ``DEVICE`` scaled to
        0.0 - 1.0 with a leading batch axis; ``imgsz0`` is a tensor holding
        the first two dimensions of the decoded image.

    Raises:
        ValueError: If ``image_file`` cannot be decoded as an image.
    """
    # read image file
    raw = np.asarray(bytearray(image_file), dtype="uint8")
    img = cv2.imdecode(raw, 1)
    if img is None:
        # imdecode returns None on failure; fail here with a clear message
        # instead of an AttributeError on the next line.
        raise ValueError("image_file could not be decoded as an image")
    imgsz0 = torch.Tensor(img.shape[:2])

    # resize image
    img = letterbox(img, imgsz, stride=stride)[0]

    # convert from BGR to RGB, and from HWC to CHW
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)

    # convert to tensor
    img = torch.from_numpy(img).to(DEVICE)

    # normalize RGB values to percentage
    img = img.float() / 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    return img, imgsz0
def detect(self, img, model, stride, device, imgsz):
    """Detect objects in one BGR frame.

    Args:
        img: Source frame; it is copied, never modified.
        model: Detection model exposing ``names`` (directly or via
            ``module``).
        stride: Stride forwarded to ``letterbox``.
        device: Torch device; half precision is used on anything but CPU.
        imgsz: Target size forwarded to ``letterbox``.

    Returns:
        ``(xyxys, xywhs, labels, confs, im0)``: per-detection corner boxes,
        the results of ``self.bbox_rel``, class names, single-element
        confidence lists, and a copy of the source frame.
    """
    names = model.module.names if hasattr(model, 'module') else model.names

    im0s = img.copy()
    img = letterbox(im0s, imgsz, stride=stride)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    half = device.type != "cpu"  # half precision only supported on CUDA
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference only: skip building an autograd graph.
    with torch.no_grad():
        pred = model(img, augment=True)[0]

    # Apply NMS
    pred = non_max_suppression(pred, 0.60, 0.5,
                               classes=[0, 2, 3, 5, 7], agnostic=True)

    xywhs, labels, xyxys, confs = [], [], [], []
    # Bound before the loop so the return value exists even if pred is
    # empty; nothing below draws on it, so a single copy is enough.
    im0 = im0s.copy()
    for det in pred:
        if not len(det):
            continue
        # Rescale boxes from the network input size to the frame size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                  im0.shape).round()
        for *xyxy, conf, cls in reversed(det):
            xyxys.append(xyxy)
            xywhs.append(self.bbox_rel(*xyxy))
            labels.append(f'{names[int(cls)]}')
            confs.append([conf.item()])

    return xyxys, xywhs, labels, confs, im0
def forward(self, imgs, size=640, augment=False, profile=False):
    """Run inference on tensors, arrays, PIL images, paths or URLs.

    Example inputs for height=640, width=1280 RGB images:
        file:     imgs = 'data/images/zidane.jpg'  # str or PosixPath
        URI:           = 'https://ultralytics.com/images/zidane.jpg'
        OpenCV:        = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        PIL:           = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        numpy:         = np.zeros((640,1280,3))  # HWC
        torch:         = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        multiple:      = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]

    A tensor input is passed straight to the model under autocast and the
    raw output is returned. Anything else is decoded, letterboxed, batched,
    run through the model and NMS, and returned as ``Detections`` together
    with file names and stage timestamps. When ``imgs`` is a list its
    elements are replaced in place by the decoded arrays.
    """
    t = [time_sync()]
    p = next(self.model.parameters()) if self.pt else torch.zeros(1)  # for device and type
    autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
    if isinstance(imgs, torch.Tensor):  # torch
        with amp.autocast(enabled=autocast):
            return self.model(imgs.to(p.device).type_as(p), augment, profile)

    # Pre-process
    if isinstance(imgs, list):
        n = len(imgs)
    else:
        n, imgs = 1, [imgs]
    shape0, shape1, files = [], [], []  # image and inference shapes, filenames
    for i, im in enumerate(imgs):
        f = f'image{i}'  # fallback filename
        if isinstance(im, (str, Path)):  # filename or uri
            source = im
            stream = requests.get(source, stream=True).raw if str(
                source).startswith('http') else source
            im = np.asarray(exif_transpose(Image.open(stream)))
            f = source
        elif isinstance(im, Image.Image):  # PIL Image
            pil_name = getattr(im, 'filename', f) or f
            im = np.asarray(exif_transpose(im))
            f = pil_name
        files.append(Path(f).with_suffix('.jpg').name)
        if im.shape[0] < 5:  # image in CHW
            im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
        # enforce 3ch input
        if im.ndim == 3:
            im = im[..., :3]
        else:
            im = np.tile(im[..., None], 3)
        hw = im.shape[:2]  # HWC
        shape0.append(hw)
        gain = size / max(hw)
        shape1.append([side * gain for side in hw])
        imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)

    shape1 = [
        make_divisible(side, self.stride)
        for side in np.stack(shape1, 0).max(0)
    ]  # inference shape
    target_shape = shape1 if self.pt else size
    x = [letterbox(im, new_shape=target_shape, auto=False)[0] for im in imgs]
    x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
    x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
    x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
    t.append(time_sync())

    with amp.autocast(enabled=autocast):
        # Inference
        y = self.model(x, augment, profile)
        t.append(time_sync())

        # Post-process
        raw = y if self.dmb else y[0]
        y = non_max_suppression(raw,
                                self.conf,
                                iou_thres=self.iou,
                                classes=self.classes,
                                agnostic=self.agnostic,
                                multi_label=self.multi_label,
                                max_det=self.max_det)  # NMS
        # scale_coords is called for its in-place effect on y
        for i in range(n):
            scale_coords(shape1, y[i][:, :4], shape0[i])

        t.append(time_sync())
        return Detections(imgs, y, files, t, self.names, x.shape)
def predict(self, src_image):
    """Run the YOLOv5 model on one image and publish boxes and scores.

    Loads (or reloads, when ``param.update`` is set) the model, letterboxes
    ``src_image``, runs inference and NMS, then adds one rectangle and one
    text label per detection to output 1 and a confidence table to output 2.
    Progress is reported through ``self.emitStepProgress``.

    Args:
        src_image: Source image array indexed as (row, column, channel).
            The channel order is not changed before inference.
    """
    param = self.getParam()

    # Initialize
    init_logging()
    half = self.device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    if self.model is None or param.update:
        self.model = attempt_load(param.model_path,
                                  map_location=self.device)  # load FP32 model
        stride = int(self.model.stride.max())  # model stride
        param.input_size = check_img_size(param.input_size, s=stride)
        if half:
            self.model.half()  # to FP16
        # Get names and colors
        self.names = self.model.module.names if hasattr(
            self.model, 'module') else self.model.names
        self.colors = [[random.randint(0, 255) for _ in range(3)]
                       for _ in self.names]
        param.update = False
    else:
        stride = int(self.model.stride.max())  # model stride

    # Resize image. stride must be passed by keyword, as the other callers
    # of letterbox do: positionally it would land in the parameter that
    # follows new_shape (the padding colour in YOLOv5's letterbox).
    image = letterbox(src_image, param.input_size, stride=stride)[0]
    image = image.transpose(2, 0, 1)
    image = np.ascontiguousarray(image)
    self.emitStepProgress()

    # Run inference
    image = torch.from_numpy(image).to(self.device)
    image = image.half() if half else image.float()  # uint8 to fp16/32
    image /= 255.0  # 0 - 255 to 0.0 - 1.0
    if image.ndimension() == 3:
        image = image.unsqueeze(0)
    self.emitStepProgress()

    # Inference only: skip building an autograd graph.
    with torch.no_grad():
        pred = self.model(image, augment=param.augment)[0]
    self.emitStepProgress()

    # Apply NMS
    pred = non_max_suppression(pred, param.conf_thres, param.iou_thres,
                               agnostic=param.agnostic_nms)
    self.emitStepProgress()

    graphics_output = self.getOutput(1)
    graphics_output.setNewLayer("YoloV5")
    graphics_output.setImageIndex(0)

    detected_names = []
    detected_conf = []

    # Process detections
    for det in pred:  # detections per image
        if not len(det):
            continue
        # Rescale boxes from the network input size to the source size
        det[:, :4] = scale_coords(image.shape[2:], det[:, :4],
                                  src_image.shape).round()

        # Results
        for *xyxy, conf, cls in reversed(det):
            left, top = float(xyxy[0]), float(xyxy[1])
            name = self.names[int(cls)]
            color = self.colors[int(cls)]

            # Box
            w = float(xyxy[2] - xyxy[0])
            h = float(xyxy[3] - xyxy[1])
            prop_rect = core.GraphicsRectProperty()
            prop_rect.pen_color = color
            graphics_box = graphics_output.addRectangle(
                left, top, w, h, prop_rect)
            graphics_box.setCategory(name)

            # Label
            prop_text = core.GraphicsTextProperty()
            prop_text.font_size = 8
            prop_text.color = color
            graphics_output.addText(name, left, top, prop_text)

            detected_names.append(name)
            detected_conf.append(conf.item())

    # Init numeric output
    numeric_output = self.getOutput(2)
    numeric_output.clearData()
    numeric_output.setOutputType(dataprocess.NumericOutputType.TABLE)
    numeric_output.addValueList(detected_conf, "Confidence", detected_names)
    self.emitStepProgress()
def automate(self):
    """Pre-annotate the current image with object and face detections.

    Clears the existing boxes, runs ``self.object_model`` and
    ``self.face_model`` on ``self.img``, and for every detection with
    confidence of at least 0.5 whose label is present in
    ``self.labelListBox`` draws a rectangle with four corner handles on the
    canvas and records it in the bbox / label bookkeeping lists.
    """

    def add_box(b, label):
        """Draw box ``b`` (x1, y1, x2, y2) and register it under ``label``."""
        # Only labels currently listed in the label list box are annotated.
        if label not in list(self.labelListBox.get(0, END)):
            return
        self.bboxId = self.canvas.create_rectangle(
            b[0], b[1], b[2], b[3],
            width=2,
            outline=config.COLORS[len(self.bboxList) % len(config.COLORS)])
        self.bboxList.append((b[0], b[1], b[2], b[3]))
        # Corner handles: top-left, top-right, bottom-right, bottom-left.
        corners = ((b[0], b[1]), (b[2], b[1]), (b[2], b[3]), (b[0], b[3]))
        for cx, cy in corners:
            self.bboxPointList.append(
                self.canvas.create_oval(cx - 3, cy - 3, cx + 3, cy + 3,
                                        fill="red"))
        self.bboxIdList.append(self.bboxId)
        self.bboxId = None
        self.objectLabelList.append(str(label))
        self.objectListBox.insert(
            END,
            '(%d, %d) -> (%d, %d)' % (b[0], b[1], b[2], b[3]) + ': ' +
            str(label))
        self.objectListBox.itemconfig(
            len(self.bboxIdList) - 1,
            fg=config.COLORS[(len(self.bboxIdList) - 1) % len(config.COLORS)])

    self.clear_bbox()
    self.processingLabel.config(text="Processing     ")
    self.processingLabel.update_idletasks()

    open_cv_image0 = np.array(self.img)
    # Padded resize
    open_cv_image = letterbox(open_cv_image0, new_shape=self.img_size)[0]

    # Convert: reverse the channel axis, then HWC to CHW
    open_cv_image = open_cv_image[:, :, ::-1].transpose(2, 0, 1)
    open_cv_image = np.ascontiguousarray(open_cv_image)

    img = torch.from_numpy(open_cv_image).to(self.device)
    img = img.half() if self.half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # inference object detection
    # NOTE(review): the original read a bare `opt.augment` here while every
    # other option comes from self.opt; confirm no separate global was meant.
    pred = self.object_model(img, augment=self.opt.augment)[0]

    # Apply NMS
    pred = non_max_suppression(pred,
                               self.opt.conf_thres,
                               self.opt.iou_thres,
                               classes=self.opt.classes,
                               agnostic=self.opt.agnostic_nms)

    # process detections
    det = pred[0]
    if det is not None and len(det):
        # Rescale boxes from the network input size to the source size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                  open_cv_image0.shape).round()
        for *xyxy, conf, cls in reversed(det):
            if conf < 0.5:
                continue
            # np.int was removed in NumPy 1.24; the builtin int is the
            # same dtype.
            box = torch.tensor(xyxy).view(-1).cpu().numpy().astype(int)
            add_box(box.tolist(), config.labels_to_names[int(cls)])

    # inference faces
    frame = Image.fromarray(open_cv_image0[:, :, ::-1])
    # detect faces
    boxes, probs = self.face_model.detect(frame, landmarks=False)
    if boxes is None:
        boxes = []
        probs = []
    for box, conf in zip(boxes, probs):
        if conf < 0.5:
            continue
        # box in xyxy format
        add_box(box.astype(int).tolist(), 'face')

    self.processingLabel.config(text="Done")
def detect(self,
           weights,
           step=1000,
           conf_thres=0.1,
           imgsz=640,
           targetfilepath=None,
           iou_thres=0.25,
           targetclasses=None):
    """Detect sound events by running the detector over spectrogram clips.

    The model and its ``soundclass.csv`` table are (re)loaded only when no
    model is loaded yet or ``weights`` differs from the path loaded last.
    ``self.tfr`` is then called to prepare ``self.rainbow_img``, which is
    cut into clips every ``step`` time units; each clip is letterboxed and
    run through the model and NMS.

    Args:
        weights: Path to the model weights. The class table is read from the
            same path with ``best.pt`` replaced by ``soundclass.csv``.
        step: Offset between consecutive clip starts, in the units of
            ``self.duration``.
        conf_thres: NMS confidence threshold.
        imgsz: Target size forwarded to ``letterbox``.
        targetfilepath: Forwarded to ``self.tfr``.
        iou_thres: NMS IoU threshold.
        targetclasses: Optional class names to keep; ``None`` keeps all.

    Returns:
        A list of rows whose first row is the header ``file, classid,
        species_name, sound_class, scientific_name, time_begin, time_end,
        freq_low, freq_high, score`` followed by one row per detection.
    """
    if not (self.model and self.model_path == weights):
        self.model_path = weights
        model = attempt_load(self.model_path, map_location=self.device)
        self.names = model.module.names if hasattr(
            model, 'module') else model.names
        model.float()
        self.model = model
        self.soundclasses = pd.read_csv(
            self.model_path.replace('best.pt', 'soundclass.csv'),
            encoding='utf8',
            index_col='sounclass_id').T.to_dict()

    if targetclasses:
        classes = [self.names.index(name) for name in targetclasses]
    else:
        classes = None

    self.tfr(targetfilepath=targetfilepath, spect_type='rainbow')

    # prepare input data clips
    total_width = self.rainbow_img.shape[1]
    audio_path = os.path.join(self.audiopath, self.audiofilename)
    dataset = []
    for ts in range(0, self.duration, step):
        clip_start = round(ts / self.duration * total_width)
        clip_end = clip_start + round(
            self.clip_length / self.duration * total_width)
        if clip_end > total_width:
            break  # the remaining audio is shorter than one clip
        img0 = self.rainbow_img[:, clip_start:clip_end]
        img = letterbox(img0, new_shape=imgsz)[0]
        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # reverse channels, HWC to CHW
        img = np.ascontiguousarray(img)
        dataset.append([audio_path, img, img0, ts])

    labels = [[
        'file', 'classid', 'species_name', 'sound_class', 'scientific_name',
        "time_begin", "time_end", "freq_low", "freq_high", "score"
    ]]
    for path, img, im0, time_start in dataset:
        img = torch.from_numpy(img).float().to(self.device)
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference only: skip building an autograd graph.
        with torch.no_grad():
            pred = self.model(img, augment=False)[0]
        pred = non_max_suppression(pred,
                                   conf_thres=conf_thres,
                                   iou_thres=iou_thres,
                                   classes=classes)

        gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
        for det in pred:  # detections per image
            if not len(det):
                continue
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                        gn).view(-1).tolist()  # normalized xywh
                ts, te, fl, fh = self.xywh2ttff(xywh)
                classid = self.names[int(cls)]
                info = self.soundclasses[classid]
                labels.append([
                    path, classid, info['species_name'],
                    info['sound_class'], info['scientific_name'],
                    round(time_start + ts),
                    round(time_start + te), fl, fh,
                    round(float(conf), 3)
                ])
    return labels
# update(self): per-frame step of the Tk vehicle-counting loop.
#
# What the code below does, in order:
#   1. Reads one frame from self.vs; when no frame is grabbed it calls
#      self.root.quit() and prints a finished banner.
#   2. Letterboxes the frame to 640, reverses the channel axis, moves it to
#      channel-first layout, scales it to 0.0 - 1.0 and adds a batch axis.
#   3. Runs self.model followed by non_max_suppression using
#      self.conf_thres / self.iou_thres / self.classes / self.agnostic_nms.
#   4. Rescales the boxes to the frame size, converts each one to
#      centre-x, centre-y, width, height and passes them with their
#      confidences to self.deepsort.update; when there are no detections it
#      calls self.deepsort.increment_ages() instead.
#   5. For tracked boxes, counts motorcycle / auto / car / truck / bus via
#      self.Obj_counting, counts box centres inside Polygon(self.points) as
#      occupancy, and draws the boxes plus an occupancy / date / time /
#      speed / flow overlay with cv2.
#   6. Pushes the frame to self.canvas and creates or updates the Tk count
#      labels (two/three/four wheeler, truck, bus, total, FPS).
#   7. Re-schedules itself with self.root.after(self.delay, self.update).
#
# NOTE(review): this definition is collapsed onto a few physical lines, so
# the original nesting cannot be recovered from this text; the code is left
# untouched. Points to confirm against a properly formatted copy:
#   - `flow` appears to be assigned only inside the tracked-box loop but is
#     used in a putText call; check it cannot be unbound on the first use.
#   - `trackIds` and all the counters are re-created on every call, so
#     counts presumably do not accumulate across frames here -- verify
#     against Obj_counting.
#   - `f` is reset to 0 and incremented once per call, so calculate_fps
#     always receives f == 1.
#   - `path` is hard-coded to "traffic3.mp4"; the self.webcam branch indexes
#     it and img0 per detection.
#   - The initial torch.zeros `img` is overwritten before use when a frame
#     is grabbed.
def update(self): f = 0 start_time = datetime.datetime.now() today = datetime.date.today() # dd/mm/YY date = today.strftime("%d/%m/%Y") current_time = start_time.strftime("%H:%M:%S") trackIds, position, speed_e, fps = [], {}, 0, 0.0 two_w, three_w, four_w, truck, bus, total = 0, 0, 0, 0, 0, 0 img = torch.zeros((1, 3, self.imgsz, self.imgsz), device=self.device) # init img (grabbed, frame) = self.vs.read() path = "traffic3.mp4" img0 = frame names = self.model.module.names if hasattr( self.model, "module") else self.model.names if grabbed == True: img = letterbox(img0, new_shape=640)[0] # Convert img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 img = np.ascontiguousarray(img) f = f + 1 # count = self.count+1 img = torch.from_numpy(img).to(self.device) img = img.half() if self.half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = self.model(img, augment=self.augment)[0] # Apply NMS pred = non_max_suppression( pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms, ) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if self.webcam: # batch_size >= 1 p, s, im0 = path[i], "%g: " % i, img0[i].copy() else: p, s, im0 = path, "", img0 s += "%gx%g " % img.shape[2:] # print string # save_path = str(Path(self.out) / Path(p).name) if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() bbox_xywh = [] confs = [] labels = [] # Adapt detections to deep sort input format for *xyxy, conf, cls in det: label = f"{names[int(cls)]}" bbox_left = min([xyxy[0].item(), xyxy[2].item()]) bbox_top = min([xyxy[1].item(), xyxy[3].item()]) bbox_w = abs(xyxy[0].item() - xyxy[2].item()) bbox_h = abs(xyxy[1].item() - xyxy[3].item()) x_c = bbox_left + bbox_w / 2 y_c = bbox_top + bbox_h / 2 bbox_w = 
bbox_w bbox_h = bbox_h # x_c, y_c, bbox_w, bbox_h = bbox_rel(self, *xyxy) obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) labels.append(label) confss, labelss = [], [] for conf, label in zip(confs, labels): confss.append(conf) labelss.append(label) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = self.deepsort.update(xywhs, confss, im0) # draw line cv2.polylines(im0, [self.pts_arr], self.isClosed, (255, 0, 0), 2) cv2.rectangle(img0, (650, 0), (850, 170), color=(0, 0, 0), thickness=-1) if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] offset = (0, 0) counter = 0 for i, box in enumerate(bbox_xyxy): if i < (len(labels[::-1]) - 1): x1, y1, x2, y2 = [int(i) for i in box] x1 += offset[0] x2 += offset[0] y1 += offset[1] y2 += offset[1] # box text and bar id = int(identities[i] ) if identities is not None else 0 label = "{}{:d}".format("", id) cls = labels[::-1][i] # Object counting if cls == "motorcycle": two_w, total = self.Obj_counting( id, label, trackIds, two_w, total) elif cls == "auto": three_w, total = self.Obj_counting( id, label, trackIds, three_w, total) elif cls == "car": four_w, total = self.Obj_counting( id, label, trackIds, four_w, total) elif cls == "truck": truck, total = self.Obj_counting( id, label, trackIds, truck, total) elif cls == "bus": bus, total = self.Obj_counting( id, label, trackIds, bus, total) fps = self.calculate_fps(start_time, f) # check if center points of object is inside the polygon point = Point((int(x1 + (x2 - x1) / 2), int(y1 + (y2 - y1) / 2))) polygon = Polygon(self.points) if (polygon.contains(point)) == True: counter = counter + 1 t_size = cv2.getTextSize( label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] cv2.rectangle(im0, (x1, y1), (x2, y2), (0, 255, 0), 3) if counter > 5: flow = "High" elif counter >= 2 and counter < 5: flow = "Medium" else: flow = "Low" cv2.putText( im0, "Occupancy - " + str(counter), (650, 30), 
cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1, ) cv2.putText( im0, "Date - " + str(date), (650, 60), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1, ) cv2.putText( im0, "Time - " + str(current_time), (650, 90), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1, ) cv2.putText( im0, "Speed - " + "N A", (650, 120), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1, ) cv2.putText( im0, "Flow - " + str(flow), (650, 150), cv2.FONT_HERSHEY_DUPLEX, .5, (255, 0, 0), 1, ) # img = cv2.resize(img, (650, 360)) # image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) image = PIL.Image.fromarray(img0) image = PIL.ImageTk.PhotoImage(image) font = ("Arial", 12) self.canvas.configure(image=image) self.canvas.image = image result = tk.Label( self.counting_result, text=f"Counting Results", width=12, font=font, anchor="center", fg="blue", ) result.grid(row=0, column=2, padx=2) # result.pack(padx=10, pady=10) if self.two_w is None: self.two_w = tk.Label( self.counting_result, text=f"Two Wheeler \n\n{two_w}", width=13, font=font, anchor="center", bg="#8080c0", fg="white", ) self.two_w.grid(row=1, column=0, padx=2) else: self.two_w.configure(text=f"Two Wheeler\n\n{two_w}") if self.three_w is None: self.three_w = tk.Label( self.counting_result, text=f"Three Wheeler\n\n{three_w}", font=font, width=13, anchor="center", bg="#8080c0", fg="white", ) self.three_w.grid(row=1, column=1, padx=2) else: self.three_w.configure( text=f"Three Wheeler\n\n{three_w}") if self.four_w is None: self.four_w = tk.Label( self.counting_result, text=f"Four Wheeler\n\n{four_w}", width=13, font=font, anchor="center", bg="#8080c0", fg="white", ) self.four_w.grid(row=1, column=2, padx=2) else: self.four_w.configure(text=f"Four Wheeler\n\n{four_w}") if self.truck is None: self.truck = tk.Label( self.counting_result, text=f"Truck\n\n{truck}", font=font, width=10, anchor="center", bg="#8080c0", fg="white", ) self.truck.grid(row=1, column=3, padx=1) else: self.truck.configure(text=f"Truck\n\n{truck}") if self.bus is None: self.bus = tk.Label( 
self.counting_result, text=f"Bus\n\n{bus}", font=font, width=10, anchor="center", bg="#8080c0", fg="white", ) self.bus.grid(row=1, column=4, padx=2) else: self.bus.configure(text=f"Bus\n\n{bus}") if self.total is None: self.total = tk.Label( self.counting_result, text=f"Total Vehicle\n\n{total}", font=font, width=10, anchor="center", bg="#8080c0", fg="white", ) self.total.grid(row=1, column=5, pady=2) else: self.total.configure(text=f"Total Vehicle\n\n{total}") if self.fps is None: self.fps = tk.Label( self.counting_result, text=f"FPS\n\n{fps:.2f}", font=font, width=13, anchor="center", bg="#8080c0", fg="white", ) self.fps.grid(row=2, column=0, pady=2) else: self.fps.configure(text=f"FPS\n\n{fps:.2f}") else: self.deepsort.increment_ages() self.root.after(self.delay, self.update) # Print time (inference + NMS) print("%sDone. (%.3fs)" % (s, t2 - t1)) else: self.root.quit() print( "***********************************************FINSHED***********************************************" )
def get_detector_results(self, request):
    """Run the detector on the image carried by a service request.

    Only one request is processed at a time: while ``self.currently_busy``
    is set, callers get a BUSY response immediately. The flag is cleared
    when processing ends, whether it succeeded or raised.

    Args:
        request (GetDetectorResultsRequest):

    Returns:
        GetDetectorResultsResponse: status BUSY with no results, status
        ERROR with the detections gathered before the failure, or status
        OKAY with all detections.

    Raises:
        ImportError: If torch, numpy or the yolov5 helpers are unavailable.
    """
    # Imported lazily, on each call rather than at module import time.
    import torch
    from yolov5.utils.general import non_max_suppression
    from yolov5.utils.general import scale_coords
    from yolov5.utils.datasets import letterbox
    import numpy as np

    if self.currently_busy.is_set():
        return GetDetectorResultsResponse(status=ServiceStatus(BUSY=True))
    self.currently_busy.set()

    detections = Detections()
    try:
        image = ros_numpy.numpify(request.image)
        if request.image.encoding == "rgb8":
            image = image[..., ::-1]  # RGB to BGR, undone again below
        original_shape = image.shape

        img = letterbox(image, new_shape=self.image_size)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        with torch.no_grad():
            pred = self.model(img, augment=False)[0]
            pred = non_max_suppression(pred, self.conf_thresh,
                                       self.iou_thresh, agnostic=False)

        for det in pred:
            if det is None or not len(det):
                continue
            # Rescale boxes from the network input size to the source size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      original_shape).round()
            for x1, y1, x2, y2, conf, cls in reversed(det):
                roi = RegionOfInterest(x1=int(x1), y1=int(y1),
                                       x2=int(x2), y2=int(y2))
                seg_roi = SegmentOfInterest(x=[], y=[])
                detections.objects.append(
                    Detection(roi=roi,
                              seg_roi=seg_roi,
                              id=self._new_id(),
                              track_id=-1,
                              confidence=float(conf),
                              class_name=self.names[int(cls)]))
    except Exception as e:
        print("FruitCastServer error: ", e)
        return GetDetectorResultsResponse(status=ServiceStatus(ERROR=True),
                                          results=detections)
    finally:
        # Always release the flag; previously a failure left the service
        # reporting BUSY forever.
        self.currently_busy.clear()
    return GetDetectorResultsResponse(status=ServiceStatus(OKAY=True),
                                      results=detections)