def predict(self, image):
    """Run the detector on one image and return the detected class names.

    The input is converted to an array, cut down to its first three
    channels, letterboxed to ``self._img_size``, moved to channels-first
    layout, scaled to the 0.0 - 1.0 range and fed to ``self._model``. The
    raw output is filtered by ``non_max_suppression`` with the thresholds
    and class options stored on ``self._args``.

    Returns:
        A list holding one entry of ``self._names`` for every distinct value
        found in the last column of the first image's detections.
    """
    # Work on a private copy of the pixels; no channel reordering is done.
    pixels = np.array(image)[:, :, :3].copy()

    # Padded resize, then channels-first and contiguous in memory.
    pixels = letterbox(pixels, new_shape=self._img_size)[0]
    pixels = np.ascontiguousarray(pixels.transpose(2, 0, 1))

    tensor = torch.from_numpy(pixels).to(self._device)
    if self._half:
        tensor = tensor.half()
    else:
        tensor = tensor.float()
    tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    if tensor.ndimension() == 3:
        tensor = tensor.unsqueeze(0)  # add the batch dimension

    # Inference followed by non-maximum suppression.
    raw = self._model(tensor, augment=self._args.augment)[0]
    filtered = non_max_suppression(
        raw,
        self._args.conf_thres,
        self._args.iou_thres,
        classes=self._args.classes,
        agnostic=self._args.agnostic_nms,
    )
    first_image = filtered[0]

    return [self._names[int(class_id)] for class_id in first_image[:, -1].unique()]
def _image_transform(self, path):
    """Read an image file and build the network-ready array for it.

    Args:
        path: Location handed to ``cv2.imread``.

    Returns:
        ``(img, img0)`` where ``img0`` is what ``cv2.imread`` returned and
        ``img`` is the letterboxed image (``new_shape=self.img_size``,
        ``auto=False``) with its channel axis reversed and moved to the
        front, stored contiguously.
    """
    original = cv2.imread(path)  # BGR
    boxed = letterbox(original, new_shape=self.img_size, auto=False)[0]

    # Reverse the channel axis (BGR to RGB), then HWC to CHW.
    channels_first = boxed[:, :, ::-1].transpose(2, 0, 1)
    return np.ascontiguousarray(channels_first), original
def main_process(input_img):
    """Detect objects in a frame, draw them, and overlay the summed value.

    Uses the module-level ``imgsz``, ``device``, ``half``, ``model``,
    ``names``, ``colors``, ``my_confidence``, ``my_threshold`` and
    ``my_filterclasses``. Every detected class name is parsed with ``int``
    and added to a running total, so each entry of ``names`` that can be
    detected must be an integer string.

    Args:
        input_img: Frame to process. It is copied; the copy is annotated.

    Returns:
        The annotated copy with a ``total <n> Baht`` caption drawn on it.
    """
    img0 = input_img.copy()

    img = letterbox(img0, new_shape=imgsz)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()
    img /= 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    t1 = time_synchronized()
    pred = model(img, augment=True)[0]
    pred = non_max_suppression(pred, my_confidence, my_threshold,
                               classes=my_filterclasses, agnostic=None)
    t2 = time_synchronized()

    total = 0
    for det in pred:
        if det is not None and len(det):
            # Map boxes from the letterboxed input back onto the frame copy.
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                class_index = int(cls)
                label = '%sbaht (%.0f%%)' % (names[class_index], conf * 100)
                total += int(names[class_index])
                plot_one_box(xyxy, img0, label=label, color=colors[class_index],
                             line_thickness=3)
                print(label)
        print('Done. (%.3fs)' % (t2 - t1))

    img0 = cv2.putText(img0, "total "+str(total)+" Baht", (10, 45 + 30 * 3),
                       cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 255), 2)
    return img0
def infer1(self, im, prepare=True, post=True):
    """Run the model on one image, optionally rescaling and plotting boxes.

    Args:
        im: Image array, or a path string that is loaded with ``cv2.imread``.
        prepare: If true, letterbox ``im`` to ``self.imsz``, reverse its
            channel axis and move it to channels-first layout before
            inference; if false, ``im`` itself is converted to a tensor.
        post: If true, rescale the boxes of every detection tensor to
            ``im.shape`` and append ``self.plot(im, pred[0])`` to the result.

    Returns:
        ``(im, pred, dt, plotted)`` when ``post`` is true, otherwise
        ``(im, pred, dt)``. ``dt`` is the ``time_sync`` difference around
        the forward pass and NMS, multiplied by 1000.
    """
    if type(im) == str:
        im = cv2.imread(im)
    assert type(im) == np.ndarray

    if prepare:
        net_input = letterbox(im, new_shape=self.imsz)[0]  # resize
        net_input = net_input[..., ::-1].transpose(2, 0, 1)  # BGR->RGB->(C,H,W)
        net_input = np.ascontiguousarray(net_input)
    else:
        net_input = im

    # uint8 to fp16/fp32, [0,255] to [0,1.0]
    tensor = torch.from_numpy(net_input).to(self.device)
    tensor = tensor.half() if self.half else tensor.float()
    tensor = tensor / 255.0
    if tensor.ndimension() == 3:
        tensor = tensor.unsqueeze(0)

    started = time_sync()
    raw = self.model(tensor, augment=self.augment)[0]
    # pred is a list with one (n, 6) tensor per image in the batch.
    pred = non_max_suppression(raw, self.conf_thres, self.iou_thres,
                               classes=self.classes, agnostic=self.agnostic_nms)
    dt = (time_sync() - started) * 1000

    if not post:
        return im, pred, dt  # each row: (x1, y1, x2, y2, conf, cls)

    for det in pred:
        # Rescale boxes from the network input shape to the shape of ``im``.
        det[:, :4] = scale_coords(tensor.shape[2:], det[:, :4], im.shape).round()
    return im, pred, dt, self.plot(im, pred[0])
def objectdetect(frame, count):
    """Detect objects in a frame, crop them, and save/show the annotated frame.

    Uses the module-level ``imgsz``, ``device``, ``half``, ``model``,
    ``opt``, ``names`` and ``colors``. The frame is annotated in place,
    written to ``images/frame{count}.jpg`` and displayed with matplotlib.

    Args:
        frame: Image array to analyse; boxes are drawn onto it.
        count: Number used in the output file name.

    Returns:
        A dict mapping each detected class name to a slice of ``frame``
        covering the box. Later detections of the same class overwrite
        earlier ones.
    """
    crops = {}

    net_in = letterbox(frame, new_shape=imgsz)[0]
    net_in = net_in[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    net_in = np.ascontiguousarray(net_in)

    tensor = torch.from_numpy(net_in).to(device)
    if half:
        tensor = tensor.half()
    else:
        tensor = tensor.float()
    tensor /= 255.0
    if tensor.ndimension() == 3:
        tensor = tensor.unsqueeze(0)

    raw = model(tensor, augment=opt.augment)[0]
    results = non_max_suppression(raw, opt.conf_thres, opt.iou_thres, classes=opt.classes)

    for detections in results:
        if detections is None or not len(detections):
            continue
        # Rescale boxes from the network input size to the frame size.
        detections[:, :4] = scale_coords(tensor.shape[2:], detections[:, :4], frame.shape).round()
        for *xyxy, conf, cls in detections:
            label = names[int(cls)]
            x1, y1, x2, y2 = (int(v) for v in xyxy[:4])
            # The crop is taken before the box is drawn on the frame.
            crops[label] = frame[y1:y2, x1:x2]
            plot_one_box(xyxy, frame, label=label, color=colors[int(cls)], line_thickness=3)

    cv2.imwrite(f'images/frame{count}.jpg', frame)
    plt.imshow(frame)
    plt.show()
    return crops
def load_file():
    """Classify the uploaded image as ``'fall'`` or ``'stand'``.

    Reads the ``file`` upload from the current ``request``, decodes it with
    PIL, letterboxes it to 640, runs the module-level ``model`` and applies
    NMS (confidence 0.25, IoU 0.45, class-agnostic).

    Returns:
        ``'fall'`` when the sum over the last column of the stacked NMS
        output exceeds 0.5, otherwise ``'stand'``.
    """
    upload = request.files.get('file')
    data = Image.open(io.BytesIO(upload.read()))

    img_np = np.array(data)
    img_np = letterbox(img_np, new_shape=640)[0]
    # NOTE(review): PIL normally yields RGB, so this reversal would hand the
    # model BGR despite the original "BGR to RGB" intent - confirm against
    # how the model was trained before changing it.
    img_np = img_np[:, :, ::-1].transpose(2, 0, 1)
    img_np = np.ascontiguousarray(img_np)
    img_np = img_np / 255.0  # 0 - 255 to 0.0 - 1.0

    img_t = torch.from_numpy(img_np)
    img_t = img_t.half() if half else img_t.float()
    if img_t.ndimension() == 3:
        img_t = img_t.unsqueeze(0)
    print(img_t.shape)

    pred = model(img_t, augment=False)[0]
    print(pred.shape)

    # Apply NMS: conf 0.25, iou 0.45
    pred = non_max_suppression(pred, 0.25, 0.45, classes=None, agnostic=True)
    pred = torch.stack(pred, dim=0)
    print(pred.shape)

    return 'fall' if pred[:, :, -1].sum() > 0.5 else 'stand'
def detect_image(self, img):
    """Detect objects in one image and convert them to detection objects.

    Args:
        img: Image array; its shape is used to rescale the boxes.

    Returns:
        When NMS yields ``None`` for the image, a dict with ``"boxes"``,
        ``"classes"`` and ``"scores"`` all set to ``None``. Otherwise the
        result of ``self.__make_objects_from_detections`` applied to a dict
        with the rescaled boxes, the class names and the scores.
    """
    original_frame = img

    # Padded resize, then BGR to RGB and channels-first.
    boxed = letterbox(img, self.imgsz, stride=self.stride)[0]
    channels_first = boxed[:, :, ::-1].transpose(2, 0, 1)

    frame = torch.from_numpy(np.ascontiguousarray(channels_first)).to(self.device)
    if self.half:
        frame = frame.half()
    else:
        frame = frame.float()
    frame /= 255.0  # 0 - 255 to 0.0 - 1.0
    if frame.ndimension() == 3:
        frame = frame.unsqueeze(0)

    # The returned timestamp is not used; the call itself is kept.
    time_synchronized()
    raw = self.model(frame, augment=True)[0]
    pred = non_max_suppression(raw, self.conf_thres, self.iou_thres,
                               agnostic=self.agnostic_nms)[0]

    if pred is None:
        return {"boxes": None, "classes": None, "scores": None}

    # Rescale boxes from the network input size to the original frame size.
    pred[:, :4] = scale_coords(frame.shape[2:], pred[:, :4], original_frame.shape).round()

    scores = [float(conf.cpu().detach().numpy()) for *_box, conf, _cls in pred]
    pred_array = pred.cpu().detach().numpy()
    class_ids = pred_array[:, -1].astype(int)

    output = {
        "boxes": pred_array[:, :4],
        "classes": [self.names[class_id] for class_id in class_ids],
        "scores": scores,
    }
    return self.__make_objects_from_detections(output)
def __init__(self, sources='streams.txt', img_size=640, stride=32):
    """Open every video source and read one frame from each.

    Args:
        sources: Path to a text file listing one source per non-empty line,
            or a single source string. A purely numeric string is converted
            to an integer before being given to ``cv2.VideoCapture``.
        img_size: Target size forwarded to ``letterbox``.
        stride: Stride forwarded to ``letterbox``.

    Raises:
        AssertionError: If a source cannot be opened or its first frame
            cannot be read.
    """
    self.mode = 'stream'
    self.img_size = img_size
    self.stride = stride

    if os.path.isfile(sources):
        with open(sources, 'r') as f:
            sources = [line.strip() for line in f.read().strip().splitlines() if line.strip()]
    else:
        sources = [sources]

    n = len(sources)
    self.imgs = [None] * n
    self.sources = [clean_str(x) for x in sources]  # clean source names for later
    for i, s in enumerate(sources):
        print(f'{i + 1}/{n}: {s}... ', end='')
        # int() gives the same camera index as eval() did, without executing
        # text that may come from a user-supplied file.
        # NOTE: self.cap is rebound on every iteration, so only the capture
        # of the last source stays reachable through it.
        self.cap = cv2.VideoCapture(int(s) if s.isnumeric() else s)
        assert self.cap.isOpened(), f'Failed to open {s}'
        w = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = self.cap.get(cv2.CAP_PROP_FPS) % 100
        # Guarantee a first frame; without this check a failed read would
        # only surface later as an obscure error on a None image.
        grabbed, self.imgs[i] = self.cap.read()
        assert grabbed, f'Failed to read a frame from {s}'
        print(f' success ({w}x{h} at {fps:.2f} FPS).')
    print('')  # newline

    # Rect inference is possible only if every letterboxed frame has the same shape.
    shapes = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape
                       for x in self.imgs], 0)
    self.rect = np.unique(shapes, axis=0).shape[0] == 1
    if not self.rect:
        print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
def detect_bbox(self, img, img_size=640, stride=32, min_accuracy=0.5):
    """Return the boxes found in ``img`` whose score exceeds ``min_accuracy``.

    Args:
        img: Image array.
        img_size: Target size forwarded to ``letterbox``.
        stride: Stride forwarded to ``letterbox``.
        min_accuracy: Rows whose fifth value is not greater than this are
            dropped.

    Returns:
        A list of ``[x1, y1, x2, y2, acc, b]`` lists taken from the first
        non-empty detection array, or an empty list when nothing was found.
    """
    original_shape = img.shape

    # Normalise: letterbox, BGR to RGB, channels-first, contiguous.
    boxed = letterbox(img, img_size, stride=stride)[0]
    boxed = np.ascontiguousarray(boxed[:, :, ::-1].transpose(2, 0, 1))

    tensor = torch.from_numpy(boxed).to(self.device)
    if self.half:
        tensor = tensor.half()
    else:
        tensor = tensor.float()
    tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    if tensor.ndimension() == 3:
        tensor = tensor.unsqueeze(0)

    # NMS is called with its default thresholds.
    detections = non_max_suppression(self.model(tensor)[0])

    arrays = []
    for det in detections:
        if len(det):
            # Rescale boxes from the network input size to the original size.
            det[:, :4] = scale_coords(tensor.shape[2:], det[:, :4], original_shape).round()
            arrays.append(det.cpu().detach().numpy())

    if not len(arrays):
        return []
    return [
        [left, top, right, bottom, score, label]
        for left, top, right, bottom, score, label in arrays[0]
        if score > min_accuracy
    ]
def __next__(self):
    """Grab the next camera frame and return it with its preprocessed copy.

    Pressing ``q`` in an OpenCV window releases the capture, closes all
    windows and ends the iteration. For ``self.pipe == 0`` one frame is read
    and mirrored left-right; for any other pipe frames are grabbed
    continuously and every 30th one is retrieved until a retrieve succeeds.

    Returns:
        ``(img_path, img, img0, None)`` where ``img_path`` is the fixed
        string ``'webcam.jpg'``, ``img0`` is the captured frame and ``img``
        is its letterboxed, channel-reversed, channels-first contiguous copy.

    Raises:
        StopIteration: When ``q`` is pressed.
        AssertionError: When the local camera does not deliver a frame.
    """
    self.count += 1
    if cv2.waitKey(1) == ord('q'):  # q to quit
        self.cap.release()
        cv2.destroyAllWindows()
        raise StopIteration

    # Read frame
    if self.pipe == 0:  # local camera
        ret_val, img0 = self.cap.read()
        # Check before flipping: flipping a failed (None) frame would raise
        # an OpenCV error and hide the intended "Camera Error" message.
        assert ret_val, f'Camera Error {self.pipe}'
        img0 = cv2.flip(img0, 1)  # flip left-right
    else:  # IP camera
        n = 0
        while True:
            n += 1
            self.cap.grab()
            if n % 30 == 0:  # skip frames
                ret_val, img0 = self.cap.retrieve()
                if ret_val:
                    break  # the loop only ends with a successfully retrieved frame

    # Print
    img_path = 'webcam.jpg'
    print(f'webcam {self.count}: ', end='')

    # Padded resize
    img = letterbox(img0, self.img_size, stride=self.stride)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img)

    return img_path, img, img0, None
def __next__(self):
    """Hand out all frames collected since the previous call as one batch.

    ``self.imgs`` is swapped for a fresh empty list on every call.

    Returns:
        ``(self.source, img, img0, None)``. ``img0`` is the list of frames
        taken from ``self.imgs``; ``img`` is ``None`` when that list is
        empty, otherwise the letterboxed frames stacked into one array with
        the channel axis reversed and moved in front of the spatial axes.
    """
    self.count += 1

    # FIXME: Race Conditions??
    frames = self.imgs
    self.imgs = []

    # TODO: block if no new images
    if not len(frames):
        return self.source, None, frames, None

    # Letterbox every frame and stack them along a new leading axis.
    boxed = [letterbox(frame, new_shape=self.img_size, auto=self.rect)[0] for frame in frames]
    batch = np.stack(boxed, 0)

    # BGR to RGB, to bsx3x416x416
    batch = np.ascontiguousarray(batch[:, :, :, ::-1].transpose(0, 3, 1, 2))

    print(f'Recieved {batch.shape}')
    return self.source, batch, frames, None
def transform_yolo(img0):
    """Convert an image array into a float tensor scaled to 0.0 - 1.0.

    The image is letterboxed with ``new_shape`` set to the largest entry of
    ``img0.shape``, its channel axis is reversed and moved to the front, and
    the values are divided by 255. No batch dimension is added.

    Returns:
        The resulting ``torch`` float tensor.
    """
    longest = max(img0.shape)
    boxed = letterbox(img0, new_shape=longest)[0]

    channels_first = np.ascontiguousarray(boxed[:, :, ::-1].transpose(2, 0, 1))
    tensor = torch.from_numpy(channels_first).float()
    tensor /= 255.0
    return tensor
def process_frame(begin, end):
    """Letterbox the inputs, run the detector, and print both durations.

    Uses the module-level ``img_size`` and ``detection`` objects. Both
    arguments are passed positionally to ``letterbox``. Nothing is returned;
    the detection result is discarded.
    """
    started = time.time()
    prepared = letterbox(begin, end, new_shape=img_size)[0]
    print("time prepare process ", time.time() - started)

    started = time.time()
    detection.detect(prepared)
    print("time detection ", time.time() - started)
def gen_batch(images: list, img_size, stride, device, batch_size=32):
    """Preprocess images and group them into stacked batches.

    Every image is letterboxed, channel-reversed, moved to channels-first
    layout, sent to ``device`` (fp16 unless the device type is ``'cpu'``)
    and scaled to 0.0 - 1.0. Consecutive groups of at most ``batch_size``
    tensors are then combined with ``torch.stack``.

    NOTE(review): ``torch.stack`` needs equal shapes within a batch -
    confirm that ``letterbox`` yields one shape for all supplied images.

    Args:
        images: List of image arrays.
        img_size: Target size forwarded to ``letterbox``.
        stride: Stride forwarded to ``letterbox``.
        device: Torch device the tensors are moved to.
        batch_size: Maximum number of images per batch.

    Returns:
        A list of stacked tensors; empty when ``images`` is empty.
    """
    half = device.type != 'cpu'  # identical for every image, so computed once

    def process(img):
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0
        return img

    imgs = [process(letterbox(x, img_size, stride=stride)[0]) for x in images]

    # Stepping by batch_size yields ceil(len / batch_size) chunks, which is
    # exactly what the former three-way branch computed.
    batchs = [torch.stack(imgs[start:start + batch_size])
              for start in range(0, len(imgs), batch_size)]

    print("Len of batchs: ", len(batchs))
    return batchs
def predict(self, inputData):
    """Detect objects in the image file named by ``inputData.data``.

    Uses the module-level ``opt``, ``device`` and ``half`` together with
    ``self.p_model``. NMS runs with confidence 0.25 and IoU 0.45.

    NOTE(review): no ``scale_coords`` call is made here, so the returned
    boxes are presumably in the coordinates of the letterboxed network
    input rather than of the original image - confirm this is intended.

    Returns:
        A dict with parallel lists under ``'boxes'`` (four ints each),
        ``'classes'`` (ints) and ``'scores'`` (floats).
    """
    source = cv2.imread(inputData.data)
    net_in = letterbox(source, new_shape=opt.img_size)[0]

    # BGR to RGB, to 3x416x416
    net_in = np.ascontiguousarray(net_in[:, :, ::-1].transpose(2, 0, 1))

    tensor = torch.from_numpy(net_in).to(device)
    if half:
        tensor = tensor.half()
    else:
        tensor = tensor.float()
    tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    if tensor.ndimension() == 3:
        tensor = tensor.unsqueeze(0)

    # Inference
    raw = self.p_model(tensor)[0]
    results = non_max_suppression(raw, 0.25, 0.45)

    # Parse inference
    boxes, classes, scores = [], [], []
    for det in results:  # detections per image
        for *xyxy, conf, cls in reversed(det):
            boxes.append([int(coord) for coord in xyxy[:4]])
            classes.append(int(cls))
            scores.append(float(conf))

    return {'boxes': boxes, 'classes': classes, 'scores': scores}
def load_image(cv_bgr_image, img_size, stride):
    """Prepare an image array for the network and report its original shape.

    Returns:
        ``(img, shape)`` where ``img`` is the letterboxed image with its
        channel axis reversed and moved to the front, stored contiguously,
        and ``shape`` is ``cv_bgr_image.shape``.
    """
    # Padded resize
    boxed = letterbox(cv_bgr_image, img_size, stride=stride)[0]

    # BGR to RGB, to 3x416x416
    channels_first = boxed[:, :, ::-1].transpose(2, 0, 1)
    return np.ascontiguousarray(channels_first), cv_bgr_image.shape
def forward_one(model, bgr_mat, checked_imgsz, device, half, opt):
    """Run one image through the model and return NMS output in image coordinates.

    Args:
        model: Callable network; invoked with ``augment=opt.augment``.
        bgr_mat: Image array; its shape is used to rescale the boxes.
        checked_imgsz: Target size forwarded to ``letterbox``.
        device: Torch device for the input tensor.
        half: If true the input is converted to fp16, otherwise fp32.
        opt: Options object providing ``augment``, ``conf_thres``,
            ``iou_thres``, ``classes`` and ``agnostic_nms``.

    Returns:
        The list returned by ``non_max_suppression``; every non-empty entry
        has its first four columns rescaled to ``bgr_mat.shape`` and rounded.
    """
    net_in = letterbox(bgr_mat, new_shape=checked_imgsz)[0]
    net_in = np.ascontiguousarray(net_in[:, :, ::-1].transpose(2, 0, 1))

    tensor = torch.from_numpy(net_in).to(device)
    if half:
        tensor = tensor.half()
    else:
        tensor = tensor.float()
    tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    if tensor.ndimension() == 3:
        tensor = tensor.unsqueeze(0)

    raw = model(tensor, augment=opt.augment)[0]

    # Apply NMS
    pred = non_max_suppression(raw, opt.conf_thres, opt.iou_thres,
                               classes=opt.classes, agnostic=opt.agnostic_nms)
    # The returned timestamp is not used; the call itself is kept.
    torch_utils.time_synchronized()

    # Process detections
    for det in pred:  # detections per image
        if det is None or not len(det):
            continue
        det[:, :4] = scale_coords(tensor.shape[2:], det[:, :4], bgr_mat.shape).round()
    return pred
def __next__(self):
    """Return the next item of the iteration.

    NOTE(review): the first executable statement returns ``self.lastframe``,
    so the queue-draining and preprocessing code below appears to be
    unreachable - confirm whether that early return is intentional.

    The code after the early return drains ``self.queue`` keeping the newest
    entry, waits for another entry when none was pending, treats a ``None``
    entry as end of stream, reshapes the raw bytes to
    ``(self.height, self.width, self.depth)`` and returns
    ``('stdin', img, img0, None)``.
    """
    # if self.count == self.nf:
    #     raise StopIteration
    return self.lastframe
    data = None
    # Strip pending data in the queue
    while not self.queue.empty():
        data = self.queue.get()
        if data is None:
            # A None entry marks the end of the stream.
            raise StopIteration
    skip = 0
    if data is not None:
        skip -= 1
    # Wait for an entry when the queue held nothing above.
    while data is None or skip > 0:
        data = self.queue.get()
        if data is None:
            raise StopIteration
        skip -= 1
    # Interpret the raw buffer as uint8 pixels.
    img0 = np.frombuffer(data, dtype=np.uint8)
    img0 = img0.reshape((self.height, self.width, self.depth))
    # Padded resize
    img = letterbox(img0, self.img_size, stride=self.stride)[0]
    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img)
    return 'stdin', img, img0, None
def get_detect_one(img_bytes):
    """Decode an encoded image, detect objects, and return the annotated image.

    Uses the module-level ``yolo_conf``, ``device``, ``half``, ``model``,
    ``names`` and ``colors``.

    NOTE(review): the decoded image passes through ``to_rgb`` and its
    channel axis is reversed again before inference - confirm the channel
    order the model finally receives.

    Args:
        img_bytes: Encoded image data accepted by ``np.array``.

    Returns:
        The image returned by ``to_rgb`` with boxes and labels drawn on it.
    """
    # ndarray.tostring() is deprecated and gone from recent NumPy releases;
    # tobytes() returns the same bytes.
    raw = np.array(img_bytes).tobytes()
    buffer = np.asarray(bytearray(raw), dtype="uint8")
    image = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
    img0 = to_rgb(image)

    img = letterbox(img0, new_shape=yolo_conf.img_size)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # reverse channels, to 3x416x416
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    pred = model(img, augment=yolo_conf.augment)[0]
    pred = non_max_suppression(pred, yolo_conf.conf_thres, yolo_conf.iou_thres,
                               classes=yolo_conf.classes, agnostic=yolo_conf.agnostic_nms)

    for det in pred:  # detections per image
        if len(det):
            # Rescale boxes from the network input size to the image size.
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                label = '%s %.2f' % (names[int(cls)], conf)
                plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)
    return img0
def two_detect(im0, model, shape):
    """Detect objects in a crop and shift the boxes by an offset.

    The input tensor is moved to CUDA unconditionally.

    NOTE(review): NMS receives ``0.45`` and ``0.25`` positionally as its
    second and third arguments - confirm that order matches the intended
    confidence / IoU thresholds.

    Args:
        im0: Image array to run detection on.
        model: Callable network.
        shape: Offset; ``shape[0]`` is added to columns 0 and 2 of every
            detection row and ``shape[1]`` to columns 1 and 3.

    Returns:
        All shifted detection rows stacked into one tensor, or ``None`` when
        there are no detections.
    """
    net_in = letterbox(im0, new_shape=640)[0]
    net_in = np.ascontiguousarray(net_in[:, :, ::-1].transpose(2, 0, 1))  # BGR to RGB

    tensor = torch.from_numpy(net_in).cuda().float()
    tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    if tensor.ndimension() == 3:
        tensor = tensor.unsqueeze(0)

    raw = model(tensor, augment=False)[0]  # xywh
    results = non_max_suppression(raw, 0.45, 0.25)  # xyxy

    shifted = []
    for det in results:
        det[:, :4] = scale_coords(tensor.shape[2:], det[:, :4], im0.shape).round()
        for row in det:
            off_x, off_y = shape[0], shape[1]
            # All four new values are computed before any is written back.
            row[0], row[1], row[2], row[3] = (
                row[0] + off_x,
                row[1] + off_y,
                row[2] + off_x,
                row[3] + off_y,
            )
            shifted.append(row)

    if not shifted:
        return None
    return torch.stack(shifted)
def img_process(img_path, long_side=640, stride_max=32):
    '''
    Image preprocessing.

    Reads the file, resizes a copy so that its longer side equals
    ``long_side``, letterboxes it with ``auto=False`` and converts it to a
    float tensor scaled to 0.0 - 1.0 with a leading batch dimension.

    Returns ``(img, orgimg)``: the tensor and the image as read from disk.
    '''
    orgimg = cv2.imread(img_path)
    resized = copy.deepcopy(orgimg)

    orig_h, orig_w = orgimg.shape[:2]  # orig hw
    scale = long_side / max(orig_h, orig_w)  # resize image to img_size
    if scale != 1:
        # Area interpolation when shrinking, linear when enlarging.
        if scale < 1:
            method = cv2.INTER_AREA
        else:
            method = cv2.INTER_LINEAR
        target = (int(orig_w * scale), int(orig_h * scale))
        resized = cv2.resize(resized, target, interpolation=method)

    checked_size = check_img_size(long_side, s=stride_max)  # check img_size

    # auto=True: minimal rectangle; auto=False: fixed size
    boxed = letterbox(resized, new_shape=checked_size, auto=False)[0]

    # BGR to RGB, to 3x416x416
    channels_first = boxed[:, :, ::-1].transpose(2, 0, 1).copy()

    tensor = torch.from_numpy(channels_first).float()  # uint8 to fp32
    tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    if tensor.ndimension() == 3:
        tensor = tensor.unsqueeze(0)

    return tensor, orgimg
def predict(self, image):
    """Detect objects in ``image`` and describe each one as a dict.

    Args:
        image: Image array; its shape is used to rescale the boxes.

    Returns:
        A list of dicts with the keys ``"points"`` (four ints, the box
        rescaled to ``image``), ``"conf"`` (confidence as a float) and
        ``"class"`` (entry of ``self.classes`` for the detected class).
    """
    img = letterbox(image, new_shape=self.img_size)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, channels first
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(self.device)
    img = img.half() if self.half else img.float()
    img /= 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    pred = self.model(img, augment=False)[0]
    pred = non_max_suppression(pred, self.confidence, self.iou,
                               classes=None, agnostic=self.agnostic_nms)

    _output = []
    for det in pred:
        if det is not None and len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], image.shape).round()
            for *xyxy, conf, cls in reversed(det):
                _output.append({
                    "points": [int(each) for each in xyxy],
                    # float(), not int(): int() truncated every confidence
                    # below 1.0 down to 0.
                    "conf": float(conf),
                    "class": self.classes[int(cls)],
                })
    return _output
def inference(self, im0):
    """Run the model on one image and return its detections in image coordinates.

    The forward pass runs under ``torch.no_grad()`` in fp32 with
    ``augment=False``; NMS uses ``self.conf_th`` and ``self.iou_th``.

    Args:
        im0: Image array; its shape is used to rescale the boxes.

    Returns:
        The first entry of the NMS output. When it is non-empty, its first
        four columns are rescaled to ``im0.shape`` and rounded.
    """
    # Padded resize (as in yolov5/ts_utils/datasets.py > LoadImages)
    boxed = ds.letterbox(im0, new_shape=self.img_size)[0]

    # BGR to RGB, to 3x416x416
    boxed = np.ascontiguousarray(boxed[:, :, ::-1].transpose(2, 0, 1))

    tensor = torch.from_numpy(boxed).to(self.device).float()
    tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    if tensor.ndimension() == 3:
        tensor = tensor.unsqueeze(0)

    with torch.no_grad():
        raw = self.model(tensor, augment=False)[0]

    # NMS output: list of boxes (x1, y1, x2, y2, conf, class)
    detections = gen.non_max_suppression(raw, self.conf_th, self.iou_th)
    first = detections[0]  # only one image is predicted

    # Rescale boxes from the network input size to the im0 size.
    if len(first):
        first[:, :4] = gen.scale_coords(tensor.shape[2:], first[:, :4], im0.shape).round()
    return first
def detect(self,im0s,img):
    """Detect objects of the wanted classes and return boxes, crops and labels.

    NOTE(review): no batch dimension is added before the forward pass (the
    ``unsqueeze`` is commented out), and ``scale_coords`` is given
    ``shape[2:]`` of that tensor - confirm the model and the rescale work
    with an unbatched input.

    Args:
        im0s: Image array that is analysed and cropped.
        img: Ignored; the name is rebound immediately.

    Returns:
        ``(box_detects, ims, classes)``: arrays ``[left, top, right,
        bottom]``, the matching slices of ``im0s`` and the matching entries
        of ``self.classes``, for detections whose class is in ``self.names``.
    """
    net_in = letterbox(im0s, new_shape=self.img_size)[0]
    net_in = np.ascontiguousarray(net_in[:, :, ::-1].transpose(2, 0, 1))  # BGR to RGB

    img = torch.from_numpy(net_in).to(self.device).float()
    img /= 255.0
    # if img.ndimension() == 3:
    #     img = img.unsqueeze(0)

    raw = self.model(img, augment=False)[0]

    box_detects, ims, classes = [], [], []
    results = non_max_suppression(raw, self.conf_thres, self.iou_thres,
                                  classes=None, agnostic=False)
    for det in results:  # detections per image
        if det is None or not len(det):
            continue
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0s.shape).round()
        for *x, conf, cls in reversed(det):
            class_name = self.classes[int(cls)]
            if class_name not in self.names:
                continue
            left, top = int(x[0]), int(x[1])
            right, bottom = int(x[2]), int(x[3])
            ims.append(im0s[top:bottom, left:right])
            box_detects.append(np.array([left, top, right, bottom]))
            classes.append(class_name)
    return box_detects, ims, classes
def predict(self, img_path: str, confidence: float = 0.4):
    """Detect objects in an image file.

    Args:
        img_path: File opened with PIL and converted to ``"RGB"``.
        confidence: NMS confidence threshold; values below 0.1 are raised
            to 0.1. The IoU threshold is fixed at 0.45.

    Returns:
        An empty list when NMS yields ``None`` for the image; otherwise the
        detection tensor with its first four columns rescaled to the
        original image shape and rounded.
    """
    confidence = max(0.1, confidence)

    pil_image = Image.open(img_path).convert("RGB")
    # The PIL image itself is handed to letterbox; its result is then
    # converted to an array.
    net_in = np.asarray(letterbox(pil_image, new_shape=self.reso)[0])
    net_in = np.ascontiguousarray(net_in[:, :, ::-1].transpose(2, 0, 1))

    tensor = torch.from_numpy(net_in).to(self.device)
    original = np.asarray(pil_image)

    if self.half:
        tensor = tensor.half()
    else:
        tensor = tensor.float()
    tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    if tensor.ndimension() == 3:
        tensor = tensor.unsqueeze(0)

    raw = self.model(tensor, augment=False)[0]
    pred = non_max_suppression(raw, confidence, 0.45, classes=None, agnostic=False)[0]

    if pred is None:
        return []

    # Rescale boxes from the network input size to the original image size.
    pred[:, :4] = scale_coords(tensor.shape[2:], pred[:, :4], original.shape).round()
    return pred
def predict(self, img0):
    """Run augmented inference on one image and return its detections.

    Args:
        img0: Image array, or ``None``.

    Returns:
        ``None`` when ``img0`` is ``None``. Otherwise the first entry of the
        NMS output; when it is non-empty, its first four columns are
        rescaled to ``img0.shape`` and rounded.
    """
    if img0 is None:
        return None

    img = letterbox(img0, self.img_size, stride=self.stride)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(self.device)
    img = img.half() if self.half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Pass a real boolean: the former string 'True' only worked because any
    # non-empty string is truthy.
    pred = self.model(img, augment=True)[0]

    # Apply NMS
    pred = non_max_suppression(pred, self.conf_thres, self.iou_thres,
                               classes=self.classes, agnostic=self.agnostic_nms)
    pred = pred[0]
    if len(pred):
        # Rescale boxes from the network input size to the img0 size.
        pred[:, :4] = scale_coords(img.shape[2:], pred[:, :4], img0.shape).round()
    return pred
def __getitem__(self, index):
    """Decode the video frame at ``index`` and return it letterboxed and scaled.

    Returns:
        ``(tensor, (h, w))``: the letterboxed frame as a float32 tensor with
        values divided by 255, and the height and width of the decoded frame
        before letterboxing.

    Raises:
        StopIteration: If a decoded frame has an index greater than
            ``index`` before the requested frame is found.
        AssertionError: If demuxing ends without finding the frame.
    """
    # Seek only when the request lies behind ``current_index`` or more than
    # 32 frames ahead of it.
    if index - self.current_index > 32 or index - self.current_index < 0:
        self.stream.seek(index, whence='frame')
    image = None
    for packet in self.stream.container.demux():
        # Skip packets without a decode timestamp, packets whose
        # presentation timestamp precedes it, and packets of other streams.
        if packet.dts is None:
            continue
        if packet.pts < packet.dts:
            continue
        if packet.stream is not self.stream:
            continue
        for frame in packet.decode():
            if frame.index > index:
                raise StopIteration
            if frame.index == index:
                self.current_index = index
                image = frame.to_rgb().to_ndarray()
                break
        # Leave the packet loop as soon as the frame has been found.
        if image is not None:
            break
    assert image is not None, "No frame at index %d found." % index
    # Letterbox
    h, w, _ = image.shape
    if self.rect:
        #shape = self.batch_shapes[self.batch[index]]
        wh_ratio = w / h
        # The longer side becomes img_size; the shorter side follows the
        # aspect ratio and is then raised to the next multiple of 32
        # strictly above its floored value.
        if w > h:
            shape = (self.img_size / wh_ratio, self.img_size)
            shape = (int(np.floor(shape[0] / 32) * 32 + 32), shape[1])
        else:
            shape = (self.img_size, self.img_size * wh_ratio)
            shape = (shape[0], int(np.floor(shape[1] / 32) * 32 + 32))
        image, ratio, padw, padh = letterbox(image, new_shape=shape, mode='rect')
    else:
        shape = self.img_size
        image, ratio, padw, padh = letterbox(image, new_shape=shape, mode='square')
    # Normalize
    image = np.ascontiguousarray(image, dtype=np.float32)  # uint8 to float32
    image /= 255.0  # 0 - 255 to 0.0 - 1.0
    return torch.from_numpy(image), (h, w)
def process_image(transform,processing_model,img):
    """Detect class-0 objects in a frame and draw them on a copy of it.

    Args:
        transform: Not used.
        processing_model: ``(device, model, names, colors, imgsz)`` tuple.
        img: Frame to process; it is copied before anything is drawn.

    Returns:
        ``(tracks, img)``. On success ``tracks`` is the NMS output with the
        boxes rescaled to the frame and ``img`` is the annotated copy. If
        any step raises, the traceback is printed and ``([], img)`` is
        returned with ``img`` being the frame that was passed in.
    """
    tracks = []
    (device, model, names, colors, imgsz) = processing_model

    try:
        im0 = img.copy()

        # A separate name keeps ``img`` pointing at the caller's frame, so a
        # failure below cannot hand back the preprocessed tensor instead.
        net_in = letterbox(im0)[0]
        net_in = net_in[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        net_in = np.ascontiguousarray(net_in)
        net_in = torch.from_numpy(net_in).to(device)
        net_in = net_in.float()  # uint8 to fp32
        net_in /= 255.0  # 0 - 255 to 0.0 - 1.0
        if net_in.ndimension() == 3:
            net_in = net_in.unsqueeze(0)

        pred = model(net_in, augment=False)[0]

        # The class filter is given as a list: a bare 0 is falsy, so an NMS
        # that tests the filter by truthiness would silently skip it.
        pred = non_max_suppression(pred, 0.25, 0.45, classes=[0])

        # Process detections
        for det in pred:  # detections per image
            if det is not None and len(det):
                # Rescale boxes from the network input size to the im0 size.
                det[:, :4] = scale_coords(net_in.shape[2:], det[:, :4], im0.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy, im0, label=label, color=colors[int(cls)],
                                 line_thickness=3)

        img = im0
        tracks = pred
    except Exception as e:
        # Deliberate best effort: report the failure and return what we have.
        print(traceback.format_exc())
        print("YOLO 5 Exception",e)

    return tracks,img
def forward(self, imgs, size=640, augment=False, profile=False):
    """Run inference on tensors, arrays, PIL images, file names or URIs.

    A ``torch.Tensor`` input is moved to the model's device and dtype and
    passed straight to ``self.model``; that call's result is returned as is.
    Any other input is converted to arrays, letterboxed to one common shape,
    run through the model and NMS, and wrapped in ``Detections``.

    When ``imgs`` is a list, its entries are replaced in place by the
    converted arrays.

    Args:
        imgs: A tensor, a single image, or a list of images (see the
            examples below).
        size: Target length for the longest image side before it is made
            divisible by the model stride.
        augment: Forwarded to ``self.model``.
        profile: Forwarded to ``self.model``.

    Returns:
        The raw model output for tensor input, otherwise
        ``Detections(imgs, y, self.names)``.
    """
    # Inference from various sources. For height=720, width=1280, RGB images example inputs are:
    #   filename:   imgs = 'data/samples/zidane.jpg'
    #   URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
    #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
    #   PIL:             = Image.open('image.jpg')  # HWC x(720,1280,3)
    #   numpy:           = np.zeros((720,1280,3))  # HWC
    #   torch:           = torch.zeros(16,3,720,1280)  # BCHW
    #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
    p = next(self.model.parameters())  # for device and type
    if isinstance(imgs, torch.Tensor):  # torch
        return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference
    # Pre-process
    n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
    shape0, shape1 = [], []  # image and inference shapes
    for i, im in enumerate(imgs):
        if isinstance(im, str):  # filename or uri
            # Strings starting with 'http' are fetched with requests; any
            # other string is opened as a local path.
            im = Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)  # open
        im = np.array(im)  # to numpy
        # A first axis shorter than 5 is taken to be the channel axis.
        if im.shape[0] < 5:  # image in CHW
            im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
        # Keep the first three channels, or repeat a 2-D image three times.
        im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
        s = im.shape[:2]  # HWC
        shape0.append(s)  # image shape
        g = (size / max(s))  # gain
        shape1.append([y * g for y in s])
        imgs[i] = im  # update
    # Largest scaled height and width over all images, each made divisible
    # by the maximum stride.
    shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
    x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
    x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
    x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
    x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
    # Inference
    with torch.no_grad():
        y = self.model(x, augment, profile)[0]  # forward
    y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
    # Post-process
    for i in range(n):
        # The return value is discarded; y[i][:, :4] is passed in directly.
        scale_coords(shape1, y[i][:, :4], shape0[i])
    return Detections(imgs, y, self.names)
def processImg(img_mat, new_shape=(416, 416)):
    """Letterbox an image, show it, and return it channels-first.

    The letterboxed image is displayed in an OpenCV window named ``"img"``
    and ``cv2.waitKey()`` is called with no delay argument before
    continuing; with OpenCV's default delay this waits for a key press.

    Args:
        img_mat: Image array.
        new_shape: Target shape forwarded to ``letterbox`` with
            ``auto=False``.

    Returns:
        The letterboxed image with its channel axis reversed and moved to
        the front, as a contiguous array.
    """
    img = letterbox(img_mat, new_shape=new_shape, auto=False)[0]
    cv2.imshow("img", img)
    cv2.waitKey()

    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    # Previously a bare `return img` made this conversion unreachable and
    # handed back a negatively-strided view; the values are unchanged.
    return np.ascontiguousarray(img)