def detect(self, path, img, im0s):
    """Run detection on a single image and return its boxes as a NumPy array.

    Args:
        path: Source path (or list of paths) of the image. Kept for interface
            compatibility; it does not influence the result.
        img: Network input as a NumPy array. A 3-D array is treated as one
            image and gets a leading batch axis; values are divided by 255
            before inference (presumably 0-255 pixel data - confirm with the
            caller's pre-processing).
        im0s: Original image(s) as a NumPy array. A 3-D array is treated as
            one image; its shape is the target of the box rescaling.

    Returns:
        ``None`` when nothing was detected (or nothing survives the class
        filter), otherwise an array of shape ``[num_obj, 6]`` whose columns
        are ``xyxy, conf, cls``.

    Raises:
        ValueError: if the NMS result does not contain exactly one image.
    """
    cfg = self.cfg

    # Promote a single image to a batch of one.
    im0s = im0s[np.newaxis, :] if im0s.ndim == 3 else im0s
    imgs = img[np.newaxis, :] if img.ndim == 3 else img

    imgs = torch.from_numpy(imgs).to(self.device)
    imgs = imgs.half() if self.half else imgs.float()
    # Out-of-place on purpose: torch.from_numpy shares memory with `img`, so
    # an in-place divide could silently rescale the caller's array when no
    # dtype/device copy happened above.
    imgs = imgs / 255.0

    # Inference + NMS. Gradients are never needed here, and a tensor that is
    # still attached to the autograd graph cannot be converted with .numpy().
    self.model.eval()
    with torch.no_grad():
        pred = self.model(imgs, augment=cfg['augment'])[0]
        # NOTE(review): classes=0 is forwarded exactly as before; whether a
        # bare 0 means "class 0 only" or "no filter" depends on
        # non_max_suppression - confirm.
        pred = non_max_suppression(pred,
                                   cfg['conf_thres'],
                                   cfg['iou_thres'],
                                   classes=0,
                                   agnostic=cfg['agnostic_nms'])

    # Only single-image detection is supported. Raised explicitly (instead of
    # `assert`) so the check survives `python -O`.
    if len(pred) != 1:
        raise ValueError(
            f'detect() supports exactly one image, got {len(pred)}')

    det = pred[0]
    if det is None or len(det) == 0:
        return None

    # Map boxes from the network input size back to the original image size.
    det[:, :4] = scale_coords(imgs.shape[2:], det[:, :4],
                              im0s[0].shape).round()
    det = det.cpu().numpy()

    if cfg['filt_classes'] is not None:
        # 'filt_classes' is a comma-separated list of class ids. One mask
        # keeps every requested class and preserves the NMS output order.
        wanted = [int(cls_id) for cls_id in cfg['filt_classes'].split(',')]
        det = det[np.isin(det[:, -1], wanted)]
        if len(det) == 0:
            return None

    return det  # ndarray, [num_obj, 6]; 6: xyxy, conf, cls
def forward(self, x, size=640, augment=False, profile=False):
    """Run inference on images from various sources and return NMS results.

    For height=720, width=1280, RGB images example inputs are:
        opencv:   x = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
        PIL:      x = Image.open('image.jpg')  # HWC x(720,1280,3)
        numpy:    x = np.zeros((720,1280,3))  # HWC
        torch:    x = torch.zeros(16,3,720,1280)  # BCHW
        multiple: x = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]

    Args:
        x: A tensor, a single image, or a list of images (see above).
        size: Target length of the longest image side before the inference
            shape is rounded with ``make_divisible``.
        augment: Forwarded to the wrapped model.
        profile: Forwarded to the wrapped model.

    Returns:
        For tensor input, the wrapped model's output unchanged. Otherwise the
        result of ``non_max_suppression``, with the first four columns of each
        non-``None`` entry rescaled to the original image shape.

    Raises:
        ValueError: if an empty list of images is passed.
    """
    p = next(self.model.parameters())  # reference parameter: device and dtype
    if isinstance(x, torch.Tensor):  # tensors skip pre- and post-processing
        return self.model(x.to(p.device).type_as(p), augment, profile)

    # Pre-process on a private list so the caller's list is left untouched.
    images = list(x) if isinstance(x, list) else [x]
    if not images:
        raise ValueError('forward() needs at least one image')

    shape0, shape1 = [], []  # original image shapes, scaled inference shapes
    for i, im in enumerate(images):
        im = np.array(im)
        # Enforce 3 channels: drop extras (e.g. png alpha), replicate grayscale.
        im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)
        hw = im.shape[:2]  # HWC -> (height, width)
        shape0.append(hw)
        gain = size / max(hw)
        shape1.append([side * gain for side in hw])
        images[i] = im

    # One common inference shape: the per-axis maximum over all images,
    # rounded by make_divisible with the model's largest stride.
    shape1 = [
        make_divisible(side, int(self.stride.max()))
        for side in np.stack(shape1, 0).max(0)
    ]
    padded = [letterbox(im, new_shape=shape1, auto=False)[0] for im in images]
    batch = np.ascontiguousarray(
        np.stack(padded, 0).transpose((0, 3, 1, 2)))  # BHWC to BCHW
    batch = torch.from_numpy(batch).to(
        p.device).type_as(p) / 255.  # uint8 to fp16/32

    # Inference + NMS; no gradients are needed for detection output.
    with torch.no_grad():
        pred = self.model(batch, augment, profile)
        pred = non_max_suppression(pred[0],
                                   conf_thres=self.conf,
                                   iou_thres=self.iou,
                                   classes=self.classes)

    # Post-process: map boxes back onto each original image.
    for det, original_shape in zip(pred, shape0):
        if det is not None:
            det[:, :4] = scale_coords(shape1, det[:, :4], original_shape)
    return pred
def forward(self, x):
    """Apply non-max suppression to the first element of ``x``.

    The confidence threshold, IoU threshold and class filter are read from
    ``self.conf``, ``self.iou`` and ``self.classes``.
    """
    predictions = x[0]
    nms_options = {
        'conf_thres': self.conf,
        'iou_thres': self.iou,
        'classes': self.classes,
    }
    return non_max_suppression(predictions, **nms_options)
def forward(self, imgs, size=640, augment=False, profile=False):
    """Run inference on images from various sources and wrap the detections.

    For height=720, width=1280, RGB images example inputs are:
        filename: imgs = 'data/samples/zidane.jpg'
        URI:           = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        OpenCV:        = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
        PIL:           = Image.open('image.jpg')  # HWC x(720,1280,3)
        numpy:         = np.zeros((720,1280,3))  # HWC
        torch:         = torch.zeros(16,3,720,1280)  # BCHW
        multiple:      = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]

    Args:
        imgs: A tensor, a single image/filename/URI, or a list of them.
        size: Target length of the longest image side before the inference
            shape is rounded with ``make_divisible``.
        augment: Forwarded to the wrapped model.
        profile: Forwarded to the wrapped model.

    Returns:
        For tensor input, the wrapped model's output unchanged. Otherwise a
        ``Detections`` built from the pre-processed images, the NMS output,
        the derived file names and ``self.names``.

    Raises:
        ValueError: if an empty list of images is passed.
    """
    p = next(self.model.parameters())  # reference parameter: device and dtype
    if isinstance(imgs, torch.Tensor):  # tensors skip pre- and post-processing
        return self.model(imgs.to(p.device).type_as(p), augment, profile)

    # Pre-process on a private list so the caller's list is left untouched.
    images = list(imgs) if isinstance(imgs, list) else [imgs]
    n = len(images)
    if n == 0:
        raise ValueError('forward() needs at least one image')

    shape0, shape1, files = [], [], []  # image shapes, inference shapes, names
    for i, im in enumerate(images):
        if isinstance(im, str):  # filename or uri
            source = im
            im = Image.open(
                requests.get(source, stream=True).raw
                if source.startswith('http') else source)
        elif isinstance(im, Image.Image):
            # In-memory PIL images may have no (or an empty) filename.
            source = getattr(im, 'filename', '') or ''
        else:
            source = ''
        # Name from the original path/URI: an image opened from a stream has
        # an empty `filename`, and Path('').with_suffix() raises.
        base = Path(str(source)).name if source else ''
        files.append(
            Path(base).with_suffix('.jpg').name if base else f'image{i}.jpg')

        im = np.array(im)  # to numpy
        if im.ndim == 3 and im.shape[0] < 5:  # image in CHW
            im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
        # Enforce 3 channels: drop extras, replicate grayscale.
        im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)
        hw = im.shape[:2]  # HWC -> (height, width)
        shape0.append(hw)
        gain = size / max(hw)
        shape1.append([side * gain for side in hw])
        images[i] = im

    # One common inference shape: the per-axis maximum over all images,
    # rounded by make_divisible with the model's largest stride.
    shape1 = [
        make_divisible(side, int(self.stride.max()))
        for side in np.stack(shape1, 0).max(0)
    ]
    padded = [letterbox(im, new_shape=shape1, auto=False)[0] for im in images]
    x = np.ascontiguousarray(
        np.stack(padded, 0).transpose((0, 3, 1, 2)))  # BHWC to BCHW
    x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32

    # Inference
    with torch.no_grad():
        y = self.model(x, augment, profile)[0]  # forward
    y = non_max_suppression(y,
                            conf_thres=self.conf,
                            iou_thres=self.iou,
                            classes=self.classes)  # NMS

    # Post-process. The return value is discarded as in the original code.
    # NOTE(review): this relies on scale_coords rescaling the slice in place -
    # confirm.
    for i in range(n):
        scale_coords(shape1, y[i][:, :4], shape0[i])

    return Detections(images, y, files, self.names)