Beispiel #1
    def detect(self, path, img, im0s):
        """Run the detector on a single pre-processed image.

        Args:
            path: Source path (or sequence of paths) of the image. Kept for
                interface compatibility; the computation does not use it.
            img: Network input as a numpy array. A 3-D array gets a leading
                batch axis; values are divided by 255 before inference.
            im0s: Original image(s) as a numpy array. A 3-D array gets a
                leading batch axis; its per-image shape is the target the
                boxes are rescaled to.

        Returns:
            A numpy array with one row per detection (per the original
            comment: [num_obj, 6] with columns xyxy, conf, cls), or None when
            there is no detection or none of the detections belongs to a
            class listed in ``cfg['filt_classes']``.

        Raises:
            AssertionError: if NMS yields results for more than one image.
        """
        cfg = self.cfg

        im0s = im0s[np.newaxis, :] if im0s.ndim == 3 else im0s
        imgs = img[np.newaxis, :] if img.ndim == 3 else img
        imgs = torch.from_numpy(imgs).to(self.device)
        imgs = imgs.half() if self.half else imgs.float()
        imgs /= 255.0

        # Inference
        pred = self.model(imgs, augment=cfg['augment'])[0]

        # NMS
        # NOTE(review): the arguments that followed `pred` in this call were
        # lost from the source (the call was cut off mid-line). Restore the
        # intended thresholds / class options before relying on this.
        pred = non_max_suppression(pred)

        # Post-process: convert the result to a numpy array.
        assert len(pred) == 1  # only single-image detection is supported

        det = pred[0]
        # NMS may hand back None or an empty tensor for "nothing found";
        # both are reported to the caller as None.
        if det is None or len(det) == 0:
            return None

        # Rescale boxes from the network input size to the original image.
        det[:, :4] = scale_coords(imgs.shape[2:], det[:, :4],
                                  im0s[0].shape).round()
        det = det.detach().cpu().numpy()

        filt_classes = cfg['filt_classes']
        if filt_classes is not None:
            # Keep only the requested classes, grouped in the order they are
            # listed. Concatenating 2-D slices keeps the result rectangular
            # even when several classes match.
            class_column = det[:, -1]
            selected = [
                det[class_column == int(valid_cls)]
                for valid_cls in filt_classes.split(',')
            ]
            det = np.concatenate(selected, axis=0)
            if len(det) == 0:
                return None

        return det  # nparray, [num_obj, 6] 6: xyxy,conf,cls
Beispiel #2
    def forward(self, x, size=640, augment=False, profile=False):
        """Run inference on input given in one of several forms.

        For a height=720, width=1280 RGB image, example inputs are:
          opencv:     x = cv2.imread('image.jpg')[:, :, ::-1]  # HWC BGR to RGB x(720,1280,3)
          PIL:        x = Image.open('image.jpg')  # HWC x(720,1280,3)
          numpy:      x = np.zeros((720, 1280, 3))  # HWC
          torch:      x = torch.zeros(16, 3, 720, 1280)  # BCHW
          multiple:   x = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        Args:
            x: A torch tensor, a single image, or a list of images. A list is
                updated in place: each entry is replaced by its numpy form
                limited to the first three channels.
            size: Target length of the longest image side before padding.
            augment: Forwarded to ``self.model``.
            profile: Forwarded to ``self.model``.

        Returns:
            For tensor input, the direct output of ``self.model``. Otherwise
            the per-image result of ``non_max_suppression`` with box columns
            rescaled to each original image shape.
        """
        p = next(self.model.parameters())  # reference for device and dtype
        if isinstance(x, torch.Tensor):  # torch
            # Tensor input skips pre- and post-processing; it is only moved
            # to the model's device and cast to the model's dtype.
            return self.model(x.to(p.device).type_as(p), augment,
                              profile)  # inference

        # Pre-process
        if not isinstance(x, list):
            x = [x]
        shape0, shape1 = [], []  # image and inference shapes
        batch = range(len(x))  # batch indices
        for i in batch:
            x[i] = np.array(x[i])[:, :, :3]  # up to 3 channels if png
            s = x[i].shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
        # One common inference shape for the batch: the per-axis maximum,
        # rounded by make_divisible with the model's largest stride.
        max_stride = int(self.stride.max())
        shape1 = [
            make_divisible(side, max_stride)
            for side in np.stack(shape1, 0).max(0)
        ]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0]
             for im in x]  # pad
        x = np.stack(x, 0) if len(x) > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(
            p.device).type_as(p) / 255.  # uint8 to fp16/32

        # Inference
        x = self.model(x, augment, profile)  # forward
        x = non_max_suppression(x[0],
                                classes=self.classes)  # NMS

        # Post-process
        for i in batch:
            if x[i] is not None:
                x[i][:, :4] = scale_coords(shape1, x[i][:, :4], shape0[i])
        return x
Beispiel #3
 def forward(self, x):
     """Apply non-max suppression to the first element of ``x``.

     Args:
         x: Indexable model output; only ``x[0]`` is used.

     Returns:
         Whatever ``non_max_suppression`` returns for ``x[0]``.
     """
     # NOTE(review): the source of this call was cut off after the first
     # argument. Any further arguments (thresholds, class filter) are
     # unknown here and must be restored from the original code.
     return non_max_suppression(x[0])
Beispiel #4
    def forward(self, imgs, size=640, augment=False, profile=False):
        """Run inference on one or more images given in various forms.

        For a height=720, width=1280 RGB image, example inputs are:
          filename:   imgs = 'data/samples/zidane.jpg'
          URI:             = 'https://<host>/zidane.jpg'
          OpenCV:          = cv2.imread('image.jpg')[:, :, ::-1]  # HWC BGR to RGB x(720,1280,3)
          PIL:             = Image.open('image.jpg')  # HWC x(720,1280,3)
          numpy:           = np.zeros((720, 1280, 3))  # HWC
          torch:           = torch.zeros(16, 3, 720, 1280)  # BCHW
          multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        Args:
            imgs: A torch tensor, a single image/filename/URI, or a list of
                them. A list is updated in place with the numpy form of each
                image.
            size: Target length of the longest image side before padding.
            augment: Forwarded to ``self.model``.
            profile: Forwarded to ``self.model``.

        Returns:
            For tensor input, the direct output of ``self.model``. Otherwise
            a ``Detections`` built from the converted images, the per-image
            NMS results, the derived file names and ``self.names``.
        """
        # Local import: only needed to derive file names, and the file's
        # import block is not visible from this block.
        from pathlib import Path

        p = next(self.model.parameters())  # reference for device and dtype
        if isinstance(imgs, torch.Tensor):  # torch
            # Tensor input skips pre- and post-processing; it is only moved
            # to the model's device and cast to the model's dtype.
            return self.model(imgs.to(p.device).type_as(p), augment,
                              profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (
            1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], [
        ]  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            if isinstance(im, str):  # filename or uri
                # NOTE(review): the opening call was garbled in the source;
                # reconstructed as a PIL open of either the HTTP response
                # stream or the local path. Confirm against the original.
                im = Image.open(
                    requests.get(im, stream=True).raw
                    if im.startswith('http') else im)  # open
            # NOTE(review): the file-name expression was garbled in the
            # source; reconstructed as "<image file name with .jpg suffix>"
            # for PIL images, falling back to image{i}.jpg.
            if isinstance(im, Image.Image):
                # PIL images not opened from a path have no usable filename.
                source_name = getattr(im, 'filename', '') or f'image{i}'
                files.append(Path(source_name).with_suffix('.jpg').name)
            else:
                files.append(f'image{i}.jpg')
            im = np.array(im)  # to numpy
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose(
                    (1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(
                im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im  # update
        # One common inference shape for the batch: the per-axis maximum,
        # rounded by make_divisible with the model's largest stride.
        max_stride = int(self.stride.max())
        shape1 = [
            make_divisible(side, max_stride)
            for side in np.stack(shape1, 0).max(0)
        ]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0]
             for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(
            p.device).type_as(p) / 255.  # uint8 to fp16/32

        # Inference
        with torch.no_grad():
            y = self.model(x, augment, profile)[0]  # forward
        y = non_max_suppression(y,
                                classes=self.classes)  # NMS

        # Post-process
        for i in range(n):
            # Return value is discarded: this relies on scale_coords updating
            # the box columns in place — TODO confirm against its definition.
            scale_coords(shape1, y[i][:, :4], shape0[i])

        return Detections(imgs, y, files, self.names)