Python letterboxの例、yolov5.utils.datasets.letterbox Pythonの例

コード例 #1

0

ファイルを表示

 def preprocess_image(img0, image_size):
     """Letterbox ``img0`` to ``image_size`` and return a contiguous CHW array.

     Follows the preprocessing found in datasets.py: take the first element
     of ``letterbox``'s result, reverse the channel axis (BGR to RGB) and
     move the channel axis to the front.
     """
     boxed = letterbox(img0, new_shape=image_size)[0]
     # Reversed channel order plus HWC -> CHW yields a strided view ...
     channels_first = boxed[:, :, ::-1].transpose(2, 0, 1)
     # ... so materialise it as a contiguous array before returning.
     return np.ascontiguousarray(channels_first)

コード例 #2

0

ファイルを表示

ファイル: main.py プロジェクト: vuminhduc97/DeepSORT_YOLOv5_Pytorch

    def image_track(self, im0):
        """
        Detect objects in one frame and feed them to the DeepSORT tracker.

        :param im0: original image, BGR format
        :return: ``(outputs, detect_time, track_time)``. ``outputs`` is whatever
            ``self.deepsort.update`` returns (per the original author's note,
            rows of x1, y1, x2, y2, track_ID), or an empty ``(0, 5)`` tensor
            when nothing was detected. ``detect_time`` covers inference plus
            NMS; ``track_time`` covers box rescaling plus the tracker update.
        """
        # preprocess ************************************************************
        # Padded resize
        img = letterbox(im0, new_shape=self.img_size)[0]
        # Convert: BGR to RGB, HWC to CHW
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img)

        # numpy to tensor
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add the batch dimension

        # Detection time *********************************************************
        t1 = time_synchronized()
        with torch.no_grad():
            pred = self.detector(img, augment=self.args.augment)[0]

        # Apply NMS, keeping only the classes requested in self.args.classes
        pred = non_max_suppression(pred,
                                   self.args.conf_thres,
                                   self.args.iou_thres,
                                   classes=self.args.classes,
                                   agnostic=self.args.agnostic_nms)
        t2 = time_synchronized()

        # get all obj ************************************************************
        det = pred[0]  # a single frame is processed, so only batch entry 0 exists
        if det is not None and len(det):
            # Rescale boxes from the letterboxed size back to the im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0.shape).round()

            # The per-class summary string the original built here was never
            # read or returned, so it has been dropped.
            bbox_xywh = xyxy2xywh(det[:, :4]).cpu()
            confs = det[:, 4:5].cpu()

            # ****************************** deepsort ****************************
            outputs = self.deepsort.update(bbox_xywh, confs, im0)
        else:
            outputs = torch.zeros((0, 5))

        t3 = time.time()
        return outputs, t2 - t1, t3 - t2

コード例 #3

0

ファイルを表示

    def detect_bbox(self,
                    img: np.ndarray,
                    img_size: int = 640,
                    stride: int = 32,
                    min_accuracy: float = 0.5) -> List:
        """
        Run the model on one image and return the boxes above ``min_accuracy``.

        TODO: input img in BGR format, not RGB; To Be Implemented in release 2.2

        Args:
            img: HWC image array; its channel axis is reversed before inference.
            img_size: target size handed to ``letterbox``.
            stride: stride handed to ``letterbox``.
            min_accuracy: rows whose fifth value is not strictly greater than
                this are dropped.

        Returns:
            A list of ``[x1, y1, x2, y2, acc, b]`` rows taken from the first
            non-empty detection set, with coordinates rescaled to the input
            image; an empty list when nothing was detected.
            NOTE(review): ``b`` is presumably the class index - confirm
            against ``non_max_suppression``.
        """
        # normalize
        img_shape = img.shape
        img = letterbox(img, img_size, stride=stride)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)  # reverse channels, HWC to CHW
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            pred = self.model(img)[0]
        # Apply NMS
        pred = non_max_suppression(pred)

        res = []
        for det in pred:
            if len(det):
                # Rescale boxes from the letterboxed size to the input size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img_shape).round()
                res.append(det.cpu().detach().numpy())
        if not res:
            return []
        return [[x1, y1, x2, y2, acc, b] for x1, y1, x2, y2, acc, b in res[0] if acc > min_accuracy]

コード例 #4

0

ファイルを表示

    def forward(self, imgs, size=640, augment=False, profile=False):
        """Run inference on a tensor, or on one or more images given in other forms.

        Accepted inputs (for height=720, width=1280, RGB images):
          filename:   imgs = 'data/samples/zidane.jpg'
          URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
          OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
          PIL:             = Image.open('image.jpg')  # HWC x(720,1280,3)
          numpy:           = np.zeros((720,1280,3))  # HWC
          torch:           = torch.zeros(16,3,720,1280)  # BCHW
          multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        A tensor is passed straight to the model and its raw output returned.
        Anything else is converted to numpy, letterboxed to a common shape,
        run through the model and NMS, and returned as a ``Detections``
        object. A list passed as ``imgs`` has its entries replaced by the
        converted numpy arrays.
        """
        ref_param = next(self.model.parameters())  # supplies device and dtype
        if isinstance(imgs, torch.Tensor):
            tensor_in = imgs.to(ref_param.device).type_as(ref_param)
            return self.model(tensor_in, augment, profile)

        # Pre-process: normalise the input to a list of images
        if isinstance(imgs, list):
            n = len(imgs)
        else:
            n, imgs = 1, [imgs]

        orig_shapes, scaled_shapes = [], []
        for idx, image in enumerate(imgs):
            if isinstance(image, str):  # filename or uri
                if image.startswith('http'):
                    source = requests.get(image, stream=True).raw
                else:
                    source = image
                image = Image.open(source)
            image = np.array(image)
            if image.shape[0] < 5:  # leading axis this small is taken to be channels (CHW)
                image = image.transpose((1, 2, 0))
            if image.ndim == 3:
                image = image[:, :, :3]  # keep at most three channels
            else:
                image = np.tile(image[:, :, None], 3)  # replicate to three channels
            hw = image.shape[:2]
            orig_shapes.append(hw)
            gain = size / max(hw)  # scales the longer side to `size`
            scaled_shapes.append([side * gain for side in hw])
            imgs[idx] = image

        # Common inference shape: per-axis maximum, rounded via make_divisible
        max_stride = int(self.stride.max())
        infer_shape = [
            make_divisible(side, max_stride)
            for side in np.stack(scaled_shapes, 0).max(0)
        ]
        padded = [
            letterbox(image, new_shape=infer_shape, auto=False)[0]
            for image in imgs
        ]
        if n > 1:
            batch = np.stack(padded, 0)
        else:
            batch = padded[0][None]
        batch = np.ascontiguousarray(batch.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        batch = torch.from_numpy(batch).to(
            ref_param.device).type_as(ref_param) / 255.  # uint8 to fp16/32

        # Inference
        with torch.no_grad():
            raw = self.model(batch, augment, profile)[0]
        dets = non_max_suppression(raw,
                                   conf_thres=self.conf,
                                   iou_thres=self.iou,
                                   classes=self.classes)

        # Post-process: map the boxes back onto each original image (in place)
        for idx in range(n):
            scale_coords(infer_shape, dets[idx][:, :4], orig_shapes[idx])

        return Detections(imgs, dets, self.names)

コード例 #5

0

ファイルを表示

    def normalize_img(self, img, img_size, stride, auto=True):
        """
        Letterbox ``img`` and return it as a contiguous channels-first array.

        Only the axis order changes (HWC to CHW); the channel order is left
        as it came in.

        TODO: auto=False if pipeline batch size > 1
        """
        boxed = letterbox(img, img_size, stride=stride, auto=auto)[0]
        channels_first = boxed.transpose(2, 0, 1)
        return np.ascontiguousarray(channels_first)

コード例 #6

0

ファイルを表示

ファイル: runner.py プロジェクト: szf2020/yolov5Jetson

 def preprocess(self, images: List[np.array]):
     """Letterbox every image to one shared shape and stack the results.

     Returns a tuple ``(img_stacked, sizes, div_sizes)``: the output of
     ``self.stack_to_torch``, the per-image results of
     ``self.get_image_size``, and the shared letterbox shape.
     """
     sizes = [self.get_image_size(image) for image in images]
     # Per-axis maximum of every image's `.scaled` size.
     # noinspection PyArgumentList
     largest = np.array([size.scaled for size in sizes]).max(axis=0)
     div_sizes = [cached_divisible(extent, self._stride_max) for extent in largest]
     # Yolov5 sends list
     img_sized = [letterbox(image, new_shape=div_sizes)[0] for image in images]
     return self.stack_to_torch(img_sized), sizes, div_sizes

コード例 #7

0

ファイルを表示

    def forward(self, imgs, size=640, augment=False, profile=False):
        """Run inference on a tensor, or on one or more images given in other forms.

        Accepted inputs (for height=720, width=1280, RGB images):
          opencv:     x = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
          PIL:        x = Image.open('image.jpg')  # HWC x(720,1280,3)
          numpy:      x = np.zeros((720,1280,3))  # HWC
          torch:      x = torch.zeros(16,3,720,1280)  # BCHW
          multiple:   x = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        A string entry is opened with ``Image.open``. A tensor goes straight
        to the model and the raw output is returned; every other input is
        letterboxed, run through the model and
        ``non_max_suppression_torch_ops``, and wrapped in ``Detections``.
        A list passed as ``imgs`` has its entries replaced by numpy arrays.
        """
        ref_param = next(self.model.parameters())  # supplies device and dtype
        if isinstance(imgs, torch.Tensor):
            tensor_in = imgs.to(ref_param.device).type_as(ref_param)
            return self.model(tensor_in, augment, profile)

        # Pre-process: normalise the input to a list of images
        if isinstance(imgs, list):
            n = len(imgs)
        else:
            n, imgs = 1, [imgs]

        orig_shapes, scaled_shapes = [], []
        for idx, picture in enumerate(imgs):
            if isinstance(picture, str):
                picture = Image.open(picture)
            picture = np.array(picture)
            if picture.shape[0] < 5:  # leading axis this small is taken to be channels
                picture = picture.transpose((1, 2, 0))
            if picture.ndim == 3:
                picture = picture[:, :, :3]  # keep at most three channels
            else:
                picture = np.tile(picture[:, :, None], 3)  # replicate to three channels
            hw = picture.shape[:2]
            orig_shapes.append(hw)
            gain = size / max(hw)  # scales the longer side to `size`
            scaled_shapes.append([side * gain for side in hw])
            imgs[idx] = picture

        # Common inference shape: per-axis maximum, rounded via make_divisible
        max_stride = int(self.stride.max())
        infer_shape = [
            make_divisible(side, max_stride)
            for side in np.stack(scaled_shapes, 0).max(0)
        ]
        padded = [
            letterbox(picture, new_shape=infer_shape, auto=False)[0]
            for picture in imgs
        ]
        if n > 1:
            batch = np.stack(padded, 0)
        else:
            batch = padded[0][None]
        batch = np.ascontiguousarray(batch.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        batch = torch.from_numpy(batch).to(
            ref_param.device).type_as(ref_param) / 255.  # uint8 to fp16/32

        # Inference
        with torch.no_grad():
            raw = self.model(batch, augment, profile)[0]
        # The torch-ops NMS variant is used here in place of non_max_suppression.
        dets = non_max_suppression_torch_ops(raw,
                                             conf_thres=self.conf,
                                             iou_thres=self.iou,
                                             classes=self.classes)

        # Post-process: map the boxes back onto each original image (in place)
        for idx in range(n):
            scale_coords(infer_shape, dets[idx][:, :4], orig_shapes[idx])

        return Detections(imgs, dets, self.names)

コード例 #8

0

ファイルを表示

def run_lpr(event, context):
    """Triggered by a change to a Cloud Storage bucket.

    Downloads the changed object, decodes it as an image, runs ``proc`` on
    the letterboxed image and uploads the JSON-encoded result as
    ``<name>.txt`` to the ``yolov5-output`` bucket.

    Args:
         event (dict): Event payload; ``"bucket"`` and ``"name"`` are read.
         context (google.cloud.functions.Context): Metadata for the event.
    """
    filename = event["name"]

    client = storage.Client()
    source_bucket = client.get_bucket(event["bucket"])
    source_blob = source_bucket.get_blob(filename)

    # Decode
    encoded = np.asarray(bytearray(source_blob.download_as_string()),
                         dtype="uint8")
    image = cv2.imdecode(encoded, cv2.IMREAD_UNCHANGED)
    # NOTE(review): cv2.imdecode returns None for undecodable data, which
    # would fail inside letterbox - consider an explicit check.

    # Letterbox
    img = letterbox(image, new_shape=imgsz_detect)[0]
    # The original np.stack(img, 0) only re-stacked the rows of this single
    # image into an identical array, so it is omitted.
    # Convert: BGR to RGB, HWC to CHW
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)
    im0s = [image]  # source size image

    res = proc(img, im0s, view_img=False)

    with NamedTemporaryFile() as temp:
        # Write the JSON only: the stray b"hello" prefix that used to be
        # written first made the uploaded file unparseable.
        temp.write(json.dumps(res).encode())
        temp.flush()  # make sure the bytes are on disk before uploading by name

        dest_filename = filename + ".txt"
        dest_bucket_name = "yolov5-output"
        dest_bucket = client.get_bucket(dest_bucket_name)
        dest_blob = dest_bucket.blob(dest_filename)
        dest_blob.upload_from_filename(temp.name)

コード例 #9

0

ファイルを表示

def lpr():
    """Handle an image upload: run ``proc`` on it and return the result as JSON.

    Reads the ``file`` part of the current request. Returns a 200 response
    with body ``{"results": ...}``, or a 403 response when the uploaded
    part has an empty filename.
    """
    uploaded_file = request.files['file']
    if uploaded_file.filename == '':
        return Response(response="no image uploaded", status=403)

    # np.frombuffer replaces np.fromstring, whose binary mode is deprecated.
    nparr = np.frombuffer(uploaded_file.read(), np.uint8)
    # Qualified through cv2 (already needed for imdecode) rather than relying
    # on a separately imported bare IMREAD_UNCHANGED name.
    image = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)
    # NOTE(review): cv2.imdecode returns None for undecodable data, which
    # would fail inside letterbox - consider answering with a client error.

    # Letterbox
    img = letterbox(image, new_shape=imgsz_detect)[0]
    # The original np.stack(img, 0) only re-stacked the rows of this single
    # image into an identical array, so it is omitted.
    # Convert: BGR to RGB, HWC to CHW
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)
    im0s = [image]  # source size image

    res = proc(img, im0s, view_img=False)

    response = {"results": res}
    return Response(response=json.dumps(response),
                    status=200,
                    mimetype="application/json")

コード例 #10

0

ファイルを表示

    def forward(self, x, size=640, augment=False, profile=False):
        """Run inference on a tensor, or on one or more HWC images.

        Accepted inputs (for height=720, width=1280, RGB images):
          opencv:     x = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
          PIL:        x = Image.open('image.jpg')  # HWC x(720,1280,3)
          numpy:      x = np.zeros((720,1280,3))  # HWC
          torch:      x = torch.zeros(16,3,720,1280)  # BCHW
          multiple:   x = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        A tensor is passed straight to the model and its raw output returned.
        Otherwise the images are letterboxed to a common shape, run through
        the model and NMS, and the per-image NMS results are returned with
        their boxes rescaled to the original image sizes. A list passed as
        ``x`` has its entries replaced by numpy arrays, as before.
        """
        p = next(self.model.parameters())  # for device and type
        if isinstance(x, torch.Tensor):  # torch
            return self.model(x.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        if not isinstance(x, list):
            x = [x]
        shape0, shape1 = [], []  # image and inference shapes
        batch = range(len(x))  # batch indices
        for i in batch:
            x[i] = np.array(x[i])[:, :, :3]  # up to 3 channels if png
            s = x[i].shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = size / max(s)  # gain
            shape1.append([side * g for side in s])

        max_stride = int(self.stride.max())  # loop-invariant, computed once
        shape1 = [make_divisible(side, max_stride) for side in np.stack(shape1, 0).max(0)]  # inference shape
        padded = [letterbox(im, new_shape=shape1, auto=False)[0] for im in x]  # pad
        # np.stack also covers the single-image case (same values as padded[0][None]).
        stacked = np.stack(padded, 0)
        stacked = np.ascontiguousarray(stacked.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        inputs = torch.from_numpy(stacked).to(p.device).type_as(p) / 255.  # uint8 to fp16/32

        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            raw = self.model(inputs, augment, profile)  # forward
        pred = non_max_suppression(raw[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS

        # Post-process
        for i in batch:
            if pred[i] is not None:
                pred[i][:, :4] = scale_coords(shape1, pred[i][:, :4], shape0[i])
        return pred

コード例 #11

0

ファイルを表示

def preprocess(image_file, stride, imgsz):
    """ Prepare the input for inferencing.

    Decodes the encoded image bytes, letterboxes the image and converts it
    to a float tensor on ``DEVICE`` scaled by 1/255, with a leading batch
    axis added when the tensor is three-dimensional.

    Returns ``(img, imgsz0)`` where ``imgsz0`` is a tensor holding the first
    two dimensions of the decoded image.
    """
    # read image file
    encoded = np.asarray(bytearray(image_file), dtype="uint8")
    decoded = cv2.imdecode(encoded, 1)  # flag 1 is cv2.IMREAD_COLOR
    imgsz0 = torch.Tensor(decoded.shape[:2])

    # resize image
    boxed = letterbox(decoded, imgsz, stride=stride)[0]

    # convert from BGR to RGB and from HWC to CHW
    channels_first = np.ascontiguousarray(boxed[:, :, ::-1].transpose(2, 0, 1))

    # convert to tensor and scale 0-255 values to 0.0-1.0
    tensor = torch.from_numpy(channels_first).to(DEVICE)
    tensor = tensor.float() / 255.0
    if tensor.ndimension() == 3:
        tensor = tensor.unsqueeze(0)

    return tensor, imgsz0

コード例 #12

0

ファイルを表示

ファイル: yolov5_detection.py プロジェクト: schlange-git/traffic-object-detection-with-yolov5

 def detect(self,img,model,stride,device,imgsz):
     """Run ``model`` on one image and collect the detected boxes.

     Args:
         img: HWC image array; its channel axis is reversed before inference.
         model: detection model; class names come from ``model.module.names``
             when a ``module`` attribute exists, else from ``model.names``.
         stride: stride handed to ``letterbox``.
         device: torch device; half precision is used unless it is the CPU.
         imgsz: target size handed to ``letterbox``.

     Returns:
         ``(xyxys, xywhs, labels, confs, im0)``: corner boxes rescaled to the
         input image, the matching ``self.bbox_rel`` results, class-name
         labels, one-element confidence lists, and a copy of the input image.
     """
     names = model.module.names if hasattr(model, 'module') else model.names
     im0s = img.copy()
     img = letterbox(im0s, imgsz, stride=stride)[0]
     # Convert: BGR to RGB, HWC to CHW
     img = img[:, :, ::-1].transpose(2, 0, 1)
     img = np.ascontiguousarray(img)
     img = torch.from_numpy(img).to(device)
     half = device.type != "cpu"  # half precision only supported on CUDA
     img = img.half() if half else img.float()  # uint8 to fp16/32
     img /= 255.0  # 0 - 255 to 0.0 - 1.0
     if img.ndimension() == 3:
         img = img.unsqueeze(0)

     # Inference
     pred = model(img, augment=True)[0]
     # Apply NMS
     pred = non_max_suppression(pred, 0.60, 0.5, classes=[0,2,3,5,7], agnostic=True)

     xywhs, labels, xyxys, confs = [], [], [], []
     # Bound before the loop so the return below cannot hit an unbound name
     # when `pred` is empty; the copy is never modified, so one is enough.
     im0 = im0s.copy()
     for det in pred:
         if len(det):
             # Rescale boxes from img_size to im0 size
             det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
             for *xyxy, conf, cls in reversed(det):
                 label = f'{names[int(cls)]}'
                 xywh = self.bbox_rel(*xyxy)
                 xyxys.append(xyxy)
                 xywhs.append(xywh)
                 labels.append(label)
                 confs.append([conf.item()])
     return xyxys,xywhs,labels,confs,im0

コード例 #13

0

ファイルを表示

    def forward(self, imgs, size=640, augment=False, profile=False):
        """Run inference on a tensor, or on images given as paths, URIs, PIL images or arrays.

        A tensor input is sent straight to the model (under autocast when
        enabled) and the raw model output is returned. Any other input is
        converted to numpy, letterboxed, run through the model and NMS, and
        returned as a ``Detections`` object that also carries the derived
        file names and four ``time_sync()`` stamps. A list passed as ``imgs``
        has its entries replaced by the converted numpy arrays.
        """
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_sync()]  # stamp 0: start
        # NOTE(review): self.pt presumably flags a native PyTorch model; when it
        # is falsy a CPU float tensor stands in as the device/dtype reference.
        p = next(self.model.parameters()) if self.pt else torch.zeros(
            1)  # for device and type
        autocast = self.amp and (p.device.type != 'cpu'
                                 )  # Automatic Mixed Precision (AMP) inference
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=autocast):
                return self.model(
                    imgs.to(p.device).type_as(p), augment,
                    profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (
            1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], [
        ]  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, (str, Path)):  # filename or uri
                # Strings starting with 'http' are fetched with requests; f
                # becomes the original path/URI.
                im, f = Image.open(
                    requests.get(im, stream=True).raw if str(im).
                    startswith('http') else im), im
                im = np.asarray(exif_transpose(im))
            elif isinstance(im, Image.Image):  # PIL Image
                # Keep the PIL filename when it is set and non-empty, else the default name.
                im, f = np.asarray(
                    exif_transpose(im)), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose(
                    (1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[..., :3] if im.ndim == 3 else np.tile(
                im[..., None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(
                im)  # update
        shape1 = [
            make_divisible(x, self.stride) for x in np.stack(shape1, 0).max(0)
        ]  # inference shape
        # When self.pt is falsy the images are letterboxed to `size` instead of shape1.
        x = [
            letterbox(im, new_shape=shape1 if self.pt else size, auto=False)[0]
            for im in imgs
        ]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(
            p.device).type_as(p) / 255  # uint8 to fp16/32
        t.append(time_sync())  # stamp 1: pre-processing done

        with amp.autocast(enabled=autocast):
            # Inference
            y = self.model(x, augment, profile)  # forward
            t.append(time_sync())  # stamp 2: inference done

            # Post-process
            # NOTE(review): self.dmb selects whether the whole model output or
            # only its first element is passed to NMS - confirm its meaning.
            y = non_max_suppression(y if self.dmb else y[0],
                                    self.conf,
                                    iou_thres=self.iou,
                                    classes=self.classes,
                                    agnostic=self.agnostic,
                                    multi_label=self.multi_label,
                                    max_det=self.max_det)  # NMS
            # The return value of scale_coords is discarded, so the rescaling
            # presumably happens in place on y[i] - confirm.
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_sync())  # stamp 3: post-processing done
            return Detections(imgs, y, files, t, self.names, x.shape)

コード例 #14

0

ファイルを表示

    def predict(self, src_image):
        """Run the detector on ``src_image`` and publish boxes, labels and confidences.

        The model is (re)loaded when none is cached or ``param.update`` is
        set. Each detection is added to output 1 (layer "YoloV5") as a
        rectangle plus a text label, and the confidences are written to
        output 2 as a table keyed by class name.

        Args:
            src_image: HWC image array. NOTE(review): the channel order is not
                reversed before inference, so this presumably arrives as
                RGB - confirm against the caller.
        """
        param = self.getParam()

        # Initialize
        init_logging()
        half = self.device.type != 'cpu'  # half precision only supported on CUDA

        # Load model
        if self.model is None or param.update:
            self.model = attempt_load(param.model_path, map_location=self.device)  # load FP32 model
            stride = int(self.model.stride.max())  # model stride
            param.input_size = check_img_size(param.input_size, s=stride)  # check img_size
            if half:
                self.model.half()  # to FP16

            # Get names and colors
            self.names = self.model.module.names if hasattr(self.model, 'module') else self.model.names
            self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.names]
            param.update = False
        else:
            stride = int(self.model.stride.max())  # model stride

        # Resize image. The stride must go by keyword: in YOLOv5's letterbox
        # the third positional parameter is the padding colour, so passing
        # the stride positionally set the border colour instead.
        image = letterbox(src_image, param.input_size, stride=stride)[0]
        image = image.transpose(2, 0, 1)  # HWC to CHW
        image = np.ascontiguousarray(image)
        self.emitStepProgress()

        # Run inference
        image = torch.from_numpy(image).to(self.device)
        image = image.half() if half else image.float()  # uint8 to fp16/32
        image /= 255.0  # 0 - 255 to 0.0 - 1.0
        if image.ndimension() == 3:
            image = image.unsqueeze(0)

        self.emitStepProgress()

        # Inference
        pred = self.model(image, augment=param.augment)[0]
        self.emitStepProgress()

        # Apply NMS
        pred = non_max_suppression(pred, param.conf_thres, param.iou_thres, agnostic=param.agnostic_nms)
        self.emitStepProgress()

        graphics_output = self.getOutput(1)
        graphics_output.setNewLayer("YoloV5")
        graphics_output.setImageIndex(0)

        detected_names = []
        detected_conf = []

        # Process detections
        for det in pred:  # detections per image
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(image.shape[2:], det[:, :4], src_image.shape).round()

                # Results
                for *xyxy, conf, cls in reversed(det):
                    # Box: the graphics API takes top-left corner plus width/height
                    w = float(xyxy[2] - xyxy[0])
                    h = float(xyxy[3] - xyxy[1])
                    prop_rect = core.GraphicsRectProperty()
                    prop_rect.pen_color = self.colors[int(cls)]
                    graphics_box = graphics_output.addRectangle(float(xyxy[0]), float(xyxy[1]), w, h, prop_rect)
                    graphics_box.setCategory(self.names[int(cls)])
                    # Label
                    name = self.names[int(cls)]
                    prop_text = core.GraphicsTextProperty()
                    prop_text.font_size = 8
                    prop_text.color = self.colors[int(cls)]
                    graphics_output.addText(name, float(xyxy[0]), float(xyxy[1]), prop_text)
                    detected_names.append(name)
                    detected_conf.append(conf.item())

        # Init numeric output
        numeric_output = self.getOutput(2)
        numeric_output.clearData()
        numeric_output.setOutputType(dataprocess.NumericOutputType.TABLE)
        numeric_output.addValueList(detected_conf, "Confidence", detected_names)
        self.emitStepProgress()

コード例 #15

0

ファイルを表示

    def automate(self):
        """Auto-annotate the current image with object and face detections.

        Clears the existing boxes, runs ``self.object_model`` and
        ``self.face_model`` on ``self.img`` and, for every detection with
        confidence of at least 0.5 whose label is present in the label list
        box, draws the box on the canvas and records it in the annotation
        lists. The processing label shows progress.
        """
        self.clear_bbox()
        self.processingLabel.config(text="Processing     ")
        self.processingLabel.update_idletasks()
        open_cv_image0 = np.array(self.img)

        # Padded resize
        open_cv_image = letterbox(open_cv_image0, new_shape=self.img_size)[0]

        # Convert: BGR to RGB, HWC to CHW
        open_cv_image = open_cv_image[:, :, ::-1].transpose(2, 0, 1)
        open_cv_image = np.ascontiguousarray(open_cv_image)

        img = torch.from_numpy(open_cv_image).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # inference object detection
        # NOTE(review): this reads a module-level `opt` while the NMS call
        # below reads `self.opt` - confirm both refer to the same options.
        pred = self.object_model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   self.opt.conf_thres,
                                   self.opt.iou_thres,
                                   classes=self.opt.classes,
                                   agnostic=self.opt.agnostic_nms)

        # process detections
        det = pred[0]
        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      open_cv_image0.shape).round()

            for *xyxy, conf, cls in reversed(det):
                if conf < 0.5:
                    continue

                # The builtin int replaces the np.int alias, which was
                # removed in NumPy 1.24.
                xyxy = torch.tensor(xyxy).view(-1).cpu().numpy().astype(int)
                label = config.labels_to_names[int(cls)]
                self._add_detected_box(xyxy.tolist(), label)

        # inference faces
        frame = Image.fromarray(open_cv_image0[:, :, ::-1])

        # detect faces
        boxes, probs = self.face_model.detect(frame, landmarks=False)

        if boxes is None:
            boxes = []
            probs = []

        for box, conf in zip(boxes, probs):
            if conf < 0.5:
                continue

            # box in xyxy format
            self._add_detected_box(box.astype(int).tolist(), 'face')

        self.processingLabel.config(text="Done")

    def _add_detected_box(self, b, label):
        """Draw box ``b`` (x1, y1, x2, y2) for ``label`` and record it.

        Nothing happens when ``label`` is not currently in the label list box.
        """
        curr_label_list = list(self.labelListBox.get(0, END))
        if label not in curr_label_list:
            return

        self.bboxId = self.canvas.create_rectangle(
            b[0],
            b[1],
            b[2],
            b[3],
            width=2,
            outline=config.COLORS[len(self.bboxList) % len(config.COLORS)])
        self.bboxList.append((b[0], b[1], b[2], b[3]))

        # One red handle per corner, in the order top-left, top-right,
        # bottom-right, bottom-left.
        corners = ((b[0], b[1]), (b[2], b[1]), (b[2], b[3]), (b[0], b[3]))
        for cx, cy in corners:
            handle = self.canvas.create_oval(cx - 3,
                                             cy - 3,
                                             cx + 3,
                                             cy + 3,
                                             fill="red")
            self.bboxPointList.append(handle)

        self.bboxIdList.append(self.bboxId)
        self.bboxId = None
        self.objectLabelList.append(str(label))
        self.objectListBox.insert(
            END, '(%d, %d) -> (%d, %d)' % (b[0], b[1], b[2], b[3]) + ': ' +
            str(label))
        self.objectListBox.itemconfig(
            len(self.bboxIdList) - 1,
            fg=config.COLORS[(len(self.bboxIdList) - 1) %
                             len(config.COLORS)])

コード例 #16

0

ファイルを表示

ファイル: silic.py プロジェクト: RedbirdTaiwan/silic

    def detect(self,
               weights,
               step=1000,
               conf_thres=0.1,
               imgsz=640,
               targetfilepath=None,
               iou_thres=0.25,
               targetclasses=None):
        """Detect sound classes in successive clips of the rainbow spectrogram.

        The model is (re)loaded only when no model is cached or ``weights``
        differs from the cached ``self.model_path``. ``self.tfr`` is then
        called to prepare the spectrogram, which is cut into clips that are
        letterboxed, run through the model and filtered with NMS.

        Args:
            weights: Path of the weights file. The class metadata CSV is
                looked up by replacing ``'best.pt'`` in this path with
                ``'soundclass.csv'``.
            step: Stride between clip start positions, in the same unit as
                ``self.duration``.
            conf_thres: Confidence threshold forwarded to NMS.
            imgsz: Target shape forwarded to ``letterbox``.
            targetfilepath: Forwarded unchanged to ``self.tfr``.
            iou_thres: IoU threshold forwarded to NMS.
            targetclasses: Optional iterable of class names to keep; each
                name must be present in ``self.names``.

        Returns:
            A list of rows. The first row is the column header; every
            following row describes one detection.

        Raises:
            ValueError: If a name in ``targetclasses`` is not in
                ``self.names``.
        """
        if not (self.model and self.model_path == weights):
            self.model_path = weights
            model = attempt_load(self.model_path, map_location=self.device)
            self.names = (model.module.names
                          if hasattr(model, 'module') else model.names)
            model.float()
            self.model = model
            # NOTE: 'sounclass_id' is the column name expected in the CSV.
            self.soundclasses = pd.read_csv(
                self.model_path.replace('best.pt', 'soundclass.csv'),
                encoding='utf8',
                index_col='sounclass_id').T.to_dict()

        if targetclasses:
            classes = [self.names.index(name) for name in targetclasses]
        else:
            classes = None
        self.tfr(targetfilepath=targetfilepath, spect_type='rainbow')

        # prepare input data clips
        dataset = []
        for clip_ts in range(0, self.duration, step):
            img_width = self.rainbow_img.shape[1]
            clip_start = round(clip_ts / self.duration * img_width)
            clip_end = clip_start + round(
                self.clip_length / self.duration * img_width)
            if clip_end > img_width:
                # the remaining spectrogram is shorter than one full clip
                break
            img0 = self.rainbow_img[:, clip_start:clip_end]
            img = letterbox(img0, new_shape=imgsz)[0]
            # BGR to RGB and HWC to CHW
            img = img[:, :, ::-1].transpose(2, 0, 1)
            img = np.ascontiguousarray(img)
            dataset.append([
                os.path.join(self.audiopath, self.audiofilename), img, img0,
                clip_ts
            ])

        labels = [[
            'file', 'classid', 'species_name', 'sound_class',
            'scientific_name', "time_begin", "time_end", "freq_low",
            "freq_high", "score"
        ]]
        for path, img, im0, time_start in dataset:
            img = torch.from_numpy(img).float().to(self.device)
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            # Pure inference: disable autograd so no graph is built or kept.
            with torch.no_grad():
                pred = self.model(img, augment=False)[0]
                pred = non_max_suppression(pred,
                                           conf_thres=conf_thres,
                                           iou_thres=iou_thres,
                                           classes=classes)
            # normalization gain whwh, identical for every detection in a clip
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
            for det in pred:  # detections per image
                if not len(det):
                    continue
                # map boxes from the letterboxed size back to the clip size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                            gn).view(-1).tolist()  # normalized xywh
                    t_begin, t_end, f_low, f_high = self.xywh2ttff(xywh)
                    classid = self.names[int(cls)]
                    info = self.soundclasses[classid]
                    labels.append([
                        path, classid, info['species_name'],
                        info['sound_class'], info['scientific_name'],
                        round(time_start + t_begin),
                        round(time_start + t_end), f_low, f_high,
                        round(float(conf), 3)
                    ])

        return labels

コード例 #17

0

ファイルを表示

    def update(self):
        """Process one video frame and refresh the Tk display.

        Reads a frame from ``self.vs``. When no frame is available the Tk
        main loop is asked to quit. Otherwise the frame is letterboxed, run
        through ``self.model`` and NMS, and, when there are detections:

        * the boxes are handed to ``self.deepsort``,
        * tracked vehicles are counted per class via ``self.Obj_counting``,
        * the frame is annotated (polygon, occupancy/date/time/flow panel),
        * the image and the counter labels in ``self.counting_result`` are
          created on first use and refreshed afterwards.

        Without detections only ``self.deepsort.increment_ages()`` is called.
        In both cases the method reschedules itself through
        ``self.root.after(self.delay, self.update)``.
        """
        start_time = datetime.datetime.now()
        # dd/mm/YYYY
        date = datetime.date.today().strftime("%d/%m/%Y")
        current_time = start_time.strftime("%H:%M:%S")

        grabbed, frame = self.vs.read()
        if not grabbed:
            self.root.quit()
            print(
                "***********************************************FINSHED***********************************************"
            )
            return

        names = self.model.module.names if hasattr(
            self.model, "module") else self.model.names
        img0 = frame
        # The frame counter is local to this call, so it is always 1 when it
        # reaches calculate_fps.
        frames_seen = 1
        track_ids = []
        fps = 0.0
        # Per-class tallies for this call, keyed by detector class name.
        counts = {"motorcycle": 0, "auto": 0, "car": 0, "truck": 0, "bus": 0}
        total = 0

        img = letterbox(img0, new_shape=640)[0]
        # BGR to RGB and HWC to CHW
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference (autograd disabled: no gradients are needed here)
        t1 = time_synchronized()
        with torch.no_grad():
            pred = self.model(img, augment=self.augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred,
            self.conf_thres,
            self.iou_thres,
            classes=self.classes,
            agnostic=self.agnostic_nms,
        )
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if self.webcam:  # batch_size >= 1
                # NOTE(review): img0 is a single frame here, so img0[i]
                # selects a row of it - confirm webcam mode is really used.
                s, im0 = "%g: " % i, img0[i].copy()
            else:
                s, im0 = "", img0

            s += "%gx%g " % img.shape[2:]  # print string

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                bbox_xywh = []
                confs = []
                labels = []

                # Adapt detections to deep sort input format
                # (center x, center y, width, height)
                for *xyxy, conf, cls in det:
                    xa, ya, xb, yb = [c.item() for c in xyxy[:4]]
                    bbox_w = abs(xa - xb)
                    bbox_h = abs(ya - yb)
                    bbox_xywh.append([
                        min(xa, xb) + bbox_w / 2,
                        min(ya, yb) + bbox_h / 2,
                        bbox_w,
                        bbox_h,
                    ])
                    confs.append([conf.item()])
                    labels.append(str(names[int(cls)]))

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = self.deepsort.update(xywhs, confss, im0)

                # draw line
                cv2.polylines(im0, [self.pts_arr], self.isClosed,
                              (255, 0, 0), 2)
                # black background for the text panel
                cv2.rectangle(img0, (650, 0), (850, 170),
                              color=(0, 0, 0),
                              thickness=-1)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    reversed_labels = labels[::-1]
                    polygon = Polygon(self.points)
                    counter = 0
                    for idx, box in enumerate(bbox_xyxy):
                        # NOTE(review): tracks are paired with the detection
                        # labels in reverse order and the last label index is
                        # never used - kept as in the original, confirm
                        # whether this is intended.
                        if idx >= len(labels) - 1:
                            continue
                        x1, y1, x2, y2 = [int(v) for v in box]
                        track_id = int(identities[idx])
                        label = "{:d}".format(track_id)
                        vehicle_class = reversed_labels[idx]

                        # Object counting
                        if vehicle_class in counts:
                            counts[vehicle_class], total = self.Obj_counting(
                                track_id, label, track_ids,
                                counts[vehicle_class], total)
                        fps = self.calculate_fps(start_time, frames_seen)
                        # check if center point of object is inside the polygon
                        point = Point((int(x1 + (x2 - x1) / 2),
                                       int(y1 + (y2 - y1) / 2)))
                        if polygon.contains(point):
                            counter += 1
                            cv2.rectangle(im0, (x1, y1), (x2, y2),
                                          (0, 255, 0), 3)

                    # An occupancy of exactly 5 used to match neither the
                    # "High" nor the "Medium" test and was reported as "Low".
                    if counter > 5:
                        flow = "High"
                    elif counter >= 2:
                        flow = "Medium"
                    else:
                        flow = "Low"

                    overlay_lines = (
                        ("Occupancy - " + str(counter), 30),
                        ("Date - " + str(date), 60),
                        ("Time - " + str(current_time), 90),
                        ("Speed - " + "N A", 120),
                        ("Flow - " + str(flow), 150),
                    )
                    for text, y_pos in overlay_lines:
                        cv2.putText(
                            im0,
                            text,
                            (650, y_pos),
                            cv2.FONT_HERSHEY_DUPLEX,
                            .5,
                            (255, 0, 0),
                            1,
                        )

                image = PIL.Image.fromarray(img0)
                image = PIL.ImageTk.PhotoImage(image)
                font = ("Arial", 12)
                self.canvas.configure(image=image)
                # keep a reference so Tk does not lose the image
                self.canvas.image = image

                # The header never changes: create it once instead of adding
                # a new Label widget on every frame.
                if getattr(self, "_counting_header", None) is None:
                    self._counting_header = tk.Label(
                        self.counting_result,
                        text="Counting Results",
                        width=12,
                        font=font,
                        anchor="center",
                        fg="blue",
                    )
                    self._counting_header.grid(row=0, column=2, padx=2)

                two_w = counts["motorcycle"]
                three_w = counts["auto"]
                four_w = counts["car"]
                truck = counts["truck"]
                bus = counts["bus"]
                # (attribute, text on creation, text on refresh, width,
                #  grid options)
                stat_specs = (
                    ("two_w", f"Two Wheeler \n\n{two_w}",
                     f"Two Wheeler\n\n{two_w}", 13,
                     {"row": 1, "column": 0, "padx": 2}),
                    ("three_w", f"Three Wheeler\n\n{three_w}",
                     f"Three Wheeler\n\n{three_w}", 13,
                     {"row": 1, "column": 1, "padx": 2}),
                    ("four_w", f"Four Wheeler\n\n{four_w}",
                     f"Four Wheeler\n\n{four_w}", 13,
                     {"row": 1, "column": 2, "padx": 2}),
                    ("truck", f"Truck\n\n{truck}",
                     f"Truck\n\n{truck}", 10,
                     {"row": 1, "column": 3, "padx": 1}),
                    ("bus", f"Bus\n\n{bus}",
                     f"Bus\n\n{bus}", 10,
                     {"row": 1, "column": 4, "padx": 2}),
                    ("total", f"Total Vehicle\n\n{total}",
                     f"Total Vehicle\n\n{total}", 10,
                     {"row": 1, "column": 5, "pady": 2}),
                    ("fps", f"FPS\n\n{fps:.2f}",
                     f"FPS\n\n{fps:.2f}", 13,
                     {"row": 2, "column": 0, "pady": 2}),
                )
                for attr, created_text, refreshed_text, width, grid_kw in stat_specs:
                    widget = getattr(self, attr)
                    if widget is None:
                        widget = tk.Label(
                            self.counting_result,
                            text=created_text,
                            width=width,
                            font=font,
                            anchor="center",
                            bg="#8080c0",
                            fg="white",
                        )
                        setattr(self, attr, widget)
                        widget.grid(**grid_kw)
                    else:
                        widget.configure(text=refreshed_text)

            else:
                self.deepsort.increment_ages()
            # schedule the next frame
            self.root.after(self.delay, self.update)
            # Print time (inference + NMS)
            print("%sDone. (%.3fs)" % (s, t2 - t1))

コード例 #18

0

ファイルを表示

    def get_detector_results(self, request):
        """Run the detector on the request image and return the detections.

        Only one request is processed at a time: while ``self.currently_busy``
        is set, further calls return immediately with a BUSY status. The flag
        is always cleared when processing ends, on success and on error.

        Args:
            request (GetDetectorResultsRequest): Request whose ``image`` is
                converted with ``ros_numpy.numpify``. An ``"rgb8"`` image has
                its channel order reversed before preprocessing.

        Returns:
            GetDetectorResultsResponse: BUSY status when another call is in
            progress; ERROR status (with the detections collected so far)
            when processing raises; OKAY status with all detections
            otherwise.

        Raises:
            ImportError: If torch, numpy or the yolov5 utilities cannot be
                imported.
        """
        # Imported lazily inside the method; an ImportError propagates to
        # the caller.
        import torch
        from yolov5.utils.general import non_max_suppression
        from yolov5.utils.general import scale_coords
        from yolov5.utils.datasets import letterbox
        import numpy as np

        if self.currently_busy.is_set():
            return GetDetectorResultsResponse(status=ServiceStatus(BUSY=True))

        detections = Detections()
        self.currently_busy.set()

        try:
            image = ros_numpy.numpify(request.image)
            if request.image.encoding == "rgb8":
                image = image[..., ::-1]

            original_shape = image.shape
            img = letterbox(image, new_shape=self.image_size)[0]
            img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
            img = np.ascontiguousarray(img)

            img = torch.from_numpy(img).to(self.device)
            img = img.half() if self.half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            with torch.no_grad():
                pred = self.model(img, augment=False)[0]
            pred = non_max_suppression(pred, self.conf_thresh, self.iou_thresh, agnostic=False)

            for det in pred:
                if det is None or not len(det):
                    continue
                # map boxes from the letterboxed size back to the input image
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], original_shape).round()

                for x1, y1, x2, y2, conf, cls in reversed(det):
                    roi = RegionOfInterest(x1=int(x1), y1=int(y1), x2=int(x2), y2=int(y2))
                    seg_roi = SegmentOfInterest(x=[], y=[])
                    detections.objects.append(Detection(roi=roi, seg_roi=seg_roi, id=self._new_id(), track_id=-1,
                                                        confidence=float(conf), class_name=self.names[int(cls)]))
        except Exception as e:
            print("FruitCastServer error: ", e)
            return GetDetectorResultsResponse(status=ServiceStatus(ERROR=True), results=detections)
        finally:
            # Release the busy flag on every exit path. It used to be cleared
            # only inside the detection loop, so an exception left the
            # service permanently BUSY.
            self.currently_busy.clear()

        return GetDetectorResultsResponse(status=ServiceStatus(OKAY=True), results=detections)