Example #1
    def __init__(self,
                 cfg=None,
                 weight=None,
                 img_size=(416, 416),
                 device=None):
        if cfg is None:
            cfg = os.path.dirname(os.path.abspath(__file__))
            cfg = os.path.join(cfg, 'cfg/yolov-obj.cfg')
        if weight is None:
            weight = os.path.dirname(os.path.abspath(__file__))
            weight = os.path.join(weight, 'cfg/yolov-obj_final.weights')

        assert os.path.exists(cfg), 'YOLO config file must exist'
        assert os.path.exists(weight), 'YOLO weights file must exist'

        self.img_size = img_size

        model = Darknet(cfg, img_size)
        load_darknet_weights(model, weight)
        model.fuse()

        self.model = model.to(device)
        self.model.eval()
        print('loaded detector weights from %s' % weight)

        self.device = device
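
A minimal usage sketch for the example above. The snippet starts at __init__, so the enclosing class name is not shown; Detector below is a hypothetical stand-in, and the default cfg/weights paths must exist as laid out above:

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
detector = Detector(img_size=(416, 416), device=device)  # Detector is a hypothetical class name
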
Example #2
    def __init__(self, config, device):
        self.opt = opt = config
        self.conf_thres = opt['conf_thres']
        self.nms_thres = opt['nms_thres']
        self.img_size = opt['img_size']
        self.out_img_size = out_size = opt['out_size']

        # Set up model
        self.model = Darknet(opt['model_def'], img_size=opt['img_size'])\
            .to(device)

        if opt['weights_path'].endswith(".weights"):
            # Load darknet weights
            self.model.load_darknet_weights(opt['weights_path'])
        else:
            # Load checkpoint weights
            self.model.load_state_dict(torch.load(opt['weights_path']))

        self.model.eval()  # Set in evaluation mode
        # Extracts class labels from file
        self.classes = yolo_utils.load_classes(opt['class_path'])

        mode = "nearest"
        self.b1_scale = nn.Upsample(scale_factor=out_size // 8, mode=mode)
        self.b2_scale = nn.Upsample(scale_factor=out_size // 16, mode=mode)
        self.b3_scale = nn.Upsample(scale_factor=out_size // 32, mode=mode)
        self.no_detects = 0
Example #3
def define_yolo(model_def):
    """
    return
    ---
    a Darknet class object: yolo    
    the forward function of yolo returns:
        -(featuremap, yolo_outputs)         # for inference
        -(loss, featuremap, yolo_outputs)   # for training 
    """
    yolo = Darknet(model_def)

    return yolo
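
A short usage sketch for define_yolo; the cfg path is an assumption, and the return signature follows the docstring above (inference mode):

import torch

yolo = define_yolo('config/yolov3.cfg')  # assumed cfg path
yolo.eval()
with torch.no_grad():
    featuremap, yolo_outputs = yolo(torch.zeros(1, 3, 416, 416))
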
Example #4
    def __init__(
            self,
            device,
            img_size=416,
            person_detector=False,
            video=False,
            return_dict=False
    ):

        homedir = '/'

        weights_path = os.path.join(homedir, 'torch/models/yolov3.weights')
        os.makedirs(os.path.dirname(weights_path), exist_ok=True)

        if not os.path.isfile(weights_path):
            url = 'https://pjreddie.com/media/files/yolov3.weights'
            outdir = os.path.dirname(weights_path)
            download_url(url, outdir)

        model_def = os.path.join(homedir, 'torch/config/yolov3.cfg')
        os.makedirs(os.path.dirname(model_def), exist_ok=True)

        if not os.path.isfile(model_def):
            url = 'https://raw.githubusercontent.com/mkocabas/yolov3-pytorch/master/yolov3/config/yolov3.cfg'
            outdir = os.path.dirname(model_def)
            download_url(url, outdir)

        self.conf_thres = 0.8
        self.nms_thres = 0.4
        self.img_size = img_size
        self.video = video
        self.person_detector = person_detector
        self.device = device
        self.return_dict = return_dict

        self.model = Darknet(model_def, img_size=img_size).to(device)
        self.model.load_darknet_weights(weights_path)
        # self.model.load_state_dict(torch.load(weights_path))
        self.model.eval()
Example #5
    def __init__(self):
        self.img_size = 512
        self.augment = False
        self.half = False
        self.agnostic_nms = False
        self.iou_thres = 0.6
        self.fourcc = 'mp4v'
        self.conf_thres = 0.3
        self.out = 'output'
        self.save_txt = True
        self.view_img = True
        self.save_img = True

        weights = 'yolov3/weights/yolov3.pt'
        self.device = torch.device('cuda')  # torch.device so that device.type checks below work
        self.model = Darknet('yolov3/cfg/yolov3.cfg', self.img_size)
        self.model.load_state_dict(torch.load(weights, map_location=self.device)['model'])

        # Second-stage classifier
        self.classify = False
        if self.classify:
            self.modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
            self.modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=self.device)['model'])  # load weights
            self.modelc.to(self.device).eval()

        # Eval mode
        self.model.to(self.device).eval()

        # Fuse Conv2d + BatchNorm2d layers
        # model.fuse()

        # Half precision
        self.half = self.half and self.device.type != 'cpu'  # half precision only supported on CUDA
        if self.half:
            self.model.half()

        # Get names and colors
        self.names = load_classes('yolov3/data/coco.names')
Example #6
    def __init__(self,
                 device,
                 img_size=416,
                 person_detector=False,
                 video=False,
                 return_dict=False):

        homedir = os.path.expanduser("~")
        weights_path = os.path.join(homedir, '.torch/models/yolov3.weights')
        model_def = os.path.join(homedir, '.torch/config/yolov3.cfg')

        self.conf_thres = 0.8
        self.nms_thres = 0.4
        self.img_size = img_size
        self.video = video
        self.person_detector = person_detector
        self.device = device
        self.return_dict = return_dict

        self.model = Darknet(model_def, img_size=img_size).to(device)
        self.model.load_darknet_weights(weights_path)
        # self.model.load_state_dict(torch.load(weights_path))
        self.model.eval()
Example #7
def init_model(cfg_path, device):

    with open(cfg_path) as f:
        opt = yaml.safe_load(f)  # PyYAML >= 5 needs an explicit Loader; safe_load is the safe default

    nms_thres = opt['nms_thres']
    # Set up model
    model = Darknet(opt['model_def'], img_size=opt['img_size']).to(device)

    if opt['weights_path'].endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(opt['weights_path'])
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(opt['weights_path']))

    model.eval()  # Set in evaluation mode
    classes = utils.load_classes(
        opt['class_path'])  # Extracts class labels from file
    return model, classes
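
A hedged sketch of the YAML config init_model expects; the key names are taken from the lookups above, the values are illustrative only:

# contents of an example cfg.yaml (illustrative values):
#   model_def: config/yolov3.cfg
#   img_size: 416
#   weights_path: weights/yolov3.weights
#   class_path: data/coco.names
#   nms_thres: 0.4

model, classes = init_model('cfg.yaml', torch.device('cuda'))
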
Example #8
class YOLOv3:
    def __init__(
            self,
            device,
            img_size=416,
            person_detector=False,
            video=False,
            return_dict=False
    ):

        homedir = '/'

        weights_path = os.path.join(homedir, 'torch/models/yolov3.weights')
        os.makedirs(os.path.dirname(weights_path), exist_ok=True)

        if not os.path.isfile(weights_path):
            url = 'https://pjreddie.com/media/files/yolov3.weights'
            outdir = os.path.dirname(weights_path)
            download_url(url, outdir)

        model_def = os.path.join(homedir, 'torch/config/yolov3.cfg')
        os.makedirs(os.path.dirname(model_def), exist_ok=True)

        if not os.path.isfile(model_def):
            url = 'https://raw.githubusercontent.com/mkocabas/yolov3-pytorch/master/yolov3/config/yolov3.cfg'
            outdir = os.path.dirname(model_def)
            download_url(url, outdir)

        self.conf_thres = 0.8
        self.nms_thres = 0.4
        self.img_size = img_size
        self.video = video
        self.person_detector = person_detector
        self.device = device
        self.return_dict = return_dict

        self.model = Darknet(model_def, img_size=img_size).to(device)
        self.model.load_darknet_weights(weights_path)
        # self.model.load_state_dict(torch.load(weights_path))
        self.model.eval()

    @torch.no_grad()
    def __call__(self, batch):
        if self.video:
            inp_batch = []
            for img in batch:
                # Pad to square resolution
                img, _ = pad_to_square(img, 0)
                # Resize
                img = resize(img, self.img_size)
                inp_batch.append(img)
            inp_batch = torch.stack(inp_batch).float().to(self.device)
        else:
            inp_batch = batch

        detections = self.model(inp_batch)
        detections = non_max_suppression(detections, self.conf_thres, self.nms_thres)

        for idx, det in enumerate(detections):
            if det is None:
                det = {
                    'boxes': torch.empty(0,4),
                    'scores': torch.empty(0),
                    'classes': torch.empty(0),
                }
                detections[idx] = det
                continue

            if self.video:
                det = rescale_boxes(det, self.img_size, batch.shape[-2:])

            if self.person_detector:
                det = det[det[:,6] == 0]

            if self.return_dict:
                det = {
                    'boxes': det[:, :4],
                    'scores': det[:, 4] * det[:, 5],
                    'classes': det[:, 6],
                }

            detections[idx] = det

        return detections
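
A usage sketch for the class above. In video mode the input is a (N, 3, H, W) float tensor of raw frames that __call__ letterboxes itself; person_detector=True keeps only COCO class 0:

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
yolo = YOLOv3(device, person_detector=True, video=True, return_dict=True)
frames = torch.rand(4, 3, 720, 1280)   # dummy RGB frames in [0, 1]
detections = yolo(frames)              # list of {'boxes', 'scores', 'classes'} dicts
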
Example #9
class YOLOv3:
    def __init__(self,
                 device,
                 img_size=416,
                 person_detector=False,
                 video=False,
                 return_dict=False):

        homedir = os.path.expanduser("~")
        weights_path = os.path.join(homedir, '.torch/models/yolov3.weights')
        model_def = os.path.join(homedir, '.torch/config/yolov3.cfg')

        self.conf_thres = 0.8
        self.nms_thres = 0.4
        self.img_size = img_size
        self.video = video
        self.person_detector = person_detector
        self.device = device
        self.return_dict = return_dict

        self.model = Darknet(model_def, img_size=img_size).to(device)
        self.model.load_darknet_weights(weights_path)
        # self.model.load_state_dict(torch.load(weights_path))
        self.model.eval()

    @torch.no_grad()
    def __call__(self, batch):
        if self.video:
            inp_batch = []
            for img in batch:
                # Pad to square resolution
                img, _ = pad_to_square(img, 0)
                # Resize
                img = resize(img, self.img_size)
                inp_batch.append(img)
            inp_batch = torch.stack(inp_batch).float().to(self.device)
        else:
            inp_batch = batch

        detections = self.model(inp_batch)
        detections = non_max_suppression(detections, self.conf_thres,
                                         self.nms_thres)

        for idx, det in enumerate(detections):
            if det is None:
                det = {
                    'boxes': torch.empty(0, 4),
                    'scores': torch.empty(0),
                    'classes': torch.empty(0),
                }
                detections[idx] = det
                continue

            if self.video:
                det = rescale_boxes(det, self.img_size, batch.shape[-2:])

            if self.person_detector:
                det = det[det[:, 6] == 0]

            if self.return_dict:
                det = {
                    'boxes': det[:, :4],
                    'scores': det[:, 4] * det[:, 5],
                    'classes': det[:, 6],
                }

            detections[idx] = det

        return detections
Example #10
def myDetect(save_img=False,
             imgSize=416,
             outputPath="../output",
             inputSource='0',
             opt_names='',
             opt_cfg='cfg/yolov3-spp.cfg',
             currentWeights='weights/yolov3-spp.weights',
             opt_fourcc='mp4v',
             opt_half=False,
             opt_view_img=False,
             opt_save_txt=False,
             opt_device='',
             opt_agnostic_nms=False,
             opt_iou_thres=0.5,
             opt_conf_thres=0.3,
             opt_classes=0):
    img_size = (
        416, 256
    ) if ONNX_EXPORT else imgSize  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img, save_txt = outputPath, inputSource, currentWeights, opt_half, opt_view_img, opt_save_txt
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else opt_device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt_cfg, img_size)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        load_darknet_weights(model, weights)

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101',
                                             n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Eval mode
    model.to(device).eval()

    # Export mode
    if ONNX_EXPORT:
        img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
        torch.onnx.export(model,
                          img,
                          'weights/export.onnx',
                          verbose=False,
                          opset_version=10)

        # Validate exported model
        import onnx
        model = onnx.load('weights/export.onnx')  # Load the ONNX model
        onnx.checker.check_model(model)  # Check that the IR is well formed
        print(onnx.helper.printable_graph(
            model.graph))  # Print a human readable representation of the graph
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=img_size, half=half)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=img_size, half=half)

    # Get names and colors
    names = load_classes(opt_names)
    # print("names",names)
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    ###   custom content
    datium = {"hat": 0, "person": 0}
    ###   custom content

    # Run inference
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        t = time.time()

        # Get detections
        img = torch.from_numpy(img).to(device)
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        pred = model(img)[0]

        if opt_half:
            pred = pred.float()

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt_conf_thres,
                                   opt_iou_thres,
                                   classes=opt_classes,
                                   agnostic=opt_agnostic_nms)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i]
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            s += '%gx%g ' % img.shape[2:]  # print string
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                datium = {"hat": 0, "person": 0}

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string
                    datium[names[int(c)]] = int(n)

                # with open('resultData.txt', 'w') as f:  # open the output file
                #     f.write(str(datium))  # write the dict as a string

                # localtime = time.time()

                print(datium['hat'], datium['person'])

                try:
                    # execute the SQL statement
                    cursor.execute(
                        "INSERT INTO maskData(mask,nomask) VALUES({hat},{person})"
                        .format(hat=datium['hat'], person=datium['person']))
                    # commit to the database
                    db.commit()
                except Exception:
                    print("an error occurred")
                    # roll back on error
                    db.rollback()

                # db.close()

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        with open(save_path + '.txt', 'a') as file:
                            file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)])

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, time.time() - t))

            # Stream results
            if view_img:
                # cv2.imshow("webcam", im0)
                cv2.imwrite(
                    "C:/Users/y2554/Desktop/mask/server/output/camera.jpg",
                    im0)
                # if cv2.waitKey(1) == ord('q'):  # q to quit
                #     raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                    print("save_path:{}".format(save_path))
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path,
                            cv2.VideoWriter_fourcc('H', '2', '6', '4'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.path.join(os.getcwd(), out))
        print('Last saved file: %s' % save_path)
        if platform == 'darwin':  # MacOS
            os.system('open ' + out + ' ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    # return save_path
    combineData = [save_path, datium]
    return combineData
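
The INSERT above assumes a maskData table with integer mask/nomask columns; db and cursor are globals the snippet never creates. A hedged setup sketch, assuming a MySQL connection via pymysql:

import pymysql  # assumption: the snippet's db/cursor come from a MySQL connection

db = pymysql.connect(host='localhost', user='root', password='...', database='mask')  # credentials elided
cursor = db.cursor()
cursor.execute('CREATE TABLE IF NOT EXISTS maskData (mask INT, nomask INT)')
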
Example #11
def detect(save_img=False):
    img_size = (
        416, 256
    ) if ONNX_EXPORT else opt.img_size  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img, save_txt = opt.output, opt.source, opt.weights, opt.half, opt.view_img, opt.save_txt
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt.cfg, img_size)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        load_darknet_weights(model, weights)

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101',
                                             n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Eval mode
    model.to(device).eval()

    # Export mode
    if ONNX_EXPORT:
        img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
        torch.onnx.export(model,
                          img,
                          'weights/export.onnx',
                          verbose=False,
                          opset_version=10)

        # Validate exported model
        import onnx
        model = onnx.load('weights/export.onnx')  # Load the ONNX model
        onnx.checker.check_model(model)  # Check that the IR is well formed
        print(onnx.helper.printable_graph(
            model.graph))  # Print a human readable representation of the graph
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=img_size, half=half)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=img_size, half=half)

    # Get names and colors
    names = load_classes(opt.names)
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        t = time.time()

        # Get detections
        img = torch.from_numpy(img).to(device)
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        pred = model(img)[0]

        if opt.half:
            pred = pred.float()

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i]
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            print('save_path ' + save_path[:-4])
            s += '%gx%g ' % img.shape[2:]  # print string

            if det is None:
                emptyList.append(save_path)  # assumes a module-level emptyList

            print(save_path)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        with open(save_path[:-4] + '.txt', 'a') as file:
                            file.write(
                                ('%s %.3f %.2f %.2f %.2f %.2f ' + '\n') %
                                (names[int(cls)], conf, *xyxy))

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)])

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, time.time() - t))
            print(emptyList)
            # Stream results
            if view_img:
                cv2.imshow("webcam", im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                    # print(save_path)
                    # pirate = cv2.imread(save_path)
                    # cv2.imshow('pirate', pirate)
                    # cv2.waitKey(0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.path.join(os.getcwd(), out))
        if platform == 'darwin':  # MacOS
            os.system('open ' + out + ' ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
Example #12
class ObjectDetector:
    def __init__(self):
        self.img_size = 512
        self.augment = False
        self.half = False
        self.agnostic_nms = False
        self.iou_thres = 0.6
        self.fourcc = 'mp4v'
        self.conf_thres = 0.3
        self.out = 'output'
        self.save_txt = True
        self.view_img = True
        self.save_img = True

        weights = 'yolov3/weights/yolov3.pt'
        self.device = torch.device('cuda')  # torch.device so that device.type checks below work
        self.model = Darknet('yolov3/cfg/yolov3.cfg', self.img_size)
        self.model.load_state_dict(torch.load(weights, map_location=self.device)['model'])

        # Second-stage classifier
        self.classify = False
        if self.classify:
            self.modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
            self.modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=self.device)['model'])  # load weights
            self.modelc.to(self.device).eval()

        # Eval mode
        self.model.to(self.device).eval()

        # Fuse Conv2d + BatchNorm2d layers
        # model.fuse()

        # Half precision
        self.half = self.half and self.device.type != 'cpu'  # half precision only supported on CUDA
        if self.half:
            self.model.half()

        # Get names and colors
        self.names = load_classes('yolov3/data/coco.names')

    def detect(self, img):
        # Run inference
        im0 = img.copy()

        # Padded resize
        img = letterbox(im0, new_shape=self.img_size)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
    
        # Inference
        t1 = torch_utils.time_synchronized()
        with torch.no_grad():
            pred = self.model(img, augment=self.augment)[0]
        t2 = torch_utils.time_synchronized()
        # print('Predict time: (%.3fs)' % (t2 - t1))
    
        # to float
        if self.half:
            pred = pred.float()
    
        # Apply NMS
        pred = non_max_suppression(pred, self.conf_thres, self.iou_thres,
                                   multi_label=False, classes=None, agnostic=self.agnostic_nms)
    
        # Apply Classifier
        if self.classify:
            pred = apply_classifier(pred, self.modelc, img, im0)

        # Process detections
        det = pred[0]
        sce = Scene(im0)
        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            # Write results
            for *xyxy, conf, cls in det:
                # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                obj = Object(self.names[int(cls)], xyxy, conf)
                sce.objs.append(obj)

        return sce
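
A usage sketch for ObjectDetector; it assumes a CUDA machine plus the hard-coded yolov3/ paths in __init__, and Scene/Object come from the same codebase:

import cv2

detector = ObjectDetector()
frame = cv2.imread('test.jpg')   # BGR image, as detect() expects
scene = detector.detect(frame)
for obj in scene.objs:           # Object(label, xyxy, conf) instances
    print(obj)
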
Example #13
class YoloDetector:
    def __init__(self, config, device):
        self.opt = opt = config
        self.conf_thres = opt['conf_thres']
        self.nms_thres = opt['nms_thres']
        self.img_size = opt['img_size']
        self.out_img_size = out_size = opt['out_size']

        # Set up model
        self.model = Darknet(opt['model_def'], img_size=opt['img_size'])\
            .to(device)

        if opt['weights_path'].endswith(".weights"):
            # Load darknet weights
            self.model.load_darknet_weights(opt['weights_path'])
        else:
            # Load checkpoint weights
            self.model.load_state_dict(torch.load(opt['weights_path']))

        self.model.eval()  # Set in evaluation mode
        # Extracts class labels from file
        self.classes = yolo_utils.load_classes(opt['class_path'])

        mode = "nearest"
        self.b1_scale = nn.Upsample(scale_factor=out_size // 8, mode=mode)
        self.b2_scale = nn.Upsample(scale_factor=out_size // 16, mode=mode)
        self.b3_scale = nn.Upsample(scale_factor=out_size // 32, mode=mode)
        self.no_detects = 0

    def rescale_boxes(self, boxes, current_dim, original_shape):
        """ Rescales bounding boxes to the original shape """
        orig_h, orig_w = original_shape

        # The amount of padding that was added
        pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
        pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
        # Image height and width after padding is removed
        unpad_h = current_dim - pad_y
        unpad_w = current_dim - pad_x

        # Rescale bounding boxes to dimension of original image
        boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
        boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
        boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
        boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
        return boxes

    @staticmethod
    def class_selector():
        with open("yolov3/data/coco.names", "r") as f:
            yolo_classes = f.readlines()
            yolo_classes = [x.strip() for x in yolo_classes]

        indexer = torch.zeros(len(CLASSES), len(yolo_classes)).bool()

        for i, (k, v) in enumerate(CLASSES.items()):
            indexer[i, yolo_classes.index(v)] = 1
        return indexer

    def detect(self, rgb_img):
        """Should run with RGB images normalized in [0, 1]."""
        self.no_detects += 1

        max_batch = 128

        with torch.no_grad():
            multi_batch = []
            all_imgs = rgb_img
            for i in range(len(all_imgs) // max_batch + 1):
                rgb_img = all_imgs[i*max_batch: (i+1)*max_batch]
                if len(rgb_img) <= 0:
                    break

                bs = rgb_img.size(0)

                detections = self.model(rgb_img)
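                # Split the flattened predictions into YOLO's three scale
                # branches: 3 anchors x 8x8 cells = 192 rows, 3 x 16x16 = 768
                # (rows 192:960), and 3 x 32x32 = 3072 (rows 960:). The counts
                # imply a 256x256 input, i.e. strides 32/16/8.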
                b1, b2, b3 = (detections[:, :192], detections[:, 192: 960],
                              detections[:, 960:])

                ordd = (0, 1, 4, 2, 3)

                b1 = b1.view(bs, 3, 8, 8, 85).permute(*ordd).contiguous().view(bs, -1, 8, 8)
                b2 = b2.view(bs, 3, 16, 16, 85).permute(*ordd).contiguous().view(bs, -1, 16, 16)
                b3 = b3.view(bs, 3, 32, 32, 85).permute(*ordd).contiguous().view(bs, -1, 32, 32)

                b1 = self.b1_scale(b1)
                b2 = self.b2_scale(b2)
                b3 = self.b3_scale(b3)

                out = (b1 + b2 + b3) / 3
                out = out.view(bs, 3, 85, 32, 32)
                out = out.mean(dim=1)#.permute(0, 3, 1, 2)
                # out = torch.cat([b1, b2, b3], dim=1)
                multi_batch.append(out)

        if len(multi_batch) > 1:
            out = torch.cat(multi_batch, dim=0)
        else:
            out = multi_batch[0]

        out = out.detach()
        return out

    def get_bounding_boxes(self, img, display=False):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        t_img = torch.from_numpy(
            img.astype('float') / 255.0).cuda().permute(2, 0, 1).unsqueeze(0)

        detections = self.model(t_img)
        detections = yolo_utils.non_max_suppression(detections,
                                                    self.conf_thres,
                                                    self.nms_thres)[0]

        # Draw bounding boxes and labels of detections
        if display:
            img_disp = img.copy()

        if detections is not None:
            # Rescale boxes to original image
            detections = self.rescale_boxes(detections, self.img_size,
                                            img.shape[:2])
            unique_labels = detections[:, -1].cpu().unique()

            for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                print("\t+ Label: %s, Conf: %.5f" %
                      (self.classes[int(cls_pred)], cls_conf.item()))

                box_w = x2 - x1
                box_h = y2 - y1

                print("{} {} {} {}" .format(x1, y1, x2, y2))

                # Create a Rectangle patch
                if display:
                    img_disp = cv2.rectangle(img_disp, (x2, y2), (x1, y1),
                                             (255,0,0), 2)

        if display:
            cv2.imshow("Test", img_disp)
            cv2.waitKey(0)

        return detections
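
A quick sanity check of rescale_boxes above (called unbound, since it never touches self): a box covering the whole 416x416 letterboxed canvas of a 640x480 frame should map back to the full original frame:

import torch

boxes = torch.tensor([[0., 52., 416., 364.]])   # y = 52..364 bounds the unpadded area
out = YoloDetector.rescale_boxes(None, boxes, 416, (480, 640))
print(out)                                      # expected: tensor([[0., 0., 640., 480.]])
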
Example #14
def main(args: argparse.Namespace):
    # setting log and logger
    logname = ''
    if args.islog:
        logdir = Path('../logs')
        if not logdir.exists():
            logdir.mkdir(parents=True)
        now_dt = datetime.now()
        logname = '{}-{:d}-inference.log'.format(
            now_dt.strftime('%m%dT%H%M%S'), now_dt.microsecond)
        logname = str(logdir / logname)
    logger = logging.getLogger(__name__)
    log_handler(logger, logname=logname)
    logger.info(args)

    # prepare video IO
    cap = cv2.VideoCapture(args.video)
    video_nframe = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    video_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    video_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    video_fps = cap.get(cv2.CAP_PROP_FPS)
    logger.info('video h={}, w={}, fps={:.3f}, nframe={}'.format(
        int(video_h), int(video_w), video_fps, int(video_nframe)))
    
    output_dir = Path(args.output_dir)
    if not output_dir.exists():
        output_dir.mkdir(parents=True)
    output_videoname = str(output_dir / '{}.avi'.format(Path(args.video).stem))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_videoname, fourcc=fourcc, fps=int(video_fps),
                          frameSize=(int(video_w), int(video_h)))

    # load model and weight
    logger.info('load model')
    model = Darknet(args.config, img_size=args.img_size)
    logger.info('load weight')
    model.load_weights(args.checkpoint)
    model.cuda()
    model.eval()
    classes = load_classes(args.classname)
    tracker = SORT()

    # draw setting
    cmap = plt.get_cmap('tab20b')
    bbox_palette = [cmap(i)[:3] for i in np.linspace(0, 1, 1000)]
    random.shuffle(bbox_palette)

    # loop over the video
    for frame_idx in tqdm(range(int(video_nframe))):
        ok, frame = cap.read()
        if not ok:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pilimg = Image.fromarray(frame)

        # detection
        _start_time = datetime.now()
        detections = detect_image(pilimg, model, img_size=args.img_size)
        _cost_time = datetime.now() - _start_time

        # image and bbox transition
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        image = np.array(pilimg)
        pad_x = max(image.shape[0] - image.shape[1], 0) * (args.img_size / max(image.shape))
        pad_y = max(image.shape[1] - image.shape[0], 0) * (args.img_size / max(image.shape))
        unpad_h = args.img_size - pad_y
        unpad_w = args.img_size - pad_x

        if detections is not None:
            logger.debug('detect frame {} in {}, get detections {}'.format(
                frame_idx+1, str(_cost_time), detections.shape))
            tracked_detections = tracker.update(detections.cpu())
            unique_labels = detections[:, -1].cpu().unique()
            num_unique_labels = len(unique_labels)
            for x1, y1, x2, y2, obj_id, cls_pred in tracked_detections:
                box_h = int(((y2 - y1) / unpad_h) * frame.shape[0])
                box_w = int(((x2 - x1) / unpad_w) * frame.shape[1])
                y1 = int(((y1 - pad_y // 2) / unpad_h) * frame.shape[0])
                x1 = int(((x1 - pad_x // 2) / unpad_w) * frame.shape[1])
                label = classes[int(cls_pred)]
                color = bbox_palette[int(obj_id) % len(bbox_palette)]
                color = [i*255 for i in color]

                cv2.rectangle(frame,
                              (x1, y1),
                              (x1+box_w, y1+box_h),
                              color, 2)
                cv2.rectangle(frame,
                              (x1, y1-35),
                              (x1+len(label)*19+60, y1),
                              color, -1)
                cv2.putText(frame,
                            '{}-{}'.format(label, int(obj_id)),
                            (x1, y1-10),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1, (255, 255, 255), 3)
        out.write(frame)
    cap.release()
    out.release()
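
A hedged sketch of the argparse setup main() expects; the attribute names are taken from the accesses above, while the flags and defaults are assumptions:

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--video', required=True, help='input video path')
    parser.add_argument('--output-dir', dest='output_dir', default='../outputs')
    parser.add_argument('--config', default='config/yolov3.cfg')
    parser.add_argument('--checkpoint', default='weights/yolov3.weights')
    parser.add_argument('--classname', default='data/coco.names')
    parser.add_argument('--img-size', dest='img_size', type=int, default=416)
    parser.add_argument('--islog', action='store_true', help='also write a log file')
    main(parser.parse_args())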