def train(
        cfg,
        data_cfg,
        resume=False,
        epochs=273,  # 500200 batches at bs 64, dataset length 117263
        batch_size=16,
        accumulate=1,
        weights_path='weights',
        init_weights='yolov3-player_stage2_start.81'):
    #init_seeds()
    weights = weights_path + os.sep
    latest = weights + 'latest.pt'
    best = weights + 'best.pt'
    device, n_gpu = torch_utils.select_device()

    # Image size
    cfg_model = parse_cfg(cfg)
    img_size = (int(cfg_model[0]['width']), int(cfg_model[0]['height']))

    # Configure run
    data_dict = parse_data_cfg(data_cfg)
    train_path = data_dict['train_path']
    train_set = data_dict['train_set']

    # Initialize model
    model = Darknet(cfg).to(device)

    # Optimizer
    optimizer = optim.SGD(model.parameters(),
                          lr=hyp['lr0'],
                          momentum=hyp['momentum'],
                          weight_decay=hyp['weight_decay'])

    cutoff = -1  # backbone weights load up to this cutoff layer
    start_epoch = 0
    best_loss = float('inf')

    if resume:  # Load previously saved model (resume from latest.pt)
        chkpt = torch.load(latest, map_location=device)  # load checkpoint
        model.load_state_dict(chkpt['model'])

        start_epoch = chkpt['epoch'] + 1
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_loss = chkpt['best_loss']
        del chkpt

    else:  # Initialize model with backbone (optional)
        model.load_weights(weights + init_weights)

    # Scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[100, 440, 1097],
                                               gamma=0.1,
                                               last_epoch=start_epoch - 1)

    # Dataset
    dataset = YoloDataSets(data_path=train_path,
                           input_size=img_size,
                           batch_size=batch_size,
                           image_set=train_set,
                           augment=True,
                           jitter_x=0.3,
                           jitter_y=0.3)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend=opt.backend,
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.rank)
        model = torch.nn.parallel.DistributedDataParallel(model)
        # sampler = torch.utils.data.distributed.DistributedSampler(dataset)

    # Dataloader
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=opt.num_workers,
        shuffle=False,  # setting True would disable rectangular training
        pin_memory=True,
        collate_fn=dataset.collate_fn)

    # Mixed precision training https://github.com/NVIDIA/apex
    # install help: https://github.com/NVIDIA/apex/issues/259
    mixed_precision = False
    if mixed_precision:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Start training
    t = time.time()
    model.hyp = hyp  # attach hyperparameters to model
    #model_info(model)

    nb = len(dataloader)
    results = (0, 0, 0, 0, 0)  # P, R, mAP, F1, test_loss

    n_burnin = int(cfg_model[0]["burn_in"])  # burn-in batches

    for epoch in range(start_epoch, epochs):
        model.train()
        print(
            ('\n%8s%12s' + '%10s' * 7) % ('Epoch', 'Batch', 'xy', 'wh', 'conf',
                                          'cls', 'total', 'nTargets', 'time'))

        # Update scheduler (passing epoch to step() is deprecated in newer PyTorch)
        scheduler.step(epoch)

        mloss = torch.zeros(5).to(device)  # mean losses
        for i, (imgs, targets) in enumerate(dataloader):
            imgs = imgs.to(device)
            targets = targets.to(device)
            nt = len(targets)
            #plot_images(imgs=imgs, targets=targets, fname='train_batch%d.jpg' % i)

            # SGD burn-in
            if epoch == 0 and i <= n_burnin:
                lr = hyp['lr0'] * (i / n_burnin)**4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            if i == 0:
                print('learning rate: %g' % optimizer.param_groups[0]['lr'])
            # Run model
            pred, loss, loss_items = model(imgs, targets)
            loss = torch.mean(loss)
            n_ = int(loss_items.size()[0] / 5)
            loss_items = torch.mean(loss_items.view((n_, 5)), 0)

            if torch.isnan(loss):
                print('WARNING: nan loss detected, ending training')
                return results

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nb:
                optimizer.step()
                optimizer.zero_grad()

            # Update running mean of tracked metrics
            mloss = (mloss * i + loss_items) / (i + 1)
            # Print batch results
            s = ('%8s%12s' + '%10.3g' * 7) % ('%g/%g' % (epoch, epochs - 1),
                                              '%g/%g' % (i, nb - 1),
                                              *mloss, nt, time.time() - t)

            t = time.time()
            print(s)

        # Calculate mAP (always test final epoch, skip first 5 if opt.nosave)
        if not (opt.notest or
                (opt.nosave and epoch < 5)) or epoch == epochs - 1:
            with torch.no_grad():
                results, maps = test.test(cfg,
                                          data_cfg,
                                          batch_size=batch_size,
                                          img_size=img_size,
                                          model=model,
                                          conf_thres=0.1,
                                          iou_thres=0.4)

        # Write epoch results
        with open('results.txt', 'a') as file:
            file.write(s + '%11.3g' * 5 % results +
                       '\n')  # P, R, mAP, F1, test_loss

        # Update best loss
        test_loss = results[4]
        if test_loss < best_loss:
            best_loss = test_loss

        # Save training results
        save = not opt.nosave
        if save:
            # Create checkpoint
            chkpt = {
                'epoch': epoch,
                'best_loss': best_loss,
                'model': model.module.state_dict()
                if isinstance(model, nn.parallel.DistributedDataParallel)
                else model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            # Save latest checkpoint
            torch.save(chkpt, latest)

            # Save best checkpoint
            if best_loss == test_loss:
                torch.save(chkpt, best)

            # Save backup every 10 epochs (optional)
            if epoch > 0 and epoch % 10 == 0:
                torch.save(chkpt, weights + 'backup%g.pt' % epoch)

            # Delete checkpoint
            del chkpt

    return results
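
For reference, a minimal invocation might look like the sketch below; the cfg and data file names are placeholders for whatever your project uses, not files that ship with this snippet.

# Hypothetical usage sketch -- 'cfg/yolov3-player.cfg' and 'data/player.data'
# are placeholder paths, not part of the example above.
if __name__ == '__main__':
    results = train('cfg/yolov3-player.cfg',   # Darknet model definition
                    'data/player.data',        # train_path/train_set config
                    resume=False,
                    epochs=273,
                    batch_size=16,
                    accumulate=4)              # optimizer step every 4 batches
    print('final (P, R, mAP, F1, test_loss):', results)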
Example #2
def detect():

    agnostic_nms = False
    augment = False
    classes = None
    conf_thres = 0.4
    device = ""
    imgsz = 640
    iou_thres = 0.5
    out = "inference/output"
    save_txt = True
    source = "inference/input/image.jpg"
    update = False
    view_img = False
    weights = "training/weights.pt"

    webcam = (source.isnumeric() or source.startswith(
        ("rtsp://", "rtmp://", "http://")) or source.endswith(".txt"))

    # Initialize
    set_logging()
    device = select_device(device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    # half = device.type != 'cpu'  # half precision only supported on CUDA
    detpath = "inference/detection"
    if os.path.exists(detpath):
        shutil.rmtree(detpath)  # delete output folder
    os.makedirs(detpath)

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name="resnet101", n=2)  # initialize
        modelc.load_state_dict(
            torch.load("weights/resnet101.pt",
                       map_location=device)["model"])  # load weights
        modelc.to(device).eval()

    save_img = True
    save_txt = True
    save_obj = True
    dataset = LoadImages(source, img_size=imgsz)

    names = model.module.names if hasattr(model, "module") else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        # img = img.half() if half else img.float()  # uint8 to fp16/32
        img = img.float()
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes=classes,
                                   agnostic=agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], "%g: " % i, im0s[i].copy()
            else:
                p, s, im0 = path, "", im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                "_%g" % dataset.frame if dataset.mode == "video" else "")
            s += "%gx%g " % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += "%g %ss, " % (n, names[int(c)])  # add to string

                # Write results
                for det_idx, (*xyxy, conf, cls) in enumerate(det):
                    if save_txt:  # Write to file
                        xywh = ((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                 gn).view(-1).tolist())  # normalized xywh
                        with open(txt_path + ".txt", "a") as f:
                            f.write(("%g " * 5 + "\n") %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = "%s %.2f" % (names[int(cls)], conf)
                        plot_one_box(
                            xyxy,
                            im0,
                            label=label,
                            color=colors[int(cls)],
                            line_thickness=3,
                        )

                    if save_obj and int(cls) == 0:
                        # Crop this detection from the original image
                        x, y = int(xyxy[0]), int(xyxy[1])
                        w = int(xyxy[2] - xyxy[0])
                        h = int(xyxy[3] - xyxy[1])
                        crop_img = im0.astype(np.uint8)[y:y + h, x:x + w]

                        # !! rescale image !!
                        filename = "%s_%d.jpg" % (names[int(cls)], det_idx)  # unique name per crop
                        filepath = os.path.join(detpath, filename)
                        cv2.imwrite(filepath, crop_img)

            # Print time (inference + NMS)
            print("%sDone. (%.3fs)" % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord("q"):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == "images":
                    cv2.imwrite(save_path, im0)

    if save_txt or save_img:
        print("Results saved to %s" % Path(out))

    print("Done. (%.3fs)" % (time.time() - t0))
Example #3
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)  # computed but not drawn below
                        plot_one_box(xyxy, im0, label=None, color=[255, 69, 0], line_thickness=1)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
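
The per-frame tensor preparation in this loop (uint8 HWC numpy array to a normalized float CHW batch) can be isolated into a small helper. A sketch, assuming the frame has already been letterboxed to the network size by the dataset loader:

import numpy as np
import torch

def preprocess(frame: np.ndarray, device, half: bool = False) -> torch.Tensor:
    # frame: letterboxed BGR image, shape (H, W, 3), dtype uint8
    img = np.ascontiguousarray(frame.transpose(2, 0, 1))  # HWC -> CHW
    t = torch.from_numpy(img).to(device)
    t = t.half() if half else t.float()  # uint8 to fp16/32
    t /= 255.0  # 0 - 255 to 0.0 - 1.0
    return t.unsqueeze(0)  # add batch dimension

Example #4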
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    # if webcam:
    #     view_img = True
    #     cudnn.benchmark = True  # set True to speed up constant image size inference
    #     dataset = LoadStreams(source, img_size=imgsz)
    # else:
    #     save_img = True
    #     dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    img_dir_path = '/home/lishuang/Disk/remote/pycharm/yolov5data/fangweisui/images/tx2_train_data_with_xml'
    answerdict = {
        "T_2018_06_06_1_company_2": 2,
        "T_2018_07_09_hxxy_2": 2,
        "T_2018_07_09_hxxy_out_2": 2,
        "T_2018_07_12_1_company_1": 1,
        "T_2018_07_17_hxxy_1": 1,
        "T_2018_07_19_child_1": 1,
        "T_2018_07_19_child_2": 2,
        "T_2018_07_19_child_3": 3,
        "T_2018_07_19_child_out_1": 1,
        "T_2018_07_19_child_out_2": 2,
        "T_child_2017-04-22_3_35_2": 2,
        "T_child_company_2018_11_29_2": 2,
        "T_gangzhuao_xianchang_1": 1,
        "T_shanghai_hongqiao_xianchang_test": 1,
    }
    file_data = ""
    file_data += "filename,0,1,2,3,miss,correct,error\n"

    num_zero = {}
    num_one = {}
    num_two = {}
    num_three = {}
    num_head = {}
    num_body = {}
    img_paths = os.listdir(img_dir_path)
    for img_dir in img_paths:
        img_path = os.path.join(img_dir_path, img_dir)
        if os.path.isdir(img_path):

            save_img = True
            imgdataset = LoadImages(img_path, img_size=imgsz)

            # Recreate the output folder for this set, plus count buckets 0-3
            outimg = str(Path(out) / img_dir)
            if os.path.exists(outimg):
                shutil.rmtree(outimg)  # delete output folder
            os.makedirs(outimg)  # make new output folder
            for sub in ('0', '1', '2', '3'):
                os.makedirs(str(Path(out) / img_dir / sub))  # bucket by detection count

            for path, img, im0s, vid_cap in imgdataset:  # one folder (one video's frames)
                img = torch.from_numpy(img).to(device)
                img = img.half() if half else img.float()  # uint8 to fp16/32
                img /= 255.0  # 0 - 255 to 0.0 - 1.0
                if img.ndimension() == 3:
                    img = img.unsqueeze(0)

                # Inference
                t1 = time_synchronized()
                pred = model(img, augment=opt.augment)[0]

                # Apply NMS
                pred = non_max_suppression(pred,
                                           opt.conf_thres,
                                           opt.iou_thres,
                                           classes=opt.classes,
                                           agnostic=opt.agnostic_nms)
                t2 = time_synchronized()

                # Apply Classifier
                if classify:
                    pred = apply_classifier(pred, modelc, img, im0s)

                boxnum = 0
                boxnumbody = 0
                boxnumhead = 0
                # Process detections
                for i, det in enumerate(pred):  # detections per image
                    if webcam:  # batch_size >= 1
                        p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                    else:
                        p, s, im0 = path, '', im0s

                    save_path = str(Path(out) / img_dir / Path(p).name)
                    s += '%gx%g ' % img.shape[2:]  # print string
                    gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

                    if det is not None and len(det):
                        # Rescale boxes from img_size to im0 size
                        det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                                  im0.shape).round()

                        # Print results
                        for c in det[:, -1].unique():
                            n = (det[:, -1] == c).sum()  # detections per class
                            s += '%g %ss, ' % (n, names[int(c)])  # add to string

                        # Write results
                        for *xyxy, conf, cls in det:
                            if cls == 0:
                                # label = 'person'
                                boxnum += 1
                                boxnumbody += 1
                            elif cls == 1:
                                # label = 'head'
                                boxnumhead += 1

                            if save_img or view_img:  # Add bbox to image
                                label = '%s %.2f' % (names[int(cls)], conf)
                                plot_one_box(xyxy,
                                             im0,
                                             label=label,
                                             color=colors[int(cls)],
                                             line_thickness=3)

                    for counter in (num_zero, num_one, num_two, num_three):
                        counter.setdefault(img_dir, 0)
                    boxnum = max(boxnumbody, boxnumhead)
                    bucket = str(min(boxnum, 3))  # 0, 1, 2 or 3+ people
                    counters = {'0': num_zero, '1': num_one, '2': num_two, '3': num_three}
                    counters[bucket][img_dir] += 1
                    save_path = str(Path(out) / img_dir / bucket / Path(p).name)

                    # Print time (inference + NMS)
                    print('%sDone. (%.3fs)' % (s, t2 - t1))

                    # Stream results
                    if view_img:
                        cv2.imshow(p, im0)
                        if cv2.waitKey(1) == ord('q'):  # q to quit
                            raise StopIteration

                    # Save results (image with detections)
                    if save_img:
                        if imgdataset.mode == 'images':
                            cv2.imwrite(save_path, im0)
                        else:
                            if vid_path != save_path:  # new video
                                vid_path = save_path
                                if isinstance(vid_writer, cv2.VideoWriter):
                                    vid_writer.release()  # release previous video writer

                                fourcc = 'mp4v'  # output video codec
                                fps = vid_cap.get(cv2.CAP_PROP_FPS)
                                w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                                h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                                vid_writer = cv2.VideoWriter(
                                    save_path, cv2.VideoWriter_fourcc(*fourcc),
                                    fps, (w, h))
                            vid_writer.write(im0)

    for key in img_paths:
        if not os.path.isdir(os.path.join(img_dir_path, key)):
            continue
        num_error = 0
        num_correct = 0
        num_miss = 0
        if answerdict[key] == 1:
            num_error = num_two[key] + num_three[key]
            num_correct = num_one[key]
            num_miss = num_zero[key]
        elif answerdict[key] == 2:
            num_error = num_three[key]
            num_correct = num_two[key]
            num_miss = num_zero[key] + num_one[key]
        elif answerdict[key] == 3:
            num_error = 0
            num_correct = num_three[key]
            num_miss = num_zero[key] + num_one[key] + num_two[key]
        elif answerdict[key] == 0:
            num_error = num_one[key] + num_two[key] + num_three[key]
            num_correct = num_zero[key]
            num_miss = 0
        file_data += key + ",{},{},{},{},{},{},{}\n".format(
            num_zero[key], num_one[key], num_two[key], num_three[key],
            num_miss, num_correct, num_error)

    with open('yolov5-fangweisui.csv', 'w') as f:
        f.write(file_data)

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
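
The bookkeeping at the end reduces each folder's bucket counts to miss/correct/error relative to the expected person count; the same rule in a compact, self-contained form (hypothetical helper, not part of the script above):

def score_folder(counts, expected):
    # counts: {0: n0, 1: n1, 2: n2, 3: n3}; bucket 3 pools all counts >= 3
    correct = counts[expected]
    error = sum(n for c, n in counts.items() if c > expected)
    miss = sum(n for c, n in counts.items() if c < expected)
    return miss, correct, error

print(score_folder({0: 1, 1: 10, 2: 3, 3: 0}, expected=1))  # (1, 10, 3)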
Example #5
        # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
        opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp)  # check files
        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
        opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
        opt.name = 'evolve' if opt.evolve else opt.name
        opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve)  # increment run

    # DDP mode
    opt.total_batch_size = opt.batch_size
    device = select_device(opt.device, batch_size=opt.batch_size)
    if opt.local_rank != -1:
        assert torch.cuda.device_count() > opt.local_rank
        torch.cuda.set_device(opt.local_rank)
        device = torch.device('cuda', opt.local_rank)
        dist.init_process_group(backend='nccl',
                                init_method='env://')  # distributed backend
        assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
        opt.batch_size = opt.total_batch_size // opt.world_size

    # Hyperparameters
    with open(opt.hyp) as f:
        hyp = yaml.load(f, Loader=yaml.SafeLoader)  # load hyps

    # Train
    logger.info(opt)
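
The opt.img_size.extend(...) line above pads a single [size] to a [train, test] pair by repeating the last value, and leaves a 2-element list untouched; a quick demonstration:

for sizes in ([640], [640, 512]):
    s = list(sizes)
    s.extend([s[-1]] * (2 - len(s)))  # extend to 2 sizes (train, test)
    print(sizes, '->', s)
# [640] -> [640, 640]
# [640, 512] -> [640, 512]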
Example #6
def main(opt):
    # Checks
    set_logging(RANK)
    if RANK in [-1, 0]:
        print(colorstr('train: ') +
              ', '.join(f'{k}={v}' for k, v in vars(opt).items()))
        check_git_status()
        check_requirements(requirements=FILE.parent / 'requirements.txt',
                           exclude=['thop'])

    # Resume
    if opt.resume and not check_wandb_resume(opt) and not opt.evolve:  # resume an interrupted run
        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent path
        assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
        with open(Path(ckpt).parent.parent / 'opt.yaml') as f:
            opt = argparse.Namespace(**yaml.safe_load(f))  # replace
        opt.cfg, opt.weights, opt.resume = '', ckpt, True  # reinstate
        LOGGER.info(f'Resuming training from {ckpt}')
    else:
        opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp)  # check files
        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
        if opt.evolve:
            opt.project = 'runs/evolve'
            opt.exist_ok = opt.resume
        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))

    # DDP mode
    device = select_device(opt.device, batch_size=opt.batch_size)
    if LOCAL_RANK != -1:
        from datetime import timedelta
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count'
        assert not opt.image_weights, '--image-weights argument is not compatible with DDP training'
        assert not opt.evolve, '--evolve argument is not compatible with DDP training'
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device('cuda', LOCAL_RANK)
        dist.init_process_group(
            backend="nccl" if dist.is_nccl_available() else "gloo")

    # Train
    if not opt.evolve:
        train(opt.hyp, opt, device)
        if WORLD_SIZE > 1 and RANK == 0:
            print('Destroying process group... ', end='')
            dist.destroy_process_group()
            print('Done.')

    # Evolve hyperparameters (optional)
    else:
        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
        meta = {
            'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
            'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
            'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
            'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
            'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
            'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
            'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
            'box': (1, 0.02, 0.2),  # box loss gain
            'cls': (1, 0.2, 4.0),  # cls loss gain
            'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
            'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
            'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
            'iou_t': (0, 0.1, 0.7),  # IoU training threshold
            'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
            'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
            'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
            'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
            'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
            'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
            'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
            'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
            'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
            'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
            'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
            'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
            'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
            'mosaic': (1, 0.0, 1.0),  # image mosaic (probability)
            'mixup': (1, 0.0, 1.0),  # image mixup (probability)
            'copy_paste': (1, 0.0, 1.0)}  # segment copy-paste (probability)

        with open(opt.hyp) as f:
            hyp = yaml.safe_load(f)  # load hyps dict
            if 'anchors' not in hyp:  # anchors commented in hyp.yaml
                hyp['anchors'] = 3
        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)  # only val/save final epoch
        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
        evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
        if opt.bucket:
            os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {save_dir}')  # download evolve.csv if exists

        for _ in range(opt.evolve):  # generations to evolve
            if evolve_csv.exists():  # if evolve.csv exists: select best hyps and mutate
                # Select parent(s)
                parent = 'single'  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min() + 1E-6  # weights (sum > 0)
                if parent == 'single' or len(x) == 1:
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([meta[k][0] for k in hyp.keys()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) *
                         npr.random() * s + 1).clip(0.3, 3.0)
                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
                    hyp[k] = float(x[i + 7] * v[i])  # mutate

            # Constrain to limits
            for k, v in meta.items():
                hyp[k] = max(hyp[k], v[1])  # lower limit
                hyp[k] = min(hyp[k], v[2])  # upper limit
                hyp[k] = round(hyp[k], 5)  # significant digits

            # Train mutation
            results = train(hyp.copy(), opt, device)

            # Write mutation results
            print_mutation(results, hyp.copy(), save_dir, opt.bucket)

        # Plot results
        plot_evolve(evolve_csv)
        print(
            f'Hyperparameter evolution finished\n'
            f"Results saved to {colorstr('bold', save_dir)}\n"
            f'Use best hyperparameters example: $ python train.py --hyp {evolve_yaml}'
        )
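
The mutation step above can be read as one pure function: multiplicative noise per hyperparameter, gated by the meta gain and the mutation probability, clipped to [0.3, 3.0], then constrained to each gene's limits. A standalone sketch under those assumptions (using numpy's Generator API rather than the script's np.random seeding):

import numpy as np

def mutate(hyp, meta, mp=0.8, s=0.2, rng=None):
    rng = rng or np.random.default_rng()
    keys = list(hyp.keys())
    g = np.array([meta[k][0] for k in keys])  # gains 0-1; gain 0 freezes a gene
    v = np.ones(len(keys))
    while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
        v = (g * (rng.random(len(keys)) < mp) * rng.standard_normal(len(keys)) *
             rng.random() * s + 1).clip(0.3, 3.0)
    return {k: round(float(np.clip(hyp[k] * v[i], meta[k][1], meta[k][2])), 5)
            for i, k in enumerate(keys)}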
Example #7
def test(
        data,
        weights=None,
        batch_size=16,
        imgsz=640,
        conf_thres=0.001,
        iou_thres=0.6,  # for NMS
        save_json=False,
        verbose=False,
        model=None,
        dataloader=None,
        logdir='./runs',
        merge=False):
    # Initialize/load model and set device
    if model is None:
        training = False
        device = torch_utils.select_device(opt.device, batch_size=batch_size)

        # Remove previous
        for f in glob.glob(os.path.join(logdir, 'test_batch*.jpg')):
            os.remove(f)

        # Load model
        model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
        torch_utils.model_info(model)
        model.fuse()
        model.to(device)

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    else:  # called by train.py
        training = True
        device = next(model.parameters()).device  # get model device

    # Half
    half = device.type != 'cpu' and torch.cuda.device_count() == 1  # half precision only supported on single-GPU
    half = False  # forced off here; the line above shows when it would be safe
    if half:
        model.half()  # to FP16

    # Configure
    model.eval()
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    nc = int(data['num_classes'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()
    losser = YoloLoss(model)
    # Dataloader
    if dataloader is None:  # not training
        merge = opt.merge  # use Merge NMS
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = kitti.create_dataloader(path,
                                             imgsz,
                                             batch_size,
                                             int(max(model.stride)),
                                             config=None,
                                             augment=False,
                                             cache=False,
                                             pad=0.5,
                                             rect=True)[0]

    seen = 0
    names = data['names']
    kitti8class = data_utils.kitti8_classes()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths,
                  shapes) in enumerate(tqdm.tqdm(dataloader, desc=s)):
        targets.delete_by_mask()
        targets.to_float32()
        targ = ParamList(targets.size, True)
        targ.copy_from(targets)
        img_id = targets.get_field('img_id')
        classes = targets.get_field('class')
        bboxes = targets.get_field('bbox')
        targets = torch.cat(
            [img_id.unsqueeze(-1),
             classes.unsqueeze(-1), bboxes], dim=-1)
        img = img.to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        # img /= 1.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = torch_utils.time_synchronized()
            inf_out, train_out = model(img)  # inference and training outputs
            t0 += torch_utils.time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                # loss += calc_loss([x.float() for x in train_out], targets, model)[1][:3]  # GIoU, obj, cls
                loss += losser([x.float() for x in train_out], targ)[1][:3]
            # Run NMS
            t = torch_utils.time_synchronized()
            output = postprocess.apply_nms(inf_out,
                                           nc,
                                           conf_thres=conf_thres,
                                           iou_thres=iou_thres,
                                           merge=merge)
            t1 += torch_utils.time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # with open('test.txt', 'a') as file:
            #    [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

            # Clip boxes to image bounds
            utils.clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary

            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                utils.scale_coords(img[si].shape[1:], box, shapes[si][0],
                                   shapes[si][1])  # to original shape
                box = data_utils.xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id': image_id,
                        'category_id': kitti8class[int(p[5])],
                        'bbox': [round(x, 3) for x in b],
                        'score': round(p[4], 5)
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0],
                                  niou,
                                  dtype=torch.bool,
                                  device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = data_utils.xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = metrics_utils.box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if batch_i < 1:
            f = os.path.join(logdir,
                             'test_batch%g_gt.jpg' % batch_i)  # filename
            visual_utils.plot_images(img, targets, paths, f,
                                     names)  # ground truth
            f = os.path.join(logdir, 'test_batch%g_pred.jpg' % batch_i)
            visual_utils.plot_images(img,
                                     utils.output_to_target(
                                         output, width, height), paths, f,
                                     names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = metrics_utils.ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, AP@0.5, AP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3
              for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print(
            'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
            % t)

    # Save JSON
    if save_json and map50 and len(jdict):
        imgIds = [
            int(Path(x).stem.split('_')[-1])
            for x in dataloader.dataset.img_files
        ]
        f = 'detections_val2017_%s_results.json' % \
            (weights.split(os.sep)[-1].replace('.pt', '') if weights else '')  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)

        try:
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api

            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception:
            print('WARNING: pycocotools must be installed with numpy==1.17 to run correctly. '
                  'See https://github.com/cocodataset/cocoapi/issues/356')

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map,
            *(loss.cpu() / len(dataloader)).tolist()), maps, t
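
The correct matrix built above records, per prediction, which of the ten IoU thresholds its best target match clears; a two-line illustration of that comparison:

import torch

iouv = torch.linspace(0.5, 0.95, 10)  # thresholds for mAP@0.5:0.95
print(torch.tensor(0.72) > iouv)      # best IoU of one prediction vs. each threshold
# tensor([ True,  True,  True,  True,  True, False, False, False, False, False])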
Example #8
def run(
        data,
        weights=None,  # model.pt path(s)
        batch_size=32,  # batch size
        imgsz=640,  # inference size (pixels)
        conf_thres=0.001,  # confidence threshold
        iou_thres=0.6,  # NMS IoU threshold
        task='val',  # train, val, test, speed or study
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        workers=8,  # max dataloader workers (per RANK in DDP mode)
        single_cls=False,  # treat as single-class dataset
        augment=False,  # augmented inference
        verbose=False,  # verbose output
        save_txt=False,  # save results to *.txt
        save_hybrid=False,  # save label+prediction hybrid results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_json=False,  # save a COCO-JSON results file
        project=ROOT / 'runs/val',  # save to project/name
        name='exp',  # save to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        half=True,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        model=None,
        dataloader=None,
        save_dir=Path(''),
        plots=True,
        callbacks=Callbacks(),
        compute_loss=None,
):
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device, pt, jit, engine = next(model.parameters()).device, True, False, False  # get model device, PyTorch model

        half &= device.type != 'cpu'  # half precision only supported on CUDA
        model.half() if half else model.float()
    else:  # called directly
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
        stride, pt, jit, onnx, engine = model.stride, model.pt, model.jit, model.onnx, model.engine
        imgsz = check_img_size(imgsz, s=stride)  # check image size
        half &= (pt or jit or onnx or engine) and device.type != 'cpu'  # FP16 supported on limited backends with CUDA
        if pt or jit:
            model.model.half() if half else model.model.float()
        elif engine:
            batch_size = model.batch_size
            if model.trt_fp16_input != half:
                LOGGER.info('model ' + ('requires' if model.trt_fp16_input else 'incompatible with') +
                            ' --half. Adjusting automatically.')
                half = model.trt_fp16_input
        else:
            half = False
            batch_size = 1  # export.py models default to batch-size 1
            device = torch.device('cpu')
            LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')

        # Data
        data = check_dataset(data)  # check

    # Configure
    model.eval()
    is_coco = isinstance(data.get('val'), str) and data['val'].endswith(
        'coco/val2017.txt')  # COCO dataset
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz), half=half)  # warmup
        pad = 0.0 if task in ('speed', 'benchmark') else 0.5
        rect = False if task == 'benchmark' else pt  # square inference for benchmarks
        task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
        dataloader = create_dataloader(data[task],
                                       imgsz,
                                       batch_size,
                                       stride,
                                       single_cls,
                                       pad=pad,
                                       rect=rect,
                                       workers=workers,
                                       prefix=colorstr(f'{task}: '))[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = dict(enumerate(model.names if hasattr(model, 'names') else model.module.names))
    class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
    s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    pbar = tqdm(dataloader,
                desc=s,
                bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')  # progress bar
    for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
        t1 = time_sync()
        if pt or jit or engine:
            im = im.to(device, non_blocking=True)
            targets = targets.to(device)
        im = im.half() if half else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        nb, _, height, width = im.shape  # batch size, channels, height, width
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        out, train_out = model(im) if training else model(
            im, augment=augment, val=True)  # inference, loss outputs
        dt[1] += time_sync() - t2

        # Loss
        if compute_loss:
            loss += compute_loss([x.float() for x in train_out],
                                 targets)[1]  # box, obj, cls

        # NMS
        targets[:, 2:] *= torch.Tensor([width, height, width,
                                        height]).to(device)  # to pixels
        lb = [targets[targets[:, 0] == i, 1:]
              for i in range(nb)] if save_hybrid else []  # for autolabelling
        t3 = time_sync()
        out = non_max_suppression(out,
                                  conf_thres,
                                  iou_thres,
                                  labels=lb,
                                  multi_label=True,
                                  agnostic=single_cls)
        dt[2] += time_sync() - t3

        # Metrics
        for si, pred in enumerate(out):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path, shape = Path(paths[si]), shapes[si][0]
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Predictions
            if single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            scale_coords(im[si].shape[1:], predn[:, :4], shape,
                         shapes[si][1])  # native-space pred

            # Evaluate
            if nl:
                tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
                scale_coords(im[si].shape[1:], tbox, shape,
                             shapes[si][1])  # native-space labels
                labelsn = torch.cat((labels[:, 0:1], tbox),
                                    1)  # native-space labels
                correct = process_batch(predn, labelsn, iouv)
                if plots:
                    confusion_matrix.process_batch(predn, labelsn)
            else:
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(),
                          tcls))  # (correct, conf, pcls, tcls)

            # Save/log
            if save_txt:
                save_one_txt(predn,
                             save_conf,
                             shape,
                             file=save_dir / 'labels' / (path.stem + '.txt'))
            if save_json:
                save_one_json(predn, jdict, path,
                              class_map)  # append to COCO-JSON dictionary
            callbacks.run('on_val_image_end', pred, predn, path, names, im[si])

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f'val_batch{batch_i}_labels.jpg'  # labels
            Thread(target=plot_images,
                   args=(im, targets, paths, f, names),
                   daemon=True).start()
            f = save_dir / f'val_batch{batch_i}_pred.jpg'  # predictions
            Thread(target=plot_images,
                   args=(im, output_to_target(out), paths, f, names),
                   daemon=True).start()

    # Compute metrics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats,
                                                      plot=plots,
                                                      save_dir=save_dir,
                                                      names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # mAP@0.5, mAP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%11i' * 2 + '%11.3g' * 4  # print format
    LOGGER.info(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            LOGGER.info(pf %
                        (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    if not training:
        shape = (batch_size, 3, imgsz, imgsz)
        LOGGER.info(
            f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}'
            % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        callbacks.run('on_val_end')

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights
                 ).stem if weights is not None else ''  # weights
        anno_json = str(
            Path(data.get('path', '../coco')) /
            'annotations/instances_val2017.json')  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...')
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            check_requirements(['pycocotools'])
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, 'bbox')
            if is_coco:
                eval.params.imgIds = [
                    int(Path(x).stem) for x in dataloader.dataset.im_files
                ]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            LOGGER.info(f'pycocotools unable to run: {e}')

    # Return results
    model.float()  # for training
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map,
            *(loss.cpu() / len(dataloader)).tolist()), maps, t
Example #9
def detect(opt):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(opt.project) / opt.name,
                              exist_ok=opt.exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    if opt.save_clean_img:
        os.mkdir(save_dir / Path('clean'))

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(
        model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    skip = 0
    for path, img, im0s, vid_cap in dataset:
        skip += 1
        if (skip % opt.frame_skip) != 0:
            continue
        else:  # the reset is redundant given the modulo; kept for clarity
            skip = 0
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   opt.classes,
                                   opt.agnostic_nms,
                                   max_det=opt.max_det)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(
                    dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            if not opt.quiet:
                s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            imc = im0.copy() if opt.save_crop or opt.save_clean_img else im0  # for opt.save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                if not opt.quiet:
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if opt.save_crop:
                        c = int(cls)
                        save_one_box(xyxy,
                                     im0,
                                     file=save_dir / 'crops' / names[c] /
                                     f'{p.stem}.jpg',
                                     BGR=True)

                    if save_img or opt.save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if opt.hide_labels else (
                            names[c]
                            if opt.hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors(c, True),
                                     line_thickness=opt.line_thickness)

                if opt.save_clean_img and not save_img:
                    clean_img = str(save_dir) + '/clean/' + str(
                        p.name) + f'_{frame}' + '.jpg'
                    cv2.imwrite(clean_img, imc)

            # Print time (inference + NMS)
            if not opt.quiet:
                print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
def detect(save_img=False,
           o_weights="weights/yolov5s.pt",
           o_source="inference/images",
           o_output="inference/output",
           o_img_size=640,
           o_conf_thres=0.4,
           o_iou_thres=0.5,
           o_fourcc="mp4v",
           o_device='',
           o_view_img=True,
           o_save_txt=False,
           o_classes=None,
           o_agnostic_nms=False,
           o_augment=False):
    p = ''
    c1 = (0, 0)
    c2 = (0, 0)
    label_no_value = ''
    detection_result_list = []

    out, source, weights, view_img, save_txt, imgsz = \
        o_output, o_source, o_weights, o_view_img, o_save_txt, o_img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(o_device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        """
        前向传播 返回pred的shape是(1, num_boxes, 5+num_class)
        h,w为传入网络图片的长和宽,注意dataset在检测时使用了矩形推理,所以这里h不一定等于w
        num_boxes = h/32 * w/32 + h/16 * w/16 + h/8 * w/8
        pred[..., 0:4]为预测框坐标
        预测框坐标为xywh(中心点+宽长)格式
        pred[..., 4]为objectness置信度
        pred[..., 5:-1]为分类结果
        """

        pred = model(img, augment=o_augment)[0]
        # print(pred[..., 5:-1])

        # Apply NMS
        pred = non_max_suppression(pred,
                                   o_conf_thres,
                                   o_iou_thres,
                                   classes=o_classes,
                                   agnostic=o_agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                # Adjust predicted box coordinates: from the resized+padded
                # image back to the original image size
                # coordinates are in xyxy format at this point
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                # for c in det[:, -1].unique():
                #     n = (det[:, -1] == c).sum()  # detections per class
                #     s += '%g %ss, ' % (n, names[int(c)])  # add to string
                #     print(n.numpy())
                #     detection_result_list.append(names[int(c)])

                # Write out results
                for *xyxy, conf, cls in det:
                    label = '%s: %.2f' % (names[int(cls)], conf)
                    # example output -> label: car: 0.47
                    print("label: " + label)
                    label_no_value = '%s' % (names[int(cls)])
                    confidences_value = '%.2f' % conf
                    c1, c2 = plot_one_box(xyxy,
                                          im0,
                                          label=label,
                                          color=colors[int(cls)],
                                          line_thickness=3)
                    print(c1, c2, label_no_value)
                    if label_no_value == 'car' or label_no_value == 'truck':
                        print('Car detected! Start processing')
                        # car = Car()
                        # car.last_position = (c1, c2)
                        # car.stop_time = time.time()
                        # car_timing(car)

                        # Save the frame
                        img_name = str(time.time()).split('.')[0]
                        img_path = os.path.join(r'vehicle_imgs\cars',
                                                img_name) + '.jpg'
                        print("Saving image:", img_path)
                        frame_name = o_source
                        frame = cv2.imread(frame_name)
                        cv2.imwrite(img_path, frame)
                        detected_cars(time.time(), (c1, c2), img_path)
                    elif label_no_value == 'bicycle' or label_no_value == 'motorcycle':
                        print('Electric bike detected! Start timing!')

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # Convert xyxy (top-left + bottom-right) to xywh (center +
                    # width/height), normalize by w and h, listify, then save
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin':  # macOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    return c1, c2, label_no_value
Example #11
def run(
        weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
        source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
):
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], [0.0, 0.0, 0.0]
    for path, im, im0s, vid_cap, s in dataset:
        t1 = time_sync()
        im = torch.from_numpy(im).to(device)
        im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
        pred = model(im, augment=augment, visualize=visualize)
        t3 = time_sync()
        dt[1] += t3 - t2

        # NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        dt[2] += time_sync() - t3

        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(f'{txt_path}.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                    if save_crop:
                        save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Stream results
            im0 = annotator.result()
            if view_img:
                if p not in windows:
                    windows.append(p)
                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
def detect_recog():
    source, weights_detect, weights_recog, view_img, save_txt, imgsz_detect, imgsz_recog, save_img = opt.source, opt.weights_detect, opt.weights_recog, opt.view_img, opt.save_txt, opt.img_size_detect, opt.img_size_recog, opt.save_img

    # Set Dataloader
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))
    vid_path, vid_writer = None, None
    shmstream = source.startswith('/tmp/')
    if shmstream:
        source = f"shmsrc socket-path={source} \
                ! video/x-raw, format=BGR, width={int(imgsz_detect*4/3)}, height={imgsz_detect}, pixel-aspect-ratio=1/1, framerate=30/1 \
                ! decodebin \
                ! videoconvert \
                ! appsink"

        dataset = LoadStreamsBuffered(source, img_size=imgsz_detect)
    elif webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz_detect)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz_detect)

    # Directories
    if opt.save_dir == 'runs/exp':
        save_dir = Path(
            increment_path(Path(opt.project) / opt.name,
                           exist_ok=opt.exist_ok))  # increment run
    else:
        save_dir = Path(opt.save_dir)
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model_detect = attempt_load(weights_detect,
                                map_location=device)  # load FP32 model
    model_recog = attempt_load(weights_recog,
                               map_location=device)  # load FP32 model
    imgsz_detect = check_img_size(
        imgsz_detect, s=model_detect.stride.max())  # check img_size
    imgsz_recog = check_img_size(imgsz_recog,
                                 s=model_recog.stride.max())  # check img_size
    if half:
        model_detect.half()  # to FP16
        model_recog.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()
    else:
        modelc = None

    # Get names and colors
    names_detect = model_detect.module.names if hasattr(
        model_detect, 'module') else model_detect.names
    names_recog = model_recog.module.names if hasattr(
        model_recog, 'module') else model_recog.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names_detect]

    t0 = time.time()
    img = torch.zeros((1, 3, imgsz_detect, imgsz_detect),
                      device=device)  # init img
    img_lp = torch.zeros((1, 3, imgsz_recog, imgsz_recog),
                         device=device)  # init img
    if device.type != 'cpu':  # run once
        _ = model_detect(img.half() if half else img)
        _ = model_recog(img_lp.half() if half else img_lp)

    # Run inference
    shmcounter = 0
    for path, img, im0s, vid_cap in dataset:
        if img is None:
            continue

        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        t1 = time_synchronized()

        # Inference
        pred = model_detect(img, augment=opt.augment)[0]
        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres_detect,
                                   opt.iou_thres_detect,
                                   classes=opt.classes_detect,
                                   agnostic=opt.agnostic_nms)

        t2 = time_synchronized()
        all_t2_t1 = t2 - t1

        # Print time (inference + NMS)
        print('Done Detection. (%.3fs)' % (all_t2_t1))

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if shmstream:
                p, s, im0 = Path(
                    f"{shmcounter}.jpg"), '%g: ' % i, im0s[i].copy()
                shmcounter += 1
            elif webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')

            s += '%gx%g ' % img.shape[2:]  # print string
            # normalization gain whwh
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    # add to string
                    s += '%g %ss, ' % (n, names_detect[int(c)])

                # Write results
                # But first, Recognition
                all_t2_t1 = recog(det, im0, device, img_lp, imgsz_recog, half,
                                  model_recog, all_t2_t1, classify, modelc,
                                  names_recog, save_txt, gn, txt_path,
                                  save_img, view_img, colors)

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

        # Print time (inference + NMS)
        print('%sDone Recognition. (%.3fs)' % (s, all_t2_t1))

    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    print('Done. (%.3fs)' % (time.time() - t0))
Example #13
def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
        weights=ROOT / 'yolov5s.pt',  # weights path
        imgsz=(640, 640),  # image (height, width)
        batch_size=1,  # batch size
        device='cpu',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        include=('torchscript', 'onnx', 'coreml'),  # include formats
        half=False,  # FP16 half-precision export
        inplace=False,  # set YOLOv5 Detect() inplace=True
        train=False,  # model.train() mode
        optimize=False,  # TorchScript: optimize for mobile
        int8=False,  # CoreML/TF INT8 quantization
        dynamic=False,  # ONNX/TF: dynamic axes
        simplify=False,  # ONNX: simplify model
        opset=12,  # ONNX: opset version
        topk_per_class=100,  # TF.js NMS: topk per class to keep
        topk_all=100,  # TF.js NMS: topk for all classes to keep
        iou_thres=0.45,  # TF.js NMS: IoU threshold
        conf_thres=0.25  # TF.js NMS: confidence threshold
        ):
    t = time.time()
    include = [x.lower() for x in include]
    tf_exports = list(x in include for x in ('saved_model', 'pb', 'tflite', 'tfjs'))  # TensorFlow exports
    imgsz *= 2 if len(imgsz) == 1 else 1  # expand
    file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights)

    # Load PyTorch model
    device = select_device(device)
    assert not (device.type == 'cpu' and half), '--half only compatible with GPU export, i.e. use --device 0'
    model = attempt_load(weights, map_location=device, inplace=True, fuse=True)  # load FP32 model
    nc, names = model.nc, model.names  # number of classes, class names

    # Input
    gs = int(max(model.stride))  # grid size (max stride)
    imgsz = [check_img_size(x, gs) for x in imgsz]  # verify img_size are gs-multiples
    im = torch.zeros(batch_size, 3, *imgsz).to(device)  # image size(1,3,320,192) BCHW iDetection

    # Update model
    if half:
        im, model = im.half(), model.half()  # to FP16
    model.train() if train else model.eval()  # training mode = no Detect() layer grid construction
    for k, m in model.named_modules():
        if isinstance(m, Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        elif isinstance(m, Detect):
            m.inplace = inplace
            m.onnx_dynamic = dynamic
            # m.forward = m.forward_export  # assign forward (optional)

    for _ in range(2):
        y = model(im)  # dry runs
    print(f"\n{colorstr('PyTorch:')} starting from {file} ({file_size(file):.1f} MB)")

    # Exports
    if 'torchscript' in include:
        export_torchscript(model, im, file, optimize)
    if 'onnx' in include:
        export_onnx(model, im, file, opset, train, dynamic, simplify)
    if 'coreml' in include:
        export_coreml(model, im, file)

    # TensorFlow Exports
    if any(tf_exports):
        pb, tflite, tfjs = tf_exports[1:]
        assert not (tflite and tfjs), 'TFLite and TF.js models must be exported separately, please pass only one type.'
        model = export_saved_model(model, im, file, dynamic, tf_nms=tfjs, agnostic_nms=tfjs,
                                   topk_per_class=topk_per_class, topk_all=topk_all, conf_thres=conf_thres,
                                   iou_thres=iou_thres)  # keras model
        if pb or tfjs:  # pb prerequisite to tfjs
            export_pb(model, im, file)
        if tflite:
            export_tflite(model, im, file, int8=int8, data=data, ncalib=100)
        if tfjs:
            export_tfjs(model, im, file)

    # Finish
    print(f'\nExport complete ({time.time() - t:.2f}s)'
          f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
          f'\nVisualize with https://netron.app')
def detect(save_img=False):
    print_div('INIT')
    out, source, weights, view_img, save_txt, imgsz, count = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.threads

    # Initialize
    print_div('GET DEVICE')
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    print_div('LOAD MODEL')
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    print_div('LOAD MODEL_CLASSIFIER')
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Get names and colors
    print_div('SET LABEL COLOR')
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    ###############################################################################
    print_div("RUN INFERENCE")
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    # Run multithreaded decode + detect
    q = []
    rescale = 0.5
    threads = []
    img = [None] * 9
    videowrite = []
    time_count = time.time()
    if os.path.isdir(source):
        for i in range(0, 1):
            video_path = source + "/test" + str(i + 1) + ".mp4"
            cap = cv2.VideoCapture(video_path)
            videowrite.append(
                cv2.VideoWriter(r'end' + str(i + 1) + '.mp4', -1, 20,
                                (960, 540)))
            q.append(Queue())
            threads.append(
                Thread(target=multithreading,
                       args=(cap, device, half, names, colors, model, q[i])))
            threads[i].start()
            threads[i].join()
        for k in range(0, q[0].qsize()):
            for j in range(0, 1):
                img[j] = q[j].get()
                re_img0 = (int(img[j].shape[1] * rescale),
                           int(img[j].shape[0] * rescale))

                img[j] = cv2.resize(img[j], re_img0)
                videowrite[j].write(img[j])
            print(k)
            #mix= np.vstack((img[0], img[1])).toarray()
            #mix1= np.hstack((img[3], img[4],img[5]))
            #mix2= np.hstack((img[6], img[7],img[8]))
            #mix=np.vstack((mix, mix1,mix2))
            #cv2.imshow('Stream_Detected',img[0])
            #cv2.waitKey(1)
    # After break
    print("Spending time: " + str(time.time() - time_count))
Example #15
def run(
        weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
        source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
):
    source = str(source)
    save_img = not nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name,
                              exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = str(weights[0] if isinstance(weights, list) else weights)
    classify, suffix, suffixes = False, Path(w).suffix.lower(), [
        '.pt', '.onnx', '.tflite', '.pb', ''
    ]
    check_suffix(w, suffixes)  # check weights have acceptable suffix
    pt, onnx, tflite, pb, saved_model = (suffix == x
                                         for x in suffixes)  # backend booleans
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = torch.jit.load(w) if 'torchscript' in w else attempt_load(
            weights, map_location=device)
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(
            model, 'module') else model.names  # get class names
        if half:
            model.half()  # to FP16
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            modelc.load_state_dict(
                torch.load('resnet50.pt',
                           map_location=device)['model'])  # load weights
            modelc.to(device).eval()
    elif onnx:
        if dnn:
            # check_requirements(('opencv-python>=4.5.4',))
            net = cv2.dnn.readNetFromONNX(w)
        else:
            check_requirements(
                ('onnx',
                 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
            import onnxruntime
            session = onnxruntime.InferenceSession(w, None)
    else:  # TensorFlow models
        check_requirements(('tensorflow>=2.4.1', ))
        import tensorflow as tf
        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt

            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(
                    lambda: tf.compat.v1.import_graph_def(gd, name=""),
                    [])  # wrapped import
                return x.prune(
                    tf.nest.map_structure(x.graph.as_graph_element, inputs),
                    tf.nest.map_structure(x.graph.as_graph_element, outputs))

            graph_def = tf.Graph().as_graph_def()
            graph_def.ParseFromString(open(w, 'rb').read())
            frozen_func = wrap_frozen_graph(gd=graph_def,
                                            inputs="x:0",
                                            outputs="Identity:0")
        elif saved_model:
            model = tf.keras.models.load_model(w)
        elif tflite:
            interpreter = tf.lite.Interpreter(
                model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            int8 = input_details[0][
                'dtype'] == np.uint8  # is TFLite quantized uint8 model
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    if pt and device.type != 'cpu':
        model(
            torch.zeros(1, 3, *imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    dt, seen = [0.0, 0.0, 0.0], 0
    for path, img, im0s, vid_cap in dataset:
        t1 = time_sync()
        if onnx:
            img = img.astype('float32')
        else:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        if pt:
            visualize = increment_path(save_dir / Path(path).stem,
                                       mkdir=True) if visualize else False
            pred = model(img, augment=augment, visualize=visualize)[0]
        elif onnx:
            if dnn:
                net.setInput(img)
                pred = torch.tensor(net.forward())
            else:
                pred = torch.tensor(
                    session.run([session.get_outputs()[0].name],
                                {session.get_inputs()[0].name: img}))
        else:  # tensorflow model (tflite, pb, saved_model)
            imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
            if pb:
                pred = frozen_func(x=tf.constant(imn)).numpy()
            elif saved_model:
                pred = model(imn, training=False).numpy()
            elif tflite:
                if int8:
                    scale, zero_point = input_details[0]['quantization']
                    imn = (imn / scale + zero_point).astype(
                        np.uint8)  # de-scale
                interpreter.set_tensor(input_details[0]['index'], imn)
                interpreter.invoke()
                pred = interpreter.get_tensor(output_details[0]['index'])
                if int8:
                    scale, zero_point = output_details[0]['quantization']
                    pred = (pred.astype(np.float32) -
                            zero_point) * scale  # re-scale
            pred[..., 0] *= imgsz[1]  # x
            pred[..., 1] *= imgsz[0]  # y
            pred[..., 2] *= imgsz[1]  # w
            pred[..., 3] *= imgsz[0]  # h
            pred = torch.tensor(pred)
        t3 = time_sync()
        dt[1] += t3 - t2

        # NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes,
                                   agnostic_nms,
                                   max_det=max_det)
        dt[2] += time_sync() - t3

        # Second-stage classifier (optional)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(
                    dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0,
                                  line_width=line_thickness,
                                  example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh,
                                conf) if save_conf else (cls,
                                                         *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (
                            names[c]
                            if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        if save_crop:
                            save_one_box(xyxy,
                                         imc,
                                         file=save_dir / 'crops' / names[c] /
                                         f'{p.stem}.jpg',
                                         BGR=True)

            # Print time (inference-only)
            print(f'{s}Done. ({t3 - t2:.3f}s)')

            # Stream results
            im0 = annotator.result()
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer[i] = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer[i].write(im0)

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    print(
        f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}'
        % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
def main(string):
    # set parameters
    collapse_check_list = [148, 149, 150]  # look-ahead offsets (frames) for confirming new tracks
    model_path = os.path.join(base_path,'yolov5l_19_0.988_0.764.pt')
    model_path2 = os.path.join(base_path,'yolov5l_34_0.988_0.794.pt')
    model_path3 = os.path.join(base_path,'yolov5l_34_0.989_0.769.pt')

    json_path = os.path.join(base_path,'t1_res_U0000000279.json')
    
    img_size = 640
    confidence_threshold = 0.175     #0.1, 0.2
    iou_threshold = 0.4     #0.4, 0.5
    tracking_iou = 0.65
    device_choice = ''
    fps_collapse_rate = 150
    batch_size = 20
    single_cls = True
    
    device = select_device(device_choice)
    half = device.type != 'cpu'  # half precision only supported on CUDA
    model = attempt_load(model_path, map_location=device)  # load FP32 model
    model2 = attempt_load(model_path2, map_location=device)  # load FP32 model
    model3 = attempt_load(model_path3, map_location=device)  # load FP32 model
    img_size = check_img_size(img_size, s=model.stride.max())  # check img_size
    if half:
        model.half()
        model2.half()
        model3.half()
    img = torch.zeros((1, 3, img_size, img_size), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    _ = model2(img.half() if half else img) if device.type != 'cpu' else None  # run once
    _ = model3(img.half() if half else img) if device.type != 'cpu' else None  # run once

    with open(json_path, 'w') as f:
        json_file = dict()
        json_file['annotations'] = []
        json.dump(json_file, f)

    for folder_path in os.listdir(string):

        if os.path.splitext(folder_path)[-1] == '.cache':
            continue
        
        folder_path = os.path.join(string,folder_path)

        info_of_video = []
        Answer_Manager_list = []

        dataloader = create_dataloader(folder_path, img_size, batch_size, model.stride.max(), single_cls, pad=0.5, rect=False)[0]
        
        for batch_i, (img, _, paths, shapes) in enumerate(dataloader):
            img = img.to(device, non_blocking=True)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            nb, _, height, width = img.shape  # batch size, channels, height, width
            
            with torch.no_grad():
                # Run model
                inf_out, train_out = model(img, augment=True)  # inference and training outputs
                inf_out2, train_out2 = model2(img, augment=True)
                inf_out3, train_out3 = model3(img, augment=True)

                inf_out = torch.cat([inf_out, inf_out2, inf_out3], 1)
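                # Model ensembling: the raw candidate boxes from all three
                # models are concatenated along the box dimension, sorted per
                # image by objectness (below), and fused by a single NMS pass
                # rather than merging per-model results.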
                
                for index, dets in enumerate(inf_out):
                    _, idx = torch.sort(dets[:, 4], 0, descending=True)
                    inf_out[index] = dets[idx]

                # Run NMS           
                output = non_max_suppression(inf_out, conf_thres=confidence_threshold, iou_thres=iou_threshold)

            for si, pred in enumerate(output):
                path = Path(paths[si])
                pred_boxes = np.asarray([])
                pred_scores = np.asarray([])

                if len(pred):
                    pred[:, :4] = scale_coords(img[si].shape[1:], pred[:, :4], shapes[si][0], shapes[si][1])

                    pred_boxes = np.asarray(pred[:,:4].tolist())
                    pred_scores = np.asarray(pred[:,4].tolist())

                    delete_index = np.where(np.logical_or(abs(pred_boxes[:,0]-pred_boxes[:,2])<= 32,abs(pred_boxes[:,1]-pred_boxes[:,3])<=32))
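                    # boxes whose width or height is at most 32 px are dropped
                    # as presumed false positives (too small to be valid
                    # detections at this resolution)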

                    pred_scores = np.delete(pred_scores,delete_index,axis=0)
                    pred_boxes = np.delete(pred_boxes,delete_index,axis=0)

                info_of_frame = dict()
                info_of_frame['file_name'] = os.path.basename(str(path))
                info_of_frame['boxes'] = pred_boxes.astype('int')
                info_of_frame['scores'] = pred_scores

                info_of_video.append(info_of_frame)

        #tracking
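        # Simple IoU tracker over the buffered per-frame detections:
        #   - Answer_Manager_list holds the active tracks; each track keeps its
        #     last box, last score, and a 'stack' counter (its age in frames).
        #   - A track matched to a detection (IoU >= tracking_iou) is updated
        #     with that detection; an unmatched track coasts on its last box
        #     while young and is dropped once stack reaches fps_collapse_rate.
        #   - A new track is opened only when a detection reappears at one of
        #     the look-ahead offsets in collapse_check_list (148-150 frames
        #     later), which suppresses one-off false positives.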
        for index, info in enumerate(info_of_video):
            
            info_boxes = info['boxes']
            info_scores = info['scores']

            info_of_video[index]['boxes'] = []
            info_of_video[index]['scores'] = []

            # check whether any answer (track) managers exist
            if len(Answer_Manager_list) != 0:
            
                # check whether this frame has predictions
                if len(info_scores) != 0:
                    
                    delete_index_list = []

                    for idx,answer in enumerate(Answer_Manager_list):
                        delete_index, iou = iou_matching(answer['box'],info_boxes)

                        if iou >= tracking_iou:
                            Answer_Manager_list[idx]['box'] = info_boxes[delete_index]
                            Answer_Manager_list[idx]['score'] = info_scores[delete_index]
                            Answer_Manager_list[idx]['stack'] += 1

                            info_of_video[index]['boxes'].append(info_boxes[delete_index].tolist())
                            info_of_video[index]['scores'].append(info_scores[delete_index].tolist())

                            # del info_boxes[delete_index]
                            # del info_scores[delete_index]
                            info_boxes = np.delete(info_boxes,[delete_index],axis=0)
                            info_scores = np.delete(info_scores,[delete_index],axis=0)

                        else:

                            if answer['stack'] >= fps_collapse_rate:
                                delete_index_list.append(idx)
                            
                            else:
                                Answer_Manager_list[idx]['stack'] += 1
                                info_of_video[index]['boxes'].append(answer['box'].tolist())
                                info_of_video[index]['scores'].append(answer['score'].tolist())
                        
                    Answer_Manager_list = [a for j, a in enumerate(Answer_Manager_list) if j not in delete_index_list]

                    if index < len(info_of_video)-fps_collapse_rate:
                        for info_box, info_score in zip(info_boxes[:],info_scores):
                            for check_val in collapse_check_list:
                                _, iou = iou_matching(info_box,np.array(info_of_video[index+check_val]['boxes']))

                                if iou >= tracking_iou:
                                    answer_dict = dict()
                                    answer_dict['box'] = info_box
                                    answer_dict['score'] = info_score
                                    answer_dict['stack'] = 1
                                    Answer_Manager_list.append(answer_dict)

                                    info_of_video[index]['boxes'].append(info_box.tolist())
                                    info_of_video[index]['scores'].append(info_score.tolist())
                                    
                                    break

                else:
                    delete_index_list = []

                    for idx,answer in enumerate(Answer_Manager_list):
                        if answer['stack']>=fps_collapse_rate:
                            delete_index_list.append(idx)
                        else:
                            Answer_Manager_list[idx]['stack'] += 1
                            info_of_video[index]['boxes'].append(answer['box'].tolist())
                            info_of_video[index]['scores'].append(answer['score'].tolist())
                    
                    Answer_Manager_list = [a for j, a in enumerate(Answer_Manager_list) if j not in delete_index_list]
            
            else:

                # check whether this frame has predictions
                if len(info_scores) != 0:  
                    if index < len(info_of_video)-fps_collapse_rate:
                        for info_box, info_score in zip(info_boxes[:],info_scores):
                            for check_val in collapse_check_list:
                                _, iou = iou_matching(info_box,np.array(info_of_video[index+check_val]['boxes']))

                                if iou >= tracking_iou:
                                    answer_dict = dict()
                                    answer_dict['box'] = info_box
                                    answer_dict['score'] = info_score
                                    answer_dict['stack'] = 1
                                    Answer_Manager_list.append(answer_dict)

                                    info_of_video[index]['boxes'].append(info_box.tolist())
                                    info_of_video[index]['scores'].append(info_score.tolist())
                                    
                                    break
        
        with open(json_path, 'r') as f:
            json_data = json.load(f)
        
        for frame_info in info_of_video:
            detection_num = len(frame_info['scores'])

            # if detection_num == 0:
            #     continue

            json_d = dict()
            json_d['file_name'] = frame_info['file_name']
            json_d['box'] = []

            for index in range(detection_num):
                j_d = dict()
                j_d['position'] = frame_info['boxes'][index]
                j_d['confidence_score'] = str(frame_info['scores'][index])
                json_d['box'].append(j_d)

            json_data['annotations'].append(json_d)
        
        with open(json_path, 'w') as f:
            json.dump(json_data, f)
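# iou_matching is defined elsewhere in this project. Below is a minimal sketch
# of the contract inferred from the call sites above (the name
# iou_matching_sketch and this implementation are assumptions, not the
# project's code): given one box and an array of candidate boxes in xyxy pixel
# coordinates, return the index of the best-overlapping candidate and that IoU
# (0.0 when there are no candidates).
import numpy as np

def iou_matching_sketch(box, boxes):
    if len(boxes) == 0:
        return 0, 0.0
    boxes = np.asarray(boxes, dtype=float)
    # intersection rectangle of `box` against every candidate
    x1 = np.maximum(box[0], boxes[:, 0])
    y1 = np.maximum(box[1], boxes[:, 1])
    x2 = np.minimum(box[2], boxes[:, 2])
    y2 = np.minimum(box[3], boxes[:, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    area_a = (box[2] - box[0]) * (box[3] - box[1])
    area_b = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    iou = inter / (area_a + area_b - inter + 1e-9)
    best = int(np.argmax(iou))
    return best, float(iou[best])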
def train(opt):
    # setup
    cuda = opt.device != 'cpu'
    device_id = [int(x) for x in opt.device.split(',')] if cuda else None
    torch_device = torch_utils.select_device(device=opt.device,
                                             batch_size=opt.batch_size)

    # data
    s = opt.source
    train_paths = [
        f'{s}/train/{x}' for x in os.listdir(f'{s}/train')
        if os.path.isdir(f'{s}/train/{x}')
    ]
    val_paths = [
        f'{s}/val/{x}' for x in os.listdir(f'{s}/val')
        if os.path.isdir(f'{s}/val/{x}')
    ]

    statuses = opt.cfg['statuses']
    nc = len(statuses)

    train_loader = datasets.StatusDataLoader(balance=opt.balance,
                                             dformat='behavior').get_loader(
                                                 trte_path=train_paths,
                                                 batch_size=opt.batch_size,
                                                 cfg=opt.cfg)
    val_loader = datasets.StatusDataLoader(balance=opt.balance,
                                           dformat='behavior').get_loader(
                                               trte_path=val_paths,
                                               batch_size=opt.batch_size,
                                               cfg=opt.cfg)

    # model
    model = resnet_zoo_behavior.attempt_load(model_name='resnet50',
                                             pretrained=False,
                                             num_classes=nc)
    model.nc = nc
    # loss function, optimizer, and learning-rate scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                opt.cfg['lr0'],
                                momentum=opt.cfg['momentum'],
                                weight_decay=opt.cfg['weight_decay'])
    lf = lambda x: (((1 + math.cos(x * math.pi / opt.epochs)) / 2)**1.0) * 0.8 + 0.2  # cosine
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
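    # lf maps epoch -> LR multiplier: cosine decay from 1.0 at epoch 0 down to
    # 0.2 at opt.epochs, so the final LR is 20% of lr0 rather than zero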

    if opt.resume is None:
        # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.95)
        start_epoch = 0
    else:
        model, optimizer, scheduler, start_epoch = general.load_resume(
            opt.resume, model, optimizer, torch_device)

    if cuda:
        model = model.to(torch_device)
        if torch.cuda.device_count() > 1:
            model = torch.nn.DataParallel(model, device_ids=device_id)

    scheduler.last_epoch = start_epoch - 1
    last_acc = 0
    for epoch in range(start_epoch, opt.epochs):
        model.train()
        confusion_matrix = np.zeros((nc, nc), dtype=np.int32)
        total_loss = 0
        total_time = 0
        for i, (input, target, img_path, status) in enumerate(train_loader):
            start_time = general.synchronize_time()
            # print(input.shape, target, img_path, status)
            input = input.to(torch_device)
            target = target.to(torch_device)
            output = model(input)  # [bs, nc] logits

            loss = criterion(output, target)
            total_loss += loss.item()
            # measure accuracy
            output = torch.argmax(output, dim=1)  # [bs, ]
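            # confusion_matrix[m, n]: row m is the ground-truth class, column n
            # the predicted class, so the diagonal counts correct predictions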
            target_np = target.cpu().numpy()
            output_np = output.cpu().numpy()
            for j in range(len(output_np)):
                confusion_matrix[int(target_np[j]), int(output_np[j])] += 1
            P, R, F1, acc = dm.general.confusion2score(confusion_matrix,
                                                       round=5)
            # print(target, output, '\n', confusion_matrix, F1, acc)
            t = general.synchronize_time() - start_time
            total_time += t
            train_ret = (
                f"\rTraining   [{epoch+1:>3}/{opt.epochs}] [{i+1:>3}/{len(train_loader):>3}] loss: {total_loss/(i+1):>8.4f} "
                f"mean_F1: {np.mean(F1)*100:>6.3f}% mean_acc: {acc*100:>6.3f}% batch_time: {total_time/(i+1):.4f}s ")
            print(train_ret, end='')
            expc.echo_info(train_ret, opt.results_file)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # break
        print('')
        scheduler.step()

        # validation
        confusion_matrix = np.zeros((nc, nc), dtype=np.int32)
        model.eval()
        total_time = 0
        for i, (input, target, img_path, statuses) in enumerate(val_loader):
            val_start_time = general.synchronize_time()
            with torch.no_grad():
                input.requires_grad = False
                input = input.to(torch_device)
                target = target.to(torch_device)
                output = model(input)  # [bs, nc]
                output = torch.argmax(output, dim=1)  # [bs, ]
                for j in range(len(output)):
                    m = int(target.cpu().numpy()[j])
                    n = int(output.cpu().numpy()[j])
                    confusion_matrix[m, n] += 1
                # print(confusion_matrix)
                total_time += (general.synchronize_time() - val_start_time)
                P, R, F1, acc = dm.general.confusion2score(confusion_matrix)
                val_ret = (
                    f'\rValidation [{epoch+1:>3}/{opt.epochs}] [{i+1:>3}/{len(val_loader):>3}] {"":>14} '
                    f'mean_F1: {np.mean(F1)*100:>6.3f}% mean_acc: {acc*100:>6.3f}% valid_time: {total_time:.4f}s')
                print(val_ret, end='')
        # val_ret = f'{val_ret} '
        # print(val_ret)
        print(f'\n{" ".join(opt.cfg["statuses"])}, confusion matrix:')
        expc.echo_info(val_ret, txt=opt.results_file)
        plt_cm_str = general.plot_confusion_matrix(confusion_matrix,
                                                   classes=opt.cfg['statuses'])
        expc.echo_info(plt_cm_str, txt=opt.results_file)

        # save model
        if opt.save:
            final_epoch = epoch + 1 == opt.epochs
            with open(f'{expc.exp_path}/{opt.results_file}', 'r') as f:
                result_data = f.readlines()

            ckpt = {
                'epoch': epoch + 1,
                'training_results': result_data,
                'model': model.module.state_dict() if hasattr(model, 'module') else model.state_dict(),
                'optimizer': None if final_epoch else optimizer.state_dict(),
                'classes': opt.cfg['statuses']
            }
            expc.save_model(ckpt, model_name='last.pt')

            if acc > last_acc:
                print(f'SOTA reached at epoch: {epoch+1}, ', end='')
                expc.save_model(ckpt, model_name='best.pt')
                last_acc = acc
                print('\n')
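# A minimal sketch of driving train() above with an argparse-style namespace.
# The field names are inferred from the attribute accesses inside train();
# every value below is illustrative, not a recommended configuration:
from types import SimpleNamespace

if __name__ == '__main__':
    opt = SimpleNamespace(
        device='0',               # '' or 'cpu' for CPU, '0,1' for multi-GPU
        batch_size=32,
        source='data/behavior',   # expects source/train/* and source/val/* folders
        balance=True,
        epochs=50,
        resume=None,              # or a checkpoint path for general.load_resume
        save=True,
        results_file='results.txt',
        cfg={'statuses': ['stand', 'walk', 'lie'],
             'lr0': 0.01, 'momentum': 0.937, 'weight_decay': 5e-4},
    )
    train(opt)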
Example #18
def capture(ip):
    if ip.endswith('6'):
        reader = easyocr.Reader(['en'], gpu=True, recog_network='english_g2')
        device = select_device('0')

        half = device.type != 'cpu'  # half precision only supported on CUDA

        weights = '/home/qfactory/Camera/best.pt'
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(640, s=model.stride.max())  # check img_size
        if half:
            model.half()

        names = model.module.names if hasattr(model, 'module') else model.names
        colors = [[random.randint(0, 255) for _ in range(3)]
                  for _ in range(len(names))]

    n = 0
    while True:
        n += 1
        cap = cv2.VideoCapture(
            f"rtsp://*****:*****@{ip}:554/Streaming/channels/101")
        # print(cap.isOpened())

        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160)
        cap.set(cv2.CAP_PROP_FPS, 30)
        cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
        cap.set(cv2.CAP_PROP_FOURCC,
                cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))

        if cap.isOpened():

            ret, frame = cap.read()
            cap.release()  # release immediately; only one frame is used per connection
            if ret:
                path = '/home/qfactory/Camera/tmp'  # shared folder for both cameras' frames
                out = '/home/qfactory/Camera/out'
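                # Two-camera handshake (as this code reads): the camera whose
                # IP ends in '6' owns the detector. It writes its frame, waits
                # until the shared tmp folder holds both cameras' images, runs
                # detection on the pair in a worker thread, then clears the
                # folder. The camera ending in '5' only contributes its frame.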

                if ip.endswith('6'):

                    os.makedirs(path, exist_ok=True)
                    cv2.imwrite(f'{path}/2_{time.ctime()}.jpg', frame)
                    while True:
                        if len(os.listdir(path)) == 2:
                            t1 = threading.Thread(target=detect,
                                                  args=[
                                                      path, out, model, names,
                                                      colors, imgsz, half,
                                                      device, reader
                                                  ])
                            t1.start()
                            t1.join()
                            shutil.rmtree(path, ignore_errors=True)
                            break
                        else:
                            time.sleep(0.2)
                            continue
                elif ip.endswith('5'):
                    try:
                        if len(os.listdir(path)) == 1:
                            cv2.imwrite(f'{path}/1_{time.ctime()}.jpg', frame)
                    except FileNotFoundError:
                        pass  # tmp folder not created yet by the other camera
            else:
                continue
Example #19
def detect(save_img=False):
    print_div('INIT')
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
  
    # Initialize
    print_div('GET DEVICE')
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA
  
    # Load model
    print_div('LOAD MODEL')
    model = attempt_load(weights, map_location=device)  
    # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  
    # check img_size
    if half:
        model.half()  # to FP16
    
    # Second-stage classifier
    print_div('LOAD MODEL_CLASSIFIER')
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()
   
    # Get names and colors
    print_div('SET LABEL COLOR')
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    ###############################################################################
    print_div("RUN INFERENCE") 
    img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    video_path = source
    cap = cv2.VideoCapture(video_path)
    
    print_div('Start Play VIDEO')
    while cap.isOpened():   
        ret, frame = cap.read()
        t0 = time.time()
        
        if not ret: 
            print_div('No Frame')
            break

        fps_t1 = time.time()
        img, img0 = img_preprocess(frame)  # img: resized, img0: original
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        
        # Apply NMS: get the values of each prediction
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
      
        # Apply Classifier: get the label for those values
        if classify:
            pred = apply_classifier(pred, modelc, img, img0)
               
        # Draw Box
        for i, det in enumerate(pred):
            s = '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if det is not None and len(det):                   
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
                
                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)]) # add to string
               
                # Write results
                for *xyxy, conf, cls in reversed(det):
                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)
        
        # Print Results(inference + NMS)
        print_div('%sDone. (%.3fs)' % (s, t2 - t1))
       
        # Draw Image
        x, y, w, h = (img0.shape[1]//4), 25, (img0.shape[1]//2), 30
        cv2.rectangle(img0, (x, 10),(x+w, y+h), (0,0,0), -1)
        
        rescale = 0.5
        re_img0 = (int(img0.shape[1]*rescale) ,int(img0.shape[0]*rescale))

        cv2.putText(img0, '{} | inference: {:.4f}s | fps: {:.4f}'.format(opt.weights[0], t2-t1, 1/(time.time()-t0)),(x+20, y+20),cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,255),2)
        cv2.imshow('Stream_Detected', cv2.resize(img0, re_img0) )

        key = cv2.waitKey(1)
        if key == ord('q'): break

    # After break
    cap.release()
    cv2.destroyAllWindows()
Example #20
def test(
    data,
    weights=None,
    batch_size=16,
    imgsz=640,
    conf_thres=0.001,
    iou_thres=0.6,  # for NMS
    save_json=False,
    single_cls=False,
    augment=False,
    verbose=False,
    model=None,
    dataloader=None,
    save_dir="",
    merge=False,
    save_txt=False,
):
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device

    else:  # called directly
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)
        merge, save_txt = opt.merge, opt.save_txt  # use Merge NMS, save *.txt labels
        if save_txt:
            out = Path("inference/output")
            if os.path.exists(out):
                shutil.rmtree(out)  # delete output folder
            os.makedirs(out)  # make new output folder

        # Remove previous
        for f in glob.glob(str(Path(save_dir) / "test_batch*.jpg")):
            os.remove(f)

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != "cpu"  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data["nc"])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()
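    # iouv holds the 10 IoU thresholds 0.50, 0.55, ..., 0.95; each prediction
    # is scored as correct at every threshold independently, which yields
    # COCO-style mAP@0.5:0.95 when averaged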

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = (model(img.half() if half else img)
             if device.type != "cpu" else None)  # run once
        path = (data["test"] if opt.task == "test" else data["val"]
                )  # path to val/test images
        dataloader = create_dataloader(
            path,
            imgsz,
            batch_size,
            model.stride.max(),
            opt,
            hyp=None,
            augment=False,
            cache=True,
            pad=0.5,
            rect=True,
        )[0]

    seen = 0
    names = model.names if hasattr(model, "names") else model.module.names
    coco91class = coco80_to_coco91_class()
    s = ("%20s" + "%12s" * 6) % ("Class", "Images", "Targets", "P", "R", "mAP@.5", "mAP@.5:.95")
    p, r, f1, mp, mr, map50, map, t0, t1 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    evaluator = COCOEvaluator(root=DATA_ROOT,
                              model_name=opt.weights.replace(".pt", ""))
    for batch_i, (img, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(
                img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                loss += compute_loss([x.float() for x in train_out], targets,
                                     model)[1][:3]  # GIoU, obj, cls

            # Run NMS
            t = time_synchronized()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres,
                                         merge=merge)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((
                        torch.zeros(0, niou, dtype=torch.bool),
                        torch.Tensor(),
                        torch.Tensor(),
                        tcls,
                    ))
                continue

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                x = pred.clone()
                x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4],
                                        shapes[si][0],
                                        shapes[si][1])  # to original
                for *xyxy, conf, cls in x:
                    xywh = ((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                             gn).view(-1).tolist())  # normalized xywh
                    with open(str(out / Path(paths[si]).stem) + ".txt",
                              "a") as f:
                        f.write(
                            ("%g " * 5 + "\n") % (cls, *xywh))  # label format

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = Path(paths[si]).stem
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box, shapes[si][0],
                             shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    result = {
                        "image_id":
                        int(image_id) if image_id.isnumeric() else image_id,
                        "category_id": coco91class[int(p[5])],
                        "bbox": [round(x, 3) for x in b],
                        "score": round(p[4], 5),
                    }
                    jdict.append(result)

                    # evaluator.add([result])
                    # if evaluator.cache_exists:
                    #    break

            # # Assign all predictions as incorrect
            # correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            # if nl:
            #     detected = []  # target indices
            #     tcls_tensor = labels[:, 0]
            #
            #     # target boxes
            #     tbox = xywh2xyxy(labels[:, 1:5]) * whwh
            #
            #     # Per target class
            #     for cls in torch.unique(tcls_tensor):
            #         ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # prediction indices
            #         pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # target indices
            #
            #         # Search for detections
            #         if pi.shape[0]:
            #             # Prediction to target ious
            #             ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices
            #
            #             # Append detections
            #             detected_set = set()
            #             for j in (ious > iouv[0]).nonzero(as_tuple=False):
            #                 d = ti[i[j]]  # detected target
            #                 if d.item() not in detected_set:
            #                     detected_set.add(d.item())
            #                     detected.append(d)
            #                     correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
            #                     if len(detected) == nl:  # all targets already located in image
            #                         break
            #
            # # Append statistics (correct, conf, pcls, tcls)
            # stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # # Plot images
        # if batch_i < 1:
        #     f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i)  # filename
        #     plot_images(img, targets, paths, str(f), names)  # ground truth
        #     f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
        #     plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions

    evaluator.add(jdict)
    evaluator.save()
Example #21
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device) # CPU
    # device = torch.device('cuda:1') # GPU
    print(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            ret_val = ' '
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
                    ret_val = f"{names[int(c)]}{'s' * (n > 1)} "

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                yield [ret_val.split(' ')[0],im0]
                # cv2.imshow(str(p), im0)
                # cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
Example #22
def test(
        data,
        weights=None,
        batch_size=32,
        imgsz=640,
        conf_thres=0.001,
        iou_thres=0.6,  # for NMS
        save_json=False,
        single_cls=False,
        augment=False,
        verbose=False,
        model=None,
        dataloader=None,
        save_dir=Path(''),  # for saving images
        save_txt=False,  # for auto-labelling
        save_hybrid=False,  # for hybrid auto-labelling
        save_conf=False,  # save auto-label confidences
        plots=True,
        wandb_logger=None,
        compute_loss=None,
        half_precision=True,
        is_coco=False):
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device

    else:  # called directly
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)

        # Directories
        save_dir = Path(
            increment_path(Path(opt.project) / opt.name,
                           exist_ok=opt.exist_ok))  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(
            parents=True, exist_ok=True)  # make dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
        imgsz = check_img_size(imgsz, s=gs)  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != 'cpu' and half_precision  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    if isinstance(data, str):
        is_coco = data.endswith('coco.yaml')
        with open(data) as f:
            data = yaml.load(f, Loader=yaml.SafeLoader)
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs = 0
    if wandb_logger and wandb_logger.wandb:
        log_imgs = min(wandb_logger.log_imgs, 100)
    # Dataloader
    if not training:
        if device.type != 'cpu':
            model(
                torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                    next(model.parameters())))  # run once
        task = opt.task if opt.task in (
            'train', 'val', 'test') else 'val'  # path to train/val/test images
        dataloader = create_dataloader(data[task],
                                       imgsz,
                                       batch_size,
                                       gs,
                                       opt,
                                       pad=0.5,
                                       rect=True,
                                       prefix=colorstr(f'{task}: '))[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {
        k: v
        for k, v in enumerate(
            model.names if hasattr(model, 'names') else model.module.names)
    }
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width

        with torch.no_grad():
            # Run model
            t = time_synchronized()
            out, train_out = model(
                img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if compute_loss:
                loss += compute_loss([x.float() for x in train_out],
                                     targets)[1][:3]  # box, obj, cls

            # Run NMS
            targets[:, 2:] *= torch.Tensor([width, height, width,
                                            height]).to(device)  # to pixels
            lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)
                  ] if save_hybrid else []  # for autolabelling
            t = time_synchronized()
            out = non_max_suppression(out,
                                      conf_thres=conf_thres,
                                      iou_thres=iou_thres,
                                      labels=lb,
                                      multi_label=True)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(out):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Predictions
            predn = pred.clone()
            scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0],
                         shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                            gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh,
                            conf) if save_conf else (cls,
                                                     *xywh)  # label format
                    with open(save_dir / 'labels' / (path.stem + '.txt'),
                              'a') as f:
                        f.write(('%g ' * len(line)).rstrip() % line + '\n')

            # W&B logging - Media Panel Plots
            if len(wandb_images) < log_imgs and wandb_logger.current_epoch > 0:  # check for test operation
                if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0:
                    box_data = [{
                        "position": {
                            "minX": xyxy[0],
                            "minY": xyxy[1],
                            "maxX": xyxy[2],
                            "maxY": xyxy[3]
                        },
                        "class_id": int(cls),
                        "box_caption": "%s %.3f" % (names[cls], conf),
                        "scores": {
                            "class_score": conf
                        },
                        "domain": "pixel"
                    } for *xyxy, conf, cls in pred.tolist()]
                    boxes = {
                        "predictions": {
                            "box_data": box_data,
                            "class_labels": names
                        }
                    }  # inference-space
                    wandb_images.append(
                        wandb_logger.wandb.Image(img[si],
                                                 boxes=boxes,
                                                 caption=path.name))
            if wandb_logger and wandb_logger.wandb_run:
                wandb_logger.log_training_progress(predn, path, names)

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(
                    path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id':
                        image_id,
                        'category_id':
                        coco91class[int(p[5])] if is_coco else int(p[5]),
                        'bbox': [round(x, 3) for x in b],
                        'score':
                        round(p[4], 5)
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0],
                                  niou,
                                  dtype=torch.bool,
                                  device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_coords(img[si].shape[1:], tbox, shapes[si][0],
                             shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(
                        predn, torch.cat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(
                        -1)  # prediction indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(
                        -1)  # target indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(
                            1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[
                                    pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(
                                        detected
                                ) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
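            # each per-image tuple holds (correct[n_pred, n_iou] booleans,
            # objectness confidence, predicted class, target classes); the
            # tuples are concatenated after the loop and fed to ap_per_class()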

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f'test_batch{batch_i}_labels.jpg'  # labels
            Thread(target=plot_images,
                   args=(img, targets, paths, f, names),
                   daemon=True).start()
            f = save_dir / f'test_batch{batch_i}_pred.jpg'  # predictions
            Thread(target=plot_images,
                   args=(img, output_to_target(out), paths, f, names),
                   daemon=True).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats,
                                              plot=plots,
                                              save_dir=save_dir,
                                              names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12i' * 2 + '%12.3g' * 4  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3
              for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print(
            'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
            % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb_logger and wandb_logger.wandb:
            val_batches = [
                wandb_logger.wandb.Image(str(f), caption=f.name)
                for f in sorted(save_dir.glob('test*.jpg'))
            ]
            wandb_logger.log({"Validation": val_batches})
    if wandb_images:
        wandb_logger.log({"Bounding Box Debugger/Images": wandb_images})

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights
                 ).stem if weights is not None else ''  # weights
        anno_json = '../coco/annotations/instances_val2017.json'  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, 'bbox')
            if is_coco:
                eval.params.imgIds = [
                    int(Path(x).stem) for x in dataloader.dataset.img_files
                ]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print(f'pycocotools unable to run: {e}')

    # Return results
    model.float()  # for training
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map,
            *(loss.cpu() / len(dataloader)).tolist()), maps, t
def detect(save_img=False):
    source, start_frame, end_frame, weights, view_img, save_txt, imgsz, yaml_file = \
        opt.source, opt.start_frame, opt.end_frame, opt.weights, opt.view_img, \
        opt.save_txt, opt.img_size, opt.yaml_file

    # initialize Tracker and sim
    tracker = Tracker(yaml_file)  # yaml file to read classes
    sim = Sim(yaml_file=yaml_file)  # here

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # initialize classifier for feature vector
    detect_degradation = False
    if detect_degradation:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    save_img = True
    dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    first_frame = True
    f = 0  # frames skipped outside the [start_frame, end_frame) window

    with Bar('detection...', max=dataset.nframes) as bar:
        for path, img, im0s, vid_cap in dataset:

            # pass info to tracker
            if first_frame:
                fps = vid_cap.get(cv2.CAP_PROP_FPS)

                width = vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float `width`
                height = vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

                duration = vid_cap.get(cv2.CAP_PROP_FRAME_COUNT) / vid_cap.get(
                    cv2.CAP_PROP_FPS)
                tracker.info(fps=fps,
                             save_dir=save_dir,
                             video_duration=duration)
                sim.info(fps=fps,
                         save_dir=save_dir,
                         width=width,
                         height=height)
                first_frame = False

            img_for_sim = img.copy()

            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0

            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            # print(img.shape) # [1,3, W,H]
            # t1 = time_synchronized()

            if start_frame <= dataset.frame < end_frame:  # only run inference inside the requested frame window

                pred = model(img, augment=opt.augment)[0]  # model output is a tuple; [0] is the inference tensor
                # Apply NMS
                pred = non_max_suppression(pred,
                                           opt.conf_thres,
                                           opt.iou_thres,
                                           classes=opt.classes,
                                           agnostic=opt.agnostic_nms)

                # Apply the second-stage classifier
                if detect_degradation:
                    pred = apply_classifier(pred, modelc, img, im0s)

            else:
                f += 1
                pred = [torch.Tensor([])]

            for i, det in enumerate(pred):  # detections per image
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

                p = Path(p)  # to Path
                save_path = str(save_dir / p.name)  # img.jpg
                # txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1,
                                              0]]  # normalization gain whwh
                clean_im = im0.copy()  # clean copy used by the rust classifier and the similarity check

                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    # Write results
                    lines = []  # label rows, written to txt when the frame is not similar
                    for *xyxy, conf, cls in reversed(det):
                        #xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh

                        # extract properties from the detection
                        nbox = (
                            xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                            gn).view(-1).tolist()  # xywh in normalized form
                        cl = int(cls.item())
                        bbox = torch.tensor(xyxy).view(1, 4)[0].tolist()

                        # apply classifier
                        rust = False
                        if nbox[2] * nbox[3] >= 0.2:
                            rust = rust_classifier(clean_im, nbox)
                            print(rust)

                        # pass properties to the Tracker
                        id = tracker.update(
                            nbox, bbox, cl,
                            frame)  # object put into the tracker
                        if rust:
                            id = id + '_rust'

                        l = [int(cls.item())] + nbox
                        lines.append(l)

                        if save_img or view_img:  # Add bbox to image
                            label = f'{names[int(cls)]} {conf:.2f}'
                            plot_one_box_ours(xyxy,
                                              im0,
                                              objectID=id,
                                              label=label,
                                              color=colors[int(cls)],
                                              line_thickness=3)  # label=label

                    # save the detections in case the inspector wants to label the suggested images;
                    # the similarity check returns 'sim' or 'not_sim', and for 'not_sim'
                    # frames we keep the detections as well
                    s_ = sim.new_im(clean_im, frame)
                    # s_ = sim.new_im(img_for_sim, frame)  # alternative: check similarity on the network input instead
                    if s_ == 'not_sim':
                        sim.save_detection(lines)

                # save video
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer

                    fourcc = 'mp4v'  # output video codec
                    w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter(
                        save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                        (w, h))

                vid_writer.write(im0)
                # res = cv2.resize(im0, (416,416))
                # cv2.imshow('frame', res)

                # cv2.imshow('frame', im0)
                # cv2.waitKey(1)

            bar.next()  # advance the progress bar once per frame

    tracker.print_results()
    sim.end()

    if save_txt or save_img:
        print(f"Results saved to {save_dir}")

    # print('Mean time to assign id: ', np.mean(id_time))
    # print('With variance: ', np.var(id_time))

    print(f'Done. ({time.time() - t0:.3f}s)')
    print(f'{f} frames skipped outside the [start_frame, end_frame) window')
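
detect() pulls all of its settings from a module-level opt namespace. A minimal sketch of wiring that up with argparse; the flag names are inferred from the opt.* attributes used above, so treat them as assumptions:

import argparse
import torch

parser = argparse.ArgumentParser()
parser.add_argument('--source', type=str, default='inspection.mp4')
parser.add_argument('--start-frame', type=int, default=0)
parser.add_argument('--end-frame', type=int, default=10**9)
parser.add_argument('--weights', type=str, default='best.pt')
parser.add_argument('--yaml-file', type=str, default='classes.yaml')
parser.add_argument('--img-size', type=int, default=640)
parser.add_argument('--conf-thres', type=float, default=0.25)
parser.add_argument('--iou-thres', type=float, default=0.45)
parser.add_argument('--classes', nargs='+', type=int, default=None)
parser.add_argument('--agnostic-nms', action='store_true')
parser.add_argument('--augment', action='store_true')
parser.add_argument('--device', default='')
parser.add_argument('--view-img', action='store_true')
parser.add_argument('--save-txt', action='store_true')
parser.add_argument('--project', default='runs/detect')
parser.add_argument('--name', default='exp')
parser.add_argument('--exist-ok', action='store_true')
opt = parser.parse_args()

with torch.no_grad():
    detect()
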
Example #24
    def pyqt_detect(self, show_frame, tracked_X, tracked_Y):
        self.tracked_X = tracked_X
        self.tracked_Y = tracked_Y
        print("XXXXXXXX", self.tracked_X)
        print("YYYYYYYY", self.tracked_Y)
        source = r'rtsp://' + cam_user + r':' + cam_psw + r'@' + cam_ip + r':' + cam_port + r'/id=1'
        # source=r'rtsp://*****:*****@192.168.1.110/id=1'
        # weights= r'./yolov5x.pt'
        view_img = True
        imgsz = 640
        # these would normally come from opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
        webcam = source.isnumeric() or source.endswith(
            '.txt') or source.lower().startswith(
                ('rtsp://', 'rtmp://', 'http://'))

        # Initialize
        set_logging()
        device = select_device()  # select the device
        # use float16 if the device is a GPU
        half = device.type != 'cpu'  # half precision only supported on CUDA

        # Load model
        # load the FP32 model; the requested image size must be divisible by 32 (adjusted and returned if not)
        model = attempt_load(weights, map_location=device)  # load FP32 model
        stride = int(model.stride.max())  # model stride
        imgsz = check_img_size(imgsz, s=stride)  # check img_size
        if half:
            # switch to float16
            model.half()  # to FP16

        # Set Dataloader
        # choose the dataloader according to the input source
        vid_path, vid_writer = None, None
        if webcam:
            view_img = check_imshow()
            cudnn.benchmark = True  # set True to speed up constant image size inference
            dataset = LoadStreams(source, img_size=imgsz, stride=stride)
        else:
            save_img = True
            # to display frames while detecting a video, set view_img = True here
            view_img = True
            dataset = LoadImages(source, img_size=imgsz, stride=stride)

        # Get names and colors
        # get the class names
        names = model.module.names if hasattr(model, 'module') else model.names
        # set the bounding-box colors
        colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

        # Run inference
        if device.type != 'cpu':
            model(
                torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                    next(model.parameters())))  # run once
        t0 = time.time()
        # run one forward pass first to verify that inference works
        """
           path: image/video path
           img: the image after resize + padding
           img0: the image at its original size
           cap: None when reading images; the video capture when reading video
        """
        for path, img, im0s, vid_cap in dataset:
            img = torch.from_numpy(img).to(device)
            # cast the image to float16 as well
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            # add a batch axis if one is missing
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=False)[0]
            """
                   前向传播 返回pred的shape是(1, num_boxes, 5+num_class)
                   h,w为传入网络图片的长和宽,注意dataset在检测时使用了矩形推理,所以这里h不一定等于w
                   num_boxes = h/32 * w/32 + h/16 * w/16 + h/8 * w/8
                   pred[..., 0:4]为预测框坐标
                   预测框坐标为xywh(中心点+宽长)格式
                   pred[..., 4]为objectness置信度
                   pred[..., 5:-1]为分类结果
            """

            # Apply NMS
            pred = non_max_suppression(pred,
                                       0.25,
                                       0.45,
                                       classes=None,
                                       agnostic=False)
            t2 = time_synchronized()

            # Process detections
            # process the detections for each frame
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                    ), dataset.count
                else:
                    p, s, im0, frame = path, '', im0s, getattr(
                        dataset, 'frame', 0)

                p = Path(p)  # to Path
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1,
                                              0]]  # normalization gain whwh
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    # rescale box coordinates from the resized+padded image back to the original image size
                    # the coordinates are in xyxy format at this point
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    # print the number of detections per class
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    # Write results
                    countCls = 0
                    self.predList.clear()

                    checkTracked = False
                    for *xyxy, conf, cls in reversed(det):
                        # draw the box
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)
                        c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]),
                                                                int(xyxy[3]))

                        # check whether the tracked target is linked to the previous frame
                        cX = 0
                        cY = 0
                        if self.TRACKFLAG:
                            cX = (c1[0] + c2[0]) / 2
                            cY = (c1[1] + c2[1]) / 2
                            # distance between this detection's center and the last tracked position
                            print(abs(self.tracked_X - cX),
                                  abs(self.tracked_Y - cY))

                            # if tracked, move the camera by the offset
                            print('loss:', self.tracked_X - self.midX)
                            if (self.tracked_X - self.midX < -10):
                                print('left:', self.tracked_X - self.midX)
                                self.ipcCon.Move(EnumPtzMoveType.moveleft,
                                                 self.cam_speed)
                                self.ipcCon.Move(EnumPtzMoveType.movestop)
                            elif (self.tracked_X - self.midX > 10):
                                print('right:', self.tracked_X - self.midX)
                                self.ipcCon.Move(EnumPtzMoveType.moveright,
                                                 self.cam_speed)
                                self.ipcCon.Move(EnumPtzMoveType.movestop)
                            else:

                                self.ipcCon.Move(EnumPtzMoveType.movestop)
                                # self.TRACKFLAG=False

                            if abs(self.tracked_X -
                                   cX) < 50 and abs(self.tracked_Y - cY) < 50:
                                self.tracked_X = cX
                                self.tracked_Y = cY
                                checkTracked = True
                                break
                                # print('tracked_X',self.tracked_X)
                                # print('tracked_Y',self.tracked_Y)
                            else:
                                checkTracked = False
                                self.ipcCon.Move(EnumPtzMoveType.movestop)
                        # store this frame's box so it can be associated with the next frame
                        predBox = (c1, c2)
                        self.predList.append(predBox)

                    if not checkTracked:
                        self.TRACKFLAG = False
                    print(self.TRACKFLAG)

                # Print time (inference + NMS)
                # print forward-pass + NMS time
                # print(f'{s}Done. ({t2 - t1:.3f}s)')

                # convert the pixel layout for display (BGR -> RGB, in place)
                cv2.cvtColor(im0, cv2.COLOR_BGR2RGB, im0)
                show_frame(im0)
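
The PTZ branch above decides whether a detection is still the tracked target with a simple center-distance gate: the new box center must stay within 50 px of the last tracked position on both axes. A standalone sketch of that heuristic (the function name and layout are mine, not part of the class):

def gate_association(tracked_xy, box, max_dist=50):
    """Return the new center if the box stays within the gate, else None.

    tracked_xy: (x, y) center from the previous frame
    box: (x1, y1, x2, y2) detection in pixels
    """
    cx = (box[0] + box[2]) / 2
    cy = (box[1] + box[3]) / 2
    if abs(tracked_xy[0] - cx) < max_dist and abs(tracked_xy[1] - cy) < max_dist:
        return (cx, cy)  # accepted: keep following this detection
    return None  # rejected: treat the target as lost


# A 0 px drift is accepted, a 120 px jump is not:
print(gate_association((320, 240), (290, 210, 350, 270)))  # (320.0, 240.0)
print(gate_association((320, 240), (420, 340, 460, 380)))  # None
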
Example #25
def test(
        data,
        weights=None,
        batch_size=16,
        imgsz=640,
        conf_thres=0.001,
        iou_thres=0.6,  # for NMS
        save_json=False,
        single_cls=False,
        augment=False,
        verbose=False,
        model=None,
        dataloader=None,
        save_dir='',
        merge=False,
        save_txt=False):
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device

    else:  # called directly
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)
        merge, save_txt = opt.merge, opt.save_txt  # use Merge NMS, save *.txt labels
        if save_txt:
            out = Path('inference/output')
            if os.path.exists(out):
                shutil.rmtree(out)  # delete output folder
            os.makedirs(out)  # make new output folder

        # Remove previous
        for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')):
            os.remove(f)

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95,
                          10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img
                  ) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data[
            'val']  # path to val/test images
        dataloader = create_dataloader(path,
                                       imgsz,
                                       batch_size,
                                       model.stride.max(),
                                       opt,
                                       hyp=None,
                                       augment=False,
                                       cache=False,
                                       pad=0.5,
                                       rect=True)[0]

    seen = 0
    names = model.names if hasattr(model, 'names') else model.module.names
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(
                img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                loss += compute_loss([x.float() for x in train_out], targets,
                                     model)[1][:3]  # GIoU, obj, cls

            # Run NMS
            t = time_synchronized()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres,
                                         merge=merge)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0
                                                  ]]  # normalization gain whwh
                txt_path = str(out / Path(paths[si]).stem)
                pred[:, :4] = scale_coords(img[si].shape[1:], pred[:, :4],
                                           shapes[si][0],
                                           shapes[si][1])  # to original
                for *xyxy, conf, cls in pred:
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                            gn).view(-1).tolist()  # normalized xywh
                    with open(txt_path + '.txt', 'a') as f:
                        f.write(
                            ('%g ' * 5 + '\n') % (cls, *xywh))  # label format

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = Path(paths[si]).stem
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box, shapes[si][0],
                             shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id':
                        int(image_id) if image_id.isnumeric() else image_id,
                        'category_id':
                        coco91class[int(p[5])],
                        'bbox': [round(x, 3) for x in b],
                        'score':
                        round(p[4], 5)
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0],
                                  niou,
                                  dtype=torch.bool,
                                  device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(
                        -1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(
                        -1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(
                            1)  # best ious, indices

                        # Append detections
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[
                                    pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(
                                        detected
                                ) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if batch_i < 1:
            f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i)  # filename
            plot_images(img, targets, paths, str(f), names)  # ground truth
            f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
            plot_images(img, output_to_target(output, width, height), paths,
                        str(f), names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(
            1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3
              for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print(
            'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
            % t)

    # Save JSON
    if save_json and len(jdict):
        f = 'detections_val2017_%s_results.json' % \
            (weights.split(os.sep)[-1].replace('.pt', '') if isinstance(weights, str) else '')  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]
            cocoGt = COCO(
                glob.glob('../coco/annotations/instances_val*.json')
                [0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api
            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:
                                        2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print('ERROR: pycocotools unable to run: %s' % e)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map,
            *(loss.cpu() / len(dataloader)).tolist()), maps, t
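
One detail worth isolating from the matching loop above: `correct[pi[j]] = ious[j] > iouv` marks a matched prediction as correct at every IoU threshold it clears, not only at 0.5. A minimal sketch:

import torch

iouv = torch.linspace(0.5, 0.95, 10)  # the mAP@0.5:0.95 thresholds
iou = torch.tensor(0.72)              # hypothetical best IoU for one matched prediction

print(iou > iouv)  # one bool per threshold
# tensor([ True,  True,  True,  True,  True, False, False, False, False, False])
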
Example #26
def detect(number_person):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    # if os.path.exists(out):
    #     shutil.rmtree(out)  # delete output folder
    if not os.path.exists(out):
        os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    half = False  # override: force FP32 inference here regardless of device

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

    """
    loading model
    """
    config="configs/fangweisui/resnet18_b32x8.py"
    checkpoint="ckpts/epoch_100.pth"
    classify_model = init_model(config, checkpoint, device=device)
    classify_model.CLASSES = ["1", "2", "3", "4", "5"]


    # model=torch.load(weights, map_location=device)['model'].float().eval()
    # stride = [8, 16, 32]
    # imgsz = check_img_size(imgsz, s=max(stride))  # check img_size

    # model = Darknet('cfg/prune_0.8_yolov3-spp.cfg', (opt.img_size, opt.img_size)).to(device)
    # initialize_weights(model)
    # model.load_state_dict(torch.load('weights/prune_0.8_yolov3-spp-ultralytics.pt')['model'])
    # model.eval()
    # stride = [8, 16, 32]
    # imgsz = check_img_size(imgsz, s=max(stride))  # check img_size

    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    # names = ['1', '2']
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    for dirs in os.listdir(source):
        # if dirs !='WH':
        #     continue
        src = os.path.join(source, dirs)
        save_img = True
        save_xml = False
        dataset = LoadImages(src, img_size=imgsz)
        for path, img, im0s, vid_cap in dataset:
            # if os.path.basename(path)!='2_31746253093C100D_2018-12-10-21-56-37-998_0_75_636_307_6.jpg':
            #     continue
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=opt.augment)[0]
            t2 = time_synchronized()

            # Apply NMS
            pred = non_max_suppression_test(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
            # pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes,agnostic=opt.agnostic_nms)
            # t2 = time_synchronized()

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                # save_path = str(Path(out) / Path(p).name)
                # txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                results = [0, 0]
                minconf = 1
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    # Write results
                    results = [0, 0]  # Liu Zhuomei's test version
                    for *xyxy, conf, cls in det:
                        if save_txt:  # Write to file
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                            # with open(txt_path + '.txt', 'a') as f:
                            #     f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                        if save_img or view_img:  # Add bbox to image
                            if names[int(cls)] == "1":
                                results[0] += 1
                            elif names[int(cls)] == "2":
                                results[1] += 1
                            else:
                                label = '%s %.2f' % (names[int(cls)], conf)
                                plot_one_box_half(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
                                continue
                            # else:
                            #     results[1] += 1
                            minconf = min(conf, minconf)
                            label = '%s %.2f' % (names[int(cls)], conf)
                            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

                if len(results) == 2:
                    if (results[0] > number_person) | (results[1] > number_person):
                        tmp = "err"
                        if number_person == 1:
                            tmpresults = [0, 0]
                            for *xyxy, conf, cls in det:
                                c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))
                                ret_image = im0[c1[1]:c2[1], c1[0]:c2[0]]
                                # cv2.imwrite("ret_image.jpg", ret_image)
                                result = inference_model(classify_model, ret_image)

                                # print(classes[predict[0]])
                                if save_img or view_img:  # Add bbox to image
                                    if names[int(cls)] == "1" and result['pred_class'] == "1":
                                        tmpresults[0] += 1
                                    elif names[int(cls)] == "2" and result['pred_class'] == "2":
                                        tmpresults[1] += 1
                                    elif names[int(cls)] == "1" and result['pred_class'] == "2":
                                        tmpresults[0] += 1
                                    elif names[int(cls)] == "2" and result['pred_class'] == "1":
                                        tmpresults[1] += 1
                                    else:
                                        label = '%s %.2f' % (result['pred_class'], result['pred_score'])
                                        plot_one_box_half(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
                                        continue
                                if (tmpresults[0] > number_person) | (tmpresults[1] > number_person):
                                    tmp = "err"
                                elif (tmpresults[0] == number_person) | (tmpresults[1] == number_person):
                                    tmp = "err_to_corr"
                                else:
                                    tmp = "miss"
                    elif (results[0] == number_person) | (results[1] == number_person):
                        tmp = "corr"
                        if number_person == 2:
                            tmpresults = [0, 0]
                            for *xyxy, conf, cls in det:
                                c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))
                                ret_image = im0[c1[1]:c2[1], c1[0]:c2[0]]
                                # cv2.imwrite("ret_image.jpg", ret_image)
                                result = inference_model(classify_model, ret_image)

                                # print(classes[predict[0]])
                                if save_img or view_img:  # Add bbox to image
                                    if names[int(cls)] == "1" and result['pred_class'] == "1":
                                        tmpresults[0] += 1
                                    elif names[int(cls)] == "2" and result['pred_class'] == "2":
                                        tmpresults[1] += 1
                                    elif names[int(cls)] == "1" and result['pred_class'] == "2":
                                        tmpresults[0] += 1
                                    elif names[int(cls)] == "2" and result['pred_class'] == "1":
                                        tmpresults[1] += 1
                                    else:
                                        label = '%s %.2f' % (result['pred_class'], result['pred_score'])
                                        plot_one_box_half(xyxy, im0, label=label, color=colors[int(cls)],
                                                          line_thickness=3)
                                        continue
                                if (tmpresults[0] > number_person) | (tmpresults[1] > number_person):
                                    tmp = "err"
                                elif (tmpresults[0] == number_person) | (tmpresults[1] == number_person):
                                    tmp = "corr"
                                else:
                                    tmp = "corr_to_miss"

                    else:
                        tmp = "miss"


                elif len(results) == 1:
                    if (results[0] == number_person):
                        tmp = "corr"
                    elif (results[0] > number_person):
                        tmp = "err"
                    else:
                        tmp = "miss"
                else:
                    tmp = "miss"

                save_path = os.path.join(Path(out), dirs, tmp)
                if not os.path.exists(save_path):
                    os.makedirs(save_path)

                # Print time (inference + NMS)
                print('%sDone. (%.3fs)' % (s, t2 - t1))

                # Stream results
                if view_img:
                    cv2.imshow(p, im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'images':
                        cv2.imwrite(os.path.join(save_path, Path(p).name), im0)
                    else:
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release()  # release previous video writer

                            fourcc = 'mp4v'  # output video codec
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                        vid_writer.write(im0)

                    if det is not None and len(det) and save_xml:
                        # xml
                        output_name = os.path.join(save_path, Path(p).name)
                        create_tree(output_name)  # assumed to set up the 'annotation' root element used below
                        for *xyxy, conf, cls in det:
                            label=names[int(cls)]
                            left, top, right, bottom = torch.tensor(xyxy).view(-1).tolist()
                            create_object(annotation, label, left, top, right, bottom)

                        # write the element tree to an XML file
                        tree = ET.ElementTree(annotation)
                        root = tree.getroot()  # get the root element (an Element)
                        pretty_xml(root, '\t', '\n')  # pretty-print the tree in place
                        # splitext instead of rstrip('.jpg'): rstrip strips characters, not a suffix
                        tree.write('%s.xml' % os.path.splitext(output_name)[0])


        if save_txt or save_img:
            print('Results saved to %s' % os.getcwd() + os.sep + out)
            # if platform == 'darwin' and not opt.update:  # MacOS
            #     os.system('open ' + save_path)

        print('Done. (%.3fs)' % (time.time() - t0))
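
The crop-and-reclassify passes above repeat one pattern several times: cut each detected box out of the frame, run the second-stage classifier on the crop, and compare its answer with the detector's class. A compressed sketch of that pattern with a stand-in classifier callable (not the init_model/inference_model pair used above):

def verify_detections(im0, det, classify, names):
    """Keep only detections whose crop the second stage classifies the same way.

    det rows are (x1, y1, x2, y2, conf, cls); classify(crop) returns a class name.
    """
    kept = []
    for *xyxy, conf, cls in det:
        x1, y1, x2, y2 = (int(v) for v in xyxy)
        crop = im0[y1:y2, x1:x2]  # numpy HxWxC slice, as in the loop above
        if classify(crop) == names[int(cls)]:  # both stages agree
            kept.append((x1, y1, x2, y2, float(conf), int(cls)))
    return kept

# Usage with a dummy classifier that always answers "1":
# verify_detections(frame, [(10, 10, 50, 50, 0.9, 0)], lambda c: "1", ["1", "2"])
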
Example #27
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        logger.info('%3s%18s%3s%10.0f  %-40s%-30s' % (i, f, n, np, t, args))  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    opt = parser.parse_args()
    opt.cfg = check_file(opt.cfg)  # check file
    set_logging()
    device = select_device(opt.device)

    # Create model
    model = Model(opt.cfg).to(device)
    model.train()

    # Profile
    # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
    # y = model(img, profile=True)

    # ONNX export
    # model.model[-1].export = True
    # torch.onnx.export(model, img, opt.cfg.replace('.yaml', '.onnx'), verbose=True, opset_version=11)

    # Tensorboard
    # from torch.utils.tensorboard import SummaryWriter
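
The commented ONNX block above sketches an export; switched on, it amounts to roughly the following. This is a sketch, not verified against this repo: the Detect-head `export` flag is taken from the comment, and the `models.yolo.Model` import and cfg path are assumptions.

import torch
from models.yolo import Model  # assumed module path for this file's Model class

device = torch.device('cpu')
model = Model('yolov5s.yaml').to(device)
model.eval()
model.model[-1].export = True  # Detect-head export flag hinted at in the comment
img = torch.rand(1, 3, 640, 640).to(device)
torch.onnx.export(model, img, 'yolov5s.onnx', verbose=True, opset_version=11)
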
Example #28
def test(
    cfg,
    data_cfg,
    batch_size=16,
    img_size=416,
    iou_thres=0.5,
    conf_thres=0.1,
    nms_thres=0.5,
    model=None,
):

    # Configure run
    data_cfg = parse_data_cfg(data_cfg)
    nc = int(data_cfg['classes'])  # number of classes
    test_path = data_cfg['valid']  # path to test images
    names = load_classes(data_cfg['names'])  # class names

    if model is None:
        device = select_device()
        num_classes = nc
        # Initialize model
        if "-tiny" in cfg:
            model = (num_classes).to(device)
            # weights = 'weights-yolov3-tiny/best.pt'
            weights = "./yolov3-tiny_coco.pt"
        else:
            model = Yolov3(num_classes).to(device)
            # weights = 'weights-yolov3/best.pt'
            weights = "./finetune-weight/yolov3_coco.pt"

        # Load weights
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device
    print("using device: {}".format(device))
    # Dataloader
    dataset = LoadImagesAndLabels(test_path,
                                  batch_size,
                                  img_size=img_size,
                                  augment=False)
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=0,
                            pin_memory=False,
                            collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    print(('%20s' + '%10s' * 6) %
          ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1'))
    loss, p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc='Computing mAP')):
        targets = targets.to(device)
        nt = len(targets)
        if nt == 0:  # if no targets continue
            continue
        imgs = imgs.to(device)
        # Run model
        inf_out, train_out = model(imgs)  # inference and training outputs

        # Build targets
        target_list = build_targets(model, targets)

        # Compute loss
        loss_i, _ = compute_loss(train_out, target_list)
        loss += loss_i.item()

        # Run NMS
        output = non_max_suppression(inf_out,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)
        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            correct, detected = [], []
            tcls = torch.Tensor()
            seen += 1

            if pred is None:
                if len(labels):
                    tcls = labels[:, 0].cpu()  # target classes
                    stats.append(
                        (correct, torch.Tensor(), torch.Tensor(), tcls))
                continue

            if len(labels):
                # Extract target boxes as (x1, y1, x2, y2)
                tbox = xywh2xyxy(labels[:, 1:5]) * img_size  # target boxes
                tcls = labels[:, 0]  # target classes

                for *pbox, pconf, pcls_conf, pcls in pred:
                    if pcls not in tcls:
                        correct.append(0)
                        continue

                    # Best iou, index between pred and targets
                    iou, bi = bbox_iou(pbox, tbox).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and bi not in detected:
                        correct.append(1)
                        detected.append(bi)
                    else:
                        correct.append(0)
            else:
                # If no labels add number of detections as incorrect
                correct.extend([0] * len(pred))

            # Append Statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls.cpu()))

    # Compute statistics
    stats_np = [np.concatenate(x, 0) for x in list(zip(*stats))]
    nt = np.bincount(stats_np[3].astype(np.int64),
                     minlength=nc)  # number of targets per class
    if len(stats_np):
        p, r, ap, f1, ap_class = ap_per_class(*stats_np)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1), end='\n\n')

    # Print results per class
    if nc > 1 and len(stats_np):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Return results
    return mp, mr, map, mf1, loss
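
Called standalone, this test() returns plain scalars rather than writing result files. A minimal invocation sketch; the cfg/data paths are placeholders:

mp, mr, mean_ap, mf1, val_loss = test('cfg/yolov3.cfg',
                                      'data/coco.data',
                                      batch_size=16,
                                      img_size=416,
                                      iou_thres=0.5,
                                      conf_thres=0.1,
                                      nms_thres=0.5)
print('P %.3f  R %.3f  mAP %.3f  F1 %.3f  loss %.3f' %
      (mp, mr, mean_ap, mf1, val_loss))
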
Example #29
def run(
        weights='yolov5s.pt',  # model.pt path(s)
        source='data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        update=False,  # update all models
        project='runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
):
    save_img = not nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name,
                              exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check image size
    names = model.module.names if hasattr(
        model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet50', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('resnet50.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes,
                                   agnostic_nms,
                                   max_det=max_det)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(
                    dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh,
                                conf) if save_conf else (cls,
                                                         *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (
                            names[c]
                            if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors(c, True),
                                     line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy,
                                         imc,
                                         file=save_dir / 'crops' / names[c] /
                                         f'{p.stem}.jpg',
                                         BGR=True)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')
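
Because run() takes keyword arguments instead of reading a global opt, it can be driven directly from Python. A minimal sketch with placeholder paths:

run(weights='yolov5s.pt',      # model checkpoint (placeholder)
    source='data/images',      # directory of images to run on
    imgsz=640,
    conf_thres=0.4,            # stricter confidence than the 0.25 default
    save_txt=True,             # also write normalized xywh label files
    project='runs/detect',
    name='demo')
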
Example #30
'''
LastEditors: Please set LastEditors
Description: In User Settings Edit
FilePath: /yolo-face-with-landmark-master/onnx/conver_to_onnx.py
'''
import sys
sys.path.insert(0, '..')
from nn import *
import torch
from utils.torch_utils import select_device
from hyp import hyp
from torchstat import stat


point_num = hyp['point_num']

device = select_device('cpu')


net_type = "mbv3_small_75_light"  # alternatively: "mbv3_large_75"

assert net_type in [
    'mbv3_small_1', 'mbv3_small_75', 'mbv3_large_1', 'mbv3_large_75',
    'mbv3_large_75_light', 'mbv3_large_1_light', 'mbv3_small_75_light',
    'mbv3_small_1_light',
]

if net_type.startswith("mbv3_small_1"):
    backone = mobilenetv3_small()
elif net_type.startswith("mbv3_small_75"):
    backone = mobilenetv3_small( width_mult=0.75)
elif net_type.startswith("mbv3_large_1"):
    backone = mobilenetv3_large()
elif net_type.startswith("mbv3_large_75"):