Example #1
    def __init__(self, transform, opt, save_img=False):
        self.opt = opt
        self.debug = opt.debug
        self.save_txt = opt.save_txt
        self.view_img = opt.view_img
        self.transform = transform
        self.save_img = save_img
        self.fourcc = 'mp4v'
        self.resolution = (
            1920, 1080
        )  # Can also be set to False to get standard camera resolution
        self.vid_writer = None
        self.vid_path = None

        self.out = self.create_dir(opt.output)
        self.webcam = self.check_webcam(opt.source)
        self.device = select_device(opt.device)
        self.half = self.device.type != 'cpu'  # half precision only supported on CUDA
        self.model, self.imgsz, self.class_names = self.load_model()
        self.overlay_images = self.get_overlay_icons()
        self.banner_icon = self.get_banner_icon()
        self.dataset = self.set_dataloader()
        self.meta_data_writer = self.get_meta_data_writer()

        set_logging()
Example #2
def custom(path_or_model='path/to/model.pt', autoshape=True, verbose=True):
    """YOLOv5-custom model https://github.com/ultralytics/yolov5

    Arguments (3 options):
        path_or_model (str): 'path/to/model.pt'
        path_or_model (dict): torch.load('path/to/model.pt')
        path_or_model (nn.Module): torch.load('path/to/model.pt')['model']

    Returns:
        pytorch model
    """
    set_logging(verbose=verbose)

    model = torch.load(path_or_model) if isinstance(
        path_or_model, str) else path_or_model  # load checkpoint
    if isinstance(model, dict):
        model = model['ema' if model.get('ema') else 'model']  # load model

    hub_model = Model(model.yaml).to(next(model.parameters()).device)  # create
    hub_model.load_state_dict(model.float().state_dict())  # load state_dict
    hub_model.names = model.names  # class names
    if autoshape:
        hub_model = hub_model.autoshape(
        )  # for file/URI/PIL/cv2/np inputs and NMS
    device = select_device('0' if torch.cuda.is_available() else
                           'cpu')  # default to GPU if available
    return hub_model.to(device)
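A minimal usage sketch for custom() above, assuming the hubconf-style module is importable; the checkpoint path and image path are placeholders:

# Usage sketch (hypothetical checkpoint path)
model = custom(path_or_model='weights/best.pt')           # option 1: path string
# model = custom(torch.load('weights/best.pt'))           # option 2: loaded checkpoint dict
# model = custom(torch.load('weights/best.pt')['model'])  # option 3: nn.Module
results = model('data/images/zidane.jpg')  # autoshaped inference on an image
results.print()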
Example #3
    def load_model(self, dev="cpu"):
        # Initialize
        set_logging()
        device = select_device(dev)
        # Load model
        # model = attempt_load("yolov5_model/yolov5x.pt", map_location=device)  # load FP32 model
        model = torch.hub.load('ultralytics/yolov5', 'custom', path_or_model='yolov5_model/yolov5x.pt')
        stride = int(model.stride.max())  # model stride
        imgsz = check_img_size(640, s=stride)  # check img_size
        # Run inference once to warm up the model
        if device.type != 'cpu':
            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))
        return model, stride, device, imgsz
Example #4
def load_model(model_path, device=None, autoshape=True, verbose=False):
    """
    Creates a specified YOLOv5 model

    Arguments:
        model_path (str): path of the model weights file
        device (str): device the model will be loaded on (cpu, cuda)
        autoshape (bool): make model ready for inference
        verbose (bool): if False, yolov5 logs will be silent

    Returns:
        pytorch model

    (Adapted from yolov5.hubconf.create)
    """
    # set logging
    set_logging(verbose=verbose)

    # set device if not given
    if not device:
        device = "cuda:0" if torch.cuda.is_available() else "cpu"

    # add yolov5 folder to system path
    here = Path(__file__).parent.absolute()
    yolov5_folder_dir = str(here)
    sys.path.insert(0, yolov5_folder_dir)

    attempt_download(model_path)  # download if not found locally
    model = torch.load(model_path, map_location=torch.device(device))
    if isinstance(model, dict):
        model = model["model"]  # load model
    hub_model = Model(model.yaml).to(next(model.parameters()).device)  # create
    hub_model.load_state_dict(model.float().state_dict())  # load state_dict
    hub_model.names = model.names  # class names
    model = hub_model

    # remove yolov5 folder from system path
    sys.path.remove(yolov5_folder_dir)

    if autoshape:
        model = model.autoshape()

    return model
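Below is a short, hedged usage sketch for load_model(); the weights and image paths are placeholders, and it assumes the yolov5 sources sit next to this file as described above:

# Usage sketch (placeholder paths)
model = load_model('yolov5s.pt', device='cpu', autoshape=True, verbose=False)
results = model('data/images/bus.jpg')  # autoshaped inference
results.print()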
    """
Example #5
def create(name, pretrained, channels, classes, autoshape, verbose):
    """Creates a specified YOLOv5 model

    Arguments:
        name (str): name of model, i.e. 'yolov5s'
        pretrained (bool): load pretrained weights into the model
        channels (int): number of input channels
        classes (int): number of model classes
        autoshape (bool): wrap the model with .autoshape() for inference
        verbose (bool): if False, yolov5 logs will be silent

    Returns:
        pytorch model
    """
    try:
        set_logging(verbose=verbose)

        cfg = list((Path(__file__).parent /
                    'models').rglob(f'{name}.yaml'))[0]  # model.yaml path
        model = Model(cfg, channels, classes)
        if pretrained:
            fname = f'{name}.pt'  # checkpoint filename
            attempt_download(fname)  # download if not found locally
            ckpt = torch.load(fname, map_location=torch.device('cpu'))  # load
            msd = model.state_dict()  # model state_dict
            csd = ckpt['model'].float().state_dict(
            )  # checkpoint state_dict as FP32
            csd = {k: v
                   for k, v in csd.items()
                   if msd[k].shape == v.shape}  # filter
            model.load_state_dict(csd, strict=False)  # load
            if len(ckpt['model'].names) == classes:
                model.names = ckpt['model'].names  # set class names attribute
            if autoshape:
                model = model.autoshape(
                )  # for file/URI/PIL/cv2/np inputs and NMS
        device = select_device('0' if torch.cuda.is_available() else
                               'cpu')  # default to GPU if available
        return model.to(device)

    except Exception as e:
        help_url = 'https://github.com/ultralytics/yolov5/issues/36'
        s = 'Cache may be out of date, try force_reload=True. See %s for help.' % help_url
        raise Exception(s) from e
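For reference, a hedged example of calling create() the way the yolov5 hubconf entry points do; the argument values below are only illustrative:

# Usage sketch: pretrained yolov5s, 3 input channels, 80 classes
model = create('yolov5s', pretrained=True, channels=3, classes=80,
               autoshape=True, verbose=True)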
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights',
                        type=str,
                        default='yolov5s.pt',
                        help='initial weights path')
    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
    #parser.add_argument('--data', type=str, default='yolov5/data/coco128.yaml', help='data.yaml path')
    #parser.add_argument('--hyp', type=str, default='yolov5/data/hyp.scratch.yaml', help='hyperparameters path')
    parser.add_argument('--data', type=str, default='', help='data.yaml path')
    parser.add_argument('--hyp',
                        type=str,
                        default='',
                        help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=300)
    parser.add_argument('--batch-size',
                        type=int,
                        default=16,
                        help='total batch size for all GPUs')
    parser.add_argument('--img-size',
                        nargs='+',
                        type=int,
                        default=[640, 640],
                        help='[train, test] image sizes')
    parser.add_argument('--rect',
                        action='store_true',
                        help='rectangular training')
    parser.add_argument('--resume',
                        nargs='?',
                        const=True,
                        default=False,
                        help='resume most recent training')
    parser.add_argument('--nosave',
                        action='store_true',
                        help='only save final checkpoint')
    parser.add_argument('--notest',
                        action='store_true',
                        help='only test final epoch')
    parser.add_argument('--noautoanchor',
                        action='store_true',
                        help='disable autoanchor check')
    parser.add_argument('--evolve',
                        action='store_true',
                        help='evolve hyperparameters')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache-images',
                        action='store_true',
                        help='cache images for faster training')
    parser.add_argument('--image-weights',
                        action='store_true',
                        help='use weighted image selection for training')
    parser.add_argument('--device',
                        default='',
                        help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale',
                        action='store_true',
                        help='vary img-size +/- 50%%')
    parser.add_argument('--single-cls',
                        action='store_true',
                        help='train multi-class data as single-class')
    parser.add_argument('--adam',
                        action='store_true',
                        help='use torch.optim.Adam() optimizer')
    parser.add_argument('--sync-bn',
                        action='store_true',
                        help='use SyncBatchNorm, only available in DDP mode')
    parser.add_argument('--local_rank',
                        type=int,
                        default=-1,
                        help='DDP parameter, do not modify')
    parser.add_argument('--workers',
                        type=int,
                        default=8,
                        help='maximum number of dataloader workers')
    parser.add_argument('--project',
                        default='runs/train',
                        help='save to project/name')
    parser.add_argument('--entity', default=None, help='W&B entity')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok',
                        action='store_true',
                        help='existing project/name ok, do not increment')
    parser.add_argument('--quad', action='store_true', help='quad dataloader')
    parser.add_argument('--linear-lr', action='store_true', help='linear LR')
    parser.add_argument('--label-smoothing',
                        type=float,
                        default=0.0,
                        help='Label smoothing epsilon')
    parser.add_argument('--upload_dataset',
                        action='store_true',
                        help='Upload dataset as W&B artifact table')
    parser.add_argument('--bbox_interval',
                        type=int,
                        default=-1,
                        help='Set bounding-box image logging interval for W&B')
    parser.add_argument('--save_period',
                        type=int,
                        default=-1,
                        help='Log model after every "save_period" epoch')
    parser.add_argument('--artifact_alias',
                        type=str,
                        default="latest",
                        help='version of dataset artifact to be used')
    opt = parser.parse_args()

    # Set DDP variables
    opt.world_size = int(
        os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
    opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
    set_logging(opt.global_rank)
    if opt.global_rank in [-1, 0]:
        check_git_status()
        #check_requirements()

    # Resume
    wandb_run = check_wandb_resume(opt)
    if opt.resume and not wandb_run:  # resume an interrupted run
        ckpt = opt.resume if isinstance(
            opt.resume,
            str) else get_latest_run()  # specified or most recent path
        assert os.path.isfile(
            ckpt), 'ERROR: --resume checkpoint does not exist'
        apriori = opt.global_rank, opt.local_rank
        with open(Path(ckpt).parent.parent / 'opt.yaml') as f:
            opt = argparse.Namespace(**yaml.safe_load(f))  # replace
        opt.cfg, opt.weights, opt.resume, opt.batch_size, opt.global_rank, opt.local_rank = \
            '', ckpt, True, opt.total_batch_size, *apriori  # reinstate
        logger.info('Resuming training from %s' % ckpt)
    else:
        opt.hyp = opt.hyp or str(
            Path(__file__).parent / 'data' /
            ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml'))
        opt.data = opt.data or str(Path(__file__).parent / 'data/coco128.yaml')

        opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(
            opt.cfg), check_file(opt.hyp)  # check files
        assert len(opt.cfg) or len(
            opt.weights), 'either --cfg or --weights must be specified'
        opt.img_size.extend(
            [opt.img_size[-1]] *
            (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
        opt.name = 'evolve' if opt.evolve else opt.name
        opt.save_dir = str(
            increment_path(Path(opt.project) / opt.name,
                           exist_ok=opt.exist_ok | opt.evolve))

    # DDP mode
    opt.total_batch_size = opt.batch_size
    device = select_device(opt.device, batch_size=opt.batch_size)
    if opt.local_rank != -1:
        assert torch.cuda.device_count() > opt.local_rank
        torch.cuda.set_device(opt.local_rank)
        device = torch.device('cuda', opt.local_rank)
        dist.init_process_group(backend='nccl',
                                init_method='env://')  # distributed backend
        assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
        opt.batch_size = opt.total_batch_size // opt.world_size

    # Hyperparameters
    with open(opt.hyp) as f:
        hyp = yaml.safe_load(f)  # load hyps

    # Train
    logger.info(opt)
    if not opt.evolve:
        tb_writer = None  # init loggers
        if opt.global_rank in [-1, 0]:
            prefix = colorstr('tensorboard: ')
            logger.info(
                f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/"
            )
            tb_writer = SummaryWriter(opt.save_dir)  # Tensorboard
        train(hyp, opt, device, tb_writer)

    # Evolve hyperparameters (optional)
    else:
        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
        meta = {
            'lr0':
            (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
            'lrf':
            (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
            'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
            'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
            'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
            'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
            'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
            'box': (1, 0.02, 0.2),  # box loss gain
            'cls': (1, 0.2, 4.0),  # cls loss gain
            'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
            'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
            'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
            'iou_t': (0, 0.1, 0.7),  # IoU training threshold
            'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
            'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
            'fl_gamma':
            (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
            'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
            'hsv_s': (1, 0.0,
                      0.9),  # image HSV-Saturation augmentation (fraction)
            'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
            'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
            'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
            'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
            'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
            'perspective':
            (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
            'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
            'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
            'mosaic': (1, 0.0, 1.0),  # image mosaic (probability)
            'mixup': (1, 0.0, 1.0)  # image mixup (probability)
        }

        assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
        opt.notest, opt.nosave = True, True  # only test/save final epoch
        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
        yaml_file = Path(
            opt.save_dir) / 'hyp_evolved.yaml'  # save best result here
        if opt.bucket:
            os.system('gsutil cp gs://%s/evolve.txt .' %
                      opt.bucket)  # download evolve.txt if exists

        for _ in range(300):  # generations to evolve
            if Path('evolve.txt').exists(
            ):  # if evolve.txt exists: select best hyps and mutate
                # Select parent(s)
                parent = 'single'  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt('evolve.txt', ndmin=2)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min()  # weights
                if parent == 'single' or len(x) == 1:
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n),
                                         weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(
                        n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([x[0] for x in meta.values()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(
                        v == 1
                ):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) *
                         npr.random() * s + 1).clip(0.3, 3.0)
                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
                    hyp[k] = float(x[i + 7] * v[i])  # mutate

            # Constrain to limits
            for k, v in meta.items():
                hyp[k] = max(hyp[k], v[1])  # lower limit
                hyp[k] = min(hyp[k], v[2])  # upper limit
                hyp[k] = round(hyp[k], 5)  # significant digits

            # Train mutation
            results = train(hyp.copy(), opt, device)

            # Write mutation results
            print_mutation(hyp.copy(), results, yaml_file, opt.bucket)

        # Plot results
        plot_evolution(yaml_file)
        print(
            f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
            f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}'
        )
Example #7
def mainFunc(args):
    # Set the main function flag
    print("Main Function Start...")

    # Check the GPU device
    print("Number of available GPUs: {}".format(torch.cuda.device_count()))

    # Check whether distributed running is used for the network
    is_distributed = initDistributed(args)
    master = True
    if is_distributed and os.environ["RANK"]:
        master = int(
            os.environ["RANK"]) == 0  # check whether this node is master node

    # Configuration for device setting
    set_logging()
    if is_distributed:
        device = torch.device('cuda:{}'.format(args.local_rank))
    else:
        device = select_device(args.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load the configuration
    config = loadConfig(args.config)

    # CuDNN related setting
    if torch.cuda.is_available():
        cudnn.benchmark = config.DEVICE.CUDNN.BENCHMARK
        cudnn.deterministic = config.DEVICE.CUDNN.DETERMINISTIC
        cudnn.enabled = config.DEVICE.CUDNN.ENABLED

    # Configurations for directories
    save_img, save_dir, source, yolov5_weights, view_img, save_txt, imgsz = \
        False, Path(args.save_dir), args.source, args.weights, args.view_img, args.save_txt, args.img_size
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    if save_dir == Path('runs/detect'):  # if default
        os.makedirs('runs/detect', exist_ok=True)  # make base
        save_dir = Path(increment_dir(save_dir / 'exp',
                                      args.name))  # increment run
    os.makedirs(save_dir / 'labels' if save_txt else save_dir,
                exist_ok=True)  # make new dir

    # Load yolov5 model for human detection
    model_yolov5 = attempt_load(config.MODEL.PRETRAINED.YOLOV5,
                                map_location=device)
    imgsz = check_img_size(imgsz,
                           s=model_yolov5.stride.max())  # check img_size
    if half:
        model_yolov5.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        model_classifier = load_classifier(name='resnet101', n=2)  # initialize
        model_classifier.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        model_classifier.to(device).eval()

    # Load resnet model for human keypoints estimation
    model_resnet = eval('pose_models.' + config.MODEL.NAME.RESNET +
                        '.get_pose_net')(config, is_train=False)
    if config.EVAL.RESNET.MODEL_FILE:
        print('=> loading model from {}'.format(config.EVAL.RESNET.MODEL_FILE))
        model_resnet.load_state_dict(torch.load(config.EVAL.RESNET.MODEL_FILE),
                                     strict=False)
    else:
        print('expected model defined in config at EVAL.RESNET.MODEL_FILE')
    model_resnet.to(device)
    model_resnet.eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)
    pose_transform = transforms.Compose(
        [  # input transformation for 2d human pose estimation
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    # Get names and colors
    names = model_yolov5.module.names if hasattr(
        model_yolov5, 'module') else model_yolov5.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Construct filters for filtering 2D/3D human keypoints
    # filters_2d = constructFilters((1,16,2), freq=25, mincutoff=1, beta=0.01)  # for test
    # filters_3d = constructFilters((1,16,3), freq=25, mincutoff=1, beta=0.01)

    # Run the yolov5 and resnet for 2d human pose estimation
    # with torch.no_grad():
    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model_yolov5(img.half() if half else img
                     ) if device.type != 'cpu' else None  # run once

    # Process every video frame
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred_boxes = model_yolov5(img, augment=args.augment)[0]

        # Apply NMS
        pred_boxes = non_max_suppression(pred_boxes,
                                         args.conf_thres,
                                         args.iou_thres,
                                         classes=args.classes,
                                         agnostic=args.agnostic_nms)
        t2 = time_synchronized()

        # No people detected; move on to the next frame
        if pred_boxes[0] is None:
            # show the frame with no human detected
            cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL)
            cv2.imshow("2D Human Pose Estimation", im0s[0].copy())
            # wait manual operations
            # with kb.Listener(on_press=on_press) as listener:
            #     listener.join()
            #     return
            # if kb.is_pressed('t'):
            #     return
            print("No Human Detected and Move on.")
            print("-" * 30)
            continue

        # Print time (inference + NMS)
        detect_time = t2 - t1
        detect_fps = 1.0 / detect_time
        print("Human Detection Time: {}, Human Detection FPS: {}".format(
            detect_time, detect_fps))

        # Apply Classifier
        if classify:  # false
            pred_boxes = apply_classifier(pred_boxes, model_classifier, img,
                                          im0s)

        # Estimate 2D human pose (multiple people)
        centers = []
        scales = []
        for id, boxes in enumerate(pred_boxes):
            if boxes is not None and len(boxes):
                boxes[:, :4] = scale_coords(img.shape[2:], boxes[:, :4],
                                            im0s[id].copy().shape).round()
            # convert tensor to list format
            boxes = np.delete(boxes.cpu().numpy(), [-2, -1], axis=1).tolist()
            for l in range(len(boxes)):
                boxes[l] = [tuple(boxes[l][0:2]), tuple(boxes[l][2:4])]
            # convert box to center and scale
            for box in boxes:
                center, scale = box_to_center_scale(box, imgsz, imgsz)
                centers.append(center)
                scales.append(scale)
        t3 = time_synchronized()
        pred_pose_2d = get_pose_estimation_prediction(config,
                                                      model_resnet,
                                                      im0s[0],
                                                      centers,
                                                      scales,
                                                      transform=pose_transform,
                                                      device=device)
        t4 = time_synchronized()

        # Print time (2d human pose estimation)
        estimate_time = t4 - t3
        estimate_fps = 1.0 / estimate_time
        print("Pose Estimation Time: {}, Pose Estimation FPS: {}".format(
            estimate_time, estimate_fps))

        # Filter the predicted 2D human pose (multiple people)
        t5 = time_synchronized()
        # if False:  # for test
        if config.EVAL.RESNET.USE_FILTERS_2D:
            # construct filters for every keypoints of every person in 2D
            filters_2d = constructFilters(pred_pose_2d.shape,
                                          freq=1,
                                          mincutoff=1,
                                          beta=0.01)
            print("Shape of filters_2d: ({}, {}, {})".format(
                len(filters_2d), len(filters_2d[0]),
                len(filters_2d[0][0])))  # for test
            for per in range(pred_pose_2d.shape[0]):
                for kp in range(pred_pose_2d.shape[1]):
                    for coord in range(pred_pose_2d.shape[2]):
                        pred_pose_2d[per][kp][coord] = filters_2d[per][kp][
                            coord](pred_pose_2d[per][kp][coord])
        t6 = time_synchronized()

        # Print time (filter 2d human pose)
        filter_time_2d = t6 - t5
        filter_fps_2d = 1.0 / filter_time_2d
        print("Filter 2D Pose Time: {}, Filter 2D Pose FPS: {}".format(
            filter_time_2d, filter_fps_2d))

        # Process detections and estimations in 2D
        for i, box in enumerate(pred_boxes):
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh

            if box is not None and len(box):
                # Rescale boxes from img_size to im0 size
                box[:, :4] = scale_coords(img.shape[2:], box[:, :4],
                                          im0.shape).round()

                # Print results
                for c in box[:, -1].unique():
                    n = (box[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(box):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if args.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line) + '\n') % line)

                    # Add bbox to image
                    if save_img or view_img:
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

                # Draw joint keypoints, number orders and human skeletons for every detected people in 2D
                for person in pred_pose_2d:
                    # draw the human keypoints
                    for idx, coord in enumerate(person):
                        x_coord, y_coord = int(coord[0]), int(coord[1])
                        cv2.circle(im0, (x_coord, y_coord), 1, (0, 0, 255), 5)
                        cv2.putText(im0, str(idx), (x_coord, y_coord),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                                    (255, 255, 255), 2, cv2.LINE_AA)

                    # draw the human skeletons in PACIFIC mode
                    for skeleton in PACIFIC_SKELETON_INDEXES:
                        cv2.line(im0, (int(person[skeleton[0]][0]),
                                       int(person[skeleton[0]][1])),
                                 (int(person[skeleton[1]][0]),
                                  int(person[skeleton[1]][1])), skeleton[2], 2)

            # Print time (inference + NMS + estimation)
            print('%sDone. (%.3fs)' % (s, t4 - t1))

            # Stream results
            if view_img:
                detect_text = "Detect FPS:{0:0>5.2f}/{1:0>6.2f}ms".format(
                    detect_fps, detect_time * 1000)
                estimate_text = "Estimate FPS:{0:0>5.2f}/{1:0>6.2f}ms".format(
                    estimate_fps, estimate_time * 1000)
                cv2.putText(im0, detect_text, (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2,
                            cv2.LINE_AA)
                cv2.putText(im0, estimate_text, (10, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2,
                            cv2.LINE_AA)
                cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL)
                cv2.imshow("2D Human Pose Estimation", im0)
                if cv2.waitKey(1) & 0xFF == ord('q'):  # q to quit
                    return
                    # goto .mainFunc

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

        # Print time (inference + NMS + estimation + 2d filtering)
        all_process_time = t6 - t1
        all_process_fps = 1.0 / all_process_time
        print("All Process Time: {}, All Process FPS: {}".format(
            all_process_time, all_process_fps))
        print("-" * 30)

    # Goto label
    # label .mainFunc

    # Print saving results
    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    # Release video reader and writer, then destroy all OpenCV windows
    dataset.vid_cap.release()
    vid_writer.release()
    cv2.destroyAllWindows()
    print('Present 2D Human Pose Inference Done. Total Time:(%.3f seconds)' %
          (time.time() - t0))
Example #8
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                # cv2.imshow(p, im0)
                cv2.imwrite("C:/Users/lenovo/Desktop/server/output/camera.jpg",
                            im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        # vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                        vid_writer = cv2.VideoWriter(
                            save_path,
                            cv2.VideoWriter_fourcc('X', '2', '6', '4'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
Example #9
def detect():
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size

    set_logging()
    device = select_device(opt.device)

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

    dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img) if device.type != 'cpu' else None  # run once

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.float()
        img /= 255.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        pred = model(img, augment=opt.augment)[0]
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)

        pred = sort_pred(
            pred,
            iou_tr=0.6)  # drop overlapping boxes with lower confidence

        for i, det in enumerate(pred):

            if len(det):
                # Rescale boxes from img_size to im0 size
                #det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0s.shape).round()
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0s.shape).round()

            for k in range(det.size(0)):
                box = det[k].tolist()
                x1 = int(box[0])
                y1 = int(box[1])
                x2 = int(box[2])
                y2 = int(box[3])

                text = names[int(box[5])] + ' ' + str(round(box[4], 4))
                print('classes:', names[int(box[5])])
                cv2.rectangle(im0s, (x1, y1), (x2, y2), colors[int(box[5])], 2)
                cv2.putText(im0s,
                            text,
                            tuple([x1, y2]),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 255, 0),
                            thickness=1)

            cv2.imshow('Image', im0s)

        if cv2.waitKey(0) & 0xFF == ord('q'):
            break
Example #10
def test(
        data,
        weights=None,  # model.pt path(s)
        batch_size=32,  # batch size
        imgsz=640,  # inference size (pixels)
        conf_thres=0.001,  # confidence threshold
        iou_thres=0.6,  # NMS IoU threshold
        task='val',  # train, val, test, speed or study
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        single_cls=False,  # treat as single-class dataset
        augment=False,  # augmented inference
        verbose=False,  # verbose output
        save_txt=False,  # save results to *.txt
        save_hybrid=False,  # save label+prediction hybrid results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_json=False,  # save a cocoapi-compatible JSON results file
        project='runs/test',  # save to project/name
        name='exp',  # save to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        half=True,  # use FP16 half-precision inference
        model=None,
        dataloader=None,
        save_dir=Path(''),
        plots=True,
        wandb_logger=None,
        compute_loss=None,
):
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device

    else:  # called directly
        set_logging()
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name,
                                  exist_ok=exist_ok)  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(
            parents=True, exist_ok=True)  # make dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
        imgsz = check_img_size(imgsz, s=gs)  # check image size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half &= device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    if isinstance(data, str):
        with open(data) as f:
            data = yaml.safe_load(f)
    check_dataset(data)  # check
    is_coco = data['val'].endswith('coco/val2017.txt')  # COCO dataset
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95,
                          10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs = 0
    if wandb_logger and wandb_logger.wandb:
        log_imgs = min(wandb_logger.log_imgs, 100)
    # Dataloader
    if not training:
        if device.type != 'cpu':
            model(
                torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                    next(model.parameters())))  # run once
        task = task if task in (
            'train', 'val', 'test') else 'val'  # path to train/val/test images
        dataloader = create_dataloader(data[task],
                                       imgsz,
                                       batch_size,
                                       gs,
                                       single_cls,
                                       pad=0.5,
                                       rect=True,
                                       prefix=colorstr(f'{task}: '))[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {
        k: v
        for k, v in enumerate(
            model.names if hasattr(model, 'names') else model.module.names)
    }
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R',
                                 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1, t2 = 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=s)):
        t_ = time_synchronized()
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        t = time_synchronized()
        t0 += t - t_

        # Run model
        out, train_out = model(
            img, augment=augment)  # inference and training outputs
        t1 += time_synchronized() - t

        # Compute loss
        if compute_loss:
            loss += compute_loss([x.float() for x in train_out],
                                 targets)[1][:3]  # box, obj, cls

        # Run NMS
        targets[:, 2:] *= torch.Tensor([width, height, width,
                                        height]).to(device)  # to pixels
        lb = [targets[targets[:, 0] == i, 1:]
              for i in range(nb)] if save_hybrid else []  # for autolabelling
        t = time_synchronized()
        out = non_max_suppression(out,
                                  conf_thres,
                                  iou_thres,
                                  labels=lb,
                                  multi_label=True,
                                  agnostic=single_cls)
        t2 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(out):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Predictions
            if single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0],
                         shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0
                                                  ]]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                            gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh,
                            conf) if save_conf else (cls,
                                                     *xywh)  # label format
                    with open(save_dir / 'labels' / (path.stem + '.txt'),
                              'a') as f:
                        f.write(('%g ' * len(line)).rstrip() % line + '\n')

            # W&B logging - Media Panel plots
            if len(
                    wandb_images
            ) < log_imgs and wandb_logger.current_epoch > 0:  # Check for test operation
                if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0:
                    box_data = [{
                        "position": {
                            "minX": xyxy[0],
                            "minY": xyxy[1],
                            "maxX": xyxy[2],
                            "maxY": xyxy[3]
                        },
                        "class_id": int(cls),
                        "box_caption": "%s %.3f" % (names[cls], conf),
                        "scores": {
                            "class_score": conf
                        },
                        "domain": "pixel"
                    } for *xyxy, conf, cls in pred.tolist()]
                    boxes = {
                        "predictions": {
                            "box_data": box_data,
                            "class_labels": names
                        }
                    }  # inference-space
                    wandb_images.append(
                        wandb_logger.wandb.Image(img[si],
                                                 boxes=boxes,
                                                 caption=path.name))
            wandb_logger.log_training_progress(
                predn, path,
                names) if wandb_logger and wandb_logger.wandb_run else None

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(
                    path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id':
                        image_id,
                        'category_id':
                        coco91class[int(p[5])] if is_coco else int(p[5]),
                        'bbox': [round(x, 3) for x in b],
                        'score':
                        round(p[4], 5)
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0],
                                  niou,
                                  dtype=torch.bool,
                                  device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_coords(img[si].shape[1:], tbox, shapes[si][0],
                             shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(
                        predn, torch.cat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(
                        -1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(
                        -1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(
                            1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[
                                    pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(
                                        detected
                                ) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f'test_batch{batch_i}_labels.jpg'  # labels
            Thread(target=plot_images,
                   args=(img, targets, paths, f, names),
                   daemon=True).start()
            f = save_dir / f'test_batch{batch_i}_pred.jpg'  # predictions
            Thread(target=plot_images,
                   args=(img, output_to_target(out), paths, f, names),
                   daemon=True).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats,
                                              plot=plots,
                                              save_dir=save_dir,
                                              names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%11i' * 2 + '%11.3g' * 4  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t2))  # speeds per image
    if not training:
        shape = (batch_size, 3, imgsz, imgsz)
        print(
            f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}'
            % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb_logger and wandb_logger.wandb:
            val_batches = [
                wandb_logger.wandb.Image(str(f), caption=f.name)
                for f in sorted(save_dir.glob('test*.jpg'))
            ]
            wandb_logger.log({"Validation": val_batches})
    if wandb_images:
        wandb_logger.log({"Bounding Box Debugger/Images": wandb_images})

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights
                 ).stem if weights is not None else ''  # weights
        anno_json = '../coco/annotations/instances_val2017.json'  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            check_requirements(['pycocotools'])
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, 'bbox')
            if is_coco:
                eval.params.imgIds = [
                    int(Path(x).stem) for x in dataloader.dataset.img_files
                ]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print(f'pycocotools unable to run: {e}')

    # Return results
    model.float()  # for training
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map,
            *(loss.cpu() / len(dataloader)).tolist()), maps, t
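
# ---------------------------------------------------------------------------
# Reference sketch (not part of the original example): the matching loop above
# ranks predictions against targets with a pairwise IoU matrix via box_iou().
# A minimal, self-contained version of that computation, assuming boxes in
# (x1, y1, x2, y2) format, could look like this.
# ---------------------------------------------------------------------------
import torch

def pairwise_iou(boxes1, boxes2):
    # Areas of every box in each set
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    # Broadcast to an (N, M, 2) grid of intersection corners
    lt = torch.max(boxes1[:, None, :2], boxes2[None, :, :2])  # top-left
    rb = torch.min(boxes1[:, None, 2:], boxes2[None, :, 2:])  # bottom-right
    wh = (rb - lt).clamp(min=0)  # zero width/height for non-overlapping pairs
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area1[:, None] + area2[None, :] - inter)  # (N, M) IoU matrix

# Usage mirroring the loop above: ious, i = pairwise_iou(predn[pi, :4], tbox[ti]).max(1)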
Ejemplo n.º 11
0
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Find index corresponding to a person
    idx_person = names.index("person")

    # SORT: initialize the tracker
    mot_tracker = sort_module.Sort(max_age=opt.max_age,
                                   min_hits=opt.min_hits,
                                   iou_threshold=opt.iou_threshold)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # SORT: number of people detected
                idxs_ppl = (
                    det[:, -1] == idx_person
                ).nonzero(as_tuple=False).squeeze(
                    dim=1)  # 1. List of indices with 'person' class detections
                dets_ppl = det[idxs_ppl, :-1].to(
                    "cpu")  # 2. Torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers = mot_tracker.update(dets_ppl)
                    for d in trackers:
                        plot_one_box(d[:-1],
                                     im0,
                                     label='ID' + str(int(d[-1])),
                                     color=colors[1],
                                     line_thickness=1)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
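
# ---------------------------------------------------------------------------
# Hypothetical invocation sketch (not part of the original example). detect()
# above reads its settings from a module-level `opt`; the flag names below are
# assumptions inferred from the attributes it accesses (opt.output, opt.source,
# opt.weights, opt.max_age, opt.min_hits, opt.iou_threshold, ...).
# ---------------------------------------------------------------------------
import argparse
import torch

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', default='yolov5s.pt')
    parser.add_argument('--source', default='0')  # webcam index, file or folder
    parser.add_argument('--output', default='inference/output')
    parser.add_argument('--img-size', type=int, default=640)
    parser.add_argument('--conf-thres', type=float, default=0.4)
    parser.add_argument('--iou-thres', type=float, default=0.5)  # NMS IoU
    parser.add_argument('--device', default='')
    parser.add_argument('--view-img', action='store_true')
    parser.add_argument('--save-txt', action='store_true')
    parser.add_argument('--classes', nargs='+', type=int)
    parser.add_argument('--agnostic-nms', action='store_true')
    parser.add_argument('--augment', action='store_true')
    parser.add_argument('--update', action='store_true')
    parser.add_argument('--max-age', type=int, default=1)  # SORT tracker
    parser.add_argument('--min-hits', type=int, default=3)  # SORT tracker
    parser.add_argument('--iou-threshold', type=float, default=0.3)  # SORT tracker
    opt = parser.parse_args()

    with torch.no_grad():
        detect()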
Ejemplo n.º 12
0
def test(
        weights=None,
        data="yolov5/data/coco128.yaml",
        batch_size=32,
        image_size=640,
        conf_thres=0.001,
        iou_thres=0.6,  # for NMS
        task="val",
        device="",
        single_cls=False,
        augment=False,
        verbose=False,
        save_txt=False,  # for auto-labelling
        save_hybrid=False,  # for hybrid auto-labelling
        save_conf=False,  # save auto-label confidences
        save_json=False,
        project="runs/test",
        name="exp",
        exist_ok=False,
        model=None,
        dataloader=None,
        save_dir=Path(""),  # for saving images
        plots=True,
        log_imgs=0,  # number of logged images
):
    arguments = locals()
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device

    else:  # called directly
        set_logging()
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = Path(increment_path(Path(project) / name,
                                       exist_ok=exist_ok))  # increment run
        (save_dir / "labels" if save_txt else save_dir).mkdir(
            parents=True, exist_ok=True)  # make dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        image_size = check_img_size(image_size,
                                    s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != "cpu"  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    is_coco = data.endswith("coco.yaml")  # is COCO dataset
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data["nc"])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs, wandb = min(log_imgs, 100), None  # ceil
    try:
        import wandb  # Weights & Biases
    except ImportError:
        log_imgs = 0

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, image_size, image_size),
                          device=device)  # init img
        _ = (model(img.half() if half else img)
             if device.type != "cpu" else None)  # run once
        path = (data["test"] if task == "test" else data["val"]
                )  # path to val/test images
        opt = OptFactory(arguments)
        dataloader = create_dataloader(path,
                                       image_size,
                                       batch_size,
                                       model.stride.max(),
                                       opt,
                                       pad=0.5,
                                       rect=True)[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {
        k: v
        for k, v in enumerate(
            model.names if hasattr(model, "names") else model.module.names)
    }
    coco91class = coco80_to_coco91_class()
    s = ("%20s" + "%12s" * 6) % (
        "Class",
        "Images",
        "Targets",
        "P",
        "R",
        "mAP@.5",
        "mAP@.5:.95",
    )
    p, r, f1, mp, mr, map50, map, t0, t1 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width

        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(
                img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:
                loss += compute_loss([x.float() for x in train_out], targets,
                                     model)[1][:3]  # box, obj, cls

            # Run NMS
            targets[:, 2:] *= torch.Tensor([width, height, width,
                                            height]).to(device)  # to pixels
            lb = ([targets[targets[:, 0] == i, 1:]
                   for i in range(nb)] if save_hybrid else []
                  )  # for autolabelling
            t = time_synchronized()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres,
                                         labels=lb)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((
                        torch.zeros(0, niou, dtype=torch.bool),
                        torch.Tensor(),
                        torch.Tensor(),
                        tcls,
                    ))
                continue

            # Predictions
            predn = pred.clone()
            scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0],
                         shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0
                                                  ]]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = ((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                             gn).view(-1).tolist())  # normalized xywh
                    line = ((cls, *xywh, conf) if save_conf else
                            (cls, *xywh))  # label format
                    with open(save_dir / "labels" / (path.stem + ".txt"),
                              "a") as f:
                        f.write(("%g " * len(line)).rstrip() % line + "\n")

            # W&B logging
            if plots and len(wandb_images) < log_imgs:
                box_data = [{
                    "position": {
                        "minX": xyxy[0],
                        "minY": xyxy[1],
                        "maxX": xyxy[2],
                        "maxY": xyxy[3],
                    },
                    "class_id": int(cls),
                    "box_caption": "%s %.3f" % (names[cls], conf),
                    "scores": {
                        "class_score": conf
                    },
                    "domain": "pixel",
                } for *xyxy, conf, cls in pred.tolist()]
                boxes = {
                    "predictions": {
                        "box_data": box_data,
                        "class_labels": names
                    }
                }  # inference-space
                wandb_images.append(
                    wandb.Image(img[si], boxes=boxes, caption=path.name))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(
                    path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        "image_id":
                        image_id,
                        "category_id":
                        coco91class[int(p[5])] if is_coco else int(p[5]),
                        "bbox": [round(x, 3) for x in b],
                        "score":
                        round(p[4], 5),
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0],
                                  niou,
                                  dtype=torch.bool,
                                  device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_coords(img[si].shape[1:], tbox, shapes[si][0],
                             shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(
                        pred, torch.cat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(
                            1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[
                                    pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if (len(detected) == nl
                                    ):  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f"test_batch{batch_i}_labels.jpg"  # labels
            Thread(target=plot_images,
                   args=(img, targets, paths, f, names),
                   daemon=True).start()
            f = save_dir / f"test_batch{batch_i}_pred.jpg"  # predictions
            Thread(
                target=plot_images,
                args=(img, output_to_target(output), paths, f, names),
                daemon=True,
            ).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats,
                                              plot=plots,
                                              save_dir=save_dir,
                                              names=names)
        p, r, ap50, ap = (
            p[:, 0],
            r[:, 0],
            ap[:, 0],
            ap.mean(1),
        )  # [P, R, AP@0.5, AP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = "%20s" + "%12.3g" * 6  # print format
    print(pf % ("all", seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1e3 for x in (t0, t1, t0 + t1)) + (
        image_size,
        image_size,
        batch_size,
    )  # tuple
    if not training:
        print(
            "Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g"
            % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb and wandb.run:
            wandb.log({"Images": wandb_images})
            wandb.log({
                "Validation": [
                    wandb.Image(str(f), caption=f.name)
                    for f in sorted(save_dir.glob("test*.jpg"))
                ]
            })

    # Save JSON
    if save_json and len(jdict):
        w = (Path(weights[0] if isinstance(weights, list) else weights).stem
             if weights is not None else "")  # weights
        anno_json = "../coco/annotations/instances_val2017.json"  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print("\nEvaluating pycocotools mAP... saving %s..." % pred_json)
        with open(pred_json, "w") as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, "bbox")
            if is_coco:
                eval.params.imgIds = [
                    int(Path(x).stem) for x in dataloader.dataset.img_files
                ]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print(f"pycocotools unable to run: {e}")

    # Return results
    if not training:
        s = (
            f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}"
            if save_txt else "")
        print(f"Results saved to {save_dir}{s}")
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map,
            *(loss.cpu() / len(dataloader)).tolist()), maps, t
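
# ---------------------------------------------------------------------------
# Reference sketch (not part of the original example): the save_json branch
# above converts xyxy predictions to COCO-style [x_top_left, y_top_left, w, h]
# boxes via xyxy2xywh followed by a half-width/height shift. A minimal
# equivalent conversion:
# ---------------------------------------------------------------------------
import torch

def xyxy_to_coco(boxes_xyxy):
    boxes = boxes_xyxy.clone()
    boxes[:, 2:] = boxes[:, 2:] - boxes[:, :2]  # width, height from corners
    return boxes  # columns: top-left x, top-left y, w, h

# e.g. xyxy_to_coco(torch.tensor([[10., 20., 50., 80.]])) -> tensor([[10., 20., 40., 60.]])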
Ejemplo n.º 13
0
    def __init__(self, master, opt):
        self.parent = master
        self.parent.title("Semi Automatic Image Annotation Tool")
        self.frame = Frame(self.parent)
        self.frame.pack(fill=BOTH, expand=1)
        self.parent.resizable(width=False, height=False)

        # setup yolov5 model
        self.opt = opt
        opt.augment = True
        opt.update = False
        opt.agnostic_nms = False
        # initialize
        set_logging()
        device = select_device(opt.device)
        self.device = device

        # load yolo5 object detection model
        model = attempt_load(opt.weights,
                             map_location=device)  # load FP32 model
        self.img_size = check_img_size(opt.img_size,
                                       s=model.stride.max())  # check img_size
        half = device.type != 'cpu'  # half precision only supported on CUDA
        self.half = half
        if half:
            model.half()  # to FP16
        self.object_model = model
        # get object detection names
        self.names = ['person']

        # load face detect model
        self.face_model = MTCNN(keep_all=True,
                                margin=opt.face_margin,
                                device=device)
        self.face_landmarks = opt.face_landmarks

        # Initialize class variables
        self.img = None
        self.tkimg = None
        self.imageDir = ''
        self.imageDirPathBuffer = ''
        self.imageList = []
        self.imageTotal = 0
        self.imageCur = 0
        self.cur = 0
        self.bboxIdList = []
        self.bboxList = []
        self.bboxPointList = []
        self.o1 = None
        self.o2 = None
        self.o3 = None
        self.o4 = None
        self.bboxId = None
        self.currLabel = None
        self.editbboxId = None
        self.currBboxColor = None
        self.zoomImgId = None
        self.zoomImg = None
        self.zoomImgCrop = None
        self.tkZoomImg = None
        self.hl = None
        self.vl = None
        self.editPointId = None
        self.filename = None
        self.filenameBuffer = None
        self.objectLabelList = []
        self.EDIT = False
        # record image names for saving previous results
        self.annoList = {}

        # initialize mouse state
        self.STATE = {'x': 0, 'y': 0}
        self.STATE_COCO = {'click': 0}

        # initialize annotation file
        self.anno_filename = 'annotations.csv'
        self.annotation_file = open('annotations/' + self.anno_filename, 'w+')
        self.annotation_file.write("")
        self.annotation_file.close()
        # ------------------ GUI ---------------------

        # Control Panel
        self.ctrlPanel = Frame(self.frame)
        self.ctrlPanel.grid(row=0, column=0, sticky=W + N)
        self.openBtn = Button(self.ctrlPanel,
                              text='Open',
                              command=self.open_image)
        self.openBtn.pack(fill=X, side=TOP)
        self.openDirBtn = Button(self.ctrlPanel,
                                 text='Open Dir',
                                 command=self.open_image_dir)
        self.openDirBtn.pack(fill=X, side=TOP)
        self.nextBtn = Button(self.ctrlPanel,
                              text='Next -->',
                              command=self.open_next)
        self.nextBtn.pack(fill=X, side=TOP)
        self.previousBtn = Button(self.ctrlPanel,
                                  text='<-- Previous',
                                  command=self.open_previous)
        self.previousBtn.pack(fill=X, side=TOP)
        self.saveBtn = Button(self.ctrlPanel, text='Save', command=self.save)
        self.saveBtn.pack(fill=X, side=TOP)
        self.semiAutoBtn = Button(self.ctrlPanel,
                                  text="Show Suggestions",
                                  command=self.automate)
        self.semiAutoBtn.pack(fill=X, side=TOP)
        self.disp = Label(self.ctrlPanel, text='Coordinates:')
        self.disp.pack(fill=X, side=TOP)
        self.mb = Menubutton(self.ctrlPanel,
                             text="COCO Classes for Suggestions",
                             relief=RAISED)
        self.mb.pack(fill=X, side=TOP)
        self.mb.menu = Menu(self.mb, tearoff=0)
        self.mb["menu"] = self.mb.menu
        self.addCocoBtn = Button(self.ctrlPanel,
                                 text="+",
                                 command=self.add_labels_coco)
        self.addCocoBtn.pack(fill=X, side=TOP)
        self.zoomPanelLabel = Label(self.ctrlPanel,
                                    text="Precision View Panel")
        self.zoomPanelLabel.pack(fill=X, side=TOP)
        self.zoomcanvas = Canvas(self.ctrlPanel, width=150, height=150)
        self.zoomcanvas.pack(fill=X, side=TOP, anchor='center')

        # Image Editing Region
        self.canvas = Canvas(self.frame,
                             width=self.img_size,
                             height=self.img_size)
        self.canvas.grid(row=0, column=1, sticky=W + N)
        self.canvas.bind("<Button-1>", self.mouse_click)
        self.canvas.bind("<Motion>", self.mouse_move, "+")
        self.canvas.bind("<B1-Motion>", self.mouse_drag)
        self.canvas.bind("<ButtonRelease-1>", self.mouse_release)
        self.parent.bind("<Key-Left>", self.open_previous)
        self.parent.bind("<Key-Right>", self.open_next)
        self.parent.bind("<Escape>", self.cancel_bbox)

        # Labels and Bounding Box Lists Panel
        self.listPanel = Frame(self.frame)
        self.listPanel.grid(row=0, column=2, sticky=W + N)
        self.listBoxNameLabel = Label(self.listPanel,
                                      text="List of Objects").pack(fill=X,
                                                                   side=TOP)
        self.objectListBox = Listbox(self.listPanel, width=40)
        self.objectListBox.pack(fill=X, side=TOP)
        self.delObjectBtn = Button(self.listPanel,
                                   text="Delete",
                                   command=self.del_bbox)
        self.delObjectBtn.pack(fill=X, side=TOP)
        self.clearAllBtn = Button(self.listPanel,
                                  text="Clear All",
                                  command=self.clear_bbox)
        self.clearAllBtn.pack(fill=X, side=TOP)
        self.classesNameLabel = Label(self.listPanel,
                                      text="Classes").pack(fill=X, side=TOP)
        self.textBox = Entry(self.listPanel, text="Enter label")
        self.textBox.pack(fill=X, side=TOP)

        self.addLabelBtn = Button(self.listPanel,
                                  text="+",
                                  command=self.add_label).pack(fill=X,
                                                               side=TOP)
        self.delLabelBtn = Button(self.listPanel,
                                  text="-",
                                  command=self.del_label).pack(fill=X,
                                                               side=TOP)

        self.labelListBox = Listbox(self.listPanel)
        self.labelListBox.pack(fill=X, side=TOP)
        for name in self.names + ['face']:
            self.labelListBox.insert(END, str(name))

        self.cocoLabels = config.labels_to_names.values()
        self.cocoIntVars = []

        for idxcoco, label_coco in enumerate(self.cocoLabels):
            self.cocoIntVars.append(IntVar())
            self.mb.menu.add_checkbutton(label=label_coco,
                                         variable=self.cocoIntVars[idxcoco])
        # print(self.cocoIntVars)

        # STATUS BAR
        self.statusBar = Frame(self.frame, width=500)
        self.statusBar.grid(row=1, column=1, sticky=W + N)
        self.processingLabel = Label(self.statusBar,
                                     text="                      ")
        self.processingLabel.pack(side="left", fill=X)
        self.imageIdxLabel = Label(self.statusBar,
                                   text="                      ")
        self.imageIdxLabel.pack(side="right", fill=X)
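
# ---------------------------------------------------------------------------
# Hypothetical launch sketch (not part of the original example): the annotator
# above is a Tkinter application, so it would typically be started with a root
# window plus parsed options. The flag names mirror the attributes used in
# __init__ (weights, img_size, device, face_margin, face_landmarks) and the
# class name `AnnotatorGUI` is a placeholder for whichever class this __init__
# belongs to.
# ---------------------------------------------------------------------------
import argparse
from tkinter import Tk

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', default='yolov5s.pt')
    parser.add_argument('--img-size', type=int, default=640)
    parser.add_argument('--device', default='')
    parser.add_argument('--face-margin', type=int, default=0)
    parser.add_argument('--face-landmarks', action='store_true')
    opt = parser.parse_args()

    root = Tk()
    app = AnnotatorGUI(root, opt)  # placeholder class name
    root.mainloop()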
Ejemplo n.º 14
0
def test(data,
         weights=None,
         batch_size=16,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         save_dir=Path(''),  # for saving images
         save_txt=False,  # for auto-labelling
         save_conf=False,
         plots=True):
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device

    else:  # called directly
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)
        save_txt = opt.save_txt  # save *.txt labels

        # Remove previous
        if os.path.exists(save_dir):
            shutil.rmtree(save_dir)  # delete dir
        os.makedirs(save_dir)  # make new dir

        if save_txt:
            out = save_dir / 'autolabels'
            if os.path.exists(out):
                shutil.rmtree(out)  # delete dir
            os.makedirs(out)  # make new dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt,
                                       hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]

    seen = 0
    names = model.names if hasattr(model, 'names') else model.module.names
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # box, obj, cls

            # Run NMS
            t = time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                x = pred.clone()
                x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4], shapes[si][0], shapes[si][1])  # to original
                for *xyxy, conf, cls in x:
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, conf, *xywh) if save_conf else (cls, *xywh)  # label format
                    with open(str(out / Path(paths[si]).stem) + '.txt', 'a') as f:
                        f.write(('%g ' * len(line) + '\n') % line)

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = Path(paths[si]).stem
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                                     'image_id':    int(image_id) if image_id.isnumeric() else image_id,
                                     'category_id': coco91class[int(p[5])],
                                     'bbox':        [round(x, 3) for x in b],
                                     'score':       round(p[4], 5)
                                     })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and save_dir and batch_i < 1:
            f = save_dir / f'test_batch{batch_i}_gt.jpg'  # filename
            plot_images(img, targets, paths, str(f), names)  # ground truth
            f = save_dir / f'test_batch{batch_i}_pred.jpg'
            plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots,
                                              fname=os.path.join(save_dir, 'precision-recall_curve.png'))
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, AP@0.5, AP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        file = save_dir / f"detections_val2017_{w}_results.json"  # predicted annotations file
        print('\nCOCO mAP with pycocotools... saving %s...' % file)
        with open(file, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]
            cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(str(file))  # initialize COCO pred api
            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print('ERROR: pycocotools unable to run: %s' % e)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
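
# ---------------------------------------------------------------------------
# Reference sketch (not part of the original example): the save_txt branch
# above writes one normalized-xywh line per detection. A minimal stand-alone
# version of that conversion, from pixel xyxy to (cls, [conf,] cx, cy, w, h)
# normalized by image size:
# ---------------------------------------------------------------------------
def to_label_line(xyxy, img_w, img_h, cls, conf=None):
    x1, y1, x2, y2 = xyxy
    cx, cy = (x1 + x2) / 2 / img_w, (y1 + y2) / 2 / img_h
    w, h = (x2 - x1) / img_w, (y2 - y1) / img_h
    vals = (cls, conf, cx, cy, w, h) if conf is not None else (cls, cx, cy, w, h)
    return ('%g ' * len(vals)).rstrip() % vals  # newline added by the caller

# e.g. to_label_line([100, 50, 300, 250], 640, 480, 0) -> '0 0.3125 0.3125 0.3125 0.416667'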
Ejemplo n.º 15
0
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
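
# ---------------------------------------------------------------------------
# Reference sketch (not part of the original example): check_img_size above
# validates the requested inference size against the model stride. A minimal
# approximation of that behaviour (round up to the nearest stride multiple and
# warn when the value changes) is shown here as an assumption about what the
# real YOLOv5 helper does.
# ---------------------------------------------------------------------------
import math

def round_to_stride(img_size, stride=32):
    new_size = int(math.ceil(img_size / stride) * stride)
    if new_size != img_size:
        print(f'WARNING: img_size {img_size} is not a multiple of {stride}, using {new_size}')
    return new_size

# e.g. round_to_stride(641, 32) -> 672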
Ejemplo n.º 16
0
def detect(
    weights="yolov5s.pt",
    source="yolov5/data/images",
    img_size=640,
    conf_thres=0.75,
    iou_thres=0.45,
    device="",
    view_img=False,
    save_txt=False,
    save_conf=False,
    classes=None,
    agnostic_nms=False,
    augment=False,
    update=False,
    project="runs/detect",
    name="exp",
    exist_ok=False,
    save_img=False,
):
    """
    Args:
        weights: str
            model.pt path(s)
        source: str
            file/folder, 0 for webcam
        img_size: int
            inference size (pixels)
        conf_thres: float
            object confidence threshold
        iou_thres: float
            IOU threshold for NMS
        device: str
            cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img: bool
            display results
        save_txt: bool
            save results to *.txt
        save_conf: bool
            save confidences in save_txt labels
        classes: int
            filter by class: [0], or [0, 2, 3]
        agnostic_nms: bool
            class-agnostic NMS
        augment: bool
            augmented inference
        update: bool
            update all models
        project: str
            save results to project/name
        name: str
            save results to project/name
        exist_ok: bool
            existing project/name ok, do not increment
    """
    imgsz = img_size  # inference size alias used below
    webcam = (
        source.isnumeric()
        or source.endswith(".txt")
        or source.lower().startswith(("rtsp://", "rtmp://", "http://"))
    )

    # Directories
    save_dir = Path(
        increment_path(Path(project) / name, exist_ok=exist_ok)
    )  # increment run
    (save_dir / "labels" if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True
    )  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != "cpu"  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name="resnet101", n=2)  # initialize
        modelc.load_state_dict(
            torch.load("weights/resnet101.pt", map_location=device)["model"]
        )  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, "module") else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != "cpu" else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred,
            conf_thres,
            iou_thres,
            classes=classes,
            agnostic=agnostic_nms,
        )
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], "%g: " % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, "", im0s, getattr(dataset, "frame", 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / "labels" / p.stem) + (
                "" if dataset.mode == "image" else f"_{frame}"
            )  # img.txt
            s += "%gx%g " % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}s, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (
                            (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn)
                            .view(-1)
                            .tolist()
                        )  # normalized xywh
                        line = (
                            (cls, *xywh, conf) if save_conf else (cls, *xywh)
                        )  # label format
                        with open(txt_path + ".txt", "a") as f:
                            f.write(("%g " * len(line)).rstrip() % line + "\n")

                    if save_img or view_img:  # Add bbox to image
                        label = f"{names[int(cls)]} {conf:.2f}"
                        plot_one_box(
                            xyxy,
                            im0,
                            label=label,
                            color=colors[int(cls)],
                            line_thickness=3,
                        )

            # Print time (inference + NMS)
            print(f"{s}Done. ({t2 - t1:.3f}s)")

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)

            # Save results (image with detections)
            if save_img:
                if dataset.mode == "image":
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = "mp4v"  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)
                        )
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = (
            f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}"
            if save_txt
            else ""
        )
        print(f"Results saved to {save_dir}{s}")

    print(f"Done. ({time.time() - t0:.3f}s)")
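
# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the original example): unlike the
# argparse-driven variants above, this detect() takes plain keyword arguments,
# so it can be called directly; the weight and source values are placeholders.
# ---------------------------------------------------------------------------
import torch

if __name__ == "__main__":
    with torch.no_grad():
        detect(
            weights="yolov5s.pt",
            source="yolov5/data/images",
            img_size=640,
            conf_thres=0.5,
            view_img=False,
            save_img=True,
        )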
Ejemplo n.º 17
0
    parser.add_argument('--logdir',
                        type=str,
                        default='runs/',
                        help='logging directory')
    parser.add_argument('--workers',
                        type=int,
                        default=8,
                        help='maximum number of dataloader workers')
    opt = parser.parse_args()

    # Set DDP variables
    opt.total_batch_size = opt.batch_size
    opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
    opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
    set_logging(opt.global_rank)

    # Resume
    if opt.resume:  # resume an interrupted run
        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent path
        log_dir = Path(ckpt).parent.parent  # runs/exp0
        assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
        with open(log_dir / 'opt.yaml') as f:
            opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader))  # replace
        opt.cfg, opt.weights, opt.resume = '', ckpt, True
        logger.info('Resuming training from %s' % ckpt)
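When --resume is passed without an explicit path, the snippet above falls back to get_latest_run() to locate the most recent checkpoint. A minimal sketch of such a helper, assuming checkpoints named last*.pt under a runs/ directory:

import glob
import os

def get_latest_run(search_dir='runs'):
    # Sketch: return the most recently written 'last*.pt' checkpoint, or '' if none exists.
    ckpts = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
    return max(ckpts, key=os.path.getctime) if ckpts else ''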
Ejemplo n.º 18
0
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or \
        source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) or \
        source.lower().startswith('intel')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    folder_main = out.split('/')[0]
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    folder_features = folder_main + '/features'
    if os.path.exists(folder_features):
        shutil.rmtree(folder_features)  # delete features output folder
    folder_crops = folder_main + '/image_crops'
    if os.path.exists(folder_crops):
        shutil.rmtree(folder_crops)  # delete output folder with object crops
    os.makedirs(out)  # make new output folder
    os.makedirs(folder_features)  # make new output folder
    os.makedirs(folder_crops)  # make new output folder

    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights[0], map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        if source.lower().startswith('intel'):
            dataset = LoadRealSense2()
            save_img = True
        else:
            dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # frames per second
    # TODO: read FPS from the Intel stream or from the given footage
    fps = 30  # dataset.cap.get(cv2.CAP_PROP_FPS)
    critical_time_frames = opt.time * fps

    # COUNTER: initialization
    counter = VoteCounter(critical_time_frames, fps)
    print('CRITICAL TIME IS ', opt.time, 'sec, or ', counter.critical_time,
          ' frames')

    # Find index corresponding to a person
    idx_person = names.index("person")

    # Deep SORT: initialize the tracker
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # AlphaPose: initialization
    # args_p = update_config(opt.config_alphapose)
    # cfg_p = update_config(args_p.ALPHAPOSE.cfg)
    #
    # args_p.ALPHAPOSE.tracking = args_p.ALPHAPOSE.pose_track or args_p.ALPHAPOSE.pose_flow
    #
    # demo = SingleImageAlphaPose(args_p.ALPHAPOSE, cfg_p, device)
    # output_pose = opt.output.split('/')[0] + '/pose'
    # if not os.path.exists(output_pose):
    #     os.mkdir(output_pose)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    if device.type != 'cpu':
        _ = model(img.half() if half else img)  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # TODO => COUNTER: draw queueing ROI
        #  compute urn centroid (1st frame only) and plot a bounding box around it
        # if dataset.frame == 1:
        #     counter.read_urn_coordinates(opt.urn, im0s, opt.radius)
        # counter.plot_urn_bbox(im0s)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                if source.lower().startswith('intel'):
                    p, s, im0, frame = path, '%g: ' % i, im0s[i].copy(), dataset.count
                else:
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            print(save_path)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Deep SORT: person class only
                idxs_ppl = (det[:, -1] == idx_person).nonzero(as_tuple=False).squeeze(dim=1)  # 1. indices with 'person' class detections
                dets_ppl = det[idxs_ppl, :-1]  # 2. torch.Tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # Deep SORT: convert data into a proper format
                xywhs = xyxy2xywh(dets_ppl[:, :-1]).to("cpu")
                confs = dets_ppl[:, 4].to("cpu")

                # Deep SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers, features = deepsort.update(xywhs, confs, im0)
                    # tracks inside a critical sphere
                    trackers_inside = []
                    for i, d in enumerate(trackers):
                        plot_one_box(d[:-1],
                                     im0,
                                     label='ID' + str(int(d[-1])),
                                     color=colors[1],
                                     line_thickness=1)

                        # TODO: queue COUNTER
                        # d_include = counter.centroid_distance(d, im0, colors[1], dataset.frame)
                        # if d_include:
                        #     trackers_inside.append(d)

                    # ALPHAPOSE: show skeletons for bounding boxes inside the critical sphere
                    # if len(trackers_inside) > 0:
                    #     pose = demo.process('frame_'+str(dataset.frame), im0, trackers_inside)
                    #     im0 = demo.vis(im0, pose)
                    #     demo.writeJson([pose], output_pose, form=args_p.ALPHAPOSE.format, for_eval=args_p.ALPHAPOSE.eval)
                    #
                    #     counter.save_features_and_crops(im0, dataset.frame, trackers_inside, features, folder_main)

            cv2.putText(im0, 'Voted ' + str(len(counter.voters_count)),
                        (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255),
                        2)

            print('NUM VOTERS', len(counter.voters))
            print(list(counter.voters.keys()))

            # COUNTER
            if len(counter.voters) > 0:
                counter.save_voter_trajectory(dataset.frame, folder_main)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        if type(vid_cap) is dict:  # estimate distance_in_meters
                            # TODO hard code
                            w, h, fps = 640, 480, 6
                        else:
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
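Before the person detections are handed to Deep SORT, the corner-format boxes are converted to center format with xyxy2xywh. A hedged sketch of that conversion for an (n, 4) torch tensor (named _sketch here to avoid confusion with the library function):

import torch

def xyxy2xywh_sketch(x):
    # Convert boxes from [x1, y1, x2, y2] to [x_center, y_center, width, height].
    y = x.clone()
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
    y[:, 2] = x[:, 2] - x[:, 0]        # width
    y[:, 3] = x[:, 3] - x[:, 1]        # height
    return y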
Ejemplo n.º 19
0
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
"""

from pathlib import Path

import torch

from yolov5.models.yolo import Model
from yolov5.utils.general import check_requirements, set_logging
from yolov5.utils.google_utils import attempt_download
from yolov5.utils.torch_utils import select_device

dependencies = ['torch', 'yaml']
check_requirements(Path(__file__).parent / 'requirements.txt',
                   exclude=('pycocotools', 'thop'))
set_logging()


def create(name, pretrained, channels, classes, autoshape):
    """Creates a specified YOLOv5 model

    Arguments:
        name (str): name of model, i.e. 'yolov5s'
        pretrained (bool): load pretrained weights into the model
        channels (int): number of input channels
        classes (int): number of model classes

    Returns:
        pytorch model
    """
    config = Path(
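The create() helper above backs the torch.hub entry points shown in the docstring. A typical usage sketch (the image URL is only an illustrative input, not part of the snippet):

import torch

model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
results = model('https://ultralytics.com/images/zidane.jpg')  # inference on one image
results.print()  # per-class detection summary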
Ejemplo n.º 20
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')  # ONNX-only
    parser.add_argument('--simplify', action='store_true', help='simplify ONNX model')  # ONNX-only
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(opt.device)

    # add yolov5 folder to system path
    here = Path(__file__).parents[1].absolute()
    yolov5_folder_dir = str(here)
    sys.path.insert(0, yolov5_folder_dir)

    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # Input
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)  # image size(1,3,320,192) iDetection

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        # elif isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not opt.grid  # set Detect() layer grid export
    for _ in range(2):
        y = model(img)  # dry runs
    print(f"\n{colorstr('PyTorch:')} starting from {opt.weights} ({file_size(opt.weights):.1f} MB)")

    # remove yolov5 folder from system path
    sys.path.remove(yolov5_folder_dir)

    # TorchScript export -----------------------------------------------------------------------------------------------
    prefix = colorstr('TorchScript:')
    try:
        print(f'\n{prefix} starting export with torch {torch.__version__}...')
        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
        ts = torch.jit.trace(model, img, strict=False)
        ts = optimize_for_mobile(ts)  # https://pytorch.org/tutorials/recipes/script_optimized.html
        ts.save(f)
        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # ONNX export ------------------------------------------------------------------------------------------------------
    prefix = colorstr('ONNX:')
    try:
        import onnx

        print(f'{prefix} starting export with onnx {onnx.__version__}...')
        f = opt.weights.replace('.pt', '.onnx')  # filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
                                        'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)

        # Checks
        model_onnx = onnx.load(f)  # load onnx model
        onnx.checker.check_model(model_onnx)  # check onnx model
        # print(onnx.helper.printable_graph(model_onnx.graph))  # print

        # Simplify
        if opt.simplify:
            try:
                check_requirements(['onnx-simplifier'])
                import onnxsim

                print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
                model_onnx, check = onnxsim.simplify(model_onnx,
                                                     dynamic_input_shape=opt.dynamic,
                                                     input_shapes={'images': list(img.shape)} if opt.dynamic else None)
                assert check, 'assert check failed'
                onnx.save(model_onnx, f)
            except Exception as e:
                print(f'{prefix} simplifier failure: {e}')
        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # CoreML export ----------------------------------------------------------------------------------------------------
    prefix = colorstr('CoreML:')
    try:
        import coremltools as ct

        print(f'{prefix} starting export with coremltools {ct.__version__}...')
        # convert model from torchscript and apply pixel scaling as per detect.py
        model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        f = opt.weights.replace('.pt', '.mlmodel')  # filename
        model.save(f)
        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # Finish
    print(f'\nExport complete ({time.time() - t:.2f}s). Visualize with https://github.com/lutzroeder/netron.')
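Once the ONNX file above has been written, it can be sanity-checked with onnxruntime. A minimal sketch, assuming the default yolov5s.onnx output name and the 640x640 input size implied by the arguments above:

import numpy as np
import onnxruntime as ort

session = ort.InferenceSession('yolov5s.onnx')
dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)  # NCHW input, values in [0, 1]
outputs = session.run(None, {'images': dummy})  # 'images' is the input name used by the export
print([o.shape for o in outputs])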