Example #1
def download(split='val2017', reload=False):
    """
    Downloads COCO split
    Returns:
        Path to downloaded split directory
    """
    import os
    import logging
    from io import BytesIO
    from pathlib import Path
    from urllib.request import urlopen
    from zipfile import ZipFile
    from ml import hub

    # BASE_DOWNLOAD_URL(split) is a module-level helper (defined elsewhere)
    # that returns the archive URL for the requested split.
    download_url = BASE_DOWNLOAD_URL(split)
    download_dir = Path(os.path.join(hub.get_dir(), 'COCO'))
    download_dir.mkdir(exist_ok=True, parents=True)
    split_dir = Path(os.path.join(download_dir, f'{split}'))

    if split_dir.exists() and any(split_dir.iterdir()) and not reload:
        # already exists
        logging.info(
            f'Skipping download of COCO: {split} as it already exists')
    else:
        # download
        with urlopen(download_url) as zipresp:
            with ZipFile(BytesIO(zipresp.read())) as zfile:
                zfile.extractall(download_dir)
        logging.info(f'Downloaded COCO: {split}')

    return str(split_dir)
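
A minimal usage sketch, assuming this module is importable as ml.vision.datasets.coco (as the later examples do) and that ml.hub resolves a writable cache directory; the call is a no-op when the split has already been extracted:

# Hypothetical usage of download(); only the import path used by the later
# examples is assumed.
from ml.vision.datasets.coco import download

val_dir = download(split='val2017', reload=False)
print(f"COCO val2017 extracted to {val_dir}")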
Example #2
def attempt_download(params, epoch=8, model_dir=None, force=False):
    import os
    from ml import hub
    if model_dir is None:
        hub_dir = hub.get_dir()
        model_dir = os.path.join(hub_dir, 'checkpoints')

    try:
        os.makedirs(model_dir)
    except OSError as e:
        import errno
        if e.errno == errno.EEXIST:
            # Directory already exists, ignore.
            pass
        else:
            # Unexpected OSError, re-raise.
            raise
    # Google Drive file IDs of the known pretrained checkpoints.
    chkpts = {
        'rfcn_dcn_coco': '0B6T5quL13CdHZ3ZrRVNjcnFmZk0',
        'rfcn_coco': None,
    }
    gdrive_id = chkpts[params]
    path = f"{model_dir}/{params}-{epoch:04d}.params"
    if hub.download_gdrive(gdrive_id, path, force=force) != 0:
        raise IOError(f"Failed to download to {path}")
    return path
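
A hedged usage sketch, assuming ml.hub.download_gdrive returns 0 on success (as the check above implies) and that the Google Drive file is still reachable:

# Hypothetical usage of attempt_download(); the destination depends on hub.get_dir().
path = attempt_download('rfcn_dcn_coco', epoch=8)
print(f"checkpoint saved to {path}")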
Example #3
def build(name,
          model,
          spec,
          model_dir=None,
          backend='trt',
          reload=False,
          **kwargs):
    r"""
    Args:
        name(str): checkpoint name to save and load
        model(nn.Module): pytorch model on CPU
        spec(Tuple[B, C, H, W]): input shape including dynamic axies as -1
    Kwargs:
        model_dir(str, Path): path to save and load model checkpoint
        backend(str): deployment backend
        reload(bool): whether to force deploying the model with backend

        input_names(List[str]): list of names of input tensor args
        output_names([List[str]): list of names of output tensors
        batch_size(int): max batch size as the dynamic axis 0
        workspace_size(int):
        fp16(bool):
        int8(bool):
        strict_type_constraints(bool): type mode strictly forced or not
        int8_calib_batch_size(int):
        int8_calib_preprocess_func(Callable):
        min_shapes(Tuple):
        opt_shapes(Tuple):
        max_shapes(Tuple)
    """
    from ml import hub
    if model_dir is None:
        hub_dir = hub.get_dir()
        model_dir = os.path.join(hub_dir, 'checkpoints')

    try:
        os.makedirs(model_dir)
    except OSError as e:
        import errno
        if e.errno == errno.EEXIST:
            # Directory already exists, ignore.
            pass
        else:
            # Unexpected OSError, re-raise.
            raise

    from time import time
    t = time()
    if backend in ['trt', 'tensorrt']:
        # XXX No intermediate ONNX archive
        from . import trt as backend
        chkpt_path = Path(f"{model_dir}/{name}.pth")

        if chkpt_path.exists() and not reload:
            logging.info(f"Loading torch2trt checkpoint from {chkpt_path}")
            chkpt = th.load(chkpt_path)
            predictor = backend.TRTPredictor()
            predictor.load_state_dict(chkpt)
            return predictor

        trt = Path(f"{model_dir}/{name}.trt")
        input_names = kwargs.pop('input_names', None)
        output_names = kwargs.pop('output_names', None)

        if trt.exists() and not reload:
            # Load from the previously saved deployment engine
            logging.info(f"Building TensorRT inference engine from {trt}")
            engine = backend.build(trt)
            if not (input_names and output_names):
                input_names, output_names = get_input_output_names(engine)
            predictor = backend.TRTPredictor(engine=engine,
                                             input_names=input_names,
                                             output_names=output_names)
        else:
            batch_size = kwargs.pop('batch_size', 1)
            workspace_size = kwargs.pop('workspace_size', GiB(2))
            fp16 = kwargs.pop('amp', False)
            fp16 = fp16 or kwargs.pop('fp16', False)
            int8 = kwargs.pop('int8', False)
            strict_type_constraints = kwargs.pop('strict_type_constraints',
                                                 False)  # amp implied
            int8_calib_batch_size = kwargs.pop('int8_calib_batch_size', 16)
            device = next(model.parameters()).device
            min_shapes = kwargs.get('min_shapes')
            if min_shapes is not None:
                inputs = tuple([
                    th.rand(1, *shape, device=device) for shape in min_shapes
                ])
            else:
                inputs = tuple(
                    [th.rand(1, *shape, device=device) for shape in spec])
            predictor = backend.torch2trt(
                model,
                inputs,
                max_batch_size=batch_size,
                max_workspace_size=workspace_size,
                input_names=input_names,
                output_names=output_names,
                fp16_mode=fp16,
                int8_mode=int8,
                int8_calib_batch_size=int8_calib_batch_size,
                strict_type_constraints=strict_type_constraints,
                use_onnx=True,
                **kwargs)
        logging.info(f"Saving TensorRT checkpoint to {chkpt_path}")
        io.save(predictor.state_dict(), chkpt_path)
        logging.info(f"Built TensorRT inference engine for {time() - t:.3f}s")
        return predictor
    elif backend == 'onnx':
        onnx_path = Path(f"{model_dir}/{name}.onnx")
        batch_size = kwargs.pop('batch_size', 1)
        workspace_size = kwargs.pop('workspace_size', GiB(1))
        amp = kwargs.pop('amp', False)
        if not onnx_path.exists() or reload:
            # print(spec, onnx_path, kwargs)
            export(model, spec, onnx_path, **kwargs)
        import onnxruntime
        from .onnx import ONNXPredictor
        engine = onnxruntime.InferenceSession(str(onnx_path))
        predictor = ONNXPredictor(engine)
        logging.info(
            f"Built ONNX inference engine at {onnx_path} in {time() - t:.3f}s"
        )
    else:
        raise ValueError(f"Unsupported backend: {backend}")
    return predictor
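
A sketch of calling build() with the TensorRT backend, assuming a CUDA environment with torch2trt available; resnet18 merely stands in for any nn.Module and is not part of the original code. Note that spec is a list of per-input shapes without the batch dimension, matching how the function draws random inputs above:

# Hypothetical usage of build(); the model and shapes are illustrative only.
import torch as th
from torchvision.models import resnet18

model = resnet18().eval()           # the docstring expects the model on CPU
predictor = build('resnet18_trt',
                  model,
                  [(3, 224, 224)],  # one input of shape (C, H, W)
                  backend='trt',
                  batch_size=8,
                  fp16=True)
# The returned TRTPredictor is assumed to expose predict(), as used by the
# test in Example #4.
out = predictor.predict(th.rand(1, 3, 224, 224).cuda())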
Example #4
    def deploy(self,
               name='yolo5x',
               batch_size=10,
               spec=(3, 640, 640),
               fp16=True,
               backend='trt',
               reload=False,
               **kwargs):
        r"""Deploy optimized runtime backend.
        Args:
            batch_size(int): max batch size
            spec(Tuple[int]): preprocessed frame shape which must be fixed through the batch
            amp(bool): mixed precision with FP16
            kwargs:
                dynamix_axes: dynamic axes for each input ==> {'input_0': {0: 'batch_size', 2: 'height'}}
                min_shapes: min input shapes ==> [(3, 320, 640)]
                max_shapes: max input shapes ==> [(3, 640, 640)]
        """
        from ml import deploy
        module = self.module
        # avoid ONNX export warnings from dynamic control flow (ifs)
        module.model[-1].onnx_dynamic = True
        # FIXME: workaround for invalid values with different batch sizes in TensorRT;
        # TensorRT output is not consistent with in-place operations
        module.model[-1].inplace = False
        int8 = kwargs.get('int8', False)
        strict = kwargs.get('strict', False)
        if int8:
            from ml import hub
            from ml.vision.datasets.coco import download

            def preprocessor(size=(384, 640)):
                from PIL import Image
                from torchvision import transforms
                trans = transforms.Compose(
                    [transforms.Resize(size),
                     transforms.ToTensor()])

                H, W = size

                def preprocess(image_path, *shape):
                    r'''Preprocessing for TensorRT INT8 calibration
                    Args:
                        image_path(str): path to the calibration image
                        shape: extra shape args (unused)
                    '''
                    image = Image.open(image_path)
                    logging.debug(
                        f"image.size={image.size}, mode={image.mode}")
                    image = image.convert('RGB')
                    C = len(image.mode)
                    im = trans(image)
                    assert im.shape == (C, H, W)
                    return im

                return preprocess

            int8_calib_max = kwargs.get('int8_calib_max', 5000)
            int8_calib_batch_size = kwargs.get('int8_calib_batch_size',
                                               max(batch_size, 64))
            cache = f'{name}-COCO2017-val-{int8_calib_max}-{int8_calib_batch_size}.cache'
            cache_path = Path(os.path.join(hub.get_dir(), cache))
            kwargs['int8_calib_cache'] = str(cache_path)
            kwargs['int8_calib_data'] = download(split='val2017', reload=False)
            kwargs['int8_calib_preprocess_func'] = preprocessor()
            kwargs['int8_calib_max'] = int8_calib_max
            kwargs['int8_calib_batch_size'] = int8_calib_batch_size

        device = next(self.module.parameters()).device
        # FIXME: cuda + onnx_dynamic: causes the onnx export to fail: https://github.com/ultralytics/yolov5/issues/5439
        self.to('cpu')
        self.engine = deploy.build(
            f"{name}-bs{batch_size}_{spec[-2]}x{spec[-1]}{fp16 and '_fp16' or ''}{int8 and '_int8' or ''}{strict and '_strict' or ''}",
            self, [spec],
            backend=backend,
            reload=reload,
            batch_size=batch_size,
            fp16=fp16,
            strict_type_constraints=strict,
            **kwargs)
        self.to(device)
        # TODO: avoid storing dummy modules to keep track of module device
        self.dummy = module.model[-1]
        del self.module
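
A hedged sketch of invoking this method; detector is assumed to be an instance of the class defining deploy(), already wrapping a YOLOv5 module:

# Hypothetical usage of deploy(); the detector instance and its construction
# are assumptions, only the keyword arguments mirror the signature above.
detector.deploy(name='yolo5x',
                batch_size=10,
                spec=(3, 384, 640),
                fp16=True,
                backend='trt',
                int8=True,    # triggers the COCO val2017 calibration path above
                reload=False)
# After deployment the TensorRT engine is available as detector.engine.
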
def test_deploy_trt(benchmark, batch, detector, dev, B, fp16, int8, strict,
                    name):
    # FIXME pytorch cuda initialization must be ahead of pycuda
    module = detector.module
    module.model[-1].export = True
    batch = TF.resize(batch, (384, 640)).float()
    h, w = batch.shape[2:]
    kwargs = {}
    if int8:
        import os
        from pathlib import Path
        from ml import hub
        from ml.vision.datasets.coco import download

        def preprocessor(size=(384, 640)):
            from PIL import Image
            from torchvision import transforms
            trans = transforms.Compose(
                [transforms.Resize(size),
                 transforms.ToTensor()])

            H, W = size

            def preprocess(image_path, *shape):
                r'''Preprocessing for TensorRT INT8 calibration
                Args:
                    image_path(str): path to the calibration image
                    shape: extra shape args (unused)
                '''
                image = Image.open(image_path)
                logging.debug(f"image.size={image.size}, mode={image.mode}")
                image = image.convert('RGB')
                C = len(image.mode)
                im = trans(image)
                assert im.shape == (C, H, W)
                return im

            return preprocess

        int8_calib_max = 5000
        int8_calib_batch_size = 64
        cache = f'{name}-COCO2017-val-{int8_calib_max}-{int8_calib_batch_size}.cache'
        cache_path = Path(os.path.join(hub.get_dir(), cache))
        kwargs['int8_calib_cache'] = str(cache_path)
        kwargs['int8_calib_data'] = download(split='val2017', reload=False)
        kwargs['int8_calib_preprocess_func'] = preprocessor()
        kwargs['int8_calib_max'] = int8_calib_max
        kwargs['int8_calib_batch_size'] = int8_calib_batch_size

    engine = deploy.build(
        f"yolo5x-bs{B}_{h}x{w}{fp16 and '_fp16' or ''}{int8 and '_int8' or ''}",
        detector, [batch.shape[1:]],
        backend='trt',
        reload=False,
        batch_size=B,
        fp16=fp16,
        int8=int8,
        strict_type_constraints=strict,
        **kwargs)

    preds, *features = benchmark(engine.predict, batch[:B].to(dev), sync=True)
    assert len(features) == 3
    with th.no_grad():
        with th.cuda.amp.autocast(enabled=fp16):
            torch_preds, torch_features = detector(batch[:B].to(dev))
    logging.info(
        f"outputs trt norm={preds.norm().item()}, torch norm={torch_preds.norm().item()}"
    )
    if fp16 or int8:
        # Reduced-precision (FP16/INT8) outputs drift too much for a strict
        # check; a looser tolerance such as the one below could be re-enabled.
        pass
        # th.testing.assert_allclose(torch_preds.float(), preds.float(), rtol=2e-02, atol=4e-02)
    else:
        th.testing.assert_allclose(torch_preds.float(),
                                   preds.float(),
                                   rtol=1e-03,
                                   atol=4e-04)
        for torch_feats, feats in zip(torch_features, features):
            th.testing.assert_allclose(torch_feats.float(),
                                       feats.float(),
                                       rtol=1e-03,
                                       atol=4e-04)
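
The fixtures and parameters above (benchmark, batch, detector, dev, B, fp16, int8, strict, name) are supplied by pytest; below is a sketch of the kind of parametrization that could drive this test, with illustrative values only:

# Hypothetical pytest parametrization; the real fixtures and values live in
# the project's conftest.py and may differ.
import pytest

precision_params = pytest.mark.parametrize(
    "B,fp16,int8,strict",
    [
        (10, False, False, False),  # full precision, strict tolerance check
        (10, True, False, False),   # FP16 engine
        (10, True, True, True),     # FP16 + INT8 with strict type constraints
    ])
# Applied as a decorator above test_deploy_trt, together with something like
# @pytest.mark.parametrize("name", ["yolo5x"]).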