Example #1
def register_model(origin_model,
                   dataset: str,
                   metric: Dict[Metric, float],
                   task: Task,
                   inputs: List[IOShape],
                   outputs: List[IOShape],
                   model_input: Optional[List] = None,
                   architecture: str = None,
                   framework: Framework = None,
                   engine: Engine = None,
                   version: ModelVersion = None,
                   parent_model_id: Optional[str] = None,
                   convert: bool = True,
                   profile: bool = True,
                   model_status: List[ModelStatus] = None):
    """Upload a model to ModelDB.
    This function uploads the given model to the database, with some variation. It may optionally generate a
        branch of models (i.e. a model family) with different optimization techniques. In addition, a benchmark
        will be scheduled for each generated model in order to obtain profiling results for model selection
        strategies. When `origin_model` is given as a file path, `architecture`, `framework`, `engine` and
        `version` may be `None`. If any of these arguments is `None`, all of them will be automatically inferred
        from the `origin_model` path. A `ValueError` will be raised if the meta info cannot be inferred.

    TODO:
        This function has very comprehensive logic and needs to be simplified.

    Arguments:
        origin_model: The uploaded model without optimization. When registering from a saved model file, this
            parameter should be a `str` indicating the model file path.
        architecture (str): Model architecture name. Default to None.
        framework (Framework): Framework name. Default to None.
        version (ModelVersion): Model version. Default to None.
        dataset (str): Model testing dataset.
        metric (Dict[Metric, float]): Scoring metric and its corresponding score used for model evaluation.
        task (Task): Model task type.
        inputs (Iterable[IOShape]): Model input tensors.
        outputs (Iterable[IOShape]): Model output tensors.
        model_input: specify sample model input data
            TODO: specify more model conversion related params
        engine (Engine): Model optimization engine. Default to `Engine.NONE`.
        parent_model_id (Optional[str]): The ID of the parent model if this model is derived from a pre-existing one.
        model_status (List[ModelStatus]): Indicates the status of the current model in its lifecycle.
        convert (bool): Flag for generation of model family. When set, `origin_model` should be a path to model saving
            file. Default to `True`.
        profile (bool): Flag for profiling uploaded (including converted) models. Default to `True`.
    """
    from modelci.controller import job_executor
    from modelci.controller.executor import Job

    model_dir_list = list()

    # type and existence check
    if isinstance(origin_model, str):
        model_dir = Path(origin_model).absolute()
        assert model_dir.exists(
        ), f'model weight does not exist at {origin_model}'

        if all([architecture, task, framework, engine, version]):
            # from explicit architecture, framework, engine and version
            ext = model_dir.suffix
            path = generate_path(architecture, task, framework, engine,
                                 version).with_suffix(ext)
            # if already in the destination folder
            if path == model_dir:
                pass
            # create destination folder
            else:
                if ext:
                    path.parent.mkdir(parents=True, exist_ok=True)
                else:
                    path.mkdir(parents=True, exist_ok=True)

                # copy to cached folder
                subprocess.call(['cp', model_dir, path])
        else:  # meta info implicitly extracted from the path; validity is checked later at registration
            path = model_dir
        model_dir_list.append(path)
    elif framework == Framework.PYTORCH and engine in [
            Engine.PYTORCH, Engine.NONE
    ]:
        # save original pytorch model
        pytorch_dir = generate_path(
            task=task,
            model_name=architecture,
            framework=framework,
            engine=engine,
            version=str(version),
        )
        pytorch_dir.parent.mkdir(parents=True, exist_ok=True)
        save_path_with_ext = pytorch_dir.with_suffix('.pth')
        torch.save(origin_model, str(save_path_with_ext))
        model_dir_list.append(pytorch_dir.with_suffix('.pth'))

    if convert:
        # TODO: generate from path name
        # generate model variant
        model_dir_list.extend(
            _generate_model_family(origin_model,
                                   architecture,
                                   task,
                                   framework,
                                   filename=str(version),
                                   inputs=inputs,
                                   outputs=outputs,
                                   model_input=model_input))

    # register
    for model_dir in model_dir_list:
        parse_result = parse_path(model_dir)
        architecture = parse_result['architecture']
        task = parse_result['task']
        framework = parse_result['framework']
        engine = parse_result['engine']
        version = parse_result['version']
        filename = parse_result['filename']

        if model_status is not None:
            model_bo_status = model_status
        elif engine == Engine.PYTORCH:
            model_bo_status = [ModelStatus.PUBLISHED]
        else:
            model_bo_status = [ModelStatus.CONVERTED]

        with open(str(model_dir), 'rb') as f:
            model = ModelBO(name=architecture,
                            task=task,
                            framework=framework,
                            engine=engine,
                            version=version,
                            dataset=dataset,
                            metric=metric,
                            parent_model_id=parent_model_id,
                            inputs=inputs,
                            outputs=outputs,
                            model_status=model_bo_status,
                            weight=Weight(f, filename=filename))

            ModelService.post_model(model)
        # TODO refresh
        model = ModelService.get_models(name=architecture,
                                        task=task,
                                        framework=framework,
                                        engine=engine,
                                        version=version)[0]
        if model.engine == Engine.PYTORCH or model.engine == Engine.TFS:
            parent_model_id = model.id
        # profile registered model
        if profile and engine != Engine.PYTORCH:
            file = tf.keras.utils.get_file(
                "grace_hopper.jpg",
                "https://storage.googleapis.com/download.tensorflow.org/example_images/grace_hopper.jpg"
            )
            test_img_bytes = cv2.imread(file)

            kwargs = {
                'repeat_data': test_img_bytes,
                'batch_size': 32,
                'batch_num': 100,
                'asynchronous': False,
                'model_info': model,
            }

            new_status = [
                item for item in model.model_status
                if item not in (ModelStatus.CONVERTED, ModelStatus.PUBLISHED)
            ]
            new_status.append(ModelStatus.PROFILING)
            model.model_status = new_status
            ModelService.update_model(model)

            if engine == Engine.TORCHSCRIPT:
                client = CVTorchClient(**kwargs)
            elif engine == Engine.TFS:
                client = CVTFSClient(**kwargs)
            elif engine == Engine.ONNX:
                client = CVONNXClient(**kwargs)
            elif engine == Engine.TRT:
                client = CVTRTClient(**kwargs)
            else:
                raise ValueError(f'No such serving engine: {engine}')

            job_cuda = Job(client=client, device='cuda:0', model_info=model)
            # job_cpu = Job(client=client, device='cpu', model_info=model)
            job_executor.submit(job_cuda)
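# Usage sketch for register_model above. The enum members and constructors for Metric, Task,
# Framework, Engine, ModelVersion and IOShape below are assumptions for illustration only;
# check the actual ModelCI definitions before reuse.
import torchvision.models

resnet50 = torchvision.models.resnet50(pretrained=True)

register_model(
    origin_model=resnet50,
    dataset='ImageNet',
    metric={Metric.ACC: 0.76},                                               # assumed Metric member
    task=Task.IMAGE_CLASSIFICATION,                                          # assumed Task member
    inputs=[IOShape(shape=[-1, 3, 224, 224], dtype=float, name='input')],    # assumed IOShape signature
    outputs=[IOShape(shape=[-1, 1000], dtype=float, name='probs')],          # assumed IOShape signature
    architecture='ResNet50',
    framework=Framework.PYTORCH,
    engine=Engine.PYTORCH,
    version=ModelVersion('1'),                                               # assumed constructor
    convert=True,                                                            # also generate the model family
    profile=False,                                                           # skip benchmark scheduling here
)
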
Example #2
def serve(
    save_path: Union[Path, str],
    device: str = 'cpu',
    name: str = None,
    batch_size: int = 16,
) -> Container:
    """Serve the given model save path in a Docker container.

    Args:
        save_path (Union[Path, str]): Saved path to the model.
        device (str): Device name. E.g.: cpu, cuda, cuda:1.
        name (str): Container name. Default to None.
        batch_size (int): Batch size for passing to serving containers.

    Returns:
        Container: Docker container object created.

    """

    info = parse_path(Path(save_path))
    architecture: str = info['architecture']
    engine: Engine = info['engine']

    cuda, device_num = get_device(device)

    docker_client = docker.from_env()

    # set mount
    mounts = [
        Mount(target=f'/models/{architecture}',
              source=str(info['base_dir']),
              type='bind',
              read_only=True)
    ]

    common_kwargs = remove_dict_null({
        'detach': True,
        'auto_remove': True,
        'mounts': mounts,
        'name': name
    })
    environment = dict()

    if cuda:
        common_kwargs['runtime'] = 'nvidia'
        environment['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        environment['CUDA_VISIBLE_DEVICES'] = device_num

    if engine == Engine.TFS:
        # Tensorflow Serving 2.2.0 has the issue: https://github.com/tensorflow/serving/issues/1663
        docker_tag = '2.1.0-gpu' if cuda else '2.1.0'
        ports = {'8501': config.TFS_HTTP_PORT, '8500': config.TFS_GRPC_PORT}
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'tensorflow/serving:{docker_tag}',
            environment=environment,
            ports=ports,
            **common_kwargs)
    elif engine == Engine.TORCHSCRIPT:
        docker_tag = 'latest-gpu' if cuda else 'latest'
        ports = {
            '8000': config.TORCHSCRIPT_HTTP_PORT,
            '8001': config.TORCHSCRIPT_GRPC_PORT
        }
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'mlmodelci/pytorch-serving:{docker_tag}',
            environment=environment,
            ports=ports,
            **common_kwargs)
    elif engine == Engine.ONNX:
        docker_tag = 'latest-gpu' if cuda else 'latest'
        ports = {'8000': config.ONNX_HTTP_PORT, '8001': config.ONNX_GRPC_PORT}
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'mlmodelci/onnx-serving:{docker_tag}',
            environment=environment,
            ports=ports,
            **common_kwargs)
    elif engine == Engine.TRT:
        if not cuda:
            raise RuntimeError(
                'TensorRT cannot be run without CUDA. Please specify a CUDA device.'
            )

        ports = {
            '8000': config.TRT_HTTP_PORT,
            '8001': config.TRT_GRPC_PORT,
            '8002': config.TRT_PROMETHEUS_PORT
        }
        ulimits = [
            Ulimit(name='memlock', soft=-1, hard=-1),
            Ulimit(name='stack', soft=67100864, hard=67100864)
        ]
        trt_kwargs = {'ulimits': ulimits, 'shm_size': '1G'}
        container = docker_client.containers.run(
            'nvcr.io/nvidia/tensorrtserver:19.10-py3',
            'trtserver --model-repository=/models',
            environment=environment,
            ports=ports,
            **common_kwargs,
            **trt_kwargs,
        )
    else:
        raise RuntimeError(
            f'Not able to serve model with path `{str(save_path)}`.')

    return container
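# Usage sketch for serve above. The save path is hypothetical and only follows the
# `<model-name>/<framework>-<engine>/<version>` layout that parse_path expects; adjust it
# to a model actually cached on disk.
container = serve(
    save_path=Path.home() / '.modelci/ResNet50/pytorch-torchscript/1/model.zip',
    device='cuda:0',
    name='resnet50-torchscript',
    batch_size=16,
)
print(container.name, container.status)  # docker-py Container attributes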
Example #3
def register_model(
    origin_model,
    dataset: str,
    acc: float,
    task: str,
    inputs: List[IOShape],
    outputs: List[IOShape],
    architecture: str = None,
    framework: Framework = None,
    engine: Engine = None,
    version: ModelVersion = None,
    convert=True,
    profile=True,
):
    """Upload a model to ModelDB.
    This function uploads the given model to the database, with some variation. It may optionally generate a
        branch of models (i.e. a model family) with different optimization techniques. In addition, a benchmark
        will be scheduled for each generated model in order to obtain profiling results for model selection
        strategies. When `convert` is `False`, `architecture`, `framework`, `engine` and `version` may be
        `None`. If any of these arguments is `None`, all of them will be automatically inferred from the
        `origin_model` path. A `ValueError` will be raised if the meta info cannot be inferred.

    Arguments:
        origin_model: The uploaded model without optimization. When `convert` is `False`, this parameter should
            be a `str` indicating the model file path.
        architecture (str): Model architecture name. Default to None.
        framework (Framework): Framework name. Default to None.
        version (ModelVersion): Model version. Default to None.
        dataset (str): Model testing dataset.
        acc (float): Model accuracy on the testing dataset.
        task (str): Model task type.
        inputs (Iterable[IOShape]): Model input tensors.
        outputs (Iterable[IOShape]): Model output tensors.
        engine (Engine): Model optimization engine. Default to `Engine.NONE`.
        convert (bool): Flag for generation of model family. When set, `origin_model` should be a path to model saving
            file. Default to `True`.
        profile (bool): Flag for profiling uploaded (including converted) models. Default to `True`.
    """
    from modelci.controller import job_executor
    from modelci.controller.executor import Job

    model_dir_list = list()
    if not convert:
        # type and existence check
        assert isinstance(origin_model, str)
        model_dir = Path(origin_model).absolute()
        assert model_dir.exists(
        ), f'model weight does not exist at {origin_model}'

        if all([
                architecture, framework, engine, version
        ]):  # from explicit architecture, framework, engine and version
            ext = model_dir.suffix
            path = generate_path(architecture, framework, engine,
                                 version).with_suffix(ext)
            # if already in the destination folder
            if path == model_dir:
                pass
            # create destination folder
            else:
                if ext:
                    path.parent.mkdir(parents=True, exist_ok=True)
                else:
                    path.mkdir(parents=True, exist_ok=True)

                # copy to cached folder
                subprocess.call(['cp', model_dir, path])
        else:  # meta info implicitly extracted from the path; validity is checked later at registration
            path = model_dir
        model_dir_list.append(path)
    else:
        # TODO: generate from path name

        # generate model variant
        model_dir_list.extend(
            _generate_model_family(origin_model,
                                   architecture,
                                   framework,
                                   filename=str(version),
                                   inputs=inputs,
                                   outputs=outputs))

    # register
    for model_dir in model_dir_list:
        parse_result = parse_path(model_dir)
        architecture = parse_result['architecture']
        framework = parse_result['framework']
        engine = parse_result['engine']
        version = parse_result['version']
        filename = parse_result['filename']

        with open(str(model_dir), 'rb') as f:
            model = ModelBO(name=architecture,
                            framework=framework,
                            engine=engine,
                            version=version,
                            dataset=dataset,
                            acc=acc,
                            task=task,
                            inputs=inputs,
                            outputs=outputs,
                            weight=Weight(f, filename=filename))

            ModelService.post_model(model)
        # TODO refresh
        model = ModelService.get_models(name=architecture,
                                        framework=framework,
                                        engine=engine,
                                        version=version)[0]

        # profile registered model
        if profile:
            file = tf.keras.utils.get_file(
                "grace_hopper.jpg",
                "https://storage.googleapis.com/download.tensorflow.org/example_images/grace_hopper.jpg"
            )
            test_img_bytes = cv2.imread(file)

            kwargs = {
                'repeat_data': test_img_bytes,
                'batch_size': 32,
                'batch_num': 100,
                'asynchronous': False,
                'model_info': model,
            }
            if engine == Engine.TORCHSCRIPT:
                client = CVTorchClient(**kwargs)
            elif engine == Engine.TFS:
                client = CVTFSClient(**kwargs)
            elif engine == Engine.ONNX:
                client = CVONNXClient(**kwargs)
            elif engine == Engine.TRT:
                client = CVTRTClient(**kwargs)
            else:
                raise ValueError(f'No such serving engine: {engine}')

            job_cuda = Job(client=client, device='cuda:0', model_info=model)
            # job_cpu = Job(client=client, device='cpu', model_info=model)
            job_executor.submit(job_cuda)
Example #4
    def from_onnx(
            onnx_path: Union[Path, str],
            save_path: Union[Path, str],
            inputs: List[IOShape],
            outputs: List[IOShape],
            int8_calibrator=None,
            create_model_config: bool = True,
            override: bool = False,
    ):
        """Takes an ONNX file and creates a TensorRT engine to run inference with
        From https://github.com/layerism/TensorRT-Inference-Server-Tutorial

        FIXME: bug exist: TRT 6.x.x does not support opset 10 used in ResNet50(ONNX).
        """
        import tensorrt as trt

        onnx_path = Path(onnx_path)
        save_path = Path(save_path)
        assert onnx_path.exists()

        if save_path.with_suffix('.plan').exists():
            if not override:  # file exists but the override flag is not set
                logger.info('Use cached model')
                return True

        # get arch name
        arch_name = parse_path(save_path)['architecture']

        # trt serving model repository is different from others:
        # `<model-name>/<framework>-tensorrt/<version>/model.plan`
        save_path = save_path.with_suffix('')
        save_path.mkdir(parents=True, exist_ok=True)

        # Save TRT engine
        trt_logger = trt.Logger(trt.Logger.WARNING)
        with trt.Builder(trt_logger) as builder:
            with builder.create_network(
                    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) as network:
                with trt.OnnxParser(network, trt_logger) as parser:
                    builder.max_workspace_size = GiB(1)  # 1GB
                    builder.max_batch_size = 1
                    if int8_calibrator is not None:
                        builder.int8_mode = True
                        builder.int8_calibrator = int8_calibrator

                    print('Loading ONNX file from path {}...'.format(onnx_path))
                    with open(onnx_path, 'rb') as model:
                        parser.parse(model.read())
                    engine = builder.build_cuda_engine(network)

                    with open(save_path / 'model.plan', 'wb') as f:
                        f.write(engine.serialize())

        # create model configuration file
        if create_model_config:
            TRTConverter.generate_trt_config(
                save_path.parent,
                arch_name=arch_name,
                inputs=inputs,
                outputs=outputs
            )
        return True
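# Usage sketch for from_onnx above, assuming it is exposed as a static method of TRTConverter
# (as the call to TRTConverter.generate_trt_config suggests). Paths and IOShape arguments are
# illustrative only.
TRTConverter.from_onnx(
    onnx_path=Path.home() / '.modelci/ResNet50/pytorch-onnx/1/model.onnx',
    save_path=Path.home() / '.modelci/ResNet50/pytorch-trt/1/model',
    inputs=[IOShape(shape=[1, 3, 224, 224], dtype=float, name='input')],    # assumed IOShape signature
    outputs=[IOShape(shape=[1, 1000], dtype=float, name='probs')],          # assumed IOShape signature
    create_model_config=True,
    override=False,
)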
Example #5
    def from_tfs(
            tf_path: Union[Path, str],
            save_path: Union[Path, str],
            inputs: List[IOShape],
            outputs: List[IOShape],
            tf_version=2,
            max_batch_size: int = 32,
            max_workspace_size_bytes: int = 1 << 32,
            precision_mode: str = 'FP32',
            maximum_cached_engines: int = 100,
            create_model_config: bool = True,
            override: bool = False,
    ):
        """Convert TensorFlow SavedModel to TF-TRT SavedModel."""
        from tensorflow.python.compiler.tensorrt import trt_convert as trt

        tf_path = Path(tf_path)
        save_path = Path(save_path)

        if save_path.with_suffix('.zip').exists():
            if not override:  # file exists but the override flag is not set
                # TODO: add logging
                print('Use cached model')
                return True

        # get arch name
        arch_name = parse_path(save_path)['architecture']

        # TF SavedModel files should be contained in a directory
        # `~/.modelci/<model-name>/tensorflow-tfs/<version>/model.savedmodel`
        tf_saved_model_path = save_path / 'model.savedmodel'

        assert tf_path.exists()
        save_path.mkdir(parents=True, exist_ok=True)

        if tf_version == 1:
            converter = trt.TrtGraphConverter(
                input_saved_model_dir=str(tf_path),
                max_workspace_size_bytes=max_workspace_size_bytes,
                precision_mode=precision_mode,
                maximum_cached_engines=maximum_cached_engines
            )
        elif tf_version == 2:
            # conversion
            conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS
            conversion_params = conversion_params._replace(
                max_workspace_size_bytes=max_workspace_size_bytes
            )
            conversion_params = conversion_params._replace(precision_mode=precision_mode)
            conversion_params = conversion_params._replace(
                maximum_cached_engines=maximum_cached_engines
            )

            converter = trt.TrtGraphConverterV2(
                input_saved_model_dir=str(tf_path),
                conversion_params=conversion_params
            )
        else:
            raise ValueError(f'tf_version expecting a value of `1` or `2`, but got {tf_version}')

        converter.convert()
        converter.save(str(tf_saved_model_path))

        # zip
        shutil.make_archive(save_path, 'zip', root_dir=save_path.parent)

        # create model configuration
        if create_model_config:
            TRTConverter.generate_trt_config(
                save_path.parent,
                arch_name=arch_name,
                platform=TensorRTPlatform.TENSORFLOW_SAVEDMODEL,
                inputs=inputs,
                outputs=outputs,
                max_batch_size=max_batch_size
            )

        return True
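# Usage sketch for from_tfs above, assuming it lives on the same converter class as from_onnx.
# Paths, shapes and precision mode are illustrative only.
TRTConverter.from_tfs(
    tf_path=Path.home() / '.modelci/ResNet50/tensorflow-tfs/1/model.savedmodel',
    save_path=Path.home() / '.modelci/ResNet50/tensorflow-trt/1/model',
    inputs=[IOShape(shape=[-1, 224, 224, 3], dtype=float, name='input_1')],  # assumed IOShape signature
    outputs=[IOShape(shape=[-1, 1000], dtype=float, name='probs')],          # assumed IOShape signature
    tf_version=2,
    precision_mode='FP16',
    max_batch_size=32,
)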
Example #6
def register_model(
    origin_model,
    dataset: str,
    acc: float,
    task: str,
    inputs: List[IOShape],
    outputs: List[IOShape],
    architecture: str = None,
    framework: Framework = None,
    engine: Engine = None,
    version: ModelVersion = None,
    convert=True,
    profile=False,
):
    """Upload a model to ModelDB.
    This function uploads the given model to the database, with some variation. It may optionally generate a
        branch of models (i.e. a model family) with different optimization techniques. In addition, a benchmark
        will be scheduled for each generated model in order to obtain profiling results for model selection
        strategies. When `convert` is `False`, `architecture`, `framework`, `engine` and `version` may be
        `None`. If any of these arguments is `None`, all of them will be automatically inferred from the
        `origin_model` path. A `ValueError` will be raised if the meta info cannot be inferred.
    Arguments:
        origin_model: The uploaded model without optimization. When `convert` is `False`, this parameter should
            be a `str` indicating the model file path.
        architecture (str): Model architecture name. Default to None.
        framework (Framework): Framework name. Default to None.
        version (ModelVersion): Model version. Default to None.
        dataset (str): Model testing dataset.
        acc (float): Model accuracy on the testing dataset.
        task (str): Model task type.
        inputs (Iterable[IOShape]): Model input tensors.
        outputs (Iterable[IOShape]): Model output tensors.
        engine (Engine): Model optimization engine. Default to `Engine.NONE`.
        convert (bool): Flag for generation of model family. When set, `origin_model` should be a path to model saving
            file. Default to `True`.
        profile (bool): Flag for profiling uploaded (including converted) models. Default to `False`.
    """
    model_dir_list = list()
    if not convert:
        # type and existence check
        assert isinstance(origin_model, str)
        model_dir = Path(origin_model).absolute()
        assert model_dir.exists(
        ), f'model weight does not exist at {origin_model}'

        if all([
                architecture, framework, engine, version
        ]):  # from explicit architecture, framework, engine and version
            ext = model_dir.suffix
            path = generate_path(architecture, framework, engine,
                                 version).with_suffix(ext)
            # if already in the destination folder
            if path == model_dir:
                pass
            # create destination folder
            else:
                if ext:
                    path.parent.mkdir(parents=True, exist_ok=True)
                else:
                    path.mkdir(parents=True, exist_ok=True)

                # copy to cached folder
                subprocess.call(['cp', model_dir, path])
        else:  # meta info implicitly extracted from the path; validity is checked later at registration
            path = model_dir
        model_dir_list.append(path)
    else:
        # TODO: generate from path name

        # generate model variant
        model_dir_list.extend(
            _generate_model_family(origin_model,
                                   architecture,
                                   framework,
                                   filename=str(version),
                                   inputs=inputs,
                                   outputs=outputs))

    # register
    for model_dir in model_dir_list:
        parse_result = parse_path(model_dir)
        architecture = parse_result['architecture']
        framework = parse_result['framework']
        engine = parse_result['engine']
        version = parse_result['version']
        filename = parse_result['filename']

        with open(str(model_dir), 'rb') as f:
            model = ModelBO(name=architecture,
                            framework=framework,
                            engine=engine,
                            version=version,
                            dataset=dataset,
                            acc=acc,
                            task=task,
                            inputs=inputs,
                            outputs=outputs,
                            weight=Weight(f, filename=filename))

            ModelService.post_model(model)

        if profile:
            # TODO(lym): profile
            pass