Example #1
def models(name, framework, engine, version, list_all, quiet):
    payload = remove_dict_null({
        'name': name,
        'framework': framework,
        'engine': engine,
        'version': version
    })
    with requests.get(f'http://{SERVER_HOST}:{SERVER_PORT}/api/v1/model/',
                      params=payload) as r:
        model_list = r.json()
        model_view([model_list], list_all=list_all, quiet=quiet)
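All of these snippets assume `requests` plus MLModelCI-internal helpers (`remove_dict_null`, `model_view`, and the server host/port settings) are already imported. The real `remove_dict_null` lives in the project's utilities; below is a minimal sketch of its assumed behavior, dropping entries whose value is None so that unused filters never reach the query string:

def remove_dict_null(d: dict) -> dict:
    # Assumed behavior: keep only the filters the caller actually supplied.
    return {k: v for k, v in d.items() if v is not None}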
Example #2
def show_models(name, framework, engine, version, list_all):
    """show a table that lists all models published in mlmodelci

    Args:
        name ([type]): [description]
        framework ([type]): [description]
        engine ([type]): [description]
        version ([type]): [description]
        list_all ([type]): [description]
    """
    payload = remove_dict_null({
        'name': name,
        'framework': framework,
        'engine': engine,
        'version': version
    })
    with requests.get(f'http://{SERVER_HOST}:{SERVER_PORT}/api/v1/model/',
                      params=payload) as r:
        model_list = r.json()
        model_view([model_list], list_all=list_all)
Example #3
def list_models(
        architecture: Optional[str] = typer.Option(None, '-n', '--name', help='Model architecture name'),
        framework: Optional[Framework] = typer.Option(None, '-fw', '--framework', case_sensitive=False,
                                                      help='Framework'),
        engine: Optional[Engine] = typer.Option(None, '-e', '--engine', case_sensitive=False, help='Serving engine'),
        version: Optional[int] = typer.Option(None, '-v', '--version', help='Version'),
        list_all: Optional[bool] = typer.Option(
            False,
            '-a', '--all', is_flag=True,
            help='Display all queried models; otherwise, only a partial result is shown.'
        ),
):
    """Show a table that lists all models published in MLModelCI"""

    payload = remove_dict_null(
        {'architecture': architecture, 'framework': framework, 'engine': engine, 'version': version}
    )
    with requests.get(f'{app_settings.api_v1_prefix}/model', params=payload) as r:
        model_list = r.json()
        model_view([MLModel.parse_obj(model) for model in model_list], list_all=list_all)
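For context, a typer command such as `list_models` is registered on an application object and invoked from the shell. A minimal sketch follows; the app and command name are hypothetical, and the definitions from the example above (including their imports) are assumed:

import typer

app = typer.Typer()
# Hypothetical registration of the command defined above.
app.command('models')(list_models)

if __name__ == '__main__':
    app()  # e.g. `python cli.py models -fw pytorch --all`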
Example #4
def serve(
    save_path: Union[Path, str],
    device: str = 'cpu',
    name: Optional[str] = None,
    batch_size: int = 16,
) -> Container:
    """Serve the given model save path in a Docker container.

    Args:
        save_path (Union[Path, str]): Path to the saved model.
        device (str): Device name, e.g. cpu, cuda, cuda:1.
        name (Optional[str]): Container name. Defaults to None, in which case
            Docker generates one.
        batch_size (int): Batch size passed to the serving container.

    Returns:
        Container: Docker container object created.

    """

    info = parse_path(Path(save_path))
    architecture: str = info['architecture']
    engine: Engine = info['engine']

    cuda, device_num = get_device(device)
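    # `get_device` is assumed to parse the device string, e.g.
    # 'cuda:1' -> (True, '1') and 'cpu' -> (False, None).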

    docker_client = docker.from_env()

    # set mount
    mounts = [
        Mount(target=f'/models/{architecture}',
              source=str(info['base_dir']),
              type='bind',
              read_only=True)
    ]

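    # remove_dict_null drops the 'name' entry when it is None, so Docker
    # generates a container name automatically in that case.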
    common_kwargs = remove_dict_null({
        'detach': True,
        'auto_remove': True,
        'mounts': mounts,
        'name': name
    })
    environment = dict()

    if cuda:
        common_kwargs['runtime'] = 'nvidia'
        environment['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        environment['CUDA_VISIBLE_DEVICES'] = device_num
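        # PCI_BUS_ID ordering keeps CUDA_VISIBLE_DEVICES indices consistent
        # with the device numbering reported by nvidia-smi.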

    if engine == Engine.TFS:
        # TensorFlow Serving 2.2.0 has a known issue
        # (https://github.com/tensorflow/serving/issues/1663), so pin to 2.1.0.
        docker_tag = '2.1.0-gpu' if cuda else '2.1.0'
        ports = {'8501': config.TFS_HTTP_PORT, '8500': config.TFS_GRPC_PORT}
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'tensorflow/serving:{docker_tag}',
            environment=environment,
            ports=ports,
            **common_kwargs)
    elif engine == Engine.TORCHSCRIPT:
        docker_tag = 'latest-gpu' if cuda else 'latest'
        ports = {
            '8000': config.TORCHSCRIPT_HTTP_PORT,
            '8001': config.TORCHSCRIPT_GRPC_PORT
        }
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'mlmodelci/pytorch-serving:{docker_tag}',
            environment=environment,
            ports=ports,
            **common_kwargs)
    elif engine == Engine.ONNX:
        docker_tag = 'latest-gpu' if cuda else 'latest'
        ports = {'8000': config.ONNX_HTTP_PORT, '8001': config.ONNX_GRPC_PORT}
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'mlmodelci/onnx-serving:{docker_tag}',
            environment=environment,
            ports=ports,
            **common_kwargs)
    elif engine == Engine.TRT:
        if not cuda:
            raise RuntimeError(
                'TensorRT cannot be run without CUDA. Please specify a CUDA device.'
            )

        ports = {
            '8000': config.TRT_HTTP_PORT,
            '8001': config.TRT_GRPC_PORT,
            '8002': config.TRT_PROMETHEUS_PORT
        }
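        # Unlimited locked memory and a large stack size, mirroring the ulimit
        # settings from NVIDIA's reference run command for the TensorRT
        # Inference Server.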
        ulimits = [
            Ulimit(name='memlock', soft=-1, hard=-1),
            Ulimit(name='stack', soft=67100864, hard=67100864)
        ]
        trt_kwargs = {'ulimits': ulimits, 'shm_size': '1G'}
        container = docker_client.containers.run(
            'nvcr.io/nvidia/tensorrtserver:19.10-py3',
            'trtserver --model-repository=/models',
            environment=environment,
            ports=ports,
            **common_kwargs,
            **trt_kwargs,
        )
    else:
        raise RuntimeError(
            f'Unable to serve model with path `{str(save_path)}`: unknown serving engine.')

    return container
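A hypothetical call, assuming a TorchScript model saved under a layout that `parse_path` understands (the path and container name below are illustrative only):

# Hypothetical usage; the save path and name are illustrative, not a real layout.
container = serve(
    save_path='/data/models/resnet50/pytorch-torchscript/1',
    device='cuda:0',
    name='resnet50-serving',
)
print(container.name)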
Example #5
def models(name, framework, engine, version, list_all, quiet):
    payload = remove_dict_null({'name': name, 'framework': framework, 'engine': engine, 'version': version})
    with requests.get(f'{app_settings.api_v1_prefix}/model', params=payload) as r:
        model_list = r.json()
        model_view([MLModel.parse_obj(model) for model in model_list], list_all=list_all, quiet=quiet)