def register_model(
        origin_model,
        dataset: str,
        metric: Dict[Metric, float],
        task: Task,
        inputs: List[IOShape],
        outputs: List[IOShape],
        model_input: Optional[List] = None,
        architecture: str = None,
        framework: Framework = None,
        engine: Engine = None,
        version: ModelVersion = None,
        parent_model_id: Optional[str] = None,
        convert: bool = True,
        profile: bool = True,
        model_status: List[ModelStatus] = None,
):
    """Upload a model to ModelDB.

    This function uploads the given model into the database with some variation. It may optionally
    generate a branch of models (i.e. a model family) with different optimization techniques.
    Besides, a benchmark will be scheduled for each generated model in order to obtain profiling
    results for model selection strategies.

    When `origin_model` is a file path (str), `architecture`, `framework`, `engine` and `version`
    may be None. If any of them is `None`, all of them will be induced from the `origin_model`
    path. A `ValueError` will be raised if the meta info cannot be induced.

    TODO: This function has overly complex logic and needs to be simplified.

    Arguments:
        origin_model: The uploaded model without optimization. When registering without
            conversion, this parameter should be a str indicating the model file path.
        dataset (str): Model testing dataset.
        metric (Dict[Metric, float]): Scoring metric and its corresponding score used for model
            evaluation.
        task (Task): Model task type.
        inputs (Iterable[IOShape]): Model input tensors.
        outputs (Iterable[IOShape]): Model output tensors.
        model_input: Sample model input data.
            TODO: specify more model conversion related params
        architecture (str): Model architecture name. Default to None.
        framework (Framework): Framework name. Default to None.
        engine (Engine): Model optimization engine. Default to `Engine.NONE`.
        version (ModelVersion): Model version. Default to None.
        parent_model_id (Optional[str]): The parent model ID of the current model, if this model
            is derived from a pre-existing one.
        convert (bool): Flag for generation of the model family. When set, `origin_model` should
            be a path to a model saving file. Default to `True`.
        profile (bool): Flag for profiling uploaded (including converted) models. Default to `True`.
        model_status (List[ModelStatus]): Status of the current model in its lifecycle.
    """
    from modelci.controller import job_executor
    from modelci.controller.executor import Job

    model_dir_list = list()
    # type and existence check
    if isinstance(origin_model, str):
        model_dir = Path(origin_model).absolute()
        assert model_dir.exists(), f'model weight does not exist at {origin_model}'

        if all([architecture, task, framework, engine, version]):
            # from explicit architecture, task, framework, engine and version
            ext = model_dir.suffix
            path = generate_path(architecture, task, framework, engine, version).with_suffix(ext)
            # if already in the destination folder, skip; otherwise create the destination folder
            # and copy the weight file into the cache
            if path != model_dir:
                if ext:
                    path.parent.mkdir(parents=True, exist_ok=True)
                else:
                    path.mkdir(parents=True, exist_ok=True)
                subprocess.call(['cp', model_dir, path])
        else:
            # implicitly extracted from the path; validity of the path is checked later at registration
            path = model_dir
        model_dir_list.append(path)
    elif framework == Framework.PYTORCH and engine in [Engine.PYTORCH, Engine.NONE]:
        # save the original PyTorch model
        pytorch_dir = generate_path(
            task=task,
            model_name=architecture,
            framework=framework,
            engine=engine,
            version=str(version),
        )
        pytorch_dir.parent.mkdir(parents=True, exist_ok=True)
        save_path_with_ext = pytorch_dir.with_suffix('.pth')
        torch.save(origin_model, str(save_path_with_ext))
        model_dir_list.append(pytorch_dir.with_suffix('.pth'))

    if convert:
        # TODO: generate from path name
        # generate model variants
        model_dir_list.extend(
            _generate_model_family(
                origin_model,
                architecture,
                task,
                framework,
                filename=str(version),
                inputs=inputs,
                outputs=outputs,
                model_input=model_input,
            )
        )

    # register
    for model_dir in model_dir_list:
        parse_result = parse_path(model_dir)
        architecture = parse_result['architecture']
        task = parse_result['task']
        framework = parse_result['framework']
        engine = parse_result['engine']
        version = parse_result['version']
        filename = parse_result['filename']

        if model_status is not None:
            model_bo_status = model_status
        elif engine == Engine.PYTORCH:
            model_bo_status = [ModelStatus.PUBLISHED]
        else:
            model_bo_status = [ModelStatus.CONVERTED]

        with open(str(model_dir), 'rb') as f:
            model = ModelBO(
                name=architecture,
                task=task,
                framework=framework,
                engine=engine,
                version=version,
                dataset=dataset,
                metric=metric,
                parent_model_id=parent_model_id,
                inputs=inputs,
                outputs=outputs,
                model_status=model_bo_status,
                weight=Weight(f, filename=filename),
            )
            ModelService.post_model(model)
        # TODO: refresh
        model = ModelService.get_models(
            name=architecture, task=task, framework=framework, engine=engine, version=version
        )[0]
        if model.engine in (Engine.PYTORCH, Engine.TFS):
            parent_model_id = model.id

        # profile the registered model
        if profile and engine != Engine.PYTORCH:
            file = tf.keras.utils.get_file(
                "grace_hopper.jpg",
                "https://storage.googleapis.com/download.tensorflow.org/example_images/grace_hopper.jpg"
            )
            test_img_bytes = cv2.imread(file)
            kwargs = {
                'repeat_data': test_img_bytes,
                'batch_size': 32,
                'batch_num': 100,
                'asynchronous': False,
                'model_info': model,
            }

            # drop the CONVERTED / PUBLISHED status before marking the model as PROFILING
            new_status = [
                item for item in model.model_status
                if item not in (ModelStatus.CONVERTED, ModelStatus.PUBLISHED)
            ]
            new_status.append(ModelStatus.PROFILING)
            model.model_status = new_status
            ModelService.update_model(model)

            if engine == Engine.TORCHSCRIPT:
                client = CVTorchClient(**kwargs)
            elif engine == Engine.TFS:
                client = CVTFSClient(**kwargs)
            elif engine == Engine.ONNX:
                client = CVONNXClient(**kwargs)
            elif engine == Engine.TRT:
                client = CVTRTClient(**kwargs)
            else:
                raise ValueError(f'No such serving engine: {engine}')

            job_cuda = Job(client=client, device='cuda:0', model_info=model)
            # job_cpu = Job(client=client, device='cpu', model_info=model)
            job_executor.submit(job_cuda)
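# Illustrative usage sketch for `register_model` (not from the original source): register a
# PyTorch ResNet-50 and let the hub convert and profile the generated variants. The `IOShape`
# keyword arguments and the `Metric`/`Task` enum members below are assumptions inferred from how
# they are consumed above, so treat them as placeholders rather than the confirmed API.
#
#     import torchvision.models as models
#
#     resnet50 = models.resnet50(pretrained=True)
#     register_model(
#         origin_model=resnet50,
#         dataset='ImageNet',
#         metric={Metric.ACC: 0.76},                      # hypothetical Metric member
#         task=Task.IMAGE_CLASSIFICATION,                 # hypothetical Task member
#         inputs=[IOShape(shape=[-1, 3, 224, 224], dtype=float, name='input')],
#         outputs=[IOShape(shape=[-1, 1000], dtype=float, name='output')],
#         architecture='ResNet50',
#         framework=Framework.PYTORCH,
#         engine=Engine.PYTORCH,
#         version=ModelVersion(1),
#         convert=True,
#         profile=True,
#     )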
def serve(
        save_path: Union[Path, str],
        device: str = 'cpu',
        name: str = None,
        batch_size: int = 16,
) -> Container:
    """Serve the given model save path in a Docker container.

    Args:
        save_path (Union[Path, str]): Saved path to the model.
        device (str): Device name. E.g.: cpu, cuda, cuda:1.
        name (str): Container name. Default to None.
        batch_size (int): Batch size for passing to serving containers.

    Returns:
        Container: Docker container object created.
    """
    info = parse_path(Path(save_path))
    architecture: str = info['architecture']
    engine: Engine = info['engine']

    cuda, device_num = get_device(device)
    docker_client = docker.from_env()

    # set mount
    mounts = [
        Mount(target=f'/models/{architecture}', source=str(info['base_dir']), type='bind', read_only=True)
    ]
    common_kwargs = remove_dict_null({
        'detach': True,
        'auto_remove': True,
        'mounts': mounts,
        'name': name,
    })
    environment = dict()
    if cuda:
        common_kwargs['runtime'] = 'nvidia'
        environment['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        environment['CUDA_VISIBLE_DEVICES'] = device_num

    if engine == Engine.TFS:
        # TensorFlow Serving 2.2.0 has the issue: https://github.com/tensorflow/serving/issues/1663
        docker_tag = '2.1.0-gpu' if cuda else '2.1.0'
        ports = {'8501': config.TFS_HTTP_PORT, '8500': config.TFS_GRPC_PORT}
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'tensorflow/serving:{docker_tag}', environment=environment, ports=ports, **common_kwargs
        )
    elif engine == Engine.TORCHSCRIPT:
        docker_tag = 'latest-gpu' if cuda else 'latest'
        ports = {'8000': config.TORCHSCRIPT_HTTP_PORT, '8001': config.TORCHSCRIPT_GRPC_PORT}
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'mlmodelci/pytorch-serving:{docker_tag}', environment=environment, ports=ports, **common_kwargs
        )
    elif engine == Engine.ONNX:
        docker_tag = 'latest-gpu' if cuda else 'latest'
        ports = {'8000': config.ONNX_HTTP_PORT, '8001': config.ONNX_GRPC_PORT}
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'mlmodelci/onnx-serving:{docker_tag}', environment=environment, ports=ports, **common_kwargs
        )
    elif engine == Engine.TRT:
        if not cuda:
            raise RuntimeError('TensorRT cannot be run without CUDA. Please specify a CUDA device.')
        ports = {
            '8000': config.TRT_HTTP_PORT,
            '8001': config.TRT_GRPC_PORT,
            '8002': config.TRT_PROMETHEUS_PORT,
        }
        ulimits = [
            Ulimit(name='memlock', soft=-1, hard=-1),
            Ulimit(name='stack', soft=67100864, hard=67100864),
        ]
        trt_kwargs = {'ulimits': ulimits, 'shm_size': '1G'}
        container = docker_client.containers.run(
            'nvcr.io/nvidia/tensorrtserver:19.10-py3',
            'trtserver --model-repository=/models',
            environment=environment,
            ports=ports,
            **common_kwargs,
            **trt_kwargs,
        )
    else:
        raise RuntimeError(f'Not able to serve model with path `{str(save_path)}`.')

    return container
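# Illustrative usage sketch for `serve` (not from the original source): spin up a serving
# container for a cached TorchScript model on the first GPU. The save path below is a
# hypothetical example of the cache layout implied by `parse_path`, not a confirmed location.
#
#     container = serve(
#         save_path='~/.modelci/ResNet50/pytorch-torchscript/1/model.zip',  # hypothetical path
#         device='cuda:0',
#         name='resnet50-torchscript',
#     )
#     print(container.name)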
def register_model(
        origin_model,
        dataset: str,
        acc: float,
        task: str,
        inputs: List[IOShape],
        outputs: List[IOShape],
        architecture: str = None,
        framework: Framework = None,
        engine: Engine = None,
        version: ModelVersion = None,
        convert=True,
        profile=True,
):
    """Upload a model to ModelDB.

    This function uploads the given model into the database with some variation. It may optionally
    generate a branch of models (i.e. a model family) with different optimization techniques.
    Besides, a benchmark will be scheduled for each generated model in order to obtain profiling
    results for model selection strategies.

    When registering without conversion (i.e. `convert` is set to `False`), `architecture`,
    `framework`, `engine` and `version` may be None. If any of them is `None`, all of them will be
    induced from the `origin_model` path. A `ValueError` will be raised if the meta info cannot be
    induced.

    Arguments:
        origin_model: The uploaded model without optimization. When `convert` is `False`, this
            parameter should be a str indicating the model file path.
        dataset (str): Model testing dataset.
        acc (float): Model accuracy on the testing dataset.
        task (str): Model task type.
        inputs (Iterable[IOShape]): Model input tensors.
        outputs (Iterable[IOShape]): Model output tensors.
        architecture (str): Model architecture name. Default to None.
        framework (Framework): Framework name. Default to None.
        engine (Engine): Model optimization engine. Default to `Engine.NONE`.
        version (ModelVersion): Model version. Default to None.
        convert (bool): Flag for generation of the model family. When set, `origin_model` should
            be a path to a model saving file. Default to `True`.
        profile (bool): Flag for profiling uploaded (including converted) models. Default to `True`.
    """
    from modelci.controller import job_executor
    from modelci.controller.executor import Job

    model_dir_list = list()
    if not convert:
        # type and existence check
        assert isinstance(origin_model, str)
        model_dir = Path(origin_model).absolute()
        assert model_dir.exists(), f'model weight does not exist at {origin_model}'

        if all([architecture, framework, engine, version]):
            # from explicit architecture, framework, engine and version
            ext = model_dir.suffix
            path = generate_path(architecture, framework, engine, version).with_suffix(ext)
            # if already in the destination folder, skip; otherwise create the destination folder
            # and copy the weight file into the cache
            if path != model_dir:
                if ext:
                    path.parent.mkdir(parents=True, exist_ok=True)
                else:
                    path.mkdir(parents=True, exist_ok=True)
                subprocess.call(['cp', model_dir, path])
        else:
            # implicitly extracted from the path; validity of the path is checked later at registration
            path = model_dir
        model_dir_list.append(path)
    else:
        # TODO: generate from path name
        # generate model variants
        model_dir_list.extend(
            _generate_model_family(
                origin_model,
                architecture,
                framework,
                filename=str(version),
                inputs=inputs,
                outputs=outputs,
            )
        )

    # register
    for model_dir in model_dir_list:
        parse_result = parse_path(model_dir)
        architecture = parse_result['architecture']
        framework = parse_result['framework']
        engine = parse_result['engine']
        version = parse_result['version']
        filename = parse_result['filename']

        with open(str(model_dir), 'rb') as f:
            model = ModelBO(
                name=architecture,
                framework=framework,
                engine=engine,
                version=version,
                dataset=dataset,
                acc=acc,
                task=task,
                inputs=inputs,
                outputs=outputs,
                weight=Weight(f, filename=filename),
            )
            ModelService.post_model(model)
        # TODO: refresh
        model = ModelService.get_models(
            name=architecture, framework=framework, engine=engine, version=version
        )[0]

        # profile the registered model
        if profile:
            file = tf.keras.utils.get_file(
                "grace_hopper.jpg",
                "https://storage.googleapis.com/download.tensorflow.org/example_images/grace_hopper.jpg"
            )
            test_img_bytes = cv2.imread(file)
            kwargs = {
                'repeat_data': test_img_bytes,
                'batch_size': 32,
                'batch_num': 100,
                'asynchronous': False,
                'model_info': model,
            }

            if engine == Engine.TORCHSCRIPT:
                client = CVTorchClient(**kwargs)
            elif engine == Engine.TFS:
                client = CVTFSClient(**kwargs)
            elif engine == Engine.ONNX:
                client = CVONNXClient(**kwargs)
            elif engine == Engine.TRT:
                client = CVTRTClient(**kwargs)
            else:
                raise ValueError(f'No such serving engine: {engine}')

            job_cuda = Job(client=client, device='cuda:0', model_info=model)
            # job_cpu = Job(client=client, device='cpu', model_info=model)
            job_executor.submit(job_cuda)
def from_onnx(
        onnx_path: Union[Path, str],
        save_path: Union[Path, str],
        inputs: List[IOShape],
        outputs: List[IOShape],
        int8_calibrator=None,
        create_model_config: bool = True,
        override: bool = False,
):
    """Take an ONNX file and create a TensorRT engine to run inference with.

    From https://github.com/layerism/TensorRT-Inference-Server-Tutorial

    FIXME: bug exists: TRT 6.x.x does not support opset 10 used in ResNet50 (ONNX).
    """
    import tensorrt as trt

    onnx_path = Path(onnx_path)
    save_path = Path(save_path)

    if save_path.with_suffix('.plan').exists():
        if not override:  # file exists yet override flag is not set
            logger.info('Use cached model')
            return True

    assert onnx_path.exists()

    # get arch name
    arch_name = parse_path(save_path)['architecture']

    # the TRT serving model repository layout is different from the others:
    # `<model-name>/<framework>-tensorrt/<version>/model.plan`
    save_path = save_path.with_suffix('')
    save_path.mkdir(parents=True, exist_ok=True)

    # build and save the TRT engine
    trt_logger = trt.Logger(trt.Logger.WARNING)
    with trt.Builder(trt_logger) as builder:
        with builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) as network:
            with trt.OnnxParser(network, trt_logger) as parser:
                builder.max_workspace_size = GiB(1)  # 1 GB
                builder.max_batch_size = 1
                if int8_calibrator is not None:
                    builder.int8_mode = True
                    builder.int8_calibrator = int8_calibrator

                print('Loading ONNX file from path {}...'.format(onnx_path))
                with open(onnx_path, 'rb') as model:
                    parser.parse(model.read())
                engine = builder.build_cuda_engine(network)

                with open(save_path / 'model.plan', 'wb') as f:
                    f.write(engine.serialize())

    # create model configuration file
    if create_model_config:
        TRTConverter.generate_trt_config(
            save_path.parent,
            arch_name=arch_name,
            inputs=inputs,
            outputs=outputs,
        )

    return True
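# Illustrative usage sketch for `from_onnx` (not from the original source): build a TensorRT
# plan from an exported ONNX file. The paths and IOShape arguments are hypothetical placeholders;
# a CUDA-capable GPU and the `tensorrt` package are required.
#
#     from_onnx(
#         onnx_path='~/.modelci/ResNet50/pytorch-onnx/1/model.onnx',   # hypothetical path
#         save_path='~/.modelci/ResNet50/pytorch-trt/1/model',         # hypothetical path
#         inputs=[IOShape(shape=[-1, 3, 224, 224], dtype=float, name='input')],
#         outputs=[IOShape(shape=[-1, 1000], dtype=float, name='output')],
#         override=False,
#     )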
def from_tfs(
        tf_path: Union[Path, str],
        save_path: Union[Path, str],
        inputs: List[IOShape],
        outputs: List[IOShape],
        tf_version=2,
        max_batch_size: int = 32,
        max_workspace_size_bytes: int = 1 << 32,
        precision_mode: str = 'FP32',
        maximum_cached_engines: int = 100,
        create_model_config: bool = True,
        override: bool = False,
):
    """Convert a TensorFlow SavedModel to a TF-TRT SavedModel."""
    from tensorflow.python.compiler.tensorrt import trt_convert as trt

    tf_path = Path(tf_path)
    save_path = Path(save_path)

    if save_path.with_suffix('.zip').exists():
        if not override:  # file exists yet override flag is not set
            # TODO: add logging
            print('Use cached model')
            return True

    # get arch name
    arch_name = parse_path(save_path)['architecture']

    # TF SavedModel files should be contained in a directory:
    # `~/.modelci/<model-name>/tensorflow-tfs/<version>/model.savedmodel`
    tf_saved_model_path = save_path / 'model.savedmodel'

    assert tf_path.exists()
    save_path.mkdir(parents=True, exist_ok=True)

    if tf_version == 1:
        converter = trt.TrtGraphConverter(
            input_saved_model_dir=str(tf_path),
            max_workspace_size_bytes=max_workspace_size_bytes,
            precision_mode=precision_mode,
            maximum_cached_engines=maximum_cached_engines,
        )
    elif tf_version == 2:
        # conversion
        conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS
        conversion_params = conversion_params._replace(max_workspace_size_bytes=max_workspace_size_bytes)
        conversion_params = conversion_params._replace(precision_mode=precision_mode)
        conversion_params = conversion_params._replace(maximum_cached_engines=maximum_cached_engines)
        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=str(tf_path),
            conversion_params=conversion_params,
        )
    else:
        raise ValueError(f'tf_version expecting a value of `1` or `2`, but got {tf_version}')

    converter.convert()
    converter.save(str(tf_saved_model_path))

    # zip
    shutil.make_archive(save_path, 'zip', root_dir=save_path.parent)

    # create model configuration
    if create_model_config:
        TRTConverter.generate_trt_config(
            save_path.parent,
            arch_name=arch_name,
            platform=TensorRTPlatform.TENSORFLOW_SAVEDMODEL,
            inputs=inputs,
            outputs=outputs,
            max_batch_size=max_batch_size,
        )

    return True
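# Illustrative usage sketch for `from_tfs` (not from the original source): convert a TensorFlow 2
# SavedModel into a TF-TRT SavedModel with FP16 precision. Paths and IOShape arguments are
# hypothetical placeholders based on the cache layout mentioned in the docstring above.
#
#     from_tfs(
#         tf_path='~/.modelci/ResNet50/tensorflow-tfs/1/model.savedmodel',  # hypothetical path
#         save_path='~/.modelci/ResNet50/tensorflow-trt/1/model',           # hypothetical path
#         inputs=[IOShape(shape=[-1, 224, 224, 3], dtype=float, name='input')],
#         outputs=[IOShape(shape=[-1, 1000], dtype=float, name='output')],
#         tf_version=2,
#         precision_mode='FP16',
#         max_batch_size=32,
#     )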
def register_model(
        origin_model,
        dataset: str,
        acc: float,
        task: str,
        inputs: List[IOShape],
        outputs: List[IOShape],
        architecture: str = None,
        framework: Framework = None,
        engine: Engine = None,
        version: ModelVersion = None,
        convert=True,
        profile=False,
):
    """Upload a model to ModelDB.

    This function uploads the given model into the database with some variation. It may optionally
    generate a branch of models (i.e. a model family) with different optimization techniques.
    Besides, a benchmark will be scheduled for each generated model in order to obtain profiling
    results for model selection strategies.

    When registering without conversion (i.e. `convert` is set to `False`), `architecture`,
    `framework`, `engine` and `version` may be None. If any of them is `None`, all of them will be
    induced from the `origin_model` path. A `ValueError` will be raised if the meta info cannot be
    induced.

    Arguments:
        origin_model: The uploaded model without optimization. When `convert` is `False`, this
            parameter should be a str indicating the model file path.
        dataset (str): Model testing dataset.
        acc (float): Model accuracy on the testing dataset.
        task (str): Model task type.
        inputs (Iterable[IOShape]): Model input tensors.
        outputs (Iterable[IOShape]): Model output tensors.
        architecture (str): Model architecture name. Default to None.
        framework (Framework): Framework name. Default to None.
        engine (Engine): Model optimization engine. Default to `Engine.NONE`.
        version (ModelVersion): Model version. Default to None.
        convert (bool): Flag for generation of the model family. When set, `origin_model` should
            be a path to a model saving file. Default to `True`.
        profile (bool): Flag for profiling uploaded (including converted) models. Default to `False`.
    """
    model_dir_list = list()
    if not convert:
        # type and existence check
        assert isinstance(origin_model, str)
        model_dir = Path(origin_model).absolute()
        assert model_dir.exists(), f'model weight does not exist at {origin_model}'

        if all([architecture, framework, engine, version]):
            # from explicit architecture, framework, engine and version
            ext = model_dir.suffix
            path = generate_path(architecture, framework, engine, version).with_suffix(ext)
            # if already in the destination folder, skip; otherwise create the destination folder
            # and copy the weight file into the cache
            if path != model_dir:
                if ext:
                    path.parent.mkdir(parents=True, exist_ok=True)
                else:
                    path.mkdir(parents=True, exist_ok=True)
                subprocess.call(['cp', model_dir, path])
        else:
            # implicitly extracted from the path; validity of the path is checked later at registration
            path = model_dir
        model_dir_list.append(path)
    else:
        # TODO: generate from path name
        # generate model variants
        model_dir_list.extend(
            _generate_model_family(
                origin_model,
                architecture,
                framework,
                filename=str(version),
                inputs=inputs,
                outputs=outputs,
            )
        )

    # register
    for model_dir in model_dir_list:
        parse_result = parse_path(model_dir)
        architecture = parse_result['architecture']
        framework = parse_result['framework']
        engine = parse_result['engine']
        version = parse_result['version']
        filename = parse_result['filename']

        with open(str(model_dir), 'rb') as f:
            model = ModelBO(
                name=architecture,
                framework=framework,
                engine=engine,
                version=version,
                dataset=dataset,
                acc=acc,
                task=task,
                inputs=inputs,
                outputs=outputs,
                weight=Weight(f, filename=filename),
            )
            ModelService.post_model(model)

        if profile:
            # TODO(lym): profile
            pass