def test_pyversion_warning_on_load(tmp_path_factory, capsys, example_bento_service_class):
    # Set logging level so version mismatch warnings are outputted
    bentoml.configure_logging(logging_level=logging.WARNING)
    # (Note that logger.warning() is captured by pytest in stdout, NOT stdlog.
    # So the warning is in capsys.readouterr().out, NOT caplog.text.)

    test_model = TestModel()
    svc = example_bento_service_class()
    svc.pack('model', test_model)

    # Should not warn for default `_python_version` value
    match_dir = tmp_path_factory.mktemp("match")
    svc.save_to_dir(match_dir)
    _ = bentoml.load(str(match_dir))
    assert "Python version mismatch" not in capsys.readouterr().out

    # Should warn for any version mismatch (major, minor, or micro)
    svc.env._python_version = "X.Y.Z"
    mismatch_dir = tmp_path_factory.mktemp("mismatch")
    svc.save_to_dir(mismatch_dir)
    _ = bentoml.load(str(mismatch_dir))
    assert "Python version mismatch" in capsys.readouterr().out

    # Reset logging level to default
    bentoml.configure_logging()
def start_dev_server(
    saved_bundle_path: str,
    port: int,
    enable_microbatch: bool,
    mb_max_batch_size: int,
    mb_max_latency: int,
    run_with_ngrok: bool,
    enable_swagger: bool,
):
    logger.info("Starting BentoML API server in development mode..")

    import multiprocessing

    from bentoml import load
    from bentoml.server.api_server import BentoAPIServer
    from bentoml.utils import reserve_free_port

    if run_with_ngrok:
        from threading import Timer

        from bentoml.utils.flask_ngrok import start_ngrok

        thread = Timer(1, start_ngrok, args=(port,))
        thread.setDaemon(True)
        thread.start()

    if enable_microbatch:
        with reserve_free_port() as api_server_port:
            # start server right after port released
            # to reduce potential race
            marshal_proc = multiprocessing.Process(
                target=start_dev_batching_server,
                kwargs=dict(
                    api_server_port=api_server_port,
                    saved_bundle_path=saved_bundle_path,
                    port=port,
                    mb_max_latency=mb_max_latency,
                    mb_max_batch_size=mb_max_batch_size,
                ),
                daemon=True,
            )
        marshal_proc.start()

        bento_service = load(saved_bundle_path)
        api_server = BentoAPIServer(
            bento_service, port=api_server_port, enable_swagger=enable_swagger
        )
        api_server.start()
    else:
        bento_service = load(saved_bundle_path)
        api_server = BentoAPIServer(
            bento_service, port=port, enable_swagger=enable_swagger
        )
        api_server.start()
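# A minimal usage sketch for the dev-server helper above. The bundle path,
# port, and batching limits below are hypothetical values (not taken from the
# snippet itself), chosen only to illustrate the expected argument shapes.
if __name__ == "__main__":
    start_dev_server(
        saved_bundle_path="/tmp/IrisClassifier",  # hypothetical saved bundle dir
        port=5000,
        enable_microbatch=False,
        mb_max_batch_size=2000,
        mb_max_latency=10000,
        run_with_ngrok=False,
        enable_swagger=True,
    )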
def test_fasttext_artifact_pack(fasttext_classifier_class):
    @contextlib.contextmanager
    def _temp_filename_with_contents(contents):
        temporary_file = tempfile.NamedTemporaryFile(suffix=".txt", mode="w+")
        temporary_file.write(contents)
        # Set file pointer to beginning to ensure correct read
        temporary_file.seek(0)
        yield temporary_file.name
        temporary_file.close()

    with _temp_filename_with_contents("__label__bar foo") as filename:
        model = fasttext.train_supervised(input=filename)

    svc = fasttext_classifier_class()
    svc.pack('model', model)

    assert svc.predict(test_json)[0] == (
        '__label__bar',
    ), 'Run inference before saving the artifact'

    saved_path = svc.save()
    loaded_svc = bentoml.load(saved_path)
    assert loaded_svc.predict(test_json)[0] == (
        '__label__bar',
    ), 'Run inference after saving the artifact'

    # clean up saved bundle
    yc = YataiClient()
    yc.repository.dangerously_delete_bento(svc.name, svc.version)
def test_save_and_load_model(tmpdir, example_bento_service_class):
    example_bento_service_class = bentoml.ver(major=2, minor=10)(
        example_bento_service_class
    )

    test_model = TestModel()
    svc = example_bento_service_class.pack(model=test_model)

    assert svc.predict(1000) == 2000

    version = "test_" + uuid.uuid4().hex
    svc.save_to_dir(str(tmpdir), version=version)

    model_service = bentoml.load(str(tmpdir))

    expected_version = "2.10.{}".format(version)
    assert model_service.version == expected_version

    api = model_service.get_service_api('predict')
    assert api.name == "predict"
    assert api.mb_max_latency == 1000
    assert api.mb_max_batch_size == 2000
    assert isinstance(api.handler, DataframeInput)
    assert api.func(1) == 2

    # Check api methods are available
    assert model_service.predict(1) == 2
def test_pack_on_bento_service_instance(tmpdir, example_bento_service_class):
    example_bento_service_class = bentoml.ver(major=2, minor=10)(
        example_bento_service_class
    )

    test_model = TestModel()
    svc = example_bento_service_class()
    svc.pack("model", test_model)

    assert svc.predict(1000) == 2000

    version = "test_" + uuid.uuid4().hex
    svc.set_version(version)
    svc.save_to_dir(str(tmpdir))

    model_service = bentoml.load(str(tmpdir))

    expected_version = "2.10.{}".format(version)
    assert model_service.version == expected_version

    api = model_service.get_inference_api('predict')
    assert api.name == "predict"
    assert isinstance(api.input_adapter, DataframeInput)
    assert api.user_func(1) == 2

    # Check api methods are available
    assert model_service.predict(1) == 2
def test_save_and_load_model(tmpdir):
    test_model = MyTestModel()
    ms = MyTestBentoService.pack(model=test_model)

    assert ms.predict(1000) == 2000

    import uuid
    version = "test_" + uuid.uuid4().hex
    saved_path = ms.save(str(tmpdir), version=version)

    expected_version = "2.10.{}".format(version)
    assert saved_path == os.path.join(
        str(tmpdir), "MyTestBentoService", expected_version
    )
    assert os.path.exists(saved_path)

    model_service = bentoml.load(saved_path)

    assert len(model_service.get_service_apis()) == 1
    api = model_service.get_service_apis()[0]
    assert api.name == "predict"
    assert isinstance(api.handler, bentoml.handlers.DataframeHandler)
    assert api.func(1) == 2

    # Check api methods are available
    assert model_service.predict(1) == 2
    assert model_service.version == expected_version
def test_save_and_load_model():
    test_model = MyTestModel()
    ms = MyTestBentoService.pack(model=test_model)

    assert ms.predict(1000) == 2000

    import uuid
    version = "test_" + uuid.uuid4().hex
    saved_path = ms.save(BASE_TEST_PATH, version=version)

    model_path = os.path.join(BASE_TEST_PATH, 'MyTestBentoService', version)
    assert os.path.exists(model_path)

    model_service = bentoml.load(saved_path, lazy_load=True)
    assert not model_service.loaded
    model_service.load()
    assert model_service.loaded

    assert len(model_service.get_service_apis()) == 1
    api = model_service.get_service_apis()[0]
    assert api.name == 'predict'
    assert api.handler == bentoml.handlers.DataframeHandler
    assert api.func(1) == 2

    # Check api methods are available
    assert model_service.predict(1) == 2
def start_dev_server(
    saved_bundle_path: str, port: int, enable_microbatch: bool, run_with_ngrok: bool
):
    logger.info("Starting BentoML API server in development mode..")

    from bentoml import load
    from bentoml.server.api_server import BentoAPIServer
    from bentoml.marshal.marshal import MarshalService
    from bentoml.utils import reserve_free_port

    bento_service = load(saved_bundle_path)

    if run_with_ngrok:
        from bentoml.utils.flask_ngrok import start_ngrok
        from threading import Timer

        thread = Timer(1, start_ngrok, args=(port,))
        thread.setDaemon(True)
        thread.start()

    if enable_microbatch:
        with reserve_free_port() as api_server_port:
            # start server right after port released
            # to reduce potential race
            marshal_server = MarshalService(
                saved_bundle_path,
                outbound_host="localhost",
                outbound_port=api_server_port,
                outbound_workers=1,
            )
            api_server = BentoAPIServer(bento_service, port=api_server_port)
        marshal_server.async_start(port=port)
        api_server.start()
    else:
        api_server = BentoAPIServer(bento_service, port=port)
        api_server.start()
def test_easyocr_artifact_packs():
    svc = EasyOCRService()

    lang_list = ['ch_sim', 'en']
    recog_network = "zh_sim_g2"

    model = easyocr.Reader(
        lang_list=lang_list,
        gpu=False,
        download_enabled=True,
        recog_network=recog_network,
    )
    svc.pack('chinese_small', model, lang_list=lang_list, recog_network=recog_network)

    assert [
        x[1] for x in model.readtext(IMAGE_PATH)
    ] == TEST_RESULT, 'Run inference before saving the artifact'

    saved_path = svc.save()
    loaded_svc = bentoml.load(saved_path)

    assert (
        loaded_svc.predict(imageio.imread(IMAGE_PATH))['text'] == TEST_RESULT
    ), 'Run inference after saving the artifact'

    # clean up saved bundle
    yc = YataiClient()
    yc.repository.delete(f'{svc.name}:{svc.version}')
def test_tensorflow_2_artifact_loaded(tf2_svc):
    with export_service_bundle(tf2_svc) as saved_path:
        tf2_svc_loaded = bentoml.load(saved_path)
        assert (
            tf2_svc.predict(test_tensor) == tf2_svc_loaded.predict(test_tensor) == 15.0
        ), 'Inference on saved and loaded TF2 artifact does not match expected'
async def on_ready():
    print('We have logged in as {0.user}'.format(client))
    for guild in client.guilds:
        if guild.name == GUILD:
            break

    guild_obj.ids = guild.members
    guild_obj.bento_service = bentoml.load(LOAD_PATH)
def main(
    bento_identifier: str = "",
    runner_name: str = "",
    bind: str = "",
    working_dir: t.Optional[str] = None,
) -> None:
    """
    Start a runner server.

    Args:
        bento_identifier: the Bento identifier
        runner_name: the name of the runner
        bind: the bind address URI. Can be:
            - tcp://host:port
            - unix://path/to/unix.sock
            - file:///path/to/unix.sock
            - fd://12
        working_dir: (Optional) the working directory
    """
    import uvicorn  # type: ignore

    from bentoml._internal.server.runner_app import RunnerAppFactory

    ServiceContext.component_name_var.set(runner_name)

    svc = load(bento_identifier, working_dir=working_dir, change_global_cwd=True)
    runner = svc.runners[runner_name]
    app = t.cast("ASGI3Application", RunnerAppFactory(runner)())

    parsed = urlparse(bind)
    uvicorn_options = {
        "log_level": "info",
        "log_config": LOGGING_CONFIG,
        "workers": 1,
    }

    if parsed.scheme in ("file", "unix"):
        uvicorn.run(
            app,
            uds=uri_to_path(bind),
            **uvicorn_options,
        )
    elif parsed.scheme == "tcp":
        uvicorn.run(
            app,
            host=parsed.hostname,
            port=parsed.port,
            **uvicorn_options,
        )
    elif parsed.scheme == "fd":
        # when fd is provided, we will skip the uvicorn internal supervisor,
        # thus there is only one process
        fd = int(parsed.netloc)
        sock = socket.socket(fileno=fd)
        config = uvicorn.Config(app, **uvicorn_options)
        uvicorn.Server(config).run(sockets=[sock])
    else:
        raise ValueError(f"Unsupported bind scheme: {bind}")
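# A minimal usage sketch for the runner entry point above. The Bento
# identifier, runner name, and bind address are hypothetical values; the
# tcp://host:port form is one of the bind schemes listed in the docstring.
if __name__ == "__main__":
    main(
        bento_identifier="iris_classifier:latest",
        runner_name="iris_clf_runner",
        bind="tcp://127.0.0.1:8001",
    )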
def test_save_and_load_model_from_s3():
    test_model = MyTestModel()
    ms = MyTestBentoService.pack(model=test_model)

    s3_location = "s3://bentoml/test"
    s3_saved_path = ms.save(base_path=s3_location)

    download_model_service = bentoml.load(s3_saved_path)
    assert download_model_service.get_service_apis()[0].func(1) == 2
def test_json_artifact_simple_service_round_trip(tmp_path):
    service = ExampleServiceWithJSONArtifact()
    # 'hparams' = hyperparameters used at training time.
    hparams = {"lr": 1e-3, "patience": 10, "decay": 0.1, "batch_size": 16}
    service.pack("hparams", hparams)
    service.save_to_dir(str(tmp_path))
    del service

    new_service = bentoml.load(str(tmp_path))
    assert new_service.artifacts.hparams == hparams
def test_pytorch_lightning_model_artifact():
    svc = PytorchLightningService()
    model = TorchLightningModel()
    svc.pack('model', model)

    saved_path = svc.save(version=uuid.uuid4().hex[0:8])
    svc = bentoml.load(saved_path)

    result = svc.predict(pd.DataFrame([[5, 4, 3, 2]]))
    assert result.tolist() == [[6, 5, 4, 3]]
def test_keras_artifact_loaded(svc):
    with export_service_bundle(svc) as saved_path:
        loaded = bentoml.load(saved_path)
        assert (
            loaded.predict([test_data]) == 15.0
        ), 'Inference on saved and loaded Keras artifact does not match expected'
        assert (
            loaded.predict2([test_data]) == 15.0
        ), 'Inference on saved and loaded Keras artifact does not match expected'
def test_pytorch_lightning_model_artifact():
    svc = PytorchLightningService()
    model = TorchLightningModel()
    svc.pack('model', model)

    with export_service_bundle(svc) as saved_path:
        svc = bentoml.load(saved_path)
        result = svc.predict(pd.DataFrame([[5, 4, 3, 2]]))
        assert result.tolist() == [[6, 5, 4, 3]]
def _load_model_from_tmp(self, path):
    path = os.path.join(path, self.model_id)
    if not os.path.exists(path):
        return None
    items = sorted(os.listdir(path))
    if not items:
        return None
    else:
        tag = items[-1]
    path = os.path.join(path, tag)
    return bentoml.load(path)
def test_fastai2_artifact_pack(fastai_learner):
    svc = FastaiClassifier()
    svc.pack('model', fastai_learner)
    assert svc.predict(test_df) == 5.0, 'Run inference before saving'

    saved_path = svc.save()
    loaded_svc = bentoml.load(saved_path)
    assert loaded_svc.predict(test_df) == 5.0, 'Run inference from saved model'

    yc = YataiClient()
    yc.repository.delete(f'{svc.name}:{svc.version}')
def test_tensorflow_2_artifact_loaded(svc):
    with export_service_bundle(svc) as saved_path:
        svc_loaded = bentoml.load(saved_path)
        assert (
            svc_loaded.predict1(test_tensor) == 15.0
        ), 'Inference on saved and loaded TF2 artifact does not match expected'
        assert (
            svc_loaded.predict2(test_tensor) == 15.0
        ), 'Inference on saved and loaded TF2 artifact does not match expected'
        assert (
            (svc_loaded.predict3(ragged_data) == 15.0).numpy().all()
        ), 'Inference on saved and loaded TF2 artifact does not match expected'
def test_gluon_artifact_pack(gluon_classifier, trained_gluon_model):
    gluon_classifier.pack('model', trained_gluon_model)

    assert gluon_classifier.predict([0]) == [0]

    saved_path = gluon_classifier.save()
    loaded_svc = bentoml.load(saved_path)
    assert loaded_svc.predict([0]) == [0]

    # clean up saved bundle
    yc = YataiClient()
    yc.repository.delete(f'{gluon_classifier.name}:{gluon_classifier.version}')
def test_pytorch_artifact_pack(pytorch_classifier_class):
    svc = pytorch_classifier_class()
    model = PytorchModel()
    svc.pack('model', model)
    assert svc.predict(test_df) == 5.0, 'Run inference before saving the artifact'

    saved_path = svc.save()
    loaded_svc = bentoml.load(saved_path)
    assert loaded_svc.predict(test_df) == 5.0, 'Run inference from saved artifact'

    # clean up saved bundle
    yc = YataiClient()
    yc.repository.delete(f'{svc.name}:{svc.version}')
def test_pytorch_lightning_model_artifact_with_saved_lightning_model():
    with TempDirectory() as temp_dir:
        svc = PytorchLightningService()
        model = TorchLightningModel()
        script = model.to_torchscript()
        script_path = f'{temp_dir}/model.pt'
        torch.jit.save(script, script_path)
        svc.pack('model', script_path)

        saved_path = svc.save()
        svc = bentoml.load(saved_path)

        result = svc.predict(pd.DataFrame([[5, 4, 3, 2]]))
        assert result.tolist() == [[6, 5, 4, 3]]
def test_onnx_model_artifact_pack_modelproto_with_onnxruntime_backend(
    onnx_iris_classifier_class, sklearn_onnx_model
):
    svc = onnx_iris_classifier_class()
    svc.pack('model', sklearn_onnx_model)
    assert svc.predict(test_df)[0] == [1], 'Run inference before saving the onnx artifact'

    saved_path = svc.save()
    loaded_svc = bentoml.load(saved_path)
    assert loaded_svc.predict(test_df)[0] == [1], 'Run inference after saving the onnx model'

    # clean up saved bundle
    yc = YataiClient()
    yc.repository.dangerously_delete_bento(svc.name, svc.version)
def test_pytorch_artifact_pack(coreml_classifier_class):
    svc = coreml_classifier_class()
    pytorch_model = PytorchModel()
    model = convert_pytorch_to_coreml(pytorch_model)
    svc.pack('model', model)
    assert svc.predict(test_df) == 5.0, 'Run inference before saving the artifact'

    saved_path = svc.save()
    loaded_svc = bentoml.load(saved_path)
    assert loaded_svc.predict(test_df) == 5.0, 'Run inference from saved artifact'

    # clean up saved bundle
    yc = YataiClient()
    yc.repository.dangerously_delete_bento(svc.name, svc.version)
def test_tensorflow_artifact_pack(tensorflow_classifier_class):
    svc = tensorflow_classifier_class()
    model = TensorflowModel()
    svc.pack('model', model)
    assert svc.predict(test_df) == 15.0, 'Run inference before saving the artifact'

    saved_path = svc.save()
    loaded_svc = bentoml.load(saved_path)
    assert loaded_svc.predict(test_df) == 15.0, 'Run inference from saved artifact'

    # clean up saved bundle
    yc = YataiClient()
    yc.repository.dangerously_delete_bento(svc.name, svc.version)
def main(
    bento_identifier: str = "",
    bind: str = "",
    working_dir: t.Optional[str] = None,
    reload: bool = False,
    reload_delay: t.Optional[float] = None,
    backlog: int = 2048,
):
    import uvicorn  # type: ignore

    from ...configuration import get_debug_mode

    ServiceContext.component_name_var.set("dev_api_server")

    parsed = urlparse(bind)

    if parsed.scheme == "fd":
        fd = int(parsed.netloc)
        sock = socket.socket(fileno=fd)
        log_level = "debug" if get_debug_mode() else "info"
        svc = load(bento_identifier, working_dir=working_dir, change_global_cwd=True)
        uvicorn_options = {
            "log_level": log_level,
            "backlog": backlog,
            "reload": reload,
            "reload_delay": reload_delay,
            "log_config": LOGGING_CONFIG,
            "workers": 1,
        }

        if reload:
            # When reload=True, the app parameter in uvicorn.run(app) must be the import str
            asgi_app_import_str = f"{svc._import_str}.asgi_app"  # type: ignore[reportPrivateUsage]
            # TODO: use svc.build_args.include/exclude as default files to watch
            # TODO: watch changes in model store when "latest" model tag is used
            config = uvicorn.Config(asgi_app_import_str, **uvicorn_options)
            server = uvicorn.Server(config)

            from uvicorn.supervisors import ChangeReload  # type: ignore

            ChangeReload(config, target=server.run, sockets=[sock]).run()
        else:
            config = uvicorn.Config(svc.asgi_app, **uvicorn_options)
            uvicorn.Server(config).run(sockets=[sock])
    else:
        raise ValueError(f"Unsupported bind scheme: {bind}")
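# A minimal usage sketch for the dev API server entry point above. This
# variant only accepts an fd:// bind, so a parent supervisor process would
# normally create the listening socket and pass its file descriptor down;
# the address, Bento identifier, and working directory below are assumptions
# for illustration only.
if __name__ == "__main__":
    import socket

    sock = socket.socket()
    sock.bind(("127.0.0.1", 3000))
    main(
        bento_identifier="iris_classifier:latest",
        bind=f"fd://{sock.fileno()}",
        working_dir=".",
    )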
def test_pytorch_artifact_pack_with_traced_model(pytorch_classifier_class):
    svc = pytorch_classifier_class()
    input_for_tracing = torch.ones(5)
    model = PytorchModel()
    traced_model = torch.jit.trace(model, input_for_tracing)

    svc.pack('model', traced_model)
    assert svc.predict(test_df) == 5.0, 'Run inference before saving the artifact'

    saved_path = svc.save()
    loaded_svc = bentoml.load(saved_path)
    assert loaded_svc.predict(test_df) == 5.0, 'Run inference from saved artifact'

    # clean up saved bundle
    yc = YataiClient()
    yc.repository.delete(f'{svc.name}:{svc.version}')
def test_lgbm_artifact_pack():
    model = get_trained_lgbm_model()
    svc = LgbModelService()
    svc.pack('model', model)

    assert svc.predict(DataFrame([[0]])) == [0]

    saved_path = svc.save()
    loaded_svc = bentoml.load(saved_path)
    assert loaded_svc.predict(DataFrame([[0]])) == [0]

    # clean up saved bundle
    yc = YataiClient()
    yc.repository.dangerously_delete_bento(svc.name, svc.version)
def test_onnxmlir_artifact(get_onnx_mlir_svc):
    svc = get_onnx_mlir_svc
    assert (
        svc.predict(test_df)[0] == 15.0
    ), 'Inference on onnx-mlir artifact does not match expected'

    saved_path = svc.save()
    loaded_svc = bentoml.load(saved_path)
    assert (
        loaded_svc.predict(test_df)[0] == 15.0
    ), 'Run inference after saving the onnx-mlir model'

    # clean up saved bundle
    yc = YataiClient()
    yc.repository.delete(f'{svc.name}:{svc.version}')