def serve(
    port,
    bento,
    enable_microbatch,
    mb_max_batch_size,
    mb_max_latency,
    run_with_ngrok,
    yatai_url,
    enable_swagger,
    config,
):
    """Start a local development API server for the given BentoService bundle.

    CLI options are layered over the optional configuration file as
    BentoMLConfiguration overrides, then the DI container is wired before the
    dev server starts.
    """
    saved_bundle_path = resolve_bundle_path(
        bento, pip_installed_bundle_path, yatai_url
    )

    container = BentoMLContainer()
    configuration = BentoMLConfiguration(override_config_file=config)

    # CLI flags take precedence over the configuration file.
    for key_path, value in (
        (["api_server", "port"], port),
        (["api_server", "enable_microbatch"], enable_microbatch),
        (["api_server", "run_with_ngrok"], run_with_ngrok),
        (["api_server", "enable_swagger"], enable_swagger),
        (["marshal_server", "max_batch_size"], mb_max_batch_size),
        (["marshal_server", "max_latency"], mb_max_latency),
    ):
        configuration.override(key_path, value)

    container.config.from_dict(configuration.as_dict())

    from bentoml import marshal, server

    container.wire(packages=[marshal, server])

    start_dev_server(saved_bundle_path)
def open_api_spec(bento=None):
    """Print the OpenAPI specification of a BentoService bundle as JSON."""
    bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path)
    service = load(bundle_path)
    spec = get_open_api_spec_json(service)
    _echo(json.dumps(spec, indent=2))
def serve_gunicorn(
    port,
    workers,
    timeout,
    bento=None,
    enable_microbatch=False,
    mb_max_batch_size=None,
    mb_max_latency=None,
    microbatch_workers=1,
    yatai_url=None,
    enable_swagger=True,
):
    """Start a production (gunicorn) API server for the bundle; POSIX only."""
    # Gunicorn does not run on Windows; bail out early with guidance.
    if not psutil.POSIX:
        _echo(
            "The `bentoml serve-gunicorn` command is only supported on POSIX. "
            "On windows platform, use `bentoml serve` for local API testing and "
            "docker for running production API endpoint: "
            "https://docs.docker.com/docker-for-windows/ "
        )
        return

    bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path, yatai_url)
    start_prod_server(
        bundle_path,
        port,
        timeout,
        workers,
        enable_microbatch,
        mb_max_batch_size,
        mb_max_latency,
        microbatch_workers,
        enable_swagger,
    )
def serve(
    port,
    bento,
    enable_microbatch,
    mb_max_batch_size,
    mb_max_latency,
    run_with_ngrok,
    yatai_url,
    enable_swagger,
    config,
):
    """Resolve the target bundle and forward all options to start_dev_server."""
    bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path, yatai_url)
    start_dev_server(
        bundle_path,
        port=port,
        enable_microbatch=enable_microbatch,
        mb_max_batch_size=mb_max_batch_size,
        mb_max_latency=mb_max_latency,
        run_with_ngrok=run_with_ngrok,
        enable_swagger=enable_swagger,
        config_file=config,
    )
def serve(
    port,
    bento,
    enable_microbatch,
    mb_max_batch_size,
    mb_max_latency,
    run_with_ngrok,
    yatai_url,
    enable_swagger,
):
    """Start a local dev API server; the micro-batch flag is deprecated."""
    if enable_microbatch is not None:
        # Micro-batching is always on now; the old on/off flag is a no-op.
        logger.warning(
            "Option --enable-microbatch/--disable-microbatch has been "
            "deprecated in the current release. The micro-batching option "
            "has become the default. Consider using --mb-max-batching=1 "
            "to simulate the effect of --disable-microbatch"
        )

    bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path, yatai_url)
    start_dev_server(
        bundle_path,
        port=port,
        mb_max_batch_size=mb_max_batch_size,
        mb_max_latency=mb_max_latency,
        run_with_ngrok=run_with_ngrok,
        enable_swagger=enable_swagger,
    )
def run(api_name, run_args, bento=None):
    """Invoke one API of the bundle with CLI args and exit with its status."""
    bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path)
    api = load_bento_service_api(bundle_path, api_name)
    sys.exit(api.handle_cli(run_args))
def serve(port, bento=None, enable_microbatch=False, run_with_ngrok=False, yatai_url=None):
    """Start the local development API server for the resolved bundle."""
    bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path, yatai_url)
    start_dev_server(bundle_path, port, enable_microbatch, run_with_ngrok)
def containerize(bento, push, tag, build_arg, yatai_url):
    """Containerize specified BentoService.

    BENTO is the target BentoService to be containerized, referenced by its
    name and version in format of name:version. For example:
    "iris_classifier:v1.2.0"

    `bentoml containerize` command also supports the use of the `latest` tag
    which will automatically use the last built version of your Bento.

    You can provide a tag for the image built by Bento using the `--tag`
    flag. Additionally, you can provide a `--push` flag, which will push the
    built image to the Docker repository specified by the image tag.

    You can also prefix the tag with a hostname for the repository you wish
    to push to. e.g. `bentoml containerize IrisClassifier:latest --push
    --tag repo-address.com:username/iris` would build a Docker image called
    `username/iris:latest` and push that to docker repository at
    repo-address.com.

    By default, the `containerize` command will use the current credentials
    provided by Docker daemon.
    """
    saved_bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path, yatai_url)
    _echo(f"Found Bento: {saved_bundle_path}")

    # fmt: off
    bento_metadata: "BentoServiceMetadata" = load_bento_service_metadata(saved_bundle_path)  # noqa: E501
    # fmt: on
    bento_tag = f'{bento_metadata.name}:{bento_metadata.version}'

    yatai_client: "YataiClient" = get_yatai_client(yatai_url)

    # Parse KEY=VALUE build args; the first '=' splits key from value so the
    # value itself may contain '='.
    docker_build_args = {}
    for arg in build_arg or ():
        key, value = arg.split("=", 1)
        docker_build_args[key] = value

    if yatai_url is not None:
        spinner_message = f'Sending containerize RPC to YataiService at {yatai_url}'
    else:
        spinner_message = (
            f'Containerizing {bento_tag} with local YataiService and docker '
            f'daemon from local environment'
        )

    with Spinner(spinner_message):
        tag: str = yatai_client.repository.containerize(
            bento=bento_tag,
            tag=tag,
            build_args=docker_build_args,
            push=push,
        )
        _echo(f'\nBuild container image: {tag}', CLI_COLOR_SUCCESS)
def info(bento=None, yatai_url=None):
    """List all APIs defined in the BentoService loaded from saved bundle."""
    bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path, yatai_url)
    metadata_pb = load_bento_service_metadata(bundle_path)
    _echo(json.dumps(ProtoMessageToDict(metadata_pb), indent=2))
def run(api_name, run_args, bento=None):
    """Run one bundle API, honoring a `--yatai-url` embedded in run_args."""
    # Peek at run_args for --yatai-url without consuming the remaining args,
    # which are passed through to the API handler unchanged.
    parser = argparse.ArgumentParser()
    parser.add_argument('--yatai-url', type=str, default=None)
    parsed, _ = parser.parse_known_args(run_args)

    bundle_path = resolve_bundle_path(
        bento, pip_installed_bundle_path, parsed.yatai_url
    )
    api = load_bento_service_api(bundle_path, api_name)
    sys.exit(api.handle_cli(run_args))
def serve_gunicorn(
    port,
    workers,
    timeout,
    bento,
    enable_microbatch,
    mb_max_batch_size,
    mb_max_latency,
    microbatch_workers,
    yatai_url,
    enable_swagger,
    config,
):
    """Start a production (gunicorn) API server with DI wiring; POSIX only."""
    # Gunicorn does not run on Windows; bail out early with guidance.
    if not psutil.POSIX:
        _echo(
            "The `bentoml serve-gunicorn` command is only supported on POSIX. "
            "On windows platform, use `bentoml serve` for local API testing and "
            "docker for running production API endpoint: "
            "https://docs.docker.com/docker-for-windows/ "
        )
        return

    saved_bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path, yatai_url)

    container = BentoMLContainer()
    configuration = BentoMLConfiguration(override_config_file=config)

    # CLI flags take precedence over the configuration file.
    for key_path, value in (
        (["api_server", "port"], port),
        (["api_server", "workers"], workers),
        (["api_server", "timeout"], timeout),
        (["api_server", "enable_microbatch"], enable_microbatch),
        (["api_server", "enable_swagger"], enable_swagger),
        (["marshal_server", "max_batch_size"], mb_max_batch_size),
        (["marshal_server", "max_latency"], mb_max_latency),
        (["marshal_server", "workers"], microbatch_workers),
    ):
        configuration.override(key_path, value)

    container.config.from_dict(configuration.as_dict())

    from bentoml import marshal, server

    container.wire(packages=[marshal, server])

    start_prod_server(saved_bundle_path)
def run(api_name, config, run_args, bento=None):
    """Run a bundle API with configuration-driven tracing wired up first."""
    container = BentoMLContainer()
    container.config.from_dict(
        BentoMLConfiguration(override_config_file=config).as_dict()
    )

    from bentoml import tracing

    container.wire(modules=[tracing])

    # Extract --yatai-url from the API's own args without consuming the rest,
    # which are forwarded verbatim to the API handler.
    parser = argparse.ArgumentParser()
    parser.add_argument('--yatai-url', type=str, default=None)
    parsed, _ = parser.parse_known_args(run_args)

    bundle_path = resolve_bundle_path(
        bento, pip_installed_bundle_path, parsed.yatai_url
    )
    api = load_bento_service_api(bundle_path, api_name)
    sys.exit(api.handle_cli(run_args))
def serve_gunicorn(
    port,
    workers,
    timeout,
    bento,
    enable_microbatch,
    mb_max_batch_size,
    mb_max_latency,
    microbatch_workers,
    yatai_url,
    enable_swagger,
):
    """Start a production gunicorn server; micro-batch flag is deprecated."""
    # Gunicorn does not run on Windows; bail out early with guidance.
    if not psutil.POSIX:
        _echo(
            "The `bentoml serve-gunicorn` command is only supported on POSIX. "
            "On windows platform, use `bentoml serve` for local API testing and "
            "docker for running production API endpoint: "
            "https://docs.docker.com/docker-for-windows/ "
        )
        return

    if enable_microbatch is not None:
        # Micro-batching is always on now; the old on/off flag is a no-op.
        logger.warning(
            "Option --enable-microbatch/--disable-microbatch has been "
            "deprecated in the current release. The micro-batching option "
            "has become the default. Consider using --mb-max-batching=1 "
            "to simulate the effect of --disable-microbatch"
        )

    bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path, yatai_url)
    start_prod_server(
        bundle_path,
        port=port,
        workers=workers,
        timeout=timeout,
        enable_swagger=enable_swagger,
        mb_max_batch_size=mb_max_batch_size,
        mb_max_latency=mb_max_latency,
        microbatch_workers=microbatch_workers,
    )
def serve(
    port,
    bento=None,
    enable_microbatch=False,
    mb_max_batch_size=None,
    mb_max_latency=None,
    run_with_ngrok=False,
    yatai_url=None,
    enable_swagger=True,
):
    """Start the local development API server for the resolved bundle."""
    bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path, yatai_url)
    start_dev_server(
        bundle_path,
        port,
        enable_microbatch,
        mb_max_batch_size,
        mb_max_latency,
        run_with_ngrok,
        enable_swagger,
    )
def containerize(bento, push, tag, build_arg, username, password, yatai_url):
    """Containerize specified BentoService.

    BENTO is the target BentoService to be containerized, referenced by its
    name and version in format of name:version. For example:
    "iris_classifier:v1.2.0"

    `bentoml containerize` command also supports the use of the `latest` tag
    which will automatically use the last built version of your Bento.

    You can provide a tag for the image built by Bento using the
    `--docker-image-tag` flag. Additionally, you can provide a `--push` flag,
    which will push the built image to the Docker repository specified by the
    image tag.

    You can also prefix the tag with a hostname for the repository you wish
    to push to.
    e.g. `bentoml containerize IrisClassifier:latest --push --tag
    username/iris` would build a Docker image called `username/iris:latest`
    and push that to Docker Hub.

    By default, the `containerize` command will use the credentials provided
    by Docker. You may provide your own through `--username` and `--password`.
    """
    saved_bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path, yatai_url)
    _echo(f"Found Bento: {saved_bundle_path}")

    bento_metadata = load_bento_service_metadata(saved_bundle_path)
    name = to_valid_docker_image_name(bento_metadata.name)
    version = to_valid_docker_image_version(bento_metadata.version)

    # Fall back to name/version parsed from the bundle when the tag is absent
    # or lacks an explicit image version.
    if not tag:
        _echo("Tag not specified, using tag parsed from "
              f"BentoService: '{name}:{version}'")
        tag = f"{name}:{version}"
    if ":" not in tag:
        _echo(
            "Image version not specified, using version parsed "
            f"from BentoService: '{version}'",
            CLI_COLOR_WARNING,
        )
        tag = f"{tag}:{version}"

    docker_build_args = {}
    if build_arg:
        for arg in build_arg:
            # Split on the first '=' only (maxsplit=1), so build-arg values
            # containing '=' no longer raise "too many values to unpack".
            # This matches the parsing used elsewhere in this file.
            key, value = arg.split("=", 1)
            docker_build_args[key] = value

    import docker

    docker_api = docker.from_env().api
    try:
        with Spinner(f"Building Docker image {tag} from {bento} \n"):
            for line in echo_docker_api_result(
                    docker_api.build(
                        path=saved_bundle_path,
                        tag=tag,
                        decode=True,
                        buildargs=docker_build_args,
                    )):
                _echo(line)
    except docker.errors.APIError as error:
        raise CLIException(f'Could not build Docker image: {error}')

    _echo(
        f'Finished building {tag} from {bento}',
        CLI_COLOR_SUCCESS,
    )

    if push:
        # Only send an auth payload when at least one credential was given;
        # otherwise let the Docker daemon's stored credentials apply.
        auth_config_payload = ({
            "username": username,
            "password": password
        } if username or password else None)
        try:
            with Spinner(f"Pushing docker image to {tag}\n"):
                for line in echo_docker_api_result(
                        docker_api.push(
                            repository=tag,
                            stream=True,
                            decode=True,
                            auth_config=auth_config_payload,
                        )):
                    _echo(line)
            _echo(
                f'Pushed {tag} to {name}',
                CLI_COLOR_SUCCESS,
            )
        except (docker.errors.APIError, BentoMLException) as error:
            raise CLIException(f'Could not push Docker image: {error}')
def complex_deploy(cortex_name, cortex_type, bento_path, region, model_name,
                   model_type, model_url, cortex_url):
    """Generate a FastAPI wrapper and Dockerfile inside the bundle, then deploy.

    Adds files to module to download models from url and pack it with the
    python file.
    """
    saved_bundle_path = resolve_bundle_path(bento_path, None, None)

    # Read service metadata straight from the saved bundle manifest.
    with open(os.path.join(saved_bundle_path, "bentoml.yml"), "r") as f:
        graph = yaml.safe_load(f)
    class_name = graph['metadata']['service_name']
    module_name = graph['metadata']['module_name']
    py_version = graph['env']['python_version']

    # Template for the generated FastAPI app. NOTE: `import importlib` is
    # required here — the script calls importlib.import_module but the
    # previous version never imported it, causing a NameError at request time.
    fastapi_file_script = """\
from fastapi import FastAPI
import importlib
import requests
import os
import subprocess

app = FastAPI(title="{cortex_name}")


@app.get("/create")
def task():
    try:
        mod = importlib.import_module("{class_name}.{module_name}")
        IrisClassifier = mod.{class_name}
    except Exception as e:
        return "ERROR : %s" % e
    url = "{model_url}"
    try:
        response = requests.get(url)
        if not os.path.exists("tmp_folder"):
            os.mkdir("tmp_folder")
        model_path = os.path.join("tmp_folder", "{model_name}")
        with open(model_path, "wb") as f:
            f.write(response.content)
    except Exception as e:
        return "ERROR : %s" % e
    try:
        from bentoml import api, BentoService, artifacts
        from bentoml.frameworks import {model_type}
        model = {model_type}("{model_name_only}")
        clf = model.load("tmp_folder").get()
    except Exception as e:
        return "ERROR : %s" % e
    try:
        bento_class = {class_name}()
        bento_class.pack("model", clf)
        bento_class.save()
    except Exception as e:
        return "ERROR : %s" % e
    try:
        # check_output already captures stdout; passing stdout=PIPE here
        # would raise ValueError("stdout argument not allowed").
        res = subprocess.check_output(
            ["bentoml", "deploy", "{class_name}:latest",
             "--region", "{region}",
             "--cortex-name", "{cortex_name}",
             "--cortex-type", "{cortex_type}"])
        return res.decode()
    except Exception as e:
        return "ERROR : %s" % e
"""
    ffs = fastapi_file_script.format(
        class_name=class_name,
        module_name=module_name,
        cortex_name=cortex_name,
        cortex_type=cortex_type,
        region=region,
        model_name=model_name,
        model_name_only=model_name.split(".")[0],
        model_type=model_type,
        model_url=model_url,
    )

    dockerfile = """\
FROM python:{py_version}
COPY requirements.txt /
RUN pip install -r ./requirements.txt --no-cache-dir
COPY . /
CMD ["uvicorn","bento_script:app","--reload","--port","5000","--host","0.0.0.0"]
"""
    dockerfile = dockerfile.format(py_version=py_version)

    with open(os.path.join(saved_bundle_path, "bento_script.py"), "w") as f:
        f.write(ffs)
    with open(os.path.join(saved_bundle_path, "Dockerfile"), "w") as f:
        f.write(dockerfile)

    # Pass arguments by keyword: simple_deploy's signature is
    # (cortex_name, cortex_type, region, cortex_url, bento_path=..., ...),
    # so the previous positional call shifted region/cortex_url/bento_path.
    simple_deploy(cortex_name, cortex_type, region, cortex_url,
                  bento_path=bento_path)
def simple_deploy(cortex_name, cortex_type, region, cortex_url,
                  bento_path: Optional[str] = None,
                  direct_path: Optional[str] = None):
    """Zips and deploys your module on AWS.

    The bundle directory (resolved from ``bento_path``, or supplied directly
    via ``direct_path``) is zipped, uploaded to build a Docker image, then
    exposed through a Cortex API. Returns the backend API endpoint URL.
    """
    CORTEX_URL = ("{cortex_url}cortex?repository_uri={ecr_uri}"
                  "&cortex_type={cortex_type}&cortex_name={cortex_name}")
    # '&region' was previously garbled to '®ion' (mojibake of the '&reg;'
    # HTML entity), so the region query parameter never reached the server.
    DOCKER_URL = ("{cortex_url}docker?repository_name={repository_name}"
                  "&region={region}")

    def create_unique_name(name: str):
        # Append a UUID, cap at 40 chars, and drop a trailing '-' left by the
        # cut. The old test `name[:-1] == "-"` compared the wrong slice and
        # never triggered; `name[-1]` checks the last character as intended.
        name = ''.join([name, "-", str(uuid.uuid4())])[:40]
        if name[-1] == "-":
            name = name[:-1]
        return name

    def create_zip_file(dir_path: str):
        # Archive the directory next to it under a unique name; return both
        # the archive path and its basename (used as the upload key).
        head, tail = os.path.split(dir_path)
        path = shutil.make_archive(
            os.path.join(head, create_unique_name(tail)), 'zip', dir_path)
        head, key_name = os.path.split(path)
        return path, key_name

    def create_docker(key_name, zipped_file_path, deploy_region="us-east-1"):
        repository_name = create_unique_name(key_name.split(".")[0])
        docker_url = DOCKER_URL.format(cortex_url=cortex_url,
                                       repository_name=repository_name,
                                       region=deploy_region)
        # Keep the archive handle open only for the duration of the upload
        # (the previous version leaked the file handle).
        with open(zipped_file_path, 'rb') as archive:
            files = [('file', (key_name, archive, 'application/zip'))]
            response = requests.request("POST", docker_url, files=files)
        _echo(response.text)
        ecr_uri = json.loads(response.text)['ecr_uri'].split(" ")[-1]
        return ecr_uri

    def create_cortex_api(ecr_uri, cortx_type, cortx_name):
        cortex_endpoint = CORTEX_URL.format(cortex_url=cortex_url,
                                            ecr_uri=ecr_uri,
                                            cortex_type=cortx_type,
                                            cortex_name=cortx_name)
        response = requests.request("GET", cortex_endpoint)
        backend_api_url = response.text.replace("\n", "")
        return json.loads(backend_api_url)['api_endpoint']

    if bento_path:
        saved_bundle_path = resolve_bundle_path(bento_path, None, None)
    else:
        saved_bundle_path = direct_path

    _echo("Zipping backend files")
    backend_path, backend_key_name = create_zip_file(saved_bundle_path)
    _echo("Creating docker image for backend")
    backend_docker_uri = create_docker(backend_key_name, backend_path, region)
    _echo("Creating Backend API")
    backend_cortex_uri = create_cortex_api(
        backend_docker_uri, cortex_type, cortex_name)
    _echo(f"Backend API at : {backend_cortex_uri}")
    return backend_cortex_uri