def serving_entrypoint():
    """Launch the inference server.

    NOTE: multi-model endpoints are served by MxNet Model Server;
    single-model endpoints are served by GUnicorn via the framework
    module configured in the serving environment.
    """
    if not is_multi_model():
        server.start(env.ServingEnv().framework_module)
    else:
        start_mxnet_model_server()
def main(environ, start_response):
    """WSGI entry point.

    Lazily builds the worker application on first request (importing the
    user module and initializing its transformer), then delegates every
    request to the cached app.
    """
    global app
    if app is None:
        serving_env = env.ServingEnv()
        mod = modules.import_module(serving_env.module_dir, serving_env.module_name)
        transformer = _user_module_transformer(mod)
        transformer.initialize()
        app = worker.Worker(
            transform_fn=transformer.transform,
            module_name=serving_env.module_name,
        )
    return app(environ, start_response)
def start_mxnet_model_server():
    """Start MxNet Model Server, selecting the handler service.

    Algorithm mode (no user module configured) uses the built-in
    handler; otherwise the user module is installed from S3 first and
    the user handler service is used. Both paths run MMS in
    multi-model mode.
    """
    serving_env = env.ServingEnv()
    # Renamed from `is_multi_model` to avoid shadowing the module-level
    # is_multi_model() helper used by serving_entrypoint().
    multi_model = True
    if serving_env.module_name is None:
        logging.info("Starting MXNet server in algorithm mode.")
        _start_model_server(multi_model, ALGO_HANDLER_SERVICE)
    else:
        # Fixed log-message typo: "Staring" -> "Starting".
        logging.info("Starting MXNet Model Server with user module.")
        # Install user module from s3 to import
        modules.import_module(serving_env.module_dir, serving_env.module_name)
        _start_model_server(multi_model, USER_HANDLER_SERVICE)
def main(environ, start_response):
    """WSGI entry point for MXNet serving.

    On the first request, applies MXNet environment-variable overrides,
    imports the user module, and builds the worker app with a
    model-dir-aware transformer; subsequent requests reuse the cached app.
    """
    global app
    if app is None:
        serving_env = env.ServingEnv()
        _update_mxnet_env_vars()
        mod = modules.import_module(serving_env.module_dir, serving_env.module_name)
        transformer = _user_module_transformer(mod, serving_env.model_dir)
        app = worker.Worker(
            transform_fn=transformer.transform,
            module_name=serving_env.module_name,
        )
    return app(environ, start_response)
def main(environ, start_response):
    """WSGI entry point with logging setup and a default healthcheck.

    First request: set the logger level from the serving environment,
    import and initialize the user module's transformer, and build the
    worker app. Later requests reuse the cached app.
    """
    global app
    if app is None:
        serving_env = env.ServingEnv()
        logger.setLevel(serving_env.log_level)
        mod = modules.import_module(serving_env.module_dir, serving_env.module_name)
        transformer = _user_module_transformer(mod)
        transformer.initialize()
        app = worker.Worker(
            transform_fn=transformer.transform,
            module_name=serving_env.module_name,
            healthcheck_fn=default_healthcheck_fn,
        )
    return app(environ, start_response)
def main(environ, start_response):
    """WSGI entry point that also wires an execution-parameters handler.

    import_module() here returns both the transformer and the
    execution-parameters function; both are handed to the worker app,
    which is built once and cached in the module-level `app`.
    """
    global app
    if app is None:
        serving_env = env.ServingEnv()
        transformer, exec_params_fn = import_module(
            serving_env.module_name, serving_env.module_dir
        )
        app = worker.Worker(
            transform_fn=transformer.transform,
            module_name=serving_env.module_name,
            execution_parameters_fn=exec_params_fn,
        )
    return app(environ, start_response)
def main(environ, start_response):
    """WSGI entry point (S3 user-module variant).

    Configures the logger level, downloads and imports the user module
    from S3, initializes its transformer, and serves the request.

    NOTE(review): unlike the sibling `main` implementations, this one has
    no `global app` cache — the user module is re-imported and the worker
    rebuilt on every request. Presumably intentional or relying on a
    caching layer elsewhere; confirm before adding a cache here.
    """
    serving_env = env.ServingEnv()
    logger.setLevel(serving_env.log_level)
    mod = modules.import_module_from_s3(serving_env.module_dir, serving_env.module_name)
    transformer = _user_module_transformer(mod)
    transformer.initialize()
    app = worker.Worker(
        transform_fn=transformer.transform,
        module_name=serving_env.module_name,
    )
    return app(environ, start_response)
def main():
    """Start the serving stack using the framework module configured
    in the serving environment."""
    server.start(env.ServingEnv().framework_module)
def worker():
    """Start the serving stack for this worker process."""
    serving_env = env.ServingEnv()
    server.start(serving_env.framework_module)
def start_model_server():
    """Import the configured user module, then launch the model server.

    The server is always started in multi-model mode with the standard
    handler service.
    """
    serving_env = env.ServingEnv()
    modules.import_module(serving_env.module_dir, serving_env.module_name)
    # Always multi-model in this entry point.
    _start_model_server(True, HANDLER_SERVICE)
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the 'License'). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the 'license' file accompanying this file. This file is # distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import os from sagemaker_containers.beta.framework import env from sagemaker_xgboost_container.algorithm_mode import serve # Pre-load the model in the algorithm mode. # Otherwise, the model will be loaded when serving the first request per worker. # When the model is large, the request may timeout. if os.environ.get("SERVER_SOFTWARE" ) is not None and env.ServingEnv().module_name is None: serve.load_model()