def dynamic_server(
    train_env: TrainEnv,
) -> ContextManager["testutil.ServerFacade"]:
    """
    Yield a temporary inference server running in dynamic mode.

    The server is built from the ``ServeEnv`` rooted at the training
    environment's base path and uses ``MeanPredictor`` as the forecaster
    type; it listens on a freshly allocated free port.
    """
    with testutil.temporary_server(
        ServeEnv(train_env.path.base),
        MeanPredictor,
        Settings(sagemaker_server_port=testutil.free_port()),
    ) as facade:
        yield facade
def static_server(
    train_env: TrainEnv,
) -> ContextManager["testutil.ServerFacade"]:
    """
    Yield a temporary inference server running in static mode.

    A ``MeanPredictor`` is first built from the training environment's
    hyperparameters and serialized to the model path, so the server can
    load it from disk (no forecaster type is passed to the server).
    """
    # Materialize the predictor on disk before the server starts.
    predictor = MeanPredictor.from_hyperparameters(**train_env.hyperparameters)
    predictor.serialize(train_env.path.model)

    with testutil.temporary_server(
        ServeEnv(train_env.path.base),
        None,
        Settings(sagemaker_server_port=testutil.free_port()),
    ) as facade:
        yield facade
def temporary_server(
    env: ServeEnv,
    forecaster_type: Optional[Type[Predictor]],
    settings: Settings = Settings(),
) -> ContextManager[ServerFacade]:
    """
    A context manager that instantiates a Gunicorn inference server in a
    separate process (using the :func:`make_inference_server` call)

    Parameters
    ----------
    env
        The `ServeEnv` to use in static inference mode.
        Either `env` or `forecaster_type` must be set.
    forecaster_type
        The `Predictor` type to use in dynamic inference mode.
        Either `env` or `forecaster_type` must be set.
    settings
        Settings to use when instantiating the Gunicorn server.
        NOTE(review): the default ``Settings()`` is evaluated once at
        import time and shared across calls — confirm `Settings` is
        effectively immutable here.

    Returns
    -------
    ContextManager[ServerFacade]
        A context manager that yields the `ServerFacade` instance
        wrapping the spawned inference server.

    Raises
    ------
    TimeoutError
        If the spawned server does not answer a ping within 10 seconds.
    """
    context = multiprocessing.get_context("fork")
    context = typing.cast(ForkContext, context)  # cast to make mypy pass

    gunicorn_app = make_gunicorn_app(env, forecaster_type, settings)
    process = context.Process(target=gunicorn_app.run)
    process.start()

    endpoint = ServerFacade(
        base_address="http://{address}:{port}".format(
            address=settings.sagemaker_server_address,
            port=settings.sagemaker_server_port,
        )
    )

    try:
        # try to ping the server (signalling liveness)
        # poll for n seconds in t second intervals
        n, t = 10, 2
        max_time = time.time() + n
        while not endpoint.ping():
            if time.time() < max_time:
                time.sleep(t)
            else:
                msg = f"Failed to start the inference server within {n} seconds"
                raise TimeoutError(msg)

        yield endpoint
    finally:
        # Always reap the child process — previously it leaked when the
        # liveness ping timed out or the caller's `with` body raised.
        process.terminate()
        process.join()
def serve_command(
    data_path: str, forecaster: Optional[str], force_static: bool
) -> None:
    """
    Run the 'serve' command: build a Gunicorn inference app and run it.

    When a forecaster name is given and static mode is not forced, the
    app is created in dynamic mode (no ``ServeEnv``, forecaster resolved
    by name); otherwise it is created in static mode from the
    ``ServeEnv`` rooted at ``data_path``.
    """
    from gluonts.shell import serve

    logging.info("Run 'serve' command")

    use_dynamic = forecaster is not None and not force_static
    if use_dynamic:
        app = serve.make_gunicorn_app(
            env=None,
            forecaster_type=forecaster_type_by_name(forecaster),
            settings=Settings(),
        )
    else:
        app = serve.make_gunicorn_app(
            env=ServeEnv(Path(data_path)),
            forecaster_type=None,
            settings=Settings(),
        )

    app.run()
def serve_command(
    data_path: str, forecaster: Optional[str], force_static: bool
) -> None:
    """
    Run the 'serve' command: build a Gunicorn inference app and run it.

    A ``ServeEnv`` is always created from ``data_path`` (with dynamic
    support installed); the forecaster type is resolved by name only
    when one is given and static mode is not forced.
    """
    from gluonts.shell import serve

    env = ServeEnv(Path(data_path))
    env.install_dynamic()

    logger.info("Run 'serve' command")

    forecaster_type: Optional[Forecaster] = None
    if not force_static and forecaster is not None:
        forecaster_type = forecaster_type_by_name(forecaster)

    serve.make_gunicorn_app(
        env=env,
        forecaster_type=forecaster_type,
        settings=Settings(),
    ).run()
def dynamic_server(
    train_env: TrainEnv
) -> ContextManager[testutil.ServerFacade]:
    """
    Yield a temporary inference server in dynamic mode (no ``ServeEnv``).

    ``MeanPredictor`` is passed as the forecaster type and the server
    listens on a freshly allocated free port.
    NOTE(review): ``train_env`` is not read here — presumably it exists
    as a fixture dependency; confirm before removing.
    """
    port = testutil.free_port()
    with testutil.temporary_server(
        None, MeanPredictor, Settings(sagemaker_server_port=port)
    ) as facade:
        yield facade