def start(self): log.info("starting services") self._state = "starting" signal.signal(signal.SIGTERM, self._stop) if self._tfs_enable_batching: log.info("batching is enabled") tfs_utils.create_batching_config(self._tfs_batching_config_path) if self._tfs_enable_multi_model_endpoint: log.info( "multi-model endpoint is enabled, TFS model servers will be started later" ) else: self._create_tfs_config() self._start_tfs() self._wait_for_tfs() self._create_nginx_config() if self._use_gunicorn: self._setup_gunicorn() self._start_gunicorn() # make sure gunicorn is up with self._timeout(seconds=30): self._wait_for_gunicorn() self._start_nginx() self._state = "started" self._monitor() self._stop()
def start(self):
    log.info('starting services')
    self._state = 'starting'
    signal.signal(signal.SIGTERM, self._stop)

    if self._tfs_enable_multi_model_endpoint:
        log.info('multi-model endpoint is enabled, TFS model servers will be started later')
    else:
        tfs_utils.create_tfs_config(self._tfs_default_model_name, self._tfs_config_path)
        self._create_tfs_config()
        self._start_tfs()

    self._create_nginx_config()

    if self._tfs_enable_batching:
        log.info('batching is enabled')
        tfs_utils.create_batching_config(self._tfs_batching_config_path)

    if self._use_gunicorn:
        self._setup_gunicorn()
        self._start_gunicorn()
        # make sure gunicorn is up
        with self._timeout(seconds=30):
            self._wait_for_gunicorn()

    self._start_nginx()
    self._state = 'started'

    while True:
        pid, status = os.wait()

        if self._state != 'started':
            break

        if pid == self._nginx.pid:
            log.warning('unexpected nginx exit (status: {}). restarting.'.format(status))
            self._start_nginx()
        elif pid == self._tfs.pid:
            log.warning('unexpected tensorflow serving exit (status: {}). restarting.'.format(status))
            self._start_tfs()
        elif self._gunicorn and pid == self._gunicorn.pid:
            log.warning('unexpected gunicorn exit (status: {}). restarting.'.format(status))
            self._start_gunicorn()

    self._stop()
def _handle_load_model_post(self, res, data):  # noqa: C901
    model_name = data['model_name']
    base_path = data['url']

    # model is already loaded
    if model_name in self._model_tfs_pid:
        res.status = falcon.HTTP_409
        res.body = json.dumps({
            'error': 'Model {} is already loaded.'.format(model_name)
        })
        return

    # check if there are available ports
    if not self._ports_available():
        res.status = falcon.HTTP_507
        res.body = json.dumps({
            'error': 'Memory exhausted: no available ports to load the model.'
        })
        return

    with lock():
        self._model_tfs_rest_port[model_name] = self._tfs_ports['rest_port'].pop()
        self._model_tfs_grpc_port[model_name] = self._tfs_ports['grpc_port'].pop()

    # validate model files are in the specified base_path
    if self.validate_model_dir(base_path):
        try:
            # install custom dependencies, import handlers
            self._import_custom_modules(model_name)

            tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path)
            tfs_config_file = '/sagemaker/tfs-config/{}/model-config.cfg'.format(model_name)
            log.info('tensorflow serving model config: \n%s\n', tfs_config)
            os.makedirs(os.path.dirname(tfs_config_file))
            with open(tfs_config_file, 'w') as f:
                f.write(tfs_config)

            batching_config_file = '/sagemaker/batching/{}/batching-config.cfg'.format(model_name)
            if self._tfs_enable_batching:
                tfs_utils.create_batching_config(batching_config_file)

            cmd = tfs_utils.tfs_command(
                self._model_tfs_grpc_port[model_name],
                self._model_tfs_rest_port[model_name],
                tfs_config_file,
                self._tfs_enable_batching,
                batching_config_file,
            )
            p = subprocess.Popen(cmd.split())
            self._wait_for_model(model_name)

            log.info('started tensorflow serving (pid: %d)', p.pid)
            # update model name <-> tfs pid map
            self._model_tfs_pid[model_name] = p

            res.status = falcon.HTTP_200
            res.body = json.dumps({
                'success': 'Successfully loaded model {}, '
                           'listening on rest port {} '
                           'and grpc port {}.'.format(
                               model_name,
                               self._model_tfs_rest_port[model_name],
                               self._model_tfs_grpc_port[model_name],
                           )
            })
        except MultiModelException as multi_model_exception:
            self._cleanup_config_file(tfs_config_file)
            self._cleanup_config_file(batching_config_file)
            if multi_model_exception.code == 409:
                res.status = falcon.HTTP_409
                res.body = multi_model_exception.msg
            elif multi_model_exception.code == 408:
                res.status = falcon.HTTP_408
                res.body = multi_model_exception.msg
            else:
                raise MultiModelException(falcon.HTTP_500, multi_model_exception.msg)
        except FileExistsError as e:
            res.status = falcon.HTTP_409
            res.body = json.dumps({
                'error': 'Model {} is already loaded. {}'.format(model_name, str(e))
            })
        except OSError as os_error:
            self._cleanup_config_file(tfs_config_file)
            self._cleanup_config_file(batching_config_file)
            if os_error.errno == 12:
                raise MultiModelException(falcon.HTTP_507,
                                          'Memory exhausted: '
                                          'not enough memory to start TFS instance')
            else:
                raise MultiModelException(falcon.HTTP_500, os_error.strerror)
    else:
        res.status = falcon.HTTP_404
        res.body = json.dumps({
            'error': 'Could not find valid base path {} for servable {}'.format(
                base_path, model_name)
        })
def _handle_load_model_post(self, res, data): # noqa: C901 model_name = data["model_name"] base_path = data["url"] # model is already loaded if model_name in self._model_tfs_pid: res.status = falcon.HTTP_409 res.body = json.dumps( {"error": "Model {} is already loaded.".format(model_name)}) # check if there are available ports if not self._ports_available(): res.status = falcon.HTTP_507 res.body = json.dumps({ "error": "Memory exhausted: no available ports to load the model." }) with lock(): self._model_tfs_rest_port[model_name] = self._tfs_ports[ "rest_port"].pop() self._model_tfs_grpc_port[model_name] = self._tfs_ports[ "grpc_port"].pop() # validate model files are in the specified base_path if self.validate_model_dir(base_path): try: tfs_config = tfs_utils.create_tfs_config_individual_model( model_name, base_path) tfs_config_file = "/sagemaker/tfs-config/{}/model-config.cfg".format( model_name) log.info("tensorflow serving model config: \n%s\n", tfs_config) os.makedirs(os.path.dirname(tfs_config_file)) with open(tfs_config_file, "w") as f: f.write(tfs_config) batching_config_file = "/sagemaker/batching/{}/batching-config.cfg".format( model_name) if self._tfs_enable_batching: tfs_utils.create_batching_config(batching_config_file) cmd = tfs_utils.tfs_command( self._model_tfs_grpc_port[model_name], self._model_tfs_rest_port[model_name], tfs_config_file, self._tfs_enable_batching, batching_config_file, ) p = subprocess.Popen(cmd.split()) self._wait_for_model(model_name) log.info("started tensorflow serving (pid: %d)", p.pid) # update model name <-> tfs pid map self._model_tfs_pid[model_name] = p res.status = falcon.HTTP_200 res.body = json.dumps({ "success": "Successfully loaded model {}, " "listening on rest port {} " "and grpc port {}.".format( model_name, self._model_tfs_rest_port, self._model_tfs_grpc_port, ) }) except MultiModelException as multi_model_exception: self._cleanup_config_file(tfs_config_file) self._cleanup_config_file(batching_config_file) if multi_model_exception.code == 409: res.status = falcon.HTTP_409 res.body = multi_model_exception.msg elif multi_model_exception.code == 408: res.status = falcon.HTTP_408 res.body = multi_model_exception.msg else: raise MultiModelException(falcon.HTTP_500, multi_model_exception.msg) except FileExistsError as e: res.status = falcon.HTTP_409 res.body = json.dumps({ "error": "Model {} is already loaded. {}".format( model_name, str(e)) }) except OSError as os_error: self._cleanup_config_file(tfs_config_file) self._cleanup_config_file(batching_config_file) if os_error.errno == 12: raise MultiModelException( falcon.HTTP_507, "Memory exhausted: " "not enough memory to start TFS instance") else: raise MultiModelException(falcon.HTTP_500, os_error.strerror) else: res.status = falcon.HTTP_404 res.body = json.dumps({ "error": "Could not find valid base path {} for servable {}".format( base_path, model_name) })
def start(self): log.info("starting services") log.info("NEURONCORE_GROUP_SIZES {}".format(self._user_ncgs)) log.info("SAGEMAKER_GUNICORN_WORKERS {}".format( self._gunicorn_workers)) self._state = "starting" signal.signal(signal.SIGTERM, self._stop) if self._tfs_enable_multi_model_endpoint: log.info( "multi-model endpoint is enabled, TFS model servers will be started later" ) else: self._create_tfs_config() #Start TFS workers for each gunicorn worker for tf_worker_num in range(int(self._gunicorn_workers)): self._start_tfs() print("all TFS PIDs {}".format(self._tfs)) self._create_nginx_config() if self._tfs_enable_batching: log.info("batching is enabled") tfs_utils.create_batching_config(self._tfs_batching_config_path) if self._use_gunicorn: self._setup_gunicorn() self._start_gunicorn() # make sure gunicorn is up with self._timeout(seconds=30): self._wait_for_gunicorn() self._start_nginx() self._state = "started" while True: pid, status = os.wait() if self._state != "started": break if pid == self._nginx.pid: log.warning( "unexpected nginx exit (status: {}). restarting.".format( status)) self._start_nginx() elif pid in self._tfs: log.warning( "unexpected tensorflow serving exit (status: {}). restarting." .format(status)) self._tfs.remove(pid) self._start_tfs() elif self._gunicorn and pid == self._gunicorn.pid: log.warning( "unexpected gunicorn exit (status: {}). restarting.". format(status)) self._start_gunicorn() self._stop()