def start(self):
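        """Start TFS (unless multi-model endpoints are enabled), nginx and
        optionally gunicorn, then monitor the services until shutdown."""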
        log.info("starting services")
        self._state = "starting"
        signal.signal(signal.SIGTERM, self._stop)

        if self._tfs_enable_batching:
            log.info("batching is enabled")
            tfs_utils.create_batching_config(self._tfs_batching_config_path)

        if self._tfs_enable_multi_model_endpoint:
            log.info(
                "multi-model endpoint is enabled, TFS model servers will be started later"
            )
        else:
            self._create_tfs_config()
            self._start_tfs()
            self._wait_for_tfs()

        self._create_nginx_config()

        if self._use_gunicorn:
            self._setup_gunicorn()
            self._start_gunicorn()
            # make sure gunicorn is up
            with self._timeout(seconds=30):
                self._wait_for_gunicorn()

        self._start_nginx()
        self._state = "started"
        self._monitor()
        self._stop()

    def start(self):
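        """Start the services and restart any child process that exits unexpectedly."""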
        log.info('starting services')
        self._state = 'starting'
        signal.signal(signal.SIGTERM, self._stop)

        if self._tfs_enable_multi_model_endpoint:
            log.info(
                'multi-model endpoint is enabled, TFS model servers will be started later'
            )
        else:
            tfs_utils.create_tfs_config(self._tfs_default_model_name,
                                        self._tfs_config_path)
            self._create_tfs_config()
            self._start_tfs()

        self._create_nginx_config()

        if self._tfs_enable_batching:
            log.info('batching is enabled')
            tfs_utils.create_batching_config(self._tfs_batching_config_path)

        if self._use_gunicorn:
            self._setup_gunicorn()
            self._start_gunicorn()
            # make sure gunicorn is up
            with self._timeout(seconds=30):
                self._wait_for_gunicorn()

        self._start_nginx()
        self._state = 'started'

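        # reap exited children with os.wait(); while still 'started', restart
        # whichever of nginx, TFS or gunicorn exited unexpectedly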
        while True:
            pid, status = os.wait()

            if self._state != 'started':
                break

            if pid == self._nginx.pid:
                log.warning(
                    'unexpected nginx exit (status: {}). restarting.'.format(
                        status))
                self._start_nginx()

            elif pid == self._tfs.pid:
                log.warning(
                    'unexpected tensorflow serving exit (status: {}). restarting.'
                    .format(status))
                self._start_tfs()

            elif self._gunicorn and pid == self._gunicorn.pid:
                log.warning(
                    'unexpected gunicorn exit (status: {}). restarting.'.
                    format(status))
                self._start_gunicorn()

        self._stop()
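
# The examples above use a `self._timeout(seconds=30)` context manager that is
# not shown here. A minimal, illustrative sketch of such a helper, assuming it
# is built on SIGALRM (the real implementation may differ):
import contextlib
import signal


@contextlib.contextmanager
def timeout(seconds):
    def _raise(signum, frame):
        raise TimeoutError("timed out after {} seconds".format(seconds))

    previous = signal.signal(signal.SIGALRM, _raise)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)  # cancel any pending alarm
        signal.signal(signal.SIGALRM, previous)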

# Example #3

    def _handle_load_model_post(self, res, data):  # noqa: C901
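        """Load `model_name` from `base_path` by starting a dedicated TFS process
        on reserved gRPC/REST ports, reporting the outcome on the Falcon response."""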
        model_name = data['model_name']
        base_path = data['url']

        # model is already loaded
        if model_name in self._model_tfs_pid:
            res.status = falcon.HTTP_409
            res.body = json.dumps(
                {'error': 'Model {} is already loaded.'.format(model_name)})
            return

        # check if there are available ports
        if not self._ports_available():
            res.status = falcon.HTTP_507
            res.body = json.dumps({
                'error':
                'Memory exhausted: no available ports to load the model.'
            })
            return
        with lock():
            self._model_tfs_rest_port[model_name] = self._tfs_ports[
                'rest_port'].pop()
            self._model_tfs_grpc_port[model_name] = self._tfs_ports[
                'grpc_port'].pop()

        # validate model files are in the specified base_path
        if self.validate_model_dir(base_path):
            try:
                # install custom dependencies, import handlers
                self._import_custom_modules(model_name)

                tfs_config = tfs_utils.create_tfs_config_individual_model(
                    model_name, base_path)
                tfs_config_file = '/sagemaker/tfs-config/{}/model-config.cfg'.format(
                    model_name)
                log.info('tensorflow serving model config: \n%s\n', tfs_config)
                os.makedirs(os.path.dirname(tfs_config_file))
                with open(tfs_config_file, 'w') as f:
                    f.write(tfs_config)

                batching_config_file = '/sagemaker/batching/{}/batching-config.cfg'.format(
                    model_name)
                if self._tfs_enable_batching:
                    tfs_utils.create_batching_config(batching_config_file)

                cmd = tfs_utils.tfs_command(
                    self._model_tfs_grpc_port[model_name],
                    self._model_tfs_rest_port[model_name],
                    tfs_config_file,
                    self._tfs_enable_batching,
                    batching_config_file,
                )
                p = subprocess.Popen(cmd.split())
                self._wait_for_model(model_name)

                log.info('started tensorflow serving (pid: %d)', p.pid)
                # update model name <-> tfs pid map
                self._model_tfs_pid[model_name] = p

                res.status = falcon.HTTP_200
                res.body = json.dumps({
                    'success':
                    'Successfully loaded model {}, '
                    'listening on rest port {} '
                    'and grpc port {}.'.format(
                        model_name,
                        self._model_tfs_rest_port[model_name],
                        self._model_tfs_grpc_port[model_name],
                    )
                })
            except MultiModelException as multi_model_exception:
                self._cleanup_config_file(tfs_config_file)
                self._cleanup_config_file(batching_config_file)
                if multi_model_exception.code == 409:
                    res.status = falcon.HTTP_409
                    res.body = multi_model_exception.msg
                elif multi_model_exception.code == 408:
                    res.status = falcon.HTTP_408
                    res.body = multi_model_exception.msg
                else:
                    raise MultiModelException(falcon.HTTP_500,
                                              multi_model_exception.msg)
            except FileExistsError as e:
                res.status = falcon.HTTP_409
                res.body = json.dumps({
                    'error':
                    'Model {} is already loaded. {}'.format(
                        model_name, str(e))
                })
            except OSError as os_error:
                self._cleanup_config_file(tfs_config_file)
                self._cleanup_config_file(batching_config_file)
                if os_error.errno == 12:  # ENOMEM: cannot allocate memory
                    raise MultiModelException(
                        falcon.HTTP_507, 'Memory exhausted: '
                        'not enough memory to start TFS instance')
                else:
                    raise MultiModelException(falcon.HTTP_500,
                                              os_error.strerror)
        else:
            res.status = falcon.HTTP_404
            res.body = json.dumps({
                'error':
                'Could not find valid base path {} for servable {}'.format(
                    base_path, model_name)
            })

    def _handle_load_model_post(self, res, data):  # noqa: C901
        model_name = data["model_name"]
        base_path = data["url"]

        # model is already loaded
        if model_name in self._model_tfs_pid:
            res.status = falcon.HTTP_409
            res.body = json.dumps(
                {"error": "Model {} is already loaded.".format(model_name)})

        # check if there are available ports
        if not self._ports_available():
            res.status = falcon.HTTP_507
            res.body = json.dumps({
                "error":
                "Memory exhausted: no available ports to load the model."
            })
            return
        with lock():
            self._model_tfs_rest_port[model_name] = self._tfs_ports[
                "rest_port"].pop()
            self._model_tfs_grpc_port[model_name] = self._tfs_ports[
                "grpc_port"].pop()

        # validate model files are in the specified base_path
        if self.validate_model_dir(base_path):
            try:
                tfs_config = tfs_utils.create_tfs_config_individual_model(
                    model_name, base_path)
                tfs_config_file = "/sagemaker/tfs-config/{}/model-config.cfg".format(
                    model_name)
                log.info("tensorflow serving model config: \n%s\n", tfs_config)
                os.makedirs(os.path.dirname(tfs_config_file))
                with open(tfs_config_file, "w") as f:
                    f.write(tfs_config)

                batching_config_file = "/sagemaker/batching/{}/batching-config.cfg".format(
                    model_name)
                if self._tfs_enable_batching:
                    tfs_utils.create_batching_config(batching_config_file)

                cmd = tfs_utils.tfs_command(
                    self._model_tfs_grpc_port[model_name],
                    self._model_tfs_rest_port[model_name],
                    tfs_config_file,
                    self._tfs_enable_batching,
                    batching_config_file,
                )
                p = subprocess.Popen(cmd.split())
                self._wait_for_model(model_name)

                log.info("started tensorflow serving (pid: %d)", p.pid)
                # update model name <-> tfs pid map
                self._model_tfs_pid[model_name] = p

                res.status = falcon.HTTP_200
                res.body = json.dumps({
                    "success":
                    "Successfully loaded model {}, "
                    "listening on rest port {} "
                    "and grpc port {}.".format(
                        model_name,
                        self._model_tfs_rest_port[model_name],
                        self._model_tfs_grpc_port[model_name],
                    )
                })
            except MultiModelException as multi_model_exception:
                self._cleanup_config_file(tfs_config_file)
                self._cleanup_config_file(batching_config_file)
                if multi_model_exception.code == 409:
                    res.status = falcon.HTTP_409
                    res.body = multi_model_exception.msg
                elif multi_model_exception.code == 408:
                    res.status = falcon.HTTP_408
                    res.body = multi_model_exception.msg
                else:
                    raise MultiModelException(falcon.HTTP_500,
                                              multi_model_exception.msg)
            except FileExistsError as e:
                res.status = falcon.HTTP_409
                res.body = json.dumps({
                    "error":
                    "Model {} is already loaded. {}".format(
                        model_name, str(e))
                })
            except OSError as os_error:
                self._cleanup_config_file(tfs_config_file)
                self._cleanup_config_file(batching_config_file)
                if os_error.errno == 12:  # ENOMEM: cannot allocate memory
                    raise MultiModelException(
                        falcon.HTTP_507, "Memory exhausted: "
                        "not enough memory to start TFS instance")
                else:
                    raise MultiModelException(falcon.HTTP_500,
                                              os_error.strerror)
        else:
            res.status = falcon.HTTP_404
            res.body = json.dumps({
                "error":
                "Could not find valid base path {} for servable {}".format(
                    base_path, model_name)
            })
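
# For context, a handler like `_handle_load_model_post` is typically invoked
# from a Falcon responder. The resource and route below are illustrative
# assumptions only, not taken from the examples above.
import json

import falcon


class ModelsResource:
    def __init__(self, service):
        self._service = service

    def on_post(self, req, res):
        # Falcon routes POST requests for this resource to on_post.
        data = json.loads(req.bounded_stream.read().decode("utf-8"))
        self._service._handle_load_model_post(res, data)


app = falcon.API()  # falcon.App() in Falcon 3+
# app.add_route("/models", ModelsResource(service))  # `service` owns the TFS state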

# Example #5

    def start(self):
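        """Start the services, launching one TFS process per gunicorn worker."""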
        log.info("starting services")
        log.info("NEURONCORE_GROUP_SIZES {}".format(self._user_ncgs))
        log.info("SAGEMAKER_GUNICORN_WORKERS {}".format(
            self._gunicorn_workers))
        self._state = "starting"
        signal.signal(signal.SIGTERM, self._stop)

        if self._tfs_enable_multi_model_endpoint:
            log.info(
                "multi-model endpoint is enabled, TFS model servers will be started later"
            )
        else:
            self._create_tfs_config()
            # start one TFS worker per gunicorn worker
            for _ in range(int(self._gunicorn_workers)):
                self._start_tfs()
            log.info("all TFS PIDs: %s", self._tfs)

        self._create_nginx_config()

        if self._tfs_enable_batching:
            log.info("batching is enabled")
            tfs_utils.create_batching_config(self._tfs_batching_config_path)

        if self._use_gunicorn:
            self._setup_gunicorn()
            self._start_gunicorn()
            # make sure gunicorn is up
            with self._timeout(seconds=30):
                self._wait_for_gunicorn()

        self._start_nginx()
        self._state = "started"

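        # reap exited children; restart nginx, any TFS worker from the pool,
        # or gunicorn while the server is still in the "started" state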
        while True:
            pid, status = os.wait()

            if self._state != "started":
                break

            if pid == self._nginx.pid:
                log.warning(
                    "unexpected nginx exit (status: {}). restarting.".format(
                        status))
                self._start_nginx()

            elif pid in self._tfs:
                log.warning(
                    "unexpected tensorflow serving exit (status: {}). restarting."
                    .format(status))
                self._tfs.remove(pid)
                self._start_tfs()

            elif self._gunicorn and pid == self._gunicorn.pid:
                log.warning(
                    "unexpected gunicorn exit (status: {}). restarting.".
                    format(status))
                self._start_gunicorn()

        self._stop()
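
# `tfs_utils.tfs_command` is not shown in these examples. It is assumed to
# assemble a tensorflow_model_server invocation roughly like the sketch below
# (the flags are standard TFS model server flags; the real helper may differ):
def example_tfs_command(grpc_port, rest_port, config_file,
                        enable_batching=False, batching_config_file=None):
    cmd = ("tensorflow_model_server"
           " --port={}"
           " --rest_api_port={}"
           " --model_config_file={}".format(grpc_port, rest_port, config_file))
    if enable_batching:
        cmd += (" --enable_batching=true"
                " --batching_parameters_file={}".format(batching_config_file))
    return cmd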