    def delete_model(self, model_name):
        model_server_config = model_server_config_pb2.ModelServerConfig()
        config_list = model_server_config_pb2.ModelConfigList()

        with lock(DEFAULT_LOCK_FILE):
            try:
                config_file = self._read_model_config(MODEL_CONFIG_FILE)
                config_list_text = config_file.strip('\n').strip('}').strip(
                    'model_config_list: {')
                config_list = text_format.Parse(text=config_list_text,
                                                message=config_list)

                for config in config_list.config:
                    if config.name == model_name:
                        config_list.config.remove(config)
                        model_server_config.model_config_list.CopyFrom(
                            config_list)
                        req = model_management_pb2.ReloadConfigRequest()
                        req.config.CopyFrom(model_server_config)
                        self.stub.HandleReloadConfigRequest(
                            request=req,
                            timeout=GRPC_REQUEST_TIMEOUT_IN_SECONDS,
                            wait_for_ready=True)
                        self._delete_model_from_config_file(model_server_config)
                        return 'Model {} unloaded.'.format(model_name)

                # no such model exists
                raise FileNotFoundError
            except grpc.RpcError as e:
                if e.code() is grpc.StatusCode.DEADLINE_EXCEEDED:
                    raise MultiModelException(408, e.details())
                raise MultiModelException(500, e.details())

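For reference, a minimal sketch of the text-format config that the strip/Parse logic above assumes MODEL_CONFIG_FILE contains; the model name and base path are hypothetical.

from google.protobuf import text_format
from tensorflow_serving.config import model_server_config_pb2

# Hypothetical MODEL_CONFIG_FILE contents (TensorFlow Serving text proto).
EXAMPLE_CONFIG_FILE = """model_config_list: {
  config: {
    name: 'half_plus_two'
    base_path: '/opt/ml/models/half_plus_two/model'
    model_platform: 'tensorflow'
  }
}
"""

# Stripping the outer "model_config_list: { ... }" wrapper leaves bare
# "config: { ... }" entries, which text_format.Parse turns into a
# ModelConfigList exactly as in delete_model above.
inner_text = EXAMPLE_CONFIG_FILE.strip('\n').strip('}').strip('model_config_list: {')
config_list = text_format.Parse(text=inner_text,
                                message=model_server_config_pb2.ModelConfigList())
print([config.name for config in config_list.config])  # ['half_plus_two']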
Example #2
    def on_delete(self, req, res, model_name):  # pylint: disable=W0613
        if model_name not in self._model_tfs_pid:
            res.status = falcon.HTTP_404
            res.body = json.dumps(
                {'error': 'Model {} is not loaded yet'.format(model_name)})
        else:
            try:
                self._model_tfs_pid[model_name].kill()
                os.remove('/sagemaker/tfs-config/{}/model-config.cfg'.format(
                    model_name))
                os.rmdir('/sagemaker/tfs-config/{}'.format(model_name))
                release_rest_port = self._model_tfs_rest_port[model_name]
                release_grpc_port = self._model_tfs_grpc_port[model_name]
                with lock():
                    bisect.insort(self._tfs_ports['rest_port'],
                                  release_rest_port)
                    bisect.insort(self._tfs_ports['grpc_port'],
                                  release_grpc_port)
                del self._model_tfs_rest_port[model_name]
                del self._model_tfs_grpc_port[model_name]
                del self._model_tfs_pid[model_name]
                res.status = falcon.HTTP_200
                res.body = json.dumps({
                    'success':
                    'Successfully unloaded model {}.'.format(model_name)
                })
            except OSError as error:
                res.status = falcon.HTTP_500
                res.body = json.dumps({'error': str(error)}).encode('utf-8')
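A hedged usage sketch for the on_delete handler above: unloading a model with an HTTP DELETE against the container's /models/<model_name> route. Host, port, and model name are hypothetical.

import requests

response = requests.delete('http://localhost:8080/models/half_plus_two')
print(response.status_code, response.text)  # 200 plus a success message if the model was loaded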
    def add_model(self, model_name, base_path, model_platform='tensorflow'):
        # read model configs from existing model config file
        model_server_config = model_server_config_pb2.ModelServerConfig()
        config_list = model_server_config_pb2.ModelConfigList()

        with lock(DEFAULT_LOCK_FILE):
            try:
                config_file = self._read_model_config(MODEL_CONFIG_FILE)
                model_server_config = text_format.Parse(
                    text=config_file, message=model_server_config)

                new_model_config = config_list.config.add()
                new_model_config.name = model_name
                new_model_config.base_path = base_path
                new_model_config.model_platform = model_platform

                # send HandleReloadConfigRequest to tensorflow model server
                model_server_config.model_config_list.MergeFrom(config_list)
                req = model_management_pb2.ReloadConfigRequest()
                req.config.CopyFrom(model_server_config)

                self.stub.HandleReloadConfigRequest(
                    request=req,
                    timeout=GRPC_REQUEST_TIMEOUT_IN_SECONDS,
                    wait_for_ready=True)
                self._add_model_to_config_file(model_name, base_path,
                                               model_platform)
            except grpc.RpcError as e:
                if e.code() is grpc.StatusCode.INVALID_ARGUMENT:
                    raise MultiModelException(409, e.details())
                elif e.code() is grpc.StatusCode.DEADLINE_EXCEEDED:
                    raise MultiModelException(408, e.details())
                raise MultiModelException(500, e.details())

        return 'Successfully loaded model {}'.format(model_name)
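For context, a minimal sketch (not part of the original example) of how the self.stub used by add_model and delete_model could be created; the port constant below is an assumption.

import grpc
from tensorflow_serving.apis import model_service_pb2_grpc

TFS_GRPC_PORT = 10001  # hypothetical port of the local TensorFlow Serving process

channel = grpc.insecure_channel('localhost:{}'.format(TFS_GRPC_PORT))
stub = model_service_pb2_grpc.ModelServiceStub(channel)
# stub.HandleReloadConfigRequest(request=req, timeout=..., wait_for_ready=True)
# is then called exactly as in the methods above.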
    def _handle_invocation_post(self, req, res, model_name=None):
        if SAGEMAKER_MULTI_MODEL_ENABLED:
            if model_name:
                if model_name not in self._model_tfs_rest_port:
                    res.status = falcon.HTTP_404
                    res.body = json.dumps({
                        'error':
                        "Model {} is not loaded yet.".format(model_name)
                    })
                    return
                else:
                    log.info("model name: {}".format(model_name))
                    rest_port = self._model_tfs_rest_port[model_name]
                    log.info("rest port: {}".format(
                        str(self._model_tfs_rest_port[model_name])))
                    grpc_port = self._model_tfs_grpc_port[model_name]
                    log.info("grpc port: {}".format(
                        str(self._model_tfs_grpc_port[model_name])))
                    data, context = tfs_utils.parse_request(
                        req, rest_port, grpc_port,
                        self._tfs_default_model_name, model_name)
            else:
                res.status = falcon.HTTP_400
                res.body = json.dumps({
                    'error':
                    'Invocation request does not contain model name.'
                })
                return
        else:
            data, context = tfs_utils.parse_request(
                req, self._tfs_rest_port, self._tfs_grpc_port,
                self._tfs_default_model_name)

        try:
            res.status = falcon.HTTP_200
            if SAGEMAKER_MULTI_MODEL_ENABLED:
                with lock():
                    handlers = self.model_handlers[model_name]
                    res.body, res.content_type = handlers(data, context)
            else:
                res.body, res.content_type = self._handlers(data, context)
        except Exception as e:  # pylint: disable=broad-except
            log.exception('exception handling request: {}'.format(e))
            res.status = falcon.HTTP_500
            res.body = json.dumps({'error': str(e)}).encode('utf-8')  # pylint: disable=E1101
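A minimal sketch of the handler contract assumed by _handle_invocation_post: a callable that takes (data, context) and returns a (response_body, content_type) pair. The TFS REST URL and model name are hypothetical; a real handler would derive them from the context produced by tfs_utils.parse_request.

import requests

def example_handler(data, context):  # hypothetical handler
    url = 'http://localhost:10001/v1/models/half_plus_two:predict'  # hypothetical TFS REST endpoint
    response = requests.post(url, data=data)
    return response.content, 'application/json'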
Example #5
    def _handle_load_model_post(self, res, data):  # noqa: C901
        model_name = data['model_name']
        base_path = data['url']

        # model is already loaded
        if model_name in self._model_tfs_pid:
            res.status = falcon.HTTP_409
            res.body = json.dumps(
                {'error': 'Model {} is already loaded.'.format(model_name)})
            return

        # check if there are available ports
        if not self._ports_available():
            res.status = falcon.HTTP_507
            res.body = json.dumps({
                'error':
                'Memory exhausted: no available ports to load the model.'
            })
            return
        with lock():
            self._model_tfs_rest_port[model_name] = self._tfs_ports[
                'rest_port'].pop()
            self._model_tfs_grpc_port[model_name] = self._tfs_ports[
                'grpc_port'].pop()

        # validate model files are in the specified base_path
        if self.validate_model_dir(base_path):
            try:
                # install custom dependencies, import handlers
                self._import_custom_modules(model_name)

                tfs_config = tfs_utils.create_tfs_config_individual_model(
                    model_name, base_path)
                tfs_config_file = '/sagemaker/tfs-config/{}/model-config.cfg'.format(
                    model_name)
                log.info('tensorflow serving model config: \n%s\n', tfs_config)
                os.makedirs(os.path.dirname(tfs_config_file))
                with open(tfs_config_file, 'w') as f:
                    f.write(tfs_config)

                batching_config_file = '/sagemaker/batching/{}/batching-config.cfg'.format(
                    model_name)
                if self._tfs_enable_batching:
                    tfs_utils.create_batching_config(batching_config_file)

                cmd = tfs_utils.tfs_command(
                    self._model_tfs_grpc_port[model_name],
                    self._model_tfs_rest_port[model_name],
                    tfs_config_file,
                    self._tfs_enable_batching,
                    batching_config_file,
                )
                p = subprocess.Popen(cmd.split())
                self._wait_for_model(model_name)

                log.info('started tensorflow serving (pid: %d)', p.pid)
                # update model name <-> tfs pid map
                self._model_tfs_pid[model_name] = p

                res.status = falcon.HTTP_200
                res.body = json.dumps({
                    'success':
                    'Successfully loaded model {}, '
                    'listening on rest port {} '
                    'and grpc port {}.'.format(
                        model_name,
                        self._model_tfs_rest_port[model_name],
                        self._model_tfs_grpc_port[model_name],
                    )
                })
            except MultiModelException as multi_model_exception:
                self._cleanup_config_file(tfs_config_file)
                self._cleanup_config_file(batching_config_file)
                if multi_model_exception.code == 409:
                    res.status = falcon.HTTP_409
                    res.body = multi_model_exception.msg
                elif multi_model_exception.code == 408:
                    res.status = falcon.HTTP_408
                    res.body = multi_model_exception.msg
                else:
                    raise MultiModelException(falcon.HTTP_500,
                                              multi_model_exception.msg)
            except FileExistsError as e:
                res.status = falcon.HTTP_409
                res.body = json.dumps({
                    'error':
                    'Model {} is already loaded. {}'.format(
                        model_name, str(e))
                })
            except OSError as os_error:
                self._cleanup_config_file(tfs_config_file)
                self._cleanup_config_file(batching_config_file)
                if os_error.errno == 12:
                    raise MultiModelException(
                        falcon.HTTP_507, 'Memory exhausted: '
                        'not enough memory to start TFS instance')
                else:
                    raise MultiModelException(falcon.HTTP_500,
                                              os_error.strerror)
        else:
            res.status = falcon.HTTP_404
            res.body = json.dumps({
                'error':
                'Could not find valid base path {} for servable {}'.format(
                    base_path, model_name)
            })
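A hedged usage sketch for the handler above: it expects a JSON body with 'model_name' and 'url' keys, matching a POST to the container's /models route. Host, port, and paths are hypothetical.

import json
import requests

payload = {'model_name': 'half_plus_two', 'url': '/opt/ml/models/half_plus_two'}
response = requests.post('http://localhost:8080/models',
                         data=json.dumps(payload),
                         headers={'Content-Type': 'application/json'})
print(response.status_code, response.text)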
Example #6
    def _ports_available(self):
        with lock():
            rest_ports = self._tfs_ports['rest_port']
            grpc_ports = self._tfs_ports['grpc_port']
        return len(rest_ports) > 0 and len(grpc_ports) > 0
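A hedged sketch of how the shared port pools read here (and consumed via pop() and bisect.insort() in the handlers above) might be initialized; the function name and port ranges are assumptions.

def _init_port_pools(rest_ports=range(10001, 10011), grpc_ports=range(13001, 13011)):
    # Sorted lists, so .pop() hands out the highest free port and
    # bisect.insort() puts released ports back in order.
    return {
        'rest_port': list(rest_ports),
        'grpc_port': list(grpc_ports),
    }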
Example #7
    def _handle_load_model_post(self, res, data):  # noqa: C901
        model_name = data["model_name"]
        base_path = data["url"]

        # model is already loaded
        if model_name in self._model_tfs_pid:
            res.status = falcon.HTTP_409
            res.body = json.dumps(
                {"error": "Model {} is already loaded.".format(model_name)})
            return

        # check if there are available ports
        if not self._ports_available():
            res.status = falcon.HTTP_507
            res.body = json.dumps({
                "error":
                "Memory exhausted: no available ports to load the model."
            })
            return
        with lock():
            self._model_tfs_rest_port[model_name] = self._tfs_ports[
                "rest_port"].pop()
            self._model_tfs_grpc_port[model_name] = self._tfs_ports[
                "grpc_port"].pop()

        # validate model files are in the specified base_path
        if self.validate_model_dir(base_path):
            try:
                tfs_config = tfs_utils.create_tfs_config_individual_model(
                    model_name, base_path)
                tfs_config_file = "/sagemaker/tfs-config/{}/model-config.cfg".format(
                    model_name)
                log.info("tensorflow serving model config: \n%s\n", tfs_config)
                os.makedirs(os.path.dirname(tfs_config_file))
                with open(tfs_config_file, "w") as f:
                    f.write(tfs_config)

                batching_config_file = "/sagemaker/batching/{}/batching-config.cfg".format(
                    model_name)
                if self._tfs_enable_batching:
                    tfs_utils.create_batching_config(batching_config_file)

                cmd = tfs_utils.tfs_command(
                    self._model_tfs_grpc_port[model_name],
                    self._model_tfs_rest_port[model_name],
                    tfs_config_file,
                    self._tfs_enable_batching,
                    batching_config_file,
                )
                p = subprocess.Popen(cmd.split())
                self._wait_for_model(model_name)

                log.info("started tensorflow serving (pid: %d)", p.pid)
                # update model name <-> tfs pid map
                self._model_tfs_pid[model_name] = p

                res.status = falcon.HTTP_200
                res.body = json.dumps({
                    "success":
                    "Successfully loaded model {}, "
                    "listening on rest port {} "
                    "and grpc port {}.".format(
                        model_name,
                        self._model_tfs_rest_port[model_name],
                        self._model_tfs_grpc_port[model_name],
                    )
                })
            except MultiModelException as multi_model_exception:
                self._cleanup_config_file(tfs_config_file)
                self._cleanup_config_file(batching_config_file)
                if multi_model_exception.code == 409:
                    res.status = falcon.HTTP_409
                    res.body = multi_model_exception.msg
                elif multi_model_exception.code == 408:
                    res.status = falcon.HTTP_408
                    res.body = multi_model_exception.msg
                else:
                    raise MultiModelException(falcon.HTTP_500,
                                              multi_model_exception.msg)
            except FileExistsError as e:
                res.status = falcon.HTTP_409
                res.body = json.dumps({
                    "error":
                    "Model {} is already loaded. {}".format(
                        model_name, str(e))
                })
            except OSError as os_error:
                self._cleanup_config_file(tfs_config_file)
                self._cleanup_config_file(batching_config_file)
                if os_error.errno == 12:
                    raise MultiModelException(
                        falcon.HTTP_507, "Memory exhausted: "
                        "not enough memory to start TFS instance")
                else:
                    raise MultiModelException(falcon.HTTP_500,
                                              os_error.strerror)
        else:
            res.status = falcon.HTTP_404
            res.body = json.dumps({
                "error":
                "Could not find valid base path {} for servable {}".format(
                    base_path, model_name)
            })