Example #1
    def __init__(
        self,
        outbound_host,
        outbound_port,
        bundle_path,
        port=None,
        workers=1,
        timeout=None,
        prometheus_lock=None,
        outbound_workers=1,
    ):
        self.bento_service_bundle_path = bundle_path

        self.port = port or config("apiserver").getint("default_port")
        timeout = timeout or config("apiserver").getint("default_timeout")
        max_request_size = config("apiserver").getint(
            "default_max_request_size")
        self.options = {
            "bind": "%s:%s" % ("0.0.0.0", self.port),
            "timeout": timeout,
            "limit_request_line": max_request_size,
            "loglevel": config("logging").get("LEVEL").upper(),
            "worker_class": "aiohttp.worker.GunicornWebWorker",
        }
        if workers:
            self.options['workers'] = workers
        self.prometheus_lock = prometheus_lock

        self.outbound_port = outbound_port
        self.outbound_host = outbound_host
        self.outbound_workers = outbound_workers

        super(GunicornMarshalServer, self).__init__()
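A minimal usage sketch, assuming GunicornMarshalServer inherits a run() method from gunicorn's BaseApplication (the paths and ports below are placeholders):

    # marshal server listens on :5001 and forwards to a model server on :5000
    marshal_server = GunicornMarshalServer(
        outbound_host="127.0.0.1",
        outbound_port=5000,
        bundle_path="/tmp/my_bento_bundle",
        port=5001,  # None would fall back to apiserver.default_port
        workers=2,
        outbound_workers=2,
    )
    marshal_server.run()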
Example #2
    def setup_routes(self):
        """
        Set up routes for the bento model server, including:

        /               Index Page
        /healthz        Health check ping
        /feedback       Submitting feedback
        /metrics        Prometheus metrics endpoint

        And turns the user-defined BentoServiceAPI list into flask routes, e.g.:
        /classify
        /predict
        """

        self.app.add_url_rule("/", "index", self.index_view_func)
        self.app.add_url_rule("/docs.json", "docs",
                              partial(self.docs_view_func, self.bento_service))
        self.app.add_url_rule("/healthz", "healthz", self.healthz_view_func)

        if config("apiserver").getboolean("enable_metrics"):
            self.app.add_url_rule("/metrics", "metrics",
                                  self.metrics_view_func)

        if config("apiserver").getboolean("enable_feedback"):
            self.app.add_url_rule(
                "/feedback",
                "feedback",
                partial(self.feedback_view_func, self.bento_service),
                methods=["POST", "GET"],
            )

        self.setup_bento_service_api_routes()
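setup_bento_service_api_routes is not shown in this snippet; a hedged sketch of the pattern it presumably follows, registering one POST route per user-defined API (api_func is an assumed view-function name):

    def setup_bento_service_api_routes(self):
        # one flask route per BentoServiceAPI, e.g. /classify, /predict
        for api in self.bento_service.get_service_apis():
            self.app.add_url_rule(
                rule="/{}".format(api.name),
                endpoint=api.name,
                view_func=partial(self.api_func, api),  # assumed handler
                methods=["POST"],
            )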
Example #3
def _create_sagemaker_model(
    sagemaker_client, bento_name, bento_version, ecr_image_path, api_name
):
    execution_role_arn = get_arn_role_from_current_aws_user()
    model_name = generate_aws_compatible_string(bento_name + '-' + bento_version)

    sagemaker_model_info = {
        "ModelName": model_name,
        "PrimaryContainer": {
            "ContainerHostname": model_name,
            "Image": ecr_image_path,
            "Environment": {
                "API_NAME": api_name,
                "BENTO_SERVER_TIMEOUT": config().get('apiserver', 'default_timeout'),
                "BENTO_SERVER_WORKERS": config().get(
                    'apiserver', 'default_gunicorn_workers_count'
                ),
            },
        },
        "ExecutionRoleArn": execution_role_arn,
    }

    logger.debug("Creating sagemaker model %s", model_name)
    create_model_response = sagemaker_client.create_model(**sagemaker_model_info)
    logger.debug("AWS create model response: %s", create_model_response)
    return model_name
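A hedged usage sketch (the account ID, region, and names are placeholders):

    import boto3

    sagemaker_client = boto3.client("sagemaker", region_name="us-west-2")
    model_name = _create_sagemaker_model(
        sagemaker_client,
        bento_name="IrisClassifier",
        bento_version="20200101120000_ABCDEF",
        ecr_image_path="123456789012.dkr.ecr.us-west-2.amazonaws.com/irisclassifier:latest",
        api_name="predict",
    )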
Example #4
    def __init__(
        self,
        target_host,
        target_port,
        bundle_path,
        port=None,
        num_of_workers=1,
        timeout=None,
    ):
        self.bento_service_bundle_path = bundle_path

        self.target_port = target_port
        self.target_host = target_host
        self.port = port or config("apiserver").getint("default_port")
        timeout = timeout or config("apiserver").getint("default_timeout")
        self.options = {
            "bind": "%s:%s" % ("0.0.0.0", self.port),
            "timeout": timeout,
            "loglevel": config("logging").get("LOGGING_LEVEL").upper(),
            "worker_class": "aiohttp.worker.GunicornWebWorker",
        }
        if num_of_workers:
            self.options['workers'] = num_of_workers

        super(GunicornMarshalServer, self).__init__()
Example #5
    def log_image(req, request_id):
        if not config('logging').getboolean('log_request_image_files'):
            return []

        img_prefix = 'image/'
        log_folder = config('logging').get('base_log_dir')

        all_paths = []

        if req.content_type and req.content_type.startswith(img_prefix):
            filename = '{timestamp}-{request_id}.{ext}'.format(
                timestamp=int(time.time()),
                request_id=request_id,
                ext=req.content_type[len(img_prefix):],
            )
            path = os.path.join(log_folder, filename)
            all_paths.append(path)
            with open(path, 'wb') as f:
                f.write(req.get_data())

        for name in req.files:
            file = req.files[name]
            if file and file.filename:
                orig_filename = secure_filename(file.filename)
                filename = '{timestamp}-{request_id}-{orig_filename}'.format(
                    timestamp=int(time.time()),
                    request_id=request_id,
                    orig_filename=orig_filename,
                )
                path = os.path.join(log_folder, filename)
                all_paths.append(path)
                file.save(path)
                file.stream.seek(0)

        return all_paths
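A sketch of how log_image might be invoked from a request handler, mirroring the request_id generation in Example #26 (the surrounding handler is assumed):

    import uuid
    from flask import request

    request_id = str(uuid.uuid4())
    image_paths = log_image(request, request_id)  # [] when image logging is disabled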
Example #6
    def get_docs(cls, bento_service):
        """
        The docs for all endpoints in Open API format.
        """
        docs = OrderedDict(
            openapi="3.0.0",
            info=OrderedDict(
                version=bento_service.version,
                title=bento_service.name,
                description="To get a client SDK, copy all content from <a "
                "href=\"/docs.json\">docs</a> and paste into "
                "<a href=\"https://editor.swagger.io\">editor.swagger.io</a> then click "
                "the tab <strong>Generate Client</strong> and choose the language.",
            ),
            tags=[{
                "name": "infra"
            }, {
                "name": "app"
            }],
        )

        paths = OrderedDict()
        default_response = {"200": {"description": "success"}}

        paths["/healthz"] = OrderedDict(get=OrderedDict(
            tags=["infra"],
            description="Health check endpoint. Expecting an empty response "
            "with status code 200 when the service is in a healthy state",
            responses=default_response,
        ))
        if config("apiserver").getboolean("enable_metrics"):
            paths["/metrics"] = OrderedDict(get=OrderedDict(
                tags=["infra"],
                description="Prometheus metrics endpoint",
                responses=default_response,
            ))
        if config("apiserver").getboolean("enable_feedback"):
            paths["/feedback"] = OrderedDict(get=OrderedDict(
                tags=["infra"],
                description="Predictions feedback endpoint. Expecting a feedback "
                "request in JSON format; it must contain a `request_id` field, "
                "which can be obtained from any BentoService API response header",
                responses=default_response,
            ))
            paths["/feedback"]["post"] = paths["/feedback"]["get"]

        for api in bento_service.get_service_apis():
            path = "/{}".format(api.name)
            paths[path] = OrderedDict(post=OrderedDict(
                tags=["app"],
                description=api.doc,
                requestBody=OrderedDict(required=True,
                                        content=api.request_schema),
                responses=default_response,
            ))

        docs["paths"] = paths
        return docs
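The returned OrderedDict is plain JSON-serializable data, so the /docs.json view can emit it directly; a minimal sketch (the owning class is not shown here, but BentoAPIServer from Example #17 is a plausible host):

    import json

    docs = BentoAPIServer.get_docs(bento_service)
    open_api_spec = json.dumps(docs, indent=2)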
Example #7
    def setup_routes(self):
        """
        Set up routes for the bento model server, including:

        /               Index Page
        /docs           Swagger UI
        /healthz        Health check ping
        /feedback       Submitting feedback
        /metrics        Prometheus metrics endpoint

        And turns the user-defined InferenceAPI list into flask routes, e.g.:
        /classify
        /predict
        """
        if self.static_path:
            # serve static files for any given path
            # this will also serve index.html from directory /any_path/
            # for path as /any_path/
            self.app.add_url_rule(
                "/<path:file_path>",
                "static_proxy",
                partial(self.static_serve, self.static_path),
            )
            # serve index.html from the directory /any_path
            # for path as /any_path/index
            self.app.add_url_rule(
                "/<path:file_path>/index",
                "static_proxy2",
                partial(self.static_serve, self.static_path),
            )
            # serve index.html from root directory for path as /
            self.app.add_url_rule(
                "/", "index", partial(self.index_view_func, self.static_path)
            )
        else:
            self.app.add_url_rule("/", "index", self.swagger_ui_func)

        self.app.add_url_rule("/docs", "swagger", self.swagger_ui_func)
        self.app.add_url_rule(
            "/swagger_static/<path:filename>",
            "swagger_static",
            partial(self.swagger_static, self.swagger_path),
        )
        self.app.add_url_rule(
            "/docs.json", "docs", partial(self.docs_view_func, self.bento_service)
        )
        self.app.add_url_rule("/healthz", "healthz", self.healthz_view_func)

        if config("apiserver").getboolean("enable_metrics"):
            self.app.add_url_rule("/metrics", "metrics", self.metrics_view_func)

        if config("apiserver").getboolean("enable_feedback"):
            self.app.add_url_rule(
                "/feedback",
                "feedback",
                partial(self.feedback_view_func, self.bento_service),
                methods=["POST"],
            )

        self.setup_bento_service_api_routes()
Example #8
    def __init__(
        self,
        bundle_path,
        port=None,
        workers=None,
        timeout=None,
        prometheus_lock=None,
    ):
        self.bento_service_bundle_path = bundle_path

        self.port = port or config("apiserver").getint("default_port")
        timeout = timeout or config("apiserver").getint("default_timeout")
        max_request_size = config("apiserver").getint(
            "default_max_request_size")
        self.options = {
            "bind": "%s:%s" % ("0.0.0.0", self.port),
            "timeout": timeout,
            "limit_request_line": max_request_size,
            "loglevel": config("logging").get("LEVEL").upper(),
        }
        if workers:
            self.options['workers'] = workers
        self.prometheus_lock = prometheus_lock

        super(GunicornBentoServer, self).__init__()
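A minimal usage sketch, again assuming a run() method inherited from gunicorn's BaseApplication:

    server = GunicornBentoServer(
        bundle_path="/tmp/my_bento_bundle",  # placeholder path
        port=5000,
        workers=3,
    )
    server.run()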
Example #9
    def __init__(
        self,
        bento_service_name,
        pip_dependencies=None,
        auto_pip_dependencies=False,
        requirements_txt_file=None,
        conda_channels=None,
        conda_dependencies=None,
        setup_sh=None,
        docker_base_image=None,
    ):
        self._python_version = PYTHON_VERSION

        self._conda_env = CondaEnv("bentoml-" + bento_service_name,
                                   self._python_version)

        bentoml_deploy_version = get_bentoml_deploy_version()
        self._pip_dependencies = ["bentoml=={}".format(bentoml_deploy_version)]
        if pip_dependencies:
            if auto_pip_dependencies:
                logger.warning(
                    "auto_pip_dependencies enabled, it may override package versions "
                    "specified in `pip_dependencies=%s`",
                    pip_dependencies,
                )
            else:
                self.add_pip_dependencies(pip_dependencies)

        if requirements_txt_file:
            if auto_pip_dependencies:
                logger.warning(
                    "auto_pip_dependencies enabled, it may override package versions "
                    "specified in `requirements_txt_file=%s`",
                    requirements_txt_file,
                )
            else:
                self._set_requirements_txt(requirements_txt_file)

        self._auto_pip_dependencies = auto_pip_dependencies

        self._set_setup_sh(setup_sh)

        if conda_channels:
            if not isinstance(conda_channels, list):
                conda_channels = [conda_channels]
            self.add_conda_channels(conda_channels)
        if conda_dependencies:
            if not isinstance(conda_dependencies, list):
                conda_dependencies = [conda_dependencies]
            self.add_conda_dependencies(conda_dependencies)

        if docker_base_image:
            self._docker_base_image = docker_base_image
        else:
            self._docker_base_image = config('core').get(
                'default_docker_base_image')
        self._docker_gpu_base_image = config('core').get(
            'default_docker_gpu_base_image')
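A hedged construction sketch; the class name BentoServiceEnv is an assumption, since the snippet omits the enclosing class. Note that bare strings for conda_channels and conda_dependencies are wrapped into lists:

    env = BentoServiceEnv(
        bento_service_name="IrisClassifier",
        pip_dependencies=["scikit-learn", "pandas"],
        conda_channels="conda-forge",  # coerced to ["conda-forge"]
        conda_dependencies="libgcc",   # coerced to ["libgcc"]
    )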
Example #10
def start_yatai_service_grpc_server(db_url, repo_base_url, grpc_port, ui_port,
                                    with_ui):
    track_server('yatai-service-grpc-server')
    yatai_service = get_yatai_service(db_url=db_url,
                                      repo_base_url=repo_base_url)
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    add_YataiServicer_to_server(yatai_service, server)
    debug_mode = config().getboolean('core', 'debug')
    if debug_mode:
        try:
            logger.debug('Enabling gRPC server reflection for debugging')
            from grpc_reflection.v1alpha import reflection
            from bentoml.proto import yatai_service_pb2

            SERVICE_NAMES = (
                yatai_service_pb2.DESCRIPTOR.services_by_name['Yatai'].
                full_name,
                reflection.SERVICE_NAME,
            )
            reflection.enable_server_reflection(SERVICE_NAMES, server)
        except ImportError:
            logger.debug(
                'Failed enabling gRPC server reflection, missing required package: '
                '"pip install grpcio-reflection"')
    server.add_insecure_port(f'[::]:{grpc_port}')
    server.start()
    if with_ui:
        web_ui_log_path = os.path.join(
            config("logging").get("BASE_LOG_DIR"),
            config('logging').get("yatai_web_server_log_filename"),
        )

        ensure_node_available_or_raise()
        yatai_grpc_server_address = f'localhost:{grpc_port}'
        async_start_yatai_service_web_ui(yatai_grpc_server_address, ui_port,
                                         web_ui_log_path, debug_mode)

    # We don't import _echo function from click_utils because of circular dep
    click.echo(
        f'* Starting BentoML YataiService gRPC Server\n'
        f'* Debug mode: {"on" if debug_mode else "off"}\n'
        f'* Web UI: {f"running on http://127.0.0.1:{ui_port}" if with_ui else "off"}\n'
        f'* Running on 127.0.0.1:{grpc_port} (Press CTRL+C to quit)\n'
        f'* Usage: `bentoml config set yatai_service.url=127.0.0.1:{grpc_port}`\n'
        f'* Help and instructions: '
        f'https://docs.bentoml.org/en/latest/concepts/yatai_service.html\n'
        f'{f"* Web server log can be found here: {web_ui_log_path}" if with_ui else ""}'
    )

    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        logger.info("Terminating YataiService gRPC server..")
        server.stop(grace=None)
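A hedged invocation sketch (URLs, paths, and ports are placeholders):

    start_yatai_service_grpc_server(
        db_url="sqlite:///bentoml.db",
        repo_base_url="/tmp/bentoml_repository",
        grpc_port=50051,
        ui_port=3000,
        with_ui=False,  # skip the Node-based web UI
    )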
Example #11
        def wrapper(quiet, verbose, *args, **kwargs):
            if verbose:
                from bentoml import config

                config().set('core', 'debug', 'true')
                configure_logging(logging.DEBUG)
            elif quiet:
                configure_logging(logging.ERROR)
            else:
                configure_logging()  # use default setting in local bentoml.cfg

            return func(*args, **kwargs)
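This wrapper is presumably returned from a decorator that injects quiet/verbose flags; a hedged sketch of the enclosing shape (the decorator name is hypothetical):

    from functools import wraps

    def with_logging_flags(func):
        @wraps(func)
        def wrapper(quiet, verbose, *args, **kwargs):
            # ... logging configuration as in the snippet above ...
            return func(*args, **kwargs)
        return wrapper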
Example #12
def get_gunicorn_num_of_workers():
    if config("apiserver").getint("default_gunicorn_workers_count") > 0:
        num_of_workers = config("apiserver").getint(
            "default_gunicorn_workers_count")
        logger.info("get_gunicorn_num_of_workers: %d, loaded from config",
                    num_of_workers)
    else:
        num_of_workers = (multiprocessing.cpu_count() // 2) + 1
        logger.info("get_gunicorn_num_of_workers: %d, calculated by cpu count",
                    num_of_workers)

    return num_of_workers
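For example, when default_gunicorn_workers_count is unset or non-positive, an 8-core machine falls back to (8 // 2) + 1 = 5 workers.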
Example #13
def get_yatai_service(
    channel_address=None,
    access_token=None,
    db_url=None,
    repo_base_url=None,
    s3_endpoint_url=None,
    default_namespace=None,
):
    channel_address = channel_address or config('yatai_service').get('url')
    access_token = access_token or config('yatai_service').get('access_token')
    channel_address = channel_address.strip()
    if channel_address:
        from bentoml.yatai.proto.yatai_service_pb2_grpc import YataiStub

        if any([db_url, repo_base_url, s3_endpoint_url, default_namespace]):
            logger.warning(
                "Using remote YataiService at `%s`, local YataiService configs "
                "including db_url, repo_base_url, s3_endpoint_url and default_namespace"
                "will all be ignored.",
                channel_address,
            )

        logger.debug("Connecting YataiService gRPC server at: %s",
                     channel_address)
        scheme, addr = parse_grpc_url(channel_address)
        header_adder_interceptor = header_client_interceptor.header_adder_interceptor(
            'access_token', access_token)
        if scheme in ('grpcs', 'https'):
            client_cacert_path = (
                config().get('yatai_service', 'client_certificate_file')
                or certifi.where()  # default: Mozilla ca cert
            )
            with open(client_cacert_path, 'rb') as ca_cert_file:
                ca_cert = ca_cert_file.read()
            credentials = grpc.ssl_channel_credentials(ca_cert, None, None)
            channel = grpc.intercept_channel(
                grpc.secure_channel(addr, credentials),
                header_adder_interceptor)
        else:
            channel = grpc.intercept_channel(grpc.insecure_channel(addr),
                                             header_adder_interceptor)
        return YataiStub(channel)
    else:
        from bentoml.yatai.yatai_service_impl import YataiService

        logger.debug("Creating local YataiService instance")
        return YataiService(
            db_url=db_url,
            repo_base_url=repo_base_url,
            s3_endpoint_url=s3_endpoint_url,
            default_namespace=default_namespace,
        )
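A hedged usage sketch (the address and token are placeholders):

    # remote YataiService over TLS, authenticated via the access_token header
    yatai = get_yatai_service(
        channel_address="grpcs://yatai.example.com:50051",
        access_token="my-access-token",
    )

    # or a local instance backed by a sqlite database
    yatai_local = get_yatai_service(db_url="sqlite:///bentoml.db")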
Example #14
    def __init__(self, bundle_path, port=None, num_of_workers=None, timeout=None):
        self.bento_service_bundle_path = bundle_path

        self.port = port or config("apiserver").getint("default_port")
        timeout = timeout or config("apiserver").getint("default_timeout")
        self.options = {
            "bind": "%s:%s" % ("0.0.0.0", self.port),
            "timeout": timeout,
            "loglevel": config("logging").get("LOGGING_LEVEL").upper(),
        }
        if num_of_workers:
            self.options['workers'] = num_of_workers

        super(GunicornBentoServer, self).__init__()
Example #15
def get_yatai_service(channel_address=None,
                      db_url=None,
                      repo_base_url=None,
                      default_namespace=None):
    channel_address = channel_address or config().get('yatai_service', 'url')
    if channel_address:
        import grpc
        from bentoml.proto.yatai_service_pb2_grpc import YataiStub

        if db_url is not None:
            logger.warning(
                "Config 'db_url' is ignored in favor of remote YataiService at `%s`",
                channel_address,
            )
        if repo_base_url is not None:
            logger.warning(
                "Config 'repo_base_url:%s' is ignored in favor of remote YataiService "
                "at `%s`",
                repo_base_url,
                channel_address,
            )
        if default_namespace is not None:
            logger.warning(
                "Config 'default_namespace:%s' is ignored in favor of remote "
                "YataiService at `%s`",
                default_namespace,
                channel_address,
            )
        logger.debug("Using BentoML with remote Yatai server: %s",
                     channel_address)

        channel = grpc.insecure_channel(channel_address)
        return YataiStub(channel)
    else:
        from bentoml.yatai.yatai_service_impl import YataiService

        logger.debug("Using BentoML with local Yatai server")

        default_namespace = default_namespace or config().get(
            'deployment', 'default_namespace')
        repo_base_url = repo_base_url or config().get(
            'default_repository_base_url')
        db_url = db_url or config().get('db', 'url')

        return YataiService(
            db_url=db_url,
            repo_base_url=repo_base_url,
            default_namespace=default_namespace,
        )
Example #16
def setup_prometheus_multiproc_dir(lock: multiprocessing.Lock = None):
    """
    Set up prometheus_multiproc_dir for prometheus to work in multiprocess mode,
    which is required when working with the Gunicorn server.

    Warning: for this to work, the prometheus_client library must be imported
    after this function is called. It relies on os.environ['prometheus_multiproc_dir']
    being set to properly configure multiprocess mode.
    """
    if lock is not None:
        lock.acquire()

    try:
        prometheus_multiproc_dir = config('instrument').get(
            'prometheus_multiproc_dir')
        logger.debug("Setting up prometheus_multiproc_dir: %s",
                     prometheus_multiproc_dir)
        if not os.path.exists(prometheus_multiproc_dir):
            os.mkdir(prometheus_multiproc_dir)
        if os.listdir(prometheus_multiproc_dir):
            # wipe stale metric files from previous runs, then recreate the dir
            shutil.rmtree(prometheus_multiproc_dir)
            os.mkdir(prometheus_multiproc_dir)
        os.environ['prometheus_multiproc_dir'] = prometheus_multiproc_dir
    finally:
        if lock is not None:
            lock.release()
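A sketch of the intended call order when forking Gunicorn workers: share one lock, set up the directory, and only import prometheus_client afterwards:

    import multiprocessing

    lock = multiprocessing.Lock()  # shared across worker processes
    setup_prometheus_multiproc_dir(lock)
    import prometheus_client  # noqa: E402  must come after the env var is set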
Example #17
class BentoAPIServer:
    """
    BentoAPIServer creates a REST API server based on APIs defined with a BentoService
    via the BentoService#get_service_apis call. Each BentoServiceAPI becomes one
    endpoint exposed on the REST server, and the RequestHandler defined on each
    BentoServiceAPI object is used to handle the Request object before feeding the
    request data into a Service API function.
    """

    _DEFAULT_PORT = config("apiserver").getint("default_port")

    def __init__(self, bento_service, port=_DEFAULT_PORT, app_name=None):
        app_name = bento_service.name if app_name is None else app_name

        self.port = port
        self.bento_service = bento_service

        self.app = Flask(app_name)
        setup_routes(self.app, self.bento_service)

    def start(self):
        """
        Start a REST server on the port configured for this instance.
        """
        track_server('flask')

        self.app.run(port=self.port)
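A minimal usage sketch (my_service stands in for any loaded BentoService instance):

    server = BentoAPIServer(my_service, port=5000)
    server.start()  # blocks, serving Flask routes on 127.0.0.1:5000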
Example #18
    def __init__(self, app, bento_service):
        self.app = app
        self.bento_service = bento_service

        from prometheus_client import Histogram, Counter, Gauge, CollectorRegistry

        service_name = self.bento_service.name
        namespace = config('instrument').get('default_namespace')
        # Use local registry instead of the global one to avoid duplicated metrics
        # register
        self.collector_registry = CollectorRegistry()

        self.metrics_request_duration = Histogram(
            name=service_name + '_request_duration_seconds',
            documentation=service_name + " API HTTP request duration in seconds",
            namespace=namespace,
            labelnames=['endpoint', 'service_version', 'http_response_code'],
            registry=self.collector_registry,
        )
        self.metrics_request_total = Counter(
            name=service_name + "_request_total",
            documentation='Total number of HTTP requests',
            namespace=namespace,
            labelnames=['endpoint', 'service_version', 'http_response_code'],
            registry=self.collector_registry,
        )
        self.metrics_request_in_progress = Gauge(
            name=service_name + "_request_in_progress",
            documentation='Total number of HTTP requests in progress now',
            namespace=namespace,
            labelnames=['endpoint', 'service_version'],
            registry=self.collector_registry,
        )
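Because the metrics live in a local CollectorRegistry, the /metrics view presumably exports that registry explicitly; a hedged sketch:

    from prometheus_client import CONTENT_TYPE_LATEST, generate_latest

    payload = generate_latest(self.collector_registry)  # Prometheus text format, bytes
    # return payload with the Content-Type header set to CONTENT_TYPE_LATEST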
Example #19
    def __init__(self, app, bento_service):
        self.app = app
        self.bento_service = bento_service

        from prometheus_client import Histogram, Counter, Gauge

        service_name = self.bento_service.name
        namespace = config('instrument').get('default_namespace')

        self.metrics_request_duration = Histogram(
            name=service_name + '_request_duration_seconds',
            documentation=service_name +
            " API HTTP request duration in seconds",
            namespace=namespace,
            labelnames=['endpoint', 'service_version', 'http_response_code'],
        )
        self.metrics_request_total = Counter(
            name=service_name + "_request_total",
            documentation='Total number of HTTP requests',
            namespace=namespace,
            labelnames=['endpoint', 'service_version', 'http_response_code'],
        )
        self.metrics_request_in_progress = Gauge(
            name=service_name + "_request_in_progress",
            documentation='Total number of HTTP requests in progress now',
            namespace=namespace,
            labelnames=['endpoint', 'service_version'],
        )
Example #20
def setup_prometheus_multiproc_dir(lock: multiprocessing.Lock = None):
    """
    Set up prometheus_multiproc_dir for prometheus to work in multiprocess mode,
    which is required when working with the Gunicorn server.

    Warning: for this to work, the prometheus_client library must be imported
    after this function is called. It relies on os.environ['prometheus_multiproc_dir']
    being set to properly configure multiprocess mode.
    """
    if lock is not None:
        lock.acquire()

    try:
        prometheus_multiproc_dir = config('instrument').get(
            'prometheus_multiproc_dir')
        logger.debug("Setting up prometheus_multiproc_dir: %s",
                     prometheus_multiproc_dir)
        # Wipe prometheus metrics directory between runs
        # https://github.com/prometheus/client_python#multiprocess-mode-gunicorn
        # Ignore errors so it does not fail when directory does not exist
        shutil.rmtree(prometheus_multiproc_dir, ignore_errors=True)
        os.makedirs(prometheus_multiproc_dir, exist_ok=True)

        os.environ['prometheus_multiproc_dir'] = prometheus_multiproc_dir
    finally:
        if lock is not None:
            lock.release()
Example #21
    def create_sagemaker_deployment(
        self,
        name,
        bento_name,
        bento_version,
        api_name,
        instance_type,
        instance_count,
        timeout,
        num_of_gunicorn_workers_per_instance=None,
        region=None,
        namespace=None,
        labels=None,
        annotations=None,
        wait=None,
    ):
        """Create SageMaker deployment

        Args:
            name:
            bento_name:
            bento_version:
            api_name:
            instance_type:
            instance_count:
            timeout:
            num_of_gunicorn_workers_per_instance:
            region:
            namespace:
            labels:
            annotations:
            wait:

        Returns:
            ApplyDeploymentResponse

        Raises:
            BentoMLException
        """
        namespace = (namespace if namespace else config().get(
            'deployment', 'default_namespace'))

        deployment_pb = Deployment(name=name,
                                   namespace=namespace,
                                   labels=labels,
                                   annotations=annotations)
        deployment_pb.spec.bento_name = bento_name
        deployment_pb.spec.bento_version = bento_version
        deployment_pb.spec.operator = DeploymentSpec.AWS_SAGEMAKER
        deployment_pb.spec.sagemaker_operator_config.api_name = api_name
        deployment_pb.spec.sagemaker_operator_config.instance_count = instance_count
        deployment_pb.spec.sagemaker_operator_config.instance_type = instance_type
        deployment_pb.spec.sagemaker_operator_config.timeout = timeout
        if region:
            deployment_pb.spec.sagemaker_operator_config.region = region
        if num_of_gunicorn_workers_per_instance:
            deployment_pb.spec.sagemaker_operator_config.num_of_gunicorn_workers_per_instance = (  # noqa E501
                num_of_gunicorn_workers_per_instance)

        return self.create(deployment_pb, wait)
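A hedged call sketch (the client variable and argument values are placeholders):

    response = deployment_client.create_sagemaker_deployment(
        name="iris-prod",
        bento_name="IrisClassifier",
        bento_version="20200101120000_ABCDEF",
        api_name="predict",
        instance_type="ml.m4.xlarge",
        instance_count=1,
        timeout=60,
    )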
Example #22
    def bentoml_cli(ctx, verbose, quiet):
        """
        BentoML CLI tool
        """
        ctx.verbose = verbose
        ctx.quiet = quiet

        if verbose:
            from bentoml import config

            config().set('core', 'debug', 'true')
            configure_logging(logging.DEBUG)
        elif quiet:
            configure_logging(logging.ERROR)
        else:
            configure_logging()  # use default setting in local bentoml.cfg
Example #23
    def parse(cls, raw_headers: Sequence[Tuple[str, str]]):
        from bentoml import config

        BATCH_REQUEST_HEADER = config("apiserver").get("batch_request_header")
        if isinstance(raw_headers, dict):
            raw_headers = raw_headers.items()

        headers_dict = CIMultiDict(
            (k.lower(), v) for k, v in raw_headers or tuple())
        content_type = parse_options_header(
            headers_dict.get('content-type'))[0]
        content_encoding = parse_options_header(
            headers_dict.get('content-encoding'))[0]
        hv = parse_options_header(
            headers_dict.get(BATCH_REQUEST_HEADER))[0].lower()
        if hv:
            is_batch_input = hv == "true"
        else:
            is_batch_input = None
        header = cls(
            headers_dict,
            content_type=content_type,
            content_encoding=content_encoding,
            is_batch_input=is_batch_input,
        )
        return header
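A hedged usage sketch; HttpHeaders stands in for the class that owns parse, and the batch header name is a placeholder for the configured batch_request_header value:

    headers = HttpHeaders.parse({
        "Content-Type": "application/json; charset=utf-8",
        "Bentoml-Is-Batch-Request": "true",  # placeholder header name
    })
    # headers.is_batch_input would then be True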
Example #24
    def describe(self, namespace, name):
        namespace = (
            namespace if namespace else config().get('deployment', 'default_namespace')
        )
        return self.yatai_service.DescribeDeployment(
            DescribeDeploymentRequest(deployment_name=name, namespace=namespace)
        )
Example #25
def _create_sagemaker_model(
    sagemaker_client, sagemaker_model_name, ecr_image_path, spec
):
    execution_role_arn = get_arn_role_from_current_aws_user()

    sagemaker_model_info = {
        "ModelName": sagemaker_model_name,
        "PrimaryContainer": {
            "ContainerHostname": sagemaker_model_name,
            "Image": ecr_image_path,
            "Environment": {
                "API_NAME": spec.api_name,
                "BENTO_SERVER_TIMEOUT": config().get('apiserver', 'default_timeout'),
            },
        },
        "ExecutionRoleArn": execution_role_arn,
    }

    # Will set envvar, if user defined gunicorn workers per instance.  EnvVar needs
    # to be string instead of the int.
    if spec.num_of_gunicorn_workers_per_instance:
        sagemaker_model_info['PrimaryContainer']['Environment'][
            'GUNICORN_WORKER_COUNT'
        ] = str(spec.num_of_gunicorn_workers_per_instance)

    try:
        create_model_response = sagemaker_client.create_model(**sagemaker_model_info)
    except ClientError as e:
        raise _aws_client_error_to_bentoml_exception(
            e, "Failed to create sagemaker model"
        )
    logger.debug("AWS create model response: %s", create_model_response)
Example #26
        def wrapper():
            with request_metric_time.time():
                request_id = str(uuid.uuid4())
                # Assume there is not a strong use case for idempotency check here.
                # Will revise later if we find a case.

                image_paths = []
                if not config('logging').getboolean('disable_logging_image'):
                    image_paths = log_image(request, request_id)

                response = api.handle_request(request)

                request_log = {
                    "request_id": request_id,
                    "service_name": service_name,
                    "service_version": service_version,
                    "api": api.name,
                    "request": cls._request_to_json(request),
                    "response_code": response.status_code,
                }

                if len(image_paths) > 0:
                    request_log['image_paths'] = image_paths

                if 200 <= response.status_code < 300:
                    request_log['response'] = response.response

                prediction_logger.info(request_log)

                response.headers["request_id"] = request_id

                counter.labels(response.status_code).inc()

                return response
Example #27
    def __init__(self, base_url=None):
        if base_url is None:
            base_url = config().get('default_repository_base_url')

        if is_s3_url(base_url):
            self._repo = _S3BentoRepository(base_url)
        else:
            self._repo = _LocalBentoRepository(base_url)
Example #28
def get_default_accept_image_formats():
    """With default bentoML config, this returns:
        ['.jpg', '.png', '.jpeg', '.tiff', '.webp', '.bmp']
    """
    return [
        extension.strip() for extension in config("apiserver").get(
            "default_image_input_accept_file_extensions").split(",")
    ]
Example #29
def get_default_accept_image_formats():
    """With default bentoML config, this returns:
        ['.jpg', '.png', '.jpeg', '.tiff', '.webp', '.bmp']
    """
    return [
        extension.strip() for extension in config('apiserver').get(
            'default_image_handler_accept_file_extensions').split(',')
    ]
Example #30
    def __exit__(self, exc_type, exc_val, exc_tb):
        if config('core').getboolean('debug'):
            logger.debug('BentoML in debug mode, keeping temp directory "%s"',
                         self.path)
            return

        if self._cleanup:
            self.cleanup()