Example #1
def start(args):
    api = None
    try:
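        # load the deployment context from S3 and look up this API's spec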
        ctx = Context(s3_path=args.context,
                      cache_dir=args.cache_dir,
                      workload_id=args.workload_id)
        api = ctx.apis_id_map[args.api]
        local_cache["api"] = api
        local_cache["ctx"] = ctx

        if api.get("onnx") is None:
            raise CortexException(api["name"], "onnx key not configured")

        _, prefix = ctx.storage.deconstruct_s3_path(api["onnx"]["model"])
        model_path = os.path.join(args.model_dir, os.path.basename(prefix))
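        # load the user's request handler implementation if one is configured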
        if api["onnx"].get("request_handler") is not None:
            local_cache["request_handler"] = ctx.get_request_handler_impl(
                api["name"], args.project_dir)
        request_handler = local_cache.get("request_handler")

        if request_handler is not None and util.has_function(
                request_handler, "pre_inference"):
            cx_logger().info(
                "using pre_inference request handler provided in {}".format(
                    api["onnx"]["request_handler"]))
        else:
            cx_logger().info("pre_inference request handler not found")

        if request_handler is not None and util.has_function(
                request_handler, "post_inference"):
            cx_logger().info(
                "using post_inference request handler provided in {}".format(
                    api["onnx"]["request_handler"]))
        else:
            cx_logger().info("post_inference request handler not found")

        sess = rt.InferenceSession(model_path)
        local_cache["sess"] = sess
        local_cache["input_metadata"] = sess.get_inputs()
        cx_logger().info("input_metadata: {}".format(
            truncate(extract_signature(local_cache["input_metadata"]))))
        local_cache["output_metadata"] = sess.get_outputs()
        cx_logger().info("output_metadata: {}".format(
            truncate(extract_signature(local_cache["output_metadata"]))))

    except Exception as e:
        cx_logger().exception("failed to start api")
        sys.exit(1)

    if api.get("tracker") is not None and api["tracker"].get(
            "model_type") == "classification":
        try:
            local_cache["class_set"] = api_utils.get_classes(ctx, api["name"])
        except Exception as e:
            cx_logger().warn(
                "an error occurred while attempting to load classes",
                exc_info=True)

    cx_logger().info("API is ready")
    serve(app, listen="*:{}".format(args.port))
Example #2
def start(args):
    api = None
    try:
        ctx = Context(s3_path=args.context,
                      cache_dir=args.cache_dir,
                      workload_id=args.workload_id)
        api = ctx.apis_id_map[args.api]
        local_cache["api"] = api
        local_cache["ctx"] = ctx

        if api.get("request_handler") is not None:
            local_cache["request_handler"] = ctx.get_request_handler_impl(
                api["name"], args.project_dir)
    except Exception as e:
        logger.exception("failed to start api")
        sys.exit(1)

    try:
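        # sanity-check the downloaded model directory before serving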
        validate_model_dir(args.model_dir)
    except Exception as e:
        logger.exception("failed to validate model")
        sys.exit(1)

    if api.get("tracker") is not None and api["tracker"].get(
            "model_type") == "classification":
        try:
            local_cache["class_set"] = api_utils.get_classes(ctx, api["name"])
        except Exception as e:
            logger.warn("an error occurred while attempting to load classes",
                        exc_info=True)

    # open a gRPC channel to the local TensorFlow Serving process
    channel = grpc.insecure_channel("localhost:" + str(args.tf_serve_port))
    local_cache["stub"] = prediction_service_pb2_grpc.PredictionServiceStub(
        channel)

    # wait a bit for tf serving to start before querying metadata
    limit = 60
    for i in range(limit):
        try:
            local_cache["metadata"] = run_get_model_metadata()
            break
        except Exception as e:
            if i > 6:
                logger.warn(
                    "unable to read model metadata - model is still loading. Retrying..."
                )
            if i == limit - 1:
                logger.exception("retry limit exceeded")
                sys.exit(1)

        time.sleep(5)
    logger.info("model_signature: {}".format(
        extract_signature(
            local_cache["metadata"]["signatureDef"],
            local_cache["api"]["tf_serving"]["signature_key"],
        )))
    serve(app, listen="*:{}".format(args.port))
Example #3
def start(args):
    api = None
    try:
        ctx = Context(s3_path=args.context, cache_dir=args.cache_dir, workload_id=args.workload_id)
        api = ctx.apis_id_map[args.api]
        local_cache["api"] = api
        local_cache["ctx"] = ctx

        if api["predictor"]["type"] != "onnx":
            raise CortexException(api["name"], "predictor type is not onnx")

        cx_logger().info("loading the predictor from {}".format(api["predictor"]["path"]))

        _, prefix = ctx.storage.deconstruct_s3_path(api["predictor"]["model"])
        model_path = os.path.join(args.model_dir, os.path.basename(prefix))
        local_cache["client"] = ONNXClient(model_path)

        predictor_class = ctx.get_predictor_class(api["name"], args.project_dir)

        try:
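            # instantiate the user's predictor class with the ONNX client and its config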
            local_cache["predictor"] = predictor_class(
                local_cache["client"], api["predictor"]["config"]
            )
        except Exception as e:
            raise UserRuntimeException(api["predictor"]["path"], "__init__", str(e)) from e
        finally:
            refresh_logger()
    except Exception as e:
        cx_logger().exception("failed to start api")
        sys.exit(1)

    if api.get("tracker") is not None and api["tracker"].get("model_type") == "classification":
        try:
            local_cache["class_set"] = api_utils.get_classes(ctx, api["name"])
        except Exception as e:
            cx_logger().warn("an error occurred while attempting to load classes", exc_info=True)

    cx_logger().info("ONNX model signature: {}".format(local_cache["client"].input_signature))

    # collect waitress_*-prefixed keys from the predictor config to pass through to waitress
    waitress_kwargs = {}
    if api["predictor"].get("config") is not None:
        for key, value in api["predictor"]["config"].items():
            if key.startswith("waitress_"):
                waitress_kwargs[key[len("waitress_") :]] = value

    if len(waitress_kwargs) > 0:
        cx_logger().info("waitress parameters: {}".format(waitress_kwargs))

    waitress_kwargs["listen"] = "*:{}".format(args.port)

    cx_logger().info("{} api is live".format(api["name"]))
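    # create /health_check.txt so health checks can detect that the server is up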
    open("/health_check.txt", "a").close()
    serve(app, **waitress_kwargs)
Example #4
File: api.py  Project: rogervaas/cortex
def start(args):
    api = None
    try:
        ctx = Context(s3_path=args.context,
                      cache_dir=args.cache_dir,
                      workload_id=args.workload_id)
        api = ctx.apis_id_map[args.api]
        local_cache["api"] = api
        local_cache["ctx"] = ctx

        if api.get("predictor") is None:
            raise CortexException(api["name"], "predictor key not configured")

        cx_logger().info("loading the predictor from {}".format(
            api["predictor"]["path"]))
        local_cache["predictor"] = ctx.get_predictor_impl(
            api["name"], args.project_dir)

        if util.has_function(local_cache["predictor"], "init"):
            try:
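                # resolve the local model path (if a model is configured) to pass to init()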
                model_path = None
                if api["predictor"].get("model") is not None:
                    _, prefix = ctx.storage.deconstruct_s3_path(
                        api["predictor"]["model"])
                    model_path = os.path.join(
                        args.model_dir,
                        os.path.basename(os.path.normpath(prefix)))

                cx_logger().info("calling the predictor's init() function")
                local_cache["predictor"].init(model_path,
                                              api["predictor"]["metadata"])
            except Exception as e:
                raise UserRuntimeException(api["predictor"]["path"], "init",
                                           str(e)) from e
            finally:
                refresh_logger()
    except Exception:
        cx_logger().exception("failed to start api")
        sys.exit(1)

    if api.get("tracker") is not None and api["tracker"].get(
            "model_type") == "classification":
        try:
            local_cache["class_set"] = api_utils.get_classes(ctx, api["name"])
        except Exception as e:
            cx_logger().warn(
                "an error occurred while attempting to load classes",
                exc_info=True)

    cx_logger().info("{} api is live".format(api["name"]))
    serve(app, listen="*:{}".format(args.port))
Example #5
File: api.py  Project: databill86/cortex-1
def start(args):
    api = None
    try:
        ctx = Context(s3_path=args.context,
                      cache_dir=args.cache_dir,
                      workload_id=args.workload_id)
        api = ctx.apis_id_map[args.api]
        local_cache["api"] = api
        local_cache["ctx"] = ctx

        _, prefix = ctx.storage.deconstruct_s3_path(api["model"])
        model_path = os.path.join(args.model_dir, os.path.basename(prefix))
        if api.get("request_handler") is not None:
            local_cache["request_handler"] = ctx.get_request_handler_impl(
                api["name"], args.project_dir)

        sess = rt.InferenceSession(model_path)
        local_cache["sess"] = sess
        local_cache["input_metadata"] = sess.get_inputs()
        logger.info("input_metadata: {}".format(
            truncate(extract_signature(local_cache["input_metadata"]))))
        local_cache["output_metadata"] = sess.get_outputs()
        logger.info("output_metadata: {}".format(
            truncate(extract_signature(local_cache["output_metadata"]))))

    except Exception as e:
        logger.exception("failed to start api")
        sys.exit(1)

    if api.get("tracker") is not None and api["tracker"].get(
            "model_type") == "classification":
        try:
            local_cache["class_set"] = api_utils.get_classes(ctx, api["name"])
        except Exception as e:
            logger.warn("an error occurred while attempting to load classes",
                        exc_info=True)

    serve(app, listen="*:{}".format(args.port))
Example #6
def start(args):
    api = None
    try:
        ctx = Context(s3_path=args.context,
                      cache_dir=args.cache_dir,
                      workload_id=args.workload_id)
        api = ctx.apis_id_map[args.api]
        local_cache["api"] = api
        local_cache["ctx"] = ctx

        if api.get("tensorflow") is None:
            raise CortexException(api["name"], "tensorflow key not configured")

        if api["tensorflow"].get("request_handler") is not None:
            cx_logger().info("loading the request handler from {}".format(
                api["tensorflow"]["request_handler"]))
            local_cache["request_handler"] = ctx.get_request_handler_impl(
                api["name"], args.project_dir)
        request_handler = local_cache.get("request_handler")

        if request_handler is not None and util.has_function(
                request_handler, "pre_inference"):
            cx_logger().info(
                "using pre_inference request handler defined in {}".format(
                    api["tensorflow"]["request_handler"]))
        else:
            cx_logger().info("pre_inference request handler not defined")

        if request_handler is not None and util.has_function(
                request_handler, "post_inference"):
            cx_logger().info(
                "using post_inference request handler defined in {}".format(
                    api["tensorflow"]["request_handler"]))
        else:
            cx_logger().info("post_inference request handler not defined")

    except Exception as e:
        cx_logger().exception("failed to start api")
        sys.exit(1)

    try:
        validate_model_dir(args.model_dir)
    except Exception as e:
        cx_logger().exception("failed to validate model")
        sys.exit(1)

    if api.get("tracker") is not None and api["tracker"].get(
            "model_type") == "classification":
        try:
            local_cache["class_set"] = api_utils.get_classes(ctx, api["name"])
        except Exception as e:
            cx_logger().warn(
                "an error occurred while attempting to load classes",
                exc_info=True)

    channel = grpc.insecure_channel("localhost:" + str(args.tf_serve_port))
    local_cache["stub"] = prediction_service_pb2_grpc.PredictionServiceStub(
        channel)

    # wait a bit for tf serving to start before querying metadata
    limit = 60
    for i in range(limit):
        try:
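            # query TF Serving for the model's metadata; this fails until the model has loaded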
            local_cache["model_metadata"] = run_get_model_metadata()
            break
        except Exception as e:
            if i > 6:
                cx_logger().warn(
                    "unable to read model metadata - model is still loading. Retrying..."
                )
            if i == limit - 1:
                cx_logger().exception("retry limit exceeded")
                sys.exit(1)

        time.sleep(5)

    # parse the served model's signature using the signature key from the API spec
    signature_key, parsed_signature = extract_signature(
        local_cache["model_metadata"]["signatureDef"],
        api["tensorflow"]["signature_key"])

    local_cache["signature_key"] = signature_key
    local_cache["parsed_signature"] = parsed_signature
    cx_logger().info("model_signature: {}".format(
        local_cache["parsed_signature"]))

    cx_logger().info("{} API is live".format(api["name"]))
    serve(app, listen="*:{}".format(args.port))
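
All of the variants above read the same attributes off args: context, cache_dir, workload_id, api, model_dir, project_dir, port, and (for the TensorFlow Serving variants) tf_serve_port. As a rough sketch, an entry point like the following could drive start(args); the flag names, defaults, and help strings are assumptions inferred from those attribute accesses, not the actual Cortex CLI.

import argparse


def main():
    # hypothetical wrapper around start(); flag names are inferred from the
    # attributes that start(args) reads and are not taken from the Cortex source
    parser = argparse.ArgumentParser(description="serve a Cortex API")
    parser.add_argument("--context", required=True, help="S3 path of the deployment context")
    parser.add_argument("--cache-dir", required=True, help="local directory for cached context files")
    parser.add_argument("--workload-id", required=True, help="ID of this workload")
    parser.add_argument("--api", required=True, help="ID of the API to serve")
    parser.add_argument("--model-dir", required=True, help="directory containing the downloaded model")
    parser.add_argument("--project-dir", required=True, help="directory containing the user's project code")
    parser.add_argument("--port", type=int, default=8888, help="port for the HTTP server")
    parser.add_argument("--tf-serve-port", type=int, default=9000,
                        help="port of the local TensorFlow Serving process (TensorFlow variants only)")
    # argparse maps --cache-dir to args.cache_dir, --model-dir to args.model_dir, etc.
    start(parser.parse_args())


if __name__ == "__main__":
    main()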