Ejemplo n.º 1
0
def main():
    """Fetch the API spec and serve ``cortex.serve.wsgi:app`` with uvicorn.

    The logging configuration is read from
    ``/src/cortex/serve/log_config.yaml``. The API spec is fetched through
    ``LocalStorage`` when ``CORTEX_PROVIDER`` is ``"local"`` and through
    ``S3`` otherwise. When the spec's predictor type is ``"tensorflow"``,
    the TensorFlow Serving models are loaded before the server starts.

    Raises:
        KeyError: if a required environment variable is not set.
        ValueError: if a numeric server setting is not an integer string.
    """
    with open("/src/cortex/serve/log_config.yaml", "r") as config_file:
        log_config = yaml.load(config_file, yaml.FullLoader)

    # get API spec
    env = os.environ
    cache_dir = env["CORTEX_CACHE_DIR"]
    provider = env["CORTEX_PROVIDER"]
    spec_path = env["CORTEX_API_SPEC"]
    storage = (
        LocalStorage(os.getenv("CORTEX_CACHE_DIR"))
        if provider == "local"
        else S3(bucket=env["CORTEX_BUCKET"], region=env["AWS_REGION"])
    )
    raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)

    # load tensorflow models into TFS
    predictor_type = raw_api_spec["predictor"]["type"]
    if predictor_type == "tensorflow":
        load_tensorflow_serving_models()

    # https://github.com/encode/uvicorn/blob/master/uvicorn/config.py
    server_settings = {
        "host": "0.0.0.0",
        "port": int(env["CORTEX_SERVING_PORT"]),
        "workers": int(env["CORTEX_WORKERS_PER_REPLICA"]),
        "limit_concurrency": int(env["CORTEX_MAX_WORKER_CONCURRENCY"]),
        "backlog": int(env["CORTEX_SO_MAX_CONN"]),
        "log_config": log_config,
        "log_level": "info",
    }
    uvicorn.run("cortex.serve.wsgi:app", **server_settings)
Ejemplo n.º 2
0
def main():
    """Prepare the serving environment and load TensorFlow models if needed.

    Steps, in order:
      1. If ``CORTEX_ACTIVE_NEURON`` is set, wait for the neuron-rtd sidecar.
      2. If ``CORTEX_MULTIPLE_TF_SERVERS`` is set, write a port table to
         ``/run/used_ports.json`` with one entry per process, each marked
         unused (``False``).
      3. Fetch the API spec through local or S3 storage, depending on
         ``CORTEX_PROVIDER``.
      4. Load the TensorFlow Serving models when the predictor type is
         ``"tensorflow"``.

    Raises:
        KeyError: if a required environment variable is not set.
        ValueError: if a port or process count is not an integer string.
    """
    env = os.environ

    # wait until neuron-rtd sidecar is ready
    if os.getenv("CORTEX_ACTIVE_NEURON"):
        wait_neuron_rtd()

    # strictly for Inferentia
    if os.getenv("CORTEX_MULTIPLE_TF_SERVERS"):
        first_port = int(env["CORTEX_TF_BASE_SERVING_PORT"])
        process_count = int(env["CORTEX_PROCESSES_PER_REPLICA"])
        # consecutive ports starting at the base port, all initially free
        port_table = {
            str(first_port + offset): False for offset in range(process_count)
        }
        with open("/run/used_ports.json", "w+") as ports_file:
            json.dump(port_table, ports_file)

    # get API spec
    cache_dir = env["CORTEX_CACHE_DIR"]
    provider = env["CORTEX_PROVIDER"]
    spec_path = env["CORTEX_API_SPEC"]
    storage = (
        LocalStorage(os.getenv("CORTEX_CACHE_DIR"))
        if provider == "local"
        else S3(bucket=env["CORTEX_BUCKET"], region=env["AWS_REGION"])
    )
    raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)

    # load tensorflow models into TFS
    predictor_type = raw_api_spec["predictor"]["type"]
    if predictor_type == "tensorflow":
        load_tensorflow_serving_models()
Ejemplo n.º 3
0
def start():
    """Initialise the predictor for a batch job and enter the SQS loop.

    Reads its settings from environment variables, claims a TensorFlow
    Serving port when ``CORTEX_MULTIPLE_TF_SERVERS`` is set, builds the
    ``API`` object and predictor implementation, and stores them in
    ``local_cache``. It then creates ``/mnt/workspace/api_readiness.txt``
    and calls ``sqs_loop()``.

    Raises:
        KeyError: if a required environment variable is not set.
    """
    env = os.environ
    cache_dir = env["CORTEX_CACHE_DIR"]
    provider = env["CORTEX_PROVIDER"]
    api_spec_path = env["CORTEX_API_SPEC"]
    job_spec_path = env["CORTEX_JOB_SPEC"]
    project_dir = env["CORTEX_PROJECT_DIR"]

    model_dir = os.getenv("CORTEX_MODEL_DIR")
    tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    storage = S3(bucket=env["CORTEX_BUCKET"], region=env["AWS_REGION"])

    if os.getenv("CORTEX_MULTIPLE_TF_SERVERS"):
        # Claim the first port still marked unused in the shared port table,
        # holding the file lock for the whole read-modify-write.
        with FileLock("/run/used_ports.json.lock"):
            with open("/run/used_ports.json", "r+") as ports_file:
                port_table = json.load(ports_file)
                free_port = next(
                    (port for port, taken in port_table.items() if not taken),
                    None,
                )
                if free_port is not None:
                    tf_serving_port = free_port
                    port_table[free_port] = True
                # Rewrite the file in place, dropping any leftover bytes.
                ports_file.seek(0)
                json.dump(port_table, ports_file)
                ports_file.truncate()

    raw_api_spec = get_spec(provider, storage, cache_dir, api_spec_path)
    job_spec = get_job_spec(storage, cache_dir, job_spec_path)

    api = API(
        provider=provider,
        storage=storage,
        model_dir=model_dir,
        cache_dir=cache_dir,
        **raw_api_spec,
    )

    predictor = api.predictor
    client = predictor.initialize_client(
        tf_serving_host=tf_serving_host,
        tf_serving_port=tf_serving_port,
    )
    cx_logger().info("loading the predictor from {}".format(api.predictor.path))
    predictor_impl = predictor.initialize_impl(
        project_dir, client, raw_api_spec, job_spec
    )

    local_cache["api_spec"] = api
    local_cache["provider"] = provider
    local_cache["job_spec"] = job_spec
    local_cache["predictor_impl"] = predictor_impl
    predict_signature = inspect.getfullargspec(predictor_impl.predict)
    local_cache["predict_fn_args"] = predict_signature.args
    local_cache["sqs_client"] = boto3.client("sqs", region_name=env["AWS_REGION"])

    # Create the readiness marker file if it does not exist yet.
    with open("/mnt/workspace/api_readiness.txt", "a"):
        pass

    cx_logger().info("polling for batches...")
    sqs_loop()
Ejemplo n.º 4
0
def main():
    """Prepare the serving environment and start the API.

    Steps, in order:
      1. Load the logging config from ``/src/cortex/serve/log_config.yaml``.
      2. If ``CORTEX_ACTIVE_NEURON`` is set, wait for the neuron-rtd sidecar.
      3. If ``CORTEX_MULTIPLE_TF_SERVERS`` is set, write a port table to
         ``/run/used_ports.json`` with one unused entry per process.
      4. Fetch the API spec through local or S3 storage.
      5. Load the TensorFlow Serving models for ``"tensorflow"`` predictors.
      6. Run uvicorn when the spec's kind is ``"RealtimeAPI"``; otherwise
         call ``cortex.serve.batch.start()``.

    Raises:
        KeyError: if a required environment variable is not set.
        ValueError: if a numeric setting is not an integer string.
    """
    env = os.environ

    with open("/src/cortex/serve/log_config.yaml", "r") as config_file:
        log_config = yaml.load(config_file, yaml.FullLoader)

    # wait until neuron-rtd sidecar is ready
    if os.getenv("CORTEX_ACTIVE_NEURON"):
        wait_neuron_rtd()

    # strictly for Inferentia
    if os.getenv("CORTEX_MULTIPLE_TF_SERVERS"):
        first_port = int(env["CORTEX_TF_BASE_SERVING_PORT"])
        process_count = int(env["CORTEX_PROCESSES_PER_REPLICA"])
        # consecutive ports starting at the base port, all initially free
        port_table = {
            str(first_port + offset): False for offset in range(process_count)
        }
        with open("/run/used_ports.json", "w+") as ports_file:
            json.dump(port_table, ports_file)

    # get API spec
    cache_dir = env["CORTEX_CACHE_DIR"]
    provider = env["CORTEX_PROVIDER"]
    spec_path = env["CORTEX_API_SPEC"]
    storage = (
        LocalStorage(os.getenv("CORTEX_CACHE_DIR"))
        if provider == "local"
        else S3(bucket=env["CORTEX_BUCKET"], region=env["AWS_REGION"])
    )
    raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)

    # load tensorflow models into TFS
    predictor_type = raw_api_spec["predictor"]["type"]
    if predictor_type == "tensorflow":
        load_tensorflow_serving_models()

    if raw_api_spec["kind"] != "RealtimeAPI":
        # Imported only on this path, as in the original layout.
        from cortex.serve import batch

        batch.start()
        return

    # https://github.com/encode/uvicorn/blob/master/uvicorn/config.py
    server_settings = {
        "host": "0.0.0.0",
        "port": int(env["CORTEX_SERVING_PORT"]),
        "workers": int(env["CORTEX_PROCESSES_PER_REPLICA"]),
        # this is a per process limit
        "limit_concurrency": int(env["CORTEX_MAX_PROCESS_CONCURRENCY"]),
        "backlog": int(env["CORTEX_SO_MAX_CONN"]),
        "log_config": log_config,
        "log_level": "info",
    }
    uvicorn.run("cortex.serve.wsgi:app", **server_settings)
Ejemplo n.º 5
0
def start_fn():
    """Initialise the predictor, register the prediction routes, return ``app``.

    Reads its settings from environment variables, claims a TensorFlow
    Serving port when ``CORTEX_MULTIPLE_TF_SERVERS`` is set, builds the
    ``API`` object and predictor implementation, and stores them in
    ``local_cache``. The predict route is ``"/"`` for the ``"local"``
    provider and ``"/predict"`` otherwise; it is registered for POST
    (``predict``) and GET (``get_summary``).

    If any step of the API/predictor initialisation raises, the failure is
    logged and the process exits with status 1. A failure while loading the
    cached classes is only logged as a warning.

    Returns:
        The module-level ``app`` with the routes added.

    Raises:
        KeyError: if a required environment variable is not set.
    """
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    project_dir = os.environ["CORTEX_PROJECT_DIR"]

    model_dir = os.getenv("CORTEX_MODEL_DIR")
    tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    if provider == "local":
        storage = LocalStorage(os.getenv("CORTEX_CACHE_DIR"))
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"],
                     region=os.environ["AWS_REGION"])

    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        # Claim the first port still marked unused in the shared port table,
        # holding the file lock for the whole read-modify-write.
        with FileLock("/run/used_ports.json.lock"):
            with open("/run/used_ports.json", "r+") as f:
                used_ports = json.load(f)
                for port, in_use in used_ports.items():
                    if not in_use:
                        tf_serving_port = port
                        used_ports[port] = True
                        break
                # Rewrite the file in place, dropping any leftover bytes.
                f.seek(0)
                json.dump(used_ports, f)
                f.truncate()

    try:
        raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)
        api = API(
            provider=provider,
            storage=storage,
            model_dir=model_dir,
            cache_dir=cache_dir,
            **raw_api_spec,
        )
        client = api.predictor.initialize_client(
            tf_serving_host=tf_serving_host, tf_serving_port=tf_serving_port)
        cx_logger().info("loading the predictor from {}".format(
            api.predictor.path))
        predictor_impl = api.predictor.initialize_impl(project_dir, client)

        local_cache["api"] = api
        local_cache["provider"] = provider
        local_cache["client"] = client
        local_cache["predictor_impl"] = predictor_impl
        local_cache["predict_fn_args"] = inspect.getfullargspec(
            predictor_impl.predict).args
        predict_route = "/"
        if provider != "local":
            predict_route = "/predict"
        local_cache["predict_route"] = predict_route
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt and SystemExit are not reported as a start-up
        # failure and converted to exit status 1.
        cx_logger().exception("failed to start api")
        sys.exit(1)

    if (provider != "local" and api.monitoring is not None
            and api.monitoring.model_type == "classification"):
        try:
            local_cache["class_set"] = api.get_cached_classes()
        except Exception:
            # Best effort: the API still starts without the cached classes.
            # NOTE(review): assumes cx_logger() returns a standard
            # logging.Logger; `warning` replaces the deprecated `warn` alias.
            cx_logger().warning(
                "an error occurred while attempting to load classes",
                exc_info=True)

    app.add_api_route(local_cache["predict_route"], predict, methods=["POST"])
    app.add_api_route(local_cache["predict_route"],
                      get_summary,
                      methods=["GET"])

    return app