Example #1
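An excerpt of Cortex's serving entrypoint: it waits for the neuron-rtd sidecar when Inferentia is in use, reserves one TF Serving port per worker process, fetches the API spec from local storage or S3, and loads TensorFlow models into TensorFlow Serving.
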
import json
import os

# wait_neuron_rtd, LocalStorage, S3, and get_spec are helpers from Cortex's
# serving package, assumed to be imported alongside these modules.

def main():
    # wait until neuron-rtd sidecar is ready
    uses_inferentia = os.getenv("CORTEX_ACTIVE_NEURON")
    if uses_inferentia:
        wait_neuron_rtd()

    # strictly for Inferentia
    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        base_serving_port = int(os.environ["CORTEX_TF_BASE_SERVING_PORT"])
        num_processes = int(os.environ["CORTEX_PROCESSES_PER_REPLICA"])
        used_ports = {}
        for w in range(num_processes):
            used_ports[str(base_serving_port + w)] = False
        with open("/run/used_ports.json", "w+") as f:
            json.dump(used_ports, f)

    # get API spec
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    if provider == "local":
        storage = LocalStorage(cache_dir)
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"], region=os.environ["AWS_REGION"])
    raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)

    # load tensorflow models into TFS
    if raw_api_spec["predictor"]["type"] == "tensorflow":
        load_tensorflow_serving_models()
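
The used_ports map written to /run/used_ports.json is how the replica's worker processes later divide the reserved TF Serving ports among themselves. As a rough sketch only (claim_port and the flock-based locking are assumptions for illustration, not Cortex's actual code), a worker could claim a free port like this:

import fcntl
import json

def claim_port(path="/run/used_ports.json"):
    """Atomically claim the first free port recorded in the shared JSON file."""
    # Hypothetical helper, not Cortex's implementation.
    with open(path, "r+") as f:
        fcntl.flock(f, fcntl.LOCK_EX)  # serialize concurrent workers
        used_ports = json.load(f)
        for port, in_use in used_ports.items():
            if not in_use:
                used_ports[port] = True
                f.seek(0)
                json.dump(used_ports, f)
                f.truncate()
                return int(port)
        raise RuntimeError("no free TF Serving port left")

Locking matters here because several worker processes may race to read and rewrite the same file.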
Example #2
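A fuller variant of the same entrypoint: it additionally loads uvicorn's logging config up front and, at the end, either serves a RealtimeAPI through uvicorn or hands off to the batch runner.
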
import json
import os

import uvicorn
import yaml

# wait_neuron_rtd, LocalStorage, S3, and get_spec are helpers from Cortex's
# serving package, assumed to be imported alongside these modules.

def main():
    with open("/src/cortex/serve/log_config.yaml", "r") as f:
        log_config = yaml.load(f, yaml.FullLoader)

    # wait until neuron-rtd sidecar is ready
    uses_inferentia = os.getenv("CORTEX_ACTIVE_NEURON")
    if uses_inferentia:
        wait_neuron_rtd()

    # strictly for Inferentia
    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        base_serving_port = int(os.environ["CORTEX_TF_BASE_SERVING_PORT"])
        num_processes = int(os.environ["CORTEX_PROCESSES_PER_REPLICA"])
        used_ports = {}
        for w in range(num_processes):
            used_ports[str(base_serving_port + w)] = False
        with open("/run/used_ports.json", "w+") as f:
            json.dump(used_ports, f)

    # get API spec
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    if provider == "local":
        storage = LocalStorage(cache_dir)
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"],
                     region=os.environ["AWS_REGION"])
    raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)

    # load tensorflow models into TFS
    if raw_api_spec["predictor"]["type"] == "tensorflow":
        load_tensorflow_serving_models()

    if raw_api_spec["kind"] == "RealtimeAPI":
        # https://github.com/encode/uvicorn/blob/master/uvicorn/config.py
        uvicorn.run(
            "cortex.serve.wsgi:app",
            host="0.0.0.0",
            port=int(os.environ["CORTEX_SERVING_PORT"]),
            workers=int(os.environ["CORTEX_PROCESSES_PER_REPLICA"]),
            limit_concurrency=int(
                os.environ["CORTEX_MAX_PROCESS_CONCURRENCY"]
            ),  # this is a per-process limit
            backlog=int(os.environ["CORTEX_SO_MAX_CONN"]),
            log_config=log_config,
            log_level="info",
        )
    else:
        from cortex.serve import batch

        batch.start()
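
The import string "cortex.serve.wsgi:app" points at what appears to be an ASGI application (uvicorn is an ASGI server, despite the module's wsgi name). A minimal stand-in for exercising the same uvicorn.run() settings locally might look like the following; myapp.py, the /healthz route, and the default values are illustrative assumptions, not part of Cortex:

import os

import uvicorn
from fastapi import FastAPI

app = FastAPI()

@app.get("/healthz")
def healthz():
    # trivial endpoint so there is something to serve
    return {"ok": True}

if __name__ == "__main__":
    uvicorn.run(
        "myapp:app",  # import string, so workers > 1 can re-import the app
        host="0.0.0.0",
        port=int(os.getenv("CORTEX_SERVING_PORT", "8888")),
        workers=int(os.getenv("CORTEX_PROCESSES_PER_REPLICA", "1")),
        limit_concurrency=int(os.getenv("CORTEX_MAX_PROCESS_CONCURRENCY", "128")),
        backlog=int(os.getenv("CORTEX_SO_MAX_CONN", "1024")),
        log_level="info",
    )

Passing an import string rather than the app object is what allows workers > 1: each worker process re-imports the application itself.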
Example #3
File: script.py  Project: xnohat/cortex
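A variant from the xnohat/cortex project: beyond the setup above, it resolves the predictor type from the API spec and, unless model caching is enabled, starts a live-reloading cron that keeps models in sync until it is stopped or exits.
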
import json
import os
import sys
import time

# get_spec, predictor_type_from_api_spec, is_model_caching_enabled,
# are_models_specified, the *PredictorType constants, FileBasedModelsTreeUpdater,
# TFSModelLoader, prepare_tfs_servers_api, and wait_neuron_rtd are helpers from
# Cortex's serving package, assumed to be imported alongside these modules.

def main():
    # wait until neuron-rtd sidecar is ready
    uses_inferentia = os.getenv("CORTEX_ACTIVE_NEURON")
    if uses_inferentia:
        wait_neuron_rtd()

    # strictly for Inferentia
    has_multiple_tf_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    num_processes = int(os.environ["CORTEX_PROCESSES_PER_REPLICA"])
    if has_multiple_tf_servers:
        base_serving_port = int(os.environ["CORTEX_TF_BASE_SERVING_PORT"])
        used_ports = {}
        for w in range(num_processes):
            used_ports[str(base_serving_port + w)] = False
        with open("/run/used_ports.json", "w+") as f:
            json.dump(used_ports, f)

    # get API spec
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    cache_dir = os.getenv("CORTEX_CACHE_DIR")  # set when deployed locally
    region = os.getenv("AWS_REGION")  # set when deployed to AWS
    _, api_spec = get_spec(provider, spec_path, cache_dir, region)

    predictor_type = predictor_type_from_api_spec(api_spec)
    multiple_processes = api_spec["predictor"]["processes_per_replica"] > 1
    caching_enabled = is_model_caching_enabled(api_spec)
    model_dir = os.getenv("CORTEX_MODEL_DIR")

    # start live-reloading when model caching is not enabled
    cron = None
    if not caching_enabled:
        # create cron dirs if they don't exist
        os.makedirs("/run/cron", exist_ok=True)
        os.makedirs("/tmp/cron", exist_ok=True)

        # prepare crons
        if (
            predictor_type in [PythonPredictorType, ONNXPredictorType]
            and are_models_specified(api_spec)
        ):
            cron = FileBasedModelsTreeUpdater(
                interval=10,
                api_spec=api_spec,
                download_dir=model_dir,
            )
            cron.start()
        elif predictor_type == TensorFlowPredictorType:
            tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
            tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")
            cron = TFSModelLoader(
                interval=10,
                api_spec=api_spec,
                address=f"{tf_serving_host}:{tf_serving_port}",
                tfs_model_dir=model_dir,
                download_dir=model_dir,
            )
            cron.start()
        elif predictor_type == TensorFlowNeuronPredictorType:
            cron = prepare_tfs_servers_api(api_spec, model_dir)
            cron.start()

        # wait until the cron finishes its first pass
        if cron:
            while cron.is_alive() and not cron.ran_once():
                time.sleep(0.25)

            # disable live reloading when the BatchAPI kind is used;
            # also disable it for the TensorFlow predictor when Inferentia
            # is used with multiple TF servers and more than one process
            if api_spec["kind"] != "RealtimeAPI" or (
                predictor_type == TensorFlowNeuronPredictorType
                and has_multiple_tf_servers
                and num_processes > 1
            ):
                cron.stop()

    # to synchronize with the other serving processes
    open("/mnt/workspace/init_script_run.txt", "a").close()

    # don't exit the script if the cron is running
    while cron and cron.is_alive():
        time.sleep(0.25)

    # exit if cron has exited with errors
    if cron and isinstance(cron.exitcode, int) and cron.exitcode != 0:
        # if it was killed by a signal
        if cron.exitcode < 0:
            sys.exit(-cron.exitcode)
        sys.exit(cron.exitcode)
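
The script relies only on a small surface of the cron objects: start(), is_alive(), ran_once(), stop(), and exitcode. The exitcode check suggests they are multiprocessing-based. A toy sketch of that shape (PollingCron and its internals are assumptions for illustration, not Cortex's classes):

import multiprocessing as mp
import time

class PollingCron(mp.Process):
    """Toy poller mimicking the start/is_alive/ran_once/stop/exitcode surface."""

    def __init__(self, interval: float):
        super().__init__(daemon=True)
        self.interval = interval
        self._ran_once = mp.Event()
        self._stopped = mp.Event()

    def run(self):
        # runs in the child process until stop() is called from the parent
        while not self._stopped.is_set():
            self._poll()
            self._ran_once.set()  # marks the first pass as done
            time.sleep(self.interval)

    def _poll(self):
        pass  # e.g. re-scan the model dir and reload changed models

    def ran_once(self) -> bool:
        return self._ran_once.is_set()

    def stop(self):
        self._stopped.set()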