Example #1
def get_spec(
    provider: str,
    spec_path: str,
    cache_dir: str,
    region: Optional[str] = None,
) -> Tuple[Union[LocalStorage, S3, GCS], dict]:
    """
    Args:
        provider: "local", "aws" or "gcp".
        spec_path: Path to the API spec (e.g. "s3://cortex-dev-0/apis/iris-classifier/api/69b93378fa5c0218-jy1fjtyihu-9fcc10739e7fc8050cefa8ca27ece1ee/master-spec.json").
        cache_dir: Local directory the API spec gets saved to.
        region: Region of the bucket. Only required for the "aws" provider.
    """

    if provider == "local":
        storage = LocalStorage(cache_dir)
    elif provider == "aws":
        bucket, key = S3.deconstruct_s3_path(spec_path)
        storage = S3(bucket=bucket, region=region)
    elif provider == "gcp":
        bucket, key = GCS.deconstruct_gcs_path(spec_path)
        storage = GCS(bucket=bucket)
    else:
        raise ValueError('invalid "provider" argument')

    if provider == "local":
        return storage, read_json(spec_path)

    local_spec_path = os.path.join(cache_dir, "api_spec.json")
    if not os.path.isfile(local_spec_path):
        storage.download_file(key, local_spec_path)

    return storage, read_json(local_spec_path)
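
Note: a minimal usage sketch for the "aws" branch of get_spec above; the bucket path, cache directory and region are hypothetical, and LocalStorage/S3/GCS are assumed to be importable from the surrounding module.

# Hypothetical call; all paths below are made up for illustration.
storage, api_spec = get_spec(
    provider="aws",
    spec_path="s3://example-bucket/apis/my-api/spec.json",
    cache_dir="/tmp/spec_cache",
    region="us-west-2",
)
# storage is an S3 client scoped to "example-bucket"; api_spec is the parsed spec dict.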
Example #2
def start(args):
    download_config = json.loads(base64.urlsafe_b64decode(args.download))
    for download_arg in download_config["download_args"]:
        from_path = download_arg["from"]
        to_path = download_arg["to"]
        item_name = download_arg.get("item_name", "")
        bucket_name, prefix = S3.deconstruct_s3_path(from_path)
        s3_client = S3(bucket_name, client_config={})

        if item_name != "":
            if download_arg.get("hide_from_log", False):
                logger().info("downloading {}".format(item_name))
            else:
                logger().info("downloading {} from {}".format(
                    item_name, from_path))

        if download_arg.get("to_file", False):
            s3_client.download_file(prefix, to_path)
        else:
            s3_client.download(prefix, to_path)

        if download_arg.get("unzip", False):
            if item_name != "" and not download_arg.get(
                    "hide_unzipping_log", False):
                logger().info("unzipping {}".format(item_name))
            if download_arg.get("to_file", False):
                util.extract_zip(to_path, delete_zip_file=True)
            else:
                util.extract_zip(os.path.join(to_path,
                                              os.path.basename(from_path)),
                                 delete_zip_file=True)

    if download_config.get("last_log", "") != "":
        logger().info(download_config["last_log"])
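
Note: a sketch of how the base64-encoded args.download payload read by start(args) could be built; the keys mirror the ones consumed above ("download_args", "from", "to", "item_name", "unzip", "last_log"), while the paths and messages are hypothetical.

import base64
import json

download_config = {
    "download_args": [
        {
            "from": "s3://example-bucket/project.zip",  # made-up S3 path
            "to": "/mnt/project",
            "item_name": "project code",
            "unzip": True,
        }
    ],
    "last_log": "downloads finished",
}
encoded = base64.urlsafe_b64encode(json.dumps(download_config).encode()).decode()
# `encoded` is what would be passed as the --download argument (args.download).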
Example #3
def start(args):
    download = json.loads(base64.urlsafe_b64decode(args.download))
    for download_arg in download:
        from_path = download_arg["from"]
        to_path = download_arg["to"]
        item_name = download_arg.get("item_name", "")
        bucket_name, prefix = S3.deconstruct_s3_path(from_path)
        s3_client = S3(bucket_name, client_config={})

        if item_name != "":
            cx_logger().info("downloading {} from {}".format(item_name, from_path))
        s3_client.download(prefix, to_path)

        if download_arg.get("unzip", False):
            if item_name != "":
                cx_logger().info("unzipping {}".format(item_name))
            util.extract_zip(
                os.path.join(to_path, os.path.basename(from_path)), delete_zip_file=True
            )

        if download_arg.get("tf_model_version_rename", "") != "":
            dest = util.trim_suffix(download_arg["tf_model_version_rename"], "/")
            dir_path = os.path.dirname(dest)
            entries = os.listdir(dir_path)
            if len(entries) == 1:
                src = os.path.join(dir_path, entries[0])
                os.rename(src, dest)
Example #4
def start():
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    spec = os.environ["CORTEX_API_SPEC"]
    project_dir = os.environ["CORTEX_PROJECT_DIR"]
    model_dir = os.getenv("CORTEX_MODEL_DIR", None)
    tf_serving_port = os.getenv("CORTEX_TF_SERVING_PORT", None)
    storage = S3(bucket=os.environ["CORTEX_BUCKET"],
                 region=os.environ["AWS_REGION"])

    try:
        raw_api_spec = get_spec(storage, cache_dir, spec)
        api = API(storage=storage, cache_dir=cache_dir, **raw_api_spec)
        client = api.predictor.initialize_client(model_dir, tf_serving_port)
        cx_logger().info("loading the predictor from {}".format(
            api.predictor.path))
        predictor_impl = api.predictor.initialize_impl(project_dir, client)

        local_cache["api"] = api
        local_cache["client"] = client
        local_cache["predictor_impl"] = predictor_impl
    except:
        cx_logger().exception("failed to start api")
        sys.exit(1)

    if api.tracker is not None and api.tracker.model_type == "classification":
        try:
            local_cache["class_set"] = api.get_cached_classes()
        except Exception as e:
            cx_logger().warn(
                "an error occurred while attempting to load classes",
                exc_info=True)

    cx_logger().info("{} api is live".format(api.name))
    return app
Example #5
def main():
    # wait until neuron-rtd sidecar is ready
    uses_inferentia = os.getenv("CORTEX_ACTIVE_NEURON")
    if uses_inferentia:
        wait_neuron_rtd()

    # strictly for Inferentia
    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        base_serving_port = int(os.environ["CORTEX_TF_BASE_SERVING_PORT"])
        num_processes = int(os.environ["CORTEX_PROCESSES_PER_REPLICA"])
        used_ports = {}
        for w in range(int(num_processes)):
            used_ports[str(base_serving_port + w)] = False
        with open("/run/used_ports.json", "w+") as f:
            json.dump(used_ports, f)

    # get API spec
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    if provider == "local":
        storage = LocalStorage(os.getenv("CORTEX_CACHE_DIR"))
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"], region=os.environ["AWS_REGION"])
    raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)

    # load tensorflow models into TFS
    if raw_api_spec["predictor"]["type"] == "tensorflow":
        load_tensorflow_serving_models()
Example #6
def start(args):
    assert_api_version()
    storage = S3(bucket=os.environ["CORTEX_BUCKET"], region=os.environ["AWS_REGION"])
    try:
        raw_api_spec = get_spec(args.cache_dir, args.spec)
        api = API(storage=storage, cache_dir=args.cache_dir, **raw_api_spec)
        client = api.predictor.initialize_client(args)
        cx_logger().info("loading the predictor from {}".format(api.predictor.path))
        predictor_impl = api.predictor.initialize_impl(args.project_dir, client)

        local_cache["api"] = api
        local_cache["client"] = client
        local_cache["predictor_impl"] = predictor_impl
    except:
        cx_logger().exception("failed to start api")
        sys.exit(1)

    if api.tracker is not None and api.tracker.model_type == "classification":
        try:
            local_cache["class_set"] = api.get_cached_classes()
        except Exception as e:
            cx_logger().warn("an error occurred while attempting to load classes", exc_info=True)

    waitress_kwargs = extract_waitress_params(api.predictor.config)
    waitress_kwargs["listen"] = "*:{}".format(args.port)

    open("/health_check.txt", "a").close()
    cx_logger().info("{} api is live".format(api.name))
    serve(app, **waitress_kwargs)
Example #7
def main():
    with open("/src/cortex/serve/log_config.yaml", "r") as f:
        log_config = yaml.load(f, yaml.FullLoader)

    # get API spec
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    if provider == "local":
        storage = LocalStorage(os.getenv("CORTEX_CACHE_DIR"))
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"],
                     region=os.environ["AWS_REGION"])
    raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)

    # load tensorflow models into TFS
    if raw_api_spec["predictor"]["type"] == "tensorflow":
        load_tensorflow_serving_models()

    # https://github.com/encode/uvicorn/blob/master/uvicorn/config.py
    uvicorn.run(
        "cortex.serve.wsgi:app",
        host="0.0.0.0",
        port=int(os.environ["CORTEX_SERVING_PORT"]),
        workers=int(os.environ["CORTEX_WORKERS_PER_REPLICA"]),
        limit_concurrency=int(os.environ["CORTEX_MAX_WORKER_CONCURRENCY"]),
        backlog=int(os.environ["CORTEX_SO_MAX_CONN"]),
        log_config=log_config,
        log_level="info",
    )
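
Note: main() above is driven entirely by environment variables; below is a hedged sketch of the variables it reads, with made-up values (a real deployment sets these differently).

import os

os.environ.update({
    "CORTEX_CACHE_DIR": "/tmp/cache",
    "CORTEX_PROVIDER": "aws",
    "CORTEX_API_SPEC": "s3://example-bucket/apis/my-api/api_spec.json",
    "CORTEX_BUCKET": "example-bucket",
    "AWS_REGION": "us-west-2",
    "CORTEX_SERVING_PORT": "8888",
    "CORTEX_WORKERS_PER_REPLICA": "1",
    "CORTEX_MAX_WORKER_CONCURRENCY": "8",
    "CORTEX_SO_MAX_CONN": "128",
})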
Example #8
def get_spec(provider, storage, cache_dir, spec_path):
    if provider == "local":
        return read_msgpack(spec_path)

    local_spec_path = os.path.join(cache_dir, "api_spec.msgpack")
    _, key = S3.deconstruct_s3_path(spec_path)
    storage.download_file(key, local_spec_path)
    return read_msgpack(local_spec_path)
Example #9
def start():
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    project_dir = os.environ["CORTEX_PROJECT_DIR"]
    model_dir = os.getenv("CORTEX_MODEL_DIR", None)
    tf_serving_port = os.getenv("CORTEX_TF_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    if provider == "local":
        storage = LocalStorage(os.getenv("CORTEX_CACHE_DIR"))
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"],
                     region=os.environ["AWS_REGION"])

    try:
        raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)
        api = API(provider=provider,
                  storage=storage,
                  cache_dir=cache_dir,
                  **raw_api_spec)
        client = api.predictor.initialize_client(
            model_dir,
            tf_serving_host=tf_serving_host,
            tf_serving_port=tf_serving_port)
        cx_logger().info("loading the predictor from {}".format(
            api.predictor.path))
        predictor_impl = api.predictor.initialize_impl(project_dir, client)

        local_cache["api"] = api
        local_cache["provider"] = provider
        local_cache["client"] = client
        local_cache["predictor_impl"] = predictor_impl
        local_cache["predict_fn_args"] = inspect.getfullargspec(
            predictor_impl.predict).args
        predict_route = "/"
        if provider != "local":
            predict_route = "/predict"
        local_cache["predict_route"] = predict_route
    except:
        cx_logger().exception("failed to start api")
        sys.exit(1)

    if (provider != "local" and api.monitoring is not None
            and api.monitoring.model_type == "classification"):
        try:
            local_cache["class_set"] = api.get_cached_classes()
        except:
            cx_logger().warn(
                "an error occurred while attempting to load classes",
                exc_info=True)

    app.add_api_route(local_cache["predict_route"], predict, methods=["POST"])
    app.add_api_route(local_cache["predict_route"],
                      get_summary,
                      methods=["GET"])

    return app
Example #10
def start():
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    api_spec_path = os.environ["CORTEX_API_SPEC"]
    job_spec_path = os.environ["CORTEX_JOB_SPEC"]
    project_dir = os.environ["CORTEX_PROJECT_DIR"]

    model_dir = os.getenv("CORTEX_MODEL_DIR")
    tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    storage = S3(bucket=os.environ["CORTEX_BUCKET"],
                 region=os.environ["AWS_REGION"])

    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        with FileLock("/run/used_ports.json.lock"):
            with open("/run/used_ports.json", "r+") as f:
                used_ports = json.load(f)
                for port in used_ports.keys():
                    if not used_ports[port]:
                        tf_serving_port = port
                        used_ports[port] = True
                        break
                f.seek(0)
                json.dump(used_ports, f)
                f.truncate()

    raw_api_spec = get_spec(provider, storage, cache_dir, api_spec_path)
    job_spec = get_job_spec(storage, cache_dir, job_spec_path)

    api = API(provider=provider,
              storage=storage,
              model_dir=model_dir,
              cache_dir=cache_dir,
              **raw_api_spec)

    client = api.predictor.initialize_client(tf_serving_host=tf_serving_host,
                                             tf_serving_port=tf_serving_port)
    cx_logger().info("loading the predictor from {}".format(
        api.predictor.path))
    predictor_impl = api.predictor.initialize_impl(project_dir, client,
                                                   raw_api_spec, job_spec)

    local_cache["api_spec"] = api
    local_cache["provider"] = provider
    local_cache["job_spec"] = job_spec
    local_cache["predictor_impl"] = predictor_impl
    local_cache["predict_fn_args"] = inspect.getfullargspec(
        predictor_impl.predict).args
    local_cache["sqs_client"] = boto3.client(
        "sqs", region_name=os.environ["AWS_REGION"])

    open("/mnt/workspace/api_readiness.txt", "a").close()

    cx_logger().info("polling for batches...")
    sqs_loop()
Example #11
def start(args):
    download_config = json.loads(base64.urlsafe_b64decode(args.download))
    for download_arg in download_config["download_args"]:
        from_path = download_arg["from"]
        to_path = download_arg["to"]
        item_name = download_arg.get("item_name", "")

        if from_path.startswith("s3://"):
            bucket_name, prefix = S3.deconstruct_s3_path(from_path)
            client = S3(bucket_name, client_config={})
        elif from_path.startswith("gs://"):
            bucket_name, prefix = GCS.deconstruct_gcs_path(from_path)
            client = GCS(bucket_name)
        else:
            raise ValueError(
                'the "from" download arg must start with either the "s3://" or "gs://" prefix'
            )

        if item_name != "":
            if download_arg.get("hide_from_log", False):
                logger().info("downloading {}".format(item_name))
            else:
                logger().info("downloading {} from {}".format(
                    item_name, from_path))

        if download_arg.get("to_file", False):
            client.download_file(prefix, to_path)
        else:
            client.download(prefix, to_path)

        if download_arg.get("unzip", False):
            if item_name != "" and not download_arg.get(
                    "hide_unzipping_log", False):
                logger().info("unzipping {}".format(item_name))
            if download_arg.get("to_file", False):
                util.extract_zip(to_path, delete_zip_file=True)
            else:
                util.extract_zip(os.path.join(to_path,
                                              os.path.basename(from_path)),
                                 delete_zip_file=True)

    if download_config.get("last_log", "") != "":
        logger().info(download_config["last_log"])
Example #12
def get_spec(provider, storage, cache_dir, spec_path):
    if provider == "local":
        return read_json(spec_path)

    local_spec_path = os.path.join(cache_dir, "api_spec.json")

    if not os.path.isfile(local_spec_path):
        _, key = S3.deconstruct_s3_path(spec_path)
        storage.download_file(key, local_spec_path)

    return read_json(local_spec_path)
Example #13
def main():
    with open("/src/cortex/serve/log_config.yaml", "r") as f:
        log_config = yaml.load(f, yaml.FullLoader)

    # wait until neuron-rtd sidecar is ready
    uses_inferentia = os.getenv("CORTEX_ACTIVE_NEURON")
    if uses_inferentia:
        wait_neuron_rtd()

    # strictly for Inferentia
    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        base_serving_port = int(os.environ["CORTEX_TF_BASE_SERVING_PORT"])
        num_processes = int(os.environ["CORTEX_PROCESSES_PER_REPLICA"])
        used_ports = {}
        for w in range(int(num_processes)):
            used_ports[str(base_serving_port + w)] = False
        with open("/run/used_ports.json", "w+") as f:
            json.dump(used_ports, f)

    # get API spec
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    if provider == "local":
        storage = LocalStorage(os.getenv("CORTEX_CACHE_DIR"))
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"],
                     region=os.environ["AWS_REGION"])
    raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)

    # load tensorflow models into TFS
    if raw_api_spec["predictor"]["type"] == "tensorflow":
        load_tensorflow_serving_models()

    if raw_api_spec["kind"] == "RealtimeAPI":
        # https://github.com/encode/uvicorn/blob/master/uvicorn/config.py
        uvicorn.run(
            "cortex.serve.wsgi:app",
            host="0.0.0.0",
            port=int(os.environ["CORTEX_SERVING_PORT"]),
            workers=int(os.environ["CORTEX_PROCESSES_PER_REPLICA"]),
            limit_concurrency=int(os.environ["CORTEX_MAX_PROCESS_CONCURRENCY"]
                                  ),  # this is a per process limit
            backlog=int(os.environ["CORTEX_SO_MAX_CONN"]),
            log_config=log_config,
            log_level="info",
        )
    else:
        from cortex.serve import batch

        batch.start()
Example #14
def get_spec(
    provider: str,
    spec_path: str,
    cache_dir: Optional[str],
    bucket: Optional[str],
    region: Optional[str],
) -> Tuple[Union[LocalStorage, S3], dict]:
    if provider == "local":
        storage = LocalStorage(cache_dir)
    else:
        storage = S3(bucket=bucket, region=region)

    if provider == "local":
        return storage, read_json(spec_path)

    local_spec_path = os.path.join(cache_dir, "api_spec.json")

    if not os.path.isfile(local_spec_path):
        _, key = S3.deconstruct_s3_path(spec_path)
        storage.download_file(key, local_spec_path)

    return storage, read_json(local_spec_path)
Example #15
def model_downloader(
    predictor_type: PredictorType,
    bucket_provider: str,
    bucket_name: str,
    model_name: str,
    model_version: str,
    model_path: str,
    temp_dir: str,
    model_dir: str,
) -> Optional[datetime.datetime]:
    """
    Downloads model to disk. Validates the cloud model path and the downloaded model as well.

    Args:
        predictor_type: The predictor type as implemented by the API.
        bucket_provider: Provider for the bucket. Can be "s3" or "gs".
        bucket_name: Name of the bucket where the model is stored.
        model_name: Name of the model. Is part of the model's local path.
        model_version: Version of the model. Is part of the model's local path.
        model_path: Model prefix of the versioned model.
        temp_dir: Where to temporarily store the model for validation.
        model_dir: The top directory of where all models are stored locally.

    Returns:
        The model's timestamp, or None if the model doesn't pass validation, doesn't exist, or can't be accessed due to insufficient permissions.
    """

    logger().info(
        f"downloading from bucket {bucket_name}/{model_path}, model {model_name} of version {model_version}, temporarily to {temp_dir} and then finally to {model_dir}"
    )

    if bucket_provider == "s3":
        client = S3(bucket_name)
    elif bucket_provider == "gs":
        client = GCS(bucket_name)

    # validate upstream cloud model
    sub_paths, ts = client.search(model_path)
    try:
        validate_model_paths(sub_paths, predictor_type, model_path)
    except CortexException:
        logger().info(f"failed validating model {model_name} of version {model_version}")
        return None

    # download model to temp dir
    temp_dest = os.path.join(temp_dir, model_name, model_version)
    try:
        client.download_dir_contents(model_path, temp_dest)
    except CortexException:
        logger().info(
            f"failed downloading model {model_name} of version {model_version} to temp dir {temp_dest}"
        )
        shutil.rmtree(temp_dest)
        return None

    # validate model
    model_contents = glob.glob(temp_dest + "*/**", recursive=True)
    model_contents = util.remove_non_empty_directory_paths(model_contents)
    try:
        validate_model_paths(model_contents, predictor_type, temp_dest)
    except CortexException:
        logger().info(
            f"failed validating model {model_name} of version {model_version} from temp dir"
        )
        shutil.rmtree(temp_dest)
        return None

    # move model to dest dir
    model_top_dir = os.path.join(model_dir, model_name)
    ondisk_model_version = os.path.join(model_top_dir, model_version)
    logger().info(
        f"moving model {model_name} of version {model_version} to final dir {ondisk_model_version}"
    )
    if os.path.isdir(ondisk_model_version):
        shutil.rmtree(ondisk_model_version)
    shutil.move(temp_dest, ondisk_model_version)

    return max(ts)
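
Note: a hedged sketch of a call to model_downloader; the predictor type member and all paths/names below are assumptions for illustration and are not taken from the original project.

# Hypothetical invocation; every value here is made up.
ts = model_downloader(
    predictor_type=PredictorType.TENSORFLOW,  # assumed enum member
    bucket_provider="s3",
    bucket_name="example-bucket",
    model_name="iris-classifier",
    model_version="1",
    model_path="models/iris-classifier/1/",
    temp_dir="/tmp/model_cache",
    model_dir="/mnt/models",
)
if ts is None:
    logger().info("model was skipped (failed validation, missing, or no permissions)")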
Example #16
def start_fn():
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    project_dir = os.environ["CORTEX_PROJECT_DIR"]

    model_dir = os.getenv("CORTEX_MODEL_DIR")
    tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    if provider == "local":
        storage = LocalStorage(os.getenv("CORTEX_CACHE_DIR"))
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"],
                     region=os.environ["AWS_REGION"])

    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        with FileLock("/run/used_ports.json.lock"):
            with open("/run/used_ports.json", "r+") as f:
                used_ports = json.load(f)
                for port in used_ports.keys():
                    if not used_ports[port]:
                        tf_serving_port = port
                        used_ports[port] = True
                        break
                f.seek(0)
                json.dump(used_ports, f)
                f.truncate()

    try:
        raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)
        api = API(
            provider=provider,
            storage=storage,
            model_dir=model_dir,
            cache_dir=cache_dir,
            **raw_api_spec,
        )
        client = api.predictor.initialize_client(
            tf_serving_host=tf_serving_host, tf_serving_port=tf_serving_port)
        cx_logger().info("loading the predictor from {}".format(
            api.predictor.path))
        predictor_impl = api.predictor.initialize_impl(project_dir, client)

        local_cache["api"] = api
        local_cache["provider"] = provider
        local_cache["client"] = client
        local_cache["predictor_impl"] = predictor_impl
        local_cache["predict_fn_args"] = inspect.getfullargspec(
            predictor_impl.predict).args
        predict_route = "/"
        if provider != "local":
            predict_route = "/predict"
        local_cache["predict_route"] = predict_route
    except:
        cx_logger().exception("failed to start api")
        sys.exit(1)

    if (provider != "local" and api.monitoring is not None
            and api.monitoring.model_type == "classification"):
        try:
            local_cache["class_set"] = api.get_cached_classes()
        except:
            cx_logger().warn(
                "an error occurred while attempting to load classes",
                exc_info=True)

    app.add_api_route(local_cache["predict_route"], predict, methods=["POST"])
    app.add_api_route(local_cache["predict_route"],
                      get_summary,
                      methods=["GET"])

    return app
Example #17
def find_all_cloud_models(
    is_dir_used: bool,
    models_dir: str,
    predictor_type: PredictorType,
    cloud_paths: List[str],
    cloud_model_names: List[str],
) -> Tuple[List[str], Dict[str, List[str]], List[str], List[List[str]],
           List[List[datetime.datetime]], List[str], List[str], ]:
    """
    Get updated information on all models that are currently present on the cloud upstreams.
    Information on the available models, versions, last edit times, the subpaths of each model, and so on.

    Args:
        is_dir_used: Whether predictor:models:dir is used or not.
        models_dir: The value of predictor:models:dir in case it's present. Ignored when not required.
        predictor_type: The predictor type.
        cloud_paths: The cloud model paths as specified in predictor:model_path, predictor:models:dir or predictor:models:paths (whichever is used). Ignored when not required.
        cloud_model_names: The cloud model names as specified in predictor:models:paths:name when predictor:models:paths is used, or the default model name when predictor:model_path is used. Ignored when not required.

    Returns: The tuple with the following elements:
        model_names - a list with the unique names of the models (e.g. bert, gpt-2).
        versions - a dictionary with the keys representing the model names and the values being lists of versions that each model has.
          For non-versioned model paths ModelVersion.NOT_PROVIDED, the list will be empty.
        model_paths - a list with the prefix of each model.
        sub_paths - a list of filepaths lists for each file of each model.
        timestamps - a list of timestamps lists representing the last edit time of each versioned model.
        bucket_providers - a list of the bucket providers for each model. Can be "s3" or "gs".
        bucket_names - a list of the bucket names of each model.
    """

    # validate models stored in cloud (S3 or GS) that were specified with predictor:models:dir field
    if is_dir_used:
        if S3.is_valid_s3_path(models_dir):
            bucket_name, models_path = S3.deconstruct_s3_path(models_dir)
            client = S3(bucket_name)
        elif GCS.is_valid_gcs_path(models_dir):
            bucket_name, models_path = GCS.deconstruct_gcs_path(models_dir)
            client = GCS(bucket_name)

        sub_paths, timestamps = client.search(models_path)

        model_paths, ooa_ids = validate_models_dir_paths(
            sub_paths, predictor_type, models_path)
        model_names = [
            os.path.basename(model_path) for model_path in model_paths
        ]

        model_paths = [
            model_path for model_path in model_paths
            if os.path.basename(model_path) in model_names
        ]
        model_paths = [
            model_path + "/" * (not model_path.endswith("/"))
            for model_path in model_paths
        ]

        if S3.is_valid_s3_path(models_dir):
            bucket_providers = len(model_paths) * ["s3"]
        if GCS.is_valid_gcs_path(models_dir):
            bucket_providers = len(model_paths) * ["gs"]

        bucket_names = len(model_paths) * [bucket_name]
        sub_paths = len(model_paths) * [sub_paths]
        timestamps = len(model_paths) * [timestamps]

    # validate models stored in cloud (S3 or GS) that were specified with predictor:models:paths field
    if not is_dir_used:
        sub_paths = []
        ooa_ids = []
        model_paths = []
        model_names = []
        timestamps = []
        bucket_providers = []
        bucket_names = []
        for idx, path in enumerate(cloud_paths):
            if S3.is_valid_s3_path(path):
                bucket_name, model_path = S3.deconstruct_s3_path(path)
                client = S3(bucket_name)
            elif GCS.is_valid_gcs_path(path):
                bucket_name, model_path = GCS.deconstruct_gcs_path(path)
                client = GCS(bucket_name)
            else:
                continue

            sb, model_path_ts = client.search(model_path)
            try:
                ooa_ids.append(
                    validate_model_paths(sb, predictor_type, model_path))
            except CortexException:
                continue
            model_paths.append(model_path)
            model_names.append(cloud_model_names[idx])
            bucket_names.append(bucket_name)
            sub_paths += [sb]
            timestamps += [model_path_ts]

            if S3.is_valid_s3_path(path):
                bucket_providers.append("s3")
            if GCS.is_valid_gcs_path(path):
                bucket_providers.append("gs")

    # determine the detected versions for each cloud model
    # if the model was not versioned, then leave the version list empty
    versions = {}
    for model_path, model_name, model_ooa_ids, bucket_sub_paths in zip(
            model_paths, model_names, ooa_ids, sub_paths):
        if ModelVersion.PROVIDED not in model_ooa_ids:
            versions[model_name] = []
            continue

        model_sub_paths = [
            os.path.relpath(sub_path, model_path)
            for sub_path in bucket_sub_paths
        ]
        model_versions_paths = [
            path for path in model_sub_paths if not path.startswith("../")
        ]
        model_versions = [
            util.get_leftmost_part_of_path(model_version_path)
            for model_version_path in model_versions_paths
        ]
        model_versions = list(set(model_versions))
        versions[model_name] = model_versions

    # pick up the max timestamp for each versioned model
    aux_timestamps = []
    for model_path, model_name, bucket_sub_paths, sub_path_timestamps in zip(
            model_paths, model_names, sub_paths, timestamps):
        model_ts = []
        if len(versions[model_name]) == 0:
            masks = list(
                map(
                    lambda x: x.startswith(model_path + "/" *
                                           (model_path[-1] != "/")),
                    bucket_sub_paths,
                ))
            model_ts = [max(itertools.compress(sub_path_timestamps, masks))]

        for version in versions[model_name]:
            masks = list(
                map(
                    lambda x: x.startswith(
                        os.path.join(model_path, version) + "/"),
                    bucket_sub_paths,
                ))
            model_ts.append(max(itertools.compress(sub_path_timestamps,
                                                   masks)))

        aux_timestamps.append(model_ts)

    timestamps = aux_timestamps  # type: List[List[datetime.datetime]]

    # model_names - a list with the unique names of the models (e.g. bert, gpt-2)
    # versions - a dictionary with the keys representing the model names and the values being lists of versions that each model has.
    #   For non-versioned model paths ModelVersion.NOT_PROVIDED, the list will be empty
    # model_paths - a list with the prefix of each model
    # sub_paths - a list of filepaths lists for each file of each model
    # timestamps - a list of timestamps lists representing the last edit time of each versioned model
    # bucket_providers - bucket providers
    # bucket_names - names of the buckets

    return model_names, versions, model_paths, sub_paths, timestamps, bucket_providers, bucket_names
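
Note: a hedged sketch of calling find_all_cloud_models for two explicitly listed models (the predictor:models:paths case); the predictor type member, bucket paths and model names below are hypothetical.

# Hypothetical invocation; all values are made up for illustration.
(model_names, versions, model_paths, sub_paths,
 timestamps, bucket_providers, bucket_names) = find_all_cloud_models(
    is_dir_used=False,
    models_dir="",  # ignored when is_dir_used is False
    predictor_type=PredictorType.TENSORFLOW,  # assumed enum member
    cloud_paths=["s3://example-bucket/models/bert/", "gs://example-bucket/models/gpt-2/"],
    cloud_model_names=["bert", "gpt-2"],
)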
Example #18
def get_job_spec(storage, cache_dir, job_spec_path):
    local_spec_path = os.path.join(cache_dir, "job_spec.json")
    _, key = S3.deconstruct_s3_path(job_spec_path)
    storage.download_file(key, local_spec_path)
    with open(local_spec_path) as f:
        return json.load(f)
Example #19
    def __init__(self, **kwargs):
        if "cache_dir" in kwargs:
            self.cache_dir = kwargs["cache_dir"]
        elif "local_path" in kwargs:
            local_path_dir = os.path.dirname(os.path.abspath(kwargs["local_path"]))
            self.cache_dir = os.path.join(local_path_dir, "cache")
        else:
            raise ValueError("cache_dir must be specified (or inferred from local_path)")
        util.mkdir_p(self.cache_dir)

        if "local_path" in kwargs:
            self.ctx = util.read_msgpack(kwargs["local_path"])
        elif "obj" in kwargs:
            self.ctx = kwargs["obj"]
        elif "raw_obj" in kwargs:
            self.ctx = kwargs["raw_obj"]
        elif "s3_path":
            local_ctx_path = os.path.join(self.cache_dir, "context.msgpack")
            bucket, key = S3.deconstruct_s3_path(kwargs["s3_path"])
            S3(bucket, client_config={}).download_file(key, local_ctx_path)
            self.ctx = util.read_msgpack(local_ctx_path)
        else:
            raise ValueError("invalid context args: " + kwargs)

        self.workload_id = kwargs.get("workload_id")

        self.id = self.ctx["id"]
        self.key = self.ctx["key"]
        self.metadata_root = self.ctx["metadata_root"]
        self.cortex_config = self.ctx["cortex_config"]
        self.deployment_version = self.ctx["deployment_version"]
        self.root = self.ctx["root"]
        self.status_prefix = self.ctx["status_prefix"]
        self.app = self.ctx["app"]
        self.apis = self.ctx["apis"] or {}
        self.api_version = self.cortex_config["api_version"]
        self.monitoring = None
        self.project_id = self.ctx["project_id"]
        self.project_key = self.ctx["project_key"]

        if "local_storage_path" in kwargs:
            self.storage = LocalStorage(base_dir=kwargs["local_storage_path"])
        else:
            self.storage = S3(
                bucket=self.cortex_config["bucket"],
                region=self.cortex_config["region"],
                client_config={},
            )

        host_ip = os.environ["HOST_IP"]
        datadog.initialize(statsd_host=host_ip, statsd_port="8125")
        self.statsd = datadog.statsd

        if self.api_version != consts.CORTEX_VERSION:
            raise ValueError(
                "API version mismatch (Context: {}, Image: {})".format(
                    self.api_version, consts.CORTEX_VERSION
                )
            )

        # This affects Tensorflow S3 access
        os.environ["AWS_REGION"] = self.cortex_config.get("region", "")

        # ID maps
        self.apis_id_map = ResourceMap(self.apis) if self.apis else None
        self.id_map = self.apis_id_map
Example #20
    def __init__(self, **kwargs):
        if "cache_dir" in kwargs:
            self.cache_dir = kwargs["cache_dir"]
        elif "local_path" in kwargs:
            local_path_dir = os.path.dirname(os.path.abspath(kwargs["local_path"]))
            self.cache_dir = os.path.join(local_path_dir, "cache")
        else:
            raise ValueError("cache_dir must be specified (or inferred from local_path)")
        util.mkdir_p(self.cache_dir)

        if "local_path" in kwargs:
            ctx_raw = util.read_msgpack(kwargs["local_path"])
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        elif "obj" in kwargs:
            self.ctx = kwargs["obj"]
        elif "raw_obj" in kwargs:
            ctx_raw = kwargs["raw_obj"]
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        elif "s3_path":
            local_ctx_path = os.path.join(self.cache_dir, "context.msgpack")
            bucket, key = S3.deconstruct_s3_path(kwargs["s3_path"])
            S3(bucket, client_config={}).download_file(key, local_ctx_path)
            ctx_raw = util.read_msgpack(local_ctx_path)
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        else:
            raise ValueError("invalid context args: " + kwargs)

        self.workload_id = kwargs.get("workload_id")

        self.id = self.ctx["id"]
        self.key = self.ctx["key"]
        self.cortex_config = self.ctx["cortex_config"]
        self.dataset_version = self.ctx["dataset_version"]
        self.root = self.ctx["root"]
        self.raw_dataset = self.ctx["raw_dataset"]
        self.status_prefix = self.ctx["status_prefix"]
        self.app = self.ctx["app"]
        self.environment = self.ctx["environment"]
        self.python_packages = self.ctx["python_packages"] or {}
        self.raw_columns = self.ctx["raw_columns"] or {}
        self.transformed_columns = self.ctx["transformed_columns"] or {}
        self.transformers = self.ctx["transformers"] or {}
        self.aggregators = self.ctx["aggregators"] or {}
        self.aggregates = self.ctx["aggregates"] or {}
        self.constants = self.ctx["constants"] or {}
        self.models = self.ctx["models"] or {}
        self.estimators = self.ctx["estimators"] or {}
        self.apis = self.ctx["apis"] or {}
        self.training_datasets = {k: v["dataset"] for k, v in self.models.items()}
        self.api_version = self.cortex_config["api_version"]

        if "local_storage_path" in kwargs:
            self.storage = LocalStorage(base_dir=kwargs["local_storage_path"])
        else:
            self.storage = S3(
                bucket=self.cortex_config["bucket"],
                region=self.cortex_config["region"],
                client_config={},
            )

        if self.api_version != consts.CORTEX_VERSION:
            raise ValueError(
                "API version mismatch (Context: {}, Image: {})".format(
                    self.api_version, consts.CORTEX_VERSION
                )
            )

        self.columns = util.merge_dicts_overwrite(self.raw_columns, self.transformed_columns)

        self.raw_column_names = list(self.raw_columns.keys())
        self.transformed_column_names = list(self.transformed_columns.keys())
        self.column_names = list(self.columns.keys())

        # Internal caches
        self._transformer_impls = {}
        self._aggregator_impls = {}
        self._estimator_impls = {}
        self._metadatas = {}
        self._obj_cache = {}
        self.spark_uploaded_impls = {}

        # This affects Tensorflow S3 access
        os.environ["AWS_REGION"] = self.cortex_config.get("region", "")

        # Id map
        self.pp_id_map = ResourceMap(self.python_packages) if self.python_packages else None
        self.rf_id_map = ResourceMap(self.raw_columns) if self.raw_columns else None
        self.ag_id_map = ResourceMap(self.aggregates) if self.aggregates else None
        self.tf_id_map = ResourceMap(self.transformed_columns) if self.transformed_columns else None
        self.td_id_map = ResourceMap(self.training_datasets) if self.training_datasets else None
        self.models_id_map = ResourceMap(self.models) if self.models else None
        self.apis_id_map = ResourceMap(self.apis) if self.apis else None
        self.constants_id_map = ResourceMap(self.constants) if self.constants else None
        self.id_map = util.merge_dicts_overwrite(
            self.pp_id_map,
            self.rf_id_map,
            self.ag_id_map,
            self.tf_id_map,
            self.td_id_map,
            self.models_id_map,
            self.apis_id_map,
            self.constants_id_map,
        )
Example #21
def get_spec(cache_dir, s3_path):
    local_spec_path = os.path.join(cache_dir, "api_spec.msgpack")
    bucket, key = S3.deconstruct_s3_path(s3_path)
    S3(bucket, client_config={}).download_file(key, local_spec_path)
    return util.read_msgpack(local_spec_path)
Example #22
def get_spec(storage, cache_dir, s3_path):
    local_spec_path = os.path.join(cache_dir, "api_spec.msgpack")
    _, key = S3.deconstruct_s3_path(s3_path)
    storage.download_file(key, local_spec_path)
    return util.read_msgpack(local_spec_path)