Example #1
File: api.py Project: yaoshuyin/cortex
def get_spec(
    provider: str,
    spec_path: str,
    cache_dir: str,
    region: Optional[str] = None,
) -> Tuple[Union[LocalStorage, S3, GCS], dict]:
    """
    Args:
        provider: "local", "aws", or "gcp".
        spec_path: Path to the API spec (e.g. "s3://cortex-dev-0/apis/iris-classifier/api/69b93378fa5c0218-jy1fjtyihu-9fcc10739e7fc8050cefa8ca27ece1ee/master-spec.json").
        cache_dir: Local directory where the API spec is saved.
        region: Region of the bucket. Only required for the "aws" provider.
    """

    if provider == "local":
        storage = LocalStorage(cache_dir)
    elif provider == "aws":
        bucket, key = S3.deconstruct_s3_path(spec_path)
        storage = S3(bucket=bucket, region=region)
    elif provider == "gcp":
        bucket, key = GCS.deconstruct_gcs_path(spec_path)
        storage = GCS(bucket=bucket)
    else:
        raise ValueError('invalid "provider" argument')

    if provider == "local":
        return storage, read_json(spec_path)

    local_spec_path = os.path.join(cache_dir, "api_spec.json")
    if not os.path.isfile(local_spec_path):
        storage.download_file(key, local_spec_path)

    return storage, read_json(local_spec_path)
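A minimal usage sketch, assuming get_spec and its dependencies are importable; the bucket, key, and directory below are illustrative placeholders, not values from the source:

# Hypothetical call: download an API spec from S3 and parse it into a dict.
# provider="aws" requires a region; provider="local" reads spec_path directly.
storage, api_spec = get_spec(
    provider="aws",
    spec_path="s3://example-bucket/apis/example-api/master-spec.json",  # hypothetical
    cache_dir="/tmp/cortex_cache",  # hypothetical
    region="us-west-2",  # hypothetical
)
print(api_spec["predictor"]["type"])  # e.g. "tensorflow"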
Example #2
def main():
    with open("/src/cortex/serve/log_config.yaml", "r") as f:
        log_config = yaml.load(f, yaml.FullLoader)

    # get API spec
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    if provider == "local":
        storage = LocalStorage(cache_dir)
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"],
                     region=os.environ["AWS_REGION"])
    raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)

    # load tensorflow models into TFS
    if raw_api_spec["predictor"]["type"] == "tensorflow":
        load_tensorflow_serving_models()

    # https://github.com/encode/uvicorn/blob/master/uvicorn/config.py
    uvicorn.run(
        "cortex.serve.wsgi:app",
        host="0.0.0.0",
        port=int(os.environ["CORTEX_SERVING_PORT"]),
        workers=int(os.environ["CORTEX_WORKERS_PER_REPLICA"]),
        limit_concurrency=int(os.environ["CORTEX_MAX_WORKER_CONCURRENCY"]),
        backlog=int(os.environ["CORTEX_SO_MAX_CONN"]),
        log_config=log_config,
        log_level="info",
    )
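This entrypoint is configured entirely through environment variables. A hedged sketch of a local run, with the variable names taken from the code above and the values as placeholders (in production the Cortex operator injects them):

import os

os.environ["CORTEX_CACHE_DIR"] = "/tmp/cortex_cache"  # hypothetical
os.environ["CORTEX_PROVIDER"] = "local"
os.environ["CORTEX_API_SPEC"] = "/mnt/api_spec.json"  # hypothetical
os.environ["CORTEX_SERVING_PORT"] = "8888"
os.environ["CORTEX_WORKERS_PER_REPLICA"] = "1"
os.environ["CORTEX_MAX_WORKER_CONCURRENCY"] = "8"
os.environ["CORTEX_SO_MAX_CONN"] = "128"
# CORTEX_BUCKET and AWS_REGION are read only when CORTEX_PROVIDER != "local".
main()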
Example #3
def main():
    # wait until neuron-rtd sidecar is ready
    uses_inferentia = os.getenv("CORTEX_ACTIVE_NEURON")
    if uses_inferentia:
        wait_neuron_rtd()

    # strictly for Inferentia
    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
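        # Pre-allocate one TF Serving port per worker process and record each as
        # unclaimed in a shared table; worker processes later claim ports from
        # this file (see the FileLock-guarded reader in start_fn, Example #7).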
        base_serving_port = int(os.environ["CORTEX_TF_BASE_SERVING_PORT"])
        num_processes = int(os.environ["CORTEX_PROCESSES_PER_REPLICA"])
        used_ports = {}
        for w in range(num_processes):
            used_ports[str(base_serving_port + w)] = False
        with open("/run/used_ports.json", "w+") as f:
            json.dump(used_ports, f)

    # get API spec
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    if provider == "local":
        storage = LocalStorage(cache_dir)
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"], region=os.environ["AWS_REGION"])
    raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)

    # load tensorflow models into TFS
    if raw_api_spec["predictor"]["type"] == "tensorflow":
        load_tensorflow_serving_models()
Example #4
def start():
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    project_dir = os.environ["CORTEX_PROJECT_DIR"]
    model_dir = os.getenv("CORTEX_MODEL_DIR", None)
    tf_serving_port = os.getenv("CORTEX_TF_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    if provider == "local":
        storage = LocalStorage(cache_dir)
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"],
                     region=os.environ["AWS_REGION"])

    try:
        raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)
        api = API(provider=provider,
                  storage=storage,
                  cache_dir=cache_dir,
                  **raw_api_spec)
        client = api.predictor.initialize_client(
            model_dir,
            tf_serving_host=tf_serving_host,
            tf_serving_port=tf_serving_port)
        cx_logger().info("loading the predictor from {}".format(
            api.predictor.path))
        predictor_impl = api.predictor.initialize_impl(project_dir, client)

        local_cache["api"] = api
        local_cache["provider"] = provider
        local_cache["client"] = client
        local_cache["predictor_impl"] = predictor_impl
        local_cache["predict_fn_args"] = inspect.getfullargspec(
            predictor_impl.predict).args
        predict_route = "/"
        if provider != "local":
            predict_route = "/predict"
        local_cache["predict_route"] = predict_route
    except Exception:
        cx_logger().exception("failed to start api")
        sys.exit(1)

    if (provider != "local" and api.monitoring is not None
            and api.monitoring.model_type == "classification"):
        try:
            local_cache["class_set"] = api.get_cached_classes()
        except Exception:
            cx_logger().warning(
                "an error occurred while attempting to load classes",
                exc_info=True)

    app.add_api_route(local_cache["predict_route"], predict, methods=["POST"])
    app.add_api_route(local_cache["predict_route"],
                      get_summary,
                      methods=["GET"])

    return app
Example #5
def main():
    with open("/src/cortex/serve/log_config.yaml", "r") as f:
        log_config = yaml.load(f, yaml.FullLoader)

    # wait until neuron-rtd sidecar is ready
    uses_inferentia = os.getenv("CORTEX_ACTIVE_NEURON")
    if uses_inferentia:
        wait_neuron_rtd()

    # strictly for Inferentia
    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        base_serving_port = int(os.environ["CORTEX_TF_BASE_SERVING_PORT"])
        num_processes = int(os.environ["CORTEX_PROCESSES_PER_REPLICA"])
        used_ports = {}
        for w in range(num_processes):
            used_ports[str(base_serving_port + w)] = False
        with open("/run/used_ports.json", "w+") as f:
            json.dump(used_ports, f)

    # get API spec
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    if provider == "local":
        storage = LocalStorage(cache_dir)
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"],
                     region=os.environ["AWS_REGION"])
    raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)

    # load tensorflow models into TFS
    if raw_api_spec["predictor"]["type"] == "tensorflow":
        load_tensorflow_serving_models()

    if raw_api_spec["kind"] == "RealtimeAPI":
        # https://github.com/encode/uvicorn/blob/master/uvicorn/config.py
        uvicorn.run(
            "cortex.serve.wsgi:app",
            host="0.0.0.0",
            port=int(os.environ["CORTEX_SERVING_PORT"]),
            workers=int(os.environ["CORTEX_PROCESSES_PER_REPLICA"]),
            limit_concurrency=int(os.environ["CORTEX_MAX_PROCESS_CONCURRENCY"]),  # per-process limit
            backlog=int(os.environ["CORTEX_SO_MAX_CONN"]),
            log_config=log_config,
            log_level="info",
        )
    else:
        from cortex.serve import batch

        batch.start()
Example #6
def get_spec(
    provider: str,
    spec_path: str,
    cache_dir: Optional[str],
    bucket: Optional[str],
    region: Optional[str],
) -> Tuple[Union[LocalStorage, S3], dict]:
    if provider == "local":
        storage = LocalStorage(cache_dir)
    else:
        storage = S3(bucket=bucket, region=region)

    if provider == "local":
        return storage, read_json(spec_path)

    local_spec_path = os.path.join(cache_dir, "api_spec.json")

    if not os.path.isfile(local_spec_path):
        _, key = S3.deconstruct_s3_path(spec_path)
        storage.download_file(key, local_spec_path)

    return storage, read_json(local_spec_path)
Example #7
def start_fn():
    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    project_dir = os.environ["CORTEX_PROJECT_DIR"]

    model_dir = os.getenv("CORTEX_MODEL_DIR")
    tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    if provider == "local":
        storage = LocalStorage(cache_dir)
    else:
        storage = S3(bucket=os.environ["CORTEX_BUCKET"],
                     region=os.environ["AWS_REGION"])

    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
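        # Claim the first unclaimed TF Serving port from the table written by the
        # main() entrypoint at startup (Examples #3 and #5); the file lock keeps
        # concurrent worker processes from claiming the same port.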
        with FileLock("/run/used_ports.json.lock"):
            with open("/run/used_ports.json", "r+") as f:
                used_ports = json.load(f)
                for port in used_ports.keys():
                    if not used_ports[port]:
                        tf_serving_port = port
                        used_ports[port] = True
                        break
                f.seek(0)
                json.dump(used_ports, f)
                f.truncate()

    try:
        raw_api_spec = get_spec(provider, storage, cache_dir, spec_path)
        api = API(
            provider=provider,
            storage=storage,
            model_dir=model_dir,
            cache_dir=cache_dir,
            **raw_api_spec,
        )
        client = api.predictor.initialize_client(
            tf_serving_host=tf_serving_host, tf_serving_port=tf_serving_port)
        cx_logger().info("loading the predictor from {}".format(
            api.predictor.path))
        predictor_impl = api.predictor.initialize_impl(project_dir, client)

        local_cache["api"] = api
        local_cache["provider"] = provider
        local_cache["client"] = client
        local_cache["predictor_impl"] = predictor_impl
        local_cache["predict_fn_args"] = inspect.getfullargspec(
            predictor_impl.predict).args
        predict_route = "/"
        if provider != "local":
            predict_route = "/predict"
        local_cache["predict_route"] = predict_route
    except Exception:
        cx_logger().exception("failed to start api")
        sys.exit(1)

    if (provider != "local" and api.monitoring is not None
            and api.monitoring.model_type == "classification"):
        try:
            local_cache["class_set"] = api.get_cached_classes()
        except Exception:
            cx_logger().warning(
                "an error occurred while attempting to load classes",
                exc_info=True)

    app.add_api_route(local_cache["predict_route"], predict, methods=["POST"])
    app.add_api_route(local_cache["predict_route"],
                      get_summary,
                      methods=["GET"])

    return app
Example #8
File: context.py Project: gulahmed/cortex
    def __init__(self, **kwargs):
        if "cache_dir" in kwargs:
            self.cache_dir = kwargs["cache_dir"]
        elif "local_path" in kwargs:
            local_path_dir = os.path.dirname(os.path.abspath(kwargs["local_path"]))
            self.cache_dir = os.path.join(local_path_dir, "cache")
        else:
            raise ValueError("cache_dir must be specified (or inferred from local_path)")
        util.mkdir_p(self.cache_dir)

        if "local_path" in kwargs:
            self.ctx = util.read_msgpack(kwargs["local_path"])
        elif "obj" in kwargs:
            self.ctx = kwargs["obj"]
        elif "raw_obj" in kwargs:
            self.ctx = kwargs["raw_obj"]
        elif "s3_path":
            local_ctx_path = os.path.join(self.cache_dir, "context.msgpack")
            bucket, key = S3.deconstruct_s3_path(kwargs["s3_path"])
            S3(bucket, client_config={}).download_file(key, local_ctx_path)
            self.ctx = util.read_msgpack(local_ctx_path)
        else:
            raise ValueError("invalid context args: " + kwargs)

        self.workload_id = kwargs.get("workload_id")

        self.id = self.ctx["id"]
        self.key = self.ctx["key"]
        self.metadata_root = self.ctx["metadata_root"]
        self.cortex_config = self.ctx["cortex_config"]
        self.deployment_version = self.ctx["deployment_version"]
        self.root = self.ctx["root"]
        self.status_prefix = self.ctx["status_prefix"]
        self.app = self.ctx["app"]
        self.apis = self.ctx["apis"] or {}
        self.api_version = self.cortex_config["api_version"]
        self.monitoring = None
        self.project_id = self.ctx["project_id"]
        self.project_key = self.ctx["project_key"]

        if "local_storage_path" in kwargs:
            self.storage = LocalStorage(base_dir=kwargs["local_storage_path"])
        else:
            self.storage = S3(
                bucket=self.cortex_config["bucket"],
                region=self.cortex_config["region"],
                client_config={},
            )

        host_ip = os.environ["HOST_IP"]
        datadog.initialize(statsd_host=host_ip, statsd_port="8125")
        self.statsd = datadog.statsd

        if self.api_version != consts.CORTEX_VERSION:
            raise ValueError(
                "API version mismatch (Context: {}, Image: {})".format(
                    self.api_version, consts.CORTEX_VERSION
                )
            )

        # This affects Tensorflow S3 access
        os.environ["AWS_REGION"] = self.cortex_config.get("region", "")

        # ID maps
        self.apis_id_map = ResourceMap(self.apis) if self.apis else None
        self.id_map = self.apis_id_map
Example #9
File: context.py Project: gulahmed/cortex
class Context:
    def __init__(self, **kwargs):
        if "cache_dir" in kwargs:
            self.cache_dir = kwargs["cache_dir"]
        elif "local_path" in kwargs:
            local_path_dir = os.path.dirname(os.path.abspath(kwargs["local_path"]))
            self.cache_dir = os.path.join(local_path_dir, "cache")
        else:
            raise ValueError("cache_dir must be specified (or inferred from local_path)")
        util.mkdir_p(self.cache_dir)

        if "local_path" in kwargs:
            self.ctx = util.read_msgpack(kwargs["local_path"])
        elif "obj" in kwargs:
            self.ctx = kwargs["obj"]
        elif "raw_obj" in kwargs:
            self.ctx = kwargs["raw_obj"]
        elif "s3_path":
            local_ctx_path = os.path.join(self.cache_dir, "context.msgpack")
            bucket, key = S3.deconstruct_s3_path(kwargs["s3_path"])
            S3(bucket, client_config={}).download_file(key, local_ctx_path)
            self.ctx = util.read_msgpack(local_ctx_path)
        else:
            raise ValueError("invalid context args: " + kwargs)

        self.workload_id = kwargs.get("workload_id")

        self.id = self.ctx["id"]
        self.key = self.ctx["key"]
        self.metadata_root = self.ctx["metadata_root"]
        self.cortex_config = self.ctx["cortex_config"]
        self.deployment_version = self.ctx["deployment_version"]
        self.root = self.ctx["root"]
        self.status_prefix = self.ctx["status_prefix"]
        self.app = self.ctx["app"]
        self.apis = self.ctx["apis"] or {}
        self.api_version = self.cortex_config["api_version"]
        self.monitoring = None
        self.project_id = self.ctx["project_id"]
        self.project_key = self.ctx["project_key"]

        if "local_storage_path" in kwargs:
            self.storage = LocalStorage(base_dir=kwargs["local_storage_path"])
        else:
            self.storage = S3(
                bucket=self.cortex_config["bucket"],
                region=self.cortex_config["region"],
                client_config={},
            )

        host_ip = os.environ["HOST_IP"]
        datadog.initialize(statsd_host=host_ip, statsd_port="8125")
        self.statsd = datadog.statsd

        if self.api_version != consts.CORTEX_VERSION:
            raise ValueError(
                "API version mismatch (Context: {}, Image: {})".format(
                    self.api_version, consts.CORTEX_VERSION
                )
            )

        # This affects Tensorflow S3 access
        os.environ["AWS_REGION"] = self.cortex_config.get("region", "")

        # ID maps
        self.apis_id_map = ResourceMap(self.apis) if self.apis else None
        self.id_map = self.apis_id_map

    def download_file(self, impl_key, cache_impl_path):
        if not os.path.isfile(cache_impl_path):
            self.storage.download_file(impl_key, cache_impl_path)
        return cache_impl_path

    def download_python_file(self, impl_key, module_name):
        cache_impl_path = os.path.join(self.cache_dir, "{}.py".format(module_name))
        self.download_file(impl_key, cache_impl_path)
        return cache_impl_path

    def load_module(self, module_prefix, module_name, impl_path):
        full_module_name = "{}_{}".format(module_prefix, module_name)
        try:
            impl = imp.load_source(full_module_name, impl_path)
        except Exception as e:
            raise UserException("unable to load python file", str(e)) from e

        return impl

    def get_request_handler_impl(self, api_name, project_dir):
        api = self.apis[api_name]
        try:
            impl = self.load_module(
                "request_handler", api["name"], os.path.join(project_dir, api["request_handler"])
            )
        except CortexException as e:
            e.wrap("api " + api_name, "request_handler " + api["request_handler"])
            raise

        try:
            _validate_impl(impl, REQUEST_HANDLER_IMPL_VALIDATION)
        except CortexException as e:
            e.wrap("api " + api_name, "request_handler " + api["request_handler"])
            raise
        return impl

    def get_resource_status(self, resource):
        key = self.resource_status_key(resource)
        return self.storage.get_json(key, num_retries=5)

    def upload_resource_status_start(self, *resources):
        timestamp = util.now_timestamp_rfc_3339()
        for resource in resources:
            key = self.resource_status_key(resource)
            status = {
                "resource_id": resource["id"],
                "resource_type": resource["resource_type"],
                "workload_id": resource["workload_id"],
                "app_name": self.app["name"],
                "start": timestamp,
            }
            self.storage.put_json(status, key)

    def upload_resource_status_no_op(self, *resources):
        timestamp = util.now_timestamp_rfc_3339()
        for resource in resources:
            key = self.resource_status_key(resource)
            status = {
                "resource_id": resource["id"],
                "resource_type": resource["resource_type"],
                "workload_id": resource["workload_id"],
                "app_name": self.app["name"],
                "start": timestamp,
                "end": timestamp,
                "exit_code": "succeeded",
            }
            self.storage.put_json(status, key)

    def upload_resource_status_success(self, *resources):
        self.upload_resource_status_end("succeeded", *resources)

    def upload_resource_status_failed(self, *resources):
        self.upload_resource_status_end("failed", *resources)

    def upload_resource_status_end(self, exit_code, *resources):
        timestamp = util.now_timestamp_rfc_3339()
        for resource in resources:
            status = self.get_resource_status(resource)
            if status.get("end") != None:
                continue
            status["end"] = timestamp
            status["exit_code"] = exit_code
            key = self.resource_status_key(resource)
            self.storage.put_json(status, key)

    def resource_status_key(self, resource):
        return os.path.join(self.status_prefix, resource["id"], resource["workload_id"])

    def publish_metrics(self, metrics):
        if self.statsd is None:
            raise CortexException("statsd client not initialized")  # unexpected

        for metric in metrics:
            tags = ["{}:{}".format(dim["Name"], dim["Value"]) for dim in metric["Dimensions"]]
            if metric.get("Unit") == "Count":
                self.statsd.increment(metric["MetricName"], value=metric["Value"], tags=tags)
            else:
                self.statsd.histogram(metric["MetricName"], value=metric["Value"], tags=tags)
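A hedged sketch of the metric payload publish_metrics consumes; the field names come straight from the loop above, while the metric name and dimension values are illustrative:

# Hypothetical metric: "Count" units map to statsd.increment, everything else
# to statsd.histogram, with each dimension flattened into a "Name:Value" tag.
ctx.publish_metrics([  # ctx: a Context instance
    {
        "MetricName": "RequestCount",  # hypothetical
        "Value": 1,
        "Unit": "Count",
        "Dimensions": [{"Name": "api_name", "Value": "example-api"}],  # hypothetical
    }
])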
Example #10
    def __init__(self, **kwargs):
        if "cache_dir" in kwargs:
            self.cache_dir = kwargs["cache_dir"]
        elif "local_path" in kwargs:
            local_path_dir = os.path.dirname(os.path.abspath(kwargs["local_path"]))
            self.cache_dir = os.path.join(local_path_dir, "cache")
        else:
            raise ValueError("cache_dir must be specified (or inferred from local_path)")
        util.mkdir_p(self.cache_dir)

        if "local_path" in kwargs:
            ctx_raw = util.read_msgpack(kwargs["local_path"])
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        elif "obj" in kwargs:
            self.ctx = kwargs["obj"]
        elif "raw_obj" in kwargs:
            ctx_raw = kwargs["raw_obj"]
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        elif "s3_path":
            local_ctx_path = os.path.join(self.cache_dir, "context.msgpack")
            bucket, key = S3.deconstruct_s3_path(kwargs["s3_path"])
            S3(bucket, client_config={}).download_file(key, local_ctx_path)
            ctx_raw = util.read_msgpack(local_ctx_path)
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        else:
            raise ValueError("invalid context args: " + kwargs)

        self.workload_id = kwargs.get("workload_id")

        self.id = self.ctx["id"]
        self.key = self.ctx["key"]
        self.cortex_config = self.ctx["cortex_config"]
        self.dataset_version = self.ctx["dataset_version"]
        self.root = self.ctx["root"]
        self.raw_dataset = self.ctx["raw_dataset"]
        self.status_prefix = self.ctx["status_prefix"]
        self.app = self.ctx["app"]
        self.environment = self.ctx["environment"]
        self.python_packages = self.ctx["python_packages"] or {}
        self.raw_columns = self.ctx["raw_columns"] or {}
        self.transformed_columns = self.ctx["transformed_columns"] or {}
        self.transformers = self.ctx["transformers"] or {}
        self.aggregators = self.ctx["aggregators"] or {}
        self.aggregates = self.ctx["aggregates"] or {}
        self.constants = self.ctx["constants"] or {}
        self.models = self.ctx["models"] or {}
        self.estimators = self.ctx["estimators"] or {}
        self.apis = self.ctx["apis"] or {}
        self.training_datasets = {k: v["dataset"] for k, v in self.models.items()}
        self.api_version = self.cortex_config["api_version"]

        if "local_storage_path" in kwargs:
            self.storage = LocalStorage(base_dir=kwargs["local_storage_path"])
        else:
            self.storage = S3(
                bucket=self.cortex_config["bucket"],
                region=self.cortex_config["region"],
                client_config={},
            )

        if self.api_version != consts.CORTEX_VERSION:
            raise ValueError(
                "API version mismatch (Context: {}, Image: {})".format(
                    self.api_version, consts.CORTEX_VERSION
                )
            )

        self.columns = util.merge_dicts_overwrite(self.raw_columns, self.transformed_columns)

        self.raw_column_names = list(self.raw_columns.keys())
        self.transformed_column_names = list(self.transformed_columns.keys())
        self.column_names = list(self.columns.keys())

        # Internal caches
        self._transformer_impls = {}
        self._aggregator_impls = {}
        self._estimator_impls = {}
        self._metadatas = {}
        self._obj_cache = {}
        self.spark_uploaded_impls = {}

        # This affects Tensorflow S3 access
        os.environ["AWS_REGION"] = self.cortex_config.get("region", "")

        # Id map
        self.pp_id_map = ResourceMap(self.python_packages) if self.python_packages else None
        self.rf_id_map = ResourceMap(self.raw_columns) if self.raw_columns else None
        self.ag_id_map = ResourceMap(self.aggregates) if self.aggregates else None
        self.tf_id_map = ResourceMap(self.transformed_columns) if self.transformed_columns else None
        self.td_id_map = ResourceMap(self.training_datasets) if self.training_datasets else None
        self.models_id_map = ResourceMap(self.models) if self.models else None
        self.apis_id_map = ResourceMap(self.apis) if self.apis else None
        self.constants_id_map = ResourceMap(self.constants) if self.constants else None
        self.id_map = util.merge_dicts_overwrite(
            self.pp_id_map,
            self.rf_id_map,
            self.ag_id_map,
            self.tf_id_map,
            self.td_id_map,
            self.models_id_map,
            self.apis_id_map,
            self.constants_id_map,
        )
Example #11
class Context:
    def __init__(self, **kwargs):
        if "cache_dir" in kwargs:
            self.cache_dir = kwargs["cache_dir"]
        elif "local_path" in kwargs:
            local_path_dir = os.path.dirname(os.path.abspath(kwargs["local_path"]))
            self.cache_dir = os.path.join(local_path_dir, "cache")
        else:
            raise ValueError("cache_dir must be specified (or inferred from local_path)")
        util.mkdir_p(self.cache_dir)

        if "local_path" in kwargs:
            ctx_raw = util.read_msgpack(kwargs["local_path"])
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        elif "obj" in kwargs:
            self.ctx = kwargs["obj"]
        elif "raw_obj" in kwargs:
            ctx_raw = kwargs["raw_obj"]
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        elif "s3_path":
            local_ctx_path = os.path.join(self.cache_dir, "context.msgpack")
            bucket, key = S3.deconstruct_s3_path(kwargs["s3_path"])
            S3(bucket, client_config={}).download_file(key, local_ctx_path)
            ctx_raw = util.read_msgpack(local_ctx_path)
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        else:
            raise ValueError("invalid context args: " + kwargs)

        self.workload_id = kwargs.get("workload_id")

        self.id = self.ctx["id"]
        self.key = self.ctx["key"]
        self.cortex_config = self.ctx["cortex_config"]
        self.dataset_version = self.ctx["dataset_version"]
        self.root = self.ctx["root"]
        self.raw_dataset = self.ctx["raw_dataset"]
        self.status_prefix = self.ctx["status_prefix"]
        self.app = self.ctx["app"]
        self.environment = self.ctx["environment"]
        self.python_packages = self.ctx["python_packages"] or {}
        self.raw_columns = self.ctx["raw_columns"] or {}
        self.transformed_columns = self.ctx["transformed_columns"] or {}
        self.transformers = self.ctx["transformers"] or {}
        self.aggregators = self.ctx["aggregators"] or {}
        self.aggregates = self.ctx["aggregates"] or {}
        self.constants = self.ctx["constants"] or {}
        self.models = self.ctx["models"] or {}
        self.estimators = self.ctx["estimators"] or {}
        self.apis = self.ctx["apis"] or {}
        self.training_datasets = {k: v["dataset"] for k, v in self.models.items()}
        self.api_version = self.cortex_config["api_version"]

        if "local_storage_path" in kwargs:
            self.storage = LocalStorage(base_dir=kwargs["local_storage_path"])
        else:
            self.storage = S3(
                bucket=self.cortex_config["bucket"],
                region=self.cortex_config["region"],
                client_config={},
            )

        if self.api_version != consts.CORTEX_VERSION:
            raise ValueError(
                "API version mismatch (Context: {}, Image: {})".format(
                    self.api_version, consts.CORTEX_VERSION
                )
            )

        self.columns = util.merge_dicts_overwrite(self.raw_columns, self.transformed_columns)

        self.raw_column_names = list(self.raw_columns.keys())
        self.transformed_column_names = list(self.transformed_columns.keys())
        self.column_names = list(self.columns.keys())

        # Internal caches
        self._transformer_impls = {}
        self._aggregator_impls = {}
        self._estimator_impls = {}
        self._metadatas = {}
        self._obj_cache = {}
        self.spark_uploaded_impls = {}

        # This affects Tensorflow S3 access
        os.environ["AWS_REGION"] = self.cortex_config.get("region", "")

        # Id map
        self.pp_id_map = ResourceMap(self.python_packages) if self.python_packages else None
        self.rf_id_map = ResourceMap(self.raw_columns) if self.raw_columns else None
        self.ag_id_map = ResourceMap(self.aggregates) if self.aggregates else None
        self.tf_id_map = ResourceMap(self.transformed_columns) if self.transformed_columns else None
        self.td_id_map = ResourceMap(self.training_datasets) if self.training_datasets else None
        self.models_id_map = ResourceMap(self.models) if self.models else None
        self.apis_id_map = ResourceMap(self.apis) if self.apis else None
        self.constants_id_map = ResourceMap(self.constants) if self.constants else None
        self.id_map = util.merge_dicts_overwrite(
            self.pp_id_map,
            self.rf_id_map,
            self.ag_id_map,
            self.tf_id_map,
            self.td_id_map,
            self.models_id_map,
            self.apis_id_map,
            self.constants_id_map,
        )

    def is_raw_column(self, name):
        return name in self.raw_columns

    def is_transformed_column(self, name):
        return name in self.transformed_columns

    def is_constant(self, name):
        return name in self.constants

    def is_aggregate(self, name):
        return name in self.aggregates

    def download_file(self, impl_key, cache_impl_path):
        if not os.path.isfile(cache_impl_path):
            self.storage.download_file(impl_key, cache_impl_path)
        return cache_impl_path

    def download_python_file(self, impl_key, module_name):
        cache_impl_path = os.path.join(self.cache_dir, "{}.py".format(module_name))
        self.download_file(impl_key, cache_impl_path)
        return cache_impl_path

    def get_obj(self, key):
        if key in self._obj_cache:
            return self._obj_cache[key]

        cache_path = os.path.join(self.cache_dir, key)
        self.download_file(key, cache_path)
        self._obj_cache[key] = util.read_msgpack(cache_path)
        return self._obj_cache[key]

    def load_module(self, module_prefix, module_name, impl_key):
        full_module_name = "{}_{}".format(module_prefix, module_name)

        try:
            impl_path = self.download_python_file(impl_key, full_module_name)
        except CortexException as e:
            e.wrap("unable to find python file " + module_name)
            raise

        try:
            impl = imp.load_source(full_module_name, impl_path)
        except Exception as e:
            raise UserException("unable to load python module " + module_name) from e

        return impl, impl_path

    def get_aggregator_impl(self, aggregate_name):
        aggregator_name = self.aggregates[aggregate_name]["aggregator"]
        if aggregator_name in self._aggregator_impls:
            return self._aggregator_impls[aggregator_name]

        aggregator = self.aggregators[aggregator_name]

        module_prefix = "aggregator"
        if "namespace" in aggregator and aggregator.get("namespace", None) is not None:
            module_prefix += "_" + aggregator["namespace"]

        try:
            impl, impl_path = self.load_module(
                module_prefix, aggregator["name"], aggregator["impl_key"]
            )
        except CortexException as e:
            e.wrap("aggregate " + aggregate_name, "aggregator")
            raise

        try:
            _validate_impl(impl, AGGREGATOR_IMPL_VALIDATION)
        except CortexException as e:
            e.wrap("aggregate " + aggregate_name, "aggregator " + aggregator["name"])
            raise

        self._aggregator_impls[aggregator_name] = (impl, impl_path)
        return (impl, impl_path)

    def get_transformer_impl(self, column_name):
        if not self.is_transformed_column(column_name):
            return None, None

        transformer_name = self.transformed_columns[column_name]["transformer"]
        if transformer_name in self._transformer_impls:
            return self._transformer_impls[transformer_name]

        transformer = self.transformers[transformer_name]

        module_prefix = "transformer"
        if "namespace" in transformer and transformer.get("namespace", None) is not None:
            module_prefix += "_" + transformer["namespace"]

        try:
            impl, impl_path = self.load_module(
                module_prefix, transformer["name"], transformer["impl_key"]
            )
        except CortexException as e:
            e.wrap("transformed column " + column_name, "transformer")
            raise

        try:
            _validate_impl(impl, TRANSFORMER_IMPL_VALIDATION)
        except CortexException as e:
            e.wrap("transformed column " + column_name, "transformer " + transformer["name"])
            raise

        self._transformer_impls[transformer_name] = (impl, impl_path)
        return (impl, impl_path)

    def get_estimator_impl(self, model_name):
        estimator_name = self.models[model_name]["estimator"]
        if estimator_name in self._estimator_impls:
            return self._estimator_impls[estimator_name]

        estimator = self.estimators[estimator_name]

        module_prefix = "estimator"
        if "namespace" in estimator and estimator.get("namespace", None) is not None:
            module_prefix += "_" + estimator["namespace"]

        try:
            impl, impl_path = self.load_module(
                module_prefix, estimator["name"], estimator["impl_key"]
            )
        except CortexException as e:
            e.wrap("model " + model_name, "estimator")
            raise

        try:
            _validate_impl(impl, MODEL_IMPL_VALIDATION)
        except CortexException as e:
            e.wrap("model " + model_name, "estimator " + estimator["name"])
            raise

        self._estimator_impls[estimator_name] = (impl, impl_path)
        return (impl, impl_path)

    def get_request_handler_impl(self, api_name):
        api = self.apis[api_name]

        module_prefix = "request_handler"

        try:
            impl, impl_path = self.load_module(
                module_prefix, api["name"], api["request_handler_impl_key"]
            )
        except CortexException as e:
            e.wrap("api " + api_name, "request_handler")
            raise

        try:
            _validate_impl(impl, REQUEST_HANDLER_IMPL_VALIDATION)
        except CortexException as e:
            e.wrap("api " + api_name, "request_handler " + api["request_handler"])
            raise
        return impl

    # Mode must be "training" or "evaluation"
    def get_training_data_parts(self, model_name, mode, part_prefix="part"):
        training_dataset = self.models[model_name]["dataset"]
        if mode == "training":
            data_key = training_dataset["train_key"]
        elif mode == "evaluation":
            data_key = training_dataset["eval_key"]
        else:
            raise CortexException(
                'unrecognized mode "{}"; must be "training" or "evaluation"'.format(mode)
            )

        training_data_parts_prefix = os.path.join(data_key, part_prefix)
        return self.storage.search(prefix=training_data_parts_prefix)

    def store_aggregate_result(self, result, aggregate):
        self.storage.put_msgpack(result, aggregate["key"])

    def extract_column_names(self, input):
        column_names = set()
        for resource_name in util.extract_resource_refs(input):
            if resource_name in self.columns:
                column_names.add(resource_name)
        return column_names

    def model_config(self, model_name):
        model = self.models[model_name]
        if model is None:
            return None
        estimator = self.estimators[model["estimator"]]
        target_column = self.columns[util.get_resource_ref(model["target_column"])]

        if estimator.get("target_column") is not None:
            target_col_type = self.get_inferred_column_type(target_column["name"])
            if target_col_type not in estimator["target_column"]:
                raise UserException(
                    "model " + model_name,
                    "target_column",
                    target_column["name"],
                    "unsupported type (expected type {}, got type {})".format(
                        util.data_type_str(estimator["target_column"]),
                        util.data_type_str(target_col_type),
                    ),
                )

        model_config = deepcopy(model)
        config_keys = [
            "name",
            "estimator",
            "estimator_path",
            "target_column",
            "input",
            "training_input",
            "hparams",
            "prediction_key",
            "data_partition_ratio",
            "training",
            "evaluation",
            "tags",
        ]
        util.keep_dict_keys(model_config, config_keys)

        model_config["target_column"] = target_column["name"]
        model_config["input"] = self.populate_values(
            model["input"], estimator["input"], preserve_column_refs=False
        )
        if model.get("training_input") is not None:
            model_config["training_input"] = self.populate_values(
                model["training_input"], estimator["training_input"], preserve_column_refs=False
            )
        if model.get("hparams") is not None:
            model_config["hparams"] = self.populate_values(
                model["hparams"], estimator["hparams"], preserve_column_refs=False
            )

        return model_config

    def get_resource_status(self, resource):
        key = self.resource_status_key(resource)
        return self.storage.get_json(key)

    def upload_resource_status_start(self, *resources):
        timestamp = util.now_timestamp_rfc_3339()
        for resource in resources:
            key = self.resource_status_key(resource)
            status = {
                "resource_id": resource["id"],
                "resource_type": resource["resource_type"],
                "workload_id": resource["workload_id"],
                "app_name": self.app["name"],
                "start": timestamp,
            }
            self.storage.put_json(status, key)

    def upload_resource_status_no_op(self, *resources):
        timestamp = util.now_timestamp_rfc_3339()
        for resource in resources:
            key = self.resource_status_key(resource)
            status = {
                "resource_id": resource["id"],
                "resource_type": resource["resource_type"],
                "workload_id": resource["workload_id"],
                "app_name": self.app["name"],
                "start": timestamp,
                "end": timestamp,
                "exit_code": "succeeded",
            }
            self.storage.put_json(status, key)

    def upload_resource_status_success(self, *resources):
        self.upload_resource_status_end("succeeded", *resources)

    def upload_resource_status_failed(self, *resources):
        self.upload_resource_status_end("failed", *resources)

    def upload_resource_status_end(self, exit_code, *resources):
        timestamp = util.now_timestamp_rfc_3339()
        for resource in resources:
            status = self.get_resource_status(resource)
            if status.get("end") != None:
                continue
            status["end"] = timestamp
            status["exit_code"] = exit_code
            key = self.resource_status_key(resource)
            self.storage.put_json(status, key)

    def resource_status_key(self, resource):
        return os.path.join(self.status_prefix, resource["id"], resource["workload_id"])

    def get_metadata_url(self, resource_id):
        return os.path.join(self.ctx["metadata_root"], resource_id + ".json")

    def write_metadata(self, resource_id, metadata):
        if resource_id in self._metadatas and self._metadatas[resource_id] == metadata:
            return

        self._metadatas[resource_id] = metadata
        self.storage.put_json(metadata, self.get_metadata_url(resource_id))

    def get_metadata(self, resource_id, use_cache=True):
        if use_cache and resource_id in self._metadatas:
            return self._metadatas[resource_id]

        metadata = self.storage.get_json(self.get_metadata_url(resource_id), allow_missing=True)
        self._metadatas[resource_id] = metadata
        return metadata

    def get_inferred_column_type(self, column_name):
        column = self.columns[column_name]
        column_type = self.columns[column_name]["type"]
        if column_type == consts.COLUMN_TYPE_INFERRED:
            column_type = self.get_metadata(column["id"])["type"]
            self.columns[column_name]["type"] = column_type

        return column_type

    # Replace aggregates and constants with their values, and columns with their names (unless preserve_column_refs == False)
    # Also validate against input_schema (if not None)
    def populate_values(self, input, input_schema, preserve_column_refs):
        if input is None:
            if input_schema is None:
                return None
            if input_schema.get("_allow_null") == True:
                return None
            raise UserException("Null value is not allowed")

        if util.is_resource_ref(input):
            res_name = util.get_resource_ref(input)
            if res_name in self.constants:
                if self.constants[res_name].get("value") is not None:
                    const_val = self.constants[res_name]["value"]
                elif self.constants[res_name].get("path") is not None:
                    const_val = self.storage.get_json_external(self.constants[res_name]["path"])
                try:
                    return self.populate_values(const_val, input_schema, preserve_column_refs)
                except CortexException as e:
                    e.wrap("constant " + res_name)
                    raise

            if res_name in self.aggregates:
                agg_val = self.get_obj(self.aggregates[res_name]["key"])
                try:
                    return self.populate_values(agg_val, input_schema, preserve_column_refs)
                except CortexException as e:
                    e.wrap("aggregate " + res_name)
                    raise

            if res_name in self.columns:
                if input_schema is not None:
                    col_type = self.get_inferred_column_type(res_name)
                    if col_type not in input_schema["_type"]:
                        raise UserException(
                            "column {}: unsupported input type (expected type {}, got type {})".format(
                                res_name,
                                util.data_type_str(input_schema["_type"]),
                                util.data_type_str(col_type),
                            )
                        )
                if preserve_column_refs:
                    return input
                else:
                    return res_name

        if util.is_list(input):
            elem_schema = None
            if input_schema is not None:
                if not util.is_list(input_schema["_type"]):
                    raise UserException(
                        "unsupported input type (expected type {}, got {})".format(
                            util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                        )
                    )
                elem_schema = input_schema["_type"][0]

                min_count = input_schema.get("_min_count")
                if min_count is not None and len(input) < min_count:
                    raise UserException(
                        "list has length {}, but the minimum allowed length is {}".format(
                            len(input), min_count
                        )
                    )

                max_count = input_schema.get("_max_count")
                if max_count is not None and len(input) > max_count:
                    raise UserException(
                        "list has length {}, but the maximum allowed length is {}".format(
                            len(input), max_count
                        )
                    )

            casted = []
            for i, elem in enumerate(input):
                try:
                    casted.append(self.populate_values(elem, elem_schema, preserve_column_refs))
                except CortexException as e:
                    e.wrap("index " + i)
                    raise
            return casted

        if util.is_dict(input):
            if input_schema is None:
                casted = {}
                for key, val in input.items():
                    key_casted = self.populate_values(key, None, preserve_column_refs)
                    try:
                        val_casted = self.populate_values(val, None, preserve_column_refs)
                    except CortexException as e:
                        e.wrap(util.user_obj_str(key))
                        raise
                    casted[key_casted] = val_casted
                return casted

            if not util.is_dict(input_schema["_type"]):
                raise UserException(
                    "unsupported input type (expected type {}, got {})".format(
                        util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                    )
                )

            min_count = input_schema.get("_min_count")
            if min_count is not None and len(input) < min_count:
                raise UserException(
                    "map has length {}, but the minimum allowed length is {}".format(
                        len(input), min_count
                    )
                )

            max_count = input_schema.get("_max_count")
            if max_count is not None and len(input) > max_count:
                raise UserException(
                    "map has length {}, but the maximum allowed length is {}".format(
                        len(input), max_count
                    )
                )

            is_generic_map = False
            if len(input_schema["_type"]) == 1:
                input_type_key = next(iter(input_schema["_type"].keys()))
                if is_compound_type(input_type_key):
                    is_generic_map = True
                    generic_map_key_schema = input_schema_from_type_schema(input_type_key)
                    generic_map_value = input_schema["_type"][input_type_key]

            if is_generic_map:
                casted = {}
                for key, val in input.items():
                    key_casted = self.populate_values(
                        key, generic_map_key_schema, preserve_column_refs
                    )
                    try:
                        val_casted = self.populate_values(
                            val, generic_map_value, preserve_column_refs
                        )
                    except CortexException as e:
                        e.wrap(util.user_obj_str(key))
                        raise
                    casted[key_casted] = val_casted
                return casted

            # fixed map
            casted = {}
            for key, val_schema in input_schema["_type"].items():
                if key in input:
                    val = input[key]
                else:
                    if val_schema.get("_optional") is not True:
                        raise UserException("missing key: " + util.user_obj_str(key))
                    if val_schema.get("_default") is None:
                        continue
                    val = val_schema["_default"]

                try:
                    val_casted = self.populate_values(val, val_schema, preserve_column_refs)
                except CortexException as e:
                    e.wrap(util.user_obj_str(key))
                    raise
                casted[key] = val_casted
            return casted

        if input_schema is None:
            return input
        if not util.is_str(input_schema["_type"]):
            raise UserException(
                "unsupported input type (expected type {}, got {})".format(
                    util.data_type_str(input_schema["_type"]), util.user_obj_str(input)
                )
            )
        return cast_compound_type(input, input_schema["_type"])
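For orientation, a hedged sketch of the input_schema shape that populate_values walks; the underscore keys ("_type", "_optional", "_min_count", "_allow_null", "_default") come from the method above, while the field names and the "INT"/"STRING" type strings are illustrative assumptions:

# Hypothetical fixed-map schema: a required scalar plus an optional list.
input_schema = {
    "_type": {
        "num_epochs": {"_type": "INT"},
        "tags": {"_type": ["STRING"], "_optional": True, "_min_count": 1},
    }
}
# ctx.populate_values({"num_epochs": 10}, input_schema, preserve_column_refs=False)
# would validate the input against this schema, recurse through the list and
# map branches above, and return the casted result.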
Example #12
class Context:
    def __init__(self, **kwargs):
        if "cache_dir" in kwargs:
            self.cache_dir = kwargs["cache_dir"]
        elif "local_path" in kwargs:
            local_path_dir = os.path.dirname(os.path.abspath(kwargs["local_path"]))
            self.cache_dir = os.path.join(local_path_dir, "cache")
        else:
            raise ValueError("cache_dir must be specified (or inferred from local_path)")
        util.mkdir_p(self.cache_dir)

        if "local_path" in kwargs:
            self.ctx = util.read_msgpack(kwargs["local_path"])
        elif "obj" in kwargs:
            self.ctx = kwargs["obj"]
        elif "raw_obj" in kwargs:
            self.ctx = kwargs["raw_obj"]
        elif "s3_path":
            local_ctx_path = os.path.join(self.cache_dir, "context.msgpack")
            bucket, key = S3.deconstruct_s3_path(kwargs["s3_path"])
            S3(bucket, client_config={}).download_file(key, local_ctx_path)
            self.ctx = util.read_msgpack(local_ctx_path)
        else:
            raise ValueError("invalid context args: " + kwargs)

        self.workload_id = kwargs.get("workload_id")

        self.id = self.ctx["id"]
        self.key = self.ctx["key"]
        self.metadata_root = self.ctx["metadata_root"]
        self.cluster_config = self.ctx["cluster_config"]
        self.deployment_version = self.ctx["deployment_version"]
        self.root = self.ctx["root"]
        self.status_prefix = self.ctx["status_prefix"]
        self.app = self.ctx["app"]
        self.apis = self.ctx["apis"] or {}
        self.api_version = self.cluster_config["api_version"]
        self.monitoring = None
        self.project_id = self.ctx["project_id"]
        self.project_key = self.ctx["project_key"]

        if "local_storage_path" in kwargs:
            self.storage = LocalStorage(base_dir=kwargs["local_storage_path"])
        else:
            self.storage = S3(
                bucket=self.cluster_config["bucket"],
                region=self.cluster_config["region"],
                client_config={},
            )

        host_ip = os.environ["HOST_IP"]
        datadog.initialize(statsd_host=host_ip, statsd_port="8125")
        self.statsd = datadog.statsd

        if self.api_version != consts.CORTEX_VERSION:
            raise ValueError(
                "api version mismatch (context: {}, image: {})".format(
                    self.api_version, consts.CORTEX_VERSION
                )
            )

        # This affects TensorFlow S3 access
        os.environ["AWS_REGION"] = self.cluster_config.get("region", "")

        # ID maps
        self.apis_id_map = ResourceMap(self.apis) if self.apis else None
        self.id_map = self.apis_id_map

    def download_file(self, impl_key, cache_impl_path):
        if not os.path.isfile(cache_impl_path):
            self.storage.download_file(impl_key, cache_impl_path)
        return cache_impl_path

    def download_python_file(self, impl_key, module_name):
        cache_impl_path = os.path.join(self.cache_dir, "{}.py".format(module_name))
        self.download_file(impl_key, cache_impl_path)
        return cache_impl_path

    def load_module(self, module_prefix, module_name, impl_path):
        full_module_name = "{}_{}".format(module_prefix, module_name)

        if impl_path.endswith(".pickle"):
            try:
                impl = imp.new_module(full_module_name)

                with open(impl_path, "rb") as pickle_file:
                    pickled_dict = dill.load(pickle_file)
                    for key in pickled_dict:
                        setattr(impl, key, pickled_dict[key])
            except Exception as e:
                raise UserException("unable to load pickle", str(e)) from e
        else:
            try:
                impl = imp.load_source(full_module_name, impl_path)
            except Exception as e:
                raise UserException(str(e)) from e

        return impl

    def get_predictor_class(self, api_name, project_dir):
        api = self.apis[api_name]

        if api["predictor"]["type"] == "tensorflow":
            target_class_name = "TensorFlowPredictor"
            validations = TENSORFLOW_CLASS_VALIDATION
        elif api["predictor"]["type"] == "onnx":
            target_class_name = "ONNXPredictor"
            validations = ONNX_CLASS_VALIDATION
        elif api["predictor"]["type"] == "python":
            target_class_name = "PythonPredictor"
            validations = PYTHON_CLASS_VALIDATION

        try:
            impl = self.load_module(
                "predictor", api["name"], os.path.join(project_dir, api["predictor"]["path"])
            )
        except CortexException as e:
            e.wrap("api " + api_name, "error in " + api["predictor"]["path"])
            raise
        finally:
            refresh_logger()

        try:
            classes = inspect.getmembers(impl, inspect.isclass)
            predictor_class = None
            for class_df in classes:
                if class_df[0] == target_class_name:
                    if predictor_class is not None:
                        raise UserException(
                            "multiple definitions for {} class found; please check your imports and class definitions and ensure that there is only one Predictor class definition".format(
                                target_class_name
                            )
                        )
                    predictor_class = class_df[1]
            if predictor_class is None:
                raise UserException("{} class is not defined".format(target_class_name))

            _validate_impl(predictor_class, validations)
        except CortexException as e:
            e.wrap("api " + api_name, "error in " + api["predictor"]["path"])
            raise
        return predictor_class

    def get_resource_status(self, resource):
        key = self.resource_status_key(resource)
        return self.storage.get_json(key, num_retries=5)

    def upload_resource_status_start(self, *resources):
        timestamp = util.now_timestamp_rfc_3339()
        for resource in resources:
            key = self.resource_status_key(resource)
            status = {
                "resource_id": resource["id"],
                "resource_type": resource["resource_type"],
                "workload_id": resource["workload_id"],
                "app_name": self.app["name"],
                "start": timestamp,
            }
            self.storage.put_json(status, key)

    def upload_resource_status_no_op(self, *resources):
        timestamp = util.now_timestamp_rfc_3339()
        for resource in resources:
            key = self.resource_status_key(resource)
            status = {
                "resource_id": resource["id"],
                "resource_type": resource["resource_type"],
                "workload_id": resource["workload_id"],
                "app_name": self.app["name"],
                "start": timestamp,
                "end": timestamp,
                "exit_code": "succeeded",
            }
            self.storage.put_json(status, key)

    def upload_resource_status_success(self, *resources):
        self.upload_resource_status_end("succeeded", *resources)

    def upload_resource_status_failed(self, *resources):
        self.upload_resource_status_end("failed", *resources)

    def upload_resource_status_end(self, exit_code, *resources):
        timestamp = util.now_timestamp_rfc_3339()
        for resource in resources:
            status = self.get_resource_status(resource)
            if status.get("end") != None:
                continue
            status["end"] = timestamp
            status["exit_code"] = exit_code
            key = self.resource_status_key(resource)
            self.storage.put_json(status, key)

    def resource_status_key(self, resource):
        return os.path.join(self.status_prefix, resource["id"], resource["workload_id"])

    def publish_metrics(self, metrics):
        if self.statsd is None:
            raise CortexException("statsd client not initialized")  # unexpected

        for metric in metrics:
            tags = ["{}:{}".format(dim["Name"], dim["Value"]) for dim in metric["Dimensions"]]
            if metric.get("Unit") == "Count":
                self.statsd.increment(metric["MetricName"], value=metric["Value"], tags=tags)
            else:
                self.statsd.histogram(metric["MetricName"], value=metric["Value"], tags=tags)
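Finally, a hedged sketch of the kind of user-supplied module that get_predictor_class (Example #12) loads for the "python" predictor type; the class name PythonPredictor comes from the code above, while the __init__ and predict signatures are assumptions about the interface being validated:

# predictor.py -- hypothetical user implementation
class PythonPredictor:
    def __init__(self, config):
        # assumed signature: config is the predictor configuration from the API spec
        self.greeting = config.get("greeting", "hello")

    def predict(self, payload):
        # assumed signature: payload is the parsed request body
        return {"message": self.greeting, "echo": payload}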