def __init__(self, provider: str, api_spec: dict, model_dir: str):
    """
    Args:
        provider: "local" or "aws".
        api_spec: API configuration.
        model_dir: Where the models are stored on disk.
    """
    self.provider = provider
    self.type = predictor_type_from_api_spec(api_spec)
    self.path = api_spec["predictor"]["path"]
    self.config = api_spec["predictor"].get("config", {})
    self.api_spec = api_spec

    self.crons = []
    if not are_models_specified(self.api_spec):
        return

    self.model_dir = model_dir
    self.caching_enabled = is_model_caching_enabled(self.api_spec)
    self.multiple_processes = self.api_spec["predictor"]["processes_per_replica"] > 1

    # model caching can only be enabled when processes_per_replica is 1
    # model side-reloading is supported for any number of processes_per_replica
    if self.caching_enabled:
        if self.type == PythonPredictorType:
            mem_cache_size = self.api_spec["predictor"]["multi_model_reloading"]["cache_size"]
            disk_cache_size = self.api_spec["predictor"]["multi_model_reloading"]["disk_cache_size"]
        else:
            mem_cache_size = self.api_spec["predictor"]["models"]["cache_size"]
            disk_cache_size = self.api_spec["predictor"]["models"]["disk_cache_size"]
        self.models = ModelsHolder(
            self.type,
            self.model_dir,
            mem_cache_size=mem_cache_size,
            disk_cache_size=disk_cache_size,
            on_download_callback=model_downloader,
        )
    elif not self.caching_enabled and self.type not in [
        TensorFlowPredictorType,
        TensorFlowNeuronPredictorType,
    ]:
        self.models = ModelsHolder(self.type, self.model_dir)
    else:
        self.models = None

    if self.multiple_processes:
        self.models_tree = None
    else:
        self.models_tree = ModelsTree()
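# A minimal sketch of constructing this object; "Predictor" is an assumed
# name for the enclosing class, and the spec fragment mirrors only the keys
# that __init__ above actually reads (all values are hypothetical).

example_api_spec = {
    "kind": "RealtimeAPI",
    "predictor": {
        "type": "python",
        "path": "predictor.py",
        "processes_per_replica": 1,
        # no models specified, so __init__ returns early after the basics
        "multi_model_reloading": None,
    },
}

predictor = Predictor(provider="aws", api_spec=example_api_spec, model_dir="/mnt/model")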
def is_model_caching_enabled(api_spec: dict) -> bool:
    """
    Checks if model caching has been enabled in the API spec (cortex.yaml).
    """
    predictor_type = predictor_type_from_api_spec(api_spec)

    if predictor_type == PythonPredictorType and api_spec["predictor"]["multi_model_reloading"]:
        models = api_spec["predictor"]["multi_model_reloading"]
    elif predictor_type != PythonPredictorType:
        models = api_spec["predictor"]["models"]
    else:
        return False

    # both cache sizes must be set for caching to count as enabled
    return bool(models and models["cache_size"] and models["disk_cache_size"])
def are_models_specified(api_spec: dict) -> bool:
    """
    Checks if models have been specified in the API spec (cortex.yaml).

    Args:
        api_spec: API configuration.
    """
    predictor_type = predictor_type_from_api_spec(api_spec)

    if predictor_type == PythonPredictorType and api_spec["predictor"]["multi_model_reloading"]:
        models = api_spec["predictor"]["multi_model_reloading"]
    elif predictor_type != PythonPredictorType:
        models = api_spec["predictor"]["models"]
    else:
        return False

    return models is not None
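# For reference, a sketch of a predictor spec that satisfies both helpers
# above; the field layout is inferred from the keys they access, and the
# bucket path and cache sizes are hypothetical.

example_api_spec = {
    "predictor": {
        "type": "python",
        "processes_per_replica": 1,
        "multi_model_reloading": {
            "path": None,
            "paths": None,
            "dir": "s3://example-bucket/models/",  # hypothetical bucket
            "cache_size": 4,       # max models kept in memory
            "disk_cache_size": 8,  # max models kept on disk
            "signature_key": None,
        },
    },
}

assert are_models_specified(example_api_spec)
assert is_model_caching_enabled(example_api_spec)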
def __init__(
    self,
    api_spec: Dict[str, Any],
    storage: S3,
    storage_path: str,
    model_dir: str,
    statsd_host: str,
    statsd_port: int,
    lock_dir: str = "/run/cron",
):
    self.api_spec = api_spec
    self.storage = storage
    self.storage_path = storage_path
    self.path = api_spec["predictor"]["path"]
    self.config = api_spec["predictor"].get("config", {})
    self.type = predictor_type_from_api_spec(api_spec)
    self.model_dir = model_dir
    self.lock_dir = lock_dir

    # set up the datadog client for emitting statsd metrics
    datadog.initialize(statsd_host=statsd_host, statsd_port=statsd_port)
    self.__statsd = datadog.statsd
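# For context, a standalone sketch of how the datadog statsd client set up
# above is typically used; the host, port, metric name, and tag below are
# illustrative and not taken from the source.

import datadog

datadog.initialize(statsd_host="localhost", statsd_port=9125)
statsd = datadog.statsd

# emit a counter metric tagged with the API name (hypothetical values)
statsd.increment("cortex.api.request", tags=["api:iris-classifier"])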
def main():
    # wait until the neuron-rtd sidecar is ready (strictly for Inferentia)
    uses_inferentia = os.getenv("CORTEX_ACTIVE_NEURON")
    if uses_inferentia:
        wait_neuron_rtd()

    has_multiple_tf_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    num_processes = int(os.environ["CORTEX_PROCESSES_PER_REPLICA"])
    if has_multiple_tf_servers:
        base_serving_port = int(os.environ["CORTEX_TF_BASE_SERVING_PORT"])
        used_ports = {}
        for w in range(num_processes):
            used_ports[str(base_serving_port + w)] = False
        with open("/run/used_ports.json", "w+") as f:
            json.dump(used_ports, f)

    # get API spec
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    cache_dir = os.getenv("CORTEX_CACHE_DIR")
    region = os.getenv("AWS_REGION")  # set when deployed to AWS
    _, api_spec = get_spec(provider, spec_path, cache_dir, region)

    predictor_type = predictor_type_from_api_spec(api_spec)
    multiple_processes = api_spec["predictor"]["processes_per_replica"] > 1
    caching_enabled = is_model_caching_enabled(api_spec)
    model_dir = os.getenv("CORTEX_MODEL_DIR")

    # start live-reloading when model caching is not enabled
    cron = None
    if not caching_enabled:
        # create cron dirs if they don't exist
        os.makedirs("/run/cron", exist_ok=True)
        os.makedirs("/tmp/cron", exist_ok=True)

        # prepare crons
        if predictor_type in [PythonPredictorType, ONNXPredictorType] and are_models_specified(
            api_spec
        ):
            cron = FileBasedModelsTreeUpdater(
                interval=10,
                api_spec=api_spec,
                download_dir=model_dir,
            )
            cron.start()
        elif predictor_type == TensorFlowPredictorType:
            tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
            tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")
            cron = TFSModelLoader(
                interval=10,
                api_spec=api_spec,
                address=f"{tf_serving_host}:{tf_serving_port}",
                tfs_model_dir=model_dir,
                download_dir=model_dir,
            )
            cron.start()
        elif predictor_type == TensorFlowNeuronPredictorType:
            cron = prepare_tfs_servers_api(api_spec, model_dir)
            cron.start()

    # wait until the cron finishes its first pass
    if cron:
        while cron.is_alive() and not cron.ran_once():
            time.sleep(0.25)

    # disable live reloading when the BatchAPI kind is used
    # disable live reloading for the TF predictor when Inferentia is used
    # and when multiple processes are used (num procs > 1)
    if cron and (
        api_spec["kind"] != "RealtimeAPI"
        or (
            predictor_type == TensorFlowNeuronPredictorType
            and has_multiple_tf_servers
            and num_processes > 1
        )
    ):
        cron.stop()

    # to synchronize with the other serving processes
    open("/mnt/workspace/init_script_run.txt", "a").close()

    # don't exit the script while the cron is running
    while cron and cron.is_alive():
        time.sleep(0.25)

    # exit if the cron has exited with errors
    if cron and isinstance(cron.exitcode, int) and cron.exitcode != 0:
        # if it was killed by a signal, the exit code is negative
        if cron.exitcode < 0:
            sys.exit(-cron.exitcode)
        sys.exit(cron.exitcode)
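# The ports file written by main() above is simple bookkeeping that marks
# which TF serving ports are claimed; assuming CORTEX_PROCESSES_PER_REPLICA=2
# and CORTEX_TF_BASE_SERVING_PORT=9000, it would look like this:

import json

used_ports = {str(9000 + w): False for w in range(2)}
print(json.dumps(used_ports))  # {"9000": false, "9001": false}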
def get_models_from_api_spec(
    api_spec: dict, model_dir: str = "/mnt/model"
) -> CuratedModelResources:
    """
    Only effective for models:path, models:paths, or for the models:dir field
    when the dir is a local path. It does not apply when the models:dir field
    is set to an S3 model path.
    """
    predictor_type = predictor_type_from_api_spec(api_spec)

    if predictor_type == PythonPredictorType and api_spec["predictor"]["multi_model_reloading"]:
        models_spec = api_spec["predictor"]["multi_model_reloading"]
    elif predictor_type != PythonPredictorType:
        models_spec = api_spec["predictor"]["models"]
    else:
        return CuratedModelResources([])

    if not models_spec["path"] and not models_spec["paths"]:
        return CuratedModelResources([])

    # for models.path
    models = []
    if models_spec["path"]:
        model = {
            "name": cortex_internal.consts.SINGLE_MODEL_NAME,
            "path": models_spec["path"],
            "signature_key": models_spec["signature_key"],
        }
        models.append(model)

    # for models.paths
    if models_spec["paths"]:
        for model in models_spec["paths"]:
            models.append(
                {
                    "name": model["name"],
                    "path": model["path"],
                    "signature_key": model["signature_key"],
                }
            )

    # building model resources for models.path or models.paths
    model_resources = []
    for model in models:
        model_resource = {}
        model_resource["name"] = model["name"]

        if not model["signature_key"]:
            model_resource["signature_key"] = models_spec["signature_key"]
        else:
            model_resource["signature_key"] = model["signature_key"]

        ends_as_file_path = model["path"].endswith(".onnx")
        if ends_as_file_path and os.path.exists(
            os.path.join(model_dir, model_resource["name"], "1", os.path.basename(model["path"]))
        ):
            model_resource["is_file_path"] = True
            model_resource["s3_path"] = False
            model_resource["gcs_path"] = False
            model_resource["local_path"] = True
            model_resource["versions"] = []
            model_resource["path"] = os.path.join(
                model_dir, model_resource["name"], "1", os.path.basename(model["path"])
            )
            model_resources.append(model_resource)
            continue

        model_resource["is_file_path"] = False
        model_resource["s3_path"] = model["path"].startswith("s3://")
        model_resource["gcs_path"] = model["path"].startswith("gs://")
        model_resource["local_path"] = (
            not model_resource["s3_path"] and not model_resource["gcs_path"]
        )

        if model_resource["s3_path"] or model_resource["gcs_path"]:
            model_resource["path"] = model["path"]
            _, versions, _, _, _, _, _ = find_all_cloud_models(
                False, "", predictor_type, [model_resource["path"]], [model_resource["name"]]
            )
            if model_resource["name"] not in versions:
                continue
            model_resource["versions"] = versions[model_resource["name"]]
        else:
            model_resource["path"] = os.path.join(model_dir, model_resource["name"])
            model_resource["versions"] = os.listdir(model_resource["path"])

        model_resources.append(model_resource)

    # building model resources for models.dir
    if (
        models_spec
        and models_spec["dir"]
        and not models_spec["dir"].startswith("s3://")
        and not models_spec["dir"].startswith("gs://")
    ):
        for model_name in os.listdir(model_dir):
            model_resource = {}
            model_resource["name"] = model_name
            model_resource["s3_path"] = False
            model_resource["gcs_path"] = False
            model_resource["local_path"] = True
            model_resource["signature_key"] = models_spec["signature_key"]
            model_resource["path"] = os.path.join(model_dir, model_name)
            model_resource["versions"] = os.listdir(model_resource["path"])
            model_resources.append(model_resource)

    return CuratedModelResources(model_resources)
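# A usage sketch for the function above; the spec describes two hypothetical
# S3-hosted models for a TensorFlow predictor, with the key layout inferred
# from the fields the function accesses.

example_api_spec = {
    "predictor": {
        "type": "tensorflow",
        "models": {
            "path": None,
            "paths": [
                {"name": "iris", "path": "s3://example-bucket/models/iris/", "signature_key": None},
                {"name": "mnist", "path": "s3://example-bucket/models/mnist/", "signature_key": None},
            ],
            "dir": None,
            "signature_key": None,
        },
    },
}

curated = get_models_from_api_spec(example_api_spec)
# each curated resource carries: name, path, s3_path/gcs_path/local_path flags,
# signature_key, and the versions discovered by find_all_cloud_models()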
def get_models_from_api_spec(
    api_spec: dict, model_dir: str = "/mnt/model"
) -> CuratedModelResources:
    """
    Only effective for models:path, models:paths, or for the models:dir field
    when the dir is a local path. It does not apply when the models:dir field
    is set to an S3 model path.
    """
    predictor_type = predictor_type_from_api_spec(api_spec)

    if predictor_type == PythonPredictorType and api_spec["predictor"]["multi_model_reloading"]:
        models_spec = api_spec["predictor"]["multi_model_reloading"]
    elif predictor_type != PythonPredictorType:
        models_spec = api_spec["predictor"]["models"]
    else:
        return CuratedModelResources([])

    if not models_spec["path"] and not models_spec["paths"]:
        return CuratedModelResources([])

    # for models.path
    models = []
    if models_spec["path"]:
        model = {
            "name": cortex_internal.consts.SINGLE_MODEL_NAME,
            "path": models_spec["path"],
            "signature_key": models_spec["signature_key"],
        }
        models.append(model)

    # for models.paths
    if models_spec["paths"]:
        for model in models_spec["paths"]:
            models.append(
                {
                    "name": model["name"],
                    "path": model["path"],
                    "signature_key": model["signature_key"],
                }
            )

    # building model resources for models.path or models.paths
    model_resources = []
    for model in models:
        model_resource = {}
        model_resource["name"] = model["name"]

        if not model["signature_key"]:
            model_resource["signature_key"] = models_spec["signature_key"]
        else:
            model_resource["signature_key"] = model["signature_key"]

        model_resource["path"] = model["path"]
        _, versions, _, _, _, _ = find_all_s3_models(
            False, "", predictor_type, [model_resource["path"]], [model_resource["name"]]
        )
        if model_resource["name"] not in versions:
            continue
        model_resource["versions"] = versions[model_resource["name"]]

        model_resources.append(model_resource)

    return CuratedModelResources(model_resources)