def __init__(self, provider: str, api_spec: dict, model_dir: str):
    """
    Args:
        provider: "local" or "aws".
        api_spec: API configuration.
        model_dir: Where the models are stored on disk.
    """
    self.provider = provider

    self.type = predictor_type_from_api_spec(api_spec)
    self.path = api_spec["predictor"]["path"]
    self.config = api_spec["predictor"].get("config", {})
    self.api_spec = api_spec

    self.crons = []
    if not _are_models_specified(self.api_spec):
        return

    self.model_dir = model_dir

    self.caching_enabled = self._is_model_caching_enabled()
    self.multiple_processes = self.api_spec["predictor"]["processes_per_replica"] > 1

    # model caching can only be enabled when processes_per_replica is 1;
    # model side-reloading is supported for any number of processes_per_replica
    if self.caching_enabled:
        self.models = ModelsHolder(
            self.type,
            self.model_dir,
            mem_cache_size=self.api_spec["predictor"]["models"]["cache_size"],
            disk_cache_size=self.api_spec["predictor"]["models"]["disk_cache_size"],
            on_download_callback=model_downloader,
        )
    elif self.type not in [
        TensorFlowPredictorType,
        TensorFlowNeuronPredictorType,
    ]:
        self.models = ModelsHolder(self.type, self.model_dir)
    else:
        self.models = None

    if self.multiple_processes:
        self.models_tree = None
    else:
        self.models_tree = ModelsTree()
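
# A minimal sketch of an api_spec that exercises the caching branch of
# __init__ above. The concrete values (bucket name, cache sizes) are
# illustrative assumptions; only the dictionary shape mirrors the keys that
# __init__ reads.
_example_api_spec = {
    "kind": "RealtimeAPI",
    "predictor": {
        "path": "predictor.py",
        "processes_per_replica": 1,  # model caching requires a single process
        "models": {
            "dir": "s3://my-bucket/models/",  # hypothetical bucket
            "cache_size": 3,        # max models held in memory
            "disk_cache_size": 10,  # max models held on disk
            "signature_key": None,
        },
    },
}
# With processes_per_replica == 1 and both cache sizes set, __init__ builds a
# ModelsHolder with memory/disk eviction limits plus a ModelsTree for live
# reloading; with processes_per_replica > 1, models_tree stays None.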
def main():
    # wait until the neuron-rtd sidecar is ready
    uses_inferentia = os.getenv("CORTEX_ACTIVE_NEURON")
    if uses_inferentia:
        wait_neuron_rtd()

    # strictly for Inferentia
    has_multiple_tf_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    num_processes = int(os.environ["CORTEX_PROCESSES_PER_REPLICA"])
    if has_multiple_tf_servers:
        base_serving_port = int(os.environ["CORTEX_TF_BASE_SERVING_PORT"])
        used_ports = {}
        for w in range(num_processes):
            used_ports[str(base_serving_port + w)] = False
        with open("/run/used_ports.json", "w+") as f:
            json.dump(used_ports, f)

    # get the API spec
    provider = os.environ["CORTEX_PROVIDER"]
    spec_path = os.environ["CORTEX_API_SPEC"]
    cache_dir = os.getenv("CORTEX_CACHE_DIR")  # set when deployed locally
    region = os.getenv("AWS_REGION")  # set when deployed to AWS
    _, api_spec = get_spec(provider, spec_path, cache_dir, region)

    predictor_type = predictor_type_from_api_spec(api_spec)
    multiple_processes = api_spec["predictor"]["processes_per_replica"] > 1
    caching_enabled = is_model_caching_enabled(api_spec)
    model_dir = os.getenv("CORTEX_MODEL_DIR")

    # start live-reloading when model caching is not enabled
    cron = None
    if not caching_enabled:
        # create the cron dirs if they don't exist
        os.makedirs("/run/cron", exist_ok=True)
        os.makedirs("/tmp/cron", exist_ok=True)

        # prepare the crons
        if predictor_type in [PythonPredictorType, ONNXPredictorType] and are_models_specified(
            api_spec
        ):
            cron = FileBasedModelsTreeUpdater(
                interval=10,
                api_spec=api_spec,
                download_dir=model_dir,
            )
            cron.start()
        elif predictor_type == TensorFlowPredictorType:
            tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
            tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")
            cron = TFSModelLoader(
                interval=10,
                api_spec=api_spec,
                address=f"{tf_serving_host}:{tf_serving_port}",
                tfs_model_dir=model_dir,
                download_dir=model_dir,
            )
            cron.start()
        elif predictor_type == TensorFlowNeuronPredictorType:
            cron = prepare_tfs_servers_api(api_spec, model_dir)
            cron.start()

        # wait until the cron finishes its first pass
        if cron:
            while cron.is_alive() and not cron.ran_once():
                time.sleep(0.25)

            # disable live reloading when the BatchAPI kind is used, and for the TF
            # predictor when Inferentia is used with multiple processes (num procs > 1)
            if api_spec["kind"] != "RealtimeAPI" or (
                predictor_type == TensorFlowNeuronPredictorType
                and has_multiple_tf_servers
                and num_processes > 1
            ):
                cron.stop()

    # synchronize with the other serving processes
    open("/mnt/workspace/init_script_run.txt", "a").close()

    # don't exit the script while the cron is running
    while cron and cron.is_alive():
        time.sleep(0.25)

    # exit with an error code if the cron exited with one
    if cron and isinstance(cron.exitcode, int) and cron.exitcode != 0:
        # a negative exitcode means the cron was killed by a signal
        if cron.exitcode < 0:
            sys.exit(-cron.exitcode)
        sys.exit(cron.exitcode)
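
# Sketch: one way a serving process could claim a port reserved in
# /run/used_ports.json by main() above. The fcntl-based locking here is an
# assumption for illustration, not necessarily how the actual serving
# processes coordinate; the function name is hypothetical.
import fcntl

def claim_tf_serving_port(path: str = "/run/used_ports.json") -> int:
    with open(path, "r+") as f:
        fcntl.flock(f, fcntl.LOCK_EX)  # serialize claimants across processes
        used_ports = json.load(f)
        for port, used in used_ports.items():
            if not used:
                used_ports[port] = True  # mark the port as taken
                f.seek(0)
                json.dump(used_ports, f)
                f.truncate()
                return int(port)
        raise RuntimeError("no unclaimed TF serving port left")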
def get_models_from_api_spec(
    api_spec: dict, model_dir: str = "/mnt/model"
) -> CuratedModelResources:
    """
    Only effective for predictor:model_path, predictor:models:paths, or for
    predictor:models:dir when the dir is a local path. It does not apply when
    predictor:models:dir is set to a cloud (S3/GCS) model path.
    """
    predictor = api_spec["predictor"]
    if not predictor["model_path"] and not predictor["models"]:
        return CuratedModelResources([])

    predictor_type = predictor_type_from_api_spec(api_spec)

    # for predictor.model_path
    models = []
    if predictor["model_path"]:
        model = {
            "name": cortex.consts.SINGLE_MODEL_NAME,
            "model_path": predictor["model_path"],
            "signature_key": predictor["signature_key"],
        }
        models.append(model)

    # for predictor.models.paths
    if predictor["models"] and predictor["models"]["paths"]:
        for model in predictor["models"]["paths"]:
            models.append(
                {
                    "name": model["name"],
                    "model_path": model["model_path"],
                    "signature_key": model["signature_key"],
                }
            )

    # build model resources for predictor.model_path or predictor.models.paths
    model_resources = []
    for model in models:
        model_resource = {}
        model_resource["name"] = model["name"]
        model_resource["s3_path"] = model["model_path"].startswith("s3://")
        model_resource["gcs_path"] = model["model_path"].startswith("gs://")
        model_resource["local_path"] = (
            not model_resource["s3_path"] and not model_resource["gcs_path"]
        )

        if not model["signature_key"] and predictor["models"]:
            model_resource["signature_key"] = predictor["models"]["signature_key"]
        else:
            model_resource["signature_key"] = model["signature_key"]

        if model_resource["s3_path"] or model_resource["gcs_path"]:
            model_resource["model_path"] = model["model_path"]
            _, versions, _, _, _, _, _ = find_all_cloud_models(
                False, "", predictor_type, [model_resource["model_path"]], [model_resource["name"]]
            )
            if model_resource["name"] not in versions:
                continue
            model_resource["versions"] = versions[model_resource["name"]]
        else:
            model_resource["model_path"] = os.path.join(model_dir, model_resource["name"])
            model_resource["versions"] = os.listdir(model_resource["model_path"])

        model_resources.append(model_resource)

    # build model resources for predictor.models.dir (local dirs only)
    if (
        predictor["models"]
        and predictor["models"]["dir"]
        and not predictor["models"]["dir"].startswith("s3://")
        and not predictor["models"]["dir"].startswith("gs://")
    ):
        for model_name in os.listdir(model_dir):
            model_resource = {
                "name": model_name,
                "s3_path": False,
                "gcs_path": False,
                "local_path": True,
                "signature_key": predictor["models"]["signature_key"],
                "model_path": os.path.join(model_dir, model_name),
            }
            model_resource["versions"] = os.listdir(model_resource["model_path"])
            model_resources.append(model_resource)

    return CuratedModelResources(model_resources)
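
# Sketch: for a local predictor.models.dir laid out as
# <model_dir>/<model_name>/<version>/, the dir branch above yields one entry
# per model directory. The model name and versions below are illustrative
# assumptions, not values the function produces on its own.
_example_model_resource = {
    "name": "iris",
    "s3_path": False,
    "gcs_path": False,
    "local_path": True,
    "signature_key": None,
    "model_path": "/mnt/model/iris",
    "versions": ["1", "2"],  # one entry per version subdirectory
}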