def start():
    while not pathlib.Path("/mnt/workspace/init_script_run.txt").is_file():
        time.sleep(0.2)

    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    api_spec_path = os.environ["CORTEX_API_SPEC"]
    job_spec_path = os.environ["CORTEX_JOB_SPEC"]
    project_dir = os.environ["CORTEX_PROJECT_DIR"]

    model_dir = os.getenv("CORTEX_MODEL_DIR")
    tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    region = os.getenv("AWS_REGION")

    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        with LockedFile("/run/used_ports.json", "r+") as f:
            used_ports = json.load(f)
            for port in used_ports.keys():
                if not used_ports[port]:
                    tf_serving_port = port
                    used_ports[port] = True
                    break
            f.seek(0)
            json.dump(used_ports, f)
            f.truncate()

    api = get_api(provider, api_spec_path, model_dir, cache_dir, region)
    storage, api_spec = get_spec(provider, api_spec_path, cache_dir, region)
    job_spec = get_job_spec(storage, cache_dir, job_spec_path)

    client = api.predictor.initialize_client(
        tf_serving_host=tf_serving_host, tf_serving_port=tf_serving_port
    )
    logger.info("loading the predictor from {}".format(api.predictor.path))
    predictor_impl = api.predictor.initialize_impl(project_dir, client, job_spec)

    local_cache["api_spec"] = api
    local_cache["provider"] = provider
    local_cache["job_spec"] = job_spec
    local_cache["predictor_impl"] = predictor_impl
    local_cache["predict_fn_args"] = inspect.getfullargspec(predictor_impl.predict).args
    local_cache["sqs_client"] = boto3.client("sqs", region_name=region)

    open("/mnt/workspace/api_readiness.txt", "a").close()

    logger.info("polling for batches...")
    sqs_loop()
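# --- illustration only ---
# A minimal sketch of how /run/used_ports.json might be seeded so that the
# port-claiming loop in start() has something to scan. The file layout (a JSON
# object mapping candidate TF Serving ports to a "claimed" flag), the base port,
# and the server count below are assumptions, not taken from this codebase.
import json


def seed_used_ports(path="/run/used_ports.json", base_port=9000, num_servers=4):
    used_ports = {str(base_port + i): False for i in range(num_servers)}
    with open(path, "w") as f:
        json.dump(used_ports, f)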
def _get_model(self, model_name: str, model_version: str) -> Any:
    """
    Checks if the versioned model is on disk, then checks if the model is in memory,
    and if not, loads it into memory and returns it.

    Args:
        model_name: Name of the model, as it's specified in predictor:models:paths
            or, in the other case, as the models are named on disk.
        model_version: Version of the model, as it's found on disk. Can also infer
            the version number from the "latest" tag.

    Exceptions:
        RuntimeError: if another thread tried to load the model at the very same time.

    Returns:
        The model as returned by the self._load_model method.
        None if the model wasn't found or if it didn't pass the validation.
    """

    model = None
    tag = ""
    if model_version == "latest":
        tag = model_version

    if not self._caching_enabled:
        # determine model version
        if tag == "latest":
            model_version = self._get_latest_model_version_from_disk(model_name)
        model_id = model_name + "-" + model_version

        # grab shared access to versioned model
        resource = os.path.join(self._lock_dir, model_id + ".txt")
        with LockedFile(resource, "r", reader_lock=True) as f:
            # check model status
            file_status = f.read()
            if file_status == "" or file_status == "not-available":
                raise WithBreak

            current_upstream_ts = int(file_status.split(" ")[1])
            update_model = False

            # grab shared access to models holder and retrieve model
            with LockedModel(self._models, "r", model_name, model_version):
                status, local_ts = self._models.has_model(model_name, model_version)
                if status == "not-available" or (
                    status == "in-memory" and local_ts != current_upstream_ts
                ):
                    update_model = True
                    raise WithBreak
                model, _ = self._models.get_model(model_name, model_version, tag)

            # load model into memory and retrieve it
            if update_model:
                with LockedModel(self._models, "w", model_name, model_version):
                    status, _ = self._models.has_model(model_name, model_version)
                    if status == "not-available" or (
                        status == "in-memory" and local_ts != current_upstream_ts
                    ):
                        if status == "not-available":
                            logger.info(
                                f"loading model {model_name} of version {model_version} (thread {td.get_ident()})"
                            )
                        else:
                            logger.info(
                                f"reloading model {model_name} of version {model_version} (thread {td.get_ident()})"
                            )
                        try:
                            self._models.load_model(
                                model_name,
                                model_version,
                                current_upstream_ts,
                                [tag],
                            )
                        except Exception as e:
                            raise UserRuntimeException(
                                f"failed (re-)loading model {model_name} of version {model_version} (thread {td.get_ident()})",
                                str(e),
                            )
                    model, _ = self._models.get_model(model_name, model_version, tag)

    if not self._multiple_processes and self._caching_enabled:
        # determine model version
        try:
            if tag == "latest":
                model_version = self._get_latest_model_version_from_tree(
                    model_name, self._models_tree.model_info(model_name)
                )
        except ValueError:
            # if model_name hasn't been found
            raise UserRuntimeException(
                f"'{model_name}' model of tag latest wasn't found in the list of available models"
            )

        # grab shared access to model tree
        available_model = True
        with LockedModelsTree(self._models_tree, "r", model_name, model_version):

            # check if the versioned model exists
            model_id = model_name + "-" + model_version
            if model_id not in self._models_tree:
                available_model = False
                raise WithBreak

            # retrieve model tree's metadata
            upstream_model = self._models_tree[model_id]
            current_upstream_ts = int(upstream_model["timestamp"].timestamp())

        if not available_model:
            return None

        # grab shared access to models holder and retrieve model
        update_model = False
        with LockedModel(self._models, "r", model_name, model_version):
            status, local_ts = self._models.has_model(model_name, model_version)
            if status in ["not-available", "on-disk"] or (
                status != "not-available" and local_ts != current_upstream_ts
            ):
                update_model = True
                raise WithBreak
            model, _ = self._models.get_model(model_name, model_version, tag)

        # download the model, load it into memory and retrieve it
        if update_model:
            # grab exclusive access to models holder
            with LockedModel(self._models, "w", model_name, model_version):

                # check model status
                status, local_ts = self._models.has_model(model_name, model_version)

                # refresh disk model
                if status == "not-available" or (
                    status in ["on-disk", "in-memory"] and local_ts != current_upstream_ts
                ):
                    if status == "not-available":
                        logger.info(
                            f"model {model_name} of version {model_version} not found locally; continuing with the download..."
                        )
                    elif status == "on-disk":
                        logger.info(
                            f"found newer model {model_name} of version {model_version} on the {upstream_model['provider']} upstream than the one on the disk"
                        )
                    else:
                        logger.info(
                            f"found newer model {model_name} of version {model_version} on the {upstream_model['provider']} upstream than the one loaded into memory"
                        )

                    # remove model from disk and memory
                    if status == "on-disk":
                        logger.info(
                            f"removing model from disk for model {model_name} of version {model_version}"
                        )
                        self._models.remove_model(model_name, model_version)
                    if status == "in-memory":
                        logger.info(
                            f"removing model from disk and memory for model {model_name} of version {model_version}"
                        )
                        self._models.remove_model(model_name, model_version)

                    # download model
                    logger.info(
                        f"downloading model {model_name} of version {model_version} from the {upstream_model['provider']} upstream"
                    )
                    date = self._models.download_model(
                        upstream_model["provider"],
                        upstream_model["bucket"],
                        model_name,
                        model_version,
                        upstream_model["path"],
                    )
                    if not date:
                        raise WithBreak
                    current_upstream_ts = int(date.timestamp())

                # load model
                try:
                    logger.info(
                        f"loading model {model_name} of version {model_version} into memory"
                    )
                    self._models.load_model(
                        model_name,
                        model_version,
                        current_upstream_ts,
                        [tag],
                    )
                except Exception as e:
                    raise UserRuntimeException(
                        f"failed (re-)loading model {model_name} of version {model_version} (thread {td.get_ident()})",
                        str(e),
                    )

                # retrieve model
                model, _ = self._models.get_model(model_name, model_version, tag)

    return model
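# --- illustration only ---
# A hypothetical caller sketch for _get_model: resolving a model by name/version
# (falling back to "latest") before running inference. The query-parameter names,
# the predict() call on the returned model, and the surrounding helper are
# assumptions for illustration; they are not part of the class above.
def _run_inference(client, payload, query_params):
    model_name = query_params.get("model_name", "default")
    model_version = query_params.get("model_version", "latest")

    model = client._get_model(model_name, model_version)
    if model is None:
        raise RuntimeError(
            f"model {model_name} of version {model_version} is not available"
        )
    return model.predict(payload)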
def start_fn():
    provider = os.environ["CORTEX_PROVIDER"]
    project_dir = os.environ["CORTEX_PROJECT_DIR"]
    spec_path = os.environ["CORTEX_API_SPEC"]

    model_dir = os.getenv("CORTEX_MODEL_DIR")
    cache_dir = os.getenv("CORTEX_CACHE_DIR")
    region = os.getenv("AWS_REGION")

    tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        with LockedFile("/run/used_ports.json", "r+") as f:
            used_ports = json.load(f)
            for port in used_ports.keys():
                if not used_ports[port]:
                    tf_serving_port = port
                    used_ports[port] = True
                    break
            f.seek(0)
            json.dump(used_ports, f)
            f.truncate()

    try:
        api = get_api(provider, spec_path, model_dir, cache_dir, region)

        client = api.predictor.initialize_client(
            tf_serving_host=tf_serving_host, tf_serving_port=tf_serving_port
        )

        with FileLock("/run/init_stagger.lock"):
            logger().info("loading the predictor from {}".format(api.predictor.path))
            predictor_impl = api.predictor.initialize_impl(project_dir, client)

        local_cache["api"] = api
        local_cache["provider"] = provider
        local_cache["client"] = client
        local_cache["predictor_impl"] = predictor_impl
        local_cache["predict_fn_args"] = inspect.getfullargspec(predictor_impl.predict).args

        if api.server_side_batching_enabled:
            dynamic_batching_config = api.api_spec["predictor"]["server_side_batching"]
            local_cache["dynamic_batcher"] = DynamicBatcher(
                predictor_impl,
                max_batch_size=dynamic_batching_config["max_batch_size"],
                batch_interval=dynamic_batching_config["batch_interval"]
                / NANOSECONDS_IN_SECOND,  # convert nanoseconds to seconds
            )

        if util.has_method(predictor_impl, "post_predict"):
            local_cache["post_predict_fn_args"] = inspect.getfullargspec(
                predictor_impl.post_predict
            ).args

        predict_route = "/"
        if provider != "local":
            predict_route = "/predict"
        local_cache["predict_route"] = predict_route
    except:
        logger().exception("failed to start api")
        sys.exit(1)

    if (
        provider != "local"
        and api.monitoring is not None
        and api.monitoring.model_type == "classification"
    ):
        try:
            local_cache["class_set"] = api.get_cached_classes()
        except:
            logger().warn("an error occurred while attempting to load classes", exc_info=True)

    app.add_api_route(local_cache["predict_route"], predict, methods=["POST"])
    app.add_api_route(local_cache["predict_route"], get_summary, methods=["GET"])

    return app
def start():
    while not pathlib.Path("/mnt/workspace/init_script_run.txt").is_file():
        time.sleep(0.2)

    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    api_spec_path = os.environ["CORTEX_API_SPEC"]
    job_spec_path = os.environ["CORTEX_JOB_SPEC"]
    project_dir = os.environ["CORTEX_PROJECT_DIR"]

    model_dir = os.getenv("CORTEX_MODEL_DIR")
    tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    region = os.getenv("AWS_REGION")

    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        with LockedFile("/run/used_ports.json", "r+") as f:
            used_ports = json.load(f)
            for port in used_ports.keys():
                if not used_ports[port]:
                    tf_serving_port = port
                    used_ports[port] = True
                    break
            f.seek(0)
            json.dump(used_ports, f)
            f.truncate()

    api = get_api(api_spec_path, model_dir, cache_dir)

    with open(job_spec_path) as json_file:
        job_spec = json.load(json_file)

    sqs_client = boto3.client("sqs", region_name=region)

    client = api.predictor.initialize_client(
        tf_serving_host=tf_serving_host, tf_serving_port=tf_serving_port
    )

    try:
        log.info("loading the predictor from {}".format(api.predictor.path))
        metrics_client = MetricsClient(api.statsd)
        predictor_impl = api.predictor.initialize_impl(
            project_dir=project_dir,
            client=client,
            metrics_client=metrics_client,
            job_spec=job_spec,
        )
    except UserRuntimeException as err:
        err.wrap(f"failed to start job {job_spec['job_id']}")
        log.error(str(err), exc_info=True)
        sys.exit(1)
    except Exception as err:
        capture_exception(err)
        log.error(f"failed to start job {job_spec['job_id']}", exc_info=True)
        sys.exit(1)

    # crons only stop if an unhandled exception occurs
    def check_if_crons_have_failed():
        while True:
            for cron in api.predictor.crons:
                if not cron.is_alive():
                    os.kill(os.getpid(), signal.SIGQUIT)
            time.sleep(1)

    threading.Thread(target=check_if_crons_have_failed, daemon=True).start()

    local_cache["api"] = api
    local_cache["job_spec"] = job_spec
    local_cache["predictor_impl"] = predictor_impl
    local_cache["predict_fn_args"] = inspect.getfullargspec(predictor_impl.predict).args
    local_cache["sqs_client"] = sqs_client

    open("/mnt/workspace/api_readiness.txt", "a").close()

    log.info("polling for batches...")

    try:
        sqs_handler = SQSHandler(
            sqs_client=sqs_client,
            queue_url=job_spec["sqs_url"],
            renewal_period=MESSAGE_RENEWAL_PERIOD,
            visibility_timeout=INITIAL_MESSAGE_VISIBILITY,
            not_found_sleep_time=MESSAGE_NOT_FOUND_SLEEP,
            message_wait_time=SQS_POLL_WAIT_TIME,
            dead_letter_queue_url=job_spec.get("sqs_dead_letter_queue"),
            stop_if_no_messages=True,
        )
        sqs_handler.start(
            message_fn=handle_batch_message,
            message_failure_fn=handle_batch_failure,
            on_job_complete_fn=handle_on_job_complete,
        )
    except UserRuntimeException as err:
        err.wrap(f"failed to run job {job_spec['job_id']}")
        log.error(str(err), exc_info=True)
        sys.exit(1)
    except Exception as err:
        capture_exception(err)
        log.error(f"failed to run job {job_spec['job_id']}", exc_info=True)
        sys.exit(1)
def start():
    while not pathlib.Path("/mnt/workspace/init_script_run.txt").is_file():
        time.sleep(0.2)

    cache_dir = os.environ["CORTEX_CACHE_DIR"]
    provider = os.environ["CORTEX_PROVIDER"]
    api_spec_path = os.environ["CORTEX_API_SPEC"]
    job_spec_path = os.environ["CORTEX_JOB_SPEC"]
    project_dir = os.environ["CORTEX_PROJECT_DIR"]

    model_dir = os.getenv("CORTEX_MODEL_DIR")
    tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    region = os.getenv("AWS_REGION")

    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        with LockedFile("/run/used_ports.json", "r+") as f:
            used_ports = json.load(f)
            for port in used_ports.keys():
                if not used_ports[port]:
                    tf_serving_port = port
                    used_ports[port] = True
                    break
            f.seek(0)
            json.dump(used_ports, f)
            f.truncate()

    api = get_api(provider, api_spec_path, model_dir, cache_dir, region)
    storage, _ = get_spec(provider, api_spec_path, cache_dir, region)
    job_spec = get_job_spec(storage, cache_dir, job_spec_path)

    client = api.predictor.initialize_client(
        tf_serving_host=tf_serving_host, tf_serving_port=tf_serving_port
    )

    try:
        logger.info("loading the predictor from {}".format(api.predictor.path))
        metrics_client = MetricsClient(api.statsd)
        predictor_impl = api.predictor.initialize_impl(
            project_dir=project_dir,
            client=client,
            metrics_client=metrics_client,
            job_spec=job_spec,
        )
    except UserRuntimeException as err:
        err.wrap(f"failed to start job {job_spec['job_id']}")
        logger.error(str(err), exc_info=True)
        sys.exit(1)
    except Exception as err:
        capture_exception(err)
        logger.error(f"failed to start job {job_spec['job_id']}", exc_info=True)
        sys.exit(1)

    # crons only stop if an unhandled exception occurs
    def check_if_crons_have_failed():
        while True:
            for cron in api.predictor.crons:
                if not cron.is_alive():
                    os.kill(os.getpid(), signal.SIGQUIT)
            time.sleep(1)

    threading.Thread(target=check_if_crons_have_failed, daemon=True).start()

    local_cache["api"] = api
    local_cache["provider"] = provider
    local_cache["job_spec"] = job_spec
    local_cache["predictor_impl"] = predictor_impl
    local_cache["predict_fn_args"] = inspect.getfullargspec(predictor_impl.predict).args
    local_cache["sqs_client"] = boto3.client("sqs", region_name=region)

    open("/mnt/workspace/api_readiness.txt", "a").close()

    logger.info("polling for batches...")

    try:
        sqs_loop()
    except UserRuntimeException as err:
        err.wrap(f"failed to run job {job_spec['job_id']}")
        logger.error(str(err), exc_info=True)
        sys.exit(1)
    except Exception as err:
        capture_exception(err)
        logger.error(f"failed to run job {job_spec['job_id']}", exc_info=True)
        sys.exit(1)
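# --- illustration only ---
# sqs_loop() is defined elsewhere in this package; the sketch below is NOT that
# implementation, only a minimal example of the long-polling pattern it relies on,
# using the standard boto3 SQS API (receive_message/delete_message). The queue URL
# argument and the handle_batch() callback are assumptions for illustration.
def example_sqs_poll(sqs_client, queue_url, handle_batch):
    while True:
        response = sqs_client.receive_message(
            QueueUrl=queue_url,
            MaxNumberOfMessages=1,
            WaitTimeSeconds=10,  # long polling
        )
        for message in response.get("Messages", []):
            handle_batch(message["Body"])
            sqs_client.delete_message(
                QueueUrl=queue_url, ReceiptHandle=message["ReceiptHandle"]
            )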
def start_fn():
    provider = os.environ["CORTEX_PROVIDER"]
    project_dir = os.environ["CORTEX_PROJECT_DIR"]
    spec_path = os.environ["CORTEX_API_SPEC"]

    model_dir = os.getenv("CORTEX_MODEL_DIR")
    cache_dir = os.getenv("CORTEX_CACHE_DIR")
    region = os.getenv("AWS_REGION")

    tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    try:
        has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
        if has_multiple_servers:
            with LockedFile("/run/used_ports.json", "r+") as f:
                used_ports = json.load(f)
                for port in used_ports.keys():
                    if not used_ports[port]:
                        tf_serving_port = port
                        used_ports[port] = True
                        break
                f.seek(0)
                json.dump(used_ports, f)
                f.truncate()

        api = get_api(provider, spec_path, model_dir, cache_dir, region)

        client = api.predictor.initialize_client(
            tf_serving_host=tf_serving_host, tf_serving_port=tf_serving_port
        )

        with FileLock("/run/init_stagger.lock"):
            logger.info("loading the predictor from {}".format(api.predictor.path))
            predictor_impl = api.predictor.initialize_impl(project_dir, client)

        # crons only stop if an unhandled exception occurs
        def check_if_crons_have_failed():
            while True:
                for cron in api.predictor.crons:
                    if not cron.is_alive():
                        os.kill(os.getpid(), signal.SIGQUIT)
                time.sleep(1)

        threading.Thread(target=check_if_crons_have_failed, daemon=True).start()

        local_cache["api"] = api
        local_cache["provider"] = provider
        local_cache["client"] = client
        local_cache["predictor_impl"] = predictor_impl
        local_cache["predict_fn_args"] = inspect.getfullargspec(predictor_impl.predict).args

        if api.python_server_side_batching_enabled:
            dynamic_batching_config = api.api_spec["predictor"]["server_side_batching"]
            local_cache["dynamic_batcher"] = DynamicBatcher(
                predictor_impl,
                max_batch_size=dynamic_batching_config["max_batch_size"],
                batch_interval=dynamic_batching_config["batch_interval"]
                / NANOSECONDS_IN_SECOND,  # convert nanoseconds to seconds
            )

        if util.has_method(predictor_impl, "post_predict"):
            local_cache["post_predict_fn_args"] = inspect.getfullargspec(
                predictor_impl.post_predict
            ).args

        predict_route = "/predict"
        local_cache["predict_route"] = predict_route
    except (UserRuntimeException, Exception) as err:
        if not isinstance(err, UserRuntimeException):
            capture_exception(err)
        logger.exception("failed to start api")
        sys.exit(1)

    app.add_api_route(local_cache["predict_route"], predict, methods=["POST"])
    app.add_api_route(local_cache["predict_route"], get_summary, methods=["GET"])

    return app
def init():
    project_dir = os.environ["CORTEX_PROJECT_DIR"]
    spec_path = os.environ["CORTEX_API_SPEC"]

    model_dir = os.getenv("CORTEX_MODEL_DIR")
    cache_dir = os.getenv("CORTEX_CACHE_DIR")
    region = os.getenv("AWS_REGION")

    tf_serving_port = os.getenv("CORTEX_TF_BASE_SERVING_PORT", "9000")
    tf_serving_host = os.getenv("CORTEX_TF_SERVING_HOST", "localhost")

    has_multiple_servers = os.getenv("CORTEX_MULTIPLE_TF_SERVERS")
    if has_multiple_servers:
        with LockedFile("/run/used_ports.json", "r+") as f:
            used_ports = json.load(f)
            for port in used_ports.keys():
                if not used_ports[port]:
                    tf_serving_port = port
                    used_ports[port] = True
                    break
            f.seek(0)
            json.dump(used_ports, f)
            f.truncate()

    api = get_api(spec_path, model_dir, cache_dir, region)

    config: Dict[str, Any] = {
        "api": None,
        "client": None,
        "predictor_impl": None,
        "module_proto_pb2_grpc": None,
    }

    proto_without_ext = pathlib.Path(api.predictor.protobuf_path).stem
    module_proto_pb2 = importlib.import_module(proto_without_ext + "_pb2")
    module_proto_pb2_grpc = importlib.import_module(proto_without_ext + "_pb2_grpc")

    client = api.predictor.initialize_client(
        tf_serving_host=tf_serving_host, tf_serving_port=tf_serving_port
    )

    with FileLock("/run/init_stagger.lock"):
        logger.info("loading the predictor from {}".format(api.predictor.path))
        metrics_client = MetricsClient(api.statsd)
        predictor_impl = api.predictor.initialize_impl(
            project_dir=project_dir,
            client=client,
            metrics_client=metrics_client,
            proto_module_pb2=module_proto_pb2,
        )

    # crons only stop if an unhandled exception occurs
    def check_if_crons_have_failed():
        while True:
            for cron in api.predictor.crons:
                if not cron.is_alive():
                    os.kill(os.getpid(), signal.SIGQUIT)
            time.sleep(1)

    threading.Thread(target=check_if_crons_have_failed, daemon=True).start()

    ServicerClass = get_servicer_from_module(module_proto_pb2_grpc)

    class PredictorServicer(ServicerClass):
        def __init__(self, predict_fn_args, predictor_impl, api):
            self.predict_fn_args = predict_fn_args
            self.predictor_impl = predictor_impl
            self.api = api

        def Predict(self, payload, context):
            try:
                kwargs = build_predict_kwargs(self.predict_fn_args, payload, context)
                response = self.predictor_impl.predict(**kwargs)
                self.api.post_status_code_request_metrics(200)
            except Exception:
                logger.error(traceback.format_exc())
                self.api.post_status_code_request_metrics(500)
                context.abort(grpc.StatusCode.INTERNAL, "internal server error")
            return response

    config["api"] = api
    config["client"] = client
    config["predictor_impl"] = predictor_impl
    config["predict_fn_args"] = inspect.getfullargspec(predictor_impl.predict).args
    config["module_proto_pb2"] = module_proto_pb2
    config["module_proto_pb2_grpc"] = module_proto_pb2_grpc
    config["predictor_servicer"] = PredictorServicer

    return config
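# --- illustration only ---
# A minimal sketch of how the config returned by init() could be wired into a gRPC
# server. Assumptions: the generated *_pb2_grpc module follows the standard
# "add_<Service>Servicer_to_server" naming convention, and the serving address and
# worker count below are placeholders, not values used by this project.
import grpc
from concurrent import futures


def example_serve(address="0.0.0.0:9000"):
    config = init()
    servicer = config["predictor_servicer"](
        predict_fn_args=config["predict_fn_args"],
        predictor_impl=config["predictor_impl"],
        api=config["api"],
    )

    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))

    # locate the generated registration helper (e.g. add_PredictorServicer_to_server)
    module = config["module_proto_pb2_grpc"]
    add_fn = next(
        getattr(module, name)
        for name in dir(module)
        if name.startswith("add_") and name.endswith("_to_server")
    )
    add_fn(servicer, server)

    server.add_insecure_port(address)
    server.start()
    server.wait_for_termination()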