def _deploy(
    self,
    config_file: str,
    force: bool = False,
    wait: bool = False,
) -> dict:
    """
    Deploy or update APIs specified in the config_file.

    Only the first entry of the CLI's JSON output is used, so this handles
    a single API per call.

    Args:
        config_file: Local path to a yaml file defining Cortex APIs.
        force: Override any in-progress api updates.
        wait: For a RealtimeAPI, stream its logs to stdout until its status
            is no longer "status_updating". Ignored for other API kinds.

    Returns:
        The first deployment result reported by the CLI when not waiting
        (or when the API is not a RealtimeAPI); otherwise the most recent
        response from `self.get_api` for the deployed API.

    Raises:
        IndexError: If the CLI output parses to an empty list.
    """
    args = [
        "deploy",
        config_file,
        "--env",
        self.env_name,
        "-o",
        "json",
        "-y",
    ]
    if force:
        args.append("--force")

    output = run_cli(args, hide_output=True)
    deploy_results = json.loads(output.strip())
    deploy_result = deploy_results[0]

    if not wait:
        return deploy_result

    # Only RealtimeAPIs have logs streamed; return before the warm-up sleep
    # below so other kinds are not delayed for nothing.
    api_name = deploy_result["api"]["spec"]["name"]
    if deploy_result["api"]["spec"]["kind"] != "RealtimeAPI":
        return deploy_result

    # logging immediately will show previous versions of the replica terminating;
    # wait a few seconds for the new replicas to start initializing
    time.sleep(5)

    def stream_to_stdout(process):
        # Copy the subprocess output one character at a time so partial
        # lines show up immediately; stops at EOF ("").
        for c in iter(lambda: process.stdout.read(1), ""):
            sys.stdout.write(c)
            sys.stdout.flush()

    env = os.environ.copy()
    env["CORTEX_CLI_INVOKER"] = "python"
    process = subprocess.Popen(
        [get_cli_path(), "logs", "--env", self.env_name, api_name, "-y"],
        stderr=subprocess.STDOUT,
        stdout=subprocess.PIPE,
        encoding="utf8",
        errors="replace",  # replace non-utf8 characters with `?` instead of failing
        env=env,
    )

    streamer = threading.Thread(target=stream_to_stdout, args=[process])
    streamer.start()

    # Stays None if the logs process exits before the first status check.
    api = None
    try:
        while process.poll() is None:
            api = self.get_api(api_name)
            if api["status"]["status_code"] != "status_updating":
                time.sleep(5)  # accommodate latency in log streaming from the cluster
                break
            time.sleep(5)
    finally:
        # Always stop the log stream, including when get_api raises, so the
        # subprocess and the non-daemon streamer thread are not left behind.
        if process.poll() is None:
            process.terminate()
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()
        streamer.join(timeout=10)
        # Only close the pipe once the reader is done with it.
        if not streamer.is_alive():
            process.stdout.close()

    if api is None:
        # The logs process ended before any status was fetched; fetch it once
        # so the caller still gets the API description.
        api = self.get_api(api_name)

    return api
# Job status code strings.
# NOTE(review): presumably these mirror status values reported by Cortex — confirm.
JOB_STATUS_RUNNING = 'status_running'
JOB_STATUS_UNEXPECTED_ERROR = 'status_unexpected_error'
# NOTE(review): the name says ENQUEUED but the value says "enqueue" — confirm which one is intended.
JOB_STATUS_ENQUEUED_FAILED = 'status_enqueue_failed'

# Timeouts and sleep intervals. The `_SEC` suffix indicates seconds.
CORTEX_DELETE_TIMEOUT_SEC = 10 * 60
CORTEX_DEPLOY_REPORTED_TIMEOUT_SEC = 60
# No `_SEC` suffix here; presumably seconds like the others — TODO confirm.
CORTEX_DEFAULT_DEPLOYMENT_TIMEOUT = 20 * 60
# The default API timeout is tied to the default deployment timeout.
CORTEX_DEFAULT_API_TIMEOUT = CORTEX_DEFAULT_DEPLOYMENT_TIMEOUT
# The minimum API timeout is tied to the delete timeout.
CORTEX_MIN_API_TIMEOUT_SEC = CORTEX_DELETE_TIMEOUT_SEC
CORTEX_DEPLOY_RETRY_BASE_SLEEP_SEC = 5 * 60
CORTEX_STATUS_CHECK_SLEEP_SEC = 15
INFINITE_TIMEOUT_SEC = 30 * 365 * 24 * 60 * 60  # 30 years

# Read from the CORTEX_WAIT_BEFORE_JOB_GET environment variable; defaults to 30.
# Raises ValueError at import time if the variable is set to a non-integer.
WAIT_BEFORE_JOB_GET = int(os.environ.get('CORTEX_WAIT_BEFORE_JOB_GET', str(30)))

# Resolved once, when this module is imported.
CORTEX_PATH = get_cli_path()

logger = logging.getLogger('cortex_client')

# NOTE(review): presumably a module-level cache for a shared CortexClient,
# assigned elsewhere in the file — confirm.
__cortex_client_instance = None


class CortexClient:
    """
    An object used to execute commands on Cortex and to maintain API state
    in the db so that garbage can be collected.
    """

    def __init__(self, db_connection_pool: ThreadedConnectionPool, gc_interval_sec=30 * 60, cortex_env="aws"):
        """
        Store the connection pool and start up the garbage API collector.

        Args:
            db_connection_pool: Connection pool kept on the instance as
                `self.db_connection_pool`.
            gc_interval_sec: Passed to `_init_garbage_api_collector`;
                defaults to 30 * 60.
            cortex_env: Not used in the part of this method that is visible
                here. NOTE(review): confirm whether it should be stored.
        """
        self.db_connection_pool = db_connection_pool
        self._init_garbage_api_collector(gc_interval_sec)
def deploy(
    self,
    config_file: str,
    force: bool = False,
    wait: bool = False,
) -> list:
    """
    Deploy or update APIs specified in the config_file.

    Args:
        config_file: Local path to a yaml file defining Cortex APIs.
        force: Override any in-progress api updates.
        wait: For each deployed RealtimeAPI, in order, stream its logs to
            stdout until its status is no longer "status_updating". Other
            API kinds are skipped.

    Returns:
        The parsed JSON output of the CLI `deploy` command: deployment
        status, API specification, and endpoint for each API.
    """
    args = [
        "deploy",
        config_file,
        "--env",
        self.env,
        "-o",
        "mixed",
    ]
    if force:
        args.append("--force")

    output = run_cli(args, mixed_output=True)
    deploy_results = json.loads(output.strip())

    if not wait:
        return deploy_results

    def stream_to_stdout(process):
        # Copy the subprocess output one character at a time and flush each
        # write so log lines appear as they arrive; stops at EOF ("").
        for c in iter(lambda: process.stdout.read(1), ""):
            sys.stdout.write(c)
            sys.stdout.flush()

    # The environment for the log subprocess is the same for every API.
    env = os.environ.copy()
    env["CORTEX_CLI_INVOKER"] = "python"

    for deploy_result in deploy_results:
        api_name = deploy_result["api"]["spec"]["name"]
        kind = deploy_result["api"]["spec"]["kind"]
        if kind != "RealtimeAPI":
            continue

        process = subprocess.Popen(
            [get_cli_path(), "logs", "--env", self.env, api_name],
            stderr=subprocess.STDOUT,
            stdout=subprocess.PIPE,
            encoding="utf8",
            errors="replace",  # replace non-utf8 characters instead of crashing the streamer
            env=env,
        )

        streamer = threading.Thread(target=stream_to_stdout, args=[process])
        streamer.start()

        try:
            while process.poll() is None:
                status_code = self.get_api(api_name)["status"]["status_code"]
                if status_code != "status_updating":
                    if status_code == "status_live":
                        # Give the final log lines a moment to arrive.
                        time.sleep(2)
                    break
                time.sleep(2)
        finally:
            # Always stop the log stream, including when get_api raises, so
            # the subprocess and the non-daemon streamer thread are not left
            # behind.
            if process.poll() is None:
                process.terminate()
            try:
                process.wait(timeout=10)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()
            streamer.join(timeout=10)
            # Only close the pipe once the reader is done with it.
            if not streamer.is_alive():
                process.stdout.close()

    return deploy_results