def create_auth_context():
    """Initialize the auth context for the current run, retrying on client errors.

    Builds a ``RunClient`` and calls ``impersonate`` with the client's owner,
    project, run uuid and underlying API client. The call is attempted up to
    three times, backing off a little longer after each failure. When every
    attempt fails, the failure is reported through ``run_client.log_failed``
    (with the last error and its traceback) before raising.

    Raises:
        PolyaxonContainerException: if the ``RunClient`` cannot be created, or
            if all authentication attempts fail.
    """
    try:
        run_client = RunClient()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    max_attempts = 3
    failure_details = None
    last_error = None
    for attempt in range(1, max_attempts + 1):
        try:
            impersonate(
                owner=run_client.owner,
                project=run_client.project,
                run_uuid=run_client.run_uuid,
                client=run_client.client,
            )
        except PolyaxonClientException as e:
            # `e` is unbound when the except block exits, so keep a reference
            # for chaining the final exception.
            last_error = e
            print("Could not establish connection, retrying ...")
            failure_details = "Polyaxon auth initialized failed authenticating the operation: {}\n{}".format(
                repr(e), traceback.format_exc()
            )
            # Back off 2s, then 3s; there is nothing to wait for after the
            # final attempt.
            if attempt < max_attempts:
                time.sleep(attempt + 1)
        else:
            print("Auth context initialized.")
            return

    run_client.log_failed(
        "Could not create an auth context.", traceback=failure_details
    )
    raise PolyaxonContainerException(
        "Init job did not succeed authenticating job."
    ) from last_error
def create_code_repo(repo_path: str, url: str, revision: str, connection: str = None):
    """Clone a git repository into ``repo_path`` and record it as run lineage.

    The repository is cloned from the URL returned by ``get_clone_url(url)``,
    after which the remote is set to the original ``url``. When ``revision``
    is truthy it is checked out. Unless the client is configured with
    ``no_api``, the resulting code reference is logged on the current run as
    an input artifact of kind ``CODEREF``.

    Args:
        repo_path: Destination path of the clone.
        url: Repository URL as provided by the caller.
        revision: Revision to check out; skipped when empty/None.
        connection: Connection name stored on the lineage artifact.

    Raises:
        PolyaxonContainerException: if ``url`` cannot be turned into a clone
            URL, or if the run information cannot be resolved.
    """
    try:
        resolved_url = get_clone_url(url)
    except Exception as e:
        raise PolyaxonContainerException("Error parsing url: {}.".format(url)) from e

    clone_git_repo(repo_path=repo_path, url=resolved_url)
    set_remote(repo_path=repo_path, url=url)
    if revision:
        checkout_revision(repo_path=repo_path, revision=revision)

    # Nothing to report when API access is disabled.
    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e)

    reference = get_code_reference(path=repo_path, url=url)
    lineage = V1RunArtifact(
        name=reference.get("commit"),
        kind=V1ArtifactKind.CODEREF,
        connection=connection,
        summary=reference,
        is_input=True,
    )
    client = RunClient(owner=owner, project=project, run_uuid=run_uuid)
    client.log_artifact_lineage(lineage)
def create_code_repo(
    repo_path: str,
    url: str,
    revision: str,
    connection: str = None,
    flags: List[str] = None,
):
    """Fetch or clone a git repository into ``repo_path`` and record its lineage.

    When ``flags`` contains ``"--experimental-fetch"`` the repository is
    obtained with ``fetch_git_repo`` (with that marker flag stripped from the
    flags passed on); otherwise ``clone_and_checkout_git_repo`` is used. In
    both cases the remote is then set to the original ``url``. Unless the
    client is configured with ``no_api``, the code reference is logged on the
    current run as an input artifact of kind ``CODEREF``.

    Args:
        repo_path: Destination path of the repository.
        url: Repository URL as provided by the caller.
        revision: Revision forwarded to the fetch/clone helper.
        connection: Connection name stored on the lineage artifact.
        flags: Extra flags forwarded to the fetch/clone helper. The caller's
            list is never modified.

    Raises:
        PolyaxonContainerException: if ``url`` cannot be turned into a clone
            URL, or if the ``RunClient`` cannot be created.
    """
    try:
        clone_url = get_clone_url(url)
    except Exception as e:
        raise PolyaxonContainerException("Error parsing url: {}.".format(url)) from e

    if flags and "--experimental-fetch" in flags:
        # Work on a copy so the caller's list is not mutated; like the
        # original `remove`, only the first occurrence of the marker is dropped.
        fetch_flags = list(flags)
        fetch_flags.remove("--experimental-fetch")
        fetch_git_repo(
            repo_path=repo_path,
            clone_url=clone_url,
            revision=revision,
            flags=fetch_flags,
        )
    else:
        clone_and_checkout_git_repo(
            repo_path=repo_path,
            clone_url=clone_url,
            revision=revision,
            flags=flags,
        )

    # Update remote
    set_remote(repo_path=repo_path, url=url)

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        run_client = RunClient()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    code_ref = get_code_reference(path=repo_path, url=url)
    artifact_run = V1RunArtifact(
        name=code_ref.get("commit"),
        kind=V1ArtifactKind.CODEREF,
        connection=connection,
        summary=code_ref,
        is_input=True,
    )
    run_client.log_artifact_lineage(artifact_run)
def create_auth_context():
    """Initialize the auth context for the current run, retrying on client errors.

    Resolves the run's owner, project and uuid with ``get_run_info`` and calls
    ``impersonate`` with them. The call is attempted up to three times,
    backing off a little longer after each failure.

    Raises:
        PolyaxonContainerException: if the run information cannot be resolved,
            or if all authentication attempts fail (chained to the last
            client error).
    """
    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    max_attempts = 3
    last_error = None
    for attempt in range(1, max_attempts + 1):
        try:
            impersonate(owner=owner, project=project, run_uuid=run_uuid)
        except PolyaxonClientException as e:
            # `e` is unbound when the except block exits, so keep a reference
            # for chaining the final exception.
            last_error = e
            print("Could not establish connection, retrying ...")
            # Back off 2s, then 3s; there is nothing to wait for after the
            # final attempt.
            if attempt < max_attempts:
                time.sleep(attempt + 1)
        else:
            print("Auth context initialized.")
            return

    raise PolyaxonContainerException(
        "Init job did not succeed authenticating job."
    ) from last_error
def log_suggestions(suggestions: List[Dict]):
    """Log ``suggestions`` as outputs of the current run.

    Does nothing when the client is configured with ``no_api``. Otherwise the
    run is resolved with ``get_run_info`` and the suggestions are sent through
    ``RunClient.log_outputs(suggestions=...)``.

    Args:
        suggestions: List of suggestion dictionaries to log.

    Raises:
        PolyaxonContainerException: if the run information cannot be resolved.
    """
    # Imports are kept local to the function, as in the original.
    from polyaxon import settings
    from polyaxon.client import RunClient
    from polyaxon.env_vars.getters import get_run_info
    from polyaxon.exceptions import PolyaxonClientException, PolyaxonContainerException

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e)

    run_client = RunClient(owner=owner, project=project, run_uuid=run_uuid)
    run_client.log_outputs(suggestions=suggestions)
def create_dockerfile_lineage(dockerfile_path: str, summary: Dict):
    """Record a dockerfile as an input artifact of the current run.

    Returns immediately when ``dockerfile_path`` is empty/None or when the
    client is configured with ``no_api``. Otherwise an artifact of kind
    ``DOCKERFILE`` is logged, named after the file's base name and pointing to
    the path returned by ``get_rel_asset_path``.

    Args:
        dockerfile_path: Path to the dockerfile.
        summary: Summary dictionary stored on the artifact.

    Raises:
        PolyaxonContainerException: if the ``RunClient`` cannot be created.
    """
    if not dockerfile_path:
        return

    artifact_name = os.path.basename(dockerfile_path)

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        client = RunClient()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e)

    lineage = V1RunArtifact(
        name=artifact_name,
        kind=V1ArtifactKind.DOCKERFILE,
        path=get_rel_asset_path(dockerfile_path),
        summary=summary,
        is_input=True,
    )
    client.log_artifact_lineage(lineage)
def create_file_lineage(filepath: str, summary: Dict, kind: str):
    """Record a file as an input artifact of the current run.

    Returns immediately when ``filepath`` is empty/None or when the client is
    configured with ``no_api``. Otherwise an artifact is logged whose name is
    ``get_base_filename`` applied to the file's base name and whose path is
    the result of ``get_rel_asset_path``.

    Args:
        filepath: Path to the file.
        summary: Summary dictionary stored on the artifact.
        kind: Artifact kind; falls back to ``V1ArtifactKind.FILE`` when falsy.

    Raises:
        PolyaxonContainerException: if the ``RunClient`` cannot be created.
    """
    if not kind:
        kind = V1ArtifactKind.FILE

    if not filepath:
        return

    base_name = os.path.basename(filepath)

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        client = RunClient()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e)

    lineage = V1RunArtifact(
        name=get_base_filename(base_name),
        kind=kind,
        path=get_rel_asset_path(filepath),
        summary=summary,
        is_input=True,
    )
    client.log_artifact_lineage(lineage)
async def start_sidecar(
    container_id: str,
    sleep_interval: int,
    sync_interval: int,
    monitor_outputs: bool,
    monitor_logs: bool,
):
    """Run the sidecar loop: poll the watched container and sync logs/outputs.

    The pod id is read from the ``POLYAXON_KEYS_K8S_POD_ID`` environment
    variable and the run is resolved with ``get_run_info``. While the
    container identified by ``container_id`` is reported as running, the loop
    sleeps ``sleep_interval`` seconds per iteration and triggers a sync every
    ``sync_interval`` successful iterations (after normalisation by
    ``get_sync_interval``). The loop stops when the container is no longer
    running or after more than three consecutive ``ApiException`` failures.
    One final sync runs after the loop. The k8s manager is closed on every
    exit path.

    Args:
        container_id: Id of the container whose running state is polled.
        sleep_interval: Seconds to sleep between polls.
        sync_interval: Requested sync interval, passed to ``get_sync_interval``.
        monitor_outputs: Whether artifacts and summaries are synced.
        monitor_logs: Whether logs are synced.

    Raises:
        PolyaxonContainerException: if the pod id environment variable is
            missing, or if the run information cannot be resolved.
    """
    sync_interval = get_sync_interval(
        interval=sync_interval, sleep_interval=sleep_interval
    )
    try:
        pod_id = os.environ[POLYAXON_KEYS_K8S_POD_ID]
    except KeyError as e:
        raise PolyaxonContainerException(
            "Please make sure that this job has been "
            "started by Polyaxon with all required context."
        ) from e

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    client = RunClient(owner=owner, project=project, run_uuid=run_uuid)
    k8s_manager = AsyncK8SManager(namespace=CLIENT_CONFIG.namespace, in_cluster=True)
    await k8s_manager.setup()

    # Everything after setup runs under try/finally so the manager is closed
    # even when fetching the pod, polling, or the final sync raises.
    try:
        pod = await k8s_manager.get_pod(pod_id, reraise=True)
        retry = 1
        is_running = True
        counter = 0
        state = {
            "last_artifacts_check": None,
            "last_logs_check": None,
        }

        async def monitor():
            if monitor_logs:
                # `is_running` is read from the enclosing scope at call time,
                # so the final sync sees the last polled value.
                await sync_logs(
                    run_uuid=run_uuid,
                    k8s_manager=k8s_manager,
                    pod=pod,
                    last_time=None,
                    stream=True,
                    is_running=is_running,
                )
            if monitor_outputs:
                last_check = state["last_artifacts_check"]
                state["last_artifacts_check"] = sync_artifacts(
                    last_check=last_check,
                    run_uuid=run_uuid,
                )
                sync_summaries(
                    last_check=last_check,
                    run_uuid=run_uuid,
                    client=client,
                )

        while is_running and retry <= 3:
            await asyncio.sleep(sleep_interval)
            try:
                is_running = await k8s_manager.is_pod_running(pod_id, container_id)
            except ApiException as e:
                retry += 1
                logger.info("Exception %s", repr(e))
                logger.info("Sleeping ...")
                await asyncio.sleep(retry)
                # Skip the sync: `is_running` still holds the previous value.
                continue

            logger.debug("Syncing ...")
            if is_running:
                # A successful poll resets the consecutive-failure counter.
                retry = 1

            counter += 1
            if counter == sync_interval:
                counter = 0
                try:
                    await monitor()
                except Exception as e:
                    # Best effort: a failed periodic sync must not stop the loop.
                    logger.warning("Polyaxon sidecar error: %s", repr(e))

        # Final sync after the loop exits.
        await monitor()
        logger.info("Cleaning non main containers")
    finally:
        if k8s_manager:
            await k8s_manager.close()
def start_sidecar(
    container_id: str,
    sleep_interval: int,
    sync_interval: int,
    monitor_outputs: bool,
    monitor_logs: bool,
):
    """Run the sidecar loop: poll the watched container and sync outputs/logs.

    The run is resolved with ``get_run_info`` and the pod id is taken from
    ``CLIENT_CONFIG.pod_id``. While the container identified by
    ``container_id`` is reported as running, the loop sleeps
    ``sleep_interval`` seconds per iteration and triggers a sync every
    ``sync_interval`` successful iterations (after normalisation by
    ``get_sync_interval``). The loop stops when the container is no longer
    running or after more than three consecutive ``ApiException`` failures.
    One final sync runs after the loop.

    Args:
        container_id: Id of the container whose running state is polled.
        sleep_interval: Seconds to sleep between polls.
        sync_interval: Requested sync interval, passed to ``get_sync_interval``.
        monitor_outputs: Whether artifacts and summaries are synced.
        monitor_logs: Whether logs are synced.

    Raises:
        PolyaxonContainerException: if the run information cannot be resolved,
            or if no pod id is configured.
    """
    sync_interval = get_sync_interval(
        interval=sync_interval, sleep_interval=sleep_interval
    )
    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    client = RunClient(owner=owner, project=project, run_uuid=run_uuid)

    pod_id = CLIENT_CONFIG.pod_id
    if not pod_id:
        raise PolyaxonContainerException(
            "Please make sure that this job has been "
            "started by Polyaxon with all required context."
        )

    k8s_manager = K8SManager(namespace=CLIENT_CONFIG.namespace, in_cluster=True)
    retry = 1
    is_running = True
    counter = 0
    state = {
        "last_artifacts_check": None,
        "last_logs_check": None,
    }

    def monitor():
        if monitor_outputs:
            last_check = state["last_artifacts_check"]
            state["last_artifacts_check"] = sync_artifacts(
                last_check=last_check,
                run_uuid=run_uuid,
            )
            sync_summaries(
                last_check=last_check,
                run_uuid=run_uuid,
                client=client,
            )
        if monitor_logs:
            state["last_logs_check"] = sync_logs(
                k8s_manager=k8s_manager,
                client=client,
                last_check=state["last_logs_check"],
                run_uuid=run_uuid,
                pod_id=pod_id,
                container_id=container_id,
                owner=owner,
                project=project,
            )

    while is_running and retry <= 3:
        time.sleep(sleep_interval)
        try:
            is_running = is_pod_running(k8s_manager, pod_id, container_id)
        except ApiException as e:
            retry += 1
            logger.info("Exception %s", repr(e))
            logger.info("Sleeping ...")
            time.sleep(1 * retry)
            # Without this `continue`, the stale `is_running` (still True)
            # would reset `retry` below and the retry limit would never trip.
            continue

        logger.debug("Syncing ...")
        if is_running:
            # A successful poll resets the consecutive-failure counter.
            retry = 1

        counter += 1
        if counter == sync_interval:
            counter = 0
            try:
                monitor()
            except Exception as e:
                # Best effort: a failed periodic sync must not stop the loop.
                # `%s` (not `%e`, the float-exponent specifier) so the record
                # can actually be formatted with an exception argument.
                logger.warning("Polyaxon sidecar error: %s", e)

    # Final sync after the loop exits.
    monitor()
    logger.info("Cleaning non main containers")