Esempio n. 1
0
def create_auth_context():
    """Initialize the auth context by impersonating the current run.

    Creates a ``RunClient`` and calls ``impersonate`` with its owner, project,
    run uuid and underlying client, making up to three attempts.

    Raises:
        PolyaxonContainerException: if the run client cannot be created, or
            if all attempts fail with a ``PolyaxonClientException``. In the
            latter case the failure is reported through
            ``run_client.log_failed`` before raising.
    """
    try:
        run_client = RunClient()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    # Details of the most recent failure, forwarded to `log_failed` below.
    exp = None
    retry = 1
    while retry <= 3:
        try:
            impersonate(
                owner=run_client.owner,
                project=run_client.project,
                run_uuid=run_client.run_uuid,
                client=run_client.client,
            )
            print("Auth context initialized.")
            return
        except PolyaxonClientException as e:
            retry += 1
            print("Could not establish connection, retrying ...")
            exp = "Polyaxon auth initialized failed authenticating the operation: {}\n{}".format(
                repr(e), traceback.format_exc())
            # Wait a little longer after each failed attempt (2s, 3s, 4s).
            time.sleep(retry)

    run_client.log_failed("Could not create an auth context.", traceback=exp)
    raise PolyaxonContainerException(
        "Init job did not succeed authenticating job.")
Esempio n. 2
0
def create_code_repo(repo_path: str,
                     url: str,
                     revision: str,
                     connection: str = None):
    """Clone a git repository and record it as an input artifact of the run.

    Args:
        repo_path: Path the repository is cloned into.
        url: Repository url; it is converted with ``get_clone_url`` for the
            clone and used as-is for the remote and the code reference.
        revision: Revision to check out; skipped when falsy.
        connection: Connection name stored on the lineage artifact.

    Raises:
        PolyaxonContainerException: if ``url`` cannot be converted to a clone
            url, or if the run info cannot be resolved.
    """
    try:
        clone_url = get_clone_url(url)
    except Exception as e:
        raise PolyaxonContainerException(
            "Error parsing url: {}.".format(url)) from e

    clone_git_repo(repo_path=repo_path, url=clone_url)
    set_remote(repo_path=repo_path, url=url)
    if revision:
        checkout_revision(repo_path=repo_path, revision=revision)

    # Without API access there is nothing to report the lineage to.
    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    code_ref = get_code_reference(path=repo_path, url=url)
    artifact_run = V1RunArtifact(
        name=code_ref.get("commit"),
        kind=V1ArtifactKind.CODEREF,
        connection=connection,
        summary=code_ref,
        is_input=True,
    )
    run_client = RunClient(owner=owner, project=project, run_uuid=run_uuid)
    run_client.log_artifact_lineage(artifact_run)
Esempio n. 3
0
def create_code_repo(
    repo_path: str,
    url: str,
    revision: str,
    connection: str = None,
    flags: List[str] = None,
):
    """Fetch or clone a git repository and record it as an input artifact.

    Args:
        repo_path: Path the repository is placed in.
        url: Repository url; it is converted with ``get_clone_url`` for the
            clone/fetch and used as-is for the remote and the code reference.
        revision: Revision forwarded to the clone/fetch helper.
        connection: Connection name stored on the lineage artifact.
        flags: Extra flags forwarded to the clone/fetch helper. When
            ``"--experimental-fetch"`` is present, ``fetch_git_repo`` is used
            instead of ``clone_and_checkout_git_repo`` and that flag is not
            forwarded. The caller's list is never modified.

    Raises:
        PolyaxonContainerException: if ``url`` cannot be converted to a clone
            url, or if the run client cannot be created.
    """
    try:
        clone_url = get_clone_url(url)
    except Exception as e:
        raise PolyaxonContainerException(
            "Error parsing url: {}.".format(url)) from e

    if flags and "--experimental-fetch" in flags:
        # Strip the marker flag from a copy so the caller's list is untouched.
        fetch_flags = list(flags)
        fetch_flags.remove("--experimental-fetch")
        fetch_git_repo(repo_path=repo_path,
                       clone_url=clone_url,
                       revision=revision,
                       flags=fetch_flags)
    else:
        clone_and_checkout_git_repo(repo_path=repo_path,
                                    clone_url=clone_url,
                                    revision=revision,
                                    flags=flags)
    # Update remote
    set_remote(repo_path=repo_path, url=url)

    # Without API access there is nothing to report the lineage to.
    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        run_client = RunClient()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    code_ref = get_code_reference(path=repo_path, url=url)
    artifact_run = V1RunArtifact(
        name=code_ref.get("commit"),
        kind=V1ArtifactKind.CODEREF,
        connection=connection,
        summary=code_ref,
        is_input=True,
    )
    run_client.log_artifact_lineage(artifact_run)
Esempio n. 4
0
def create_auth_context():
    """Initialize the auth context by impersonating the current run.

    Resolves the run's owner, project and uuid with ``get_run_info`` and
    calls ``impersonate`` with them, making up to three attempts.

    Raises:
        PolyaxonContainerException: if the run info cannot be resolved, or if
            all attempts fail with a ``PolyaxonClientException``.
    """
    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    retry = 1
    while retry <= 3:
        try:
            impersonate(owner=owner, project=project, run_uuid=run_uuid)
            print("Auth context initialized.")
            return
        except PolyaxonClientException:
            retry += 1
            print("Could not establish connection, retrying ...")
            # Wait a little longer after each failed attempt (2s, 3s, 4s).
            time.sleep(retry)

    raise PolyaxonContainerException("Init job did not succeed authenticating job.")
Esempio n. 5
0
def log_suggestions(suggestions: List[Dict]):
    """Log ``suggestions`` as outputs of the current run.

    Does nothing when the client is configured with ``no_api``.

    Args:
        suggestions: Suggestions passed to ``RunClient.log_outputs`` under
            the ``suggestions`` keyword.

    Raises:
        PolyaxonContainerException: if the run info cannot be resolved.
    """
    from polyaxon import settings
    from polyaxon.client import RunClient
    from polyaxon.env_vars.getters import get_run_info
    from polyaxon.exceptions import PolyaxonClientException, PolyaxonContainerException

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    run_client = RunClient(owner=owner, project=project, run_uuid=run_uuid)
    run_client.log_outputs(suggestions=suggestions)
Esempio n. 6
0
def create_dockerfile_lineage(dockerfile_path: str, summary: Dict):
    """Record a dockerfile as an input artifact of the current run.

    Does nothing when ``dockerfile_path`` is empty or when the client is
    configured with ``no_api``.

    Args:
        dockerfile_path: Path to the dockerfile; its base name becomes the
            artifact name.
        summary: Summary stored on the lineage artifact.

    Raises:
        PolyaxonContainerException: if the run client cannot be created.
    """
    if not dockerfile_path:
        return
    filename = os.path.basename(dockerfile_path)

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        run_client = RunClient()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    artifact_run = V1RunArtifact(
        name=filename,
        kind=V1ArtifactKind.DOCKERFILE,
        path=get_rel_asset_path(dockerfile_path),
        summary=summary,
        is_input=True,
    )
    run_client.log_artifact_lineage(artifact_run)
Esempio n. 7
0
def create_file_lineage(filepath: str, summary: Dict, kind: str):
    """Record a file as an input artifact of the current run.

    Does nothing when ``filepath`` is empty or when the client is configured
    with ``no_api``.

    Args:
        filepath: Path to the file; the artifact name is derived from its
            base name with ``get_base_filename``.
        summary: Summary stored on the lineage artifact.
        kind: Artifact kind; falls back to ``V1ArtifactKind.FILE`` when falsy.

    Raises:
        PolyaxonContainerException: if the run client cannot be created.
    """
    kind = kind or V1ArtifactKind.FILE

    if not filepath:
        return
    filename = os.path.basename(filepath)

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        run_client = RunClient()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    artifact_run = V1RunArtifact(
        name=get_base_filename(filename),
        kind=kind,
        path=get_rel_asset_path(filepath),
        summary=summary,
        is_input=True,
    )
    run_client.log_artifact_lineage(artifact_run)
Esempio n. 8
0
async def start_sidecar(
    container_id: str,
    sleep_interval: int,
    sync_interval: int,
    monitor_outputs: bool,
    monitor_logs: bool,
):
    """Watch a container and periodically sync its logs and outputs.

    Polls ``k8s_manager.is_pod_running`` every ``sleep_interval`` seconds.
    Every ``sync_interval``-th successful poll (after normalisation by
    ``get_sync_interval``) the logs and/or outputs are synced. The loop ends
    when the container is reported as not running or after three consecutive
    ``ApiException`` failures; one final sync is then performed.

    Args:
        container_id: Container to watch inside the pod.
        sleep_interval: Seconds to sleep between polls.
        sync_interval: Requested sync interval, passed to
            ``get_sync_interval`` together with ``sleep_interval``.
        monitor_outputs: Whether to sync artifacts and summaries.
        monitor_logs: Whether to sync logs.

    Raises:
        PolyaxonContainerException: if the pod id environment variable is
            missing or the run info cannot be resolved.
    """
    sync_interval = get_sync_interval(
        interval=sync_interval, sleep_interval=sleep_interval
    )
    try:
        pod_id = os.environ[POLYAXON_KEYS_K8S_POD_ID]
    except KeyError as e:
        raise PolyaxonContainerException(
            "Please make sure that this job has been "
            "started by Polyaxon with all required context."
        ) from e

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    client = RunClient(owner=owner, project=project, run_uuid=run_uuid)
    k8s_manager = AsyncK8SManager(namespace=CLIENT_CONFIG.namespace, in_cluster=True)
    await k8s_manager.setup()

    # From here on the manager is set up, so always close it, even when the
    # pod lookup, the polling loop or the final sync raises.
    try:
        pod = await k8s_manager.get_pod(pod_id, reraise=True)

        retry = 1
        is_running = True
        counter = 0
        state = {
            "last_artifacts_check": None,
            "last_logs_check": None,
        }

        async def monitor():
            # `is_running` is read when monitor() is called, so the final
            # call after the loop sees the last polled value.
            if monitor_logs:
                await sync_logs(
                    run_uuid=run_uuid,
                    k8s_manager=k8s_manager,
                    pod=pod,
                    last_time=None,
                    stream=True,
                    is_running=is_running,
                )
            if monitor_outputs:
                last_check = state["last_artifacts_check"]
                state["last_artifacts_check"] = sync_artifacts(
                    last_check=last_check,
                    run_uuid=run_uuid,
                )
                sync_summaries(
                    last_check=last_check,
                    run_uuid=run_uuid,
                    client=client,
                )

        while is_running and retry <= 3:
            await asyncio.sleep(sleep_interval)
            try:
                is_running = await k8s_manager.is_pod_running(pod_id, container_id)
            except ApiException as e:
                retry += 1
                logger.info("Exception %r", e)
                logger.info("Sleeping ...")
                await asyncio.sleep(retry)
                continue

            logger.debug("Syncing ...")
            if is_running:
                # A successful poll resets the consecutive-failure counter.
                retry = 1

            counter += 1
            if counter == sync_interval:
                counter = 0
                try:
                    await monitor()
                except Exception as e:
                    # Periodic syncs are best effort; keep watching the pod.
                    logger.warning("Polyaxon sidecar error: %r", e)

        # Final sync once the loop has ended.
        await monitor()
        logger.info("Cleaning non main containers")
    finally:
        await k8s_manager.close()
Esempio n. 9
0
def start_sidecar(
    container_id: str,
    sleep_interval: int,
    sync_interval: int,
    monitor_outputs: bool,
    monitor_logs: bool,
):
    """Watch a container and periodically sync its outputs and logs.

    Polls ``is_pod_running`` every ``sleep_interval`` seconds. Every
    ``sync_interval``-th successful poll (after normalisation by
    ``get_sync_interval``) the outputs and/or logs are synced. The loop ends
    when the container is reported as not running or after three consecutive
    ``ApiException`` failures; one final sync is then performed.

    Args:
        container_id: Container to watch inside the pod.
        sleep_interval: Seconds to sleep between polls.
        sync_interval: Requested sync interval, passed to
            ``get_sync_interval`` together with ``sleep_interval``.
        monitor_outputs: Whether to sync artifacts and summaries.
        monitor_logs: Whether to sync logs.

    Raises:
        PolyaxonContainerException: if the run info cannot be resolved or
            ``CLIENT_CONFIG.pod_id`` is not set.
    """
    sync_interval = get_sync_interval(interval=sync_interval,
                                      sleep_interval=sleep_interval)

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    client = RunClient(owner=owner, project=project, run_uuid=run_uuid)
    pod_id = CLIENT_CONFIG.pod_id
    if not pod_id:
        raise PolyaxonContainerException(
            "Please make sure that this job has been "
            "started by Polyaxon with all required context.")

    k8s_manager = K8SManager(namespace=CLIENT_CONFIG.namespace,
                             in_cluster=True)
    retry = 1
    is_running = True
    counter = 0
    state = {
        "last_artifacts_check": None,
        "last_logs_check": None,
    }

    def monitor():
        if monitor_outputs:
            last_check = state["last_artifacts_check"]
            state["last_artifacts_check"] = sync_artifacts(
                last_check=last_check,
                run_uuid=run_uuid,
            )
            sync_summaries(
                last_check=last_check,
                run_uuid=run_uuid,
                client=client,
            )

        if monitor_logs:
            state["last_logs_check"] = sync_logs(
                k8s_manager=k8s_manager,
                client=client,
                last_check=state["last_logs_check"],
                run_uuid=run_uuid,
                pod_id=pod_id,
                container_id=container_id,
                owner=owner,
                project=project,
            )

    while is_running and retry <= 3:
        time.sleep(sleep_interval)
        try:
            is_running = is_pod_running(k8s_manager, pod_id, container_id)
        except ApiException as e:
            retry += 1
            logger.info("Exception %r", e)
            logger.info("Sleeping ...")
            time.sleep(retry)
            # Skip the rest of the iteration: `is_running` still holds the
            # previous poll's value, and falling through would reset `retry`
            # so the retry limit could never be reached.
            continue

        logger.debug("Syncing ...")
        if is_running:
            # A successful poll resets the consecutive-failure counter.
            retry = 1

        counter += 1
        if counter == sync_interval:
            counter = 0
            try:
                monitor()
            except Exception as e:
                # Periodic syncs are best effort; keep watching the pod.
                logger.warning("Polyaxon sidecar error: %r", e)

    # Final sync once the loop has ended.
    monitor()
    logger.info("Cleaning non main containers")