Esempio n. 1
0
    def __init__(
        self,
        owner: str = None,
        project: str = None,
        run_uuid: str = None,
        client: PolyaxonClient = None,
    ):
        """Resolve the owner/project/run context and prepare the API client.

        Args:
            owner: Owner name. Optional; resolved through
                `get_project_or_local` or `get_run_info` when possible.
            project: Project name. Optional; resolved like `owner`.
            run_uuid: Run identifier. When the project is taken from
                `get_run_info`, a falsy value is replaced by that run's uuid.
            client: Client instance to reuse. When falsy and the client
                settings are not offline, a new `PolyaxonClient` is created.

        Raises:
            PolyaxonClientException: If no project can be resolved, or if
                the owner or the project is empty after resolution.
        """
        try:
            full_name = get_project_full_name(owner=owner, project=project)
            owner, project = get_project_or_local(full_name)
        except PolyaxonClientException:
            # Best effort only: the validation below decides whether the
            # values we are left with are usable.
            pass

        if project is None:
            if not settings.CLIENT_CONFIG.is_managed:
                raise PolyaxonClientException(
                    "Please provide a valid project.")
            # Managed context: take the identifiers from the run info.
            owner, project, env_run_uuid = get_run_info()
            run_uuid = run_uuid or env_run_uuid

        if not (owner and project):
            raise PolyaxonClientException(
                "Please provide a valid project with owner.")

        self.client = client
        if not self.client and not settings.CLIENT_CONFIG.is_offline:
            self.client = PolyaxonClient()

        self._owner = owner
        self._project = project
        self._run_uuid = get_run_or_local(run_uuid)
        self._run_data = polyaxon_sdk.V1Run()
        self._namespace = None
Esempio n. 2
0
    def make_and_create_run(self,
                            run_data: Tuple[str, str, str, str],
                            default_auth: bool = False):
        """Build a run resource with `make_run_resource` and create it.

        Args:
            run_data: Indexable run description. Item 0 is passed to
                `get_run_info` as the run instance, item 1 is the run kind,
                item 2 the run name and item 3 the content.
            default_auth: Forwarded unchanged to `make_run_resource`.

        Returns:
            None. Nothing is created when `make_run_resource` yields a
            falsy resource; creation errors are logged, not raised.
        """
        owner, project, uuid = get_run_info(run_instance=run_data[0])
        resource = self.make_run_resource(
            owner_name=owner,
            project_name=project,
            run_name=run_data[2],
            run_uuid=uuid,
            content=run_data[3],
            default_auth=default_auth,
        )
        if not resource:
            return

        try:
            self.spawner.create(
                run_uuid=uuid,
                run_kind=run_data[1],
                resource=resource,
            )
        except ApiException as api_error:
            # A 409 status gets its own message; every other status is
            # logged as a generic submission error.
            if api_error.status == 409:
                message = "Run already running, triggering an apply mechanism."
            else:
                message = "Run submission error."
            logger.info(message)
        except Exception as error:
            template = (
                "Run could not be cleaned. Agent failed converting run manifest: {}\n{}"
            )
            logger.info(template.format(repr(error), traceback.format_exc()))
Esempio n. 3
0
    def apply_run(self, run_data: Tuple[str, str, str, str]):
        """Prepare a run resource and apply it through the spawner.

        Args:
            run_data: Indexable run description. Item 0 is passed to
                `get_run_info` as the run instance, item 1 is the run kind,
                item 2 the run name and item 3 the content.

        Returns:
            None. On success the run is reported via `log_run_running`; on
            any exception it is reported via `log_run_failed` and then
            cleaned with `clean_run`.
        """
        owner, project, uuid = get_run_info(run_instance=run_data[0])
        resource = self.prepare_run_resource(
            owner_name=owner,
            project_name=project,
            run_name=run_data[2],
            run_uuid=uuid,
            content=run_data[3],
        )
        if not resource:
            return

        # Identification keywords shared by the status-reporting helpers.
        run_ref = dict(run_owner=owner, run_project=project, run_uuid=uuid)

        try:
            self.spawner.apply(
                run_uuid=uuid,
                run_kind=run_data[1],
                resource=resource,
            )
            self.log_run_running(**run_ref)
        except Exception as error:
            self.log_run_failed(exc=error, **run_ref)
            self.clean_run(run_uuid=uuid, run_kind=run_data[1])
Esempio n. 4
0
    def create_run(self, run_data: Tuple[str, str, str, str]):
        """Prepare a run resource and create it through the spawner.

        Args:
            run_data: Indexable run description. Item 0 is passed to
                `get_run_info` as the run instance, item 1 is the run kind,
                item 2 the run name and item 3 the content.

        Returns:
            None. On success the run is reported via `log_run_scheduled`.
            A 409 `ApiException` falls back to `apply_run`; any other
            failure is reported via `log_run_failed`.
        """
        run_owner, run_project, run_uuid = get_run_info(run_instance=run_data[0])
        resource = self.prepare_run_resource(
            owner_name=run_owner,
            project_name=run_project,
            run_name=run_data[2],
            run_uuid=run_uuid,
            content=run_data[3],
        )
        # Same guard as apply_run/_submit_run: without a resource there is
        # nothing to hand to the spawner.
        if not resource:
            return

        try:
            self.spawner.create(
                run_uuid=run_uuid, run_kind=run_data[1], resource=resource
            )
            self.log_run_scheduled(
                run_owner=run_owner, run_project=run_project, run_uuid=run_uuid
            )
        except ApiException as e:
            if e.status == 409:
                logger.info(
                    "Run already running, triggering an apply mechanism."
                )
                self.apply_run(run_data=run_data)
            else:
                # Other API errors used to be dropped silently; report them
                # the same way _submit_run does.
                logger.info("Run submission error.")
                self.log_run_failed(
                    run_owner=run_owner,
                    run_project=run_project,
                    run_uuid=run_uuid,
                    exc=e,
                )
        except Exception as e:
            self.log_run_failed(
                run_owner=run_owner, run_project=run_project, run_uuid=run_uuid, exc=e
            )
Esempio n. 5
0
def create_code_repo(repo_path: str,
                     url: str,
                     revision: str,
                     connection: str = None):
    """Clone a git repository and, unless the API is disabled, log its lineage.

    Args:
        repo_path: Path passed to the clone, remote, checkout and
            code-reference helpers.
        url: Repository url; it is converted with `get_clone_url` for the
            clone itself and used as-is for the remote and the code reference.
        revision: Revision to check out; skipped when falsy.
        connection: Connection name stored on the logged artifact.

    Raises:
        PolyaxonContainerException: If the url cannot be converted to a
            clone url, or if `get_run_info` raises `PolyaxonClientException`.
    """
    try:
        clone_url = get_clone_url(url)
    except Exception as e:
        raise PolyaxonContainerException(
            "Error parsing url: {}.".format(url)) from e

    clone_git_repo(repo_path=repo_path, url=clone_url)
    set_remote(repo_path=repo_path, url=url)
    if revision:
        checkout_revision(repo_path=repo_path, revision=revision)

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        # Chain explicitly, matching the url-parsing error above.
        raise PolyaxonContainerException(e) from e

    code_ref = get_code_reference(path=repo_path, url=url)
    artifact_run = V1RunArtifact(
        name=code_ref.get("commit"),
        kind=V1ArtifactKind.CODEREF,
        connection=connection,
        summary=code_ref,
        is_input=True,
    )
    RunClient(owner=owner, project=project,
              run_uuid=run_uuid).log_artifact_lineage(artifact_run)
Esempio n. 6
0
def log_suggestions(suggestions: List[Dict]):
    """Log `suggestions` as outputs of the current run.

    Does nothing when `settings.CLIENT_CONFIG.no_api` is truthy.

    Args:
        suggestions: Value forwarded to `RunClient.log_outputs` under the
            `suggestions` keyword.

    Raises:
        PolyaxonContainerException: If `get_run_info` raises
            `PolyaxonClientException`.
    """
    from polyaxon import settings
    from polyaxon.client import RunClient
    from polyaxon.env_vars.getters import get_run_info
    from polyaxon.exceptions import PolyaxonClientException, PolyaxonContainerException

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        # Explicit chaining keeps the client error as the direct cause.
        raise PolyaxonContainerException(e) from e

    RunClient(owner=owner, project=project,
              run_uuid=run_uuid).log_outputs(suggestions=suggestions)
Esempio n. 7
0
 def stop_run(self, run_data: Tuple[str, str]):
     """Ask the spawner to stop a run and report the outcome.

     Args:
         run_data: Indexable run description. Item 0 is passed to
             `get_run_info` as the run instance and item 1 is the run kind.

     Returns:
         None. A 404 `ApiException` is reported via `log_run_stopped`;
         other API errors are only logged; any other exception is
         reported via `log_run_failed`.
     """
     run_owner, run_project, run_uuid = get_run_info(run_instance=run_data[0])
     try:
         self.spawner.stop(run_uuid=run_uuid, run_kind=run_data[1])
     except ApiException as e:
         if e.status == 404:
             logger.info("Run does not exist anymore, it could have been stopped.")
             self.log_run_stopped(
                 run_owner=run_owner, run_project=run_project, run_uuid=run_uuid
             )
         else:
             # Previously dropped without a trace. The run is intentionally
             # not marked as failed here; the error is only made visible.
             logger.warning(
                 "Run could not be stopped, API status %s: %r", e.status, e
             )
     except Exception as e:
         self.log_run_failed(
             run_owner=run_owner,
             run_project=run_project,
             run_uuid=run_uuid,
             exc=e,
             message="Agent failed stopping run.\n",
         )
Esempio n. 8
0
def create_auth_context():
    """Initialize the auth context for the current run, retrying on failure.

    Calls `impersonate` up to three times, waiting 2 and then 3 seconds
    between attempts.

    Raises:
        PolyaxonContainerException: If `get_run_info` raises
            `PolyaxonClientException`, or if every attempt fails.
    """
    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            impersonate(owner=owner, project=project, run_uuid=run_uuid)
        except PolyaxonClientException:
            if attempt == max_attempts:
                # No attempt left: do not announce a retry or sleep again.
                break
            print("Could not establish connection, retrying ...")
            time.sleep(attempt + 1)
        else:
            print("Auth context initialized.")
            return

    raise PolyaxonContainerException("Init job did not succeed authenticating job.")
Esempio n. 9
0
    def _submit_run(self, run_data: Tuple[str, str, str, str], sync_api=True):
        """Prepare a run resource, create it and optionally report its status.

        Args:
            run_data: Indexable run description. Item 0 is passed to
                `get_run_info` as the run instance, item 1 is the run kind,
                item 2 the run name and item 3 the content.
            sync_api: When truthy, a successful creation is reported via
                `log_run_scheduled` and a non-API exception via
                `log_run_failed`.

        Returns:
            None. Nothing is created when `prepare_run_resource` yields a
            falsy resource.
        """
        owner, project, uuid = get_run_info(run_instance=run_data[0])
        resource = self.prepare_run_resource(
            owner_name=owner,
            project_name=project,
            run_name=run_data[2],
            run_uuid=uuid,
            content=run_data[3],
        )
        if not resource:
            return

        # Identification keywords shared by the status-reporting helpers.
        run_ref = dict(run_owner=owner, run_project=project, run_uuid=uuid)

        try:
            self.spawner.create(
                run_uuid=uuid,
                run_kind=run_data[1],
                resource=resource,
            )
            if sync_api:
                self.log_run_scheduled(**run_ref)
        except ApiException as api_error:
            if api_error.status == 409:
                logger.info(
                    "Run already running, triggering an apply mechanism.")
                self.apply_run(run_data=run_data)
                return
            # NOTE(review): this branch reports the failure regardless of
            # `sync_api`, unlike the generic handler below - confirm intent.
            logger.info("Run submission error.")
            self.log_run_failed(exc=api_error, **run_ref)
        except Exception as error:
            if sync_api:
                self.log_run_failed(exc=error, **run_ref)
Esempio n. 10
0
def create_dockerfile_lineage(dockerfile_path: str, summary: Dict):
    """Log a dockerfile as an input artifact of the current run.

    Does nothing when `dockerfile_path` is falsy or when
    `settings.CLIENT_CONFIG.no_api` is truthy.

    Args:
        dockerfile_path: Path of the dockerfile; its base name becomes the
            artifact name and `RunClient.get_rel_asset_path` provides the
            artifact path.
        summary: Summary stored on the artifact.

    Raises:
        PolyaxonContainerException: If `get_run_info` raises
            `PolyaxonClientException`.
    """
    if not dockerfile_path:
        return
    filename = os.path.basename(dockerfile_path)

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        # Explicit chaining keeps the client error as the direct cause.
        raise PolyaxonContainerException(e) from e

    artifact_run = V1RunArtifact(
        name=filename,
        kind=V1ArtifactKind.DOCKERFILE,
        path=RunClient.get_rel_asset_path(dockerfile_path),
        summary=summary,
        is_input=True,
    )
    RunClient(owner=owner, project=project, run_uuid=run_uuid).log_artifact_lineage(
        artifact_run
    )
Esempio n. 11
0
 def delete_run(self, run_data: Tuple[str, str, str, str]):
     """Clean a run and, when it carries content, create it again.

     Args:
         run_data: Indexable run description. Item 0 is passed to
             `get_run_info` as the run instance, item 1 is the run kind and
             a truthy item 3 triggers `make_and_create_run`.
     """
     # Only the uuid is needed here; owner and project are discarded.
     _owner, _project, run_uuid = get_run_info(run_instance=run_data[0])
     self.clean_run(run_uuid=run_uuid, run_kind=run_data[1])
     if not run_data[3]:
         return
     self.make_and_create_run(run_data)
Esempio n. 12
0
async def start_sidecar(
    container_id: str,
    sleep_interval: int,
    sync_interval: int,
    monitor_outputs: bool,
    monitor_logs: bool,
):
    """Watch a container and periodically sync its logs and outputs.

    The container state is polled every `sleep_interval` seconds. After
    every N successful polls, where N is the value returned by
    `get_sync_interval`, logs and/or artifacts are synced. The loop ends
    when the container is no longer running or after three consecutive
    failed polls, and one final sync is performed. The k8s manager is
    closed on every exit path once its setup succeeded.

    Args:
        container_id: Container to watch inside the pod.
        sleep_interval: Seconds to wait between polls.
        sync_interval: Requested sync interval, normalized by
            `get_sync_interval`.
        monitor_outputs: Whether to sync artifacts and summaries.
        monitor_logs: Whether to sync logs.

    Raises:
        PolyaxonContainerException: If the pod id environment variable is
            missing, or if `get_run_info` raises `PolyaxonClientException`.
    """
    sync_interval = get_sync_interval(
        interval=sync_interval, sleep_interval=sleep_interval
    )
    try:
        pod_id = os.environ[POLYAXON_KEYS_K8S_POD_ID]
    except KeyError as e:
        raise PolyaxonContainerException(
            "Please make sure that this job has been "
            "started by Polyaxon with all required context."
        ) from e

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    client = RunClient(owner=owner, project=project, run_uuid=run_uuid)
    k8s_manager = AsyncK8SManager(namespace=CLIENT_CONFIG.namespace, in_cluster=True)
    await k8s_manager.setup()

    # Everything after a successful setup runs under try/finally so the
    # manager is closed even when the pod lookup or a sync raises.
    try:
        pod = await k8s_manager.get_pod(pod_id, reraise=True)

        retry = 1
        is_running = True
        counter = 0
        state = {
            "last_artifacts_check": None,
            "last_logs_check": None,
        }

        async def monitor():
            # `is_running` is read from the enclosing scope at call time.
            if monitor_logs:
                await sync_logs(
                    run_uuid=run_uuid,
                    k8s_manager=k8s_manager,
                    pod=pod,
                    last_time=None,
                    stream=True,
                    is_running=is_running,
                )
            if monitor_outputs:
                last_check = state["last_artifacts_check"]
                state["last_artifacts_check"] = sync_artifacts(
                    last_check=last_check,
                    run_uuid=run_uuid,
                )
                sync_summaries(
                    last_check=last_check,
                    run_uuid=run_uuid,
                    client=client,
                )

        while is_running and retry <= 3:
            await asyncio.sleep(sleep_interval)
            try:
                is_running = await k8s_manager.is_pod_running(pod_id, container_id)
            except ApiException as e:
                retry += 1
                logger.info("Exception %r", e)
                logger.info("Sleeping ...")
                await asyncio.sleep(retry)
                continue

            logger.debug("Syncing ...")
            if is_running:
                # A successful poll resets the consecutive-failure counter.
                retry = 1

            counter += 1
            if counter == sync_interval:
                counter = 0
                try:
                    await monitor()
                except Exception as e:
                    logger.warning("Polyaxon sidecar error: %r", e)

        # Final sync once the container stopped or the retries ran out.
        await monitor()
        logger.info("Cleaning non main containers")
    finally:
        await k8s_manager.close()
Esempio n. 13
0
def start_sidecar(
    container_id: str,
    sleep_interval: int,
    sync_interval: int,
    monitor_outputs: bool,
    monitor_logs: bool,
):
    """Watch a container and periodically sync its outputs and logs.

    The container state is polled every `sleep_interval` seconds. After
    every N successful polls, where N is the value returned by
    `get_sync_interval`, artifacts and/or logs are synced. The loop ends
    when the container is no longer running or after three consecutive
    failed polls, and one final sync is performed.

    Args:
        container_id: Container to watch inside the pod.
        sleep_interval: Seconds to wait between polls.
        sync_interval: Requested sync interval, normalized by
            `get_sync_interval`.
        monitor_outputs: Whether to sync artifacts and summaries.
        monitor_logs: Whether to sync logs.

    Raises:
        PolyaxonContainerException: If `get_run_info` raises
            `PolyaxonClientException`, or if `CLIENT_CONFIG.pod_id` is falsy.
    """
    sync_interval = get_sync_interval(interval=sync_interval,
                                      sleep_interval=sleep_interval)

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    client = RunClient(owner=owner, project=project, run_uuid=run_uuid)
    pod_id = CLIENT_CONFIG.pod_id
    if not pod_id:
        raise PolyaxonContainerException(
            "Please make sure that this job has been "
            "started by Polyaxon with all required context.")

    k8s_manager = K8SManager(namespace=CLIENT_CONFIG.namespace,
                             in_cluster=True)
    retry = 1
    is_running = True
    counter = 0
    state = {
        "last_artifacts_check": None,
        "last_logs_check": None,
    }

    def monitor():
        if monitor_outputs:
            last_check = state["last_artifacts_check"]
            state["last_artifacts_check"] = sync_artifacts(
                last_check=last_check,
                run_uuid=run_uuid,
            )
            sync_summaries(
                last_check=last_check,
                run_uuid=run_uuid,
                client=client,
            )

        if monitor_logs:
            state["last_logs_check"] = sync_logs(
                k8s_manager=k8s_manager,
                client=client,
                last_check=state["last_logs_check"],
                run_uuid=run_uuid,
                pod_id=pod_id,
                container_id=container_id,
                owner=owner,
                project=project,
            )

    while is_running and retry <= 3:
        time.sleep(sleep_interval)
        try:
            is_running = is_pod_running(k8s_manager, pod_id, container_id)
        except ApiException as e:
            retry += 1
            logger.info("Exception %r", e)
            logger.info("Sleeping ...")
            time.sleep(1 * retry)
            # Skip the rest of the iteration: `is_running` still holds the
            # previous value, and falling through would reset `retry` so
            # the retry limit could never be reached.
            continue

        logger.debug("Syncing ...")
        if is_running:
            # A successful poll resets the consecutive-failure counter.
            retry = 1

        counter += 1
        if counter == sync_interval:
            counter = 0
            try:
                monitor()
            except Exception as e:
                # %r, not %e: %e is a float format and cannot render an
                # exception, so the record would fail to format.
                logger.warning("Polyaxon sidecar error: %r", e)

    # Final sync once the container stopped or the retries ran out.
    monitor()
    logger.info("Cleaning non main containers")
Esempio n. 14
0
 def test_run_info_checks_is_managed(self):
     """`get_run_info` raises when the client config is not managed."""
     settings.CLIENT_CONFIG.is_managed = False
     self.assertRaises(PolyaxonClientException, get_run_info)