Python JobInfo 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: ray.dashboard.modules.job.common

클래스/타입: JobInfo

hotexamples.com에서의 예제들: 6

Python JobInfo - 6개의 예제를 찾았습니다. 아래는 오픈 소스 프로젝트에서 추출한, Python의 ray.dashboard.modules.job.common.JobInfo 사용 예제 가운데 평가가 가장 높은 실제 코드입니다. 예제를 평가해 주시면 예제 품질을 개선하는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

JobInfo(6)

자주 사용되는 메소드들

JobInfo (6)

예제 #1

파일 보기

def test_list_jobs(job_sdk_client: JobSubmissionClient, use_sdk: bool):
    """Submit one job and check the JobInfo reported by the job listing.

    When ``use_sdk`` is true the info is taken from ``client.list_jobs()``;
    otherwise it is rebuilt from the JSON body of a raw ``GET /api/jobs/``
    request, so both access paths are checked against the same expectations.
    """
    client = job_sdk_client

    expected_entrypoint = "echo hello"
    expected_runtime_env = {"env_vars": {"TEST": "123"}}
    expected_metadata = {"foo": "bar"}
    job_id = client.submit_job(
        entrypoint=expected_entrypoint,
        runtime_env=expected_runtime_env,
        metadata=expected_metadata,
    )

    # Do not inspect the listing until the success condition holds.
    wait_for_condition(_check_job_succeeded, client=client, job_id=job_id)

    if not use_sdk:
        # Raw HTTP path: fetch the listing and rebuild JobInfo from its JSON.
        response = client._do_request(
            "GET",
            "/api/jobs/",
        )

        assert response.status_code == 200
        listed_jobs = json.loads(response.text)
        info = JobInfo(**listed_jobs[job_id])
    else:
        # SDK path: list_jobs() is indexed directly by job id.
        info: JobInfo = client.list_jobs()[job_id]

    assert info.entrypoint == expected_entrypoint
    assert info.status == JobStatus.SUCCEEDED
    assert info.message is not None
    assert info.end_time >= info.start_time
    assert info.runtime_env == expected_runtime_env
    assert info.metadata == expected_metadata

예제 #2

파일 보기

파일: sdk.py 프로젝트: krfricke/ray

    def get_job_info(
        self,
        job_id: str,
    ) -> JobInfo:
        """Fetch the current status and related details of a submitted job.

        Example:
            >>> from ray.job_submission import JobSubmissionClient
            >>> client = JobSubmissionClient("http://127.0.0.1:8265") # doctest: +SKIP
            >>> job_id = client.submit_job(entrypoint="sleep 1") # doctest: +SKIP
            >>> client.get_job_info(job_id) # doctest: +SKIP
            JobInfo(status='SUCCEEDED', message='Job finished successfully.',
            error_type=None, start_time=1647388711, end_time=1647388712,
            metadata={}, runtime_env={})

        Args:
            job_id: ID of the job to look up.

        Returns:
            A JobInfo built from the JSON body of the server's response.

        Raises:
            RuntimeError: If the job does not exist or if the request to the
                job server fails.
        """
        response = self._do_request("GET", f"/api/jobs/{job_id}")

        if response.status_code != 200:
            # Any non-200 reply is handed to the shared error helper.
            self._raise_error(response)
        else:
            return JobInfo(**response.json())

예제 #3

파일 보기

파일: sdk.py 프로젝트: wuisawesome/ray

    def get_job_info(
        self,
        job_id: str,
    ) -> JobInfo:
        """Request ``/api/jobs/{job_id}`` and wrap the reply in a JobInfo.

        Args:
            job_id: ID of the job to look up.

        Returns:
            A JobInfo constructed from the response's JSON fields when the
            server answers with status 200.
        """
        response = self._do_request("GET", f"/api/jobs/{job_id}")

        if response.status_code == 200:
            payload = response.json()
            return JobInfo(**payload)

        # Every other status code is delegated to the error helper.
        self._raise_error(response)

예제 #4

파일 보기

파일: sdk.py 프로젝트: wuisawesome/ray

    def list_jobs(self) -> Dict[str, JobInfo]:
        """Request ``/api/jobs/`` and return one JobInfo per listed job.

        Returns:
            A dictionary keyed by job id whose values are JobInfo objects
            built from the corresponding JSON entries, when the server
            answers with status 200.
        """
        response = self._do_request("GET", "/api/jobs/")

        if response.status_code == 200:
            jobs = {}
            for job_id, fields in response.json().items():
                jobs[job_id] = JobInfo(**fields)
            return jobs

        # Every other status code is delegated to the error helper.
        self._raise_error(response)

예제 #5

파일 보기

파일: sdk.py 프로젝트: krfricke/ray

    def list_jobs(self) -> Dict[str, JobInfo]:
        """Return every job known to the cluster together with its details.

        The listing covers all jobs that have ever run on the cluster, whether
        they are still running or have already finished.

        Example:
            >>> from ray.job_submission import JobSubmissionClient
            >>> client = JobSubmissionClient("http://127.0.0.1:8265") # doctest: +SKIP
            >>> client.submit_job(entrypoint="echo hello") # doctest: +SKIP
            >>> client.submit_job(entrypoint="sleep 2") # doctest: +SKIP
            >>> client.list_jobs() # doctest: +SKIP
            {'raysubmit_4LamXRuQpYdSMg7J': JobInfo(status='SUCCEEDED',
            message='Job finished successfully.', error_type=None,
            start_time=1647388711, end_time=1647388712, metadata={}, runtime_env={}),
            'raysubmit_1dxCeNvG1fCMVNHG': JobInfo(status='RUNNING',
            message='Job is currently running.', error_type=None,
            start_time=1647454832, end_time=None, metadata={}, runtime_env={})}

        Returns:
            A dictionary mapping each job id to its JobInfo.

        Raises:
            RuntimeError: If the request to the job server fails.
        """
        response = self._do_request("GET", "/api/jobs/")

        if response.status_code != 200:
            # Any non-200 reply is handed to the shared error helper.
            self._raise_error(response)
        else:
            listing = response.json()
            return {
                job_id: JobInfo(**fields)
                for job_id, fields in listing.items()
            }

예제 #6

파일 보기

파일: job_manager.py 프로젝트: krfricke/ray

    def submit_job(
        self,
        *,
        entrypoint: str,
        job_id: Optional[str] = None,
        runtime_env: Optional[Dict[str, Any]] = None,
        metadata: Optional[Dict[str, str]] = None,
        _start_signal_actor: Optional[ActorHandle] = None,
    ) -> str:
        """Record a new job as PENDING and launch its supervisor actor.

        Job execution happens asynchronously; this method does not wait for
        the job to finish.

        1) Pick the job id: a fresh one is generated when none is supplied,
            otherwise the supplied id must not already have a status.
        2) Store a PENDING JobInfo for the job.
        3) Create a detached supervisor actor with the same runtime_env as
            the job spec, start it, and schedule background monitoring.

        Setting up the runtime_env, the subprocess group, running the driver
        command, cleaning up the subprocess and updating the running status
        in GCS are all handled by the job supervisor actor.

        Args:
            entrypoint: Driver command to execute in subprocess shell.
                Represents the entrypoint to start user application.
            job_id: Optional caller-chosen id for the job. When omitted, an
                id is generated.
            runtime_env: Runtime environment used to execute driver command,
                which could contain its own ray.init() to configure runtime
                env at ray cluster, task and actor level.
            metadata: Support passing arbitrary data to driver command in
                case needed.
            _start_signal_actor: Used in testing only to capture state
                transitions between PENDING -> RUNNING. Regular user shouldn't
                need this.

        Returns:
            job_id: The id under which the job was recorded, for further job
                management. Only valid within the same ray cluster.

        Raises:
            RuntimeError: If ``job_id`` was supplied and a status is already
                stored for it.
        """
        if job_id is not None:
            if self._job_info_client.get_status(job_id) is not None:
                raise RuntimeError(f"Job {job_id} already exists.")
        else:
            job_id = generate_job_id()

        logger.info(f"Starting job with job_id: {job_id}")
        # start_time is stored as integer milliseconds since the epoch.
        pending_info = JobInfo(
            entrypoint=entrypoint,
            status=JobStatus.PENDING,
            start_time=int(time.time() * 1000),
            metadata=metadata,
            runtime_env=runtime_env,
        )
        self._job_info_client.put_info(job_id, pending_info)

        # The actor is started asynchronously so that this call returns
        # immediately; any exception raised while launching it is caught below
        # and recorded as a FAILED status instead of propagating.
        try:
            configured_supervisor_cls = self._supervisor_actor_cls.options(
                lifetime="detached",
                name=self.JOB_ACTOR_NAME.format(job_id=job_id),
                num_cpus=0,
                # JobManager is currently assumed to be created by the
                # dashboard server running on the head node, and the job
                # supervisor actors are scheduled under the same assumption.
                resources={
                    self._get_current_node_resource_key(): 0.001,
                },
                runtime_env=runtime_env,
            )
            supervisor = configured_supervisor_cls.remote(
                job_id, entrypoint, metadata or {}
            )
            supervisor.run.remote(_start_signal_actor=_start_signal_actor)

            # Watch the job in the background so that errors are detected
            # without requiring a client to poll.
            monitor_coro = self._monitor_job(job_id, job_supervisor=supervisor)
            create_task(monitor_coro)
        except Exception as e:
            self._job_info_client.put_status(
                job_id,
                JobStatus.FAILED,
                message=f"Failed to start job supervisor: {e}.",
            )

        return job_id