예제 #1
0
def jobs_build(job_id):
    """Request the docker image build for a job and update the job accordingly.

    The job is fetched with ``get_valid_job``; nothing happens when it is
    missing or when its last status cannot transition to ``BUILDING``.
    Otherwise a build job is requested from the dockerizer scheduler and
    stored on the job, after which exactly one of the following happens:

    * the image already exists: the ``JOBS_START`` task is sent;
    * the build was started: the job is marked ``BUILDING``;
    * the build could not be started: the job is marked ``FAILED``.

    Args:
        job_id: identifier handed to ``get_valid_job`` and to the start task.

    Returns:
        Always ``None``.
    """
    job = get_valid_job(job_id=job_id)
    if not job:
        return None

    target_status = JobLifeCycle.BUILDING
    current_status = job.last_status
    if not JobLifeCycle.can_transition(status_from=current_status,
                                       status_to=target_status):
        _logger.info('Job id `%s` cannot transition from `%s` to `%s`.',
                     job_id, current_status, target_status)
        return None

    spec = job.specification
    build_kwargs = {
        'user': job.user,
        'project': job.project,
        'config': spec.build,
        'configmap_refs': spec.configmap_refs,
        'secret_refs': spec.secret_refs,
        'code_reference': job.code_reference,
    }
    created_build, already_built, build_started = dockerizer_scheduler.create_build_job(
        **build_kwargs)

    # Remember which build job backs this job before acting on the outcome.
    job.build_job = created_build
    job.save(update_fields=['build_job'])

    if already_built:
        # Nothing to build: hand over to the start task straight away.
        celery_app.send_task(
            SchedulerCeleryTasks.JOBS_START,
            kwargs={'job_id': job_id},
            countdown=conf.get('GLOBAL_COUNTDOWN'))
    elif build_started:
        # Surface that the docker image is currently being built.
        job.set_status(JobLifeCycle.BUILDING, message='Building container')
    else:
        job.set_status(JobLifeCycle.FAILED, message='Could not start build process.')
    return None
예제 #2
0
파일: job.py 프로젝트: Mofef/polyaxon
 def _set_status(self,
                 status_model,
                 status: str,
                 created_at: AwareDT = None,
                 message: str = None,
                 traceback: Dict = None,
                 details: Dict = None) -> bool:
     """Create a new status entry for this job when the lifecycle allows it.

     Args:
         status_model: model whose ``objects.create`` stores the status row.
         status: the status to record.
         created_at: optional timestamp; also used to look up the status that
             preceded it and, when truthy, stored on the new row.
         message: optional message stored with the status.
         traceback: optional traceback payload stored with the status.
         details: optional details payload stored with the status.

     Returns:
         ``True`` if a status row was created, ``False`` if the job is
         already done or the transition is not permitted.
     """
     previous_status = self.last_status_before(status_model=status_model,
                                               status_date=created_at)

     # A finished job no longer accepts status updates.
     if self.is_done:
         _logger.debug(
             'Received a new status `%s` for job `%s`. '
             'But the job is already done with status `%s`', status,
             self.unique_name, previous_status)
         return False

     # The heartbeat is refreshed even if the transition is rejected below.
     if status in JobLifeCycle.HEARTBEAT_STATUS:
         self._ping_heartbeat()

     allowed = JobLifeCycle.can_transition(status_from=previous_status,
                                           status_to=status)
     if not allowed:
         return False

     row_fields = {
         'job': self,
         'status': status,
         'message': message,
         'traceback': traceback,
         'details': details,
     }
     if created_at:
         # Only pass the timestamp explicitly when the caller supplied one.
         row_fields['created_at'] = created_at
     status_model.objects.create(**row_fields)
     return True
예제 #3
0
def projects_notebook_build(notebook_job_id):
    """Request the docker image build for a notebook job and update its state.

    The notebook job is fetched with ``get_valid_notebook``; nothing happens
    when it is missing or when its last status cannot transition to
    ``BUILDING``. Otherwise a build job is requested from the dockerizer
    scheduler and stored on the notebook job, after which exactly one of the
    following happens:

    * the image already exists: the ``PROJECTS_NOTEBOOK_START`` task is sent;
    * the build was started: the notebook job is marked ``BUILDING``;
    * the build could not be started: the notebook job is marked ``FAILED``.

    Args:
        notebook_job_id: identifier handed to ``get_valid_notebook`` and to
            the start task.

    Returns:
        Always ``None``.
    """
    notebook_job = get_valid_notebook(notebook_job_id=notebook_job_id)
    if not notebook_job:
        return None

    target_status = JobLifeCycle.BUILDING
    current_status = notebook_job.last_status
    if not JobLifeCycle.can_transition(status_from=current_status,
                                       status_to=target_status):
        _logger.info('Notebook `%s` cannot transition from `%s` to `%s`.',
                     notebook_job, current_status, target_status)
        return None

    spec = notebook_job.specification
    build_kwargs = {
        'user': notebook_job.user,
        'project': notebook_job.project,
        'config': spec.build,
        'configmap_refs': spec.configmap_refs,
        'secret_refs': spec.secret_refs,
        'code_reference': notebook_job.code_reference,
    }
    created_build, already_built, build_started = dockerizer_scheduler.create_build_job(
        **build_kwargs)

    # Remember which build job backs this notebook before acting on the outcome.
    notebook_job.build_job = created_build
    notebook_job.save(update_fields=['build_job'])

    if already_built:
        # Nothing to build: hand over to the start task straight away.
        workers.send(
            SchedulerCeleryTasks.PROJECTS_NOTEBOOK_START,
            kwargs={'notebook_job_id': notebook_job_id})
    elif build_started:
        # Surface that the docker image is currently being built.
        notebook_job.set_status(JobLifeCycle.BUILDING, message='Building container')
    else:
        notebook_job.set_status(JobLifeCycle.FAILED,
                                message='Could not start build process.')
    return None
예제 #4
0
def projects_notebook_start(notebook_job_id):
    """Start a notebook job if its lifecycle allows it to be scheduled.

    The notebook job is fetched with ``get_valid_notebook``. Nothing is
    started when the job is missing or when its last status cannot
    transition to ``SCHEDULED``.

    Args:
        notebook_job_id: identifier handed to ``get_valid_notebook``.

    Returns:
        Always ``None``.
    """
    notebook_job = get_valid_notebook(notebook_job_id=notebook_job_id)
    if not notebook_job:
        return None

    if not JobLifeCycle.can_transition(status_from=notebook_job.last_status,
                                       status_to=JobLifeCycle.SCHEDULED):
        _logger.info('Notebook `%s` cannot transition from `%s` to `%s`.',
                     notebook_job.unique_name, notebook_job.last_status, JobLifeCycle.SCHEDULED)
        # Stop here: previously the notebook was started even though the
        # transition had just been reported as not allowed.
        return None

    notebook_scheduler.start_notebook(notebook_job)
    return None
예제 #5
0
def should_handle_job_status(pod_state: Any, status: str) -> bool:
    """Tell whether a status reported for a pod's job should be processed.

    The job uuid is read from ``pod_state['details']['labels']['job_uuid']``
    and the status stored for it is looked up through ``RedisStatuses``.

    Args:
        pod_state: mapping that carries the job uuid under
            ``['details']['labels']['job_uuid']``.
        status: the newly reported status.

    Returns:
        ``True`` when no status is stored for the job, when Redis cannot be
        reached, or when ``JobLifeCycle`` allows moving from the stored
        status to ``status``; ``False`` otherwise.
    """
    job_uuid = pod_state['details']['labels']['job_uuid']
    try:
        # Read the stored status exactly once, inside the guard: a second
        # read could observe a different (e.g. evicted) value, and a read
        # outside the guard would let connection errors escape.
        current_status = RedisStatuses.get_status(job=job_uuid)
        if not current_status:  # If the status does not exist or is evicted
            return True
        return JobLifeCycle.can_transition(status_from=current_status,
                                           status_to=status)
    except redis.connection.ConnectionError:
        # Without access to the stored status, err on the side of handling.
        return True
예제 #6
0
def tensorboards_start(tensorboard_job_id):
    """Start a tensorboard job if its lifecycle allows it to be scheduled.

    The tensorboard job is fetched with ``get_valid_tensorboard``. Nothing is
    started when the job is missing or when its last status cannot
    transition to ``SCHEDULED``. If the scheduler raises
    ``StoreNotFoundError`` the job is marked ``FAILED``.

    Args:
        tensorboard_job_id: identifier handed to ``get_valid_tensorboard``.

    Returns:
        Always ``None``.
    """
    tensorboard = get_valid_tensorboard(tensorboard_job_id=tensorboard_job_id)
    if not tensorboard:
        return None

    if not JobLifeCycle.can_transition(status_from=tensorboard.last_status,
                                       status_to=JobLifeCycle.SCHEDULED):
        _logger.info('Tensorboard `%s` cannot transition from `%s` to `%s`.',
                     tensorboard.unique_name, tensorboard.last_status, JobLifeCycle.SCHEDULED)
        # Stop here: previously the tensorboard was started even though the
        # transition had just been reported as not allowed.
        return None

    try:
        tensorboard_scheduler.start_tensorboard(tensorboard)
    except StoreNotFoundError:
        tensorboard.set_status(status=JobLifeCycle.FAILED,
                               message='Tensorboard failed to start, '
                                       'the outputs volume/storage was not found.')
    return None
예제 #7
0
def jobs_start(job_id):
    """Start a job unless it is missing, already running or not schedulable.

    Args:
        job_id: identifier handed to ``get_valid_job``.

    Returns:
        Always ``None``.
    """
    job = get_valid_job(job_id=job_id)
    if not job:
        return None

    current_status = job.last_status
    if current_status == JobLifeCycle.RUNNING:
        _logger.warning('Job is already running.')
        return None

    target_status = JobLifeCycle.SCHEDULED
    schedulable = JobLifeCycle.can_transition(status_from=current_status,
                                              status_to=target_status)
    if schedulable:
        job_scheduler.start_job(job)
    else:
        _logger.info('Job `%s` cannot transition from `%s` to `%s`.',
                     job.unique_name, current_status, target_status)
    return None
예제 #8
0
    def test_job_statuses_transition(self):
        """Check ``JobLifeCycle.can_transition`` against a table of expectations.

        Each target status is paired with the set of source statuses from
        which the transition must be accepted; every other status in
        ``JobLifeCycle.VALUES`` must be rejected for that target.
        """
        every_status = JobLifeCycle.VALUES
        # Statuses of jobs that have not finished yet.
        unfinished = every_status - JobLifeCycle.DONE_STATUS

        expectations = (
            # Nothing can transition to `CREATED`.
            (JobLifeCycle.CREATED, set()),
            (JobLifeCycle.BUILDING, {
                JobLifeCycle.CREATED,
                JobLifeCycle.RESUMING,
                JobLifeCycle.SCHEDULED,
                JobLifeCycle.UNSCHEDULABLE,
                JobLifeCycle.WARNING,
                JobLifeCycle.UNKNOWN,
            }),
            (JobLifeCycle.SCHEDULED, {
                JobLifeCycle.CREATED,
                JobLifeCycle.RESUMING,
                JobLifeCycle.BUILDING,
                JobLifeCycle.WARNING,
                JobLifeCycle.UNSCHEDULABLE,
                JobLifeCycle.UNKNOWN,
            }),
            (JobLifeCycle.RUNNING, {
                JobLifeCycle.CREATED,
                JobLifeCycle.SCHEDULED,
                JobLifeCycle.RESUMING,
                JobLifeCycle.BUILDING,
                JobLifeCycle.UNSCHEDULABLE,
                JobLifeCycle.UNKNOWN,
                JobLifeCycle.WARNING,
            }),
            # Terminal statuses are reachable from any unfinished status.
            (JobLifeCycle.SKIPPED, unfinished),
            (JobLifeCycle.SUCCEEDED, unfinished),
            (JobLifeCycle.FAILED, unfinished),
            (JobLifeCycle.UPSTREAM_FAILED, unfinished),
            (JobLifeCycle.STOPPED, unfinished),
            # No self-transition for `WARNING` and `UNKNOWN`.
            (JobLifeCycle.WARNING, unfinished - {JobLifeCycle.WARNING}),
            (JobLifeCycle.UNKNOWN, every_status - {JobLifeCycle.UNKNOWN}),
        )

        for status_to, accepted_sources in expectations:
            for status_from in every_status:
                outcome = JobLifeCycle.can_transition(
                    status_from=status_from, status_to=status_to)
                assert outcome is (status_from in accepted_sources)