Example #1
0
def update_job(job: Job) -> Job:
    """
    Update a job based on its result.

    This method is used to update the status of the job by getting its
    `AsyncResult`. It is called when (1) the job is retrieved (i.e. GET) and
    (2) when it is updated with other information (i.e. PATCH).

    The backend is only queried when the job has not ended, or when it has
    ended but neither a `result` nor an `error` has been recorded yet. A
    finished job normally has only one of the two, so requiring *both* to
    be present would cause the backend to be polled on every call, and a
    status reported by the backend for a task it no longer knows about
    would overwrite the recorded final status.

    See https://stackoverflow.com/a/38267978 for important considerations
    in using AsyncResult.

    Args:
        job: The job to refresh. It is mutated in place and saved when
            the backend was queried.

    Returns:
        The same `job` instance.
    """
    # A job is "recorded as ready" once it has ended and we hold either
    # its result or its error.
    has_outcome = job.result is not None or job.error is not None
    if not JobStatus.has_ended(job.status) or not has_outcome:
        async_result = AsyncResult(str(job.id), app=celery)
        # Read `status` and `info` once each and reuse the captured values
        # below, so that the type check and the field copy operate on the
        # same object.
        status = async_result.status
        info = async_result.info

        job.status = status

        if status in (
            JobStatus.RUNNING.value,
            JobStatus.SUCCESS.value,
        ) and isinstance(info, dict):
            # For RUNNING, `info` is the `meta` kwarg passed to
            # `Job.update_state()` call in the worker process.
            # For SUCCESS, `info` is the value returned
            # by the `Job.success()` method in the worker process.
            for field in ("result", "log", "url"):
                if field in info:
                    setattr(job, field, info[field])

        if status == JobStatus.FAILURE.value:
            # For FAILURE, `info` is the raised Exception
            job.error = dict(type=type(info).__name__, message=str(info))

        if job.parent is not None:
            check_job(job.parent)

        job.save()
    return job
Example #2
0
File: jobs.py Project: jlbrewe/hub
def update_job(job: Job, data=None, force: bool = False) -> Job:
    """
    Update a job.

    This method is triggered by a PATCH request from the
    `overseer` service. It updates the status, and other fields of
    the job, and if the job has a parent, updates its status too.

    See https://stackoverflow.com/a/38267978 for important considerations
    in using AsyncResult.

    Args:
        job: The job to update. It is mutated in place and saved.
        data: Mapping of field names to new values. For non-compound jobs
            it must contain a truthy `status`. Defaults to an empty
            mapping; it is not used for compound jobs.
        force: Update the job even if it is no longer active.

    Returns:
        The same `job` instance.

    Raises:
        AssertionError: If the job is not compound and `data` has no
            truthy `status`.
    """
    # Normalise here rather than using a mutable default argument.
    if data is None:
        data = {}

    # Avoid unnecessary update
    if not job.is_active and not force:
        return job

    was_active = job.is_active

    if JobMethod.is_compound(job.method):
        # Update the status of compound jobs based on children
        status = job.status
        is_active = False
        all_previous_succeeded = True
        any_previous_failed = False
        for child in job.get_children():
            # If the child has a 'higher' status then update the
            # status of the compound job
            status = JobStatus.highest([status, child.status])

            # If the child is still waiting then...
            if child.status == JobStatus.WAITING.value:
                # If all previous have succeeded, dispatch it
                if all_previous_succeeded:
                    dispatch_job(child)
                # If any previous have failed, cancel it
                elif any_previous_failed:
                    cancel_job(child)

            if child.status != JobStatus.SUCCESS.value:
                all_previous_succeeded = False
            if child.status == JobStatus.FAILURE.value:
                any_previous_failed = True

            # If the child is still active then the compound job is active
            if child.is_active:
                is_active = True

        job.is_active = is_active
        job.status = JobStatus.RUNNING.value if is_active else status

    else:
        status = data.get("status")
        # Kept as an assert so callers see the same exception type as before.
        assert status

        # Do not do anything if the new status is lower rank than the
        # existing status. This can exist for example when a job is
        # terminated (the SUCCESS state is sent after TERMINATED)
        if JobStatus.rank(status) < JobStatus.rank(job.status):
            return job

        # Update fields sent by `overseer` service, including `status`
        # NOTE(review): every key in `data` is set on the job as-is; this
        # presumably relies on the caller having validated the payload.
        for key, value in data.items():
            setattr(job, key, value)

        def async_result():
            return AsyncResult(str(job.id), app=app)

        # If job succeeded then get the result if we haven't already
        if status == JobStatus.SUCCESS.value and job.result is None:
            max_attempts = 5
            response = None
            attempts = 0  # Number of failed fetches so far
            while attempts < max_attempts:
                try:
                    response = async_result().get(timeout=30)
                except Exception:
                    # Catch all errors, but log them. Occasional
                    # errors encountered in prod include ResponseError and TimeoutError
                    logger.warning(
                        "Error getting async result",
                        exc_info=True,
                        extra=dict(
                            id=job.id, method=job.method, attempts=attempts
                        ),
                    )
                    time.sleep(1)
                    attempts += 1
                else:
                    # The fetch succeeded, so stop retrying even if the
                    # value is falsy. Looping on `not response` alone would
                    # spin forever here because `attempts` only grows on
                    # errors.
                    break

            if response:
                job.result = response.get("result")
                job.log = response.get("log")
            else:
                logger.error(
                    "Unable to get async result",
                    extra=dict(id=job.id, method=job.method, attempts=attempts),
                )
                job.status = JobStatus.FAILURE.value
                job.error = dict(
                    type="RuntimeError", message="Unable to get result of job"
                )

        # If job failed then get the error
        # For FAILURE, `info` is the raised Exception
        elif status == JobStatus.FAILURE.value:
            info = async_result().info
            if info:
                job.error = dict(type=type(info).__name__, message=str(info))

        # If the job has just ended then mark it as inactive
        if JobStatus.has_ended(status):
            job.is_active = False

    # If the job is no longer active clear its secrets and run its callback
    if was_active and not job.is_active:
        job.secrets = None
        job.run_callback()

    # Save before updating parent (and then this again)
    job.save()

    # If the job has a parent then update it too
    if job.parent:
        update_job(job.parent)

    return job