Ejemplo n.º 1
0
def done_dask_callback(
    dask_future: distributed.Future,
    task_to_future_map: Dict[str, distributed.Future],
    user_callback: UserCompleteCB,
    main_loop: asyncio.AbstractEventLoop,
):
    """Convert a completed dask future into a ``TaskStateEvent`` and dispatch it.

    The future's outcome is mapped as follows:

    * status ``"error"``  -> ``RunningState.FAILED`` with the JSON-encoded,
      formatted exception/traceback as message,
    * cancelled           -> ``RunningState.ABORTED``,
    * anything else       -> ``RunningState.SUCCESS`` with the JSON of the
      task result as message,
    * ``distributed.TimeoutError`` while fetching any of the above
      -> ``RunningState.FAILED`` with a timeout message.

    In every case the future is removed from ``task_to_future_map``. When an
    event could be built, ``user_callback(event)`` is scheduled on
    ``main_loop`` via ``asyncio.run_coroutine_threadsafe``.

    Args:
        dask_future: the future that just completed; its ``key`` is used as
            the job id.
        task_to_future_map: job id -> future registry; the entry for this
            job is dropped here.
        user_callback: coroutine function called with the resulting event.
        main_loop: event loop on which ``user_callback`` is scheduled.

    Raises:
        Any exception other than ``distributed.TimeoutError`` raised while
        inspecting the future propagates to the caller; in that case no
        event is dispatched.
    """
    # NOTE: BEWARE we are called in a separate thread!!
    job_id = dask_future.key
    event_data: Optional[TaskStateEvent] = None
    logger.debug("task '%s' completed with status %s", job_id,
                 dask_future.status)
    try:
        if dask_future.status == "error":
            task_exception = dask_future.exception(
                timeout=_DASK_FUTURE_TIMEOUT_S)
            task_traceback = dask_future.traceback(
                timeout=_DASK_FUTURE_TIMEOUT_S)
            event_data = TaskStateEvent(
                job_id=job_id,
                state=RunningState.FAILED,
                msg=json_dumps(
                    traceback.format_exception(type(task_exception),
                                               value=task_exception,
                                               tb=task_traceback)),
            )
        elif dask_future.cancelled():
            event_data = TaskStateEvent(job_id=job_id,
                                        state=RunningState.ABORTED)
        else:
            task_result = cast(
                TaskOutputData,
                dask_future.result(timeout=_DASK_FUTURE_TIMEOUT_S))
            assert task_result  # nosec
            event_data = TaskStateEvent(
                job_id=job_id,
                state=RunningState.SUCCESS,
                msg=task_result.json(),
            )
    except distributed.TimeoutError:
        event_data = TaskStateEvent(
            job_id=job_id,
            state=RunningState.FAILED,
            msg=f"Timeout error getting results of '{job_id}'",
        )
        logger.error(
            "fetching result of '%s' timed-out, please check",
            job_id,
            exc_info=True,
        )
    finally:
        # remove the future from the dict to remove any handle to the future, so the worker can free the memory
        # A default is given so that a missing key cannot raise from inside
        # this ``finally`` block and hide an exception already in flight.
        task_to_future_map.pop(job_id, None)
        if event_data is None:
            # Only reachable when the ``try`` body raised something other
            # than a timeout. Deliberately no ``return``/``raise`` here: the
            # original exception must keep propagating unmasked.
            logger.error(
                "no state event could be built for task '%s', "
                "nothing is dispatched to the main thread",
                job_id,
            )
        else:
            logger.debug("dispatching callback to finish task '%s'", job_id)
            try:
                # The returned concurrent future is not kept, so errors
                # raised later inside ``user_callback`` are not seen here;
                # only scheduling failures are caught below.
                asyncio.run_coroutine_threadsafe(user_callback(event_data),
                                                 main_loop)
            except Exception:  # pylint: disable=broad-except
                logger.exception(
                    "Unexpected issue while transmitting state to main thread")
Ejemplo n.º 2
0
def results(owner, app_name, job_id):
    """Return the outcome of job ``job_id`` for the app ``owner/app_name``.

    The cluster type reported by ``get_cluster_type`` selects the backend:

    * ``"single-core"``: the job is looked up through ``AsyncResult``.
    * ``"dask"``: the job is looked up as a dask ``Future`` on the scheduler
      returned by ``dask_scheduler_address``.
    * anything else: a JSON error body with HTTP status 404.

    Returns:
        * the job result when it finished successfully (JSON-encoded for
          ``"single-core"``, returned as-is for ``"dask"``),
        * a JSON string ``{"status": "WORKER_FAILURE", "traceback": ...}``
          when the job failed (or, for dask, was cancelled); the traceback
          is ``null`` when the dask future has none,
        * ``make_response("not ready", 202)`` while the job is not done,
        * ``(json_body, 404)`` for an unknown cluster type.
    """
    # Local import: only the dask failure path needs it, and the module's
    # import block is not visible from here.
    import traceback as _traceback

    cluster_type = get_cluster_type(owner, app_name)
    if cluster_type == "single-core":
        async_result = AsyncResult(job_id)
        if async_result.ready() and async_result.successful():
            return json.dumps(async_result.result)
        elif async_result.failed():
            print("traceback", async_result.traceback)
            return json.dumps({
                "status": "WORKER_FAILURE",
                "traceback": async_result.traceback
            })
        else:
            return make_response("not ready", 202)
    elif cluster_type == "dask":
        addr = dask_scheduler_address(owner, app_name)
        with Client(addr) as client:
            fut = Future(job_id, client=client)
            if not fut.done():
                return make_response("not ready", 202)
            # Failure states must be tested before calling ``result()``:
            # a cancelled future is done but ``result()`` raises on it.
            if fut.status in ("error", "cancelled"):
                tb = fut.traceback()
                # A traceback object is not JSON serialisable, so it is
                # rendered to text first; ``None`` and plain strings are
                # passed through unchanged.
                if tb is None or isinstance(tb, str):
                    tb_text = tb
                else:
                    tb_text = "".join(_traceback.format_tb(tb))
                return json.dumps({
                    "status": "WORKER_FAILURE",
                    "traceback": tb_text
                })
            return fut.result()
    else:
        return json.dumps({"error": "model does not exist."}), 404