Exemple #1
0
def check_and_fix_integrity(wf_ex):
    """Try to recover RUNNING tasks whose children have all completed.

    Every task execution of the given workflow execution that is in
    RUNNING state is inspected. A task is picked for recovery when:

    * at least "execution_integrity_check_delay" seconds passed since the
      task was last updated (or created, if it was never updated);
    * it has child executions and each of them is in a completed state;
    * more than "execution_integrity_check_delay" seconds passed since
      the latest timestamp found among the children.

    For such a task "schedule_on_action_complete" of the task handler is
    called with the last child execution.

    :param wf_ex: Workflow execution whose tasks need to be checked.
    """
    min_idle_seconds = CONF.engine.execution_integrity_check_delay

    # A negative delay means the integrity check is switched off.
    if min_idle_seconds < 0:
        return

    # Imported locally to break a cyclic dependency.
    from mistral.engine import task_handler

    candidates = db_api.get_task_executions(
        workflow_execution_id=wf_ex.id,
        state=states.RUNNING
    )

    for task_ex in candidates:
        # Skip tasks that were touched too recently: only a task that has
        # stayed unchanged for the minimum period is eligible.
        last_touched = task_ex.updated_at or task_ex.created_at
        task_idle = timeutils.delta_seconds(last_touched, timeutils.utcnow())

        if task_idle < min_idle_seconds:
            continue

        children = task_ex.executions

        # Nothing to base a recovery on if the task has no children.
        if not children:
            continue

        # A single unfinished child means the task is legitimately RUNNING.
        if not all(states.is_completed(child.state) for child in children):
            continue

        # The latest child timestamp tells how long ago the last child
        # stopped changing.
        latest_child_ts = max(
            child.updated_at or child.created_at for child in children
        )
        children_idle = timeutils.delta_seconds(
            latest_child_ts,
            timeutils.utcnow()
        )

        if children_idle <= min_idle_seconds:
            continue

        # The task is RUNNING although all of its children are finished.
        # Re-triggering the completion handler for any child makes the task
        # state get calculated and updated properly.
        LOG.warning(
            "Found a task execution that is likely stuck in RUNNING"
            " state because all child executions are finished,"
            " will try to recover [task_execution=%s]", task_ex.id
        )

        task_handler.schedule_on_action_complete(children[-1])
Exemple #2
0
def on_action_complete(action_ex, result):
    """Complete an action execution and notify the owning task, if any.

    The action object is built from the action execution and completed
    with the given result. If completion raises a MistralException, the
    error is logged, the action is failed with the error message and the
    owning task (when there is one) is force-failed. On success the
    task handler is asked to schedule "on action complete" processing,
    again only when the action execution belongs to a task.

    :param action_ex: Action execution being completed.
    :param result: Result to complete the action with.
    """
    task_ex = action_ex.task_execution

    action = _build_action(action_ex)

    try:
        action.complete(result)
    except exc.MistralException as e:
        # The action execution may have no task (see the "if task_ex"
        # guards below), so the task name must be resolved defensively.
        # Otherwise an AttributeError raised here would hide the original
        # error and the action would never be marked as failed.
        task_name = task_ex.name if task_ex else None

        msg = ("Failed to complete action [action=%s, task=%s]: %s\n%s" %
               (action_ex.name, task_name, e, tb.format_exc()))

        LOG.error(msg)

        action.fail(msg)

        if task_ex:
            task_handler.force_fail_task(task_ex, msg)

        return

    if task_ex:
        task_handler.schedule_on_action_complete(action_ex)
def on_action_complete(action_ex, result):
    """Complete an action execution and notify the owning task, if any.

    The action object is built from the action execution and completed
    with the given result. If completion raises a MistralException, the
    error is logged, the action is failed with the error message and the
    owning task (when there is one) is force-failed. On success the
    task handler is asked to schedule "on action complete" processing,
    again only when the action execution belongs to a task.

    :param action_ex: Action execution being completed.
    :param result: Result to complete the action with.
    """
    task_ex = action_ex.task_execution

    action = _build_action(action_ex)

    try:
        action.complete(result)
    except exc.MistralException as e:
        # The action execution may have no task (see the "if task_ex"
        # guards below), so the task name must be resolved defensively.
        # Otherwise an AttributeError raised here would hide the original
        # error and the action would never be marked as failed.
        task_name = task_ex.name if task_ex else None

        msg = (
            "Failed to complete action [error=%s, action=%s, task=%s]:\n%s"
            % (e, action_ex.name, task_name, tb.format_exc())
        )

        LOG.error(msg)

        action.fail(msg)

        if task_ex:
            task_handler.force_fail_task(task_ex, msg)

        return

    if task_ex:
        task_handler.schedule_on_action_complete(action_ex)
def _check_and_fix_integrity(wf_ex_id):
    """Try to recover RUNNING tasks whose children have all completed.

    Within a single DB transaction the workflow execution is loaded by
    its id. If it is already completed nothing else happens. Otherwise
    "_schedule_check_and_fix_integrity" is called for it with delay=120
    (presumably this queues the next run of the check; the unit looks
    like seconds - to be confirmed against that helper), and a batch of
    at most "execution_integrity_check_batch_size" task executions in
    RUNNING state is inspected. A task is picked for recovery when:

    * at least "execution_integrity_check_delay" seconds passed since the
      task was last updated (or created, if it was never updated);
    * it has child executions and each of them is in a completed state;
    * more than "execution_integrity_check_delay" seconds passed since
      the latest timestamp found among the children.

    For such a task "schedule_on_action_complete" of the task handler is
    called with the last child execution.

    :param wf_ex_id: Id of the workflow execution to check.
    """
    min_idle_seconds = CONF.engine.execution_integrity_check_delay

    # A negative delay means the integrity check is switched off.
    if min_idle_seconds < 0:
        return

    # Imported locally to break a cyclic dependency.
    from mistral.engine import task_handler

    with db_api.transaction():
        wf_ex = db_api.get_workflow_execution(wf_ex_id)

        # A completed workflow needs neither a check nor a follow-up run.
        if states.is_completed(wf_ex.state):
            return

        _schedule_check_and_fix_integrity(wf_ex, delay=120)

        candidates = db_api.get_task_executions(
            workflow_execution_id=wf_ex.id,
            state=states.RUNNING,
            limit=CONF.engine.execution_integrity_check_batch_size
        )

        for task_ex in candidates:
            # Skip tasks that were touched too recently: only a task that
            # has stayed unchanged for the minimum period is eligible.
            last_touched = task_ex.updated_at or task_ex.created_at
            task_idle = timeutils.delta_seconds(
                last_touched,
                timeutils.utcnow()
            )

            if task_idle < min_idle_seconds:
                continue

            children = task_ex.executions

            # Nothing to base a recovery on if the task has no children.
            if not children:
                continue

            # A single unfinished child means the task is legitimately
            # RUNNING.
            if not all(states.is_completed(c.state) for c in children):
                continue

            # The latest child timestamp tells how long ago the last child
            # stopped changing.
            latest_child_ts = max(
                c.updated_at or c.created_at for c in children
            )
            children_idle = timeutils.delta_seconds(
                latest_child_ts,
                timeutils.utcnow()
            )

            if children_idle <= min_idle_seconds:
                continue

            # The task is RUNNING although all of its children are
            # finished. Re-triggering the completion handler for any child
            # makes the task state get calculated and updated properly.
            LOG.warning(
                "Found a task execution that is likely stuck in"
                " RUNNING state because all child executions are"
                " finished, will try to recover [task_execution=%s]",
                task_ex.id
            )

            task_handler.schedule_on_action_complete(children[-1])