예제 #1
0
    def _fail_workflow(wf_ex_id, err, action_ex_id=None):
        """Put a workflow execution into ERROR because of an exception.

        Everything runs inside one DB transaction.

        :param wf_ex_id: Id of the workflow execution to fail.
        :param err: The error; its ``str()`` form is passed on as the
            error text.
        :param action_ex_id: Optional id of an action execution that
            should be completed with an error result.
        :return: The workflow execution object, or None if no execution
            with the given id could be loaded.
        """
        error_text = str(err)

        with db_api.transaction():
            execution = db_api.load_workflow_execution(wf_ex_id)

            if execution is None:
                LOG.error(
                    "Cant fail workflow execution with id='%s': not found.",
                    wf_ex_id
                )

                return None

            wf_handler.set_execution_state(
                execution,
                states.ERROR,
                error_text
            )

            if not action_ex_id:
                return execution

            # Note(dzimine): The engine client is deliberately not used
            # here:
            # 1) so that next tasks are not computed and triggered
            # 2) so that a transport error cannot cause a loop
            failed_action_ex = db_api.get_action_execution(action_ex_id)
            error_result = wf_utils.Result(error=error_text)

            task_handler.on_action_complete(failed_action_ex, error_result)

            return execution
예제 #2
0
    def _fail_workflow(wf_ex_id, err, action_ex_id=None):
        """Put a workflow execution into ERROR because of an exception.

        All the work, including converting the error to text, happens
        inside one DB transaction. Nothing is returned.

        :param wf_ex_id: Id of the workflow execution to fail.
        :param err: The error; its ``str()`` form is passed on as the
            error text.
        :param action_ex_id: Optional id of an action execution that
            should be completed with an error result.
        """
        with db_api.transaction():
            error_text = str(err)

            execution = db_api.load_workflow_execution(wf_ex_id)

            if execution is None:
                LOG.error(
                    "Cant fail workflow execution with id='%s': not found.",
                    wf_ex_id
                )

                return

            wf_handler.set_execution_state(
                execution,
                states.ERROR,
                error_text
            )

            if not action_ex_id:
                return

            # Note(dzimine): The engine client is deliberately not used
            # here:
            # 1) so that next tasks are not computed and triggered
            # 2) so that a transport error cannot cause a loop
            failed_action_ex = db_api.get_action_execution(action_ex_id)
            error_result = wf_utils.Result(error=error_text)

            task_handler.on_action_complete(failed_action_ex, error_result)
예제 #3
0
def _check_and_complete(wf_ex_id):
    """Check a workflow execution for completion and schedule a re-check.

    Does nothing if the execution cannot be loaded or is already
    completed. If the check raises a MistralException the workflow is
    force-failed with the error details and no re-check is scheduled.

    :param wf_ex_id: Id of the workflow execution to check.
    """
    # Note: This method can only be called via scheduler.
    with db_api.transaction():
        execution = db_api.load_workflow_execution(wf_ex_id)

        if not execution:
            return

        if states.is_completed(execution.state):
            return

        workflow = workflows.Workflow(wf_ex=execution)

        try:
            remaining = workflow.check_and_complete()
        except exc.MistralException as e:
            failure_msg = ("Failed to check and complete [wf_ex=%s]:"
                           " %s\n%s" % (execution, e, tb.format_exc()))

            LOG.error(failure_msg)

            force_fail_workflow(workflow.wf_ex, failure_msg)
        else:
            if states.is_completed(execution.state):
                return

            # The delay before the next check is a rough estimate that
            # assumes a task takes about 0.01 sec on average to complete.
            # It shrinks as tasks finish, which gives a decent
            # approximation. With the 0.01 factor used here, 100
            # incomplete tasks give a delay of 1 and 500 incomplete tasks
            # give a delay of 5 (presumably seconds). The larger the
            # workflow is, the more beneficial this mechanism is.
            next_check_delay = int(remaining * 0.01)

            _schedule_check_and_complete(execution, next_check_delay)
예제 #4
0
def _get_workflow_execution(id, must_exist=True):
    """Fetch a workflow execution and load its deferred output field.

    :param id: Workflow execution id.
    :param must_exist: When truthy the execution is fetched with
        ``db_api.get_workflow_execution``, otherwise with
        ``db_api.load_workflow_execution``.
    :return: Whatever ``_load_deferred_output_field`` returns for the
        fetched execution.
    """
    with db_api.transaction():
        fetch = (
            db_api.get_workflow_execution if must_exist
            else db_api.load_workflow_execution
        )

        return _load_deferred_output_field(fetch(id))
예제 #5
0
def _get_workflow_execution(id, must_exist=True):
    """Return a workflow execution with its deferred output field loaded.

    The lookup and the deferred field loading share one DB transaction.

    :param id: Workflow execution id.
    :param must_exist: Selects the DB API call: ``get_workflow_execution``
        when truthy, ``load_workflow_execution`` otherwise.
    :return: The result of ``_load_deferred_output_field``.
    """
    with db_api.transaction():
        if not must_exist:
            execution = db_api.load_workflow_execution(id)
        else:
            execution = db_api.get_workflow_execution(id)

        return _load_deferred_output_field(execution)
예제 #6
0
def _scheduled_on_action_update(action_ex_id, wf_action):
    """Load an execution by id and pass it to ``_on_action_update``.

    Nothing happens if the execution cannot be loaded.

    :param action_ex_id: Id of the execution to load.
    :param wf_action: When truthy the id is looked up as a workflow
        execution, otherwise as an action execution.
    """
    with db_api.transaction():
        loader = (
            db_api.load_workflow_execution if wf_action
            else db_api.load_action_execution
        )

        loaded_ex = loader(action_ex_id)

        if not loaded_ex:
            return

        _on_action_update(loaded_ex)
예제 #7
0
def _get_workflow_execution(id, must_exist=True):
    """Fetch a workflow execution and load a fixed set of deferred fields.

    :param id: Workflow execution id.
    :param must_exist: When truthy the execution is fetched with
        ``db_api.get_workflow_execution``, otherwise with
        ``db_api.load_workflow_execution``.
    :return: The result of ``rest_utils.load_deferred_fields``.
    """
    # Fields handed to rest_utils.load_deferred_fields for loading.
    deferred_fields = ['params', 'input', 'output', 'context', 'spec']

    with db_api.transaction():
        fetch = (
            db_api.get_workflow_execution if must_exist
            else db_api.load_workflow_execution
        )

        execution = fetch(id)

        return rest_utils.load_deferred_fields(execution, deferred_fields)
예제 #8
0
def check_and_complete(wf_ex_id):
    """Run the completion check for a workflow execution.

    Returns without doing anything if the execution cannot be loaded or
    is already completed. If the check raises a MistralException the
    error is logged and the workflow is force-failed with the same
    message.

    :param wf_ex_id: Id of the workflow execution to check.
    """
    execution = db_api.load_workflow_execution(wf_ex_id)

    if not execution:
        return

    if states.is_completed(execution.state):
        return

    workflow = workflows.Workflow(wf_ex=execution)

    try:
        workflow.check_and_complete()
    except exc.MistralException as e:
        failure_msg = (
            "Failed to check and complete [wf_ex_id=%s, wf_name=%s]:"
            " %s\n%s" % (wf_ex_id, execution.name, e, tb.format_exc())
        )

        LOG.error(failure_msg)

        force_fail_workflow(workflow.wf_ex, failure_msg)
예제 #9
0
    def _fail_workflow(wf_ex_id, exc):
        """Put a workflow execution into ERROR because of an exception.

        The execution is first loaded to confirm it exists and is then
        re-obtained through ``wf_handler.lock_workflow_execution``. The
        state is changed only if the execution is not already paused or
        completed.

        :param wf_ex_id: Id of the workflow execution to fail.
        :param exc: The error; its ``str()`` form is passed on as the
            error text.
        :return: The locked workflow execution, or None if no execution
            with the given id could be loaded.
        """
        with db_api.transaction():
            if db_api.load_workflow_execution(wf_ex_id) is None:
                LOG.error(
                    "Can't fail workflow execution with id='%s': not found.",
                    wf_ex_id
                )

                return None

            locked_ex = wf_handler.lock_workflow_execution(wf_ex_id)

            if states.is_paused_or_completed(locked_ex.state):
                return locked_ex

            wf_handler.set_execution_state(
                locked_ex,
                states.ERROR,
                str(exc)
            )

            return locked_ex
예제 #10
0
def check_and_complete(wf_ex_id):
    """Check whether a workflow execution can be completed.

    A missing or already completed execution is ignored. A
    MistralException raised by the check is logged together with the
    traceback, and the workflow is force-failed with that message.

    :param wf_ex_id: Id of the workflow execution to check.
    """
    execution = db_api.load_workflow_execution(wf_ex_id)

    nothing_to_do = not execution or states.is_completed(execution.state)

    if nothing_to_do:
        return

    workflow = workflows.Workflow(wf_ex=execution)

    try:
        workflow.check_and_complete()
    except exc.MistralException as e:
        details = (wf_ex_id, execution.name, e, tb.format_exc())

        failure_msg = (
            "Failed to check and complete [wf_ex_id=%s, wf_name=%s]:"
            " %s\n%s" % details
        )

        LOG.error(failure_msg)

        force_fail_workflow(workflow.wf_ex, failure_msg)
예제 #11
0
def _check_and_complete(wf_ex_id):
    """Check a workflow execution for completion and schedule a re-check.

    Does nothing if the execution cannot be loaded or is already
    completed. The integrity check, the completion check and the
    rescheduling all run inside one ``try`` block, so a MistralException
    from any of them force-fails the workflow.

    :param wf_ex_id: Id of the workflow execution to check.
    """
    # Note: This method can only be called via scheduler.
    with db_api.transaction():
        execution = db_api.load_workflow_execution(wf_ex_id)

        if not execution:
            return

        if states.is_completed(execution.state):
            return

        workflow = workflows.Workflow(wf_ex=execution)

        try:
            check_and_fix_integrity(execution)

            remaining = workflow.check_and_complete()

            if not states.is_completed(execution.state):
                # A non-zero count gives a base delay of 2 plus a tenth
                # of the count; otherwise a fixed delay of 4 is used.
                if remaining:
                    next_check_delay = 2 + int(remaining * 0.1)
                else:
                    next_check_delay = 4

                # Rescheduling this check may not happen if errors are
                # raised in the business logic. If the error is DB
                # related and not considered fatal (e.g. disconnect,
                # deadlock), the retry annotation around the method will
                # ensure that the whole method is retried in a new
                # transaction. On fatal errors the check should not be
                # rescheduled as it could result in undesired
                # consequences. Errors that should not be considered
                # fatal should be handled explicitly.
                _schedule_check_and_complete(execution, next_check_delay)

        except exc.MistralException as e:
            details = (wf_ex_id, execution.name, e, tb.format_exc())

            failure_msg = (
                "Failed to check and complete [wf_ex_id=%s, wf_name=%s]:"
                " %s\n%s" % details
            )

            LOG.error(failure_msg)

            force_fail_workflow(workflow.wf_ex, failure_msg)
예제 #12
0
def _check_and_complete(wf_ex_id):
    """Check a workflow execution for completion and schedule a re-check.

    Does nothing if the execution cannot be loaded or is already
    completed. The workflow object is built from the workflow definition
    looked up by the execution's ``workflow_id``. If the check raises a
    MistralException the workflow is force-failed and no re-check is
    scheduled.

    :param wf_ex_id: Id of the workflow execution to check.
    """
    # Note: This method can only be called via scheduler.
    with db_api.transaction():
        execution = db_api.load_workflow_execution(wf_ex_id)

        if not execution:
            return

        if states.is_completed(execution.state):
            return

        definition = db_api.get_workflow_definition(execution.workflow_id)

        workflow = workflows.Workflow(definition, wf_ex=execution)

        try:
            remaining = workflow.check_and_complete()
        except exc.MistralException as e:
            failure_msg = (
                "Failed to check and complete [wf_ex=%s]:"
                " %s\n%s" % (execution, e, tb.format_exc())
            )

            LOG.error(failure_msg)

            force_fail_workflow(workflow.wf_ex, failure_msg)
        else:
            if states.is_completed(execution.state):
                return

            # The delay before the next check is a rough estimate that
            # assumes a task takes about 0.01 sec on average to complete.
            # It shrinks as tasks finish, which gives a decent
            # approximation. With the 0.01 factor used here, 100
            # incomplete tasks give a delay of 1 and 500 incomplete tasks
            # give a delay of 5 (presumably seconds). The larger the
            # workflow is, the more beneficial this mechanism is.
            next_check_delay = int(remaining * 0.01)

            _schedule_check_and_complete(execution, next_check_delay)
예제 #13
0
def _check_and_fix_integrity(wf_ex_id):
    """Look for RUNNING task executions whose children are all finished.

    The check is skipped entirely when the configured
    ``execution_integrity_check_delay`` is negative, and also when the
    workflow execution cannot be loaded or is already completed.
    Otherwise the next integrity check is scheduled (delay=120) and a
    batch of RUNNING task executions is inspected. For each task that
    looks stuck, ``task_handler.schedule_on_action_complete`` is called
    for its last child execution.

    :param wf_ex_id: Id of the workflow execution to check.
    """
    min_idle_seconds = CONF.engine.execution_integrity_check_delay

    if min_idle_seconds < 0:
        # A negative value disables the integrity check.
        return

    # Imported here to break a cyclic dependency.
    from mistral.engine import task_handler

    with db_api.transaction():
        wf_ex = db_api.load_workflow_execution(wf_ex_id)

        if not wf_ex or states.is_completed(wf_ex.state):
            return

        _schedule_check_and_fix_integrity(wf_ex, delay=120)

        candidate_task_execs = db_api.get_task_executions(
            workflow_execution_id=wf_ex.id,
            state=states.RUNNING,
            limit=CONF.engine.execution_integrity_check_batch_size
        )

        for task_ex in candidate_task_execs:
            # A task execution is eligible for checking and fixing only
            # if a minimum period of time has elapsed since its latest
            # known timestamp.
            last_touched = task_ex.updated_at or task_ex.created_at

            idle_seconds = timeutils.delta_seconds(
                last_touched,
                timeutils.utcnow()
            )

            if idle_seconds < min_idle_seconds:
                continue

            children = task_ex.executions

            if not children:
                continue

            completion_flags = [
                states.is_completed(child.state) for child in children
            ]

            if not all(completion_flags):
                continue

            # Timestamp of the most recently finished child.
            child_timestamps = [
                child.updated_at or child.created_at
                for child in children
            ]

            newest_child_timestamp = max(child_timestamps)

            since_last_child = timeutils.delta_seconds(
                newest_child_timestamp,
                timeutils.utcnow()
            )

            if since_last_child > min_idle_seconds:
                # This task execution is in RUNNING state although all
                # of its child executions are finished. Calling
                # "schedule_on_action_complete" on the task handler for
                # any of the child executions lets the task state be
                # calculated and updated properly.
                LOG.warning(
                    "Found a task execution that is likely stuck in"
                    " RUNNING state because all child executions are"
                    " finished, will try to recover [task_execution=%s]",
                    task_ex.id
                )

                task_handler.schedule_on_action_complete(children[-1])