Code example #1
def _schedule_refresh_task_state(task_ex_id, delay=0):
    """Schedules task preconditions check.

    This method provides transactional decoupling of task preconditions
    check from events that can potentially satisfy those preconditions.

    It's needed in non-locking model in order to avoid 'phantom read'
    phenomena when reading state of multiple tasks to see if a task that
    depends on them can start. Just starting a separate transaction
    without using scheduler is not safe due to concurrency window that
    we'll have in this case (time between transactions) whereas scheduler
    is a special component that is designed to be resistant to failures.

    :param task_ex_id: Task execution ID.
    :param delay: Delay.
    """

    sched = sched_base.get_system_scheduler()

    job = sched_base.SchedulerJob(run_after=delay,
                                  func_name=_REFRESH_TASK_STATE_PATH,
                                  func_args={'task_ex_id': task_ex_id},
                                  key=_get_refresh_state_job_key(task_ex_id))

    sched.schedule(job)
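
Both this snippet and code example #7 below rely on _get_refresh_state_job_key() to build a per-task deduplication key. That helper is not shown on this page; below is a minimal sketch, assuming the key only needs to be a stable string derived from the task execution ID (the prefix is a placeholder, not Mistral's actual value):

def _get_refresh_state_job_key(task_ex_id):
    # Hypothetical implementation: one stable key per task execution so
    # that duplicate refresh jobs for the same task can be detected.
    return 'refresh_task_state-%s' % task_ex_id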
Code example #2
def schedule_on_action_update(action_ex, delay=0):
    """Schedules task update check.

    This method provides transactional decoupling of action update from
    task update check. It's needed in non-locking model in order to
    avoid 'phantom read' phenomena when reading state of multiple actions
    to see if a task is updated. Just starting a separate transaction
    without using scheduler is not safe due to concurrency window that we'll
    have in this case (time between transactions) whereas scheduler is a
    special component that is designed to be resistant to failures.

    :param action_ex: Action execution.
    :param delay: Minimum amount of time before task update check
        should be made.
    """

    # Optimization to avoid opening a new transaction if it's not needed.
    if not action_ex.task_execution.spec.get('with-items'):
        _on_action_update(action_ex)

        return

    sched = sched_base.get_system_scheduler()

    job = sched_base.SchedulerJob(
        run_after=delay,
        func_name=_SCHEDULED_ON_ACTION_UPDATE_PATH,
        func_args={
            'action_ex_id': action_ex.id,
            'wf_action': isinstance(action_ex, models.WorkflowExecution)
        },
        key='th_on_a_u-%s' % action_ex.task_execution_id)

    sched.schedule(job)
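
Throughout these snippets, func_name is a dotted import-path string (constants such as _SCHEDULED_ON_ACTION_UPDATE_PATH) rather than a function object, which is what lets a job persisted in the DB be re-loaded and invoked even after a process restart. A sketch of how such a path could be resolved back into a callable when the job fires (the actual scheduler implementation may differ):

from oslo_utils import importutils

def _resolve_job_target(func_name):
    # Sketch: turn a job's stored dotted path back into the callable it
    # names. importutils.import_class() imports by dotted path and works
    # for module-level functions as well as classes.
    return importutils.import_class(func_name)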
Code example #3
    def start(self):
        super(EngineServer, self).start()

        db_api.setup_db()

        self._scheduler = sched_base.get_system_scheduler()
        self._scheduler.start()

        self._expiration_policy_tg = expiration_policy.setup()

        action_heartbeat_checker.start()

        # If the current engine instance uses a local action executor
        # then we also need to initialize a heartbeat reporter for it.
        # Heartbeats will be sent to the engine tier in the same way as
        # with a remote executor. So if the current cluster node crashes
        # in the middle of executing an action then one of the remaining
        # engine instances will expire the action in a configured period
        # of time.
        if cfg.CONF.executor.type == 'local':
            action_heartbeat_sender.start()

        if self._setup_profiler:
            profiler_utils.setup('mistral-engine', cfg.CONF.engine.host)

        # Initialize and start RPC server.

        self._rpc_server = rpc.get_rpc_server_driver()(cfg.CONF.engine)
        self._rpc_server.register_endpoint(self)

        self._rpc_server.run(executor=cfg.CONF.oslo_rpc_executor)

        self._notify_started('Engine server started.')
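
The cfg.CONF.executor.type check above reads the [executor] group of the Mistral configuration. As an illustration, a test that wants to exercise the local-executor branch could flip the option with oslo.config's standard override mechanism (the option itself is defined elsewhere in Mistral):

from oslo_config import cfg

# Make the engine start a heartbeat sender alongside the checker, as in
# the 'local' branch above.
cfg.CONF.set_override('type', 'local', group='executor')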
Code example #4
File: policies.py / Project: shubhamdang/mistral
    def before_task_start(self, task):
        super(WaitBeforePolicy, self).before_task_start(task)

        # No need to wait for a task if delay is 0
        if self.delay == 0:
            return

        ctx_key = 'wait_before_policy'

        policy_ctx = task.get_policy_context(ctx_key)

        if policy_ctx.get('skip'):
            # Unset state 'RUNNING_DELAYED'.
            task.set_state(states.RUNNING, None)

            return

        if task.get_state() != states.IDLE:
            policy_ctx.update({'skip': True})

            task.set_state(
                states.RUNNING_DELAYED,
                "Delayed by 'wait-before' policy [delay=%s]" % self.delay)

            sched = sched_base.get_system_scheduler()

            job = sched_base.SchedulerJob(
                run_after=self.delay,
                func_name=_CONTINUE_TASK_PATH,
                func_args={'task_ex_id': task.get_id()})

            sched.schedule(job)
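
The wait-before policy above (and the wait-after policies in code examples #6 and #11) is driven by task attributes in the workflow DSL. A minimal workflow text with illustrative values, in the same style as the test in code example #10:

wf_text = """---
version: '2.0'

wf:
  tasks:
    task1:
      action: std.noop
      wait-before: 2
      wait-after: 3
"""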
Code example #5
    def before_task_start(self, task_ex, task_spec):
        super(TimeoutPolicy, self).before_task_start(task_ex, task_spec)

        # No timeout if delay is 0
        if self.delay == 0:
            return

        sched = sched_base.get_system_scheduler()

        job = sched_base.SchedulerJob(
            run_after=self.delay,
            func_name=_FAIL_IF_INCOMPLETE_TASK_PATH,
            func_args={
                'task_ex_id': task_ex.id,
                'timeout': self.delay
            }
        )

        sched.schedule(job)

        wf_trace.info(
            task_ex,
            "Timeout check scheduled [task=%s, timeout(s)=%s]." %
            (task_ex.id, self.delay)
        )
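
The job target behind _FAIL_IF_INCOMPLETE_TASK_PATH is not shown on this page. A hedged sketch of what it could look like, reusing the db_api and states modules the snippets already use; task_handler.complete_task is an assumption about the completion helper, not a verified call:

def _fail_if_incomplete_task(task_ex_id, timeout):
    from mistral.engine import task_handler

    # Sketch only: load the task in its own transaction and fail it if
    # it still hasn't completed within the timeout window.
    with db_api.transaction():
        task_ex = db_api.get_task_execution(task_ex_id)

        if not states.is_completed(task_ex.state):
            msg = 'Task timed out [timeout(s)=%s].' % timeout

            # Assumed helper; the real code path may differ.
            task_handler.complete_task(task_ex, states.ERROR, msg)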
Code example #6
    def after_task_complete(self, task_ex, task_spec):
        super(WaitAfterPolicy, self).after_task_complete(task_ex, task_spec)

        # No need to postpone a task if delay is 0
        if self.delay == 0:
            return

        context_key = 'wait_after_policy'

        runtime_context = _ensure_context_has_key(
            task_ex.runtime_context,
            context_key
        )

        task_ex.runtime_context = runtime_context

        policy_context = runtime_context[context_key]

        if policy_context.get('skip'):
            # Skip, already processed.
            return

        policy_context.update({'skip': True})

        _log_task_delay(task_ex, self.delay)

        end_state = task_ex.state
        end_state_info = task_ex.state_info

        # TODO(rakhmerov): Policies probably need to have tasks.Task
        # interface in order to manage task state safely.
        # Set task state to 'RUNNING_DELAYED'.
        task_ex.state = states.RUNNING_DELAYED
        task_ex.state_info = (
            'Suspended by wait-after policy for %s seconds' % self.delay
        )

        # Schedule to change task state to RUNNING again.
        sched = sched_base.get_system_scheduler()

        job = sched_base.SchedulerJob(
            run_after=self.delay,
            func_name=_COMPLETE_TASK_PATH,
            func_args={
                'task_ex_id': task_ex.id,
                'state': end_state,
                'state_info': end_state_info
            }
        )

        sched.schedule(job)
Code example #7
    def _schedule_if_needed(t_ex_id):
        # NOTE(rakhmerov): we need to minimize the number of scheduled
        # jobs that refresh the state of "join" tasks, so we check
        # whether a corresponding job is already scheduled. Note that we
        # must ignore scheduled jobs that are currently being processed
        # because of a possible race with the transaction that deletes
        # scheduled jobs: the job may still exist in the DB (the
        # deleting transaction hasn't committed yet) even though it has
        # already been processed and the task state hasn't changed.
        sched = sched_base.get_system_scheduler()

        jobs_exist = sched.has_scheduled_jobs(
            key=_get_refresh_state_job_key(t_ex_id), processing=False)

        if not jobs_exist:
            _schedule_refresh_task_state(t_ex_id)
Code example #8
    def before_task_start(self, task_ex, task_spec):
        super(WaitBeforePolicy, self).before_task_start(task_ex, task_spec)

        # No need to wait for a task if delay is 0
        if self.delay == 0:
            return

        context_key = 'wait_before_policy'

        runtime_context = _ensure_context_has_key(
            task_ex.runtime_context,
            context_key
        )

        task_ex.runtime_context = runtime_context

        policy_context = runtime_context[context_key]

        if policy_context.get('skip'):
            # Unset state 'RUNNING_DELAYED'.
            wf_trace.info(
                task_ex,
                "Task '%s' [%s -> %s]"
                % (task_ex.name, states.RUNNING_DELAYED, states.RUNNING)
            )

            task_ex.state = states.RUNNING

            return

        if task_ex.state != states.IDLE:
            policy_context.update({'skip': True})

            _log_task_delay(task_ex, self.delay)

            task_ex.state = states.RUNNING_DELAYED

            sched = sched_base.get_system_scheduler()

            job = sched_base.SchedulerJob(
                run_after=self.delay,
                func_name=_CONTINUE_TASK_PATH,
                func_args={
                    'task_ex_id': task_ex.id
                }
            )

            sched.schedule(job)
Code example #9
File: workflow_handler.py / Project: Regmir/mistral
def _schedule_check_and_fix_integrity(wf_ex, delay=0):
    """Schedules workflow integrity check.

    :param wf_ex: Workflow execution.
    :param delay: Minimum amount of time before the check should be made.
    """

    if CONF.engine.execution_integrity_check_delay < 0:
        # A negative value disables the integrity check entirely.
        return

    sched = sched_base.get_system_scheduler()

    job = sched_base.SchedulerJob(run_after=delay,
                                  func_name=_CHECK_AND_FIX_INTEGRITY_PATH,
                                  func_args={'wf_ex_id': wf_ex.id},
                                  key=_get_integrity_check_key(wf_ex))

    sched.schedule(job)
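
The execution_integrity_check_delay knob consulted above lives in the [engine] config group. A sketch of how such an option might be registered with oslo.config (the default and help text here are assumptions, not Mistral's actual values):

from oslo_config import cfg

_ENGINE_OPTS = [
    cfg.IntOpt(
        'execution_integrity_check_delay',
        default=20,  # assumed default for illustration only
        help='Delay in seconds before a workflow execution integrity '
             'check is performed; a negative value disables the check '
             'entirely.'
    )
]

cfg.CONF.register_opts(_ENGINE_OPTS, group='engine')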
Code example #10
    def test_delete_workflow_integrity_check_on_execution_delete(self):
        wf_text = """---
        version: '2.0'

        wf:
          tasks:
            async_task:
              action: std.async_noop
        """

        wf_service.create_workflows(wf_text)

        wf_ex = self.engine.start_workflow('wf')

        db_api.delete_workflow_execution(wf_ex.id)

        sched = sched_base.get_system_scheduler()

        self._await(lambda: not sched.has_scheduled_jobs())
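
self._await() here is a polling helper assumed to come from the test base class, not from the scheduler API. A minimal sketch of what such a helper does, so the test reads on its own:

import time

def _await(predicate, delay=1, timeout=60):
    # Sketch: re-evaluate the predicate until it returns True or the
    # timeout expires; fail the test otherwise.
    deadline = time.time() + timeout

    while time.time() < deadline:
        if predicate():
            return

        time.sleep(delay)

    raise AssertionError(
        'Predicate did not become true within %s seconds.' % timeout)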
Code example #11
File: policies.py / Project: freightstream/mistral
    def after_task_complete(self, task):
        super(WaitAfterPolicy, self).after_task_complete(task)

        # No need to postpone a task if delay is 0
        if self.delay == 0:
            return

        ctx_key = 'wait_after_policy'

        policy_ctx = task.get_policy_context(ctx_key)

        if policy_ctx.get('skip'):
            # Skip, already processed.
            return

        policy_ctx.update({'skip': True})

        end_state = task.get_state()
        end_state_info = task.get_state_info()

        # Set task state to 'RUNNING_DELAYED'.
        task.set_state(
            states.RUNNING_DELAYED,
            "Delayed by 'wait-after' policy [delay=%s]" % self.delay
        )

        # Schedule to change task state to RUNNING again.
        sched = sched_base.get_system_scheduler()

        job = sched_base.SchedulerJob(
            run_after=self.delay,
            func_name=_COMPLETE_TASK_PATH,
            func_args={
                'task_ex_id': task.get_id(),
                'state': end_state,
                'state_info': end_state_info
            }
        )

        sched.schedule(job)
Code example #12
File: engine_server.py / Project: Regmir/mistral
    def start(self):
        super(EngineServer, self).start()

        db_api.setup_db()

        self._scheduler = sched_base.get_system_scheduler()
        self._scheduler.start()

        self._expiration_policy_tg = expiration_policy.setup()

        action_execution_checker.start()

        if self._setup_profiler:
            profiler_utils.setup('mistral-engine', cfg.CONF.engine.host)

        # Initialize and start RPC server.

        self._rpc_server = rpc.get_rpc_server_driver()(cfg.CONF.engine)
        self._rpc_server.register_endpoint(self)

        self._rpc_server.run(executor=cfg.CONF.oslo_rpc_executor)

        self._notify_started('Engine server started.')
Code example #13
    def after_task_complete(self, task_ex, task_spec):
        """Possible Cases:

        1. state = SUCCESS
           if continue_on is not specified,
           no need to move to next iteration;
           if current:count achieve retry:count then policy
           breaks the loop (regardless on continue-on condition);
           otherwise - check continue_on condition and if
           it is True - schedule the next iteration,
           otherwise policy breaks the loop.
        2. retry:count = 5, current:count = 2, state = ERROR,
           state = IDLE/DELAYED, current:count = 3
        3. retry:count = 5, current:count = 4, state = ERROR
        Iterations complete therefore state = #{state}, current:count = 4.
        """
        super(RetryPolicy, self).after_task_complete(task_ex, task_spec)

        # There is nothing to repeat
        if self.count == 0:
            return

        # TODO(m4dcoder): If the task_ex.action_executions and
        # task_ex.workflow_executions collections are not accessed,
        # then the retry_no in the runtime_context of the task_ex will
        # not be updated accurately. To be exact, the retry_no will be
        # one iteration behind.
        ex = task_ex.executions  # noqa

        context_key = 'retry_task_policy'

        runtime_context = _ensure_context_has_key(task_ex.runtime_context,
                                                  context_key)

        wf_ex = task_ex.workflow_execution

        ctx_view = data_flow.ContextView(
            data_flow.get_current_task_dict(task_ex),
            data_flow.evaluate_task_outbound_context(task_ex), wf_ex.context,
            wf_ex.input)

        continue_on_evaluation = expressions.evaluate(self._continue_on_clause,
                                                      ctx_view)

        break_on_evaluation = expressions.evaluate(self._break_on_clause,
                                                   ctx_view)

        task_ex.runtime_context = runtime_context

        state = task_ex.state

        if not states.is_completed(state) or states.is_cancelled(state):
            return

        policy_context = runtime_context[context_key]

        retry_no = 0

        if 'retry_no' in policy_context:
            retry_no = policy_context['retry_no']
            del policy_context['retry_no']

        retries_remain = retry_no < self.count

        stop_continue_flag = (task_ex.state == states.SUCCESS
                              and not self._continue_on_clause)

        stop_continue_flag = (stop_continue_flag
                              or (self._continue_on_clause
                                  and not continue_on_evaluation))

        break_triggered = (task_ex.state == states.ERROR
                           and break_on_evaluation)

        if not retries_remain or break_triggered or stop_continue_flag:
            return

        data_flow.invalidate_task_execution_result(task_ex)

        policy_context['retry_no'] = retry_no + 1
        runtime_context[context_key] = policy_context

        # NOTE(vgvoleg): join tasks in direct workflows can't be
        # retried as-is, because these tasks can't start without
        # a correct logical state.
        if hasattr(task_spec, "get_join") and task_spec.get_join():
            from mistral.engine import task_handler as t_h

            _log_task_delay(task_ex, self.delay, states.WAITING)

            task_ex.state = states.WAITING

            t_h._schedule_refresh_task_state(task_ex.id, self.delay)

            return

        _log_task_delay(task_ex, self.delay)

        task_ex.state = states.RUNNING_DELAYED

        sched = sched_base.get_system_scheduler()

        job = sched_base.SchedulerJob(run_after=self.delay,
                                      func_name=_CONTINUE_TASK_PATH,
                                      func_args={'task_ex_id': task_ex.id})

        sched.schedule(job)
Code example #14
File: policies.py / Project: shubhamdang/mistral
    def after_task_complete(self, task):
        """Possible Cases:

        1. state = SUCCESS
           if continue_on is not specified,
           no need to move to next iteration;
           if current:count achieve retry:count then policy
           breaks the loop (regardless on continue-on condition);
           otherwise - check continue_on condition and if
           it is True - schedule the next iteration,
           otherwise policy breaks the loop.
        2. retry:count = 5, current:count = 2, state = ERROR,
           state = IDLE/DELAYED, current:count = 3
        3. retry:count = 5, current:count = 4, state = ERROR
        Iterations complete therefore state = #{state}, current:count = 4.
        """
        super(RetryPolicy, self).after_task_complete(task)

        # There is nothing to repeat
        if self.count == 0:
            return

        # TODO(m4dcoder): If the task_ex.action_executions and
        # task_ex.workflow_executions collections are not accessed,
        # then the retry_no in the runtime_context of the task_ex will
        # not be updated accurately. To be exact, the retry_no will be
        # one iteration behind.
        ex = task.task_ex.executions  # noqa

        ctx_key = 'retry_task_policy'

        expr_ctx = task.get_expression_context(
            ctx=data_flow.evaluate_task_outbound_context(task.task_ex))

        continue_on_evaluation = expressions.evaluate(self._continue_on_clause,
                                                      expr_ctx)

        break_on_evaluation = expressions.evaluate(self._break_on_clause,
                                                   expr_ctx)

        state = task.get_state()

        if not states.is_completed(state) or states.is_cancelled(state):
            return

        policy_ctx = task.get_policy_context(ctx_key)

        retry_no = 0

        if 'retry_no' in policy_ctx:
            retry_no = policy_ctx['retry_no']

            del policy_ctx['retry_no']

        retries_remain = retry_no < self.count

        stop_continue_flag = (task.get_state() == states.SUCCESS
                              and not self._continue_on_clause)

        stop_continue_flag = (stop_continue_flag
                              or (self._continue_on_clause
                                  and not continue_on_evaluation))

        break_triggered = (task.get_state() == states.ERROR
                           and break_on_evaluation)

        if not retries_remain or break_triggered or stop_continue_flag:
            return

        task.invalidate_result()

        policy_ctx['retry_no'] = retry_no + 1

        task.touch_runtime_context()

        # NOTE(vgvoleg): join tasks in direct workflows can't be
        # retried as-is, because these tasks can't start without
        # a correct logical state.
        if hasattr(task.task_spec, "get_join") and task.task_spec.get_join():
            # TODO(rakhmerov): This is an example of broken
            # encapsulation. Control over such operations should belong
            # to the Task class. Once that's done, only one operation,
            # e.g. "continue_task()", would be visible from outside the
            # class.
            from mistral.engine import task_handler as t_h

            task.set_state(states.WAITING,
                           "Delayed by 'retry' policy [delay=%s]" % self.delay)

            t_h._schedule_refresh_task_state(task.get_id(), self.delay)

            return

        task.set_state(states.RUNNING_DELAYED,
                       "Delayed by 'retry' policy [delay=%s]" % self.delay)

        sched = sched_base.get_system_scheduler()

        job = sched_base.SchedulerJob(run_after=self.delay,
                                      func_name=_CONTINUE_TASK_PATH,
                                      func_args={'task_ex_id': task.get_id()})

        sched.schedule(job)
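
For completeness, the retry behaviour implemented by this policy is configured through the retry task attribute of the workflow DSL. A minimal workflow text with illustrative values, written in the same style as the test in code example #10:

wf_text = """---
version: '2.0'

wf:
  tasks:
    flaky_task:
      action: std.noop
      retry:
        count: 5
        delay: 10
        continue-on: <% $.keep_going %>
"""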