def _stop_workflow(wf_ex, state, message=None):
    """Stop the given workflow execution with the requested state.

    :param wf_ex: Workflow execution to stop.
    :param state: Target state. Only SUCCESS and ERROR trigger an action.
    :param message: Optional explaining message passed to the handler.
    :return: Result of wf_handler.succeed_workflow() for SUCCESS, result
        of wf_handler.fail_workflow() for ERROR, otherwise the untouched
        'wf_ex'.
    """
    if state == states.SUCCESS:
        wf_ctrl = wf_base.get_controller(wf_ex)

        final_context = {}

        try:
            final_context = wf_ctrl.evaluate_workflow_final_context()
        except Exception as e:
            # Best effort: a failure to evaluate the final context must
            # not prevent the workflow from being stopped, so it is only
            # logged and an empty context is used. Arguments are passed
            # to the logger so formatting is deferred.
            LOG.warning(
                'Failed to get final context for %s: %s',
                wf_ex,
                e
            )

        wf_spec = spec_parser.get_workflow_spec(wf_ex.spec)

        return wf_handler.succeed_workflow(
            wf_ex,
            final_context,
            wf_spec,
            message
        )
    elif state == states.ERROR:
        return wf_handler.fail_workflow(wf_ex, message)

    return wf_ex
def stop(self, state, msg=None):
    """Stop workflow.

    :param state: New workflow state.
    :param msg: Additional explaining message.
    :return: Result of the private handler matching SUCCESS, ERROR or
        CANCELLED. For any other state nothing is done and None is
        returned.
    """
    assert self.wf_ex

    if state == states.SUCCESS:
        wf_ctrl = wf_base.get_controller(self.wf_ex)

        final_context = {}

        try:
            final_context = wf_ctrl.evaluate_workflow_final_context()
        except Exception as e:
            # Best effort: the failure is only logged and the workflow
            # succeeds with an empty final context. Arguments are passed
            # to the logger so formatting is deferred.
            LOG.warning(
                'Failed to get final context for %s: %s',
                self.wf_ex,
                e
            )

        return self._succeed_workflow(final_context, msg)
    elif state == states.ERROR:
        return self._fail_workflow(msg)
    elif state == states.CANCELLED:
        return self._cancel_workflow(msg)
def stop(self, state, msg=None):
    """Stop workflow.

    :param state: New workflow state.
    :param msg: Additional explaining message.
    :return: Result of the private handler matching SUCCESS, ERROR or
        CANCELLED. For any other state nothing is done and None is
        returned.
    """
    assert self.wf_ex

    if state == states.SUCCESS:
        wf_ctrl = wf_base.get_controller(self.wf_ex)

        final_context = {}

        try:
            final_context = wf_ctrl.evaluate_workflow_final_context()
        except Exception as e:
            # Best effort: the failure is only logged and the workflow
            # succeeds with an empty final context. Arguments are passed
            # to the logger so formatting is deferred.
            LOG.warning(
                'Failed to get final context for %s: %s',
                self.wf_ex,
                e
            )

        return self._succeed_workflow(final_context, msg)
    elif state == states.ERROR:
        return self._fail_workflow(msg)
    elif state == states.CANCELLED:
        return self._cancel_workflow(msg)
def rerun(self, task_ex, reset=True, env=None):
    """Rerun workflow from the given task.

    :param task_ex: Task execution that the workflow needs to rerun from.
    :param reset: If True, reset task state including deleting its action
        executions.
    :param env: Environment.
    """
    assert self.wf_ex

    wf_service.update_workflow_execution_env(self.wf_ex, env)

    self._recursive_rerun()

    # Ask the workflow controller which commands must be processed next.
    controller = wf_base.get_controller(self.wf_ex)
    next_cmds = controller.rerun_tasks([task_ex], reset=reset)

    if next_cmds:
        # The policies module is imported locally to avoid a circular
        # reference.
        from mistral.engine import policies

        policies.RetryPolicy.refresh_runtime_context(task_ex)

    self._continue_workflow(next_cmds)
def _continue_workflow(self, task_ex=None, reset=True, env=None):
    """Calculate and dispatch the next workflow commands.

    :param task_ex: Task execution forwarded to the workflow controller.
    :param reset: Reset flag forwarded to the workflow controller.
    :param env: Environment forwarded to the workflow controller.
    """
    controller = wf_base.get_controller(self.wf_ex)

    # Calculate commands to process next.
    all_cmds = controller.continue_workflow(
        task_ex=task_ex,
        reset=reset,
        env=env
    )

    # 'Pause' commands are dropped when resuming: the controller takes
    # tasks that completed while the workflow was paused.
    cmds = [
        cmd for cmd in all_cmds
        if not isinstance(cmd, commands.PauseWorkflow)
    ]

    # No explicit task caused this operation, so every completed task
    # that is not yet processed gets marked as processed. The controller
    # only takes completed tasks whose 'processed' flag is False.
    for t_ex in self.wf_ex.task_executions:
        if not states.is_completed(t_ex.state) or t_ex.processed:
            continue

        t_ex.processed = True

    dispatcher.dispatch_workflow_commands(self.wf_ex, cmds)

    if not cmds:
        self._check_and_complete()
def _update_inbound_context(self):
    """Recalculate the inbound context and merge it into the task execution.

    The context obtained from the workflow controller is stored in
    'self.ctx' and then merged into 'self.task_ex.in_context'.
    """
    assert self.task_ex

    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)

    self.ctx = controller.get_task_inbound_context(self.task_spec)

    in_context = self.task_ex.in_context

    utils.update_dict(in_context, self.ctx)
def resume(self, env=None):
    """Resume workflow.

    :param env: Environment.
    """
    assert self.wf_ex

    wf_service.update_workflow_execution_env(self.wf_ex, env)

    self.set_state(states.RUNNING)

    # Publish event.
    self.notify(events.WORKFLOW_RESUMED)

    # Calculate commands to process next and continue with them.
    controller = wf_base.get_controller(self.wf_ex)
    next_cmds = controller.continue_workflow()

    self._continue_workflow(next_cmds)

    # Only a subworkflow (an execution that has a parent task execution
    # id) needs an update scheduled for its task execution.
    if not self.wf_ex.task_execution_id:
        return

    # Import the task_handler module here to avoid circular reference.
    from mistral.engine import task_handler

    task_handler.schedule_on_action_update(self.wf_ex)
def rerun(self, task_ex, reset=True, env=None):
    """Rerun workflow from the given task.

    :param task_ex: Task execution that the workflow needs to rerun from.
    :param reset: If True, reset task state including deleting its action
        executions.
    :param env: Environment.
    """
    assert self.wf_ex

    # Lookup utilities may cache completed tasks. Caches are cleaned
    # first so that stale objects can't be retrieved.
    lookup_utils.clean_caches()

    wf_service.update_workflow_execution_env(self.wf_ex, env)

    self.set_state(states.RUNNING, recursive=True)

    # Calculate commands to process next.
    controller = wf_base.get_controller(self.wf_ex)
    next_cmds = controller.rerun_tasks([task_ex], reset=reset)

    self._continue_workflow(next_cmds)
def start(self, input_dict, desc='', params=None):
    """Start workflow.

    :param input_dict: Workflow input.
    :param desc: Workflow execution description.
    :param params: Workflow type specific parameters.
    """
    assert not self.wf_ex

    start_msg = "Starting workflow: %s" % self.wf_def

    wf_trace.info(self.wf_ex, start_msg)

    # TODO(rakhmerov): This call implicitly changes input_dict! Fix it!
    # After fix we need to move validation after adding risky fields.
    eng_utils.validate_input(self.wf_def, input_dict, self.wf_spec)

    self._create_execution(input_dict, desc, params)

    self.set_state(states.RUNNING)

    # Compute the initial commands and hand them to the dispatcher.
    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)
    first_cmds = controller.continue_workflow()

    dispatcher.dispatch_workflow_commands(self.wf_ex, first_cmds)
def start(self, input_dict, desc='', params=None):
    """Start workflow.

    :param input_dict: Workflow input.
    :param desc: Workflow execution description.
    :param params: Workflow type specific parameters.
    """
    assert not self.wf_ex

    start_msg = "Starting workflow [name=%s, input=%s]" % (
        self.wf_def.name,
        utils.cut(input_dict)
    )

    wf_trace.info(self.wf_ex, start_msg)

    # TODO(rakhmerov): This call implicitly changes input_dict! Fix it!
    # After fix we need to move validation after adding risky fields.
    eng_utils.validate_input(self.wf_def, input_dict, self.wf_spec)

    self._create_execution(input_dict, desc, params)

    self.set_state(states.RUNNING)

    # Compute the initial commands and hand them to the dispatcher.
    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)
    first_cmds = controller.continue_workflow()

    dispatcher.dispatch_workflow_commands(self.wf_ex, first_cmds)
def rerun(self, task_ex, reset=True, env=None):
    """Rerun workflow from the given task.

    :param task_ex: Task execution that the workflow needs to rerun from.
    :param reset: If True, reset task state including deleting its action
        executions.
    :param env: Environment.
    """
    assert self.wf_ex

    # Lookup utilities may cache completed tasks. Caches are cleared
    # first so that stale objects can't be retrieved.
    lookup_utils.clear_caches()

    wf_service.update_workflow_execution_env(self.wf_ex, env)

    self.set_state(states.RUNNING, recursive=True)

    # Calculate commands to process next.
    controller = wf_base.get_controller(self.wf_ex)
    next_cmds = controller.rerun_tasks([task_ex], reset=reset)

    self._continue_workflow(next_cmds)
def _update_inbound_context(self):
    """Recalculate the inbound context and merge it into the task execution.

    The context obtained from the workflow controller is stored in
    'self.ctx' and then merged into 'self.task_ex.in_context'.
    """
    assert self.task_ex

    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)

    self.ctx = controller.get_task_inbound_context(self.task_spec)

    in_context = self.task_ex.in_context

    utils.update_dict(in_context, self.ctx)
def _refresh_task_state(task_ex_id):
    """Re-evaluate the logical state of a task and act on it.

    Runs inside a DB transaction. Nothing is done when the task execution
    can't be loaded, when the task is already completed or RUNNING, or
    when its workflow execution is completed.

    :param task_ex_id: Id of the task execution to refresh.
    :raises RuntimeError: If the workflow controller reports a logical
        state other than RUNNING, ERROR or WAITING.
    """
    with db_api.transaction():
        task_ex = db_api.load_task_execution(task_ex_id)

        if not task_ex:
            return

        # Cheap pre-check made before taking the lock (see NOTE below).
        if (states.is_completed(task_ex.state)
                or task_ex.state == states.RUNNING):
            return

        wf_ex = task_ex.workflow_execution

        if states.is_completed(wf_ex.state):
            return

        wf_spec = spec_parser.get_workflow_spec_by_execution_id(
            task_ex.workflow_execution_id
        )

        wf_ctrl = wf_base.get_controller(wf_ex, wf_spec)

        with db_api.named_lock(task_ex.id):
            # NOTE: we have to use this lock to prevent two (or more) such
            # methods from changing task state and starting its action or
            # workflow. Checking task state outside of this section is a
            # performance optimization because locking is pretty expensive.
            db_api.refresh(task_ex)

            # Re-check under the lock using the refreshed task execution.
            if (states.is_completed(task_ex.state)
                    or task_ex.state == states.RUNNING):
                return

            log_state = wf_ctrl.get_logical_task_state(task_ex)

            state = log_state.state
            state_info = log_state.state_info

            # Update 'triggered_by' because it could have changed.
            task_ex.runtime_context['triggered_by'] = log_state.triggered_by

            if state == states.RUNNING:
                continue_task(task_ex)
            elif state == states.ERROR:
                complete_task(task_ex, state, state_info)
            elif state == states.WAITING:
                LOG.info(
                    "Task execution is still in WAITING state"
                    " [task_ex_id=%s, task_name=%s]",
                    task_ex_id,
                    task_ex.name
                )
            else:
                # Must never get here.
                raise RuntimeError(
                    'Unexpected logical task state [task_ex_id=%s, '
                    'task_name=%s, state=%s]' %
                    (task_ex_id, task_ex.name, state)
                )
def _refresh_task_state(task_ex_id):
    """Re-evaluate the logical state of a task and act on it.

    Runs inside a DB transaction. Nothing is done when the task execution
    can't be loaded, when the task is already completed or RUNNING, or
    when its workflow execution is completed.

    :param task_ex_id: Id of the task execution to refresh.
    :raises RuntimeError: If the workflow controller reports a logical
        state other than RUNNING, ERROR or WAITING.
    """
    with db_api.transaction():
        task_ex = db_api.load_task_execution(task_ex_id)

        if not task_ex:
            return

        # Cheap pre-check made before taking the lock (see NOTE below).
        if (states.is_completed(task_ex.state)
                or task_ex.state == states.RUNNING):
            return

        wf_ex = task_ex.workflow_execution

        if states.is_completed(wf_ex.state):
            return

        wf_spec = spec_parser.get_workflow_spec_by_execution_id(
            task_ex.workflow_execution_id
        )

        wf_ctrl = wf_base.get_controller(wf_ex, wf_spec)

        with db_api.named_lock(task_ex.id):
            # NOTE: we have to use this lock to prevent two (or more) such
            # methods from changing task state and starting its action or
            # workflow. Checking task state outside of this section is a
            # performance optimization because locking is pretty expensive.
            db_api.refresh(task_ex)

            # Re-check under the lock using the refreshed task execution.
            if (states.is_completed(task_ex.state)
                    or task_ex.state == states.RUNNING):
                return

            log_state = wf_ctrl.get_logical_task_state(task_ex)

            state = log_state.state
            state_info = log_state.state_info

            # Update 'triggered_by' because it could have changed.
            task_ex.runtime_context['triggered_by'] = log_state.triggered_by

            if state == states.RUNNING:
                continue_task(task_ex)
            elif state == states.ERROR:
                complete_task(task_ex, state, state_info)
            elif state == states.WAITING:
                LOG.info(
                    "Task execution is still in WAITING state"
                    " [task_ex_id=%s, task_name=%s]",
                    task_ex_id,
                    task_ex.name
                )
            else:
                # Must never get here.
                raise RuntimeError(
                    'Unexpected logical task state [task_ex_id=%s, '
                    'task_name=%s, state=%s]' %
                    (task_ex_id, task_ex.name, state)
                )
def test_get_controller_reverse(self):
    # The controller built for the first spec parsed from REVERSE_WF
    # must be a ReverseWorkflowController.
    parsed_specs = spec_parser.get_workflow_list_spec_from_yaml(REVERSE_WF)
    wf_spec = parsed_specs[0]

    wf_ex = db_models.WorkflowExecution(spec=wf_spec.to_dict())

    controller = wf_base.get_controller(wf_ex, wf_spec)

    self.assertIsInstance(controller, reverse_wf.ReverseWorkflowController)
def test_get_controller_direct(self):
    # The controller built for the first spec parsed from DIRECT_WF
    # must be a DirectWorkflowController.
    parsed_specs = spec_parser.get_workflow_list_spec_from_yaml(DIRECT_WF)
    wf_spec = parsed_specs[0]

    wf_ex = db_models.WorkflowExecution(spec=wf_spec.to_dict())

    controller = wf_base.get_controller(wf_ex, wf_spec)

    self.assertIsInstance(controller, direct_wf.DirectWorkflowController)
def _get_final_context(self):
    """Evaluate the workflow final context on a best-effort basis.

    :return: Context returned by the workflow controller, or an empty
        dict if its evaluation raised an exception (the failure is only
        logged as a warning).
    """
    wf_ctrl = wf_base.get_controller(self.wf_ex)

    final_context = {}

    try:
        final_context = wf_ctrl.evaluate_workflow_final_context()
    except Exception as e:
        # Arguments are passed to the logger so formatting is deferred.
        LOG.warning(
            'Failed to get final context for %s: %s',
            self.wf_ex,
            e
        )

    return final_context
def _get_final_context(self):
    """Evaluate the workflow final context on a best-effort basis.

    :return: Context returned by the workflow controller, or an empty
        dict if its evaluation raised an exception (the failure is only
        logged as a warning).
    """
    wf_ctrl = wf_base.get_controller(self.wf_ex)

    final_context = {}

    try:
        final_context = wf_ctrl.evaluate_workflow_final_context()
    except Exception as e:
        # Arguments are passed to the logger so formatting is deferred.
        LOG.warning(
            'Failed to get final context for %s: %s',
            self.wf_ex,
            e
        )

    return final_context
def _get_final_context(self):
    """Evaluate the workflow final context on a best-effort basis.

    :return: Context returned by the workflow controller, or an empty
        dict if its evaluation raised an exception (the failure is only
        logged as a warning).
    """
    wf_ctrl = wf_base.get_controller(self.wf_ex)

    final_context = {}

    try:
        final_context = wf_ctrl.evaluate_workflow_final_context()
    except Exception as e:
        # Arguments are passed to the logger so formatting is deferred.
        LOG.warning(
            'Failed to get final context for workflow execution. '
            '[wf_ex_id: %s, wf_name: %s, error: %s]',
            self.wf_ex.id,
            self.wf_ex.name,
            str(e)
        )

    return final_context
def register_workflow_completion_check(self):
    """Register a post-transaction workflow completion check if needed.

    The check is registered only when the workflow controller reports
    that this task may complete the workflow.
    """
    def _check():
        # The workflow execution id is read when the operation runs.
        wf_handler.check_and_complete(self.wf_ex.id)

    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)

    if not controller.may_complete_workflow(self.task_ex):
        return

    # Register an asynchronous command to check workflow completion
    # in a separate transaction.
    post_tx_queue.register_operation(_check, in_tx=True)
def _update_inbound_context(self):
    """Recalculate the inbound context and merge it into the task execution.

    The context obtained from the workflow controller is stored in
    'self.ctx', gets the current task id and name added via data_flow,
    and is then merged into the task execution's 'in_context'.
    """
    current_task_ex = self.task_ex

    assert current_task_ex

    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)

    self.ctx = controller.get_task_inbound_context(self.task_spec)

    data_flow.add_current_task_to_context(
        self.ctx,
        current_task_ex.id,
        current_task_ex.name
    )

    utils.update_dict(current_task_ex.in_context, self.ctx)
def register_workflow_completion_check(self):
    """Register a post-transaction workflow completion check if needed.

    The check is registered only when the workflow controller reports
    that this task may complete the workflow.
    """
    def _check():
        # The workflow execution id is read when the operation runs.
        wf_handler.check_and_complete(self.wf_ex.id)

    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)

    if not controller.may_complete_workflow(self.task_ex):
        return

    # Register an asynchronous command to check workflow completion
    # in a separate transaction.
    post_tx_queue.register_operation(_check, in_tx=True)
def complete(self, state, state_info=None):
    """Complete task and set specified state.

    Sets the given task state and then runs the post completion logic:
    publishing workflow variables, optionally destroying the task result
    and scheduling new workflow commands.

    :param state: New task state.
    :param state_info: New state information (i.e. error message).
    """
    assert self.task_ex

    # Nothing to do for an already completed task.
    if self.is_completed():
        return

    # A failed state change means a concurrent process already changed
    # it. All regular completion logic (scheduling new tasks, running
    # engine commands, publishing) is skipped in that case.
    state_changed = self.set_state(state, state_info)

    if not state_changed:
        return

    data_flow.publish_variables(self.task_ex, self.task_spec)

    if not self.task_spec.get_keep_result():
        # Destroy task result.
        for action_ex in self.task_ex.action_executions:
            if hasattr(action_ex, 'output'):
                action_ex.output = {}

    self._after_task_complete()

    # A task in DELAYED state is ignored. If the workflow is paused no
    # new commands are scheduled and the task is not marked as
    # processed.
    if (self.task_ex.state == states.RUNNING_DELAYED
            or states.is_paused(self.wf_ex.state)):
        return

    # Calculate commands to process next.
    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)
    next_cmds = controller.continue_workflow(task_ex=self.task_ex)

    # The task is marked as processed only after all decisions upon its
    # completion have been made.
    self.task_ex.processed = True

    dispatcher.dispatch_workflow_commands(self.wf_ex, next_cmds)
def _refresh_task_state(task_ex_id):
    """Re-evaluate the logical state of a task and act on it.

    Runs inside a DB transaction. Nothing is done when the task execution
    can't be loaded or when its workflow execution is completed.

    :param task_ex_id: Id of the task execution to refresh.
    :raises RuntimeError: If the workflow controller reports a logical
        state other than RUNNING, ERROR or WAITING.
    """
    with db_api.transaction():
        task_ex = db_api.load_task_execution(task_ex_id)

        if not task_ex:
            return

        wf_ex = task_ex.workflow_execution

        if states.is_completed(wf_ex.state):
            return

        wf_spec = spec_parser.get_workflow_spec_by_execution_id(
            task_ex.workflow_execution_id
        )

        controller = wf_base.get_controller(wf_ex, wf_spec)
        logical = controller.get_logical_task_state(task_ex)

        state = logical.state
        state_info = logical.state_info

        # Update 'triggered_by' because it could have changed.
        task_ex.runtime_context['triggered_by'] = logical.triggered_by

        if state == states.RUNNING:
            continue_task(task_ex)
        elif state == states.ERROR:
            complete_task(task_ex, state, state_info)
        elif state == states.WAITING:
            # The next check time is a rough estimate assuming a task
            # takes 0.01 sec on average to complete. The delay shrinks
            # as task preconditions complete. For example, a 'join' task
            # with 100 inbound incomplete tasks is rechecked in 10
            # seconds, with 500 tasks in 50 seconds. At least 1 second
            # is always used.
            estimated = int(logical.cardinality * 0.01)

            _schedule_refresh_task_state(task_ex, max(1, estimated))
        else:
            # Must never get here.
            raise RuntimeError(
                'Unexpected logical task state [task_ex_id=%s, task_name=%s, '
                'state=%s]' % (task_ex_id, task_ex.name, state)
            )
def complete(self, state, state_info=None):
    """Complete task and set specified state.

    Sets the given task state and then runs the post completion logic:
    publishing workflow variables, optionally destroying the task result
    and scheduling new workflow commands.

    :param state: New task state.
    :param state_info: New state information (i.e. error message).
    """
    assert self.task_ex

    # Nothing to do for an already completed task.
    if states.is_completed(self.task_ex.state):
        return

    self.set_state(state, state_info)

    data_flow.publish_variables(self.task_ex, self.task_spec)

    if not self.task_spec.get_keep_result():
        # Destroy task result.
        for action_ex in self.task_ex.action_executions:
            if hasattr(action_ex, 'output'):
                action_ex.output = {}

    self._after_task_complete()

    # A task in DELAYED state is ignored. If the workflow is paused no
    # new commands are scheduled and the task is not marked as
    # processed.
    if (self.task_ex.state == states.RUNNING_DELAYED
            or states.is_paused(self.wf_ex.state)):
        return

    # Calculate commands to process next.
    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)
    next_cmds = controller.continue_workflow()

    # The task is marked as processed only after all decisions upon its
    # completion have been made.
    self.task_ex.processed = True

    dispatcher.dispatch_workflow_commands(self.wf_ex, next_cmds)
def check_and_complete(self):
    """Completes the workflow if it needs to be completed.

    Checks whether any task executions are still incomplete. If none
    are, the workflow is finalized as cancelled, succeeded or failed
    depending on what the workflow controller reports.

    :return: Number of incomplete tasks.
    """
    if states.is_paused_or_completed(self.wf_ex.state):
        return 0

    # Any incomplete task execution means the workflow is not completed.
    pending_count = db_api.get_incomplete_task_executions_count(
        workflow_execution_id=self.wf_ex.id,
    )

    if pending_count > 0:
        return pending_count

    LOG.debug("Workflow completed [id=%s]", self.wf_ex.id)

    # NOTE(rakhmerov): Once we know that the workflow has completed,
    # we need to expire all the objects in the DB session to make sure
    # to read the most relevant data from the DB (that's already been
    # committed in parallel transactions). Otherwise, some data like
    # workflow context may be stale and decisions made upon it will be
    # wrong.
    db_api.expire_all()

    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)

    if controller.any_cancels():
        self._cancel_workflow(
            _build_cancel_info_message(controller, self.wf_ex)
        )
    elif controller.all_errors_handled():
        self._succeed_workflow(
            controller.evaluate_workflow_final_context()
        )
    else:
        fail_msg = _build_fail_info_message(controller, self.wf_ex)
        fail_ctx = controller.evaluate_workflow_final_context()

        self._fail_workflow(fail_ctx, fail_msg)

    return 0
def check_and_complete(self):
    """Completes the workflow if it needs to be completed.

    Checks whether any task executions are still incomplete. If none
    are, the workflow is finalized as cancelled, succeeded or failed
    depending on what the workflow controller reports.

    :return: Number of incomplete tasks.
    """
    if states.is_paused_or_completed(self.wf_ex.state):
        return 0

    # Any incomplete task execution means the workflow is not completed.
    pending_count = db_api.get_incomplete_task_executions_count(
        workflow_execution_id=self.wf_ex.id,
    )

    if pending_count > 0:
        return pending_count

    LOG.debug("Workflow completed [id=%s]", self.wf_ex.id)

    # NOTE(rakhmerov): Once we know that the workflow has completed,
    # we need to expire all the objects in the DB session to make sure
    # to read the most relevant data from the DB (that's already been
    # committed in parallel transactions). Otherwise, some data like
    # workflow context may be stale and decisions made upon it will be
    # wrong.
    db_api.expire_all()

    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)

    if controller.any_cancels():
        self._cancel_workflow(
            _build_cancel_info_message(controller, self.wf_ex)
        )
    elif controller.all_errors_handled():
        self._succeed_workflow(
            controller.evaluate_workflow_final_context()
        )
    else:
        fail_msg = _build_fail_info_message(controller, self.wf_ex)
        fail_ctx = controller.evaluate_workflow_final_context()

        self._fail_workflow(fail_ctx, fail_msg)

    return 0
def start(self, wf_def, wf_ex_id, input_dict, desc='', params=None):
    """Start workflow.

    :param wf_def: Workflow definition.
    :param wf_ex_id: Workflow execution id.
    :param input_dict: Workflow input.
    :param desc: Workflow execution description.
    :param params: Workflow type specific parameters.
    """
    assert not self.wf_ex

    # New workflow execution.
    self.wf_spec = spec_parser.get_workflow_spec_by_definition_id(
        wf_def.id,
        wf_def.updated_at
    )

    start_msg = 'Starting workflow [name=%s, input=%s]' % (
        wf_def.name,
        utils.cut(input_dict)
    )

    wf_trace.info(self.wf_ex, start_msg)

    self.validate_input(input_dict)

    prepared_input = self.prepare_input(input_dict)

    self._create_execution(wf_def, wf_ex_id, prepared_input, desc, params)

    self.set_state(states.RUNNING)

    # Publish event as soon as state is set to running.
    self.notify(events.WORKFLOW_LAUNCHED)

    # Compute the initial commands and hand them to the dispatcher.
    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)
    first_cmds = controller.continue_workflow()

    dispatcher.dispatch_workflow_commands(self.wf_ex, first_cmds)
def start(self, wf_def, wf_ex_id, input_dict, desc='', params=None):
    """Start workflow.

    :param wf_def: Workflow definition.
    :param wf_ex_id: Workflow execution id.
    :param input_dict: Workflow input.
    :param desc: Workflow execution description.
    :param params: Workflow type specific parameters.
    """
    assert not self.wf_ex

    # New workflow execution.
    self.wf_spec = spec_parser.get_workflow_spec_by_definition_id(
        wf_def.id,
        wf_def.updated_at
    )

    start_msg = 'Starting workflow [name=%s, input=%s]' % (
        wf_def.name,
        utils.cut(input_dict)
    )

    wf_trace.info(self.wf_ex, start_msg)

    self.validate_input(input_dict)

    prepared_input = self.prepare_input(input_dict)

    self._create_execution(wf_def, wf_ex_id, prepared_input, desc, params)

    self.set_state(states.RUNNING)

    # Publish event as soon as state is set to running.
    self.notify(events.WORKFLOW_LAUNCHED)

    # Compute the initial commands and hand them to the dispatcher.
    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)
    first_cmds = controller.continue_workflow()

    dispatcher.dispatch_workflow_commands(self.wf_ex, first_cmds)
def _refresh_task_state(task_ex_id):
    """Re-evaluate the logical state of a task and act on it.

    Runs inside a DB transaction. Nothing is done when the task execution
    can't be loaded or when its workflow execution is completed.

    :param task_ex_id: Id of the task execution to refresh.
    :raises RuntimeError: If the workflow controller reports a logical
        state other than RUNNING, ERROR or WAITING.
    """
    with db_api.transaction():
        task_ex = db_api.load_task_execution(task_ex_id)

        if not task_ex:
            return

        wf_ex = task_ex.workflow_execution

        if states.is_completed(wf_ex.state):
            return

        wf_spec = spec_parser.get_workflow_spec_by_execution_id(
            task_ex.workflow_execution_id
        )

        controller = wf_base.get_controller(wf_ex, wf_spec)

        logical_state = controller.get_logical_task_state(task_ex)
        state, state_info, cardinality = logical_state

        if state == states.RUNNING:
            continue_task(task_ex)
        elif state == states.ERROR:
            failed_task = _build_task_from_execution(wf_spec, task_ex)

            failed_task.complete(state, state_info)
        elif state == states.WAITING:
            # The next check time is a rough estimate assuming a task
            # takes 0.01 sec on average to complete. The delay shrinks
            # as task preconditions complete. For example, a 'join' task
            # with 100 inbound incomplete tasks is rechecked in 10
            # seconds, with 500 tasks in 50 seconds. At least 1 second
            # is always used.
            estimated = int(cardinality * 0.01)

            _schedule_refresh_task_state(task_ex, max(1, estimated))
        else:
            # Must never get here.
            raise RuntimeError(
                'Unexpected logical task state [task_ex=%s, state=%s]' %
                (task_ex, state)
            )
def _get_final_context(self):
    """Evaluate the workflow final context on a best-effort basis.

    :return: Context returned by the workflow controller, or an empty
        dict if its evaluation raised an exception (the failure is only
        logged as a warning).
    """
    controller = wf_base.get_controller(self.wf_ex)

    try:
        return controller.evaluate_workflow_final_context()
    except Exception as e:
        LOG.warning(
            'Failed to get final context for workflow execution. '
            '[wf_ex_id: %s, wf_name: %s, error: %s]',
            self.wf_ex.id,
            self.wf_ex.name,
            str(e)
        )

    # Evaluation failed: fall back to an empty context.
    return {}
def _refresh_task_state(task_ex_id):
    """Re-evaluate the logical state of a task and act on it.

    Runs inside a DB transaction. Nothing is done when the task execution
    can't be loaded, when its workflow execution is completed, or when
    the task (re-read under a named lock) is already completed or
    RUNNING.

    :param task_ex_id: Id of the task execution to refresh.
    :raises RuntimeError: If the workflow controller reports a logical
        state other than RUNNING, ERROR or WAITING.
    """
    with db_api.transaction():
        task_ex = db_api.load_task_execution(task_ex_id)

        if not task_ex:
            return

        wf_ex = task_ex.workflow_execution

        if states.is_completed(wf_ex.state):
            return

        wf_spec = spec_parser.get_workflow_spec_by_execution_id(
            task_ex.workflow_execution_id)

        wf_ctrl = wf_base.get_controller(wf_ex, wf_spec)

        # The task state is checked and changed only while holding a
        # lock named after the task execution id.
        with db_api.named_lock(task_ex.id):
            # Re-read the task execution before looking at its state.
            db_api.refresh(task_ex)

            if (states.is_completed(task_ex.state)
                    or task_ex.state == states.RUNNING):
                return

            log_state = wf_ctrl.get_logical_task_state(task_ex)

            state = log_state.state
            state_info = log_state.state_info

            # Update 'triggered_by' because it could have changed.
            task_ex.runtime_context['triggered_by'] = log_state.triggered_by

            if state == states.RUNNING:
                continue_task(task_ex)
            elif state == states.ERROR:
                complete_task(task_ex, state, state_info)
            elif state == states.WAITING:
                LOG.info(
                    "Task execution is still in WAITING state"
                    " [task_ex_id=%s, task_name=%s]",
                    task_ex_id, task_ex.name)
            else:
                # Must never get here.
                raise RuntimeError(
                    'Unexpected logical task state [task_ex_id=%s, '
                    'task_name=%s, state=%s]' %
                    (task_ex_id, task_ex.name, state))
def _continue_workflow(self, wf_ex, task_ex=None, reset=True, env=None):
    """Move the workflow to RUNNING and dispatch its next commands.

    :param wf_ex: Workflow execution to continue.
    :param task_ex: Task execution forwarded to the workflow controller.
    :param reset: Reset flag forwarded to the workflow controller.
    :param env: Environment applied to the execution and forwarded to
        the workflow controller.
    :return: Clone of the workflow execution.
    """
    wf_ex = wf_service.update_workflow_execution_env(wf_ex, env)

    wf_handler.set_execution_state(
        wf_ex,
        states.RUNNING,
        set_upstream=True
    )

    controller = wf_base.get_controller(wf_ex)

    # TODO(rakhmerov): Add YAQL error handling.
    # Calculate commands to process next.
    all_cmds = controller.continue_workflow(
        task_ex=task_ex,
        reset=reset,
        env=env
    )

    # 'Pause' commands are dropped when resuming: the controller takes
    # tasks that completed while the workflow was paused.
    cmds = [
        cmd for cmd in all_cmds
        if not isinstance(cmd, commands.PauseWorkflow)
    ]

    # No explicit task caused this operation, so every completed task
    # that is not yet processed gets marked as processed. The controller
    # only takes completed tasks whose 'processed' flag is False.
    for t_ex in wf_ex.task_executions:
        if not states.is_completed(t_ex.state) or t_ex.processed:
            continue

        t_ex.processed = True

    wf_spec = spec_parser.get_workflow_spec(wf_ex.spec)

    self._dispatch_workflow_commands(wf_ex, cmds, wf_spec)

    # With nothing to dispatch and no incomplete task executions left
    # the workflow is finished successfully.
    if not cmds and not wf_utils.find_incomplete_task_executions(wf_ex):
        wf_handler.succeed_workflow(
            wf_ex,
            controller.evaluate_workflow_final_context(),
            wf_spec
        )

    return wf_ex.get_clone()
def rerun(self, task_ex, reset=True, env=None):
    """Rerun workflow from the given task.

    :param task_ex: Task execution that the workflow needs to rerun from.
    :param reset: If True, reset task state including deleting its action
        executions.
    :param env: Environment. The optional keys 'task_input',
        'task_action' and 'task_output' are removed from the passed dict
        (in place) before it is applied to the workflow execution. May
        be None.
    """
    assert self.wf_ex

    # Since some lookup utils functions may use cache for completed tasks
    # we need to clean caches to make sure that stale objects can't be
    # retrieved.
    lookup_utils.clear_caches()

    # Add default wf_ex.params['env'] for the rerun and pass command
    # extensions. It is used in update_workflow_execution_env().
    #
    # task_input: customer manual input data
    # task_output: customer manual output data
    # task_action: rerun | pass
    if 'env' not in self.wf_ex.params:
        self.wf_ex.params['env'] = {}

    # 'env' defaults to None, so the manual fields are popped from an
    # empty dict in that case instead of failing on None.pop(). A passed
    # dict is still modified in place, as before.
    manual_fields = env if env is not None else {}

    self._task_input_data = manual_fields.pop('task_input', None)
    self._task_action = manual_fields.pop('task_action', None)
    self._task_output_data = manual_fields.pop('task_output', None)

    wf_service.update_workflow_execution_env(self.wf_ex, env)

    self.set_state(states.RUNNING, recursive=True)

    wf_ctrl = wf_base.get_controller(self.wf_ex)

    # Calculate commands to process next.
    cmds = wf_ctrl.rerun_tasks([task_ex], reset=reset)

    # Add manual fields to the task execution instances to deliver the
    # manual data to the task handling level.
    for cmd in cmds:
        cmd.task_ex._manual_input = self._task_input_data
        cmd.task_ex._manual_action = self._task_action
        cmd.task_ex._manual_output = self._task_output_data

    self._continue_workflow(cmds)
def resume(self, env=None):
    """Resume workflow.

    :param env: Environment.
    """
    assert self.wf_ex

    wf_service.update_workflow_execution_env(self.wf_ex, env)

    self.set_state(states.RUNNING, recursive=True)

    # Calculate commands to process next and continue with them.
    controller = wf_base.get_controller(self.wf_ex)
    next_cmds = controller.continue_workflow()

    self._continue_workflow(next_cmds)
def resume(self, env=None):
    """Resume workflow.

    :param env: Environment.
    """
    assert self.wf_ex

    wf_service.update_workflow_execution_env(self.wf_ex, env)

    self.set_state(states.RUNNING, recursive=True)

    # Calculate commands to process next and continue with them.
    controller = wf_base.get_controller(self.wf_ex)
    next_cmds = controller.continue_workflow()

    self._continue_workflow(next_cmds)
def stop_workflow(wf_ex, state, message=None):
    """Stop the given workflow execution with the requested state.

    :param wf_ex: Workflow execution to stop.
    :param state: Target state. Only SUCCESS and ERROR trigger an action.
    :param message: Optional explaining message passed to the handler.
    :return: Result of succeed_workflow() for SUCCESS, result of
        fail_workflow() for ERROR, otherwise the untouched 'wf_ex'.
    """
    if state == states.SUCCESS:
        wf_ctrl = wf_base.get_controller(wf_ex)

        final_context = {}

        try:
            final_context = wf_ctrl.evaluate_workflow_final_context()
        except Exception as e:
            # Best effort: a failure to evaluate the final context must
            # not prevent the workflow from being stopped, so it is only
            # logged and an empty context is used. Arguments are passed
            # to the logger so formatting is deferred.
            LOG.warning(
                'Failed to get final context for %s: %s',
                wf_ex,
                e
            )

        wf_spec = spec_parser.get_workflow_spec(wf_ex.spec)

        return succeed_workflow(wf_ex, final_context, wf_spec, message)
    elif state == states.ERROR:
        return fail_workflow(wf_ex, message)

    return wf_ex
def start_workflow(self, wf_identifier, wf_input, description='',
                   **params):
    """Create a workflow execution and start it.

    :param wf_identifier: Workflow identifier passed to
        wf_ex_service.create_workflow_execution().
    :param wf_input: Workflow input.
    :param description: Workflow execution description.
    :param params: Additional workflow parameters.
    :return: Clone of the workflow execution. On failure, a clone of the
        execution returned by self._fail_workflow() if there is one.
    :raises Exception: The original error is re-raised when
        self._fail_workflow() returns nothing.
    """
    wf_ex_id = None

    try:
        # Create a persistent workflow execution in a separate transaction
        # so that we can return it even in case of unexpected errors that
        # lead to transaction rollback.
        with db_api.transaction():
            # The new workflow execution will be in an IDLE
            # state on initial record creation.
            wf_ex_id, wf_spec = wf_ex_service.create_workflow_execution(
                wf_identifier,
                wf_input,
                description,
                params
            )

        # Second transaction: move the execution to RUNNING and dispatch
        # its first commands.
        with db_api.transaction():
            wf_ex = db_api.get_workflow_execution(wf_ex_id)
            wf_handler.set_execution_state(wf_ex, states.RUNNING)

            wf_ctrl = wf_base.get_controller(wf_ex, wf_spec)

            self._dispatch_workflow_commands(
                wf_ex,
                wf_ctrl.continue_workflow(),
                wf_spec
            )

            return wf_ex.get_clone()
    except Exception as e:
        LOG.error(
            "Failed to start workflow '%s' id=%s: %s\n%s",
            wf_identifier, wf_ex_id, e, traceback.format_exc()
        )

        # 'wf_ex_id' is still None here if the execution was never
        # created.
        wf_ex = self._fail_workflow(wf_ex_id, e)

        if wf_ex:
            return wf_ex.get_clone()

        raise e
def _check_affected_tasks(task):
    """Schedule state refreshes for tasks indirectly affected by 'task'.

    Nothing is done if the task is not completed or if its workflow
    execution is already completed.

    :param task: Task object whose completion may affect other tasks.
    """
    if not task.is_completed():
        return

    task_ex = task.task_ex
    wf_ex = task_ex.workflow_execution

    if states.is_completed(wf_ex.state):
        return

    wf_spec = spec_parser.get_workflow_spec_by_execution_id(
        task_ex.workflow_execution_id
    )

    controller = wf_base.get_controller(wf_ex, wf_spec)

    affected = controller.find_indirectly_affected_task_executions(
        task_ex.name
    )

    def _schedule_if_needed(t_ex_id):
        # NOTE(rakhmerov): we need to minimize the number of delayed calls
        # that refresh state of "join" tasks. We'll check if corresponding
        # calls are already scheduled. Note that we must ignore delayed calls
        # that are currently being processed because of a possible race with
        # the transaction that deletes delayed calls, i.e. the call may still
        # exist in DB (the deleting transaction didn't commit yet) but it has
        # already been processed and the task state hasn't changed.
        scheduled_count = db_api.get_delayed_calls_count(
            key=_get_refresh_state_job_key(t_ex_id),
            processing=False
        )

        if scheduled_count != 0:
            return

        _schedule_refresh_task_state(t_ex_id)

    for affected_ex in affected:
        post_tx_queue.register_operation(
            _schedule_if_needed,
            args=[affected_ex.id],
            in_tx=True
        )
def _check_affected_tasks(task):
    """Schedule state refreshes for tasks indirectly affected by 'task'.

    Nothing is done if the task is not completed or if its workflow
    execution is already completed.

    :param task: Task object whose completion may affect other tasks.
    """
    if not task.is_completed():
        return

    task_ex = task.task_ex
    wf_ex = task_ex.workflow_execution

    if states.is_completed(wf_ex.state):
        return

    wf_spec = spec_parser.get_workflow_spec_by_execution_id(
        task_ex.workflow_execution_id
    )

    controller = wf_base.get_controller(wf_ex, wf_spec)

    affected = controller.find_indirectly_affected_task_executions(
        task_ex.name
    )

    def _schedule_if_needed(t_ex_id):
        # NOTE(rakhmerov): we need to minimize the number of delayed calls
        # that refresh state of "join" tasks. We'll check if corresponding
        # calls are already scheduled. Note that we must ignore delayed calls
        # that are currently being processed because of a possible race with
        # the transaction that deletes delayed calls, i.e. the call may still
        # exist in DB (the deleting transaction didn't commit yet) but it has
        # already been processed and the task state hasn't changed.
        scheduled_count = db_api.get_delayed_calls_count(
            key=_get_refresh_state_job_key(t_ex_id),
            processing=False
        )

        if scheduled_count != 0:
            return

        _schedule_refresh_task_state(t_ex_id)

    for affected_ex in affected:
        post_tx_queue.register_operation(
            _schedule_if_needed,
            args=[affected_ex.id],
            in_tx=True
        )
def _check_affected_tasks(task): # TODO(rakhmerov): this method should eventually move into # the class Task. The obvious signal is the only argument # that it takes. if not task.is_completed(): return task_ex = task.task_ex wf_ex = task_ex.workflow_execution if states.is_completed(wf_ex.state): return wf_spec = spec_parser.get_workflow_spec_by_execution_id( task_ex.workflow_execution_id) wf_ctrl = wf_base.get_controller(wf_ex, wf_spec) affected_task_execs = wf_ctrl.find_indirectly_affected_task_executions( task_ex.name) def _schedule_if_needed(t_ex_id): # NOTE(rakhmerov): we need to minimize the number of scheduled jobs # that refresh state of "join" tasks. We'll check if corresponding # jobs are already scheduled. Note that we must ignore scheduled jobs # that are currently being processed because of a possible race with # the transaction that deletes scheduled jobs, i.e. the job may still # exist in DB (the deleting transaction didn't commit yet) but it has # already been processed and the task state hasn't changed. sched = sched_base.get_system_scheduler() jobs_exist = sched.has_scheduled_jobs( key=_get_refresh_state_job_key(t_ex_id), processing=False) if not jobs_exist: _schedule_refresh_task_state(t_ex_id) for t_ex in affected_task_execs: post_tx_queue.register_operation(_schedule_if_needed, args=[t_ex.id], in_tx=True)
def _check_and_complete(self):
    """Finalize the workflow if no task can make further progress.

    A paused or completed workflow is left untouched. The workflow is
    also left untouched while at least one incomplete task execution is
    in a state other than WAITING. Once every incomplete task (if any)
    is waiting, the workflow succeeds when the controller reports all
    errors as handled and fails otherwise.
    """
    if states.is_paused_or_completed(self.wf_ex.state):
        return

    # Incomplete tasks that are all WAITING while there are unhandled
    # errors will never reach completion, so only a non-waiting
    # incomplete task keeps the workflow from being finalized.
    unfinished = wf_utils.find_incomplete_task_executions(self.wf_ex)

    if not all(states.is_waiting(t_ex.state) for t_ex in unfinished):
        return

    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)

    if not controller.all_errors_handled():
        self._fail_workflow(
            _build_fail_info_message(controller, self.wf_ex)
        )

        return

    self._succeed_workflow(controller.evaluate_workflow_final_context())
def _check_and_complete(self):
    """Finalize the workflow once it has no incomplete task executions.

    Nothing is done for a paused or completed workflow, or while any
    incomplete task execution exists. Otherwise the outcome is chosen
    from the controller's view, in this order: cancelled if there are
    any cancels, succeeded if all errors are handled, failed otherwise.
    """
    if states.is_paused_or_completed(self.wf_ex.state):
        return

    # Any incomplete task execution means the workflow is still going.
    if wf_utils.find_incomplete_task_executions(self.wf_ex):
        return

    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)

    if controller.any_cancels():
        self._cancel_workflow(
            _build_cancel_info_message(controller, self.wf_ex)
        )

        return

    if controller.all_errors_handled():
        self._succeed_workflow(
            controller.evaluate_workflow_final_context()
        )

        return

    self._fail_workflow(_build_fail_info_message(controller, self.wf_ex))
def _continue_workflow(wf_ex, task_ex=None, reset=True, env=None):
    """Move a workflow execution to RUNNING and dispatch its next commands.

    :param wf_ex: Workflow execution to continue.
    :param task_ex: Optional task execution passed to the controller.
    :param reset: Passed through to the controller's continue_workflow().
    :param env: Environment applied to the workflow execution and passed
        to the controller.
    :return: Clone of the workflow execution.
    """
    wf_ex = wf_service.update_workflow_execution_env(wf_ex, env)

    wf_handler.set_workflow_state(wf_ex, states.RUNNING, set_upstream=True)

    controller = wf_base.get_controller(wf_ex)

    # TODO(rakhmerov): Add error handling.

    # Work out which commands have to be processed next.
    next_cmds = controller.continue_workflow(
        task_ex=task_ex,
        reset=reset,
        env=env
    )

    # On resume every 'pause' command has to be dropped because the
    # workflow controller takes tasks that completed within the period
    # when the workflow was paused.
    # TODO(rakhmerov): This all should be in workflow handler, it's too
    # specific for engine level.
    next_cmds = [
        cmd for cmd in next_cmds
        if not isinstance(cmd, commands.PauseWorkflow)
    ]

    # There is no explicit task causing this operation, so all completed
    # but not yet processed tasks are marked as processed: the workflow
    # controller takes only completed tasks whose 'processed' flag is
    # False.
    for existing_ex in wf_ex.task_executions:
        if not states.is_completed(existing_ex.state):
            continue

        if not existing_ex.processed:
            existing_ex.processed = True

    dispatcher.dispatch_workflow_commands(wf_ex, next_cmds)

    if not next_cmds:
        wf_handler.check_workflow_completion(wf_ex)

    return wf_ex.get_clone()
def _check_affected_tasks(task): if not task.is_completed(): return task_ex = task.task_ex wf_ex = task_ex.workflow_execution if states.is_completed(wf_ex.state): return wf_spec = spec_parser.get_workflow_spec_by_execution_id( task_ex.workflow_execution_id) wf_ctrl = wf_base.get_controller(wf_ex, wf_spec) affected_task_execs = wf_ctrl.find_indirectly_affected_task_executions( task_ex.name) for t_ex in affected_task_execs: _schedule_refresh_task_state(t_ex)
def start_workflow(self, wf_identifier, wf_input, description='', **params):
    """Create a workflow execution, set it RUNNING and dispatch commands.

    All steps, including cloning the result, run inside a single
    ``db_api.transaction()`` block.

    :param wf_identifier: Workflow identifier passed to the execution
        service.
    :param wf_input: Workflow input.
    :param description: Execution description.
    :param params: Additional parameters, passed on as a dict.
    :return: Clone of the created workflow execution.
    """
    with db_api.transaction():
        # TODO(rakhmerov): It needs to be hidden in workflow_handler and
        # Workflow abstraction.

        # On initial record creation the new workflow execution
        # will be in an IDLE state.
        created_ex, created_spec = wf_ex_service.create_workflow_execution(
            wf_identifier,
            wf_input,
            description,
            params
        )

        wf_handler.set_workflow_state(created_ex, states.RUNNING)

        controller = wf_base.get_controller(created_ex, created_spec)

        first_cmds = controller.continue_workflow()

        dispatcher.dispatch_workflow_commands(created_ex, first_cmds)

        return created_ex.get_clone()
def _on_task_state_change(self, task_ex, wf_ex, wf_spec):
    """React to a task state change by dispatching follow-up commands.

    For a completed task the post-completion hook is run, the next
    workflow commands are calculated and dispatched, and workflow
    completion is checked. For a task that is not completed but needs
    to continue, the existing task is re-run without a reset.

    :param task_ex: Task execution whose state changed.
    :param wf_ex: Workflow execution the task belongs to.
    :param wf_spec: Workflow specification; the task spec is looked up
        in it by the task name.
    """
    task_spec = wf_spec.get_tasks()[task_ex.name]

    if not task_handler.is_task_completed(task_ex, task_spec):
        if task_handler.need_to_continue(task_ex, task_spec):
            # Run the already existing task once more.
            rerun_cmds = [commands.RunExistingTask(task_ex, reset=False)]

            self._dispatch_workflow_commands(wf_ex, rerun_cmds, wf_spec)

        return

    task_handler.after_task_complete(task_ex, task_spec, wf_spec)

    # A task in DELAYED state is skipped.
    if task_ex.state == states.RUNNING_DELAYED:
        return

    controller = wf_base.get_controller(wf_ex, wf_spec)

    # Work out which commands have to be processed next. A YAQL
    # evaluation error at this point fails the whole workflow.
    try:
        next_cmds = controller.continue_workflow()
    except exc.YaqlEvaluationException as e:
        LOG.error(
            'YAQL error occurred while calculating next workflow '
            'commands [wf_ex_id=%s, task_ex_id=%s]: %s',
            wf_ex.id, task_ex.id, e
        )

        wf_handler.fail_workflow(wf_ex, str(e))

        return

    # The task counts as processed only after all decisions upon its
    # completion have been made.
    task_ex.processed = True

    self._dispatch_workflow_commands(wf_ex, next_cmds, wf_spec)

    self._check_workflow_completion(wf_ex, controller, wf_spec)
def check_and_complete(self):
    """Finalize the workflow if it has no incomplete task executions.

    A paused or completed workflow is left as is. If incomplete task
    executions remain, their count is returned and nothing else happens.
    Otherwise the workflow is cancelled, succeeded or failed depending
    on what the workflow controller reports.

    :return: Number of incomplete tasks.
    """
    if states.is_paused_or_completed(self.wf_ex.state):
        return 0

    # While incomplete task executions exist the workflow can't be
    # completed.
    remaining = db_api.get_incomplete_task_executions_count(
        workflow_execution_id=self.wf_ex.id,
    )

    if remaining > 0:
        return remaining

    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)

    if controller.any_cancels():
        self._cancel_workflow(
            _build_cancel_info_message(controller, self.wf_ex)
        )

        return 0

    if controller.all_errors_handled():
        self._succeed_workflow(
            controller.evaluate_workflow_final_context()
        )

        return 0

    # The failure message is built before the final context is evaluated.
    fail_msg = _build_fail_info_message(controller, self.wf_ex)
    fail_ctx = controller.evaluate_workflow_final_context()

    self._fail_workflow(fail_ctx, fail_msg)

    return 0
def rerun(self, task_ex, reset=True, env=None):
    """Rerun the workflow starting from the given task.

    The workflow execution environment is updated, the workflow state is
    set to RUNNING recursively, the task environment is updated and the
    commands calculated by the workflow controller are passed on to
    ``_continue_workflow``.

    :param task_ex: Task execution that the workflow needs to rerun from.
    :param reset: If True, reset task state including deleting its action
        executions.
    :param env: Environment.
    """
    assert self.wf_ex

    wf_service.update_workflow_execution_env(self.wf_ex, env)

    self.set_state(states.RUNNING, recursive=True)

    _update_task_environment(task_ex, env)

    controller = wf_base.get_controller(self.wf_ex)

    # Work out which commands have to be processed next.
    rerun_cmds = controller.rerun_tasks([task_ex], reset=reset)

    self._continue_workflow(rerun_cmds)
def _refresh_task_state(task_ex_id):
    """Re-evaluate the logical state of a task and act on the result.

    Inside one DB transaction the task execution is loaded and the
    workflow controller is asked for its logical state. RUNNING continues
    the task, ERROR fails it with the reported state info and WAITING
    schedules another refresh.

    :param task_ex_id: ID of the task execution to refresh.
    :raises RuntimeError: If the logical state is none of the above.
    """
    with db_api.transaction():
        task_ex = db_api.get_task_execution(task_ex_id)

        spec = spec_parser.get_workflow_spec_by_execution_id(
            task_ex.workflow_execution_id
        )

        controller = wf_base.get_controller(task_ex.workflow_execution, spec)

        logical_state, info = controller.get_logical_task_state(task_ex)

        if logical_state == states.RUNNING:
            continue_task(task_ex)

            return

        if logical_state == states.ERROR:
            fail_task(task_ex, info)

            return

        if logical_state == states.WAITING:
            # TODO(rakhmerov): Algorithm for increasing rescheduling delay.
            _schedule_refresh_task_state(task_ex, 1)

            return

        # Reaching this point must never happen.
        raise RuntimeError(
            'Unexpected logical task state [task_ex=%s, state=%s]' %
            (task_ex, logical_state)
        )
def check_and_complete(self):
    """Complete the workflow when all of its tasks have finished.

    Does nothing for a paused or completed workflow. If the DB reports
    incomplete task executions for this workflow, only their count is
    returned. Otherwise all logic needed to finalize the workflow is
    performed: it is cancelled if the controller sees any cancels,
    succeeded if all errors are handled and failed in any other case.

    :return: Number of incomplete tasks.
    """
    if states.is_paused_or_completed(self.wf_ex.state):
        return 0

    # Incomplete task executions mean the workflow is not completed yet.
    pending_count = db_api.get_incomplete_task_executions_count(
        workflow_execution_id=self.wf_ex.id,
    )

    if pending_count > 0:
        return pending_count

    ctrl = wf_base.get_controller(self.wf_ex, self.wf_spec)

    if ctrl.any_cancels():
        cancel_msg = _build_cancel_info_message(ctrl, self.wf_ex)

        self._cancel_workflow(cancel_msg)
    elif ctrl.all_errors_handled():
        success_ctx = ctrl.evaluate_workflow_final_context()

        self._succeed_workflow(success_ctx)
    else:
        # Build the message first, then evaluate the final context.
        error_msg = _build_fail_info_message(ctrl, self.wf_ex)
        error_ctx = ctrl.evaluate_workflow_final_context()

        self._fail_workflow(error_ctx, error_msg)

    return 0
def complete(self, state, state_info=None):
    """Complete the task with the specified state.

    Sets the given task state and then runs the post completion logic:
    publishing workflow variables, optionally destroying action results,
    calculating the next workflow commands and dispatching them.

    :param state: New task state.
    :param state_info: New state information (i.e. error message).
    """
    assert self.task_ex

    # Remember the state the task had before this call.
    prev_state = self.task_ex.state

    # An already completed task is not completed again.
    if self.is_completed():
        # The task event is published once more so that subscribers
        # know the completed state is being processed again.
        self.notify(prev_state, self.task_ex.state)

        return

    # Failing to change the task state means a concurrent process has
    # already changed it. All regular completion logic (scheduling new
    # tasks, running engine commands, publishing) is skipped then.
    state_changed = self.set_state(state, state_info)

    if not state_changed:
        return

    data_flow.publish_variables(self.task_ex, self.task_spec)

    keep_result = self.task_spec.get_keep_result()

    if not keep_result:
        # Wipe the task result.
        for action_ex in self.task_ex.action_executions:
            if hasattr(action_ex, 'output'):
                action_ex.output = {}

    self._after_task_complete()

    # A task in DELAYED state is skipped.
    if self.task_ex.state == states.RUNNING_DELAYED:
        return

    # With a paused workflow no new commands are scheduled and the task
    # is not marked as processed.
    if states.is_paused(self.wf_ex.state):
        # The task event is published even though the workflow is paused.
        self.notify(prev_state, self.task_ex.state)

        return

    controller = wf_base.get_controller(self.wf_ex, self.wf_spec)

    # Work out which commands have to be processed next.
    next_cmds = controller.continue_workflow(task_ex=self.task_ex)

    # Find out whether the task generated any next tasks, i.e. whether
    # at least one command is not an engine command.
    only_engine_cmds = all(
        [commands.is_engine_command(cmd) for cmd in next_cmds]
    )

    if not only_engine_cmds:
        self.task_ex.has_next_tasks = True

    # Find out whether the error is handled.
    if self.task_ex.state == states.ERROR:
        handled_flags = [cmd.handles_error for cmd in next_cmds]

        self.task_ex.error_handled = any(handled_flags)

    # The task counts as processed only after all decisions upon its
    # completion have been made.
    self.task_ex.processed = True

    self.register_workflow_completion_check()

    self.save_finished_time()

    # Publish the task event.
    self.notify(prev_state, self.task_ex.state)

    dispatcher.dispatch_workflow_commands(self.wf_ex, next_cmds)