def _process_finished_ti(session, ti):
    """
    Handle a pending action on a TaskInstance that has already finished.

    Nothing is changed unless the event-scheduling dependencies are met and
    the stored task state carries an action other than ``TaskAction.NONE``.
    A ``RESTART`` action moves the task instance back to ``State.SCHEDULED``;
    for any pending action the stored action is then cleared and the session
    is committed.

    :param session: ORM session used for the lookup, the merges and the commit
    :param ti: the finished TaskInstance to inspect
    :return: None
    """
    from airflow.ti_deps.deps.runnable_exec_date_dep import RunnableExecDateDep
    from airflow.ti_deps.deps.valid_state_dep import ValidStateDep
    from airflow.ti_deps.deps.events_dep import EventTIDep

    finished_deps = {
        RunnableExecDateDep(),
        ValidStateDep(FINISHED_STATES),
        EventTIDep(),
    }
    context = DepContext(deps=finished_deps)
    if not ti.are_dependencies_met(dep_context=context, session=session):
        return

    task_state = TaskState.query_task_state(ti, session=session)
    if task_state.action is None:
        return
    pending = TaskAction(task_state.action)
    if pending == TaskAction.NONE:
        return

    if pending == TaskAction.RESTART:
        # Put the finished task instance back in line for scheduling.
        log.debug('Queuing Finished task: %s', ti)
        ti.state = State.SCHEDULED
        log.info("Creating / updating %s in ORM", ti)
        session.merge(ti)

    # The action has been consumed: clear it so it is not handled again.
    task_state.action = None
    session.merge(task_state)
    session.commit()
def test_all_deps_met(self):
    """
    The dep is met when the execution date precedes both end dates.
    """
    end_date = datetime(2016, 1, 2)
    task_instance = self._get_task_instance(
        dag_end_date=end_date,
        task_end_date=end_date,
        execution_date=datetime(2016, 1, 1),
    )
    dep = RunnableExecDateDep()
    assert dep.is_met(ti=task_instance)
def test_exec_date_after_end_date(self):
    """
    An execution date that lies in the future must make the dep fail.

    NOTE(review): presumably the clock is frozen before 2016-01-02 by a
    decorator or fixture outside this block -- confirm.
    """
    end_date = datetime(2016, 1, 3)
    task_instance = self._get_task_instance(
        dag_end_date=end_date,
        task_end_date=end_date,
        execution_date=datetime(2016, 1, 2),
    )
    dep = RunnableExecDateDep()
    self.assertFalse(dep.is_met(ti=task_instance))
def test_exec_date_after_task_end_date(self):
    """
    The dep must fail when the task instance's execution date is later
    than the task's end date.
    """
    task_instance = self._get_task_instance(
        dag_end_date=datetime(2016, 1, 3),
        task_end_date=datetime(2016, 1, 1),
        execution_date=datetime(2016, 1, 2),
    )
    dep = RunnableExecDateDep()
    assert not dep.is_met(ti=task_instance)
def test_exec_date_after_end_date(self):
    """
    An execution date that lies in the future must make the dep fail.

    NOTE(review): presumably the clock is frozen before 2016-11-02 by a
    decorator or fixture outside this block -- confirm.
    """
    dag = DAG(
        'test_localtaskjob_heartbeat',
        start_date=datetime(2015, 1, 1),
        end_date=datetime(2016, 11, 5),
        schedule_interval=None,
    )

    with dag:
        task = DummyOperator(task_id='op1')

    task_instance = TaskInstance(task=task, execution_date=datetime(2016, 11, 2))
    dep = RunnableExecDateDep()
    self.assertFalse(dep.is_met(ti=task_instance))
def test_exec_date_dep(allow_trigger_in_future, schedule_interval, execution_date, is_met):
    """
    Check the dep against the expected outcome for one parameter combination.

    A future execution date should fail the dep when
    ``allow_trigger_in_future`` is false or the DAG has a schedule interval.
    The evaluation runs while the ``[scheduler] allow_trigger_in_future``
    setting is overridden with the given value.
    """
    override = {('scheduler', 'allow_trigger_in_future'): allow_trigger_in_future}
    with conf_vars(override):
        dag = DAG(
            'test_localtaskjob_heartbeat',
            start_date=datetime(2015, 1, 1),
            end_date=datetime(2016, 11, 5),
            schedule_interval=schedule_interval,
        )

        with dag:
            task = DummyOperator(task_id='op1')

        task_instance = TaskInstance(task=task, execution_date=execution_date)
        outcome = RunnableExecDateDep().is_met(ti=task_instance)
        assert outcome == is_met
def test_exec_date_dep(allow_trigger_in_future, schedule_interval, execution_date, is_met):
    """
    Check the dep against the expected outcome for one parameter combination.

    A future execution date should fail the dep when
    ``allow_trigger_in_future`` is false or the DAG has a schedule interval.
    The evaluation runs while ``settings.ALLOW_FUTURE_EXEC_DATES`` is patched
    to the given value.
    """
    with patch.object(settings, 'ALLOW_FUTURE_EXEC_DATES', allow_trigger_in_future):
        dag = DAG(
            'test_localtaskjob_heartbeat',
            start_date=datetime(2015, 1, 1),
            end_date=datetime(2016, 11, 5),
            schedule_interval=schedule_interval,
        )

        with dag:
            task = DummyOperator(task_id='op1')

        task_instance = TaskInstance(task=task, execution_date=execution_date)
        outcome = RunnableExecDateDep().is_met(ti=task_instance)
        assert outcome == is_met
def _process_running_ti(session, ti):
    """
    Inspect a running TaskInstance for a stop or restart action.

    When the event-scheduling dependencies are met and
    ``action_is_stop_or_restart`` returns a truthy value, an info message is
    logged.  NOTE(review): any state change is presumably performed inside
    ``action_is_stop_or_restart`` itself -- confirm against that helper.

    :param session: session handed to the dependency check and the helper
    :param ti: the running TaskInstance to inspect
    :return: None
    """
    from airflow.ti_deps.deps.runnable_exec_date_dep import RunnableExecDateDep
    from airflow.ti_deps.deps.valid_state_dep import ValidStateDep
    from airflow.ti_deps.deps.events_dep import EventTIDep

    running_deps = {
        RunnableExecDateDep(),
        ValidStateDep(RUNNING_STATES),
        EventTIDep(),
    }
    context = DepContext(deps=running_deps)
    if not ti.are_dependencies_met(dep_context=context, session=session):
        return

    if action_is_stop_or_restart(ti, session):
        log.info("stop or restart task %s ", ti)
State.FAILED, State.NONE, State.QUEUED, State.SCHEDULED, State.SKIPPED, State.UPSTREAM_FAILED, State.UP_FOR_RETRY, State.UP_FOR_RESCHEDULE, } # Context to get the dependencies that need to be met in order for a task instance to # be backfilled. QUEUE_DEPS = { NotRunningDep(), NotSkippedDep(), RunnableExecDateDep(), ValidStateDep(QUEUEABLE_STATES), } # Dependencies that need to be met for a given task instance to be able to get run by an # executor. This class just extends QueueContext by adding dependencies for resources. RUN_DEPS = QUEUE_DEPS | { DagTISlotsAvailableDep(), TaskConcurrencyDep(), } # TODO(aoen): SCHEDULER_DEPS is not coupled to actual execution in any way and # could easily be modified or removed from the scheduler causing this dependency to become # outdated and incorrect. This coupling should be created (e.g. via a dag_deps analog of # ti_deps that will be used in the scheduler code) to ensure that the logic here is # equivalent to the logic in the scheduler.
# Task instance states from which a task instance may be queued.
QUEUEABLE_STATES = {
    State.FAILED,
    State.NONE,
    State.QUEUED,
    State.SCHEDULED,
    State.SKIPPED,
    State.UPSTREAM_FAILED,
    State.UP_FOR_RETRY,
}

# Context to get the dependencies that need to be met in order for a task instance to
# be backfilled.
QUEUE_DEPS = {
    # The task instance is not already running.
    NotRunningDep(),
    # The task instance has not been marked as skipped.
    NotSkippedDep(),
    # The execution date must be no later than the current time and no later
    # than the end date.
    RunnableExecDateDep(),
    # The task instance's state must be one of the queueable states.
    ValidStateDep(QUEUEABLE_STATES),
}

# Dependencies that need to be met for a given task instance to be able to get run by an
# executor. This set extends QUEUE_DEPS by adding dependencies for resources.
RUN_DEPS = QUEUE_DEPS.union({
    # Limit on how many tasks a single DAG may run concurrently.
    DagTISlotsAvailableDep(),
    # Limit on how many instances of a single task may exist at once.
    TaskConcurrencyDep(),
})

# TODO(aoen): SCHEDULER_DEPS is not coupled to actual execution in any way and
# could easily be modified or removed from the scheduler causing this dependency to become
# outdated and incorrect. This coupling should be created (e.g. via a dag_deps analog of
# ti_deps that will be used in the scheduler code) to ensure that the logic here is
# equivalent to the logic in the scheduler.