コード例 #1
0
    def _process_finished_ti(session, ti):
        """
        Process the TaskInstance object which already finished.
        :param session:
        :param ti:
        :return:
        """
        from airflow.ti_deps.deps.runnable_exec_date_dep import RunnableExecDateDep
        from airflow.ti_deps.deps.valid_state_dep import ValidStateDep
        from airflow.ti_deps.deps.events_dep import EventTIDep

        EVENT_SCHEDULED_DEPS = {
            RunnableExecDateDep(),
            ValidStateDep(FINISHED_STATES),
            EventTIDep(),
        }
        dep_context = DepContext(deps=EVENT_SCHEDULED_DEPS)
        if ti.are_dependencies_met(dep_context=dep_context, session=session):
            ts = TaskState.query_task_state(ti, session=session)
            if ts.action is None or TaskAction(ts.action) == TaskAction.NONE:
                return
            if TaskAction(ts.action) == TaskAction.RESTART:
                log.debug('Queuing Finished task: %s', ti)
                ti.state = State.SCHEDULED
                log.info("Creating / updating %s in ORM", ti)
                session.merge(ti)
            ts.action = None
            session.merge(ts)
            session.commit()
コード例 #2
0
 def test_no_valid_states(self):
     """
     If there are no valid states the dependency should throw
     """
     ti = Mock(state=State.SUCCESS, end_date=datetime(2016, 1, 1))
     with self.assertRaises(AirflowException):
         ValidStateDep({}).is_met(ti=ti)
コード例 #3
0
ファイル: valid_state_dep.py プロジェクト: gkhasel1/airflow
    def test_invalid_state(self):
        """
        Invalid state should fail this dep
        """
        ti = FakeTI(state=State.SUCCESS, end_date=datetime(2016, 1, 1))

        self.assertFalse(
            ValidStateDep({State.FAILURE}).is_met(ti=ti, dep_context=None))
コード例 #4
0
ファイル: valid_state_dep.py プロジェクト: gkhasel1/airflow
    def test_valid_state(self):
        """
        Valid state should pass this dep
        """
        ti = FakeTI(state=State.QUEUED, end_date=datetime(2016, 1, 1))

        self.assertTrue(
            ValidStateDep({State.QUEUED}).is_met(ti=ti, dep_context=None))
コード例 #5
0
    def _process_running_ti(session, ti):
        """
        Process the TaskInstance object which are running.
        :param session:
        :param ti:
        :return:
        """
        from airflow.ti_deps.deps.runnable_exec_date_dep import RunnableExecDateDep
        from airflow.ti_deps.deps.valid_state_dep import ValidStateDep
        from airflow.ti_deps.deps.events_dep import EventTIDep

        EVENT_SCHEDULED_DEPS = {
            RunnableExecDateDep(),
            ValidStateDep(RUNNING_STATES),
            EventTIDep(),
        }
        dep_context = DepContext(deps=EVENT_SCHEDULED_DEPS)
        if ti.are_dependencies_met(dep_context=dep_context, session=session):
            if action_is_stop_or_restart(ti, session):
                log.info("stop or restart task %s ", ti)
コード例 #6
0
ファイル: dep_context.py プロジェクト: FlyrInc/apache-airflow
    State.NONE,
    State.QUEUED,
    State.SCHEDULED,
    State.SKIPPED,
    State.UPSTREAM_FAILED,
    State.UP_FOR_RETRY,
    State.UP_FOR_RESCHEDULE,
}

# Context to get the dependencies that need to be met in order for a task instance to
# be backfilled.
QUEUE_DEPS = {
    NotRunningDep(),
    NotSkippedDep(),
    RunnableExecDateDep(),
    ValidStateDep(QUEUEABLE_STATES),
}

# Dependencies that need to be met for a given task instance to be able to get run by an
# executor. This class just extends QueueContext by adding dependencies for resources.
RUN_DEPS = QUEUE_DEPS | {
    DagTISlotsAvailableDep(),
    TaskConcurrencyDep(),
}

# TODO(aoen): SCHEDULER_DEPS is not coupled to actual execution in any way and
# could easily be modified or removed from the scheduler causing this dependency to become
# outdated and incorrect. This coupling should be created (e.g. via a dag_deps analog of
# ti_deps that will be used in the scheduler code) to ensure that the logic here is
# equivalent to the logic in the scheduler.
コード例 #7
0
 def test_invalid_state(self):
     """
     Invalid state should fail this dep
     """
     ti = Mock(state=State.SUCCESS, end_date=datetime(2016, 1, 1))
     self.assertFalse(ValidStateDep({State.FAILED}).is_met(ti=ti))
コード例 #8
0
 def test_valid_state(self):
     """
     Valid state should pass this dep
     """
     ti = Mock(state=State.QUEUED, end_date=datetime(2016, 1, 1))
     self.assertTrue(ValidStateDep({State.QUEUED}).is_met(ti=ti))
コード例 #9
0
ファイル: single_dag_run_job.py プロジェクト: kalebinn/dbnd
    def _process_dag_task_instances(self, ti_status, executor, pickle_id, session=None):
        """
        Process a set of task instances from a set of dag runs. Special handling is done
        to account for different task instance states that could be present when running
        them in a backfill process.
        :param ti_status: the internal status of the job
        :type ti_status: DagRunJob._DagRunTaskStatus
        :param executor: the executor to run the task instances
        :type executor: BaseExecutor
        :param pickle_id: the pickle_id if dag is pickled, None otherwise
        :type pickle_id: int
        :param start_date: the start date of the backfill job
        :type start_date: datetime
        :param session: the current session object
        :type session: Session
        :return: the list of execution_dates for the finished dag runs
        :rtype: list
        """

        executed_run_dates = []

        # values() returns a view so we copy to maintain a full list of the TIs to run
        all_ti = list(ti_status.to_run.values())
        waiting_for_executor_result = {}

        while (len(ti_status.to_run) > 0 or len(ti_status.running) > 0) and len(
            ti_status.deadlocked
        ) == 0:
            if current.is_killed():
                raise friendly_error.task_execution.databand_context_killed(
                    "SingleDagRunJob scheduling main loop"
                )
            self.log.debug("*** Clearing out not_ready list ***")
            ti_status.not_ready.clear()

            self.ti_state_manager.refresh_task_instances_state(
                all_ti, self.dag.dag_id, self.execution_date, session=session
            )

            # we need to execute the tasks bottom to top
            # or leaf to root, as otherwise tasks might be
            # determined deadlocked while they are actually
            # waiting for their upstream to finish
            for task in self.dag.topological_sort():

                # TODO: too complicated mechanism,
                # it's not possible that we have multiple tasks with the same id in to run
                for key, ti in list(ti_status.to_run.items()):
                    if task.task_id != ti.task_id:
                        continue

                    if not self._optimize:
                        ti.refresh_from_db()

                    task = self.dag.get_task(ti.task_id)
                    ti.task = task

                    # TODO : do we need that?
                    # ignore_depends_on_past = (
                    #     self.ignore_first_depends_on_past and
                    #     ti.execution_date == (start_date or ti.start_date))
                    ignore_depends_on_past = False
                    self.log.debug("Task instance to run %s state %s", ti, ti.state)

                    # guard against externally modified tasks instances or
                    # in case max concurrency has been reached at task runtime
                    if ti.state == State.NONE:
                        self.log.warning(
                            "FIXME: task instance {} state was set to None "
                            "externally. This should not happen"
                        )
                        ti.set_state(State.SCHEDULED, session=session)

                    # The task was already marked successful or skipped by a
                    # different Job. Don't rerun it.
                    if ti.state == State.SUCCESS:
                        ti_status.succeeded.add(key)
                        self.log.debug("Task instance %s succeeded. Don't rerun.", ti)
                        ti_status.to_run.pop(key)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        continue
                    elif ti.state == State.SKIPPED:
                        ti_status.skipped.add(key)
                        self.log.debug("Task instance %s skipped. Don't rerun.", ti)
                        ti_status.to_run.pop(key)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        continue
                    elif ti.state == State.FAILED:
                        self.log.error("Task instance %s failed", ti)
                        ti_status.failed.add(key)
                        ti_status.to_run.pop(key)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        continue
                    elif ti.state == State.UPSTREAM_FAILED:
                        self.log.error("Task instance %s upstream failed", ti)
                        ti_status.failed.add(key)
                        ti_status.to_run.pop(key)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        continue

                    runtime_deps = []
                    if self.airflow_config.disable_dag_concurrency_rules:
                        # RUN Deps validate dag and task concurrency
                        # It's less relevant when we run in stand along mode with SingleDagRunJob
                        # from airflow.ti_deps.deps.runnable_exec_date_dep import RunnableExecDateDep
                        from airflow.ti_deps.deps.valid_state_dep import ValidStateDep

                        # from airflow.ti_deps.deps.dag_ti_slots_available_dep import DagTISlotsAvailableDep
                        # from airflow.ti_deps.deps.task_concurrency_dep import TaskConcurrencyDep
                        # from airflow.ti_deps.deps.pool_slots_available_dep import PoolSlotsAvailableDep
                        runtime_deps = {
                            # RunnableExecDateDep(),
                            ValidStateDep(SCHEDUALED_OR_RUNNABLE),
                            # DagTISlotsAvailableDep(),
                            # TaskConcurrencyDep(),
                            # PoolSlotsAvailableDep(),
                        }
                    else:
                        runtime_deps = RUNNING_DEPS

                    dagrun_dep_context = DepContext(
                        deps=runtime_deps,
                        ignore_depends_on_past=ignore_depends_on_past,
                        ignore_task_deps=self.ignore_task_deps,
                        flag_upstream_failed=True,
                    )

                    # Is the task runnable? -- then run it
                    # the dependency checker can change states of tis
                    if ti.are_dependencies_met(
                        dep_context=dagrun_dep_context,
                        session=session,
                        verbose=self.verbose,
                    ):
                        ti.refresh_from_db(lock_for_update=True, session=session)
                        if (
                            ti.state == State.SCHEDULED
                            or ti.state == State.UP_FOR_RETRY
                        ):
                            if executor.has_task(ti):
                                self.log.debug(
                                    "Task Instance %s already in executor "
                                    "waiting for queue to clear",
                                    ti,
                                )
                            else:
                                self.log.debug("Sending %s to executor", ti)
                                # if ti.state == State.UP_FOR_RETRY:
                                #     ti._try_number += 1
                                # Skip scheduled state, we are executing immediately
                                ti.state = State.QUEUED
                                session.merge(ti)

                                cfg_path = None
                                if executor.__class__ in (
                                    executors.LocalExecutor,
                                    executors.SequentialExecutor,
                                ):
                                    cfg_path = tmp_configuration_copy()

                                executor.queue_task_instance(
                                    ti,
                                    mark_success=self.mark_success,
                                    pickle_id=pickle_id,
                                    ignore_task_deps=self.ignore_task_deps,
                                    ignore_depends_on_past=ignore_depends_on_past,
                                    pool=self.pool,
                                    cfg_path=cfg_path,
                                )

                                ti_status.to_run.pop(key)
                                ti_status.running[key] = ti
                                waiting_for_executor_result[key] = ti
                        session.commit()
                        continue

                    if ti.state == State.UPSTREAM_FAILED:
                        self.log.error("Task instance %s upstream failed", ti)
                        ti_status.failed.add(key)
                        ti_status.to_run.pop(key)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        continue

                    # special case
                    if ti.state == State.UP_FOR_RETRY:
                        self.log.debug(
                            "Task instance %s retry period not " "expired yet", ti
                        )
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        ti_status.to_run[key] = ti
                        continue

                    # all remaining tasks
                    self.log.debug("Adding %s to not_ready", ti)
                    ti_status.not_ready.add(key)
            # execute the tasks in the queue
            self.heartbeat()
            executor.heartbeat()

            # If the set of tasks that aren't ready ever equals the set of
            # tasks to run and there are no running tasks then the backfill
            # is deadlocked
            if (
                ti_status.not_ready
                and ti_status.not_ready == set(ti_status.to_run)
                and len(ti_status.running) == 0
            ):
                self.log.warning(
                    "scheduler: Deadlock discovered for ti_status.to_run=%s",
                    ti_status.to_run.values(),
                )
                ti_status.deadlocked.update(ti_status.to_run.values())
                ti_status.to_run.clear()

            self.ti_state_manager.refresh_task_instances_state(
                all_ti, self.dag.dag_id, self.execution_date, session=session
            )

            # check executor state
            self._manage_executor_state(ti_status.running, waiting_for_executor_result)

            if self._zombie_cleaner:
                # this code exists in airflow original scheduler
                # clean zombies ( we don't need multiple runs here actually
                self._zombie_cleaner.find_and_clean_dag_zombies(
                    dag=self.dag, execution_date=self.execution_date
                )

            # update the task counters
            self._update_counters(ti_status, waiting_for_executor_result)

            # update dag run state
            _dag_runs = ti_status.active_runs[:]
            for run in _dag_runs:
                run.update_state(session=session)

                self._update_databand_task_run_states(run)

                if run.state in State.finished():
                    ti_status.finished_runs += 1
                    ti_status.active_runs.remove(run)
                    executed_run_dates.append(run.execution_date)

            self._log_progress(ti_status)

            if self.fail_fast and ti_status.failed:
                msg = ",".join([t[1] for t in ti_status.failed])
                logger.error(
                    "scheduler: Terminating executor because a task failed and fail_fast mode is enabled %s",
                    msg,
                )
                raise DatabandFailFastError(
                    "Failing whole pipeline as it has failed/canceled tasks %s" % msg,
                )

        # return updated status
        return executed_run_dates
コード例 #10
0
}

BACKFILL_QUEUEABLE_STATES = {
    # For cases like unit tests and run manually
    State.NONE,
    State.UP_FOR_RESCHEDULE,
    State.UP_FOR_RETRY,
    # For normal backfill cases
    State.SCHEDULED,
}

# Context to get the dependencies that need to be met in order for a task instance to be
# set to 'scheduled' state.
SCHEDULED_DEPS = {
    RunnableExecDateDep(),
    ValidStateDep(SCHEDULEABLE_STATES),
    TaskNotRunningDep(),
}

# Dependencies that if met, task instance should be re-queued.
REQUEUEABLE_DEPS = {
    DagTISlotsAvailableDep(),
    TaskConcurrencyDep(),
    PoolSlotsAvailableDep(),
}

# Dependencies that need to be met for a given task instance to be set to 'RUNNING' state.
RUNNING_DEPS = {
    RunnableExecDateDep(),
    ValidStateDep(RUNNABLE_STATES),
    DagTISlotsAvailableDep(),
コード例 #11
0
    State.FAILED,
    State.NONE,
    State.QUEUED,
    State.SCHEDULED,
    State.SKIPPED,
    State.UPSTREAM_FAILED,
    State.UP_FOR_RETRY,
}

# Context to get the dependencies that need to be met in order for a task instance to
# be backfilled.
QUEUE_DEPS = {
    NotRunningDep(),  # 任务实例没有运行
    NotSkippedDep(),  # 任务实例没有被标记为跳过
    RunnableExecDateDep(),  # 判断任务执行时间 必须小于等于当前时间  且 小于等于结束时间
    ValidStateDep(QUEUEABLE_STATES),  # 验证任务的状态必须在队列状态中
}

# Dependencies that need to be met for a given task instance to be able to get run by an
# executor. This class just extends QueueContext by adding dependencies for resources.
RUN_DEPS = QUEUE_DEPS | {
    DagTISlotsAvailableDep(),  # 每个dag能并发执行的最大任务数依赖
    TaskConcurrencyDep(),  # 每个任务的任务实例有最大限制
}

# TODO(aoen): SCHEDULER_DEPS is not coupled to actual execution in any way and
# could easily be modified or removed from the scheduler causing this dependency to become
# outdated and incorrect. This coupling should be created (e.g. via a dag_deps analog of
# ti_deps that will be used in the scheduler code) to ensure that the logic here is
# equivalent to the logic in the scheduler.
コード例 #12
0
ファイル: dep_context.py プロジェクト: rrbarbosa/airflow
    State.SCHEDULED,
    State.SKIPPED,
    State.UPSTREAM_FAILED,
    State.UP_FOR_RETRY,
}

# The minimum execution context for task instances to be executed.
MIN_EXEC_DEPS = {
    NotRunningDep(),
    NotSkippedDep(),
    RunnableExecDateDep(),
}

# Context to get the dependencies that need to be met in order for a task instance to
# be backfilled.
QUEUE_DEPS = MIN_EXEC_DEPS | {ValidStateDep(QUEUEABLE_STATES)}

# Dependencies that need to be met for a given task instance to be able to get run by an
# executor. This class just extends QueueContext by adding dependencies for resources.
RUN_DEPS = QUEUE_DEPS | {DagTISlotsAvailableDep()}

# TODO(aoen): SCHEDULER_DEPS is not coupled to actual execution in any way and
# could easily be modified or removed from the scheduler causing this dependency to become
# outdated and incorrect. This coupling should be created (e.g. via a dag_deps analog of
# ti_deps that will be used in the scheduler code) to ensure that the logic here is
# equivalent to the logic in the scheduler.

# Dependencies that need to be met for a given task instance to get scheduled by the
# scheduler, then queued by the scheduler, then run by an executor.
SCHEDULER_DEPS = RUN_DEPS | {
    DagrunRunningDep(),