Example #1
    def _set_unfinished_dag_runs_to_failed(self, dag_runs, session=None):
        """
        Go through the dag_runs and update the state based on the task_instance state.
        Then set DAG runs that are not finished to failed.

        :param dag_runs: DAG runs
        :param session: session
        :return: None
        """
        for dag_run in dag_runs:
            dag_run.update_state()
            if dag_run.state not in State.finished():
                dag_run.set_state(State.FAILED)
            session.merge(dag_run)
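Most of the examples on this page only need a membership test against these helpers. A minimal sketch of that check, assuming the Airflow 1.10-era API where State.finished() and State.unfinished() are classmethods returning lists (Airflow 2.x exposes them as the frozenset attributes State.finished and State.unfinished instead):

from airflow.utils.state import State

# hedged sketch: print the two groups and check membership
print(State.finished())      # typically SUCCESS, FAILED and SKIPPED
print(State.unfinished())    # scheduled/queued/running/retry/... states

assert State.SUCCESS in State.finished()
assert State.RUNNING not in State.finished()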
Example #2
    def poke(self, context, session=None):
        ti = context['ti']
        tasks = (
            session.query(ErgoTask)
            .options(joinedload('job'))
            .filter_by(task_id=self.ergo_task_id, ti_dag_id=ti.dag_id)
            # TODO filter on execution date (otherwise there's no point)
        )
        tasks = list(tasks)
        self.log.info('Received %d results...', len(tasks))
        for task in tasks:
            job = task.job
            if job is None or task.state in State.unfinished():
                return False
        return True
Example #3
def _draw_task(
    task: Union[MappedOperator, BaseOperator],
    parent_graph: graphviz.Digraph,
    states_by_task_id: Optional[Dict[Any, Any]],
) -> None:
    """Draw a single task on the given parent_graph"""
    if states_by_task_id:
        state = states_by_task_id.get(task.task_id, State.NONE)
        color = State.color_fg(state)
        fill_color = State.color(state)
    else:
        color = task.ui_fgcolor
        fill_color = task.ui_color

    parent_graph.node(
        task.task_id,
        _attributes={
            "label": task.label,
            "shape": "rectangle",
            "style": "filled,rounded",
            "color": _refine_color(color),
            "fillcolor": _refine_color(fill_color),
        },
    )
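A self-contained sketch of the same idea using plain graphviz and the State color helpers; the _refine_color helper from the original module is skipped here, and the task id is made up:

import graphviz

from airflow.utils.state import State

# one-node graph whose colors reflect a task instance state
graph = graphviz.Digraph(name="example_dag")
state = State.FAILED
graph.node(
    "my_task",                     # hypothetical task id
    label="my_task",
    shape="rectangle",
    style="filled,rounded",
    color=State.color_fg(state),   # text/foreground color for the state
    fillcolor=State.color(state),  # background color for the state
)
print(graph.source)                # DOT source; graph.render() would produce an image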
Example #4
    def verify_state(self, dag, task_ids, execution_dates, state, old_tis, session=None):
        TI = models.TaskInstance

        tis = session.query(TI).filter(
            TI.dag_id == dag.dag_id,
            TI.execution_date.in_(execution_dates)
        ).all()

        self.assertTrue(len(tis) > 0)

        for ti in tis:  # pylint: disable=too-many-nested-blocks
            if ti.task_id in task_ids and ti.execution_date in execution_dates:
                self.assertEqual(ti.state, state)
                if state in State.finished():
                    self.assertIsNotNone(ti.end_date)
            else:
                for old_ti in old_tis:
                    if old_ti.task_id == ti.task_id and old_ti.execution_date == ti.execution_date:
                        self.assertEqual(ti.state, old_ti.state)
Example #5
    def ensure_finished_tasks(self, dag, execution_date, session):
        """
        This method makes sure finished_tasks is populated if it's currently None.
        This is for the strange feature of running tasks without dag_run.

        :param dag: The DAG for which to find finished tasks
        :type dag: airflow.models.DAG
        :param execution_date: The execution_date to look for
        :param session: Database session to use
        :return: A list of all the finished tasks of this DAG and execution_date
        :rtype: list[airflow.models.TaskInstance]
        """
        if self.finished_tasks is None:
            self.finished_tasks = dag.get_task_instances(
                start_date=execution_date,
                end_date=execution_date,
                state=State.finished() + [State.UPSTREAM_FAILED],
                session=session,
            )
        return self.finished_tasks
Example #6
def get_backlog(session):
    backlog = dict(
        session.query(TI.queue, func.count()).filter(
            or_(
                # This case is unfinished, but not yet queued, tasks.
                # In some cases the task state can be null,
                # which we choose to ignore.
                and_(
                    TI.queued_dttm.is_(None),
                    TI.state.isnot(None),
                    TI.state.notin_(State.finished()),
                ),
                TI.state.in_([State.QUEUED]),
            )
        ).group_by(TI.queue).all()
    )

    queues = []
    for queue, count in backlog.items():
        q = Queue(name=queue, backlog=count)
        queues.append(q)

    return queues
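The dict(...) wrapper works because a two-column query yields (key, value)-like rows; a tiny illustration with made-up data:

# hypothetical rows, shaped like what the .group_by(TI.queue) query's .all() returns
rows = [("default", 3), ("gpu_queue", 1)]
backlog = dict(rows)
assert backlog == {"default": 3, "gpu_queue": 1}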
Example #7
    def _get_states_count_upstream_ti(ti, finished_tasks, session):
        """
        This function returns the counts of upstream ti states for a specific ti, which are used
        to determine whether this ti can run in this iteration

        :param ti: the ti that we want to calculate deps for
        :type ti: airflow.models.TaskInstance
        :param finished_tasks: all the finished tasks of the dag_run
        :type finished_tasks: list[airflow.models.TaskInstance]
        """
        if finished_tasks is None:
            # this is for the strange feature of running tasks without dag_run
            finished_tasks = ti.task.dag.get_task_instances(
                start_date=ti.execution_date,
                end_date=ti.execution_date,
                state=State.finished() + [State.UPSTREAM_FAILED],
                session=session)
        counter = Counter(task.state for task in finished_tasks
                          if task.task_id in ti.task.upstream_task_ids)
        return counter.get(State.SUCCESS, 0), counter.get(State.SKIPPED, 0), counter.get(State.FAILED, 0), \
            counter.get(State.UPSTREAM_FAILED, 0), sum(counter.values())
Example #8
    def _get_dag_runs(self, event, session):
        dag_runs = []
        if EventType.is_in(event.event_type) and EventType(
                event.event_type) != EventType.UNDEFINED:
            if EventType(event.event_type) == EventType.DAG_RUN_EXECUTABLE:
                dag_run_id = int(event.key)
                dag_run = session.query(DagRun).filter(
                    DagRun.id == dag_run_id).first()
                if dag_run is None:
                    self.log.error("DagRun is None id {0}".format(dag_run_id))
                    return dag_runs
                simple_dag = event.simple_dag
                dag_run.pickle_id = None
                # create route
                self.dagrun_route.add_dagrun(dag_run, simple_dag, session)
                dag_runs.append(dag_run)

            elif EventType(event.event_type) == EventType.TASK_STATUS_CHANGED:
                dag_id, task_id, execution_date = TaskInstanceHelper.from_task_key(
                    event.key)
                state, try_num = TaskInstanceHelper.from_event_value(
                    event.value)
                dag_run = self.dagrun_route.find_dagrun(dag_id, execution_date)
                if dag_run is None:
                    return dag_runs
                self._set_task_instance_state(dag_run, dag_id, task_id,
                                              execution_date, state, try_num)

                sync_dag_run = session.query(DagRun).filter(
                    DagRun.id == dag_run.id).first()
                if sync_dag_run.state in State.finished():
                    self.log.info(
                        "DagRun finished dag_id {0} execution_date {1} state {2}"
                        .format(dag_run.dag_id, dag_run.execution_date,
                                sync_dag_run.state))
                    if self.dagrun_route.find_dagrun_by_id(
                            sync_dag_run.id) is not None:
                        self.dagrun_route.remove_dagrun(dag_run, session)
                        self.log.debug("Route remove dag run {0}".format(
                            sync_dag_run.id))
                        self.mail_box.send_message(
                            DagRunFinishedEvent(dag_run.id,
                                                sync_dag_run.state))
                else:
                    dag_runs.append(dag_run)

            elif EventType(event.event_type) == EventType.DAG_RUN_FINISHED:
                self.log.debug("DagRun {0} finished".format(event.key))
            elif EventType(event.event_type) == EventType.STOP_SCHEDULER_CMD:
                if self.unit_test_mode:
                    self.running = False
                return dag_runs
        else:
            runs = self.dagrun_route.find_dagruns_by_event(
                event_key=event.key, event_type=event.event_type)
            if runs is not None:
                for run in runs:
                    task_deps = load_task_dependencies(dag_id=run.dag_id,
                                                       session=session)
                    tis = run.get_task_instances(session=session)
                    for ti in tis:
                        if ti.task_id not in task_deps:
                            continue
                        if (event.key,
                                event.event_type) in task_deps[ti.task_id]:
                            self.log.debug("{0} handle event {1}".format(
                                ti.task_id, event))
                            ts = TaskState.query_task_state(ti,
                                                            session=session)
                            handler = ts.event_handler
                            if handler is not None:
                                action = handler.handle_event(event,
                                                              ti=ti,
                                                              ts=ts,
                                                              session=session)
                                ts.action = action
                                session.merge(ts)
                                session.commit()
                                self.log.debug(
                                    "set task action {0} {1}".format(
                                        ti.task_id, action))
                dag_runs.extend(runs)
                session.commit()

        for dag_run in dag_runs:
            run_process_func(target=process_tasks,
                             args=(
                                 dag_run,
                                 self.dagrun_route.find_simple_dag(dag_run.id),
                                 self.log,
                             ))
        return dag_runs
Example #9
    def set_state(self, state):
        if self._state != state:
            self._state = state
            self.end_date = timezone.utcnow() if self._state in State.finished() else None
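A sketch of the behaviour that setter encodes, assuming some object run that exposes it: entering a finished state stamps end_date, while an unfinished state clears it.

run.set_state(State.RUNNING)
assert run.end_date is None        # unfinished states clear end_date

run.set_state(State.SUCCESS)       # SUCCESS is in State.finished()
assert run.end_date is not None    # finished states stamp end_date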
Example #10
    def set_state(self, state):
        if self._state != state:
            self._state = state
            self.end_date = (timezone.utcnow()
                             if self._state in State.finished() else None)
Example #11
    def update_state(self, session=None):
        """
        Determines the overall state of the DagRun based on the state
        of its TaskInstances.

        :return: ready_tis: the tis that can be scheduled in the current loop
        :rtype: list[airflow.models.TaskInstance]
        """

        dag = self.get_dag()
        ready_tis = []
        tis = [
            ti for ti in self.get_task_instances(
                session=session, state=State.task_states + (State.SHUTDOWN, ))
        ]
        self.log.debug("number of tis tasks for %s: %s task(s)", self,
                       len(tis))
        for ti in tis:
            ti.task = dag.get_task(ti.task_id)

        start_dttm = timezone.utcnow()
        unfinished_tasks = [t for t in tis if t.state in State.unfinished()]
        finished_tasks = [
            t for t in tis
            if t.state in State.finished() + [State.UPSTREAM_FAILED]
        ]
        none_depends_on_past = all(not t.task.depends_on_past
                                   for t in unfinished_tasks)
        none_task_concurrency = all(t.task.task_concurrency is None
                                    for t in unfinished_tasks)
        if unfinished_tasks:
            scheduleable_tasks = [
                ut for ut in unfinished_tasks
                if ut.state in SCHEDULEABLE_STATES
            ]
            self.log.debug("number of scheduleable tasks for %s: %s task(s)",
                           self, len(scheduleable_tasks))
            ready_tis, changed_tis = self._get_ready_tis(
                scheduleable_tasks, finished_tasks, session)
            self.log.debug("ready tis length for %s: %s task(s)", self,
                           len(ready_tis))
            if none_depends_on_past and none_task_concurrency:
                # small speed up
                are_runnable_tasks = ready_tis or self._are_premature_tis(
                    unfinished_tasks, finished_tasks, session) or changed_tis

        duration = (timezone.utcnow() - start_dttm)
        Stats.timing("dagrun.dependency-check.{}".format(self.dag_id),
                     duration)

        leaf_task_ids = {t.task_id for t in dag.leaves}
        leaf_tis = [ti for ti in tis if ti.task_id in leaf_task_ids]

        # if all leaf tasks finished and at least one failed, the run failed
        if not unfinished_tasks and any(
                leaf_ti.state in {State.FAILED, State.UPSTREAM_FAILED}
                for leaf_ti in leaf_tis):
            self.log.error('Marking run %s failed', self)
            self.set_state(State.FAILED)
            dag.handle_callback(self,
                                success=False,
                                reason='task_failure',
                                session=session)

        # if all leaf tasks succeeded and no unfinished tasks, the run succeeded
        elif not unfinished_tasks and all(
                leaf_ti.state in {State.SUCCESS, State.SKIPPED}
                for leaf_ti in leaf_tis):
            self.log.info('Marking run %s successful', self)
            self.set_state(State.SUCCESS)
            dag.handle_callback(self,
                                success=True,
                                reason='success',
                                session=session)

        # if *all tasks* are deadlocked, the run failed
        elif (unfinished_tasks and none_depends_on_past
              and none_task_concurrency and not are_runnable_tasks):
            self.log.error('Deadlock; marking run %s failed', self)
            self.set_state(State.FAILED)
            dag.handle_callback(self,
                                success=False,
                                reason='all_tasks_deadlocked',
                                session=session)

        # finally, if the roots aren't done, the dag is still running
        else:
            self.set_state(State.RUNNING)

        self._emit_duration_stats_for_finished_state()

        # todo: determine whether we want to use with_for_update to make sure we lock the run
        session.merge(self)
        session.commit()

        return ready_tis
Example #12
def state_token(state):
    color = State.color(state)
    return Markup('<span class="label" style="background-color:{color};">'
                  '{state}</span>').format(color=color, state=state)
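Markup.format escapes its arguments, so the helper returns a safe HTML badge; a rough sketch of the output:

html = state_token(State.SUCCESS)
# roughly: <span class="label" style="background-color:green;">success</span>
# (the exact color comes from State.color and may vary between Airflow versions)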
Example #13
    def _process_dag_task_instances(self, ti_status, executor, pickle_id, session=None):
        """
        Process a set of task instances from a set of dag runs. Special handling is done
        to account for different task instance states that could be present when running
        them in a backfill process.
        :param ti_status: the internal status of the job
        :type ti_status: DagRunJob._DagRunTaskStatus
        :param executor: the executor to run the task instances
        :type executor: BaseExecutor
        :param pickle_id: the pickle_id if dag is pickled, None otherwise
        :type pickle_id: int
        :param session: the current session object
        :type session: Session
        :return: the list of execution_dates for the finished dag runs
        :rtype: list
        """

        executed_run_dates = []

        # values() returns a view so we copy to maintain a full list of the TIs to run
        all_ti = list(ti_status.to_run.values())
        waiting_for_executor_result = {}

        while (len(ti_status.to_run) > 0 or len(ti_status.running) > 0) and len(
            ti_status.deadlocked
        ) == 0:
            if current.is_killed():
                raise friendly_error.task_execution.databand_context_killed(
                    "SingleDagRunJob scheduling main loop"
                )
            self.log.debug("*** Clearing out not_ready list ***")
            ti_status.not_ready.clear()

            self.ti_state_manager.refresh_task_instances_state(
                all_ti, self.dag.dag_id, self.execution_date, session=session
            )

            # we need to execute the tasks bottom to top
            # or leaf to root, as otherwise tasks might be
            # determined deadlocked while they are actually
            # waiting for their upstream to finish
            for task in self.dag.topological_sort():

                # TODO: too complicated a mechanism;
                # it's not possible to have multiple tasks with the same id in to_run
                for key, ti in list(ti_status.to_run.items()):
                    if task.task_id != ti.task_id:
                        continue

                    if not self._optimize:
                        ti.refresh_from_db()

                    task = self.dag.get_task(ti.task_id)
                    ti.task = task

                    # TODO : do we need that?
                    # ignore_depends_on_past = (
                    #     self.ignore_first_depends_on_past and
                    #     ti.execution_date == (start_date or ti.start_date))
                    ignore_depends_on_past = False
                    self.log.debug("Task instance to run %s state %s", ti, ti.state)

                    # guard against externally modified tasks instances or
                    # in case max concurrency has been reached at task runtime
                    if ti.state == State.NONE:
                        self.log.warning(
                            "FIXME: task instance %s state was set to None "
                            "externally. This should not happen", ti
                        )
                        ti.set_state(State.SCHEDULED, session=session)

                    # The task was already marked successful or skipped by a
                    # different Job. Don't rerun it.
                    if ti.state == State.SUCCESS:
                        ti_status.succeeded.add(key)
                        self.log.debug("Task instance %s succeeded. Don't rerun.", ti)
                        ti_status.to_run.pop(key)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        continue
                    elif ti.state == State.SKIPPED:
                        ti_status.skipped.add(key)
                        self.log.debug("Task instance %s skipped. Don't rerun.", ti)
                        ti_status.to_run.pop(key)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        continue
                    elif ti.state == State.FAILED:
                        self.log.error("Task instance %s failed", ti)
                        ti_status.failed.add(key)
                        ti_status.to_run.pop(key)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        continue
                    elif ti.state == State.UPSTREAM_FAILED:
                        self.log.error("Task instance %s upstream failed", ti)
                        ti_status.failed.add(key)
                        ti_status.to_run.pop(key)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        continue

                    runtime_deps = []
                    if self.airflow_config.disable_dag_concurrency_rules:
                        # RUN Deps validate dag and task concurrency
                        # It's less relevant when we run in standalone mode with SingleDagRunJob
                        # from airflow.ti_deps.deps.runnable_exec_date_dep import RunnableExecDateDep
                        from airflow.ti_deps.deps.valid_state_dep import ValidStateDep

                        # from airflow.ti_deps.deps.dag_ti_slots_available_dep import DagTISlotsAvailableDep
                        # from airflow.ti_deps.deps.task_concurrency_dep import TaskConcurrencyDep
                        # from airflow.ti_deps.deps.pool_slots_available_dep import PoolSlotsAvailableDep
                        runtime_deps = {
                            # RunnableExecDateDep(),
                            ValidStateDep(SCHEDUALED_OR_RUNNABLE),
                            # DagTISlotsAvailableDep(),
                            # TaskConcurrencyDep(),
                            # PoolSlotsAvailableDep(),
                        }
                    else:
                        runtime_deps = RUNNING_DEPS

                    dagrun_dep_context = DepContext(
                        deps=runtime_deps,
                        ignore_depends_on_past=ignore_depends_on_past,
                        ignore_task_deps=self.ignore_task_deps,
                        flag_upstream_failed=True,
                    )

                    # Is the task runnable? -- then run it
                    # the dependency checker can change states of tis
                    if ti.are_dependencies_met(
                        dep_context=dagrun_dep_context,
                        session=session,
                        verbose=self.verbose,
                    ):
                        ti.refresh_from_db(lock_for_update=True, session=session)
                        if (
                            ti.state == State.SCHEDULED
                            or ti.state == State.UP_FOR_RETRY
                        ):
                            if executor.has_task(ti):
                                self.log.debug(
                                    "Task Instance %s already in executor "
                                    "waiting for queue to clear",
                                    ti,
                                )
                            else:
                                self.log.debug("Sending %s to executor", ti)
                                # if ti.state == State.UP_FOR_RETRY:
                                #     ti._try_number += 1
                                # Skip scheduled state, we are executing immediately
                                ti.state = State.QUEUED
                                session.merge(ti)

                                cfg_path = None
                                if executor.__class__ in (
                                    executors.LocalExecutor,
                                    executors.SequentialExecutor,
                                ):
                                    cfg_path = tmp_configuration_copy()

                                executor.queue_task_instance(
                                    ti,
                                    mark_success=self.mark_success,
                                    pickle_id=pickle_id,
                                    ignore_task_deps=self.ignore_task_deps,
                                    ignore_depends_on_past=ignore_depends_on_past,
                                    pool=self.pool,
                                    cfg_path=cfg_path,
                                )

                                ti_status.to_run.pop(key)
                                ti_status.running[key] = ti
                                waiting_for_executor_result[key] = ti
                        session.commit()
                        continue

                    if ti.state == State.UPSTREAM_FAILED:
                        self.log.error("Task instance %s upstream failed", ti)
                        ti_status.failed.add(key)
                        ti_status.to_run.pop(key)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        continue

                    # special case
                    if ti.state == State.UP_FOR_RETRY:
                        self.log.debug(
                            "Task instance %s retry period not " "expired yet", ti
                        )
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        ti_status.to_run[key] = ti
                        continue

                    # all remaining tasks
                    self.log.debug("Adding %s to not_ready", ti)
                    ti_status.not_ready.add(key)
            # execute the tasks in the queue
            self.heartbeat()
            executor.heartbeat()

            # If the set of tasks that aren't ready ever equals the set of
            # tasks to run and there are no running tasks then the backfill
            # is deadlocked
            if (
                ti_status.not_ready
                and ti_status.not_ready == set(ti_status.to_run)
                and len(ti_status.running) == 0
            ):
                self.log.warning(
                    "scheduler: Deadlock discovered for ti_status.to_run=%s",
                    ti_status.to_run.values(),
                )
                ti_status.deadlocked.update(ti_status.to_run.values())
                ti_status.to_run.clear()

            self.ti_state_manager.refresh_task_instances_state(
                all_ti, self.dag.dag_id, self.execution_date, session=session
            )

            # check executor state
            self._manage_executor_state(ti_status.running, waiting_for_executor_result)

            if self._zombie_cleaner:
                # this code exists in the original airflow scheduler
                # clean zombies (we don't actually need multiple runs here)
                self._zombie_cleaner.find_and_clean_dag_zombies(
                    dag=self.dag, execution_date=self.execution_date
                )

            # update the task counters
            self._update_counters(ti_status, waiting_for_executor_result)

            # update dag run state
            _dag_runs = ti_status.active_runs[:]
            for run in _dag_runs:
                run.update_state(session=session)

                self._update_databand_task_run_states(run)

                if run.state in State.finished():
                    ti_status.finished_runs += 1
                    ti_status.active_runs.remove(run)
                    executed_run_dates.append(run.execution_date)

            self._log_progress(ti_status)

            if self.fail_fast and ti_status.failed:
                msg = ",".join([t[1] for t in ti_status.failed])
                logger.error(
                    "scheduler: Terminating executor because a task failed and fail_fast mode is enabled %s",
                    msg,
                )
                raise DatabandFailFastError(
                    "Failing whole pipeline as it has failed/canceled tasks %s" % msg,
                )

        # return updated status
        return executed_run_dates
Example #14
import logging
import subprocess

from airflow.models import DAG, DagRun, TaskInstance
from airflow.operators.python_operator import PythonOperator
from airflow.operators.subdag_operator import SubDagOperator
from airflow.utils.db import provide_session
from airflow.utils.state import State
from datetime import datetime, timedelta
import pendulum

unfinished_states = State.unfinished()
delta_from_max_date = timedelta(minutes=10)
delta_to_mark_as_stuck = timedelta(hours=12)


@provide_session
def find_dag_runs(first, dag_id=None, execution_date=None, state=None, session=None):
    query = session.query(DagRun)
    query = query if dag_id is None else query.filter(DagRun.dag_id == dag_id)
    query = query if execution_date is None else query.filter(DagRun.execution_date == execution_date)
    query = query if state is None else query.filter(DagRun.state == state)
    return query.first() if first else query.all()


@provide_session
def find_task_instances(first, task_id=None, dag_id=None, execution_date=None, state=None, operator=None, session=None):
    query = session.query(TaskInstance)
    query = query if task_id is None else query.filter(TaskInstance.task_id == task_id)
    query = query if dag_id is None else query.filter(TaskInstance.dag_id == dag_id)
    query = query if execution_date is None else query.filter(TaskInstance.execution_date == execution_date)
    # remaining filters follow the same pattern as find_dag_runs above
    query = query if state is None else query.filter(TaskInstance.state == state)
    query = query if operator is None else query.filter(TaskInstance.operator == operator)
    return query.first() if first else query.all()
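A hedged usage sketch for find_dag_runs together with the unfinished_states constant defined above (the dag id is made up; provide_session supplies the session):

stuck_runs = find_dag_runs(first=False, dag_id='my_dag', state=State.RUNNING)
for dag_run in stuck_runs:
    unfinished = [
        ti for ti in dag_run.get_task_instances()
        if ti.state in unfinished_states
    ]
    print(dag_run.run_id, len(unfinished), 'unfinished task(s)')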
Example #15
def state_token(state):
    color = State.color(state)
    return Markup(
        '<span class="label" style="background-color:{color};">'
        '{state}</span>').format(color=color, state=state)
Example #16
    def mark_state(ti, sensor_instance):
        # note: `state` and `end_date` are free variables here, provided by the
        # enclosing scope in the original source
        ti.state = state
        sensor_instance.state = state
        if state in State.finished():
            ti.end_date = end_date
            ti.set_duration()
Example #17
    def _is_finished_wait_for_gapped_task(self, context):
        '''
        @return: bool - True when there is nothing left to wait for.
        '''
        if self._gapped_root_dag_run is None:
            self._init_gapped_root_dag_run(context['execution_date'])
            if self._gapped_root_dag_run is None:
                # The start time of the external gapped dag run is more recent than the needed gap,
                # so there is nothing to sense here.
                return True

        self.log.info(
            'Poking for the following'
            '{self._external_dag_id}.'
            '{self._external_task_id} on '
            '{self._gapped_root_dag_run.execution_date} ... '.format(**locals()))

        self._gapped_root_dag_run.refresh_from_db()
        is_finished_wait_for_gapped_task = True
        root_state = self._gapped_root_dag_run.get_state()
        if root_state == State.RUNNING:
            is_finished_wait_for_gapped_task = False
            self._refresh_gapped_dag_run()
            if self._gapped_dag_run is not None:
                gapped_dag_run_state = self._gapped_dag_run.get_state()
                if gapped_dag_run_state in State.unfinished():
                    external_task_instance = self._gapped_dag_run.get_task_instance(task_id=self._external_task_id)
                    if external_task_instance is None:
                        self.log.info(
                            'Still poking since the dag run has not finished and the gapped task instance has not started yet: '
                            'dag_id: {self._gapped_dag_run.dag_id} '
                            'run_id: {self._gapped_dag_run.run_id} '
                            'state: {gapped_dag_run_state} '.format(**locals()))
                    elif external_task_instance.state in State.unfinished():
                        self.log.info(
                            'Still poking since the gapped task instance has not finished: '
                            'dag_id: {self._gapped_dag_run.dag_id} '
                            'run_id: {self._gapped_dag_run.run_id} '
                            'start_date: {external_task_instance.start_date} '
                            'end_date: {external_task_instance.end_date} '
                            'task_state: {external_task_instance.state} '.format(**locals()))
                    else:
                        is_finished_wait_for_gapped_task = True
                        self.log.info(
                            'Finish poking since the gapped task instance has finished: '
                            'dag_id: {self._gapped_dag_run.dag_id} '
                            'run_id: {self._gapped_dag_run.run_id} '
                            'start_date: {external_task_instance.start_date} '
                            'end_date: {external_task_instance.end_date} '
                            'task_state: {external_task_instance.state} '.format(**locals()))
                else:
                    is_finished_wait_for_gapped_task = True
                    self.log.info(
                        'Finish poking since the gapped dag run is not running any more: '
                        'dag_id: {self._gapped_dag_run.dag_id} '
                        'run_id: {self._gapped_dag_run.run_id} '
                        'state: {gapped_dag_run_state} '.format(**locals()))
            else:
                self.log.info(
                    'Still poking since the root dag is still running and the gapped dag run does not exist: '
                    'dag_id: {self._gapped_root_dag_run.dag_id} '
                    'run_id: {self._gapped_root_dag_run.run_id} '
                    'state: {root_state} '.format(**locals()))
        else:
            self.log.info(
                'Finish poking since the root dag is not running any more: '
                'dag_id: {self._gapped_root_dag_run.dag_id} '
                'run_id: {self._gapped_root_dag_run.run_id} '
                'state: {root_state} '.format(**locals()))

        return is_finished_wait_for_gapped_task
Example #18
def state_token(state):
    """Returns a formatted string with HTML for a given State"""
    color = State.color(state)
    return Markup('<span class="label" style="background-color:{color};">'
                  '{state}</span>').format(color=color, state=state)
Example #19
def state_f(v, c, m, p):
    # flask-admin column formatter signature: (view, context, model, name)
    state = m.state
    color = State.color(m.state)
    return Markup('<span class="label" style="background-color:{color};">'
                  '{state}</span>').format(**locals())
Example #20
    def _process_backfill_task_instances(self,
                                         ti_status,
                                         executor,
                                         pickle_id,
                                         start_date=None,
                                         session=None):
        """
        Process a set of task instances from a set of dag runs. Special handling is done
        to account for different task instance states that could be present when running
        them in a backfill process.

        :param ti_status: the internal status of the job
        :type ti_status: BackfillJob._DagRunTaskStatus
        :param executor: the executor to run the task instances
        :type executor: BaseExecutor
        :param pickle_id: the pickle_id if dag is pickled, None otherwise
        :type pickle_id: int
        :param start_date: the start date of the backfill job
        :type start_date: datetime.datetime
        :param session: the current session object
        :type session: sqlalchemy.orm.session.Session
        :return: the list of execution_dates for the finished dag runs
        :rtype: list
        """

        executed_run_dates = []

        while ((len(ti_status.to_run) > 0 or len(ti_status.running) > 0)
               and len(ti_status.deadlocked) == 0):
            self.log.debug("*** Clearing out not_ready list ***")
            ti_status.not_ready.clear()

            # we need to execute the tasks bottom to top
            # or leaf to root, as otherwise tasks might be
            # determined deadlocked while they are actually
            # waiting for their upstream to finish
            @provide_session
            def _per_task_process(task, key, ti, session=None):
                ti.refresh_from_db()

                task = self.dag.get_task(ti.task_id)
                ti.task = task

                ignore_depends_on_past = (self.ignore_first_depends_on_past
                                          and ti.execution_date
                                          == (start_date or ti.start_date))
                self.log.debug("Task instance to run %s state %s", ti,
                               ti.state)

                # The task was already marked successful or skipped by a
                # different Job. Don't rerun it.
                if ti.state == State.SUCCESS:
                    ti_status.succeeded.add(key)
                    self.log.debug("Task instance %s succeeded. Don't rerun.",
                                   ti)
                    ti_status.to_run.pop(key)
                    if key in ti_status.running:
                        ti_status.running.pop(key)
                    return
                elif ti.state == State.SKIPPED:
                    ti_status.skipped.add(key)
                    self.log.debug("Task instance %s skipped. Don't rerun.",
                                   ti)
                    ti_status.to_run.pop(key)
                    if key in ti_status.running:
                        ti_status.running.pop(key)
                    return

                # guard against externally modified tasks instances or
                # in case max concurrency has been reached at task runtime
                elif ti.state == State.NONE:
                    self.log.warning(
                        "FIXME: task instance {} state was set to None "
                        "externally. This should not happen".format(ti))
                    ti.set_state(State.SCHEDULED, session=session)
                if self.rerun_failed_tasks:
                    # Rerun failed tasks or upstreamed failed tasks
                    if ti.state in (State.FAILED, State.UPSTREAM_FAILED):
                        self.log.error("Task instance {ti} "
                                       "with state {state}".format(
                                           ti=ti, state=ti.state))
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        # Reset the failed task in backfill to scheduled state
                        ti.set_state(State.SCHEDULED, session=session)
                else:
                    # Default behaviour which works for subdag.
                    if ti.state in (State.FAILED, State.UPSTREAM_FAILED):
                        self.log.error("Task instance {ti} "
                                       "with {state} state".format(
                                           ti=ti, state=ti.state))
                        ti_status.failed.add(key)
                        ti_status.to_run.pop(key)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        return

                backfill_context = DepContext(
                    deps=BACKFILL_QUEUED_DEPS,
                    ignore_depends_on_past=ignore_depends_on_past,
                    ignore_task_deps=self.ignore_task_deps,
                    flag_upstream_failed=True)

                ti.refresh_from_db(lock_for_update=True, session=session)
                # Is the task runnable? -- then run it
                # the dependency checker can change states of tis
                if ti.are_dependencies_met(dep_context=backfill_context,
                                           session=session,
                                           verbose=self.verbose):
                    if executor.has_task(ti):
                        self.log.debug(
                            "Task Instance %s already in executor "
                            "waiting for queue to clear", ti)
                    else:
                        self.log.debug('Sending %s to executor', ti)
                        # Skip scheduled state, we are executing immediately
                        ti.state = State.QUEUED
                        ti.queued_dttm = ti.queued_dttm or timezone.utcnow()
                        session.merge(ti)

                        cfg_path = None
                        if executor.__class__ in (
                                executors.LocalExecutor,
                                executors.SequentialExecutor):
                            cfg_path = tmp_configuration_copy()

                        executor.queue_task_instance(
                            ti,
                            mark_success=self.mark_success,
                            pickle_id=pickle_id,
                            ignore_task_deps=self.ignore_task_deps,
                            ignore_depends_on_past=ignore_depends_on_past,
                            pool=self.pool,
                            cfg_path=cfg_path)
                        ti_status.running[key] = ti
                        ti_status.to_run.pop(key)
                    session.commit()
                    return

                if ti.state == State.UPSTREAM_FAILED:
                    self.log.error("Task instance %s upstream failed", ti)
                    ti_status.failed.add(key)
                    ti_status.to_run.pop(key)
                    if key in ti_status.running:
                        ti_status.running.pop(key)
                    return

                # special case
                if ti.state == State.UP_FOR_RETRY:
                    self.log.debug(
                        "Task instance %s retry period not "
                        "expired yet", ti)
                    if key in ti_status.running:
                        ti_status.running.pop(key)
                    ti_status.to_run[key] = ti
                    return

                # special case
                if ti.state == State.UP_FOR_RESCHEDULE:
                    self.log.debug(
                        "Task instance %s reschedule period not "
                        "expired yet", ti)
                    if key in ti_status.running:
                        ti_status.running.pop(key)
                    ti_status.to_run[key] = ti
                    return

                # all remaining tasks
                self.log.debug('Adding %s to not_ready', ti)
                ti_status.not_ready.add(key)

            try:
                for task in self.dag.topological_sort():
                    for key, ti in list(ti_status.to_run.items()):
                        if task.task_id != ti.task_id:
                            continue

                        pool = session.query(models.Pool) \
                            .filter(models.Pool.pool == task.pool) \
                            .first()
                        if not pool:
                            raise PoolNotFound('Unknown pool: {}'.format(
                                task.pool))

                        open_slots = pool.open_slots(session=session)
                        if open_slots <= 0:
                            raise NoAvailablePoolSlot(
                                "Not scheduling since there are "
                                "{0} open slots in pool {1}".format(
                                    open_slots, task.pool))

                        num_running_task_instances_in_dag = DAG.get_num_task_instances(
                            self.dag_id,
                            states=self.STATES_COUNT_AS_RUNNING,
                        )

                        if num_running_task_instances_in_dag >= self.dag.concurrency:
                            raise DagConcurrencyLimitReached(
                                "Not scheduling since DAG concurrency limit "
                                "is reached.")

                        if task.task_concurrency:
                            num_running_task_instances_in_task = DAG.get_num_task_instances(
                                dag_id=self.dag_id,
                                task_ids=[task.task_id],
                                states=self.STATES_COUNT_AS_RUNNING,
                            )

                            if num_running_task_instances_in_task >= task.task_concurrency:
                                raise TaskConcurrencyLimitReached(
                                    "Not scheduling since Task concurrency limit "
                                    "is reached.")

                        _per_task_process(task, key, ti)
            except (NoAvailablePoolSlot, DagConcurrencyLimitReached,
                    TaskConcurrencyLimitReached) as e:
                self.log.debug(e)

            # execute the tasks in the queue
            self.heartbeat()
            executor.heartbeat()

            # If the set of tasks that aren't ready ever equals the set of
            # tasks to run and there are no running tasks then the backfill
            # is deadlocked
            if (ti_status.not_ready
                    and ti_status.not_ready == set(ti_status.to_run)
                    and len(ti_status.running) == 0):
                self.log.warning("Deadlock discovered for ti_status.to_run=%s",
                                 ti_status.to_run.values())
                ti_status.deadlocked.update(ti_status.to_run.values())
                ti_status.to_run.clear()

            # check executor state
            self._manage_executor_state(ti_status.running)

            # update the task counters
            self._update_counters(ti_status=ti_status)

            # update dag run state
            _dag_runs = ti_status.active_runs[:]
            for run in _dag_runs:
                run.update_state(session=session)
                if run.state in State.finished():
                    ti_status.finished_runs += 1
                    ti_status.active_runs.remove(run)
                    executed_run_dates.append(run.execution_date)

            self._log_progress(ti_status)

        # return updated status
        return executed_run_dates
Example #21
def set_state(tasks: Iterable[BaseOperator],
              execution_date: datetime.datetime,
              upstream: bool = False,
              downstream: bool = False,
              future: bool = False,
              past: bool = False,
              state: str = State.SUCCESS,
              commit: bool = False,
              session=None):  # pylint: disable=too-many-arguments,too-many-locals
    """
    Set the state of a task instance and if needed its relatives. Can set state
    for future tasks (calculated from execution_date) and retroactively
    for past tasks. Will verify integrity of past dag runs in order to create
    tasks that did not exist. It will not create dag runs that are missing
    on the schedule (but it will for subdag dag runs if needed).

    :param tasks: the iterable of tasks from which to work. task.dag needs to be set
    :param execution_date: the execution date from which to start looking
    :param upstream: Mark all parents (upstream tasks)
    :param downstream: Mark all siblings (downstream tasks) of task_id, including SubDags
    :param future: Mark all future tasks on the interval of the dag up until
        last execution date.
    :param past: Retroactively mark all tasks starting from start_date of the DAG
    :param state: State to which the tasks need to be set
    :param commit: Commit tasks to be altered to the database
    :param session: database session
    :return: list of task instances that have been altered
    """
    if not tasks:
        return []

    if not timezone.is_localized(execution_date):
        raise ValueError(
            "Received non-localized date {}".format(execution_date))

    task_dags = {task.dag for task in tasks}
    if len(task_dags) > 1:
        raise ValueError(
            "Received tasks from multiple DAGs: {}".format(task_dags))
    dag = next(iter(task_dags))
    if dag is None:
        raise ValueError("Received tasks with no DAG")

    dates = get_execution_dates(dag, execution_date, future, past)

    task_ids = list(find_task_relatives(tasks, downstream, upstream))

    confirmed_dates = verify_dag_run_integrity(dag, dates)

    sub_dag_run_ids = get_subdag_runs(dag, session, state, task_ids, commit,
                                      confirmed_dates)

    # now look for the task instances that are affected

    qry_dag = get_all_dag_task_query(dag, session, state, task_ids,
                                     confirmed_dates)

    if commit:
        tis_altered = qry_dag.with_for_update().all()
        if sub_dag_run_ids:
            qry_sub_dag = all_subdag_tasks_query(sub_dag_run_ids, session,
                                                 state, confirmed_dates)
            tis_altered += qry_sub_dag.with_for_update().all()
        for task_instance in tis_altered:
            task_instance.state = state
            if state in State.finished():
                task_instance.end_date = timezone.utcnow()
                task_instance.set_duration()
    else:
        tis_altered = qry_dag.all()
        if sub_dag_run_ids:
            qry_sub_dag = all_subdag_tasks_query(sub_dag_run_ids, session,
                                                 state, confirmed_dates)
            tis_altered += qry_sub_dag.all()

    return tis_altered
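A hypothetical call to the helper above; the dag object, task id and date are made up for illustration, and commit=False would leave the database untouched:

from airflow.utils import timezone
from airflow.utils.state import State

altered = set_state(
    tasks=[dag.get_task('my_task')],               # hypothetical task in an already-parsed DAG
    execution_date=timezone.datetime(2021, 1, 1),
    downstream=True,                               # also mark downstream relatives
    state=State.SUCCESS,
    commit=True,                                   # False would be a dry run
)
print('%d task instance(s) updated' % len(altered))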
Example #22
    def update_state(self, session=None):
        """
        Determines the overall state of the DagRun based on the state
        of its TaskInstances.

        :return: State
        """

        dag = self.get_dag()

        tis = self.get_task_instances(session=session)
        self.log.debug("Updating state for %s considering %s task(s)", self,
                       len(tis))

        for ti in list(tis):
            # skip in db?
            if ti.state == State.REMOVED:
                tis.remove(ti)
            else:
                ti.task = dag.get_task(ti.task_id)

        # pre-calculate
        # db is faster
        start_dttm = timezone.utcnow()
        unfinished_tasks = self.get_task_instances(state=State.unfinished(),
                                                   session=session)
        none_depends_on_past = all(not t.task.depends_on_past
                                   for t in unfinished_tasks)
        none_task_concurrency = all(t.task.task_concurrency is None
                                    for t in unfinished_tasks)
        # small speed up
        if unfinished_tasks and none_depends_on_past and none_task_concurrency:
            # todo: this can actually get pretty slow: one task costs between 0.01-0.15s
            no_dependencies_met = True
            for ut in unfinished_tasks:
                # We need to flag upstream and check for changes because upstream
                # failures/re-schedules can result in deadlock false positives
                old_state = ut.state
                deps_met = ut.are_dependencies_met(dep_context=DepContext(
                    flag_upstream_failed=True,
                    ignore_in_retry_period=True,
                    ignore_in_reschedule_period=True),
                                                   session=session)
                if deps_met or old_state != ut.current_state(session=session):
                    no_dependencies_met = False
                    break

        duration = (timezone.utcnow() - start_dttm).total_seconds() * 1000
        Stats.timing("dagrun.dependency-check.{}".format(self.dag_id),
                     duration)

        root_ids = [t.task_id for t in dag.roots]
        roots = [t for t in tis if t.task_id in root_ids]

        # if all roots finished and at least one failed, the run failed
        if (not unfinished_tasks
                and any(r.state in (State.FAILED, State.UPSTREAM_FAILED)
                        for r in roots)):
            self.log.info('Marking run %s failed', self)
            self.set_state(State.FAILED)
            dag.handle_callback(self,
                                success=False,
                                reason='task_failure',
                                session=session)

        # if all roots succeeded and no unfinished tasks, the run succeeded
        elif not unfinished_tasks and all(
                r.state in (State.SUCCESS, State.SKIPPED) for r in roots):
            self.log.info('Marking run %s successful', self)
            self.set_state(State.SUCCESS)
            dag.handle_callback(self,
                                success=True,
                                reason='success',
                                session=session)

        # if *all tasks* are deadlocked, the run failed
        elif (unfinished_tasks and none_depends_on_past
              and none_task_concurrency and no_dependencies_met):
            self.log.info('Deadlock; marking run %s failed', self)
            self.set_state(State.FAILED)
            dag.handle_callback(self,
                                success=False,
                                reason='all_tasks_deadlocked',
                                session=session)

        # finally, if the roots aren't done, the dag is still running
        else:
            self.set_state(State.RUNNING)

        self._emit_duration_stats_for_finished_state()

        # todo: determine whether we want to use with_for_update to make sure we lock the run
        session.merge(self)
        session.commit()

        return self.state
Example #23
    def update_state(
        self,
        session: Session = None,
        execute_callbacks: bool = True
    ) -> Tuple[List[TI], Optional[callback_requests.DagCallbackRequest]]:
        """
        Determines the overall state of the DagRun based on the state
        of its TaskInstances.

        :param session: Sqlalchemy ORM Session
        :type session: Session
        :param execute_callbacks: Should dag callbacks (success/failure, SLA etc) be invoked
            directly (default: true) or recorded as a pending request in the ``callback`` property
        :type execute_callbacks: bool
        :return: Tuple containing tis that can be scheduled in the current loop & `callback` that
            needs to be executed
        """
        # Callback to execute in case of Task Failures
        callback: Optional[callback_requests.DagCallbackRequest] = None

        start_dttm = timezone.utcnow()
        self.last_scheduling_decision = start_dttm

        dag = self.get_dag()
        ready_tis: List[TI] = []
        tis = list(
            self.get_task_instances(session=session,
                                    state=State.task_states +
                                    (State.SHUTDOWN, )))
        self.log.debug("number of tis tasks for %s: %s task(s)", self,
                       len(tis))
        for ti in tis:
            ti.task = dag.get_task(ti.task_id)

        unfinished_tasks = [t for t in tis if t.state in State.unfinished()]
        finished_tasks = [
            t for t in tis
            if t.state in State.finished() + [State.UPSTREAM_FAILED]
        ]
        none_depends_on_past = all(not t.task.depends_on_past
                                   for t in unfinished_tasks)
        none_task_concurrency = all(t.task.task_concurrency is None
                                    for t in unfinished_tasks)
        if unfinished_tasks:
            scheduleable_tasks = [
                ut for ut in unfinished_tasks
                if ut.state in SCHEDULEABLE_STATES
            ]
            self.log.debug("number of scheduleable tasks for %s: %s task(s)",
                           self, len(scheduleable_tasks))
            ready_tis, changed_tis = self._get_ready_tis(
                scheduleable_tasks, finished_tasks, session)
            self.log.debug("ready tis length for %s: %s task(s)", self,
                           len(ready_tis))
            if none_depends_on_past and none_task_concurrency:
                # small speed up
                are_runnable_tasks = ready_tis or self._are_premature_tis(
                    unfinished_tasks, finished_tasks, session) or changed_tis

        duration = (timezone.utcnow() - start_dttm)
        Stats.timing("dagrun.dependency-check.{}".format(self.dag_id),
                     duration)

        leaf_task_ids = {t.task_id for t in dag.leaves}
        leaf_tis = [ti for ti in tis if ti.task_id in leaf_task_ids]

        # if all leaf tasks finished and at least one failed, the run failed
        if not unfinished_tasks and any(
                leaf_ti.state in {State.FAILED, State.UPSTREAM_FAILED}
                for leaf_ti in leaf_tis):
            self.log.error('Marking run %s failed', self)
            self.set_state(State.FAILED)
            if execute_callbacks:
                dag.handle_callback(self,
                                    success=False,
                                    reason='task_failure',
                                    session=session)
            else:
                callback = callback_requests.DagCallbackRequest(
                    full_filepath=dag.fileloc,
                    dag_id=self.dag_id,
                    execution_date=self.execution_date,
                    is_failure_callback=True,
                    msg='task_failure')

        # if all leaves succeeded and no unfinished tasks, the run succeeded
        elif not unfinished_tasks and all(
                leaf_ti.state in {State.SUCCESS, State.SKIPPED}
                for leaf_ti in leaf_tis):
            self.log.info('Marking run %s successful', self)
            self.set_state(State.SUCCESS)
            if execute_callbacks:
                dag.handle_callback(self,
                                    success=True,
                                    reason='success',
                                    session=session)
            else:
                callback = callback_requests.DagCallbackRequest(
                    full_filepath=dag.fileloc,
                    dag_id=self.dag_id,
                    execution_date=self.execution_date,
                    is_failure_callback=False,
                    msg='success')

        # if *all tasks* are deadlocked, the run failed
        elif (unfinished_tasks and none_depends_on_past
              and none_task_concurrency and not are_runnable_tasks):
            self.log.error('Deadlock; marking run %s failed', self)
            self.set_state(State.FAILED)
            if execute_callbacks:
                dag.handle_callback(self,
                                    success=False,
                                    reason='all_tasks_deadlocked',
                                    session=session)
            else:
                callback = callback_requests.DagCallbackRequest(
                    full_filepath=dag.fileloc,
                    dag_id=self.dag_id,
                    execution_date=self.execution_date,
                    is_failure_callback=True,
                    msg='all_tasks_deadlocked')

        # finally, if the leaves aren't done, the dag is still running
        else:
            self.set_state(State.RUNNING)

        self._emit_duration_stats_for_finished_state()

        session.merge(self)

        return ready_tis, callback
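
A minimal sketch (not part of the example above) of how a scheduler-side caller might consume the returned tuple; `dag_run`, `session`, and `callback_sink` are assumed placeholder objects:

from airflow.utils.state import State

def handle_scheduling_decision(dag_run, session, callback_sink):
    """Hypothetical caller: schedule ready tasks and defer any DAG-level callback."""
    # With execute_callbacks=False, update_state() records the callback as a
    # DagCallbackRequest instead of running it inside the scheduling loop.
    ready_tis, callback = dag_run.update_state(session=session, execute_callbacks=False)
    for ti in ready_tis:
        # Hand each runnable task instance to the executor by marking it SCHEDULED.
        ti.set_state(State.SCHEDULED, session=session)
    if callback is not None:
        # callback_sink is an assumed object with a send() method (e.g. a queue).
        callback_sink.send(callback)
    return ready_tis
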
Example #24
0
    def test_get_states_count_upstream_ti(self):
        """
        Tests the helper function '_get_states_count_upstream_ti' both as a unit and inside update_state.
        """
        from airflow.ti_deps.dep_context import DepContext

        get_states_count_upstream_ti = TriggerRuleDep._get_states_count_upstream_ti
        session = settings.Session()
        now = timezone.utcnow()
        dag = DAG('test_dagrun_with_pre_tis',
                  start_date=DEFAULT_DATE,
                  default_args={'owner': 'owner1'})

        with dag:
            op1 = DummyOperator(task_id='A')
            op2 = DummyOperator(task_id='B')
            op3 = DummyOperator(task_id='C')
            op4 = DummyOperator(task_id='D')
            op5 = DummyOperator(task_id='E',
                                trigger_rule=TriggerRule.ONE_FAILED)

            op1.set_downstream([op2, op3])  # op1 >> op2, op3
            op4.set_upstream([op3, op2])  # op3, op2 >> op4
            op5.set_upstream([op2, op3, op4])  # (op2, op3, op4) >> op5

        dag.clear()
        dr = dag.create_dagrun(run_id='test_dagrun_with_pre_tis',
                               state=State.RUNNING,
                               execution_date=now,
                               start_date=now)

        ti_op1 = TaskInstance(task=dag.get_task(op1.task_id),
                              execution_date=dr.execution_date)
        ti_op2 = TaskInstance(task=dag.get_task(op2.task_id),
                              execution_date=dr.execution_date)
        ti_op3 = TaskInstance(task=dag.get_task(op3.task_id),
                              execution_date=dr.execution_date)
        ti_op4 = TaskInstance(task=dag.get_task(op4.task_id),
                              execution_date=dr.execution_date)
        ti_op5 = TaskInstance(task=dag.get_task(op5.task_id),
                              execution_date=dr.execution_date)

        ti_op1.set_state(state=State.SUCCESS, session=session)
        ti_op2.set_state(state=State.FAILED, session=session)
        ti_op3.set_state(state=State.SUCCESS, session=session)
        ti_op4.set_state(state=State.SUCCESS, session=session)
        ti_op5.set_state(state=State.SUCCESS, session=session)

        # check handling of the case where tasks are triggered from backfill and there are no finished tasks yet
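        # (each 5-tuple below counts upstream states: successes, skipped, failed, upstream_failed, total done)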
        finished_tasks = DepContext().ensure_finished_tasks(
            ti_op2.task.dag, ti_op2.execution_date, session)
        self.assertEqual(
            get_states_count_upstream_ti(finished_tasks=finished_tasks,
                                         ti=ti_op2), (1, 0, 0, 0, 1))
        finished_tasks = dr.get_task_instances(state=State.finished() +
                                               [State.UPSTREAM_FAILED],
                                               session=session)
        self.assertEqual(
            get_states_count_upstream_ti(finished_tasks=finished_tasks,
                                         ti=ti_op4), (1, 0, 1, 0, 2))
        self.assertEqual(
            get_states_count_upstream_ti(finished_tasks=finished_tasks,
                                         ti=ti_op5), (2, 0, 1, 0, 3))

        dr.update_state()
        self.assertEqual(State.SUCCESS, dr.state)
Example #25
0
    def update_state(self, session=None):
        """
        Determines the overall state of the DagRun based on the state
        of its TaskInstances.

        :return: State
        """

        dag = self.get_dag()

        tis = self.get_task_instances(session=session)
        self.log.debug("Updating state for %s considering %s task(s)", self, len(tis))

        for ti in list(tis):
            # todo: skip REMOVED task instances in the db query instead?
            if ti.state == State.REMOVED:
                tis.remove(ti)
            else:
                ti.task = dag.get_task(ti.task_id)

        # pre-calculate unfinished tasks; a db query is faster than filtering in Python
        start_dttm = timezone.utcnow()
        unfinished_tasks = self.get_task_instances(
            state=State.unfinished(),
            session=session
        )
        none_depends_on_past = all(not t.task.depends_on_past for t in unfinished_tasks)
        none_task_concurrency = all(t.task.task_concurrency is None
                                    for t in unfinished_tasks)
        # small speed up
        if unfinished_tasks and none_depends_on_past and none_task_concurrency:
                # todo: this can actually get pretty slow: one task costs between 0.01-0.15s
            no_dependencies_met = True
            for ut in unfinished_tasks:
                # We need to flag upstream and check for changes because upstream
                # failures/re-schedules can result in deadlock false positives
                old_state = ut.state
                deps_met = ut.are_dependencies_met(
                    dep_context=DepContext(
                        flag_upstream_failed=True,
                        ignore_in_retry_period=True,
                        ignore_in_reschedule_period=True),
                    session=session)
                if deps_met or old_state != ut.current_state(session=session):
                    no_dependencies_met = False
                    break

        duration = (timezone.utcnow() - start_dttm).total_seconds() * 1000
        Stats.timing("dagrun.dependency-check.{}".format(self.dag_id), duration)

        root_ids = [t.task_id for t in dag.roots]
        roots = [t for t in tis if t.task_id in root_ids]

        # if all roots finished and at least one failed, the run failed
        if (not unfinished_tasks and
                any(r.state in (State.FAILED, State.UPSTREAM_FAILED) for r in roots)):
            self.log.info('Marking run %s failed', self)
            self.set_state(State.FAILED)
            dag.handle_callback(self, success=False, reason='task_failure',
                                session=session)

        # if all roots succeeded and no unfinished tasks, the run succeeded
        elif not unfinished_tasks and all(r.state in (State.SUCCESS, State.SKIPPED)
                                          for r in roots):
            self.log.info('Marking run %s successful', self)
            self.set_state(State.SUCCESS)
            dag.handle_callback(self, success=True, reason='success', session=session)

        # if *all tasks* are deadlocked, the run failed
        elif (unfinished_tasks and none_depends_on_past and
              none_task_concurrency and no_dependencies_met):
            self.log.info('Deadlock; marking run %s failed', self)
            self.set_state(State.FAILED)
            dag.handle_callback(self, success=False, reason='all_tasks_deadlocked',
                                session=session)

        # finally, if the roots aren't done, the dag is still running
        else:
            self.set_state(State.RUNNING)

        self._emit_duration_stats_for_finished_state()

        # todo: determine whether we want to use with_for_update to make sure we lock the run
        session.merge(self)
        session.commit()

        return self.state
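
A minimal sketch (assumed caller, not part of the source above) of how this older update_state(), which returns the run's resulting state, might be polled; `dag_run` and `session` are hypothetical placeholders:

from airflow.utils.state import State

def poll_run(dag_run, session):
    """Hypothetical helper: refresh the run's state and report whether it is terminal."""
    # update_state() both mutates dag_run.state and returns the new value,
    # so the caller only needs to inspect the return value.
    state = dag_run.update_state(session=session)
    if state in (State.SUCCESS, State.FAILED):
        return state  # terminal: success, or failure (including a deadlocked run)
    return None  # still State.RUNNING; poll again on the next iteration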