Example #1
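This variant appears to be the most recent of the four. It tracks task-instance keys in sets, skips work already marked successful or skipped by another job, abandons a task after the executor fails to report its state three times, and detects backfills deadlocked by "depends_on_past" dependencies.
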
    def _execute(self):
        """
        Runs a dag for a specified date range.
        """
        session = settings.Session()

        start_date = self.bf_start_date
        end_date = self.bf_end_date

        # picklin'
        pickle_id = None
        if not self.donot_pickle and self.executor.__class__ not in (
                executors.LocalExecutor, executors.SequentialExecutor):
            pickle = models.DagPickle(self.dag)
            session.add(pickle)
            session.commit()
            pickle_id = pickle.id

        executor = self.executor
        executor.start()
        executor_fails = Counter()

        # Build a list of all instances to run
        tasks_to_run = {}
        failed = set()
        succeeded = set()
        started = set()
        skipped = set()
        not_ready = set()
        deadlocked = set()

        for task in self.dag.tasks:
            if (not self.include_adhoc) and task.adhoc:
                continue

            start_date = start_date or task.start_date
            end_date = end_date or task.end_date or datetime.now()
            for dttm in self.dag.date_range(start_date, end_date=end_date):
                ti = models.TaskInstance(task, dttm)
                tasks_to_run[ti.key] = ti
                session.merge(ti)
        session.commit()

        # Triggering what is ready to get triggered
        while tasks_to_run and not deadlocked:
            not_ready.clear()
            for key, ti in list(tasks_to_run.items()):

                ti.refresh_from_db()
                ignore_depends_on_past = (self.ignore_first_depends_on_past
                                          and ti.execution_date
                                          == (start_date or ti.start_date))

                # The task was already marked successful or skipped by a
                # different Job. Don't rerun it.
                if key not in started:
                    if ti.state == State.SUCCESS:
                        succeeded.add(key)
                        tasks_to_run.pop(key)
                        continue
                    elif ti.state == State.SKIPPED:
                        skipped.add(key)
                        tasks_to_run.pop(key)
                        continue

                # Is the task runnable? -- then run it
                if ti.is_queueable(
                        include_queued=True,
                        ignore_depends_on_past=ignore_depends_on_past,
                        flag_upstream_failed=True):
                    self.logger.debug('Sending {} to executor'.format(ti))
                    executor.queue_task_instance(
                        ti,
                        mark_success=self.mark_success,
                        pickle_id=pickle_id,
                        ignore_dependencies=self.ignore_dependencies,
                        ignore_depends_on_past=ignore_depends_on_past,
                        pool=self.pool)
                    started.add(key)

                # Mark the task as not ready to run
                elif ti.state in (State.NONE, State.UPSTREAM_FAILED):
                    not_ready.add(key)

            self.heartbeat()
            executor.heartbeat()

            # If the set of tasks that aren't ready ever equals the set of
            # tasks to run, then the backfill is deadlocked
            if not_ready and not_ready == set(tasks_to_run):
                deadlocked.update(tasks_to_run.values())
                tasks_to_run.clear()

            # Reacting to events
            for key, state in list(executor.get_event_buffer().items()):
                dag_id, task_id, execution_date = key
                if key not in tasks_to_run:
                    continue
                ti = tasks_to_run[key]
                ti.refresh_from_db()

                # executor reports failure
                if state == State.FAILED:

                    # task reports running
                    if ti.state == State.RUNNING:
                        msg = ('Executor reports that task instance {} failed '
                               'although the task says it is running.'.format(
                                   key))
                        self.logger.error(msg)
                        ti.handle_failure(msg)
                        tasks_to_run.pop(key)

                    # task reports skipped
                    elif ti.state == State.SKIPPED:
                        self.logger.error("Skipping {} ".format(key))
                        skipped.add(key)
                        tasks_to_run.pop(key)

                    # anything else is a failure
                    else:
                        self.logger.error(
                            "Task instance {} failed".format(key))
                        failed.add(key)
                        tasks_to_run.pop(key)

                # executor reports success
                elif state == State.SUCCESS:

                    # task reports success
                    if ti.state == State.SUCCESS:
                        self.logger.info(
                            'Task instance {} succeeded'.format(key))
                        succeeded.add(key)
                        tasks_to_run.pop(key)

                    # task reports failure
                    elif ti.state == State.FAILED:
                        self.logger.error(
                            "Task instance {} failed".format(key))
                        failed.add(key)
                        tasks_to_run.pop(key)

                    # task reports skipped
                    elif ti.state == State.SKIPPED:
                        self.logger.info(
                            "Task instance {} skipped".format(key))
                        skipped.add(key)
                        tasks_to_run.pop(key)

                    # this probably won't ever be triggered
                    # not_ready holds keys (see not_ready.add(key) above),
                    # so test membership with the key, not the TaskInstance
                    elif key in not_ready:
                        self.logger.info(
                            "{} wasn't expected to run, but it did".format(ti))

                    # executor reports success but task does not - this is weird
                    elif ti.state not in (State.SUCCESS, State.QUEUED,
                                          State.UP_FOR_RETRY):
                        self.logger.error(
                            "The airflow run command failed "
                            "at reporting an error. This should not occur "
                            "in normal circumstances. Task state is '{}', "
                            "reported state is '{}'. TI is {}"
                            "".format(ti.state, state, ti))

                        # if the executor fails 3 or more times, stop trying to
                        # run the task
                        executor_fails[key] += 1
                        if executor_fails[key] >= 3:
                            msg = (
                                'The airflow run command failed to report an '
                                'error for task {} three or more times. The '
                                'task is being marked as failed. This is very '
                                'unusual and probably means that an error is '
                                'taking place before the task even '
                                'starts.'.format(key))
                            self.logger.error(msg)
                            ti.handle_failure(msg)
                            tasks_to_run.pop(key)

            msg = ' | '.join([
                "[backfill progress]", "waiting: {0}", "succeeded: {1}",
                "kicked_off: {2}", "failed: {3}", "skipped: {4}",
                "deadlocked: {5}"
            ]).format(len(tasks_to_run), len(succeeded), len(started),
                      len(failed), len(skipped), len(deadlocked))
            self.logger.info(msg)

        executor.end()
        session.close()

        err = ''
        if failed:
            err += ("---------------------------------------------------\n"
                    "Some task instances failed:\n{}\n".format(failed))
        if deadlocked:
            err += ('---------------------------------------------------\n'
                    'BackfillJob is deadlocked.')
            deadlocked_depends_on_past = any(
                t.are_dependencies_met() != t.are_dependencies_met(
                    ignore_depends_on_past=True) for t in deadlocked)
            if deadlocked_depends_on_past:
                err += (
                    ' Some of the deadlocked tasks were unable to run because '
                    'of "depends_on_past" relationships. Try running the '
                    'backfill with the option '
                    '"ignore_first_depends_on_past=True" or passing "-I" at '
                    'the command line.')
            err += ' These tasks were unable to run:\n{}\n'.format(deadlocked)
        if err:
            raise AirflowException(err)

        self.logger.info("Backfill done. Exiting.")
Example #2
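An apparently earlier variant of the same method: progress is tracked in lists rather than sets, there is no deadlock detection, and when a task fails or is skipped its downstream relatives are dropped from the run list as well.
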
    def _execute(self):
        """
        Runs a dag for a specified date range.
        """
        session = settings.Session()

        start_date = self.bf_start_date
        end_date = self.bf_end_date

        # picklin'
        pickle_id = None
        if not self.donot_pickle and self.executor.__class__ not in (
                executors.LocalExecutor, executors.SequentialExecutor):
            pickle = models.DagPickle(self.dag)
            session.add(pickle)
            session.commit()
            pickle_id = pickle.id

        executor = self.executor
        executor.start()

        # Build a list of all instances to run
        tasks_to_run = {}
        failed = []
        succeeded = []
        started = []
        wont_run = []
        for task in self.dag.tasks:
            if (not self.include_adhoc) and task.adhoc:
                continue

            start_date = start_date or task.start_date
            end_date = end_date or task.end_date or datetime.now()
            for dttm in self.dag.date_range(start_date, end_date=end_date):
                ti = models.TaskInstance(task, dttm)
                tasks_to_run[ti.key] = ti

        # Triggering what is ready to get triggered
        while tasks_to_run:
            for key, ti in list(tasks_to_run.items()):
                ti.refresh_from_db()
                if ti.state in (State.SUCCESS,
                                State.SKIPPED) and key in tasks_to_run:
                    succeeded.append(key)
                    tasks_to_run.pop(key)
                elif ti.state in (State.RUNNING, State.QUEUED):
                    continue
                elif ti.is_runnable(flag_upstream_failed=True):
                    executor.queue_task_instance(
                        ti,
                        mark_success=self.mark_success,
                        task_start_date=self.bf_start_date,
                        pickle_id=pickle_id,
                        ignore_dependencies=self.ignore_dependencies,
                        pool=self.pool)
                    ti.state = State.RUNNING
                    if key not in started:
                        started.append(key)
            self.heartbeat()
            executor.heartbeat()

            # Reacting to events
            for key, state in list(executor.get_event_buffer().items()):
                dag_id, task_id, execution_date = key
                if key not in tasks_to_run:
                    continue
                ti = tasks_to_run[key]
                ti.refresh_from_db()
                if (ti.state in (State.FAILED, State.SKIPPED)
                        or state == State.FAILED):
                    if ti.state == State.FAILED or state == State.FAILED:
                        failed.append(key)
                        self.logger.error("Task instance " + str(key) +
                                          " failed")
                    elif ti.state == State.SKIPPED:
                        wont_run.append(key)
                        self.logger.error("Skipping " + str(key))
                    tasks_to_run.pop(key)
                    # Removing downstream tasks that also shouldn't run
                    for t in self.dag.get_task(task_id).get_flat_relatives(
                            upstream=False):
                        key = (ti.dag_id, t.task_id, execution_date)
                        if key in tasks_to_run:
                            wont_run.append(key)
                            tasks_to_run.pop(key)
                elif ti.state == State.SUCCESS and state == State.SUCCESS:
                    succeeded.append(key)
                    tasks_to_run.pop(key)
                elif (ti.state not in (State.SUCCESS, State.QUEUED)
                      and state == State.SUCCESS):
                    self.logger.error(
                        "The airflow run command failed "
                        "at reporting an error. This should not occur "
                        "in normal circumstances. Task state is '{}', "
                        "reported state is '{}'. TI is {}"
                        "".format(ti.state, state, ti))

            msg = ("[backfill progress] "
                   "waiting: {0} | "
                   "succeeded: {1} | "
                   "kicked_off: {2} | "
                   "failed: {3} | "
                   "wont_run: {4} ").format(len(tasks_to_run), len(succeeded),
                                            len(started), len(failed),
                                            len(wont_run))
            self.logger.info(msg)

        executor.end()
        session.close()
        if failed:
            msg = ("------------------------------------------\n"
                   "Some tasks instances failed, "
                   "here's the list:\n{}".format(failed))
            raise AirflowException(msg)
        self.logger.info("All done. Exiting.")
Example #3
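An older variant still: the run dates come from utils.date_range with the DAG's schedule_interval, logging goes through the module-level logging rather than self.logger, and only outright success or failure is handled.
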
    def _execute(self):
        """
        Runs a dag for a specified date range.
        """
        session = settings.Session()

        start_date = self.bf_start_date
        end_date = self.bf_end_date

        # picklin'
        pickle_id = None
        if not self.donot_pickle and self.executor.__class__ not in (
                executors.LocalExecutor, executors.SequentialExecutor):
            pickle = models.DagPickle(self.dag)
            session.add(pickle)
            session.commit()
            pickle_id = pickle.id

        executor = self.executor
        executor.start()

        # Build a list of all instances to run
        tasks_to_run = {}
        failed = []
        succeeded = []
        started = []
        wont_run = []
        for task in self.dag.tasks:
            if (not self.include_adhoc) and task.adhoc:
                continue

            start_date = start_date or task.start_date
            end_date = end_date or task.end_date or datetime.now()
            for dttm in utils.date_range(
                    start_date, end_date, task.dag.schedule_interval):
                ti = models.TaskInstance(task, dttm)
                tasks_to_run[ti.key] = ti

        # Triggering what is ready to get triggered
        while tasks_to_run:
            # Copy the items: entries are deleted during iteration
            for key, ti in list(tasks_to_run.items()):
                ti.refresh_from_db()
                if ti.state == State.SUCCESS and key in tasks_to_run:
                    succeeded.append(key)
                    del tasks_to_run[key]
                elif ti.is_runnable():
                    executor.queue_task_instance(
                        ti,
                        mark_success=self.mark_success,
                        task_start_date=self.bf_start_date,
                        pickle_id=pickle_id,
                        ignore_dependencies=self.ignore_dependencies)
                    ti.state = State.RUNNING
                    if key not in started:
                        started.append(key)
            self.heartbeat()
            executor.heartbeat()

            # Reacting to events
            for key, state in executor.get_event_buffer().items():
                dag_id, task_id, execution_date = key
                if key not in tasks_to_run:
                    continue
                ti = tasks_to_run[key]
                ti.refresh_from_db()
                if ti.state == State.FAILED:
                    failed.append(key)
                    logging.error("Task instance " + str(key) + " failed")
                    del tasks_to_run[key]
                    # Removing downstream tasks from the one that has failed
                    for t in self.dag.get_task(task_id).get_flat_relatives(
                            upstream=False):
                        key = (ti.dag_id, t.task_id, execution_date)
                        if key in tasks_to_run:
                            wont_run.append(key)
                            del tasks_to_run[key]
                elif ti.state == State.SUCCESS:
                    succeeded.append(key)
                    del tasks_to_run[key]

            msg = (
                "[backfill progress] "
                "waiting: {0} | "
                "succeeded: {1} | "
                "kicked_off: {2} | "
                "failed: {3} | "
                "skipped: {4} ").format(
                    len(tasks_to_run),
                    len(succeeded),
                    len(started),
                    len(failed),
                    len(wont_run))
            logging.info(msg)

        executor.end()
        session.close()
        if failed:
            raise AirflowException(
                "Some task instances failed, here's the list:\n" + str(failed))
        logging.info("All done. Exiting.")
Example #4
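What looks like the earliest variant: the DAG is always pickled, fully rendered commands are enqueued via executor.queue_command instead of queue_task_instance, and a progress line is logged at the top of each scheduling pass.
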
    def _execute(self):
        """
        Runs a dag for a specified date range.
        """
        start_date = self.bf_start_date
        end_date = self.bf_end_date

        session = settings.Session()
        pickle = models.DagPickle(self.dag, self)
        executor = self.executor
        executor.start()
        session.add(pickle)
        session.commit()
        pickle_id = pickle.id

        # Build a list of all instances to run
        tasks_to_run = {}
        failed = []
        succeeded = []
        started = []
        wont_run = []
        for task in self.dag.tasks:
            start_date = start_date or task.start_date
            end_date = end_date or task.end_date or datetime.now()
            for dttm in utils.date_range(start_date, end_date,
                                         task.dag.schedule_interval):
                ti = models.TaskInstance(task, dttm)
                tasks_to_run[ti.key] = ti

        # Triggering what is ready to get triggered
        while tasks_to_run:
            msg = ("Yet to run: {0} | "
                   "Succeeded: {1} | "
                   "Started: {2} | "
                   "Failed: {3} | "
                   "Won't run: {4} ").format(len(tasks_to_run), len(succeeded),
                                             len(started), len(failed),
                                             len(wont_run))

            logging.info(msg)
            # Copy the items: entries are deleted during iteration
            for key, ti in list(tasks_to_run.items()):
                ti.refresh_from_db()
                if ti.state == State.SUCCESS and key in tasks_to_run:
                    succeeded.append(key)
                    del tasks_to_run[key]
                elif ti.is_runnable():
                    executor.queue_command(key=ti.key,
                                           command=ti.command(
                                               mark_success=self.mark_success,
                                               pickle_id=pickle_id))
                    ti.state = State.RUNNING
                    if key not in started:
                        started.append(key)
            self.heartbeat()
            executor.heartbeat()

            # Reacting to events
            for key, state in executor.get_event_buffer().items():
                dag_id, task_id, execution_date = key
                if key not in tasks_to_run:
                    continue
                ti = tasks_to_run[key]
                ti.refresh_from_db()
                if ti.state == State.FAILED:
                    failed.append(key)
                    logging.error("Task instance " + str(key) + " failed")
                    del tasks_to_run[key]
                    # Removing downstream tasks from the one that has failed
                    for t in self.dag.get_task(task_id).get_flat_relatives(
                            upstream=False):
                        key = (ti.dag_id, t.task_id, execution_date)
                        if key in tasks_to_run:
                            wont_run.append(key)
                            del tasks_to_run[key]
                elif ti.state == State.SUCCESS:
                    succeeded.append(key)
                    del tasks_to_run[key]
        executor.end()
        logging.info("Run summary:")
        session.close()