Ejemplo n.º 1
0
    def _dbnd_run_error(self, ex):
        """Record the failure caused by ``ex`` and build the wrapping run error.

        The exception is logged unless it belongs to one of the filtered
        kinds, the run state and the state of all unfinished tasks are
        updated, and the error banner is written to the log.

        Args:
            ex: the exception that interrupted the run.

        Returns:
            A ``DatabandRunError`` wrapping ``ex``. The error is returned,
            not raised; raising it is left to the caller.
        """
        exc_name = ex.__class__.__name__
        # Exceptions whose class name mentions "airflow" are not logged here.
        # AirflowConfigException is the exception to that rule: a default
        # airflow.cfg placed in .dbnd caused a validation error in the
        # k8sExecutor that was visible only in the task log, not in the console.
        # NOTE(review): the scenario the airflow filtering itself is meant to
        # help with is not documented - confirm with the original author.
        loggable_kind = (
            exc_name == "AirflowConfigException"
            or "airflow" not in exc_name.lower()
        )
        never_logged_types = (
            DatabandRunError,
            KeyboardInterrupt,
            DatabandSigTermError,
        )
        if (
            loggable_kind
            and "Failed tasks are:" not in str(ex)
            and not isinstance(ex, never_logged_types)
        ):
            logger.exception(ex)

        interrupted = isinstance(ex, (KeyboardInterrupt, DatabandSigTermError))
        if interrupted or self.is_killed():
            # Interrupted or killed runs are cancelled rather than failed.
            run_state = RunState.CANCELLED
            unfinished_task_state = TaskRunState.UPSTREAM_FAILED
        else:
            run_state = RunState.FAILED
            if isinstance(ex, DatabandFailFastError):
                unfinished_task_state = TaskRunState.UPSTREAM_FAILED
            else:
                unfinished_task_state = TaskRunState.FAILED

        self.set_run_state(run_state)
        tracking_store = self.tracker.tracking_store
        tracking_store.set_unfinished_tasks_state(
            run_uid=self.run_uid, state=unfinished_task_state
        )

        banner_text = self.describe.get_error_banner()
        separator = console_utils.ERROR_SEPARATOR
        logger.error(
            u"\n\n{sep}\n{banner}\n{sep}".format(sep=separator, banner=banner_text)
        )
        return DatabandRunError(
            "Run has failed: %s" % ex, run=self, nested_exceptions=ex
        )
Ejemplo n.º 2
0
def _get_dbnd_run_relative_cmd():
    argv = list(sys.argv)
    while argv:
        current = argv.pop(0)
        if current == "run":
            return argv
    raise DatabandRunError(
        "Can't calculate run command from '%s'",
        help_msg="Check that it has a format of '..executable.. run ...'",
    )
Ejemplo n.º 3
0
    def do_run(self):
        """Execute the task runs one at a time, in topological order.

        Reused task runs are skipped. Once a task has failed and the
        ``fail_fast`` setting is on, every remaining task is marked
        ``UPSTREAM_FAILED`` without being executed. If the run reports that
        it was killed, remaining tasks are marked ``FAILED`` without being
        executed. States assigned this way are set with ``track=False`` and
        sent to the tracker in a single call after the loop.

        Raises:
            DatabandSigTermError: re-raised unchanged when a task raises it.
            DatabandRunError: when at least one task failed and
                ``_collect_errors`` returned a non-empty result.
        """
        topological_tasks = topological_sort(
            [tr.task for tr in self.task_runs])
        fail_fast = self.settings.run.fail_fast
        task_failed = False

        # Task runs whose state was changed locally (track=False); they are
        # reported to the tracker together once the loop is done.
        task_runs_to_update_state = []
        for task in topological_tasks:
            tr = self.run.get_task_run_by_id(task.task_id)
            if tr.is_reused:
                continue

            if fail_fast and task_failed:
                logger.info("Setting %s to %s", task.task_id,
                            TaskRunState.UPSTREAM_FAILED)
                tr.set_task_run_state(TaskRunState.UPSTREAM_FAILED,
                                      track=False)
                task_runs_to_update_state.append(tr)
                continue

            if self.run.is_killed():
                logger.info(
                    "Databand Context is killed! Stopping %s to %s",
                    task.task_id,
                    TaskRunState.FAILED,
                )
                tr.set_task_run_state(TaskRunState.FAILED, track=False)
                task_runs_to_update_state.append(tr)
                continue

            logger.debug("Executing task: %s", task.task_id)

            try:
                tr.runner.execute()
            except DatabandSigTermError:
                # Bare ``raise`` propagates the termination signal with its
                # original traceback intact.
                raise
            except Exception as e:
                # A failing task does not abort the loop; later tasks are
                # handled according to ``fail_fast`` above.
                task_failed = True
                logger.error("Failed to execute task '%s': %s",
                             task.task_id, e)

        if task_runs_to_update_state:
            self.run.tracker.set_task_run_states(task_runs_to_update_state)

        if task_failed:
            err = _collect_errors(self.run.task_runs)

            if err:
                raise DatabandRunError(err)
Ejemplo n.º 4
0
    def _execute(self, session=None):
        """
        Initializes all components required to run a dag for a specified date range and
        calls helper method to execute the tasks.

        The executor is started, the dag run is fetched and linked to this job
        through ``dag_run.conf``, its task instances are processed, and
        collected errors are turned into an exception. The executor is always
        ended and the session committed, even when the run fails.

        Args:
            session: database session used for all queries and commits.
                NOTE(review): the default is ``None`` but the body calls
                ``session.merge``/``session.commit`` unconditionally -
                presumably a session-providing decorator sits outside this
                view; confirm.

        Raises:
            DatabandSystemError: if no dag run could be obtained, or the dag
                run has no task instances to run.
            DatabandRunError: if ``_collect_errors`` reports errors after the
                task instances were processed.
        """
        # Trigger cleaning
        if self.airflow_config.clean_zombies_during_backfill:
            ClearZombieJob().run()

        ti_status = BackfillJob._DagRunTaskStatus()

        # picklin'
        pickle_id = self.dag.pickle_id
        # We don't need to pickle our dag again as it was already pickled on job creation;
        # doing so would also save it into the databand table, which is of no use to airflow
        # if not self.donot_pickle and self.executor.__class__ not in (
        #     executors.LocalExecutor,
        #     executors.SequentialExecutor,
        # ):
        #     pickle_id = airflow_pickle(self.dag, session=session)

        executor = self.executor
        executor.start()

        ti_status.total_runs = 1  # total dag runs in backfill

        dag_run = None
        try:
            dag_run = self._get_dag_run(session=session)
            # Validate before the first attribute access: previously this check
            # came after ``dag_run.conf``/``dag_run.execution_date`` were used,
            # so a missing dag run surfaced as AttributeError instead.
            if dag_run is None:
                raise DatabandSystemError("Can't build dagrun")

            # Create relation DagRun <> Job
            dag_run.conf = {"job_id": self.id}
            session.merge(dag_run)
            session.commit()

            run_date = dag_run.execution_date

            tis_map = self._task_instances_for_dag_run(dag_run, session=session)

            if not tis_map:
                raise DatabandSystemError("There are no task instances to run!")
            ti_status.active_runs.append(dag_run)
            ti_status.to_run.update(tis_map)

            processed_dag_run_dates = self._process_dag_task_instances(
                ti_status=ti_status,
                executor=executor,
                pickle_id=pickle_id,
                session=session,
            )
            ti_status.executed_dag_run_dates.update(processed_dag_run_dates)

            err = self._collect_errors(ti_status=ti_status, session=session)
            if err:
                raise DatabandRunError("Airflow executor has failed to run the run")

            if run_date not in ti_status.executed_dag_run_dates:
                self.log.warning(
                    "Dag %s is not marked as completed!  %s not found in %s",
                    self.dag_id,
                    run_date,
                    ti_status.executed_dag_run_dates,
                )
        finally:
            # in sequential executor a keyboard interrupt would reach here and
            # then executor.end() -> heartbeat() -> sync() will cause the queued commands
            # to be run again before exiting
            if hasattr(executor, "commands_to_run"):
                executor.commands_to_run = []
            try:
                executor.end()
            except Exception:
                # Best effort: a failing shutdown must not mask the run's own error.
                logger.exception("Failed to terminate executor")
            # A dag run still marked RUNNING at this point is handed to the
            # zombie-kill helper.
            if dag_run and dag_run.state == State.RUNNING:
                _kill_dag_run_zombi(dag_run, session)
            session.commit()

        self.log.info("Run is completed. Exiting.")