def build_airflow_dag(self, task_runs):
        """Build (or reuse) an Airflow DAG for this run covering *task_runs*.

        Two paths:
        * If the run's root task wraps an existing Airflow DAG
          (AirflowDagAsDbndTask), that DAG is reused as-is; only the
          doc_md fields on the DAG and its operators are refreshed.
        * Otherwise a new dynamic DAG is assembled: one operator per
          selected task run, then upstream edges wired in a second pass.

        Returns the Airflow DAG object in both cases.
        """
        # create new dag from current tasks and tasks selected to run
        root_task = self.run.root_task_run.task
        if isinstance(root_task, AirflowDagAsDbndTask):
            # it's the dag without the task itself
            dag = root_task.dag
            set_af_doc_md(self.run, dag)
            for af_task in dag.tasks:
                # map each airflow operator back to its dbnd TaskRun so
                # its doc_md can be refreshed too
                task_run = self.run.get_task_run(operator_to_to_dbnd_task_id(af_task))
                set_af_operator_doc_md(task_run, af_task)
            return root_task.dag

        # paused is just for better clarity in the airflow ui
        dag = DAG(
            self.run.dag_id,
            default_args=get_dbnd_default_args(),
            is_paused_upon_creation=True,
            concurrency=self.airflow_config.dbnd_dag_concurrency,
        )
        # _description is a private Airflow attribute; guarded with hasattr
        # so this works across Airflow versions that may not have it
        if hasattr(dag, "_description"):
            dag._description = "Dynamic DAG generated by DBND"

        # operators created inside this context are attached to `dag`
        with dag:
            # first pass: create one Airflow operator per selected task run
            airflow_ops = {}
            for task_run in task_runs:
                task = task_run.task
                if isinstance(task, AirflowOperatorAsDbndTask):
                    op = task.airflow_op
                    # this is hack, we clean the state of the op.
                    # better : implement proxy object like
                    # databandOperator that can wrap real Operator
                    op._dag = dag
                    op.upstream_task_ids.clear()
                    dag.add_task(op)
                    set_af_operator_doc_md(task_run, op)
                else:
                    # we will create DatabandOperator for databand tasks
                    op = build_dbnd_operator_from_taskrun(task_run)

                airflow_ops[task.task_id] = op

            # second pass: wire upstream dependencies between the operators
            for task_run in task_runs:
                task = task_run.task
                op = airflow_ops[task.task_id]
                upstream_tasks = task.ctrl.task_dag.upstream
                for t in upstream_tasks:
                    if t.task_id not in airflow_ops:
                        # we have some tasks that were not selected to run, we don't add them to graph
                        continue
                    upstream_ops = airflow_ops[t.task_id]
                    # skip edges already present to avoid duplicate wiring
                    if upstream_ops.task_id not in op.upstream_task_ids:
                        op.set_upstream(upstream_ops)

        # point the Airflow UI's "source" at the root task's source file
        dag.fileloc = root_task.task_definition.source_code.task_source_file
        set_af_doc_md(self.run, dag)
        return dag
# --- Example #2 ---
def get_databand_op_catcher_dag():
    """Return the shared DatabandOpCatcherDag, or None when already in a DAG context.

    The catcher DAG is created lazily on first use and its task registry is
    emptied before every return so operators caught earlier do not leak in.
    """
    import airflow

    # A truthy CONTEXT_MANAGER_DAG means we are already inside a native
    # Airflow DAG (or a previously installed catcher) -- nothing to wrap.
    if airflow.settings.CONTEXT_MANAGER_DAG:
        return None

    global _dag_catcher
    catcher = _dag_catcher
    if not catcher:
        catcher = DatabandOpCatcherDag(
            dag_id="_dbnd_airflow_operator_catcher",
            default_args=get_dbnd_default_args(),
        )
        _dag_catcher = catcher

    # Start each use with a clean task registry.
    catcher.task_dict = {}
    return catcher
# --- Example #3 (file: _plugin.py, project: cxz/dbnd) ---
def dbnd_post_enter_context(ctx):  # type: (DatabandContext) -> None
    """Hook run after entering a DatabandContext.

    Installs a catcher DAG so Airflow operators created inline get collected;
    does nothing when we are already inside a native Airflow DAG context.
    """
    import airflow

    from dbnd_airflow.config import get_dbnd_default_args
    from dbnd_airflow.dbnd_task_executor.airflow_operators_catcher import (
        DatabandOpCatcherDag, )

    global _airflow_op_catcher_dag

    if airflow.settings.CONTEXT_MANAGER_DAG:
        # we are inside native airflow DAG or already have DatabandOpCatcherDag
        return

    catcher = DatabandOpCatcherDag(
        dag_id="inline_airflow_ops", default_args=get_dbnd_default_args())
    _airflow_op_catcher_dag = catcher
    # Enter the DAG context manually; the matching __exit__ happens elsewhere
    # (on context teardown), so a `with` statement is not usable here.
    catcher.__enter__()
# --- Example #4 ---
def get_databand_op_catcher_dag():
    """Return the singleton DatabandOpCatcherDag, or None inside a DAG context.

    Uses safe_get_context_manager_dag() to detect an active DAG context in a
    version-tolerant way. The catcher's task_dict is reset on every call.
    """
    import airflow  # noqa: F401

    # Already inside a native Airflow DAG (or an existing catcher)?
    # Then there is nothing for us to install.
    if safe_get_context_manager_dag():
        return None

    global _dag_catcher
    existing = _dag_catcher
    if not existing:
        # Lazily build the singleton catcher on first use.
        existing = DatabandOpCatcherDag(
            dag_id="_dbnd_airflow_operator_catcher",
            default_args=get_dbnd_default_args(),
        )
        _dag_catcher = existing

    # Drop any operators caught during previous uses.
    existing.task_dict = {}
    return existing