def build_airflow_dag(self, task_runs):
    """Return the Airflow DAG used to execute ``task_runs``.

    If the root task of the run is an ``AirflowDagAsDbndTask``, its own DAG
    is returned after doc markdown has been attached to the DAG and to each
    of its operators.

    Otherwise a new DAG is created: every task run contributes one operator
    (the wrapped Airflow operator for ``AirflowOperatorAsDbndTask`` tasks, or
    one built by ``build_dbnd_operator_from_taskrun`` for everything else),
    and upstream relations are wired between the operators that were
    selected to run.

    :param task_runs: task runs selected for execution; iterated twice.
    :return: the DAG to hand over to Airflow.
    """
    root_task = self.run.root_task_run.task

    if isinstance(root_task, AirflowDagAsDbndTask):
        # The root task only wraps a DAG: use that DAG, not the task itself.
        native_dag = root_task.dag
        set_af_doc_md(self.run, native_dag)
        for operator in native_dag.tasks:
            dbnd_task_id = operator_to_to_dbnd_task_id(operator)
            set_af_operator_doc_md(self.run.get_task_run(dbnd_task_id), operator)
        return root_task.dag

    # Created paused only for better clarity in the Airflow UI.
    generated_dag = DAG(
        self.run.dag_id,
        default_args=get_dbnd_default_args(),
        is_paused_upon_creation=True,
        concurrency=self.airflow_config.dbnd_dag_concurrency,
    )
    if hasattr(generated_dag, "_description"):
        generated_dag._description = "Dynamic DAG generated by DBND"

    with generated_dag:
        # Pass 1: one Airflow operator per selected task run, keyed by task id.
        ops_by_task_id = {}
        for task_run in task_runs:
            task = task_run.task
            if not isinstance(task, AirflowOperatorAsDbndTask):
                # Plain databand task: gets a freshly built operator.
                operator = build_dbnd_operator_from_taskrun(task_run)
            else:
                operator = task.airflow_op
                # HACK: wipe the operator's previous dag/upstream state so it
                # can be re-attached here. A proxy operator wrapping the real
                # one would be the cleaner solution.
                operator._dag = generated_dag
                operator.upstream_task_ids.clear()
                generated_dag.add_task(operator)
                set_af_operator_doc_md(task_run, operator)
            ops_by_task_id[task.task_id] = operator

        # Pass 2: wire upstream dependencies between the operators we built.
        for task_run in task_runs:
            task = task_run.task
            operator = ops_by_task_id[task.task_id]
            for upstream_task in task.ctrl.task_dag.upstream:
                # Upstream tasks that were not selected to run have no
                # operator and are left out of the graph.
                if upstream_task.task_id in ops_by_task_id:
                    upstream_op = ops_by_task_id[upstream_task.task_id]
                    if upstream_op.task_id not in operator.upstream_task_ids:
                        operator.set_upstream(upstream_op)

    generated_dag.fileloc = root_task.task_definition.source_code.task_source_file
    set_af_doc_md(self.run, generated_dag)
    return generated_dag
def get_databand_op_catcher_dag():
    """Return the module-level operator-catcher DAG, creating it on first use.

    Returns ``None`` when ``airflow.settings.CONTEXT_MANAGER_DAG`` is truthy,
    i.e. we are inside a native Airflow DAG or a catcher DAG is already
    active. Otherwise the shared ``DatabandOpCatcherDag`` is returned with its
    ``task_dict`` reset to an empty dict.
    """
    global _dag_catcher

    import airflow

    # NOTE(review): reads the Airflow setting directly; presumably this
    # attribute is not available on every Airflow release - verify.
    active_context_dag = airflow.settings.CONTEXT_MANAGER_DAG
    if active_context_dag:
        return None

    catcher = _dag_catcher
    if not catcher:
        catcher = DatabandOpCatcherDag(
            dag_id="_dbnd_airflow_operator_catcher",
            default_args=get_dbnd_default_args(),
        )
        _dag_catcher = catcher

    # Start every hand-out with no registered tasks.
    catcher.task_dict = {}
    return catcher
def dbnd_post_enter_context(ctx):
    # type: (DatabandContext) -> None
    """Create and enter a ``DatabandOpCatcherDag`` unless a DAG context is active.

    When ``airflow.settings.CONTEXT_MANAGER_DAG`` is truthy (we are inside a
    native Airflow DAG, or a catcher DAG is already active) nothing happens.
    Otherwise a catcher DAG with id ``inline_airflow_ops`` is stored in the
    module-level ``_airflow_op_catcher_dag`` and its ``__enter__`` is called.
    The matching ``__exit__`` is not performed by this function.

    :param ctx: the databand context that was just entered; not used here.
    """
    global _airflow_op_catcher_dag

    # Imports are kept local to the function, in their original order.
    from dbnd_airflow.dbnd_task_executor.airflow_operators_catcher import (
        DatabandOpCatcherDag,
    )
    from dbnd_airflow.config import get_dbnd_default_args

    import airflow

    if not airflow.settings.CONTEXT_MANAGER_DAG:
        catcher = DatabandOpCatcherDag(
            dag_id="inline_airflow_ops", default_args=get_dbnd_default_args()
        )
        _airflow_op_catcher_dag = catcher
        catcher.__enter__()
def get_databand_op_catcher_dag():
    """Return the module-level operator-catcher DAG, creating it on first use.

    Returns ``None`` when ``safe_get_context_manager_dag()`` yields a truthy
    value, i.e. we are inside a native Airflow DAG or a catcher DAG is
    already active. Otherwise the shared ``DatabandOpCatcherDag`` is returned
    with its ``task_dict`` reset to an empty dict.
    """
    global _dag_catcher

    # Imported for its side effects only; the name itself is not used below.
    import airflow  # noqa: F401

    active_context_dag = safe_get_context_manager_dag()
    if active_context_dag:
        return None

    catcher = _dag_catcher
    if not catcher:
        catcher = DatabandOpCatcherDag(
            dag_id="_dbnd_airflow_operator_catcher",
            default_args=get_dbnd_default_args(),
        )
        _dag_catcher = catcher

    # Start every hand-out with no registered tasks.
    catcher.task_dict = {}
    return catcher