def _bag_dag(self, *, dag, root_dag, recursive):
    """Validate ``dag`` and register it, plus optionally its subdags, in ``self.dags``.

    Private worker behind ``bag_dag()``: it exists so the ``recursive`` flag
    does not have to appear in the public signature. Only this method itself
    is expected to call it with ``recursive=False``.

    :param dag: the DAG to validate and store under ``dag.dag_id``.
    :param root_dag: forwarded unchanged to the nested call made per subdag.
    :param recursive: when true, every entry of ``dag.subdags`` is tagged as a
        subdag of ``dag`` and bagged first; on failure those entries are
        removed from ``self.dags`` again.
    :raises AirflowDagDuplicatedIdException: if ``self.dags`` already holds a
        DAG with the same ``dag_id`` whose ``full_filepath`` differs.
    :raises AirflowDagCycleException: re-raised after logging and rollback
        when raised inside the registration step.
    """
    # Raises if the DAG's tasks form a cycle.
    check_cycle(dag)

    dag.resolve_template_files()
    dag.last_loaded = timezone.utcnow()

    # Apply the DAG-level policy first, then the task-level policy per task.
    settings.dag_policy(dag)
    for operator in dag.tasks:
        settings.task_policy(operator)

    # Read once, before the try block, so the rollback below sees the same list.
    child_dags = dag.subdags

    try:
        # DAG.subdags already yields the full (DFS-flattened) set of subdags,
        # so the nested calls must not recurse any further themselves.
        if recursive:
            for child in child_dags:
                child.full_filepath = dag.full_filepath
                child.parent_dag = dag
                child.is_subdag = True
                self._bag_dag(dag=child, root_dag=root_dag, recursive=False)

        already_bagged = self.dags.get(dag.dag_id)
        if already_bagged and already_bagged.full_filepath != dag.full_filepath:
            raise AirflowDagDuplicatedIdException(
                dag_id=dag.dag_id,
                incoming=dag.full_filepath,
                existing=already_bagged.full_filepath,
            )

        self.dags[dag.dag_id] = dag
        self.log.debug('Loaded DAG %s', dag)
    except (AirflowDagCycleException, AirflowDagDuplicatedIdException):
        # Bagging failed: log it, undo any subdag registrations, and re-raise.
        self.log.exception('Exception bagging dag: %s', dag.dag_id)
        # Rollback is only needed at the root level, because the root call is
        # the one that iterates over the complete subdag list.
        if recursive:
            for child in child_dags:
                self.dags.pop(child.dag_id, None)
        raise
def test_dag_integrity(self, dag_path, mocker):
    """Import a DAG file and check that it defines at least one acyclic DAG.

    ``Variable.get`` is patched with ``variable_patch`` and every HTTP request
    is answered with a canned JSON payload, so the DAG file can be imported
    without its external dependencies.

    :param dag_path: path of the DAG file under test.
    :param mocker: the ``pytest-mock`` fixture used for patching.
    """
    mocker.patch.object(airflow_models.Variable, 'get', side_effect=variable_patch)
    # Patch the environment through the fixture so GIT_REPO is restored at
    # teardown instead of leaking into every test that runs afterwards.
    mocker.patch.dict(environ, {'GIT_REPO': "https://github.com/FOO/repo"})

    with requests_mock.Mocker() as mock:
        mock_response = {
            "name": "foo",
            "downloadURL": "http://artifact.com",
        }
        mock.get(requests_mock.ANY, json=mock_response)

        # Import inside the mock context: the DAG module may issue HTTP
        # requests while it is being loaded.
        dag_name = path.basename(dag_path)
        module = _import_file(dag_name, dag_path)

        # Validate that there is at least one DAG object in the file.
        dag_objects = [
            var for var in vars(module).values()
            if isinstance(var, airflow_models.dag.DAG)
        ]
        assert dag_objects, "No DAG objects found in {}".format(dag_path)

        # For every DAG object, render its tree and test for cycles.
        for dag in dag_objects:
            dag.tree_view()
            dag_cycle_tester.check_cycle(dag)