Example #1
0
    def _bag_dag(self, *, dag, root_dag, recursive):
        """Actual implementation of bagging a dag.

        The only purpose of this is to avoid exposing ``recursive`` in ``bag_dag()``,
        intended to only be used by the ``_bag_dag()`` implementation.

        :param dag: the DAG to validate and register in ``self.dags``.
        :param root_dag: passed through unchanged to the nested ``_bag_dag()``
            calls made for subdags; not otherwise used here.
        :param recursive: when true, every entry of ``dag.subdags`` is bagged
            (with ``recursive=False``) before ``dag`` itself, and the entries
            registered that way are removed again if bagging fails.
        :raises AirflowDagDuplicatedIdException: if ``self.dags`` already holds
            a DAG with the same ``dag_id`` but a different ``full_filepath``.
        :raises AirflowDagCycleException: re-raised after logging when bagging
            a subdag raises it (presumably from ``check_cycle()``).
        """
        check_cycle(dag)  # throws if a task cycle is found

        dag.resolve_template_files()
        dag.last_loaded = timezone.utcnow()

        # Check policies
        settings.dag_policy(dag)

        for task in dag.tasks:
            settings.task_policy(task)

        # Read once so that registration and cleanup operate on the same objects.
        subdags = dag.subdags

        try:
            # DAG.subdags automatically performs DFS search, so we don't recurse
            # into further _bag_dag() calls.
            if recursive:
                for subdag in subdags:
                    subdag.full_filepath = dag.full_filepath
                    subdag.parent_dag = dag
                    subdag.is_subdag = True
                    self._bag_dag(dag=subdag,
                                  root_dag=root_dag,
                                  recursive=False)

            prev_dag = self.dags.get(dag.dag_id)
            if prev_dag and prev_dag.full_filepath != dag.full_filepath:
                raise AirflowDagDuplicatedIdException(
                    dag_id=dag.dag_id,
                    incoming=dag.full_filepath,
                    existing=prev_dag.full_filepath,
                )
            self.dags[dag.dag_id] = dag
            self.log.debug('Loaded DAG %s', dag)
        except (AirflowDagCycleException, AirflowDagDuplicatedIdException):
            # There was an error in bagging the dag. Remove it from the list of dags
            self.log.exception('Exception bagging dag: %s', dag.dag_id)
            # Only necessary at the root level since DAG.subdags automatically
            # performs DFS to search through all subdags
            if recursive:
                for subdag in subdags:
                    # Remove only what this call registered. A bare membership
                    # test would also evict a DAG from another file whose
                    # dag_id collides with a subdag -- the very DAG that
                    # triggered the duplicate-id error.
                    if self.dags.get(subdag.dag_id) is subdag:
                        del self.dags[subdag.dag_id]
            raise
Example #2
0
 def test_dag_integrity(self, dag_path, mocker):
     """Import DAG files and check for a valid DAG instance.

     The file at ``dag_path`` is imported with ``Variable.get`` patched to
     ``variable_patch`` and with HTTP requests answered by ``requests_mock``.
     The test asserts that the module defines at least one ``DAG`` object and
     runs ``check_cycle`` on each of them.
     """
     mocker.patch.object(airflow_models.Variable,
                         'get',
                         side_effect=variable_patch)
     # Patch instead of assigning so that GIT_REPO is restored when the test
     # finishes and does not leak into other tests in the same process.
     mocker.patch.dict(environ, {'GIT_REPO': "https://github.com/FOO/repo"})
     with requests_mock.Mocker() as mock:
         mock_response = {
             "name": "foo",
             "downloadURL": "http://artifact.com"
         }
         # Register the canned JSON body for GET requests to any URL.
         mock.get(requests_mock.ANY, json=mock_response)
         dag_name = path.basename(dag_path)
         module = _import_file(dag_name, dag_path)
         # Validate if there is at least 1 DAG object in the file
         dag_objects = [
             var for var in vars(module).values()
             if isinstance(var, airflow_models.dag.DAG)
         ]
         assert dag_objects
         # For every DAG object, test for cycles
         for dag in dag_objects:
             # NOTE(review): presumably renders the task tree; kept so that a
             # DAG which cannot be traversed fails here -- confirm intent.
             dag.tree_view()
             dag_cycle_tester.check_cycle(dag)