Code example #1
File: dag_processing.py Project: zhongjiajie/airflow
    def _refresh_dag_dir(self):
        """
        Refresh file paths from dag dir if we haven't done it for too long.
        """
        now = timezone.utcnow()
        elapsed_time_since_refresh = (now - self.last_dag_dir_refresh_time).total_seconds()
        if elapsed_time_since_refresh > self.dag_dir_list_interval:
            # Build up a list of Python files that could contain DAGs
            self.log.info("Searching for files in %s", self._dag_directory)
            self._file_paths = list_py_file_paths(self._dag_directory)
            self.last_dag_dir_refresh_time = now
            self.log.info("There are %s files in %s", len(self._file_paths), self._dag_directory)
            self.set_file_paths(self._file_paths)

            # noinspection PyBroadException
            try:
                self.log.debug("Removing old import errors")
                self.clear_nonexistent_import_errors()  # pylint: disable=no-value-for-parameter
            except Exception:  # pylint: disable=broad-except
                self.log.exception("Error removing old import errors")

            if STORE_SERIALIZED_DAGS:
                from airflow.models.serialized_dag import SerializedDagModel
                from airflow.models.dag import DagModel
                SerializedDagModel.remove_deleted_dags(self._file_paths)
                DagModel.deactivate_deleted_dags(self._file_paths)

            if self.store_dag_code:
                from airflow.models.dagcode import DagCode
                DagCode.remove_deleted_code(self._file_paths)
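All of the _refresh_dag_dir variants in this listing follow the same reconciliation idea: list the Python files currently on disk, then drop every stored record whose source file is no longer in that list. Below is a minimal, Airflow-free sketch of that pattern; remove_deleted() and the recorded mapping are illustrative names, not Airflow APIs.

# Illustrative sketch of the "remove what no longer exists on disk" reconciliation
# used by SerializedDagModel.remove_deleted_dags and DagCode.remove_deleted_code above.
def remove_deleted(recorded, alive_file_paths):
    """Return only the entries whose fileloc is still among the files found on disk."""
    alive = set(alive_file_paths)
    return {dag_id: fileloc for dag_id, fileloc in recorded.items() if fileloc in alive}


recorded = {"etl_daily": "/dags/etl.py", "old_job": "/dags/removed.py"}
print(remove_deleted(recorded, ["/dags/etl.py"]))  # {'etl_daily': '/dags/etl.py'}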
Code example #2
    def _refresh_dag_dir(self):
        """
        Refresh file paths from dag dir if we haven't done it for too long.
        """
        airflow_home = os.getenv("AIRFLOW_HOME", "/usr/lib/airflow")
        now = timezone.utcnow()
        elapsed_time_since_refresh = (
            now - self.last_dag_dir_refresh_time).total_seconds()
        if elapsed_time_since_refresh > self.dag_dir_list_interval or os.path.exists(
                airflow_home + "/scheduler.refresh"):
            # Build up a list of Python files that could contain DAGs
            self.log.info("Searching for files in %s", self._dag_directory)
            self._file_paths = list_py_file_paths(self._dag_directory)
            self.last_dag_dir_refresh_time = now
            self.log.info("There are %s files in %s", len(self._file_paths),
                          self._dag_directory)
            self.set_file_paths(self._file_paths)

            # noinspection PyBroadException
            try:
                self.log.debug("Removing old import errors")
                self.clear_nonexistent_import_errors()  # pylint: disable=no-value-for-parameter
            except Exception:  # pylint: disable=broad-except
                self.log.exception("Error removing old import errors")

            # Consume the refresh marker (if any) so the forced rescan happens only once
            if os.path.exists(airflow_home + "/scheduler.refresh"):
                os.remove(airflow_home + "/scheduler.refresh")
                self.log.info("Refresh of scheduler DAGs triggered by marker file")

            if STORE_SERIALIZED_DAGS:
                from airflow.models.serialized_dag import SerializedDagModel
                from airflow.models.dag import DagModel
                SerializedDagModel.remove_deleted_dags(self._file_paths)
                DagModel.deactivate_deleted_dags(self._file_paths)
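The variant in code example #2 adds an out-of-band trigger: besides the elapsed-time check, a marker file named scheduler.refresh under AIRFLOW_HOME forces an immediate rescan. Assuming that patched scheduler, a refresh could be triggered from outside the scheduler process as sketched below; the marker filename and the default path come from the patch above, not from stock Airflow.

import os
from pathlib import Path

# Create the marker file the patched _refresh_dag_dir polls for; on its next loop
# iteration the scheduler rescans the DAG directory and then deletes the marker.
airflow_home = os.getenv("AIRFLOW_HOME", "/usr/lib/airflow")
Path(airflow_home, "scheduler.refresh").touch()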
Code example #3
File: manager.py Project: yangrong688/airflow
    def _refresh_dag_dir(self):
        """Refresh file paths from dag dir if we haven't done it for too long."""
        now = timezone.utcnow()
        elapsed_time_since_refresh = (
            now - self.last_dag_dir_refresh_time).total_seconds()
        if elapsed_time_since_refresh > self.dag_dir_list_interval:
            # Build up a list of Python files that could contain DAGs
            self.log.info("Searching for files in %s", self._dag_directory)
            self._file_paths = list_py_file_paths(self._dag_directory)
            self.last_dag_dir_refresh_time = now
            self.log.info("There are %s files in %s", len(self._file_paths),
                          self._dag_directory)
            self.set_file_paths(self._file_paths)

            try:
                self.log.debug("Removing old import errors")
                self.clear_nonexistent_import_errors()
            except Exception:
                self.log.exception("Error removing old import errors")

            SerializedDagModel.remove_deleted_dags(self._file_paths)
            DagModel.deactivate_deleted_dags(self._file_paths)

            from airflow.models.dagcode import DagCode

            DagCode.remove_deleted_code(self._file_paths)
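Common to all of these variants is the time gate at the top: the directory listing and the cleanup calls only run once more than dag_dir_list_interval seconds have passed since the previous refresh (Airflow exposes this as [scheduler] dag_dir_list_interval, 300 seconds by default). A standalone sketch of that gate follows; RefreshGate is an illustrative name, not an Airflow class.

from datetime import datetime, timezone

class RefreshGate:
    """Illustrative stand-in for the last_dag_dir_refresh_time / interval bookkeeping."""

    def __init__(self, interval_seconds=300.0):
        self.interval = interval_seconds
        self.last_refresh = datetime.min.replace(tzinfo=timezone.utc)

    def should_refresh(self, now=None):
        now = now or datetime.now(timezone.utc)
        if (now - self.last_refresh).total_seconds() > self.interval:
            self.last_refresh = now
            return True
        return False

gate = RefreshGate(interval_seconds=300)
print(gate.should_refresh())  # True on the first call
print(gate.should_refresh())  # False until the interval has elapsed again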
Code example #4
    def test_remove_dags_by_filepath(self):
        """DAGs can be removed from database."""
        example_dags_list = list(self._write_example_dags().values())
        # Remove SubDags from the list: they are not stored in the DB as separate rows
        # but are embedded in the JSON blob of their parent DAG
        filtered_example_dags_list = [dag for dag in example_dags_list if not dag.is_subdag]
        # Tests removing by file path.
        dag_removed_by_file = filtered_example_dags_list[0]
        # Deduplicate file paths, since a single file may define several DAGs
        # (hence the set comprehension)
        example_dag_files = list({dag.full_filepath for dag in filtered_example_dags_list})
        example_dag_files.remove(dag_removed_by_file.full_filepath)
        SDM.remove_deleted_dags(example_dag_files)
        self.assertFalse(SDM.has_dag(dag_removed_by_file.dag_id))
Code example #5
    def test_remove_dags(self):
        """DAGs can be removed from database."""
        example_dags_list = list(self._write_example_dags().values())
        # Remove SubDags from the list: they are not stored in the DB as separate rows
        # but are embedded in the JSON blob of their parent DAG
        filtered_example_dags_list = [dag for dag in example_dags_list if not dag.is_subdag]
        # Tests removing by dag_id.
        dag_removed_by_id = filtered_example_dags_list[0]
        SDM.remove_dag(dag_removed_by_id.dag_id)
        self.assertFalse(SDM.has_dag(dag_removed_by_id.dag_id))

        # Tests removing by file path.
        dag_removed_by_file = filtered_example_dags_list[1]
        example_dag_files = [dag.full_filepath for dag in filtered_example_dags_list]
        example_dag_files.remove(dag_removed_by_file.full_filepath)
        SDM.remove_deleted_dags(example_dag_files)
        self.assertFalse(SDM.has_dag(dag_removed_by_file.dag_id))
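Both tests above rely on a _write_example_dags() helper from Airflow's test suite that persists the bundled example DAGs and returns them keyed by dag_id. The sketch below is only a rough approximation of its shape for context; the real helper differs in its details.

from airflow.models import DagBag
from airflow.models.serialized_dag import SerializedDagModel as SDM

def _write_example_dags():
    """Load Airflow's bundled example DAGs and serialize each one into the database."""
    dagbag = DagBag(include_examples=True)
    for dag in dagbag.dags.values():
        SDM.write_dag(dag)
    return dagbag.dags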
Code example #6
File: manager.py Project: dskoda1/airflow
    def _refresh_dag_dir(self):
        """Refresh file paths from dag dir if we haven't done it for too long."""
        now = timezone.utcnow()
        elapsed_time_since_refresh = (
            now - self.last_dag_dir_refresh_time).total_seconds()
        if elapsed_time_since_refresh > self.dag_dir_list_interval:
            # Build up a list of Python files that could contain DAGs
            self.log.info("Searching for files in %s", self._dag_directory)
            self._file_paths = list_py_file_paths(self._dag_directory)
            self.last_dag_dir_refresh_time = now
            self.log.info("There are %s files in %s", len(self._file_paths),
                          self._dag_directory)
            self.set_file_paths(self._file_paths)

            try:
                self.log.debug("Removing old import errors")
                self.clear_nonexistent_import_errors()
            except Exception:
                self.log.exception("Error removing old import errors")

            # Check whether the file path is a zip archive and, if so, get the full path of each
            # Python file inside it. Without this, SerializedDagModel.remove_deleted_dags would
            # delete zipped DAGs, and likewise DagCode.remove_deleted_code.
            dag_filelocs = []
            for fileloc in self._file_paths:
                if not fileloc.endswith(".py") and zipfile.is_zipfile(fileloc):
                    with zipfile.ZipFile(fileloc) as z:
                        dag_filelocs.extend([
                            os.path.join(fileloc, info.filename)
                            for info in z.infolist()
                            if might_contain_dag(info.filename, True, z)
                        ])
                else:
                    dag_filelocs.append(fileloc)

            SerializedDagModel.remove_deleted_dags(dag_filelocs)
            DagModel.deactivate_deleted_dags(self._file_paths)

            from airflow.models.dagcode import DagCode

            DagCode.remove_deleted_code(dag_filelocs)
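Code example #6 is the only variant that expands zip archives before calling the cleanup methods, so filelocs recorded in the zip_path/inner_file.py form are not mistaken for deleted files. Below is a self-contained version of that expansion, using a plain ".py" suffix check as a simplified stand-in for Airflow's might_contain_dag() heuristic.

import os
import zipfile

def expand_dag_filelocs(file_paths):
    """Expand zip archives into the zip_path/inner_file.py form used as a DAG fileloc."""
    filelocs = []
    for fileloc in file_paths:
        if not fileloc.endswith(".py") and zipfile.is_zipfile(fileloc):
            with zipfile.ZipFile(fileloc) as z:
                filelocs.extend(
                    os.path.join(fileloc, info.filename)
                    for info in z.infolist()
                    if info.filename.endswith(".py")  # simplified stand-in for might_contain_dag
                )
        else:
            filelocs.append(fileloc)
    return filelocs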