def _refresh_dag_dir(self): """ Refresh file paths from dag dir if we haven't done it for too long. """ now = timezone.utcnow() elapsed_time_since_refresh = (now - self.last_dag_dir_refresh_time).total_seconds() if elapsed_time_since_refresh > self.dag_dir_list_interval: # Build up a list of Python files that could contain DAGs self.log.info("Searching for files in %s", self._dag_directory) self._file_paths = list_py_file_paths(self._dag_directory) self.last_dag_dir_refresh_time = now self.log.info("There are %s files in %s", len(self._file_paths), self._dag_directory) self.set_file_paths(self._file_paths) # noinspection PyBroadException try: self.log.debug("Removing old import errors") self.clear_nonexistent_import_errors() # pylint: disable=no-value-for-parameter except Exception: # pylint: disable=broad-except self.log.exception("Error removing old import errors") if STORE_SERIALIZED_DAGS: from airflow.models.serialized_dag import SerializedDagModel from airflow.models.dag import DagModel SerializedDagModel.remove_deleted_dags(self._file_paths) DagModel.deactivate_deleted_dags(self._file_paths) if self.store_dag_code: from airflow.models.dagcode import DagCode DagCode.remove_deleted_code(self._file_paths)
def _refresh_dag_dir(self): """ Refresh file paths from dag dir if we haven't done it for too long. """ airflow_home = os.getenv("AIRFLOW_HOME", "/usr/lib/airflow") now = timezone.utcnow() elapsed_time_since_refresh = ( now - self.last_dag_dir_refresh_time).total_seconds() if elapsed_time_since_refresh > self.dag_dir_list_interval or os.path.exists( airflow_home + "/scheduler.refresh"): # Build up a list of Python files that could contain DAGs self.log.info("Searching for files in %s", self._dag_directory) self._file_paths = list_py_file_paths(self._dag_directory) self.last_dag_dir_refresh_time = now self.log.info("There are %s files in %s", len(self._file_paths), self._dag_directory) self.set_file_paths(self._file_paths) # noinspection PyBroadException try: self.log.debug("Removing old import errors") self.clear_nonexistent_import_errors() # pylint: disable=no-value-for-parameter os.remove(airflow_home + "/scheduler.refresh") self.logger.info("Refresh Dags of scheduler triggered") except Exception: # pylint: disable=broad-except self.log.exception("Error removing old import errors") if STORE_SERIALIZED_DAGS: from airflow.models.serialized_dag import SerializedDagModel from airflow.models.dag import DagModel SerializedDagModel.remove_deleted_dags(self._file_paths) DagModel.deactivate_deleted_dags(self._file_paths)
def _refresh_dag_dir(self): """Refresh file paths from dag dir if we haven't done it for too long.""" now = timezone.utcnow() elapsed_time_since_refresh = ( now - self.last_dag_dir_refresh_time).total_seconds() if elapsed_time_since_refresh > self.dag_dir_list_interval: # Build up a list of Python files that could contain DAGs self.log.info("Searching for files in %s", self._dag_directory) self._file_paths = list_py_file_paths(self._dag_directory) self.last_dag_dir_refresh_time = now self.log.info("There are %s files in %s", len(self._file_paths), self._dag_directory) self.set_file_paths(self._file_paths) try: self.log.debug("Removing old import errors") self.clear_nonexistent_import_errors() except Exception: self.log.exception("Error removing old import errors") SerializedDagModel.remove_deleted_dags(self._file_paths) DagModel.deactivate_deleted_dags(self._file_paths) from airflow.models.dagcode import DagCode DagCode.remove_deleted_code(self._file_paths)
def test_remove_dags_by_filepath(self):
    """DAGs can be removed from database."""
    example_dags_list = list(self._write_example_dags().values())
    # Remove SubDags from the list as they are not stored in DB in a separate row
    # and are directly added in Json blob of the main DAG
    filtered_example_dags_list = [dag for dag in example_dags_list if not dag.is_subdag]

    # Tests removing by file path.
    dag_removed_by_file = filtered_example_dags_list[0]
    # Use a set comprehension to drop repeated files for DAGs that define
    # multiple dags in the same file
    example_dag_files = list({dag.full_filepath for dag in filtered_example_dags_list})
    example_dag_files.remove(dag_removed_by_file.full_filepath)
    SDM.remove_deleted_dags(example_dag_files)
    self.assertFalse(SDM.has_dag(dag_removed_by_file.dag_id))

def test_remove_dags(self):
    """DAGs can be removed from database."""
    example_dags_list = list(self._write_example_dags().values())
    # Remove SubDags from the list as they are not stored in DB in a separate row
    # and are directly added in Json blob of the main DAG
    filtered_example_dags_list = [dag for dag in example_dags_list if not dag.is_subdag]

    # Tests removing by dag_id.
    dag_removed_by_id = filtered_example_dags_list[0]
    SDM.remove_dag(dag_removed_by_id.dag_id)
    self.assertFalse(SDM.has_dag(dag_removed_by_id.dag_id))

    # Tests removing by file path.
    dag_removed_by_file = filtered_example_dags_list[1]
    example_dag_files = [dag.full_filepath for dag in filtered_example_dags_list]
    example_dag_files.remove(dag_removed_by_file.full_filepath)
    SDM.remove_deleted_dags(example_dag_files)
    self.assertFalse(SDM.has_dag(dag_removed_by_file.dag_id))
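Both tests above lean on scaffolding from Airflow's serialized-DAG test suite: the `SDM` alias for `SerializedDagModel` and a `_write_example_dags` helper that persists the bundled example DAGs. A minimal sketch of those assumptions; `make_example_dags` is reconstructed here and may differ from the exact helper in your tree:

# Sketch only: assumed test scaffolding for the tests above.
from airflow import example_dags as example_dags_module
from airflow.models import DagBag
from airflow.models.serialized_dag import SerializedDagModel as SDM

def make_example_dags(module):
    """Load DAGs from a module's path (reconstructed helper)."""
    dagbag = DagBag(module.__path__[0])
    return dagbag.dags

# Inside the test case class:
def _write_example_dags(self):
    example_dags = make_example_dags(example_dags_module)
    for dag in example_dags.values():
        SDM.write_dag(dag)
    return example_dags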
def _refresh_dag_dir(self): """Refresh file paths from dag dir if we haven't done it for too long.""" now = timezone.utcnow() elapsed_time_since_refresh = ( now - self.last_dag_dir_refresh_time).total_seconds() if elapsed_time_since_refresh > self.dag_dir_list_interval: # Build up a list of Python files that could contain DAGs self.log.info("Searching for files in %s", self._dag_directory) self._file_paths = list_py_file_paths(self._dag_directory) self.last_dag_dir_refresh_time = now self.log.info("There are %s files in %s", len(self._file_paths), self._dag_directory) self.set_file_paths(self._file_paths) try: self.log.debug("Removing old import errors") self.clear_nonexistent_import_errors() except Exception: self.log.exception("Error removing old import errors") # Check if file path is a zipfile and get the full path of the python file. # Without this, SerializedDagModel.remove_deleted_files would delete zipped dags. # Likewise DagCode.remove_deleted_code dag_filelocs = [] for fileloc in self._file_paths: if not fileloc.endswith(".py") and zipfile.is_zipfile(fileloc): with zipfile.ZipFile(fileloc) as z: dag_filelocs.extend([ os.path.join(fileloc, info.filename) for info in z.infolist() if might_contain_dag(info.filename, True, z) ]) else: dag_filelocs.append(fileloc) SerializedDagModel.remove_deleted_dags(dag_filelocs) DagModel.deactivate_deleted_dags(self._file_paths) from airflow.models.dagcode import DagCode DagCode.remove_deleted_code(dag_filelocs)