def _cleanup_stale_dags(self):
    """
    Clean up any DAGs that we have not loaded recently.

    The cleanup has two parts:
      1. Mark DAGs that haven't been seen recently as inactive
      2. Delete DAG serializations (and unused DAG code, when stored) for
         DAGs that haven't been seen
    """
    # Throttle: only run when the configured cleanup interval is positive
    # and that much time has elapsed since the previous cleanup pass.
    elapsed = (timezone.utcnow() - self.last_dag_cleanup_time).total_seconds()
    if not (0 < self._dag_cleanup_interval < elapsed):
        return

    # Worst case: every DAG should have been processed within
    # file_process_interval + processor_timeout + min_serialized_dag_update_interval
    grace_period = (
        self._processor_timeout
        + timedelta(seconds=self._file_process_interval)
        + timedelta(seconds=self._min_serialized_dag_update_interval)
    )
    cutoff = timezone.utcnow() - grace_period

    self.log.info(
        "Deactivating DAGs that haven't been touched since %s",
        cutoff.isoformat())

    airflow.models.DAG.deactivate_stale_dags(cutoff)

    if STORE_SERIALIZED_DAGS:
        from airflow.models.serialized_dag import SerializedDagModel
        SerializedDagModel.remove_stale_dags(cutoff)

    if self.store_dag_code:
        from airflow.models.dagcode import DagCode
        DagCode.remove_unused_code()

    self.last_dag_cleanup_time = timezone.utcnow()
def test_remove_stale_dags(self):
    """remove_stale_dags() deletes rows older than the cutoff and keeps newer ones."""
    all_example_dags = list(self._write_example_dags().values())
    # SubDags are not stored in the DB as separate rows — they live inside
    # the JSON blob of their parent DAG — so exclude them before choosing
    # test subjects.
    top_level_dags = [dag for dag in all_example_dags if not dag.is_subdag]

    # One DAG we will make stale, one that stays fresh.
    stale_dag = SDM(top_level_dags[0])
    fresh_dag = SDM(top_level_dags[1])

    # Backdate the stale DAG's last_updated timestamp by 10 minutes.
    stale_dag.last_updated = timezone.utcnow() - timezone.dt.timedelta(seconds=600)
    with create_session() as session:
        session.merge(stale_dag)
        session.commit()

    # Purge everything not updated within the last 5 minutes.
    SDM.remove_stale_dags(timezone.utcnow() - timezone.dt.timedelta(seconds=300))

    self.assertFalse(SDM.has_dag(stale_dag.dag_id))
    self.assertTrue(SDM.has_dag(fresh_dag.dag_id))