Ejemplo n.º 1
0
    def _cleanup_stale_dags(self):
        """
        Periodically purge records of DAGs that have not been loaded recently.

        The method is a no-op unless ``_dag_cleanup_interval`` is positive and
        more than that many seconds have elapsed since
        ``last_dag_cleanup_time``.  When a cleanup pass does run, it:
          1. Calls ``DAG.deactivate_stale_dags`` with the computed cutoff
          2. Calls ``SerializedDagModel.remove_stale_dags`` with the same
             cutoff (only when ``STORE_SERIALIZED_DAGS`` is truthy)
          3. Calls ``DagCode.remove_unused_code`` (only when
             ``self.store_dag_code`` is truthy)
        and finally stores the current time in ``last_dag_cleanup_time``.
        """
        cleanup_interval = self._dag_cleanup_interval
        if not 0 < cleanup_interval:
            # A non-positive interval means no cleanup pass is ever started.
            return

        since_last_cleanup = timezone.utcnow() - self.last_dag_cleanup_time
        if not cleanup_interval < since_last_cleanup.total_seconds():
            # The previous pass is still recent enough; try again later.
            return

        # Worst case, every DAG should have been processed within
        # file_process_interval + processor_timeout +
        # min_serialized_dag_update_interval, so anything older than that
        # window is treated as stale.
        grace_period = (
            self._processor_timeout
            + timedelta(seconds=self._file_process_interval)
            + timedelta(seconds=self._min_serialized_dag_update_interval)
        )
        stale_cutoff = timezone.utcnow() - grace_period

        self.log.info(
            "Deactivating DAGs that haven't been touched since %s",
            stale_cutoff.isoformat(),
        )
        airflow.models.DAG.deactivate_stale_dags(stale_cutoff)

        # The model imports stay local and conditional, exactly as before, so
        # they are only executed when the corresponding feature is enabled.
        if STORE_SERIALIZED_DAGS:
            from airflow.models.serialized_dag import SerializedDagModel
            SerializedDagModel.remove_stale_dags(stale_cutoff)

        if self.store_dag_code:
            from airflow.models.dagcode import DagCode
            DagCode.remove_unused_code()

        # Stamp the time after the work is done so the next interval is
        # measured from the end of this pass.
        self.last_dag_cleanup_time = timezone.utcnow()
Ejemplo n.º 2
0
 def test_remove_stale_dags(self):
     # Checks that SDM.remove_stale_dags drops a serialized DAG whose
     # last_updated is older than the cutoff while another one remains.
     #
     # SubDags are skipped: they are not stored in the DB as a separate row
     # but are embedded in the JSON blob of their parent DAG.
     top_level_dags = [
         dag
         for dag in self._write_example_dags().values()
         if not dag.is_subdag
     ]

     stale_dag = SDM(top_level_dags[0])
     fresh_dag = SDM(top_level_dags[1])

     # Backdate the stale DAG so that it was last updated 10 minutes ago.
     merged_at = timezone.utcnow()
     stale_dag.last_updated = merged_at - timezone.dt.timedelta(minutes=10)
     with create_session() as session:
         session.merge(stale_dag)
         session.commit()

     # Anything not updated within the last 5 minutes counts as stale.
     five_minutes_ago = timezone.utcnow() - timezone.dt.timedelta(minutes=5)
     SDM.remove_stale_dags(five_minutes_ago)

     self.assertFalse(SDM.has_dag(stale_dag.dag_id))
     self.assertTrue(SDM.has_dag(fresh_dag.dag_id))