コード例 #1
0
ファイル: report.py プロジェクト: trejas/rb_status_plugin
    def delete_dag(self, keep_records_in_log: bool = True, session=None):
        """Delete this report's DAG and all database rows referencing its dag_id.

        :param keep_records_in_log: if True (default), rows in the ``Log``
            table are preserved for auditing purposes.
        :param session: SQLAlchemy session used for all queries.
        :raises DagNotFound: if no ``DagModel`` row exists for ``self.dag_id``.
        """
        dag = session.query(DagModel).filter(
            DagModel.dag_id == self.dag_id).first()
        if dag is None:
            raise DagNotFound(f"Dag id {self.dag_id} not found")

        # The scheduler prunes serialized DAGs on its own schedule;
        # so explicitly removes serialized DAG here.
        if STORE_SERIALIZED_DAGS and SerializedDagModel.has_dag(
                dag_id=self.dag_id, session=session):
            SerializedDagModel.remove_dag(dag_id=self.dag_id, session=session)

        # Sweep every mapped model that carries a dag_id column, deleting rows
        # for this DAG and its subdags (subdag ids look like "<dag_id>.<child>").
        # noinspection PyUnresolvedReferences,PyProtectedMember
        for model in models.base.Base._decl_class_registry.values():
            if hasattr(model, "dag_id"):
                if keep_records_in_log and model.__name__ == "Log":
                    continue
                cond = or_(model.dag_id == self.dag_id,
                           model.dag_id.like(self.dag_id + ".%"))
                session.query(model).filter(cond).delete(
                    synchronize_session="fetch")

        # Delete entries in Import Errors table for a deleted DAG
        # This handles the case when the dag_id is changed in the file
        session.query(models.ImportError).filter(
            models.ImportError.filename == dag.fileloc).delete(
                synchronize_session="fetch")
コード例 #2
0
 def test_remove_stale_dags(self):
     """Serialized DAGs older than the cutoff are purged; fresh ones survive."""
     # SubDags live inside the parent DAG's JSON blob rather than in their
     # own row, so exclude them before picking test subjects.
     top_level = [d for d in self._write_example_dags().values() if not d.is_subdag]
     stale_dag = SDM(top_level[0])
     fresh_dag = SDM(top_level[1])
     # Back-date the stale DAG's last_updated by ten minutes and persist it.
     stale_dag.last_updated = timezone.utcnow() - timezone.dt.timedelta(seconds=600)
     with create_session() as session:
         session.merge(stale_dag)
         session.commit()
     # Purge everything not touched within the last five minutes.
     SDM.remove_stale_dags(timezone.utcnow() - timezone.dt.timedelta(seconds=300))
     self.assertFalse(SDM.has_dag(stale_dag.dag_id))
     self.assertTrue(SDM.has_dag(fresh_dag.dag_id))
コード例 #3
0
    def test_remove_dags(self):
        """DAGs can be removed from database."""
        # SubDags are stored inside the parent DAG's JSON blob, not as
        # separate rows, so keep only top-level DAGs as candidates.
        candidates = [
            dag for dag in self._write_example_dags().values() if not dag.is_subdag
        ]

        # Removal by dag_id.
        removed_by_id = candidates[0]
        SDM.remove_dag(removed_by_id.dag_id)
        self.assertFalse(SDM.has_dag(removed_by_id.dag_id))

        # Removal by file path: list every file except the victim's as still
        # present, then ask for deleted DAGs to be pruned.
        removed_by_file = candidates[1]
        surviving_files = [dag.full_filepath for dag in candidates]
        surviving_files.remove(removed_by_file.full_filepath)
        SDM.remove_deleted_dags(surviving_files)
        self.assertFalse(SDM.has_dag(removed_by_file.dag_id))
コード例 #4
0
ファイル: delete_dag.py プロジェクト: vipadm/airflow
def delete_dag(dag_id: str,
               keep_records_in_log: bool = True,
               session=None) -> int:
    """
    Delete a DAG and every database row that references it.

    :param dag_id: the dag_id of the DAG to delete
    :param keep_records_in_log: whether keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :param session: session used
    :return: count of deleted rows across all swept tables
    :raises AirflowException: if any task instance of the DAG is still running
    :raises DagNotFound: if no DagModel row exists for ``dag_id``
    """
    log.info("Deleting DAG: %s", dag_id)
    # Refuse to delete while any task instance of this DAG is still RUNNING.
    running_tis = (session.query(models.TaskInstance.state).filter(
        models.TaskInstance.dag_id == dag_id).filter(
            models.TaskInstance.state == State.RUNNING).first())
    if running_tis:
        raise AirflowException("TaskInstances still running")
    dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound(f"Dag id {dag_id} not found")

    # deleting a DAG should also delete all of its subdags
    # (subdag ids have the form "<parent_dag_id>.<child>").
    dags_to_delete_query = session.query(DagModel.dag_id).filter(
        or_(
            DagModel.dag_id == dag_id,
            and_(DagModel.dag_id.like(f"{dag_id}.%"), DagModel.is_subdag),
        ))
    dags_to_delete = [dag_id for dag_id, in dags_to_delete_query]

    # Scheduler removes DAGs without files from serialized_dag table every dag_dir_list_interval.
    # There may be a lag, so explicitly removes serialized DAG here.
    if SerializedDagModel.has_dag(dag_id=dag_id, session=session):
        SerializedDagModel.remove_dag(dag_id=dag_id, session=session)

    count = 0

    # Sweep every mapped model that carries a dag_id column; Log rows are
    # optionally preserved for auditing.
    for model in get_sqla_model_classes():
        if hasattr(model, "dag_id"):
            if keep_records_in_log and model.__name__ == 'Log':
                continue
            count += (session.query(model).filter(
                model.dag_id.in_(dags_to_delete)).delete(
                    synchronize_session='fetch'))
    if dag.is_subdag:
        # A subdag's task rows are keyed by the parent dag_id plus the
        # subdag's own task_id, so they need a separate pass.
        parent_dag_id, task_id = dag_id.rsplit(".", 1)
        for model in TaskFail, models.TaskInstance:
            count += (session.query(model).filter(
                model.dag_id == parent_dag_id,
                model.task_id == task_id).delete())

    # Delete entries in Import Errors table for a deleted DAG
    # This handles the case when the dag_id is changed in the file
    session.query(models.ImportError).filter(
        models.ImportError.filename == dag.fileloc).delete(
            synchronize_session='fetch')

    return count
コード例 #5
0
 def test_remove_dags_by_id(self):
     """DAGs can be removed from database."""
     # SubDags are embedded in the parent DAG's JSON blob and have no row of
     # their own, so only top-level DAGs are valid removal targets.
     top_level = [d for d in self._write_example_dags().values() if not d.is_subdag]
     victim = top_level[0]
     SDM.remove_dag(victim.dag_id)
     assert not SDM.has_dag(victim.dag_id)
コード例 #6
0
    def test_write_dag(self):
        """DAGs can be written into database."""
        example_dags = self._write_example_dags()

        with create_session() as session:
            for dag in example_dags.values():
                assert SDM.has_dag(dag.dag_id)
                row = (
                    session.query(SDM.fileloc, SDM.data)
                    .filter(SDM.dag_id == dag.dag_id)
                    .one()
                )

                assert row.fileloc == dag.full_filepath
                # Raises if the stored blob does not satisfy the JSON schema.
                SerializedDAG.validate_schema(row.data)
コード例 #7
0
 def test_remove_dags_by_filepath(self):
     """DAGs can be removed from database."""
     # SubDags share the parent DAG's row (stored inside its JSON blob), so
     # keep only top-level DAGs.
     top_level = [d for d in self._write_example_dags().values() if not d.is_subdag]
     victim = top_level[0]
     # One file may define several DAGs; a set comprehension de-duplicates
     # the paths before we list the files that still exist on disk.
     remaining_files = list({d.full_filepath for d in top_level})
     remaining_files.remove(victim.full_filepath)
     SDM.remove_deleted_dags(remaining_files)
     self.assertFalse(SDM.has_dag(victim.dag_id))
コード例 #8
0
ファイル: delete_dag.py プロジェクト: zw39125432/airflow
def delete_dag(dag_id: str,
               keep_records_in_log: bool = True,
               session=None) -> int:
    """
    Delete a DAG and every database row that references it.

    :param dag_id: the dag_id of the DAG to delete
    :param keep_records_in_log: whether keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :param session: session used
    :return: count of deleted rows across all swept tables
    :raises DagNotFound: if no DagModel row exists for ``dag_id``
    """
    logger = LoggingMixin()
    logger.log.info("Deleting DAG: %s", dag_id)
    dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # Scheduler removes DAGs without files from serialized_dag table every dag_dir_list_interval.
    # There may be a lag, so explicitly removes serialized DAG here.
    if STORE_SERIALIZED_DAGS and SerializedDagModel.has_dag(dag_id=dag_id,
                                                            session=session):
        SerializedDagModel.remove_dag(dag_id=dag_id, session=session)

    count = 0

    # Sweep every mapped model that carries a dag_id column; subdag rows use
    # ids of the form "<dag_id>.<child>", hence the LIKE clause.
    # noinspection PyUnresolvedReferences,PyProtectedMember
    for model in models.base.Base._decl_class_registry.values():  # pylint: disable=protected-access
        if hasattr(model, "dag_id"):
            if keep_records_in_log and model.__name__ == 'Log':
                continue
            cond = or_(model.dag_id == dag_id,
                       model.dag_id.like(dag_id + ".%"))
            count += session.query(model).filter(cond).delete(
                synchronize_session='fetch')
    if dag.is_subdag:
        # A subdag's task rows are keyed by the parent dag_id plus the
        # subdag's task_id.  DagRun is deliberately NOT in this loop: it has
        # no task_id column, so filtering on model.task_id would raise.
        parent_dag_id, task_id = dag_id.rsplit(".", 1)
        for model in TaskFail, models.TaskInstance:
            count += session.query(model).filter(
                model.dag_id == parent_dag_id,
                model.task_id == task_id).delete()

    # Delete entries in Import Errors table for a deleted DAG
    # This handles the case when the dag_id is changed in the file
    session.query(models.ImportError).filter(
        models.ImportError.filename == dag.fileloc).delete(
            synchronize_session='fetch')

    return count