Beispiel #1
0
    def test_normalize_query_with_comments(self):
        """Check that queries differing only by SQL comments normalize equally.

        Two scenarios are compared: a one-line query with a trailing ``--``
        comment, and a multi-line query with a comment line of its own.
        """
        # Scenario 1: trailing comment plus different keyword casing.
        commented_one_liner = "select top 3 * from receipts -- I have a comment"
        plain_one_liner = "SELECT top 3 * FROM receipts"

        normalized_commented = utils.normalize_query(commented_one_liner)
        normalized_plain = utils.normalize_query(plain_one_liner)
        assert normalized_commented == normalized_plain

        # Scenario 2: compact layout versus an expanded layout carrying a
        # comment line inside the SELECT list.
        compact_query = """
        SELECT TIN, COUNT(SdcReceiptSignature)  AS ReceiptCount, SUM(TaxableAmount2) as TaxbleAmount
        FROM Receipts
        WHERE SdcDateTime BETWEEN '20180601' AND '20180609' AND ReceiptType='N' GROUP BY TIN
        """

        expanded_commented_query = """
        SELECT
            -- I have a comment
            TIN,
            COUNT(SdcReceiptSignature)  AS ReceiptCount,
            SUM(TaxableAmount2) as TaxbleAmount
        FROM
            Receipts
        WHERE
            SdcDateTime BETWEEN '20180601' AND '20180609'
        AND
            ReceiptType='N'
        GROUP BY
            TIN
        """

        normalized_compact = utils.normalize_query(compact_query)
        normalized_expanded = utils.normalize_query(expanded_commented_query)
        assert normalized_compact == normalized_expanded
Beispiel #2
0
    def test_normalize_query_with_tabs(self):
        """Check that tab-indented and space-indented queries normalize equally."""
        # NOTE: the continuation lines of this literal contain real tab
        # characters; do not let an editor convert them to spaces.
        tab_indented = """
        select top 1
        	tin,
        	Journal
        from
        	Receipts
        """
        space_indented = """
        select top 1
            tin,
            Journal
        from
            Receipts
        """

        normalized_tabs = utils.normalize_query(tab_indented)
        normalized_spaces = utils.normalize_query(space_indented)
        assert normalized_tabs == normalized_spaces
Beispiel #3
0
    def test_cache_independent_from_format(self, tmp_path, metadata, results):
        """Check that a cache entry dumped for one query spelling serves another.

        A store is created with ``normalize=True``; after dumping results for
        the lower-case spelling, both spellings must resolve to existing
        metadata and cache files.
        """
        lowercase_query = "select top 3 * from receipts"
        uppercase_query = "SELECT top 3 * FROM receipts"
        spellings = (lowercase_query, uppercase_query)

        # Precondition: both spellings share one normalized form.
        assert utils.normalize_query(lowercase_query) == utils.normalize_query(
            uppercase_query
        )

        parquet_store = store.FileStore(cache_store=tmp_path, normalize=True)

        # Nothing has been dumped yet, so neither spelling is cached.
        for spelling in spellings:
            assert not parquet_store.get_metadata_filepath(spelling).exists()
            assert not parquet_store.get_cache_filepath(spelling).exists()
            assert not parquet_store.exists(spelling)

        # Dump using only the first spelling ...
        parquet_store.dump(lowercase_query, results, metadata)

        # ... and expect both spellings to be found afterwards.
        for spelling in spellings:
            assert parquet_store.get_metadata_filepath(spelling).exists()
            assert parquet_store.get_cache_filepath(spelling).exists()
            assert parquet_store.exists(spelling)
Beispiel #4
0
    def test_normalize_query(self):
        """Check keyword-case and layout handling of ``utils.normalize_query``.

        Keyword casing and whitespace layout are expected not to matter, while
        a difference in identifier casing is expected to be preserved.
        """
        # Keyword casing only.
        lowercase_keywords = "select top 3 * from receipts"
        uppercase_keywords = "SELECT top 3 * FROM receipts"

        normalized_lower = utils.normalize_query(lowercase_keywords)
        normalized_upper = utils.normalize_query(uppercase_keywords)
        assert normalized_lower == normalized_upper

        # Same statement in a compact and in an expanded layout.
        compact_query = """
        SELECT TIN, COUNT(SdcReceiptSignature)  AS ReceiptCount, SUM(TaxableAmount2) as TaxbleAmount
        FROM Receipts
        WHERE SdcDateTime BETWEEN '20180601' AND '20180609' AND ReceiptType='N' GROUP BY TIN
        """

        expanded_query = """
        SELECT
            TIN,
            COUNT(SdcReceiptSignature)  AS ReceiptCount,
            SUM(TaxableAmount2) as TaxbleAmount
        FROM
            Receipts
        WHERE
            SdcDateTime BETWEEN '20180601' AND '20180609'
        AND
            ReceiptType='N'
        GROUP BY
            TIN
        """

        normalized_compact = utils.normalize_query(compact_query)
        normalized_expanded = utils.normalize_query(expanded_query)
        assert normalized_compact == normalized_expanded

        # Table name differs in casing: the normalized forms must differ too.
        lowercase_table = "select top 3 * from receipts"
        capitalized_table = "SELECT top 3 * FROM Receipts"

        normalized_lowercase_table = utils.normalize_query(lowercase_table)
        normalized_capitalized_table = utils.normalize_query(capitalized_table)
        assert (
            normalized_lowercase_table != normalized_capitalized_table
        ), "Identifier names (table names and columns) should not be normalized"
Beispiel #5
0
    def test_dump_load_metadata(self, file_store, query, metadata):
        """Round-trip metadata through the store and check the stored query.

        Verifies that dumping creates the metadata file, that loading returns
        a value equal to ``metadata``, that the loaded ``"query"`` entry is the
        normalized query rather than the raw one, and that loading metadata
        for an unknown query raises ``ValueError`` with the expected message.
        """
        file_store.dump_metadata(query, metadata)
        assert file_store.get_metadata_filepath(query).exists()
        metadata_loaded = file_store.load_metadata(query)
        assert metadata == metadata_loaded

        # The stored query text must be the normalized form, not the raw input.
        assert metadata_loaded["query"] != query
        assert metadata_loaded["query"] == utils.normalize_query(query)

        # A query that was never dumped has no metadata to load.
        with pytest.raises(ValueError) as excinfo:
            file_store.load_metadata("select * from dummy")
        # Plain literal: the message has no placeholders, so no f-string needed.
        assert "Metadata for the given query does not exist." in str(
            excinfo.value)
Beispiel #6
0
 def test_list_store_with_one_element(self, file_store, query, metadata,
                                      results):
     """Check the listing of a store that holds exactly one dumped query.

     The listing is expected to have one row and four named columns, with
     the row pointing at the cache file name and the normalized query.
     """
     file_store.dump(query, results, metadata)
     listing = file_store.list()

     expected_columns = ["query", "cache_file", "executed_at", "duration"]
     assert listing.shape == (1, 4)
     assert list(listing.columns) == expected_columns

     # Only the file name (not the full path) is compared.
     listed_cache_file = listing.loc[0, "cache_file"]
     assert listed_cache_file == file_store.get_cache_filepath(query).name

     listed_query = listing.loc[0, "query"]
     assert listed_query == utils.normalize_query(query)
Beispiel #7
0
 def test_normalize_query_with_big_query(self):
     """Check that a query with a 40000-element ``in`` list is returned as is."""
     id_values = tuple(range(40000))
     # The tuple is interpolated via its str() form, parentheses included.
     big_query = f"select * from table where id in ({id_values})"

     normalized = utils.normalize_query(big_query)
     assert normalized == big_query