def main(dask_client, drill, dir_data_file, bc, nRals):
    """E2E tests for queries over the BOOLEAN column of `bool_orders`.

    Runs filters, aggregations and (commented-out) joins involving the
    `o_confirmed` BOOLEAN column and validates each result set against
    Apache Drill via `runTest.run_query`.

    NOTE(review): `queryType` is read throughout but never assigned in this
    function — presumably a module-level constant in the original file;
    confirm before running this chunk standalone.
    """
    # Snapshot GPU memory so usage across the whole run can be logged at the end.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        # Tables needed by the queries below; `bool_orders` carries the
        # BOOLEAN `o_confirmed` column under test.
        tables = ["nation", "bool_orders", "region", "customer"]
        bool_orders_index = tables.index("bool_orders")
        data_types = [DataType.CSV]  # TODO ORC gdf parquet json

        # Create Tables ------------------------------------------------------------------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            # NOTE(review): `bool_orders_index` kwarg presumably tells the
            # helper which table needs special (boolean) handling — verify
            # against `cs.create_tables`.
            cs.create_tables(bc, dir_data_file, fileSchemaType,
                             tables=tables,
                             bool_orders_index=bool_orders_index)

            # Set up util params -------------------------------------------------------------------------------------------------------
            worder = 1                  # order result sets before comparing
            use_percentage = False
            non_aceptable_diff = 0      # exact match required
            min_aceptable_diff = 0.01   # small numeric tolerance allowed

            print('==============================')
            print(queryType)
            print('==============================')

            # queryId = 'TEST_00'
            # query = "select 1 from nation"
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', non_aceptable_diff, use_percentage, fileSchemaType)
            #
            # queryId = 'TEST_0A'
            # query = "select *, 2 from nation"
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', non_aceptable_diff, use_percentage, fileSchemaType)
            #

            # Run Queries -------------------------------------------------------------------------------------------------------------
            queryId = 'TEST_01'
            query = "select * from bool_orders order by o_orderkey, o_custkey limit 300"
            runTest.run_query(bc, drill, query, queryId, queryType,
                              0, '', min_aceptable_diff,
                              use_percentage, fileSchemaType)

            queryId = 'TEST_02'
            query = "select o_custkey, o_confirmed from bool_orders where o_confirmed is null limit 30"
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', non_aceptable_diff,
                              use_percentage, fileSchemaType)

            queryId = 'TEST_03'
            query = """select o_custkey, 0.75 * o_totalprice, o_confirmed
                    from bool_orders where o_confirmed = true
                    order by o_custkey, o_orderKey limit 20"""
            runTest.run_query(bc, drill, query, queryId, queryType,
                              0, '', min_aceptable_diff,
                              use_percentage, fileSchemaType)

            queryId = 'TEST_04'
            query = """select o_custkey, o_confirmed from bool_orders
                    where o_confirmed is not NULL
                    order by o_custkey, o_confirmed desc limit 25"""
            runTest.run_query(bc, drill, query, queryId, queryType,
                              0, '', non_aceptable_diff,
                              use_percentage, fileSchemaType)

            queryId = 'TEST_05'
            query = "select o_custkey, 0.95 * o_totalprice, o_confirmed from bool_orders where o_confirmed is null"
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', min_aceptable_diff,
                              use_percentage, fileSchemaType)

            queryId = 'TEST_06'
            query = "select o_custkey, 0.75 * o_totalprice as proffit from bool_orders where o_custkey < 300 and o_confirmed = False"
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', min_aceptable_diff,
                              use_percentage, fileSchemaType)

            queryId = 'TEST_07'
            query = """select o_custkey, 0.75 * o_totalprice as proffit
                    from bool_orders
                    where o_custkey < 300 and o_confirmed is not NULL
                    order by o_custkey, o_orderkey limit 200"""
            runTest.run_query(bc, drill, query, queryId, queryType,
                              0, '', min_aceptable_diff,
                              use_percentage, fileSchemaType)

            queryId = 'TEST_08'
            # when count() and o_confirmed is null (o_confirmed is not null) are mixed then fails
            query = "select count(o_orderstatus) from bool_orders where o_orderstatus <> 'O' and o_confirmed = true"
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', non_aceptable_diff,
                              use_percentage, fileSchemaType)

            queryId = 'TEST_09'
            query = "select sum(o_orderkey)/count(o_orderkey), max(o_totalprice) from bool_orders where o_confirmed = true and o_orderkey < 2500"
            runTest.run_query(
                bc, drill, query, queryId, queryType,
                worder, '', min_aceptable_diff,
                True, fileSchemaType)  # TODO: Change sum/count for avg KC

            queryId = 'TEST_10'
            query = "select sum(o_orderkey)/count(o_orderkey), max(o_totalprice) from bool_orders where o_confirmed IS NULL and o_orderkey < 2500"
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', min_aceptable_diff, True) #TODO: Change sum/count for avg KC

            queryId = 'TEST_11'
            query = """select o_custkey, min(o_totalprice) from bool_orders
                    where o_custkey < 300 and o_confirmed is not NULL
                    group by o_custkey"""
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', min_aceptable_diff,
                              use_percentage, fileSchemaType)

            # queryId = 'TEST_12'
            # query = """select count(o_custkey), max(o_totalprice), min(o_totalprice), sum(o_totalprice)/count(o_totalprice)
            #         from bool_orders group by o_custkey"""
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', min_aceptable_diff, True) #TODO: Change sum/count for avg KC
            #
            # queryId = 'TEST_13'
            # query = """select count(o_custkey), o_orderkey, o_confirmed from bool_orders
            #         where o_orderkey < 100 and o_confirmed = FALSE group by o_orderkey, (o_orderkey + o_custkey), o_confirmed"""
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, "o_orderkey", non_aceptable_diff, use_percentage, fileSchemaType)
            #
            # queryId = 'TEST_14'
            # query = "select o.o_custkey, c.c_name as customer_name from bool_orders as o inner join customer as c on c.c_custkey = o.o_custkey where o.o_confirmed = False and o.o_orderstatus <> 'O'"
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, "o_custkey", non_aceptable_diff, use_percentage, fileSchemaType)
            #
            # queryId = 'TEST_15'
            # query = "select o.o_custkey, c.c_name, n.n_regionkey from nation as n inner join customer as c on n.n_nationkey = c.c_nationkey inner join bool_orders as o on c.c_custkey = o.o_custkey where o.o_confirmed = False and o.o_orderstatus <> 'O'"
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, "o_custkey", non_aceptable_diff, use_percentage, fileSchemaType)
            #
            # queryId = 'TEST_16'
            # query = "select o.o_custkey, c.c_name, n.n_regionkey from customer as c inner join bool_orders as o on c.c_custkey = o.o_custkey inner join nation as n on n.n_nationkey = c.c_nationkey where o.o_confirmed = false and o.o_orderstatus <> 'O'"
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, "o_custkey", non_aceptable_diff, use_percentage, fileSchemaType)
            #
            # queryId = 'TEST_17'
            # query = """select count(o_custkey), max(o_totalprice), min(o_totalprice), sum(o_totalprice)/count(o_totalprice)
            #         from bool_orders group by o_totalprice, o_custkey, o_orderkey order by o_totalprice, o_custkey, o_orderkey desc"""
            # runTest.run_query(bc, drill, query, queryId, queryType, 0, '', min_aceptable_diff, True) #TODO: Change sum/count for avg KC

            # In GENERATOR mode only the first file schema type is exercised.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    # Report GPU memory delta for this test suite.
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_file, bc, nRals):
    """E2E tests for the SQL ROUND() function (with and without precision,
    including negative precision and ROUND over aggregates), validated
    against Apache Drill.
    """
    # Snapshot GPU memory so usage across the whole run can be logged at the end.
    start_mem = gpuMemory.capture_gpu_memory_usage()
    queryType = "Round"

    def executionTest(queryType):
        tables = ["nation", "region", "customer", "orders", "lineitem"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0  # exact match required

            print("==============================")
            print(queryType + " Tests")
            print("==============================")

            # ROUND with no explicit precision.
            queryId = "TEST_01"
            query = """select ROUND(orders.o_orderkey), ROUND(orders.o_totalprice)
                    from customer left outer join orders
                    on customer.c_custkey = orders.o_custkey
                    where customer.c_nationkey = 3
                    and customer.c_custkey < 500"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # ROUND with positive and negative precision.
            queryId = "TEST_02"
            query = """select ROUND(orders.o_totalprice, 2), ROUND(orders.o_totalprice, -2)
                    from customer left outer join orders
                    on customer.c_custkey = orders.o_custkey
                    where customer.c_nationkey = 3
                    and customer.c_custkey < 500"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Explicit zero precision.
            queryId = "TEST_03"
            query = """select customer.c_custkey, orders.o_orderkey,
                    ROUND(orders.o_custkey,0)
                    from customer left outer join orders
                    on customer.c_custkey = orders.o_custkey
                    where customer.c_nationkey = 3
                    and customer.c_custkey < 500"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # ROUND nested inside an aggregate over a full outer join.
            queryId = "TEST_04"
            query = """select MAX(ROUND(n1.n_regionkey,3))
                    from nation as n1 full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6 """
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # WSM NEED TO REVISIT THIS
            queryId = "TEST_05"
            query = "select ROUND(AVG(o_totalprice)) from orders"
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # In GENERATOR mode only the first file schema type is exercised.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest(queryType)

    # Report GPU memory delta for this test suite.
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_file, bc, nRals):
    """E2E tests for the SQL SUBSTRING() function (in projections, WHERE
    predicates, concatenation and nested queries), validated against
    Apache Drill.
    """
    # Snapshot GPU memory so usage across the whole run can be logged at the end.
    start_mem = gpuMemory.capture_gpu_memory_usage()
    queryType = 'Substring'

    def executionTest():
        tables = ["partsupp", "customer", "nation"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET
        ]  # TODO json

        # Create Tables ------------------------------------------------------------------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query -----------------------------------------------------------------------------
            worder = 1  # Parameter to indicate if its necessary to order the resulsets before compare them
            use_percentage = False
            acceptable_difference = 0.01

            print('==============================')
            print(queryType)
            print('==============================')

            # SUBSTRING over a CAST-ed numeric column.
            queryId = 'TEST_01'
            query = """select SUBSTRING(CAST(ps_partkey as VARCHAR),1,1), ps_availqty
                    from partsupp
                    where ps_availqty > 7000 and ps_supplycost > 700
                    order by ps_partkey, ps_availqty limit 50"""
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', acceptable_difference,
                              use_percentage, fileSchemaType)

            # SUBSTRING inside the WHERE predicate.
            queryId = 'TEST_02'
            query = """select c_custkey, c_name from customer
                    where SUBSTRING(c_name,1,17) = 'Customer#00000000'"""
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', acceptable_difference,
                              use_percentage, fileSchemaType)

            queryId = 'TEST_03'
            query = """select c_custkey, SUBSTRING(c_name, 1, 8) from customer
                    where c_name = 'Customer#000000009'"""
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', acceptable_difference,
                              use_percentage, fileSchemaType)

            queryId = 'TEST_04'
            query = """select * from nation where SUBSTRING(n_name,1,1) = 'I'"""
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', acceptable_difference,
                              use_percentage, fileSchemaType)

            # Multiple SUBSTRING projections with varying start/length.
            queryId = 'TEST_05'
            query = """select c_custkey, c_name, SUBSTRING(c_name,1,1),
                    SUBSTRING(c_name,2,1), SUBSTRING(c_name,1,2),
                    SUBSTRING(c_name,2,2)
                    from customer where c_custkey < 20"""
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', acceptable_difference,
                              use_percentage, fileSchemaType)

            # SUBSTRING result cast back to INT and used in arithmetic.
            queryId = 'TEST_06'
            query = """select c.c_custkey, SUBSTRING(c.c_name, 10, 18),
                    CAST(SUBSTRING(c.c_name, 10, 18) as INT),
                    CAST(SUBSTRING(c.c_name, 10, 18) as INT) + 1
                    from customer c where c.c_custkey < 50"""
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', acceptable_difference,
                              use_percentage, fileSchemaType)

            # String concatenation on a SUBSTRING over an empty result set
            # (c_custkey < 0 matches no rows).
            queryId = 'TEST_07'
            query = """select c.c_custkey, SUBSTRING(c.c_name, 1, 8),
                    SUBSTRING(c.c_name, 10, 18) || '**'
                    from customer c where c.c_custkey < 0"""
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', acceptable_difference,
                              use_percentage, fileSchemaType)

            # SUBSTRING applied to the alias of an inner query.
            queryId = 'TEST_08'
            query = """select * from (
                        select c.c_custkey, SUBSTRING(c.c_name, 1, 8) as n1,
                        SUBSTRING(c.c_name, 10, 18) || '**' as n2
                        from customer c where c.c_custkey < 50
                    ) as n where SUBSTRING(n.n1, 1,7) = 'Customer'"""
            runTest.run_query(bc, drill, query, queryId, queryType,
                              worder, '', acceptable_difference,
                              use_percentage, fileSchemaType)

            # In GENERATOR mode only the first file schema type is exercised.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    # Report GPU memory delta for this test suite.
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, spark, dir_data_file, bc, nRals):
    """E2E tests for literal/constant projections (numbers, strings, dates,
    timestamps) alongside table columns, validated mostly against Spark
    (with explicit `nulls last` ordering) and once against Drill.

    NOTE(review): `queryType` is read below but never assigned in this
    function — presumably a module-level constant in the original file.
    """
    # Snapshot GPU memory so usage across the whole run can be logged at the end.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["orders", "customer"]
        data_types = [DataType.ORC]  # TODO gdf csv parquet json

        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            # Integer literal projected next to a date column.
            queryId = "TEST_01"
            query = """select 2, o_orderdate from orders
                    order by o_orderdate asc limit 5"""
            query_spark = """select 2, o_orderdate from orders
                    order by o_orderdate nulls last limit 5"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "o_orderdate",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                query_spark=query_spark,
            )

            # String literal projection.
            queryId = "TEST_02"
            query = """select 'Rommel',c_name from customer
                    order by c_name limit 5"""
            query_spark = """select 'Rommel',c_name from customer
                    order by c_name nulls last limit 5"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "c_name",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                query_spark=query_spark,
            )

            # Date-like string literal projection.
            queryId = "TEST_03"
            query = """select '1990-01-01', c_custkey from customer
                    order by c_custkey limit 5"""
            query_spark = """select '1990-01-01', c_custkey from customer
                    order by c_custkey nulls last limit 5"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                query_spark=query_spark,
            )

            # Timestamp literal; NOTE(review): this one validates against
            # drill (not spark) and passes True for use_percentage — confirm
            # this asymmetry is intentional.
            queryId = "TEST_04"
            query = """select timestamp '1990-01-01 00:00:00', c_custkey
                    from customer order by c_custkey limit 5"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                True,
                fileSchemaType,
            )  # TODO: Change sum/count for avg KC

            # Aliased numeric constant.
            queryId = "TEST_05"
            query = """select 80000 as constant, c_custkey from customer
                    order by c_custkey limit 5"""
            query_spark = """select 80000 as constant, c_custkey from customer
                    order by c_custkey nulls last limit 5"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                query_spark=query_spark,
            )

            queryId = "TEST_06"
            # TODO: Blazing not support: 2+2
            query = """select 2+2, o_orderdate from orders
                    order by o_orderkey limit 5"""
            # runTest.run_query(bc, spark, query, queryId, queryType,
            #                   0, '', acceptable_difference, use_percentage, fileSchemaType)

            # In GENERATOR mode only the first file schema type is exercised.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    # Report GPU memory delta for this test suite.
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, spark, dir_data_file, bc, nRals):
    """E2E tests for window functions with ROWS BETWEEN ... PRECEDING/FOLLOWING
    frame bounds (min/max/avg over ordered frames, including CTEs and
    multi-table joins), validated against Spark.

    NOTE(review): `queryType` is read below but never assigned in this
    function — presumably a module-level constant in the original file.
    """
    # Snapshot GPU memory so usage across the whole run can be logged at the end.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["orders", "nation", "lineitem", "customer", "region"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            # ------------ ROWS bounding ----------------

            # Symmetric 1-row frame around each row.
            queryId = "TEST_01"
            query = """select min(n_nationkey) over
                        (
                            order by n_name
                            ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
                        ) min_val,
                        n_nationkey, n_regionkey, n_name
                    from nation"""
            runTest.run_query(bc, spark, query, queryId, queryType,
                              0, "", acceptable_difference,
                              use_percentage, fileSchemaType)

            # Two window aggregates sharing the same asymmetric frame.
            queryId = "TEST_02"
            query = """select min(o_orderkey) over
                        (
                            order by o_totalprice
                            ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING
                        ) min_keys,
                        max(o_orderkey) over
                        (
                            order by o_totalprice
                            ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING
                        ) max_keys,
                        o_orderkey, o_orderpriority
                    from orders
                    where o_orderpriority <> '2-HIGH'
                    and o_clerk = 'Clerk#000000880'
                    and o_orderstatus is not null
                    and o_totalprice is not null
                    order by o_orderstatus, o_totalprice
                    limit 50"""
            runTest.run_query(bc, spark, query, queryId, queryType,
                              worder, "", acceptable_difference,
                              use_percentage, fileSchemaType)

            # Window aggregate over a CTE built from a join.
            queryId = "TEST_03"
            query = """with new_nation as (
                        select n.n_nationkey as n_natio1,
                            n.n_name as n_nam1,
                            n.n_regionkey as n_region1
                        from nation as n
                        inner join region as r
                        on n.n_nationkey = r.r_regionkey
                    )
                    select avg(cast(nn.n_natio1 as double)) over
                        (
                            order by nn.n_nam1
                            ROWS BETWEEN 3 PRECEDING AND 2 FOLLOWING
                        ) avg_keys,
                        nn.n_natio1, nn.n_nam1, nn.n_region1
                    from new_nation nn
                    order by nn.n_natio1, avg_keys"""
            runTest.run_query(bc, spark, query, queryId, queryType,
                              worder, "", acceptable_difference,
                              use_percentage, fileSchemaType)

            # Multi-key ordering inside the window frame.
            queryId = "TEST_04"
            query = """select max(l_partkey) over
                        (
                            order by l_extendedprice desc, l_orderkey, l_quantity
                            ROWS BETWEEN 6 PRECEDING AND 2 FOLLOWING
                        ) max_keys,
                        l_linestatus, l_extendedprice
                    from lineitem
                    where l_shipmode not in ('MAIL', 'SHIP', 'AIR')
                    and l_linestatus = 'F'
                    and l_extendedprice is not null
                    order by l_extendedprice, l_orderkey, max_keys
                    limit 50"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Window aggregates over a three-table join.
            queryId = "TEST_05"
            query = """select l.l_orderkey, l.l_linenumber, l.l_suppkey,
                        l.l_partkey, c.c_custkey,
                        max(l.l_partkey) over
                        (
                            order by l.l_orderkey, l.l_linenumber, l.l_suppkey
                            ROWS BETWEEN 6 PRECEDING AND 2 FOLLOWING
                        ) max_pkeys,
                        max(c.c_custkey) over
                        (
                            order by l.l_orderkey, l.l_linenumber, l.l_suppkey
                            ROWS BETWEEN 6 PRECEDING AND 2 FOLLOWING
                        ) max_cust
                    from lineitem as l
                    inner join orders as o on o.o_orderkey = l.l_orderkey
                    inner join customer as c on c.c_custkey = o.o_custkey
                    where l.l_quantity = 1 and c.c_custkey < 10000
                    order by l.l_orderkey, l.l_linenumber, l.l_partkey, c.c_custkey """
            runTest.run_query(bc, spark, query, queryId, queryType,
                              0, "", acceptable_difference,
                              use_percentage, fileSchemaType)

            # In GENERATOR mode only the first file schema type is exercised.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    # Report GPU memory delta for this test suite.
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_file, bc, nRals):
    """E2E tests for WHERE-clause predicates (comparison operators, AND/OR
    combinations, empty result sets, predicates in joins, and column-vs-
    expression comparisons), validated against Apache Drill.

    NOTE(review): `queryType` is read below but never assigned in this
    function — presumably a module-level constant in the original file.
    """
    # Snapshot GPU memory so usage across the whole run can be logged at the end.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["customer", "orders", "nation", "lineitem"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            print(fileSchemaType)
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            # Simple single-predicate filter.
            queryId = "TEST_01"
            query = """select c_custkey, c_nationkey, c_acctbal
                    from customer where c_custkey < 15"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # AND of two predicates.
            queryId = "TEST_02"
            query = """select c_custkey, c_nationkey, c_acctbal
                    from customer
                    where c_custkey < 150 and c_nationkey = 5"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Predicate that matches no rows (c_custkey < 0).
            queryId = "TEST_03"
            query = """select c_custkey, c_nationkey as nkey
                    from customer where c_custkey < 0"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Empty set AND-ed with another predicate.
            queryId = "TEST_04"
            query = """select c_custkey, c_nationkey as nkey
                    from customer
                    where c_custkey < 0 and c_nationkey >= 30"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Empty set OR-ed with a live predicate.
            queryId = "TEST_05"
            query = """select c_custkey, c_nationkey as nkey
                    from customer
                    where c_custkey < 0 or c_nationkey >= 24"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_06"
            query = """select c_custkey, c_nationkey as nkey
                    from customer
                    where c_custkey < 0 and c_nationkey >= 3"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Three AND-ed predicates.
            queryId = "TEST_07"
            query = """select c_custkey, c_nationkey, c_acctbal
                    from customer
                    where c_custkey < 150
                    and c_nationkey = 5 and c_acctbal > 600"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Three OR-ed predicates.
            queryId = "TEST_08"
            query = """select c_custkey, c_nationkey as nkey
                    from customer
                    where c_custkey > 0 or c_nationkey >= 24
                    or c_acctbal > 700"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Mixed AND/OR precedence.
            queryId = "TEST_09"
            query = """select c_custkey, c_nationkey, c_acctbal
                    from customer
                    where c_custkey < 150 and c_nationkey = 5
                    or c_custkey = 200 or c_nationkey >= 10
                    or c_acctbal <= 500"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Filter applied on top of an inner join, with parenthesized range.
            queryId = "TEST_10"
            query = """select c.c_custkey, c.c_nationkey, n.n_regionkey
                    from customer as c
                    inner join nation as n
                    on c.c_nationkey = n.n_nationkey
                    where n.n_regionkey = 1
                    and (c.c_custkey > 10 and c.c_custkey < 50)"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Open range on both sides.
            queryId = "TEST_11"
            query = """select * from nation
                    where n_regionkey > 2 and n_regionkey < 5"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Gives [RunExecuteGraph Error] std::exception and crashes with ORC
            # Related Issue: https://github.com/BlazingDB/blazingsql/issues/1324
            # queryId = "TEST_12"
            # query = """select c_custkey, c_nationkey as nkey from customer
            #         where -c_nationkey + c_acctbal > 750.3"""
            # runTest.run_query(
            #     bc,
            #     drill,
            #     query,
            #     queryId,
            #     queryType,
            #     worder,
            #     "c_custkey",
            #     acceptable_difference,
            #     use_percentage,
            #     fileSchemaType,
            # )

            # Gives [RunExecuteGraph Error] std::exception and crashes with ORC
            # Related Issue: https://github.com/BlazingDB/blazingsql/issues/1324
            # queryId = "TEST_13"
            # query = """select c_custkey, c_nationkey as nkey from customer
            #         where -c_nationkey + c_acctbal > 750"""
            # runTest.run_query(
            #     bc,
            #     drill,
            #     query,
            #     queryId,
            #     queryType,
            #     worder,
            #     "c_custkey",
            #     acceptable_difference,
            #     use_percentage,
            #     fileSchemaType,
            # )

            # Chain of <> exclusions bounded by a range.
            queryId = "TEST_14"
            query = """select c_custkey, c_name, c_acctbal
                    from customer
                    where c_custkey <> 10 and c_custkey <> 11
                    and c_custkey <> 100 and c_custkey <> 1000
                    and c_custkey < 1001"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # String inequality predicate.
            queryId = 'TEST_15'
            query = """select l_orderkey, l_partkey, l_suppkey, l_returnflag
                    from lineitem
                    where l_returnflag <> 'g packages.'"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # String equality combined with numeric ranges.
            queryId = "TEST_16"
            query = """select l_orderkey, l_partkey, l_suppkey, l_returnflag
                    from lineitem
                    where l_returnflag='N' and l_linenumber < 3
                    and l_orderkey < 50"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Column compared against a column expression.
            queryId = "TEST_17"
            query = """select c_custkey, c_nationkey, c_acctbal
                    from customer
                    where c_custkey < 15000 and c_nationkey = 5
                    or c_custkey = c_nationkey * c_nationkey
                    or c_nationkey >= 10 or c_acctbal <= 500"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # In GENERATOR mode only the first file schema type is exercised.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    # Report GPU memory delta for this test suite.
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_file, bc, nRals):
    """E2E tests for error-message validation: each query is deliberately
    invalid (unknown column, unknown table, unknown function, syntax error)
    and `run_query` is expected to match the engine's error text via the
    `message_validation` argument.

    NOTE(review): `queryType` is read below but never assigned in this
    function — presumably a module-level constant in the original file.
    """
    # Snapshot GPU memory so usage across the whole run can be logged at the end.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["customer", "orders", "nation"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            print(fileSchemaType)
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            # Unknown column name.
            queryId = "TEST_01"
            query = """select c_custkeynew, c_nationkey, c_acctbal
                    from customer where c_custkey < 15"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                message_validation="Column 'c_custkeynew' not found in any table",
            )

            # Unknown table name.
            queryId = "TEST_02"
            query = """select c_custkey, c_nationkey, c_acctbal
                    from customer1
                    where c_custkey < 150 and c_nationkey = 5"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                message_validation="Object 'customer1' not found",
            )

            # Unknown function name.
            queryId = "TEST_03"
            query = """select maxi(c_custkey), c_nationkey as nkey
                    from customer where c_custkey < 0"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                message_validation="No match found for function signature maxi(<NUMERIC>)",
            )

            # Syntax error (double alias); the expected message reproduces the
            # parser's multi-line SqlSyntaxException output.
            # NOTE(review): the exact internal line breaks of this expected
            # message were lost in the source mangling — reconstructed here to
            # match the Calcite-style error layout; verify against the engine.
            queryId = "TEST_04"
            query = """select max(c_custkey) c_nationkey as nkey
                    from customer where c_custkey < 0"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "c_custkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                message_validation="""SqlSyntaxException

select max(c_custkey) c_nationkey as nkey
                                  ^^
from customer where c_custkey < 0

Encountered "as" at line 1, column 35.
Was expecting one of:
    <EOF>
    "EXCEPT" ...
    "FETCH" ...
    "FROM" ...
    "INTERSECT" ...
    "LIMIT" ...
    "OFFSET" ...
    "ORDER" ...
    "MINUS" ...
    "UNION" ...
    "," ...""",
            )

            # In GENERATOR mode only the first file schema type is exercised.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    # Report GPU memory delta for this test suite.
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_file, bc, nRals):
    """Exercise queries nested over intermediate results that were modified
    with cudf operations (column arithmetic, drops, dtype casts) and then
    re-registered as BlazingSQL tables."""
    # TODO percy should be possible to make this test distributed
    if int(nRals) != 1:
        print(queryType + " will run only when nRals = 1")
        return

    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["customer", "nation"]
        # TODO parquet json
        data_types = [
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            def fetch(sql):
                # Run a raw BlazingSQL statement and collect its result frame.
                return get_results(nRals, bc.sql(sql))

            def fetch_main(sql):
                # Qualify a test query against the "main" schema, then run it.
                return fetch(runTest.get_blazingsql_query("main", sql))

            # Adding plus 3 to a column
            queryId = "TEST_01"
            frame = fetch_main(
                "select c_custkey, c_nationkey, c_acctbal from customer")
            frame["c_custkey"] = frame["c_custkey"] + 3
            bc.create_table("temp", frame)
            nested = fetch("select * from main.temp")
            runTest.run_query(
                bc,
                drill,
                """select c_custkey + 3, c_nationkey,
                    c_acctbal from customer""",
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                nested_query=True,
                blz_result=nested,
                print_result=True,
            )

            # Dropping a column
            queryId = "TEST_02"
            frame = fetch_main(
                "select c_custkey, c_nationkey, c_acctbal from customer")
            frame.drop(columns=["c_custkey"], inplace=True)
            bc.create_table("temp2", frame)
            nested = fetch("select * from main.temp2")
            runTest.run_query(
                bc,
                drill,
                "select c_nationkey, c_acctbal from customer",
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                nested_query=True,
                blz_result=nested,
                print_result=True,
            )

            # Adding plus 3 to a column and dropping another
            queryId = "TEST_03"
            frame = fetch_main(
                """select c_custkey, c_nationkey, c_acctbal
                    from customer where c_acctbal > 1000""")
            frame["c_acctbal"] = frame["c_acctbal"] + 3
            frame.drop(columns=["c_custkey"], inplace=True)
            bc.create_table("temp3", frame)
            nested = fetch("select * from main.temp3")
            runTest.run_query(
                bc,
                drill,
                (
                    """select c_nationkey, c_acctbal + 3
                        from customer where c_acctbal > 1000"""
                ),
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                nested_query=True,
                blz_result=nested,
                print_result=True,
            )

            # Reutilizing blazinsql results twice
            queryId = "TEST_04"
            frame = fetch_main(
                """select c_custkey, c_nationkey, c_acctbal
                    from customer where c_acctbal > 1000""")
            frame["c_acctbal_new"] = frame["c_acctbal"] + 3
            frame.drop(columns=["c_acctbal"], inplace=True)
            frame.drop(columns=["c_custkey"], inplace=True)
            bc.create_table("temp0", frame)
            second_frame = fetch("select * from main.temp0")
            second_frame.drop(columns=["c_nationkey"], inplace=True)
            bc.create_table("temp4", second_frame)
            nested = fetch("select * from main.temp4")
            runTest.run_query(
                bc,
                drill,
                """select c_acctbal + 3 as c_acctbal_new
                    from customer where c_acctbal > 1000""",
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                nested_query=True,
                blz_result=nested,
                print_result=True,
            )

            # Adding another column
            queryId = "TEST_05"
            frame = fetch_main(
                "select c_acctbal from customer where c_acctbal > 1000")
            frame["c_acctbal_new"] = frame["c_acctbal"] + 3
            bc.create_table("temp5", frame)
            nested = fetch("select * from main.temp5")
            runTest.run_query(
                bc,
                drill,
                """select c_acctbal, c_acctbal + 3 as c_acctbal_new
                    from customer where c_acctbal > 1000""",
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                nested_query=True,
                blz_result=nested,
                print_result=True,
            )

            # Left join against an (empty) intermediate result table
            queryId = "TEST_06"
            empty_frame = fetch(
                """select n_nationkey, n_regionkey from nation
                    where n_nationkey < 0""")
            bc.create_table("results", empty_frame)
            joined = fetch(
                """select n.n_nationkey, r.n_regionkey
                    from main.nation as n
                    left join main.results as r
                    on n.n_nationkey = r.n_nationkey""")
            runTest.run_query(
                bc,
                drill,
                """select n.n_nationkey, r.n_regionkey
                    from nation as n
                    left join (select n_nationkey, n_regionkey
                                from nation where n_nationkey < 0) as r
                    on n.n_nationkey = r.n_nationkey""",
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                nested_query=True,
                blz_result=joined,
            )

            queryId = "TEST_07"
            # Calling a cudf function that returns the same column
            frame = fetch("select n_nationkey, n_regionkey from main.nation")
            frame["n_nationkey"] = frame["n_nationkey"].astype("int32")
            bc.create_table("results_tmp", frame)
            nested = fetch("select * from main.results_tmp")
            runTest.run_query(
                bc,
                drill,
                "select n_nationkey, n_regionkey from nation",
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                nested_query=True,
                blz_result=nested,
                print_result=True,
            )

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, spark, dir_data_file, bc, nRals):
    """Run the TPC-H query suite once per configuration-option set.

    The incoming context/client are discarded because every option set gets
    its own freshly initialized context from init_context(); results are
    validated against Drill, except ORC which is validated against Spark.
    """
    # Tear down the caller-supplied context: each config set builds its own.
    if dask_client is not None:
        dask_client.close()
        dask_client.shutdown()
        del dask_client
    del bc

    conf_opt_1 = {
        "JOIN_PARTITION_SIZE_THRESHOLD": 10,
        "MAX_DATA_LOAD_CONCAT_CACHE_BYTE_SIZE": 10,
        "MAX_KERNEL_RUN_THREADS": 5,
    }
    conf_opt_2 = {
        "JOIN_PARTITION_SIZE_THRESHOLD": 10,
        "MAX_DATA_LOAD_CONCAT_CACHE_BYTE_SIZE": 10,
        "MAX_KERNEL_RUN_THREADS": 1,
        "ENABLE_GENERAL_ENGINE_LOGS": False,
        "ENABLE_COMMS_LOGS": True,
        "ENABLE_TASK_LOGS": False,
        "ENABLE_OTHER_ENGINE_LOGS": True,
    }
    conf_opt_3 = {
        "MAX_NUM_ORDER_BY_PARTITIONS_PER_NODE": 1,
        "NUM_BYTES_PER_ORDER_BY_PARTITION": 10,
    }
    conf_opt_4 = {
        "MAX_ORDER_BY_SAMPLES_PER_NODE": 5,
        "MAX_SEND_MESSAGE_THREADS": 1,
        "TRANSPORT_BUFFER_BYTE_SIZE": 10000,
        "TRANSPORT_POOL_NUM_BUFFERS": 10000,
        "ENABLE_GENERAL_ENGINE_LOGS": False,
        "ENABLE_COMMS_LOGS": False,
        "ENABLE_TASK_LOGS": True,
        "ENABLE_OTHER_ENGINE_LOGS": True,
    }
    conf_opt_5 = {
        "MAX_ORDER_BY_SAMPLES_PER_NODE": 5000000,
        "MAX_SEND_MESSAGE_THREADS": 200,
        "TRANSPORT_BUFFER_BYTE_SIZE": 10000,
        "TRANSPORT_POOL_NUM_BUFFERS": 10,
    }
    conf_opt_6 = {
        "MAX_ORDER_BY_SAMPLES_PER_NODE": 5,
        "MAX_SEND_MESSAGE_THREADS": 200,
        "TRANSPORT_BUFFER_BYTE_SIZE": 100000000,
        "TRANSPORT_POOL_NUM_BUFFERS": 10,
        "ENABLE_GENERAL_ENGINE_LOGS": True,
        "ENABLE_COMMS_LOGS": True,
        "ENABLE_TASK_LOGS": False,
        "ENABLE_OTHER_ENGINE_LOGS": False,
    }
    conf_opt_7 = {
        "MAX_ORDER_BY_SAMPLES_PER_NODE": 5000000,
        "MAX_SEND_MESSAGE_THREADS": 1,
        "TRANSPORT_BUFFER_BYTE_SIZE": 100000000,
        "TRANSPORT_POOL_NUM_BUFFERS": 100,
    }
    conf_opt_8 = {
        "BLAZING_DEVICE_MEM_CONSUMPTION_THRESHOLD": 0.0001,
        "MEMORY_MONITOR_PERIOD": 5000000,
        "ENABLE_GENERAL_ENGINE_LOGS": True,
        "ENABLE_COMMS_LOGS": False,
        "ENABLE_TASK_LOGS": True,
        "ENABLE_OTHER_ENGINE_LOGS": False,
    }

    all_set_list = [
        conf_opt_1,
        conf_opt_2,
        conf_opt_3,
        conf_opt_4,
        conf_opt_5,
        conf_opt_6,
        conf_opt_7,
        conf_opt_8,
    ]

    start_mem = gpuMemory.capture_gpu_memory_usage()
    queryType = "Config Options"

    def executionTest(queryType, setInd, config_options):
        """Build a context with config_options and run the full TPC-H suite."""
        bc, dask_client = init_context(config_options)

        tables = [
            "nation",
            "region",
            "customer",
            "lineitem",
            "orders",
            "supplier",
            "part",
            "partsupp",
        ]
        # TODO orc, json
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.PARQUET,
        ]

        # Create Tables ------------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            worder = 1  # order resultsets before comparing them
            use_percentage = False
            acceptable_difference = 0.001

            print("==============================")
            print(queryType)
            print("Test set: " + str(setInd + 1)
                  + " Options: " + str(config_options))
            print("==============================")

            def run_tpch(queryId, engine, diff, pct):
                # Fetch TPC-H query `queryId` and validate it against `engine`.
                query = tpch.get_tpch_query(queryId)
                runTest.run_query(
                    bc,
                    engine,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    diff,
                    pct,
                    fileSchemaType,
                )

            def run_tpch_split(queryId, drill_pct=False, spark_pct=False):
                # ORC results are validated against Spark; every other
                # format is validated against Drill.
                if fileSchemaType == DataType.ORC:
                    run_tpch(queryId, spark, acceptable_difference, spark_pct)
                else:
                    run_tpch(queryId, drill, acceptable_difference, drill_pct)

            run_tpch("TEST_01", drill, acceptable_difference, use_percentage)
            run_tpch("TEST_02", drill, acceptable_difference, use_percentage)
            run_tpch("TEST_03", drill, 0.1, use_percentage)
            run_tpch_split("TEST_04", drill_pct=True)
            run_tpch_split("TEST_05")
            run_tpch("TEST_06", spark, acceptable_difference, True)
            run_tpch_split("TEST_07")
            run_tpch_split("TEST_08")
            run_tpch_split("TEST_09", drill_pct=True, spark_pct=True)
            run_tpch_split("TEST_10")
            # TEST_11 is fetched but intentionally disabled (was commented).
            tpch.get_tpch_query("TEST_11")
            run_tpch_split("TEST_12")
            run_tpch("TEST_13", drill, acceptable_difference, use_percentage)
            run_tpch_split("TEST_14")
            run_tpch("TEST_15", spark, acceptable_difference, use_percentage)
            run_tpch("TEST_16", drill, acceptable_difference, use_percentage)
            run_tpch("TEST_17", spark, acceptable_difference, use_percentage)
            run_tpch_split("TEST_18")
            run_tpch("TEST_19", drill, acceptable_difference, True)
            run_tpch("TEST_20", spark, acceptable_difference, True)
            run_tpch("TEST_21", spark, acceptable_difference, use_percentage)
            # TEST_22 is fetched but intentionally disabled (was commented).
            tpch.get_tpch_query("TEST_22")

        # Tear down this set's context so the next set starts clean.
        if dask_client is not None:
            dask_client.run(gc.collect)
            dask_client.run_on_scheduler(gc.collect)
            dask_client.close()
            dask_client.shutdown()
            del dask_client
        del bc

    for setInd, config_options in enumerate(all_set_list):
        executionTest(queryType, setInd, config_options)
        gc.collect()

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, spark, dir_data_file, bc, nRals):
    """Run the standard TPC-H query suite (TEST_01..TEST_22).

    Each query result is validated against Drill, except where Drill is
    known to disagree (certain queries, and ORC inputs), which are
    validated against Spark instead.
    """
    start_mem = gpuMemory.capture_gpu_memory_usage()
    queryType = "TPCH Queries"

    def executionTest(queryType):
        tables = [
            "nation",
            "region",
            "customer",
            "lineitem",
            "orders",
            "supplier",
            "part",
            "partsupp",
        ]
        # TODO orc, json
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.PARQUET,
        ]

        # Create Tables ------------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            worder = 1  # order resultsets before comparing them
            use_percentage = False
            acceptable_difference = 0.001

            print("==============================")
            print(queryType)
            print("==============================")

            def run_tpch(queryId, engine, diff, pct):
                # Fetch TPC-H query `queryId` and validate it against `engine`.
                query = tpch.get_tpch_query(queryId)
                runTest.run_query(
                    bc,
                    engine,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    diff,
                    pct,
                    fileSchemaType,
                )

            def run_tpch_split(queryId, drill_pct=False, spark_pct=False):
                # ORC results are validated against Spark; every other
                # format is validated against Drill.
                if fileSchemaType == DataType.ORC:
                    run_tpch(queryId, spark, acceptable_difference, spark_pct)
                else:
                    run_tpch(queryId, drill, acceptable_difference, drill_pct)

            run_tpch("TEST_01", drill, acceptable_difference, use_percentage)
            run_tpch("TEST_02", drill, acceptable_difference, use_percentage)
            run_tpch("TEST_03", drill, 0.1, use_percentage)
            run_tpch_split("TEST_04", drill_pct=True)
            run_tpch_split("TEST_05")
            run_tpch("TEST_06", spark, acceptable_difference, True)
            run_tpch_split("TEST_07")
            run_tpch_split("TEST_08")
            run_tpch_split("TEST_09", drill_pct=True, spark_pct=True)
            run_tpch_split("TEST_10")
            # TEST_11 is fetched but intentionally disabled (was commented).
            tpch.get_tpch_query("TEST_11")
            run_tpch_split("TEST_12")
            run_tpch("TEST_13", drill, acceptable_difference, use_percentage)
            run_tpch_split("TEST_14")
            run_tpch("TEST_15", spark, acceptable_difference, use_percentage)
            run_tpch("TEST_16", drill, acceptable_difference, use_percentage)
            run_tpch("TEST_17", spark, acceptable_difference, use_percentage)
            run_tpch_split("TEST_18")
            run_tpch("TEST_19", drill, acceptable_difference, True)
            # TEST_20 and TEST_21 are intentionally disabled (were commented).
            # TEST_22 is fetched but intentionally disabled (was commented).
            tpch.get_tpch_query("TEST_22")

    executionTest(queryType)
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, spark, dir_data_file, bc, nRals):
    """Run aggregation, CTE, and string-function queries (TRIM, REPLACE,
    REVERSE, REGEXP_REPLACE, CHAR_LENGTH, ...) and validate against Drill
    or Spark depending on each engine's known quirks."""
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["customer", "orders", "nation", "region"]
        # TODO json
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            def validate(queryId, query, engine, ordered=worder, **extra):
                # Run one test query against the chosen validation engine.
                runTest.run_query(
                    bc,
                    engine,
                    query,
                    queryId,
                    queryType,
                    ordered,
                    "",
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                    **extra,
                )

            # Spark here because Drill outputs some inf's instead of NaN
            validate(
                "TEST_01",
                """select o_orderkey, sum(o_totalprice)/count(o_orderstatus)
                    from orders
                    where o_custkey < 100
                    group by o_orderstatus, o_orderkey""",
                spark,
            )

            validate(
                "TEST_02",
                """select o_orderkey, o_orderstatus from orders
                    where o_custkey < 10 and o_orderstatus <> 'O'
                    order by o_orderkey, o_orderstatus limit 50""",
                drill,
            )

            validate(
                "TEST_03",
                """select count(o_orderstatus)
                    from orders where o_orderstatus <> 'O'""",
                drill,
            )

            validate(
                "TEST_04",
                """select count(o_orderkey), sum(o_orderkey), o_clerk
                    from orders where o_custkey < 1000
                    group by o_clerk, o_orderstatus""",
                drill,
            )

            validate(
                "TEST_05",
                """select avg(CAST(o_orderkey AS DOUBLE))
                    from orders group by o_orderstatus""",
                drill,
            )

            validate(
                "TEST_06",
                """select count(o_shippriority), sum(o_totalprice)
                    from orders group by o_shippriority""",
                drill,
            )

            validate(
                "TEST_07",
                """with regionTemp as (
                        select r_regionkey, r_name
                        from region where r_regionkey > 2
                    ), nationTemp as (
                        select n_nationkey, n_regionkey as fkey, n_name
                        from nation where n_nationkey > 3
                        order by n_nationkey
                    )
                    select regionTemp.r_name, nationTemp.n_name
                    from regionTemp inner join nationTemp
                    on regionTemp.r_regionkey = nationTemp.fkey""",
                drill,
            )

            validate(
                "TEST_08",
                """select c_custkey, CHAR_LENGTH(c_comment)
                    from customer
                    where MOD(CHAR_LENGTH(c_comment), 7) = 0""",
                spark,
            )

            validate(
                "TEST_09",
                "select sum(CHAR_LENGTH(c_comment)) from customer",
                spark,
            )

            validate(
                "TEST_10",
                """SELECT REPLACE(c_comment, 'in', '') as empty_replace,
                        REPLACE(c_comment, 'the', '&&') as and_replace,
                        REPLACE(c_comment, 'a', '$e*u') as a_replace
                    FROM customer """,
                spark,
            )

            validate(
                "TEST_11",
                """SELECT REPLACE(SUBSTRING(c_comment, 2, 10), 'e', '&&')
                            as rep_sub,
                        CAST(REPLACE(c_comment, 'a', 'e') LIKE '%the%' AS INT)
                            as rep_like,
                        SUBSTRING(REPLACE(c_comment, 'e', '&#'), 2, 30)
                            as sub_rep
                    FROM customer """,
                spark,
            )

            validate(
                "TEST_12",
                """SELECT TRIM(c_comment) as both_space_trim,
                        TRIM(LEADING 'Cu' FROM c_name) as leading_char_trim,
                        TRIM(TRAILING 'E' FROM c_mktsegment)
                            as trailing_char_trim,
                        LTRIM(c_comment) as left_trim,
                        RTRIM(c_comment) as right_trim
                    FROM customer """,
                spark,
            )

            validate(
                "TEST_13",
                """SELECT TRIM(TRAILING 'er' FROM SUBSTRING(c_name, 0, 7))
                            as trim_subs,
                        TRIM(LEADING 'CU' FROM UPPER(c_name)) as trim_upper,
                        LOWER(TRIM('AE' FROM c_mktsegment)) as lower_trim,
                        LTRIM(REPLACE(c_name, 'Customer', ' Test'))
                            as ltrim_replace,
                        CAST(RTRIM(c_comment) LIKE '%the%' AS INT)
                            as rtrim_like
                    FROM customer """,
                spark,
            )

            validate(
                "TEST_14",
                """SELECT REVERSE(c_comment) as reversed_c,
                        SUBSTRING(REVERSE(c_comment), 2, 10)
                            as sub_reversed_c,
                        CAST(SUBSTRING(REVERSE(c_comment), 2, 10)
                            LIKE '%or%' AS INT) as cast_sub_reversed_c
                    FROM customer """,
                spark,
            )

            validate(
                "TEST_15",
                """SELECT COUNT(SUBSTRING(REVERSE(c_comment), 2, 10))
                            as reverse_subbed_count
                    FROM customer
                    GROUP BY SUBSTRING(REVERSE(c_comment), 2, 10) """,
                spark,
            )

            validate(
                "TEST_16",
                """SELECT REGEXP_REPLACE(c_comment, 'a', 'the') as rep_a,
                        REGEXP_REPLACE(c_comment, 'e|a|i|o|u', 'Z')
                            as vowel_z,
                        CHAR_LENGTH(REGEXP_REPLACE(c_comment, '[a-z]+', 'L'))
                            char_len_rep,
                        REGEXP_REPLACE(c_comment, '[a-z]+(.+)', 'Z')
                            as capture_rep
                    FROM customer """,
                spark,
            )

            # BlazingSQL and Spark spell backreferences/positions differently,
            # so the same intent is expressed with two distinct queries.
            bsql_form = """SELECT REGEXP_REPLACE(c_comment,
                        '[a-zA-Z]{2}([a-z]{2})([a-z]{2})', '\\2--\\1')
                            as backref,
                        REGEXP_REPLACE(c_comment,
                        '[a-zA-Z]{2}([a-z]{1})([a-z]{1})', '\\2--\\1', 4)
                            as backref_pos,
                        REGEXP_REPLACE(c_comment, 'e|a|i|o|u', 'Z', 4)
                            as vowel_pos,
                        REGEXP_REPLACE(c_comment, '[a-z]+(.+)', 'Z', 10)
                            as capture_pos
                    FROM customer """
            spark_form = """SELECT REGEXP_REPLACE(c_comment,
                        '[a-zA-Z]{2}([a-z]{2})([a-z]{2})', '$2--$1')
                            as backref,
                        CONCAT( SUBSTRING(c_comment, 0, 3),
                            REGEXP_REPLACE(SUBSTRING(c_comment, 4),
                            '[a-zA-Z]{2}([a-z]{1})([a-z]{1})', '$2--$1') )
                            as backref_pos,
                        CONCAT(SUBSTRING(c_comment, 0, 3),
                            REGEXP_REPLACE(SUBSTRING(c_comment, 4),
                            'e|a|i|o|u', 'Z')) as vowel_pos,
                        CONCAT(SUBSTRING(c_comment, 0, 9),
                            REGEXP_REPLACE(SUBSTRING(c_comment, 10),
                            '[a-z]+(.+)', 'Z')) as capture_pos
                    FROM customer """
            validate("TEST_17", bsql_form, spark, query_spark=spark_form)

            validate(
                "TEST_18",
                """select c_custkey, c_nationkey, c_name from customer
                    where c_custkey > 300 and c_custkey < 600
                    order by c_nationkey, c_custkey""",
                drill,
                ordered=0,
            )

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_lc, bc, nRals):
    """Run TPCH validation queries against Drill, each paired with the
    Calcite relational-algebra plan expected from BlazingSQL.

    Only TEST_03 is currently executed; the remaining ``run_query`` calls
    are commented out (see the per-test TODOs).  NOTE(review): this suite
    logs memory under the module-level ``testName`` rather than a local
    ``queryType`` — presumably because queryType here varies per file
    format; confirm against the runner.
    """
    # GPU memory before the suite; the delta is logged on exit.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        # Read Data TPCH------------------------------------------------------
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        for fileSchemaType in data_types:
            # Per-file-format label derived from the module-level testName.
            queryType = test_name(testName, fileSchemaType)
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_lc, fileSchemaType)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            query = """select count(c_custkey) as c1, count(c_acctbal) as c2
                    from customer"""
            # Expected plan: global aggregate over a column-pruned scan.
            algebra = """LogicalAggregate(group=[{}], c1=[COUNT($0)], c2=[COUNT($1)])
  BindableTableScan(table=[[main, customer]], projects=[[0, 5]], aliases=[[c_custkey, c_acctbal]])"""
            # runTest.run_query(bc, drill, query, queryId, queryType, worder,
            #     '', acceptable_difference, use_percentage, algebra=algebra)

            queryId = "TEST_02"
            query = """select count(c_custkey), sum(c_acctbal),
                    sum(c_acctbal)/count(c_acctbal), min(c_custkey),
                    max(c_nationkey), (max(c_nationkey) + min(c_nationkey))/2
                    c_nationkey from customer
                    where c_custkey < 100 group by c_nationkey"""
            algebra = """LogicalProject(EXPR$0=[$1], EXPR$1=[$2], EXPR$2=[/($2, $3)], EXPR$3=[$4], EXPR$4=[$5], c_nationkey=[/(+($5, $6), 2)])
  LogicalAggregate(group=[{0}], EXPR$0=[COUNT($1)], EXPR$1=[SUM($2)], agg#2=[COUNT($2)], EXPR$3=[MIN($1)], EXPR$4=[MAX($0)], agg#5=[MIN($0)])
    LogicalProject(c_nationkey=[$1], c_custkey=[$0], c_acctbal=[$2])
      BindableTableScan(table=[[main, customer]], filters=[[<($0, 100)]], projects=[[0, 3, 5]], aliases=[[c_custkey, c_nationkey, c_acctbal]])"""
            # TODO: Change sum/count for avg KC
            # runTest.run_query(bc, drill, query, queryId, queryType, worder,
            #     '', acceptable_difference, True, algebra=algebra)

            queryId = "TEST_03"
            query = """select n1.n_nationkey as supp_nation,
                    n2.n_nationkey as cust_nation,
                    l.l_extendedprice * l.l_discount
                    from supplier as s
                    inner join lineitem as l on s.s_suppkey = l.l_suppkey
                    inner join orders as o on o.o_orderkey = l.l_orderkey
                    inner join customer as c on c.c_custkey = o.o_custkey
                    inner join nation as n1 on s.s_nationkey = n1.n_nationkey
                    inner join nation as n2 on c.c_nationkey = n2.n_nationkey
                    where n1.n_nationkey = 1
                    and n2.n_nationkey = 2
                    and o.o_orderkey < 10000"""
            # Expected plan: five-way inner join; the nation and orders
            # filters are pushed down into their BindableTableScans.
            algebra = """LogicalProject(supp_nation=[$1], cust_nation=[$3], EXPR$2=[$2])
  LogicalJoin(condition=[=($0, $3)], joinType=[inner])
    LogicalProject(c_nationkey=[$1], n_nationkey=[$3], *=[$2])
      LogicalJoin(condition=[=($0, $3)], joinType=[inner])
        LogicalProject(s_nationkey=[$0], c_nationkey=[$4], *=[$2])
          LogicalJoin(condition=[=($3, $1)], joinType=[inner])
            LogicalProject(s_nationkey=[$0], o_custkey=[$4], *=[$2])
              LogicalJoin(condition=[=($3, $1)], joinType=[inner])
                LogicalProject(s_nationkey=[$1], l_orderkey=[$2], *=[$4])
                  LogicalJoin(condition=[=($0, $3)], joinType=[inner])
                    BindableTableScan(table=[[main, supplier]], projects=[[0, 3]], aliases=[[s_suppkey, s_nationkey]])
                    LogicalProject(l_orderkey=[$0], l_suppkey=[$1], *=[*($2, $3)])
                      BindableTableScan(table=[[main, lineitem]], projects=[[0, 2, 5, 6]], aliases=[[l_orderkey, l_suppkey, *]])
                BindableTableScan(table=[[main, orders]], filters=[[<($0, 10000)]], projects=[[0, 1]], aliases=[[o_orderkey, o_custkey]])
            BindableTableScan(table=[[main, customer]], projects=[[0, 3]], aliases=[[c_custkey, c_nationkey]])
        BindableTableScan(table=[[main, nation]], filters=[[=($0, 1)]], projects=[[0]], aliases=[[n_nationkey]])
    BindableTableScan(table=[[main, nation]], filters=[[=($0, 2)]], projects=[[0]], aliases=[[n_nationkey]])"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                algebra=algebra,
            )

            queryId = "TEST_04"
            query = """select count(n1.n_nationkey) as n1key,
                    count(n2.n_nationkey) as n2key,
                    count(n2.n_nationkey) as cstar
                    from nation as n1
                    full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6"""
            algebra = """LogicalProject(n1key=[$0], n2key=[$1], cstar=[$1])
  LogicalAggregate(group=[{}], n1key=[COUNT($0)], cstar=[COUNT($1)])
    LogicalProject(n_nationkey=[$0], n_nationkey0=[$1])
      LogicalJoin(condition=[=($0, $2)], joinType=[full])
        BindableTableScan(table=[[main, nation]], projects=[[0]], aliases=[[n_nationkey]])
        LogicalProject(n_nationkey=[$0], $f4=[+($0, 6)])
          BindableTableScan(table=[[main, nation]], projects=[[0]], aliases=[[n_nationkey, $f4]])"""
            # TODO: Change count(n2.n_nationkey) as cstar as count(*)
            # as cstar when it will be supported KC
            # runTest.run_query(bc, drill, query, queryId, queryType, worder,
            #     '', acceptable_difference, use_percentage, algebra = algebra)

            queryId = "TEST_05"
            query = """select count(o_orderkey), sum(o_orderkey), o_clerk
                    from orders where o_custkey < 1000
                    group by o_clerk, o_orderstatus"""
            algebra = """LogicalProject(EXPR$0=[$2], EXPR$1=[$3], o_clerk=[$0])
  LogicalAggregate(group=[{0, 1}], EXPR$0=[COUNT($2)], EXPR$1=[SUM($2)])
    LogicalProject(o_clerk=[$3], o_orderstatus=[$2], o_orderkey=[$0])
      BindableTableScan(table=[[main, orders]], filters=[[<($1, 1000)]], projects=[[0, 1, 2, 6]], aliases=[[o_orderkey, $f1, o_orderstatus, o_clerk]])"""
            # runTest.run_query(bc, drill, query, queryId, queryType, worder,
            #     '', acceptable_difference, True, algebra = algebra)

            queryId = "TEST_06"
            # TODO: Change sum/count for avg KC
            query = """select sum(o_orderkey)/count(o_orderkey)
                    from orders group by o_orderstatus"""
            algebra = """LogicalProject(EXPR$0=[/($1, $2)])
  LogicalAggregate(group=[{0}], agg#0=[SUM($1)], agg#1=[COUNT($1)])
    BindableTableScan(table=[[main, orders]], projects=[[2, 0]], aliases=[[o_orderstatus, o_orderkey]])"""
            # runTest.run_query(bc, drill, query, queryId, queryType, worder,
            #     '', acceptable_difference, True, algebra = algebra)

            queryId = "TEST_07"
            query = """with regionTemp as (
                    select r_regionkey, r_name from region
                    where r_regionkey > 2
                ), nationTemp as (
                    select n_nationkey, n_regionkey as fkey, n_name
                    from nation where n_nationkey > 3 order by n_nationkey
                )
                select regionTemp.r_name, nationTemp.n_name
                from regionTemp inner join nationTemp
                on regionTemp.r_regionkey = nationTemp.fkey"""
            algebra = """LogicalProject(r_name=[$1], n_name=[$3])
  LogicalJoin(condition=[=($0, $2)], joinType=[inner])
    BindableTableScan(table=[[main, region]], filters=[[>($0, 2)]], projects=[[0, 1]], aliases=[[r_regionkey, r_name]])
    LogicalProject(fkey=[$2], n_name=[$1])
      BindableTableScan(table=[[main, nation]], filters=[[>($0, 3)]], projects=[[0, 1, 2]], aliases=[[n_nationkey, n_name, fkey]])"""
            # runTest.run_query(bc, drill, query, queryId, queryType, worder,
            #     '', acceptable_difference, use_percentage, algebra = algebra)

            # In generator mode a single file format is enough.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    end_mem = gpuMemory.capture_gpu_memory_usage()
    # Logged under the module-level testName (not a local queryType).
    gpuMemory.log_memory_usage(testName, start_mem, end_mem)
def main(dask_client, drill, dir_data_lc, bc, nRals):
    """File System S3 suite: register an S3 bucket as a BlazingSQL
    filesystem, create the TPCH tables from it, and validate a battery of
    queries against Drill for each supported file format.

    Fixes in this revision:
      * TEST_01 called ``query(...)`` — i.e. invoked the SQL *string* —
        which raises TypeError; it now goes through ``runTest.run_query``
        like every other case.
      * TEST_19's SQL read ``rom orders`` (invalid SQL); corrected to
        ``from orders`` to match the same query in the algebra suite.
    """
    start_mem = gpuMemory.capture_gpu_memory_usage()
    queryType = "File System S3"

    def executionTest(queryType):
        # Read Data TPCH------------------------------------------------------
        authority = "tpch_s3"
        awsS3BucketName = Settings.data["TestSettings"]["awsS3BucketName"]
        awsS3AccessKeyId = Settings.data["TestSettings"]["awsS3AccessKeyId"]
        awsS3SecretKey = Settings.data["TestSettings"]["awsS3SecretKey"]
        # Register the bucket as the "tpch_s3" filesystem namespace.
        bc.s3(
            authority,
            bucket_name=awsS3BucketName,
            encryption_type=S3EncryptionType.NONE,
            access_key_id=awsS3AccessKeyId,
            secret_key=awsS3SecretKey,
        )

        # dir_df = dir_data_lc[dir_data_lc.find("DataSet"):len(dir_data_lc)]
        # Tables are read from the bucket, not the local data directory.
        dir_data_lc = "s3://" + authority + "/" + "DataSet100Mb2part/"

        tables = [
            "nation", "region", "supplier", "customer", "lineitem", "orders"
        ]
        data_types = [DataType.CSV, DataType.PARQUET]  # TODO json

        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_lc, fileSchemaType, tables=tables)

            # Run Query -----------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            query = """select count(c_custkey) as c1, count(c_acctbal) as c2
                    from customer"""
            # FIX: previously `query(bc, drill, ...)` — calling a str.
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_02"
            query = """select count(n_nationkey), count(n_regionkey)
                    from nation"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_03"
            query = """select count(s_suppkey), count(s_nationkey)
                    from supplier"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_04"
            query = """select count(c_custkey), sum(c_acctbal),
                    sum(c_acctbal)/count(c_acctbal), min(c_custkey),
                    max(c_nationkey), (max(c_nationkey) + min(c_nationkey))/2
                    c_nationkey from customer
                    where c_custkey < 100 group by c_nationkey"""
            # Percentage comparison: division results differ slightly
            # between engines.
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, True, fileSchemaType,
            )  # TODO: Change sum/count for avg KC

            queryId = "TEST_05"
            query = """select c.c_custkey, c.c_nationkey, n.n_regionkey
                    from customer as c
                    inner join nation as n on c.c_nationkey = n.n_nationkey
                    where n.n_regionkey = 1 and c.c_custkey < 50"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_06"
            query = """select c_custkey, c_nationkey, c_acctbal
                    from customer
                    order by c_nationkey, c_custkey, c_acctbal"""
            # worder = 0: the query orders its own output.
            runTest.run_query(
                bc, drill, query, queryId, queryType, 0, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_07"
            query = """select c_custkey + c_nationkey, c_acctbal
                    from customer order by 1, 2"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, 0, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_08"
            query = """select n1.n_nationkey as supp_nation,
                    n2.n_nationkey as cust_nation,
                    l.l_extendedprice * l.l_discount
                    from supplier as s
                    inner join lineitem as l on s.s_suppkey = l.l_suppkey
                    inner join orders as o on o.o_orderkey = l.l_orderkey
                    inner join customer as c on c.c_custkey = o.o_custkey
                    inner join nation as n1 on s.s_nationkey = n1.n_nationkey
                    inner join nation as n2 on c.c_nationkey = n2.n_nationkey
                    where n1.n_nationkey = 1
                    and n2.n_nationkey = 2
                    and o.o_orderkey < 10000"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_09"
            # Contradictory predicates: expected empty result set.
            query = """select c_custkey, c_nationkey as nkey
                    from customer where c_custkey < 0
                    and c_nationkey >= 30"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_10"
            query = """select sin(c_acctbal), cos(c_acctbal),
                    asin(c_acctbal), acos(c_acctbal), ln(c_acctbal),
                    tan(c_acctbal), atan(c_acctbal), floor(c_acctbal),
                    c_acctbal from customer"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_11"
            query = """select n1.n_nationkey as n1key,
                    n2.n_nationkey as n2key,
                    n1.n_nationkey + n2.n_nationkey
                    from nation as n1
                    full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6
                    where n1.n_nationkey < 10 and n1.n_nationkey > 5"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_12"
            query = """select count(n1.n_nationkey) as n1key,
                    count(n2.n_nationkey) as n2key, count(*) as cstar
                    from nation as n1
                    full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6"""
            # TODO: Change count(n2.n_nationkey) as cstar as count(*) as cstar
            # when it will be supported KC
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_13"
            query = """select o_orderkey, o_custkey from orders
                    where o_orderkey < 10 and o_orderkey >= 1"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_14"
            query = """select 100168549 - sum(o_orderkey)/count(o_orderkey),
                    56410984/sum(o_totalprice),
                    (123 - 945/max(o_orderkey)) /
                    (sum(81619/o_orderkey)/count(81619/o_orderkey))
                    from orders where o_orderkey < 50"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, True, fileSchemaType,
            )  # TODO: Change sum/count for avg KC

            queryId = "TEST_15"
            query = """select o_orderkey,
                    sum(o_totalprice)/count(o_orderstatus)
                    from orders where o_custkey < 100
                    group by o_orderstatus, o_orderkey"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, True, fileSchemaType,
            )  # TODO: Change sum/count for avg KC

            queryId = "TEST_16"
            query = """select o_orderkey, o_orderstatus from orders
                    where o_custkey < 10 and o_orderstatus <> 'O'
                    order by o_orderkey, o_orderstatus limit 50"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_17"
            query = """select count(o_orderstatus) from orders
                    where o_orderstatus <> 'O'"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_18"
            query = """select count(o_orderkey), sum(o_orderkey), o_clerk
                    from orders where o_custkey < 1000
                    group by o_clerk, o_orderstatus"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_19"
            # FIX: SQL previously read "rom orders" (typo for "from").
            query = """select sum(o_orderkey)/count(o_orderkey)
                    from orders group by o_orderstatus"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, True, fileSchemaType,
            )  # TODO: Change sum/count for avg KC

            queryId = "TEST_20"
            query = """select count(o_shippriority), sum(o_totalprice)
                    from orders group by o_shippriority"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_21"
            query = """with regionTemp as (
                    select r_regionkey, r_name from region
                    where r_regionkey > 2
                ), nationTemp as (
                    select n_nationkey, n_regionkey as fkey, n_name
                    from nation where n_nationkey > 3 order by n_nationkey
                )
                select regionTemp.r_name, nationTemp.n_name
                from regionTemp inner join nationTemp
                on regionTemp.r_regionkey = nationTemp.fkey"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            # In generator mode a single file format is enough.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest(queryType)

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, spark, dir_data_file, bc, nRals):
    """Aggregation-over-outer-join suite: MIN/MAX/AVG/COUNT where the
    WHERE clause filters on IS NULL / IS NOT NULL join results.

    Most cases are validated against Drill; TEST_06 compares against
    Spark instead (see the inline note).  AVG inputs are CAST to DOUBLE
    so both engines agree on the result type.
    """
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = [
            "nation", "region", "customer", "lineitem", "orders", "supplier"
        ]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            # Left join, then keep only rows whose preserved-side key is
            # NULL — exercises aggregates over a NULL-only/empty input.
            query = """select MIN(n.n_nationkey), MAX(r.r_regionkey),
                    AVG(CAST((n.n_nationkey + r.r_regionkey) AS DOUBLE))
                    from nation as n left outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    where n.n_nationkey IS NULL"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_02"
            query = """select SUM(n1.n_nationkey) as n1key,
                    AVG(CAST((n2.n_nationkey + n1.n_nationkey) AS DOUBLE))
                    as n2key
                    from nation as n1 full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 10
                    where n1.n_nationkey IS NOT NULL"""
            # AVG produces a double: allow a small numeric tolerance here.
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                0.01,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_03"
            query = """select COUNT(n1.n_nationkey) as n1key,
                    COUNT(n2.n_nationkey + n1.n_nationkey) as n2key
                    from nation as n1 full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 10
                    where n1.n_nationkey IS NOT NULL"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_04"
            query = """select COUNT(n1.n_regionkey),
                    AVG(CAST(n1.n_regionkey AS DOUBLE))
                    from nation as n1 full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6
                    WHERE n1.n_regionkey IS NOT NULL"""
            # AVG tolerance, as in TEST_02.
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                0.01,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_05"
            query = """select MIN(n.n_nationkey), MAX(n.n_nationkey)
                    from nation as n left outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    WHERE n.n_nationkey IS NULL"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = 'TEST_06'
            query = """select COUNT(n.n_nationkey), AVG(r.r_regionkey)
                    from nation as n left outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    WHERE n.n_regionkey IS NULL"""
            runTest.run_query(
                bc,
                spark,  # Drill shows: Different number of columns blzSQLresult: 2 PyDrill result: 0
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_07"
            query = """select n.n_nationkey, n.n_name, r.r_regionkey,
                    r.r_name from nation as n
                    left outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    WHERE r.r_name IS NULL"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_08"
            query = """select n.n_nationkey, n.n_name, r.r_regionkey,
                    r.r_name from nation as n
                    left outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    WHERE n.n_name IS NOT NULL"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_09"
            # Same aggregates as TEST_01 but with a RIGHT outer join.
            query = """select MIN(n.n_nationkey), MAX(r.r_regionkey),
                    AVG(CAST((n.n_nationkey + r.r_regionkey) AS DOUBLE))
                    from nation as n right outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    where n.n_nationkey IS NULL"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_10"
            query = """select n.n_nationkey, n.n_name, r.r_regionkey,
                    r.r_name from nation as n
                    right outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    WHERE r.r_name IS NULL"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_11"
            query = """select n.n_nationkey, n.n_name, r.r_regionkey,
                    r.r_name from nation as n
                    right outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    WHERE n.n_name IS NOT NULL"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # In generator mode a single file format is enough.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_file, bc, nRals):
    """ORDER BY suite: every query sorts its own output (by columns or
    ordinals), so results are compared against Drill without an extra
    client-side sort (worder = 0)."""
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["customer", "lineitem", "orders"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # All six cases share identical comparison settings, so drive them
        # from one (queryId, sql) table per file format.
        cases = [
            (
                "TEST_01",
                """select c_custkey, c_acctbal from customer
                order by c_acctbal desc, c_custkey""",
            ),
            (
                "TEST_02",
                "select c_acctbal from customer order by c_acctbal",
            ),
            (
                "TEST_03",
                """select c_custkey, c_nationkey, c_acctbal from customer
                order by c_nationkey, c_acctbal, c_custkey desc""",
            ),
            (
                "TEST_04",
                """select c_custkey + c_nationkey, c_acctbal from customer
                order by 1 desc, 2""",
            ),
            (
                "TEST_05",
                """select l_linenumber, l_orderkey from lineitem
                where l_orderkey < 50000
                order by l_linenumber desc, l_suppkey asc,
                l_partkey desc, l_orderkey""",
            ),
            (
                "TEST_06",
                """select o_orderkey, o_custkey, o_totalprice, o_orderstatus
                from orders where o_orderkey < 100
                order by o_custkey, o_orderstatus,
                o_shippriority, o_comment""",
            ),
        ]

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # The queries are self-ordering, so no re-sort before comparing.
            worder = 0
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            for queryId, query in cases:
                runTest.run_query(
                    bc,
                    drill,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )

            # In generator mode a single file format is enough.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_file, bc, nRals):
    """Nested-query suite: derived tables and sub-selects joined back to
    base tables, validated against Drill for every file format."""
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = [
            "nation",
            "region",
            "customer",
            "orders",
            "part",
            "partsupp",
            "supplier",
        ]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # The four cases share identical comparison settings; keep them in
        # a single (queryId, sql) table.
        cases = [
            (
                "TEST_01",
                """select maxPrice, avgSize from (
                    select avg(CAST(p_size AS DOUBLE)) as avgSize,
                    max(p_retailprice) as maxPrice,
                    min(p_retailprice) as minPrice
                    from part
                ) as partAnalysis
                order by maxPrice, avgSize""",
            ),
            (
                "TEST_02",
                """select custOrders.avgPrice, custOrders.numOrders
                from customer inner join (
                    select o_custkey as o_custkey,
                    avg(o_totalprice) as avgPrice,
                    count(o_totalprice) as numOrders
                    from orders
                    where o_custkey <= 100 group by o_custkey
                ) as custOrders
                on custOrders.o_custkey = customer.c_custkey
                where customer.c_nationkey <= 5""",
            ),
            (
                "TEST_03",
                """select partSuppTemp.partKey, partAnalysis.avgSize from (
                    select min(p_partkey) as partKey,
                    avg(CAST(p_size AS DOUBLE)) as avgSize,
                    max(p_retailprice) as maxPrice,
                    min(p_retailprice) as minPrice
                    from part
                ) as partAnalysis
                inner join (
                    select ps_partkey as partKey, ps_suppkey as suppKey
                    from partsupp where ps_availqty > 2
                ) as partSuppTemp
                on partAnalysis.partKey = partSuppTemp.partKey
                inner join (
                    select s_suppkey as suppKey from supplier
                ) as supplierTemp
                on supplierTemp.suppKey = partSuppTemp.suppKey""",
            ),
            (
                "TEST_04",
                """select avg(CAST(custKey AS DOUBLE)) from (
                    select customer.c_custkey as custKey from (
                        select min(o_custkey) as o_custkey from orders
                    ) as tempOrders
                    inner join customer
                    on tempOrders.o_custkey = customer.c_custkey
                    where customer.c_nationkey > 6
                ) as joinedTables""",
            ),
        ]

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Re-sort result sets before comparing them (worder = 1).
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            for queryId, query in cases:
                runTest.run_query(
                    bc,
                    drill,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )

            # In generator mode a single file format is enough.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, spark, dir_data_file, bc, nRals):
    """TIMESTAMPDIFF suite validated against Spark.

    BlazingSQL spells day differences as ``timestampdiff(DAY, t1, t2)``
    while Spark uses ``datediff(t2, t1)``, so every case carries a
    separate ``query_spark``.

    Fix in this revision: every ``runTest.run_query`` call was missing the
    ``fileSchemaType`` positional argument that all other suites in this
    file pass right after ``use_percentage``; it is now supplied.
    """
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["lineitem", "orders"]
        data_types = [DataType.CSV, DataType.PARQUET]  # TODO json

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            # Spark needs its own schema registered per file format.
            cs.init_spark_schema(
                spark,
                Settings.data["TestSettings"]["dataDirectory"],
                fileSchemaType=fileSchemaType,
            )
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            query = """select l_shipdate, l_commitdate,
                    timestampdiff(DAY, l_commitdate, l_shipdate) as diff
                    from lineitem limit 20"""
            query_spark = """select l_shipdate, l_commitdate,
                    datediff(l_shipdate, l_commitdate) as diff
                    from lineitem limit 20"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,  # FIX: was missing from this call
                query_spark=query_spark,
                print_result=True,
            )

            queryId = "TEST_02"
            query = """select l_shipdate,
                    timestampdiff(DAY, date '1970-01-01', l_shipdate)
                    as diff from lineitem limit 20"""
            query_spark = """select l_shipdate,
                    datediff(l_shipdate, date '1970-01-01')
                    as diff from lineitem limit 20"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,  # FIX: was missing from this call
                query_spark=query_spark,
                print_result=True,
            )

            queryId = "TEST_03"
            query = """select * from orders
                    where timestampdiff(DAY, date '1995-02-04',
                    o_orderdate) < 25"""
            query_spark = """select * from orders
                    where datediff(o_orderdate, date '1995-02-04') < 25"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,  # FIX: was missing from this call
                query_spark=query_spark,
                print_result=True,
            )

            # In generator mode a single file format is enough.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, spark, dir_data_file, bc, nRals):
    """Cross-join suite validated against Spark: plain cross joins,
    cross joins with filters/grouping, and a CTE feeding a cross join."""
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["nation", "region", "customer", "lineitem", "orders"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Identical comparison settings for all cases: one (id, sql) table.
        cases = [
            ("TEST_01", "select * from nation cross join region"),
            (
                "TEST_02",
                """ select o_orderkey, o_totalprice,
                    l_linenumber, l_shipmode
                from orders cross join lineitem
                where o_orderkey < 6
                and l_receiptdate > date '1996-07-12'
                and l_linenumber > 5
                and o_totalprice < 74029.55
                and o_clerk = 'Clerk#000000880'
                and l_shipmode IN ('FOB', 'RAIL')
                order by o_orderkey, o_totalprice, l_linenumber""",
            ),
            (
                "TEST_03",
                """select o_orderkey, n_nationkey
                from nation cross join orders
                where o_totalprice > 4000.0
                and o_orderdate > date '1998-07-12'
                and o_orderkey > 425000
                group by o_orderkey, n_nationkey
                order by o_orderkey, n_nationkey""",
            ),
            (
                "TEST_04",
                """ with cust_nation as (
                    select c_custkey, c_name, n_nationkey, n_name
                    from customer
                    inner join nation
                    on c_nationkey = n_nationkey
                    where n_nationkey > 21
                    and c_acctbal > 525.0
                    and c_custkey > 13450
                    order by c_custkey, n_nationkey
                ), ord_lineitem as (
                    select o_orderkey, l_quantity
                    from orders
                    left join lineitem
                    on o_orderkey = l_orderkey
                    where l_shipdate > date '1998-11-12'
                    and o_totalprice > 3500.0
                    and l_quantity > 48.0
                    and l_shipmode in ('AIR', 'FOB', 'SHIP')
                    order by o_orderkey
                )
                select c_custkey, n_name, l_quantity
                from ord_lineitem
                cross join cust_nation
                where n_name = 'RUSSIA'
                order by c_custkey""",
            ),
        ]

        # Create Tables ------------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Re-sort result sets before comparing; exact match required.
            worder = 1
            use_percentage = False
            acceptable_difference = 0

            print("==============================")
            print(queryType)
            print("==============================")

            for queryId, query in cases:
                runTest.run_query(
                    bc,
                    spark,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )

            # In generator mode a single file format is enough.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, spark, dir_data_file, bc, nRals):
    """Run the "String case" suite (UPPER/LOWER/SUBSTRING queries).

    Most queries are validated against Drill; TEST_03, TEST_08 and TEST_10
    use Spark as the reference engine instead.
    """
    # Snapshot GPU memory so usage attributable to this suite can be logged.
    start_mem = gpuMemory.capture_gpu_memory_usage()
    queryType = "String case"

    def executionTest():
        tables = ["customer", "nation", "orders"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            query = """select c_custkey, UPPER(c_name) from customer
                    where SUBSTRING(c_name,1,17) = 'Customer#00000000'"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_02"
            # NOTE(review): TPCH c_mktsegment values are upper-case, so the
            # lower-case literal presumably selects zero rows — confirm this
            # empty-result comparison is intended.
            query = """select c_custkey, upper(c_comment) from customer
                    where c_mktsegment = 'household'"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_03"
            query = """select LOWER(c_name), UPPER(c_address) from customer
                    where c_custkey < 42"""
            runTest.run_query(
                bc, spark, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_04"
            query = """select c.c_custkey, UPPER(SUBSTRING(c.c_name, 1, 8)),
                        LOWER(SUBSTRING(c.c_name, 10, 18)) || '**'
                    from customer c where c.c_custkey < 20"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_05"
            query = """select o_orderkey, upper(o_comment), lower(o_orderstatus)
                    from orders where o_custkey < 120"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_06"
            query = """select lower(o_orderpriority), lower(o_orderstatus)
                    from orders group by o_orderpriority, o_orderstatus"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_07"
            query = """select count(o_orderkey), sum(o_orderkey), lower(o_clerk)
                    from orders where o_custkey < 1000
                    group by o_clerk, o_orderstatus"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_08"
            query = """select count(o_orderkey), sum(o_orderkey), upper(o_clerk)
                    from orders where o_custkey < 1000
                    group by o_clerk, o_orderstatus"""
            runTest.run_query(
                bc, spark, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_09"
            query = """select LOWER(n_name), UPPER(n_comment) from nation
                    where n_regionkey = 4"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_10"
            query = """select upper(n_comment), lower(upper(n_comment))
                    from nation where n_nationkey between 5 and 15"""
            runTest.run_query(
                bc, spark, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
                print_result=True,
            )

            # In GENERATOR mode only the first file schema type is exercised.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_lc, bc, nRals):
    """Run TPCH queries against Hive tables stored on a Kerberized HDFS.

    Connects to HDFS with a Kerberos ticket; if the connection fails the
    whole suite is skipped with a warning instead of raising.

    NOTE(review): ``queryType`` is read below (warning message, prints,
    memory log) but is never assigned in this function — presumably a
    module-level constant; TODO confirm it is defined before this runs.

    FIX: the TEST_04 and TEST_14 run_query calls omitted the trailing
    ``fileSchemaType`` argument that every other call in the suite passes
    (compare the identical TEST_04 elsewhere in this file); it is now
    supplied so the calls match run_query's positional signature.
    """
    # Snapshot GPU memory so usage attributable to this suite can be logged.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        # Read Data TPCH------------------------------------------------------
        authority = "hadoop.docker.com:9000"
        ktoken = "../KrbHDFS/myconf/krb5cc_0"
        # Resolve the ticket cache to an absolute path for the HDFS driver.
        krbticket = os.path.abspath(ktoken)
        hdfs_host = "172.22.0.3"
        hdfs_port = 9000
        hdfs_driver = "libhdfs"
        print("Using krb ticket: " + krbticket)
        result, error_msg, fs = bc.hdfs(
            authority,
            host=hdfs_host,
            port=hdfs_port,
            user="******",
            driver=hdfs_driver,
            kerb_ticket=krbticket,
        )

        # Best-effort: skip (do not fail) the suite when HDFS is unreachable.
        if result is False:
            msg = ("""WARNING: Could not connect to HDFS instance %s:%d
                    using driver %s, error was: %s"""
                   % (hdfs_host, hdfs_port, hdfs_driver, error_msg))
            print(msg)
            print("WARNING: Will ignore " + queryType)
            return

        print("Success connection to HDFS:")
        print(fs)

        hdfs_dir_data_lc = "hdfs://" + authority + dir_data_lc
        print("TPCH files at: " + hdfs_dir_data_lc)

        tables = [
            "nation",
            "region",
            "supplier",
            "customer",
            "lineitem",
            "orders",
            "part",
        ]
        # tables = ['customer']

        # TODO json1
        data_types = [DataType.CSV, DataType.ORC, DataType.PARQUET]

        for fileSchemaType in data_types:
            # if skip_test(dask_client, nRals, fileSchemaType, queryType):
            #     continue
            cs.create_hive_tables(bc, hdfs_dir_data_lc, fileSchemaType,
                                  tables=tables)

            # Run Query -----------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            query = """select count(c_custkey) as c1, count(c_acctbal) as c2
                    from customer"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_02"
            query = """select count(n_nationkey), count(n_regionkey)
                    from nation"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_03"
            query = """select count(s_suppkey), count(s_nationkey)
                    from supplier"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_04"
            query = """select count(c_custkey), sum(c_acctbal),
                        sum(c_acctbal)/count(c_acctbal), min(c_custkey),
                        max(c_nationkey),
                        (max(c_nationkey) + min(c_nationkey))/2 c_nationkey
                    from customer where c_custkey < 100
                    group by c_nationkey"""
            # FIX: fileSchemaType was previously missing from this call.
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              "", 0.01, True,
                              fileSchemaType)  # TODO: Change sum/count for avg KC

            queryId = "TEST_05"
            query = """select c.c_custkey, c.c_nationkey, n.n_regionkey
                    from customer as c
                    inner join nation as n
                    on c.c_nationkey = n.n_nationkey
                    where n.n_regionkey = 1 and c.c_custkey < 50"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_06"
            query = """select c_custkey, c_nationkey, c_acctbal from customer
                    order by c_nationkey, c_custkey, c_acctbal"""
            # worder=0: the query itself orders the result set.
            runTest.run_query(
                bc, drill, query, queryId, queryType, 0, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_07"
            query = """select c_custkey + c_nationkey, c_acctbal from customer
                    order by 1, 2"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, 0, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_08"
            query = """select n1.n_nationkey as supp_nation,
                        n2.n_nationkey as cust_nation,
                        l.l_extendedprice * l.l_discount
                    from supplier as s
                    inner join lineitem as l on s.s_suppkey = l.l_suppkey
                    inner join orders as o on o.o_orderkey = l.l_orderkey
                    inner join customer as c on c.c_custkey = o.o_custkey
                    inner join nation as n1 on s.s_nationkey = n1.n_nationkey
                    inner join nation as n2 on c.c_nationkey = n2.n_nationkey
                    where n1.n_nationkey = 1
                    and n2.n_nationkey = 2 and o.o_orderkey < 10000"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_09"
            query = """select c_custkey, c_nationkey as nkey from customer
                    where c_custkey < 0 and c_nationkey >= 30"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_10"
            query = """select sin(c_acctbal), cos(c_acctbal),
                        asin(c_acctbal), acos(c_acctbal),
                        ln(c_acctbal), tan(c_acctbal),
                        atan(c_acctbal), floor(c_acctbal),
                        c_acctbal
                    from customer"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                0.01, use_percentage, fileSchemaType,
            )

            queryId = "TEST_11"
            query = """select n1.n_nationkey as n1key,
                        n2.n_nationkey as n2key,
                        n1.n_nationkey + n2.n_nationkey
                    from nation as n1
                    full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6
                    where n1.n_nationkey < 10 and n1.n_nationkey > 5"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_12"
            query = """select count(n1.n_nationkey) as n1key,
                        count(n2.n_nationkey) as n2key, count(*) as cstar
                    from nation as n1
                    full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_13"
            query = """select o_orderkey, o_custkey from orders
                    where o_orderkey < 10 and o_orderkey >= 1"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_14"
            query = """select 100168549 - sum(o_orderkey)/count(o_orderkey),
                        56410984/sum(o_totalprice),
                        (123 - 945/max(o_orderkey)) /
                        (sum(81619/o_orderkey)/count(81619/o_orderkey))
                    from orders where o_orderkey < 50"""
            # FIX: fileSchemaType was previously missing from this call.
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              "", 0.01, True,
                              fileSchemaType)  # TODO: Change sum/count for avg KC

            queryId = "TEST_15"
            query = """select EXTRACT(YEAR FROM l_receiptdate) -
                        EXTRACT(YEAR FROM l_shipdate) as years_late,
                        EXTRACT(MONTH FROM l_receiptdate) -
                        EXTRACT(MONTH FROM l_shipdate) as months_late,
                        EXTRACT(DAY FROM l_receiptdate) -
                        EXTRACT(DAY FROM l_shipdate) as days_late
                    from lineitem where l_shipdate < DATE '1993-01-01'"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

    executionTest()
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, spark, dir_data_file, bc, nRals):
    """Run the "Bindable Alias" suite (column aliases in projections/unions).

    Drill is the reference engine except TEST_09 on ORC, which uses Spark.
    """
    # Snapshot GPU memory so usage attributable to this suite can be logged.
    start_mem = gpuMemory.capture_gpu_memory_usage()
    queryType = "Bindable Alias"

    def executionTest(queryType):
        tables = ["nation", "region", "lineitem", "orders"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO ORC gdf parquet json

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order the
            # resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            query = """select n_regionkey as rkey, n_nationkey from nation
                    where n_regionkey < 3"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
                print_result=True,
            )

            queryId = "TEST_02"
            query = """select n_nationkey, n_regionkey as nr from nation
                    where n_regionkey < 3"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_03"
            query = """select n_regionkey as rkey, n_nationkey from nation
                    where n_regionkey < 3 and n_nationkey > 5"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_04"
            query = """select n_nationkey as nkey, n_comment from nation
                    where n_nationkey > 5"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_05"
            query = "select n_regionkey as rkey, n_nationkey from nation"
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_06"
            query = """select n_regionkey as rkey, n_nationkey as nkey
                    from nation"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_07"
            query = """select n_nationkey as nkey, n_regionkey as rkey
                    from nation"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_08"
            query = """(select r_name as name, r_regionkey as key from region )
                    union all
                    (select n_name as name, n_nationkey as key from nation )"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_09"
            query = """select o_orderkey as okey, o_custkey as ckey,
                        o_orderdate as odate from orders
                    where o_orderpriority = '1-URGENT'
                    order by okey, ckey, odate"""
            # ORC is compared against Spark instead of Drill for this query.
            if fileSchemaType == DataType.ORC:
                runTest.run_query(
                    bc, spark, query, queryId, queryType, worder, "",
                    acceptable_difference, use_percentage, fileSchemaType,
                )
            else:
                runTest.run_query(
                    bc, drill, query, queryId, queryType, worder, "",
                    acceptable_difference, use_percentage, fileSchemaType,
                )

            queryId = "TEST_10"
            query = """select (l_quantity + 2) as new_quantity from lineitem
                    group by l_quantity
                    order by l_quantity asc limit 30"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_11"
            query = """select count(o_orderkey) as count_ok from orders
                    group by o_orderstatus"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_12"
            query = "select n_nationkey from nation where n_regionkey < 3"
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            # NOTE(review): unlike sibling suites, the GENERATOR-mode early
            # break is commented out here — confirm that is intentional.
            # if Settings.execution_mode == ExecutionMode.GENERATOR:
            #     print("==============================")
            #     break

    executionTest(queryType)
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_file, bc, nRals):
    """Run the COUNT(DISTINCT ...) suite, comparing results against Drill.

    NOTE(review): the label 'Count Distinc' is presumably a typo for
    'Count Distinct', but it may be matched by test-selection config —
    confirm before renaming.
    """
    # Snapshot GPU memory so usage attributable to this suite can be logged.
    start_mem = gpuMemory.capture_gpu_memory_usage()
    queryType = 'Count Distinc'

    def executionTest():
        tables = [
            'partsupp', 'lineitem', 'part', 'supplier', 'orders', 'customer',
            'region', 'nation'
        ]
        data_types = [
            DataType.DASK_CUDF, DataType.CUDF, DataType.CSV, DataType.ORC,
            DataType.PARQUET
        ]  # TODO json

        # Create Tables ------------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_01'
            query = "select count(distinct (n_regionkey + n_nationkey)), n_regionkey from nation group by n_regionkey"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              "n_regionkey", acceptable_difference,
                              use_percentage, fileSchemaType)

            queryId = 'TEST_02'
            query = "select count(distinct o_custkey), o_orderkey from orders where o_orderkey<100 group by o_orderkey, (o_orderkey + o_custkey)"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              "o_orderkey", acceptable_difference,
                              use_percentage, fileSchemaType)

            queryId = 'TEST_03'
            query = "select count(distinct(o_orderkey + o_custkey)) as new_col, sum(o_orderkey), o_custkey from orders group by o_custkey"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              "o_custkey", acceptable_difference,
                              use_percentage, fileSchemaType)

            # queryId = 'TEST_04'
            # query = "select count(distinct(o_custkey)), avg(o_totalprice), (o_orderkey + o_custkey) as num from orders where o_custkey < 100 group by o_custkey, o_orderkey"
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', 0.01, use_percentage, fileSchemaType)

            queryId = 'TEST_05'
            query = "select count(distinct(o_custkey)), max(o_totalprice), min(o_totalprice), avg(o_totalprice) from orders group by o_custkey"
            # 0.01 tolerance: result contains float aggregates (avg/min/max).
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', 0.01, use_percentage, fileSchemaType)

            queryId = 'TEST_06'
            query = "select n_nationkey, count(distinct (n_regionkey + n_nationkey))/count(n_nationkey) from nation group by n_nationkey"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', 0.01, use_percentage, fileSchemaType)

            queryId = 'TEST_07'
            query = "select count(distinct(o_custkey)), count(distinct(o_totalprice)), sum(o_orderkey) from orders group by o_custkey"  # count(distinct(o_orderdate)),
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            # queryId = 'TEST_08'
            # query = "select COUNT(DISTINCT(n.n_nationkey)), AVG(r.r_regionkey) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey"
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', 0.01, use_percentage, fileSchemaType)

            queryId = 'TEST_09'
            query = "select MIN(n.n_nationkey), MAX(r.r_regionkey), COUNT(DISTINCT(n.n_nationkey + r.r_regionkey)) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            # queryId = 'TEST_10'
            # query = "select COUNT(DISTINCT(n1.n_nationkey)) as n1key, COUNT(DISTINCT(n2.n_nationkey)) as n2key from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_regionkey"
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            # queryId = 'TEST_11'
            # query = "select r.r_regionkey, n.n_nationkey, COUNT(n.n_nationkey), COUNT(DISTINCT(r.r_regionkey)), SUM(DISTINCT(n.n_nationkey + r.r_regionkey)) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey GROUP BY r.r_regionkey, n.n_nationkey"
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_12'
            query = "select n1.n_regionkey, n2.n_nationkey, MIN(n1.n_regionkey), MAX(n1.n_regionkey), AVG(n2.n_nationkey) from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 GROUP BY n1.n_regionkey, n2.n_nationkey"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            # In GENERATOR mode only the first file schema type is exercised.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_file, bc, nRals):
    """Run math-function queries (sin/cos/asin/acos/ln/tan/atan/floor/ceil)
    on the customer table, comparing results against Drill.

    NOTE(review): ``queryType`` is read below (skip_test, prints, memory log)
    but is never assigned in this function — presumably a module-level
    constant; TODO confirm it is defined before this runs.
    """
    # Snapshot GPU memory so usage attributable to this suite can be logged.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["customer"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            # Float tolerance: trig/log results differ slightly across engines.
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            query = """select sin(c_acctbal), cos(c_acctbal),
                        asin(c_acctbal), acos(c_acctbal),
                        ln(c_acctbal), tan(c_acctbal),
                        atan(c_acctbal), floor(c_acctbal),
                        ceil(c_acctbal), c_acctbal
                    from customer"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_02"
            query = """select sin(c_acctbal), cos(c_acctbal),
                        asin(c_acctbal), acos(c_acctbal), c_acctbal
                    from customer"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_03"
            query = """select sin(c_acctbal), cos(c_acctbal),
                        asin(c_acctbal), acos(c_acctbal),
                        ln(c_acctbal), c_acctbal
                    from customer"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_04"
            query = """select sin(c_acctbal), cos(c_acctbal),
                        asin(c_acctbal), acos(c_acctbal),
                        ln(c_acctbal), tan(c_acctbal),
                        atan(c_acctbal), c_acctbal
                    from customer"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_05"
            query = """select sin(c_acctbal), cos(c_acctbal),
                        asin(c_acctbal), acos(c_acctbal),
                        ln(c_acctbal), tan(c_acctbal),
                        atan(c_acctbal), floor(c_acctbal), c_acctbal
                    from customer"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_06"
            query = "select floor(c_acctbal), c_acctbal from customer"
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            # In GENERATOR mode only the first file schema type is exercised.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, spark, dir_data_file, bc, nRals):
    """Run date/timestamp queries (EXTRACT, CURRENT_DATE/TIME/TIMESTAMP).

    Drill is the reference engine except TEST_01–TEST_03 on ORC, which use
    Spark. TEST_08/TEST_09 disable result comparison because the CURRENT_*
    values change per execution; TEST_10 checks CURRENT_DATE by hand.

    NOTE(review): ``queryType`` is read below (skip_test, prints, memory log)
    but is never assigned in this function — presumably a module-level
    constant; TODO confirm it is defined before this runs.
    """
    # Snapshot GPU memory so usage attributable to this suite can be logged.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["nation", "region", "customer", "orders", "lineitem"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            query = """select EXTRACT(YEAR FROM l_receiptdate) -
                        EXTRACT(YEAR FROM l_shipdate) as years_late,
                        EXTRACT(MONTH FROM l_receiptdate) -
                        EXTRACT(MONTH FROM l_shipdate) as months_late,
                        EXTRACT(DAY FROM l_receiptdate) -
                        EXTRACT(DAY FROM l_shipdate) as days_late
                    from lineitem where l_shipdate < DATE '1993-01-01'"""
            # ORC is compared against Spark instead of Drill for this query.
            if fileSchemaType == DataType.ORC:
                runTest.run_query(
                    bc, spark, query, queryId, queryType, worder, "",
                    acceptable_difference, use_percentage, fileSchemaType,
                )
            else:
                runTest.run_query(
                    bc, drill, query, queryId, queryType, worder, "",
                    acceptable_difference, use_percentage, fileSchemaType,
                )

            queryId = "TEST_02"
            query = """select o_orderkey as okey, o_custkey as ckey,
                        (EXTRACT(YEAR FROM o_orderdate) - 5) from orders
                    where o_orderstatus = 'O' order by okey"""
            if fileSchemaType == DataType.ORC:
                runTest.run_query(
                    bc, spark, query, queryId, queryType, worder, "",
                    acceptable_difference, use_percentage, fileSchemaType,
                )
            else:
                runTest.run_query(
                    bc, drill, query, queryId, queryType, worder, "",
                    acceptable_difference, use_percentage, fileSchemaType,
                )

            queryId = "TEST_03"
            query = """select orders.o_orderkey, orders.o_orderdate,
                        orders.o_orderstatus
                    from orders inner join lineitem
                    on lineitem.l_orderkey = orders.o_orderkey
                    where orders.o_orderkey < 30
                    and lineitem.l_orderkey < 20
                    order by orders.o_orderkey, lineitem.l_linenumber,
                        orders.o_custkey, lineitem.l_orderkey"""
            if fileSchemaType == DataType.ORC:
                runTest.run_query(
                    bc, spark, query, queryId, queryType, worder, "",
                    acceptable_difference, use_percentage, fileSchemaType,
                )
            else:
                runTest.run_query(
                    bc, drill, query, queryId, queryType, worder, "",
                    acceptable_difference, use_percentage, fileSchemaType,
                )

            queryId = "TEST_04"
            query = """select customer.c_nationkey, customer.c_name,
                        orders.o_orderdate, lineitem.l_receiptdate
                    from customer left outer join orders
                    on customer.c_custkey = orders.o_custkey
                    inner join lineitem
                    on lineitem.l_orderkey = orders.o_orderkey
                    where customer.c_nationkey = 3
                    and customer.c_custkey < 100
                    and orders.o_orderdate < '1990-01-01'
                    order by orders.o_orderkey, lineitem.l_linenumber"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            queryId = "TEST_05"
            query = """select orders.o_orderkey, orders.o_orderdate,
                        lineitem.l_receiptdate, orders.o_orderstatus
                    from orders inner join lineitem
                    on lineitem.l_receiptdate = orders.o_orderdate
                    where orders.o_orderkey < 30
                    and lineitem.l_orderkey < 20
                    order by orders.o_orderkey, lineitem.l_linenumber"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            # Tests for `CURRENT_DATE`, `CURRENT_TIME` and `CURRENT_TIMESTAMP`
            queryId = "TEST_06"
            query = """with current_table as (
                        select o_orderkey, current_date, o_custkey
                        from orders where o_orderkey < 350
                    ) select o_orderkey, o_custkey from current_table"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            # Note: As we don't have support for time dtype, then
            # `current_time` will return the same as `current_timestamp`
            queryId = "TEST_07"
            query = """with current_table as (
                        select o_orderkey, current_time, o_custkey,
                            current_timestamp
                        from orders where o_orderkey < 750
                    ) select o_orderkey, o_custkey from current_table
                    limit 50"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, "",
                acceptable_difference, use_percentage, fileSchemaType,
            )

            # As `current_date`, `current_time` and `current_timestamp`
            # always will return different values by each new execution,
            # let's not compare
            # NOTE(review): comparing="false" is the string "false", not the
            # boolean False — presumably what run_query expects; verify.
            queryId = "TEST_08"
            query = """select current_date, o_orderkey, current_time,
                        current_timestamp
                    from orders where o_orderkey < 750"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              "", acceptable_difference, use_percentage,
                              fileSchemaType, comparing="false")

            queryId = "TEST_09"
            query = """select orders.o_orderkey, CURRENT_DATE,
                        orders.o_orderdate, lineitem.l_receiptdate,
                        orders.o_orderstatus
                    from orders inner join lineitem
                    on lineitem.l_receiptdate = orders.o_orderdate
                    where orders.o_orderkey < 40
                    and lineitem.l_orderkey < 30
                    order by orders.o_orderkey, lineitem.l_linenumber"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              "", acceptable_difference, use_percentage,
                              fileSchemaType, comparing="false")

            # This test just compare the `CURRENT_DATE` value got from Blazing
            # against the `CURRENT DATE` from python (to validate the we get
            # the righ current date)
            queryId = "TEST_10"
            query = """select CURRENT_DATE from region"""
            result = bc.sql(query)
            import dask_cudf
            # Distributed results must be collected before conversion.
            if isinstance(result, dask_cudf.core.DataFrame):
                result = result.compute()
            current_blaz_date = result.to_pandas()['CURRENT_DATE'].astype(
                'str').iloc[0]
            from datetime import date
            current_python_date = str(date.today())
            if current_blaz_date != current_python_date:
                raise Exception(
                    "Blazing CURRENT_DATE and python CURRENT DATE are differents"
                )

            # NOTE(review): unlike sibling suites, the GENERATOR-mode early
            # break is commented out here — confirm that is intentional.
            # if Settings.execution_mode == ExecutionMode.GENERATOR:
            #     print("==============================")
            #     break

    executionTest()
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_lc, bc, nRals):
    # Entry point for the aggregation / join / order-by TPCH suite.
    # Creates the TPCH tables per file format, runs each query through
    # BlazingSQL and validates the result set against Apache Drill, and
    # logs the GPU memory delta for the whole run.
    # NOTE(review): `queryType`, `runTest`, `cs`, `gpuMemory`, `Settings`,
    # `skip_test`, `DataType`, `ExecutionMode` come from the enclosing module.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        #Read Data TPCH------------------------------------------------------------------------------------------------------------
        tables = [
            'nation', 'region', 'supplier', 'customer', 'lineitem', 'orders'
        ]
        # File formats exercised on each pass; JSON is still pending.
        data_types = [
            DataType.DASK_CUDF, DataType.CSV, DataType.ORC, DataType.PARQUET
        ]  # TODO json

        for fileSchemaType in data_types:
            # Skip formats not applicable to the current cluster setup.
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_lc, fileSchemaType, tables=tables)

            #Run Query -----------------------------------------------------------------------------
            worder = 1  #Parameter to indicate if its necessary to order the resulsets before compare them
            use_percentage = False
            acceptable_difference = 0.01

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_01'
            query = "select count(c_custkey) as c1, count(c_acctbal) as c2 from customer"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_02'
            query = "select count(n_nationkey), count(n_regionkey) from nation"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_03'
            query = "select count(s_suppkey), count(s_nationkey) from supplier"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            # Mixed aggregates; compared with percentage tolerance (True)
            # because sum/count division amplifies rounding differences.
            queryId = 'TEST_04'
            query = "select count(c_custkey), sum(c_acctbal), sum(c_acctbal)/count(c_acctbal), min(c_custkey), max(c_nationkey), (max(c_nationkey) + min(c_nationkey))/2 c_nationkey from customer where c_custkey < 100 group by c_nationkey"
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, '',
                acceptable_difference, True,
                fileSchemaType)  #TODO: Change sum/count for avg KC

            queryId = 'TEST_05'
            query = "select c.c_custkey, c.c_nationkey, n.n_regionkey from customer as c inner join nation as n on c.c_nationkey = n.n_nationkey where n.n_regionkey = 1 and c.c_custkey < 50"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            # Ordered queries pass 0 instead of worder: the result already
            # carries an ORDER BY, so no re-sorting before comparison.
            queryId = 'TEST_06'
            query = "select c_custkey, c_nationkey, c_acctbal from customer order by c_nationkey, c_custkey, c_acctbal"
            runTest.run_query(bc, drill, query, queryId, queryType, 0, '',
                              acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_07'
            query = "select c_custkey + c_nationkey, c_acctbal from customer order by 1, 2"
            runTest.run_query(bc, drill, query, queryId, queryType, 0, '',
                              acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_08'
            query = """select n1.n_nationkey as supp_nation, n2.n_nationkey as cust_nation, l.l_extendedprice * l.l_discount from supplier as s inner join lineitem as l on s.s_suppkey = l.l_suppkey inner join orders as o on o.o_orderkey = l.l_orderkey inner join customer as c on c.c_custkey = o.o_custkey inner join nation as n1 on s.s_nationkey = n1.n_nationkey inner join nation as n2 on c.c_nationkey = n2.n_nationkey where n1.n_nationkey = 1 and n2.n_nationkey = 2 and o.o_orderkey < 10000"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            # Intentionally empty result (c_custkey < 0).
            queryId = 'TEST_09'
            query = "select c_custkey, c_nationkey as nkey from customer where c_custkey < 0 and c_nationkey >=30"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_10'
            query = "select sin(c_acctbal), cos(c_acctbal), asin(c_acctbal), acos(c_acctbal), ln(c_acctbal), tan(c_acctbal), atan(c_acctbal), floor(c_acctbal), c_acctbal from customer"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_11'
            query = "select n1.n_nationkey as n1key, n2.n_nationkey as n2key, n1.n_nationkey + n2.n_nationkey from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 where n1.n_nationkey < 10 and n1.n_nationkey > 5"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_12'
            query = """select count(n1.n_nationkey) as n1key, count(n2.n_nationkey) as n2key, count(n2.n_nationkey) as cstar from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6"""
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, '',
                acceptable_difference, use_percentage, fileSchemaType
            )  #TODO: Change count(n2.n_nationkey) as cstar as count(*) as cstar when it will be supported KC

            queryId = 'TEST_13'
            query = "select o_orderkey, o_custkey from orders where o_orderkey < 10 and o_orderkey >= 1"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_14'
            query = "select 100168549 - sum(o_orderkey)/count(o_orderkey), 56410984/sum(o_totalprice), (123 - 945/max(o_orderkey))/(sum(81619/o_orderkey)/count(81619/o_orderkey)) from orders"
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, '',
                acceptable_difference, True,
                fileSchemaType)  #TODO: Change sum/count for avg KC

            queryId = 'TEST_15'
            query = "select o_orderkey, sum(o_totalprice)/count(o_orderstatus) from orders where o_custkey < 100 group by o_orderstatus, o_orderkey"
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, '',
                acceptable_difference, True,
                fileSchemaType)  #TODO: Change sum/count for avg KC

            queryId = 'TEST_16'
            query = "select o_orderkey, o_orderstatus from orders where o_custkey < 10 and o_orderstatus <> 'O' order by o_orderkey, o_orderstatus limit 50"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_17'
            query = "select count(o_orderstatus) from orders where o_orderstatus <> 'O'"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_18'
            query = "select count(o_orderkey), sum(o_orderkey), o_clerk from orders where o_custkey < 1000 group by o_clerk, o_orderstatus"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, True,
                              fileSchemaType)

            queryId = 'TEST_19'
            query = "select sum(o_orderkey)/count(o_orderkey) from orders group by o_orderstatus"
            runTest.run_query(
                bc, drill, query, queryId, queryType, worder, '',
                acceptable_difference, True,
                fileSchemaType)  #TODO: Change sum/count for avg KC

            queryId = 'TEST_20'
            query = "select count(o_shippriority), sum(o_totalprice) from orders group by o_shippriority"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_21'
            query = """with regionTemp as ( select r_regionkey, r_name from region where r_regionkey > 2 ), nationTemp as(select n_nationkey, n_regionkey as fkey, n_name from nation where n_nationkey > 3 order by n_nationkey) select regionTemp.r_name, nationTemp.n_name from regionTemp inner join nationTemp on regionTemp.r_regionkey = nationTemp.fkey"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            # In generator mode only the first file format is needed to
            # produce the validation files.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, spark, dir_data_file, bc, nRals):
    # Entry point for the timestamp-comparison suite. Runs only against ORC
    # (other formats pending) and validates every query against Spark; some
    # queries need a `query_spark` variant with explicit NULLS LAST because
    # Spark's default null ordering differs from BlazingSQL's.
    # NOTE(review): `drill` is accepted but unused here — all comparisons go
    # through `spark`.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["lineitem", "orders", "nation", "region", "customer"]
        data_types = [DataType.ORC]  # TODO gdf csv parquet json

        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            query = """select * from orders where o_orderdate > TIMESTAMP '1997-09-05 19:00:00' order by o_orderkey limit 10"""
            query_spark = """select * from orders where o_orderdate > TIMESTAMP '1997-09-05 19:00:00' order by o_orderkey nulls last limit 10"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                query_spark=query_spark,
            )

            queryId = "TEST_02"
            query = """select l_suppkey, l_shipdate from lineitem where l_shipdate < TIMESTAMP '1993-01-01 10:12:48' and l_suppkey < 100 order by l_orderkey"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_03"
            query = """select o_orderkey, o_orderdate from orders where o_orderdate >= TIMESTAMP '1997-09-05 19:00:00' order by o_orderkey limit 20"""
            query_spark = """select o_orderkey, o_orderdate from orders where o_orderdate >= TIMESTAMP '1997-09-05 19:00:00' order by o_orderkey nulls last limit 20"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                query_spark=query_spark,
            )

            queryId = "TEST_04"
            query = """select orders.o_orderkey, orders.o_orderdate, orders.o_orderstatus, lineitem.l_receiptdate from orders inner join lineitem on lineitem.l_orderkey = orders.o_orderkey where orders.o_orderkey < 70 and lineitem.l_orderkey < 120 and orders.o_orderdate < TIMESTAMP '1992-01-01 10:12:48' order by orders.o_orderkey, lineitem.l_linenumber, orders.o_custkey, lineitem.l_orderkey"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Implicit string-to-timestamp comparison in the WHERE clause.
            queryId = "TEST_05"
            query = """select customer.c_nationkey, customer.c_name, orders.o_orderdate from customer left outer join orders on customer.c_custkey = orders.o_custkey inner join lineitem on lineitem.l_orderkey = orders.o_orderkey where customer.c_nationkey = 3 and customer.c_custkey < 100 and orders.o_orderdate < '1990-01-01 20:00:00' order by orders.o_orderkey, lineitem.l_linenumber"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_06"
            query = """select orders.o_orderkey, orders.o_orderdate, orders.o_orderstatus from orders inner join lineitem on lineitem.l_orderkey = orders.o_orderkey where orders.o_orderkey < 20 and lineitem.l_orderkey < 16 order by orders.o_orderkey"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Explicit CAST to timestamp.
            queryId = "TEST_07"
            query = """select cast(o_orderdate as timestamp) from orders order by o_orderkey limit 5"""
            query_spark = """select cast(o_orderdate as timestamp) from orders order by o_orderkey nulls last limit 5"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                query_spark=query_spark,
            )

            queryId = "TEST_08"
            query = """select o_orderkey, cast(o_orderdate as timestamp) from orders where o_orderdate = date '1996-12-01' order by o_orderkey limit 5"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_09"
            query = """with dateTemp as ( select o_orderdate from orders where o_orderdate > TIMESTAMP '1960-05-05 12:34:55' order by o_orderkey ) select count(*) from dateTemp"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_10"
            query = """with dateTemp as ( select o_orderdate from orders where o_orderdate <= TIMESTAMP '1996-12-01 00:00:00' order by o_orderdate, o_orderkey limit 5 ) select o_orderdate as myDate from dateTemp order by o_orderdate desc"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_11"
            query = """with dateTemp as ( select o_orderdate from orders where o_orderdate = TIMESTAMP '1996-12-01 19:00:00' order by o_orderkey) select count(*) from dateTemp"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_12"
            query = """select o_orderkey, o_orderdate from orders where o_orderdate = date '1996-12-01' order by o_orderkey limit 5"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Generator mode only needs the first file format.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest()

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(drill, dir_data_lc, bc, nRals):
    # Entry point for the mixed-filesystem suite: registers HDFS and S3
    # filesystems on the BlazingContext, then creates each TPCH table from a
    # different (filesystem, format) combination and joins across them.
    # NOTE(review): the HDFS endpoint/IP and the S3 bucket are hard-coded and
    # credentials are blanked/masked — this test presumably only runs inside a
    # specific CI environment; verify before running elsewhere.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    queryType = "Mixed filesystem Test"

    def executionTest(queryType):
        # Register HDFS
        bc.hdfs("35.185.48.245", host="35.185.48.245", port=54310,
                user="******")
        # Strip everything before "DataSet" so the same relative layout can be
        # addressed on the remote filesystems.
        dir_df = dir_data_lc[dir_data_lc.find("DataSet"): len(dir_data_lc)]
        dir_data_fs_hdfs = "hdfs://35.185.48.245/" + dir_df

        # Register S3
        authority = "tpch_s3"
        # TODO percy kharo e2e-gpuci security
        bc.s3(
            authority,
            bucket_name="blazingsql-bucket",
            encryption_type=EncryptionType.NONE,
            access_key_id="",
            secret_key="",
        )
        dir_df = dir_data_lc[dir_data_lc.find("DataSet"): len(dir_data_lc)]
        dir_data_fs_s3 = "s3://" + authority + "/" + dir_df

        # parque local
        region_files = cs.get_filenames_for_table("region", dir_data_lc,
                                                  "parquet", "")
        bc.create_table("region", region_files)

        # csv local
        nation_files = cs.get_filenames_for_table("nation", dir_data_lc,
                                                  "psv", "")
        bc.create_table(
            "nation",
            nation_files,
            delimiter="|",
            dtype=cs.get_dtypes("nation"),
            names=cs.get_column_names("nation"),
        )

        # parquet from hdfs
        lineitem_files = cs.get_filenames_for_table(
            "lineitem", dir_data_lc, "parquet", dir_data_fs_hdfs
        )
        bc.create_table("lineitem", lineitem_files)

        # parquet from S3
        # NOTE(review): despite the comment, this passes "" (local root)
        # instead of dir_data_fs_s3 — confirm whether customer is really meant
        # to be read from S3.
        customer_files = cs.get_filenames_for_table(
            "customer", dir_data_lc, "parquet", ""
        )
        bc.create_table("customer", customer_files)

        # csv con hdfs
        supplier_files = cs.get_filenames_for_table(
            "supplier", dir_data_lc, "psv", dir_data_fs_hdfs
        )
        bc.create_table(
            "supplier",
            supplier_files,
            delimiter="|",
            dtype=cs.get_dtypes("supplier"),
            names=cs.get_column_names("supplier"),
        )

        # csv from S3
        orders_files = cs.get_filenames_for_table(
            "orders", dir_data_lc, "psv", dir_data_fs_s3
        )
        bc.create_table(
            "orders",
            orders_files,
            delimiter="|",
            dtype=cs.get_dtypes("orders"),
            names=cs.get_column_names("orders"),
        )

        # Run Query ------------------------------------------------------
        # Parameter to indicate if its necessary to order
        # the resulsets before compare them
        worder = 1
        use_percentage = False
        acceptable_difference = 0.01

        print("==============================")
        print(queryType)
        print("==============================")

        queryId = "TEST_01"
        # parquet-s3, csv-local, parque-local
        query = """select sum(c.c_custkey)/count(c.c_custkey), sum(c.c_acctbal)/count(c.c_acctbal), n.n_nationkey, r.r_regionkey from customer as c inner join nation as n on c.c_nationkey = n.n_nationkey inner join region as r on r.r_regionkey = n.n_regionkey group by n.n_nationkey, r.r_regionkey"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            True,
        )  # TODO: Change sum/count for avg KC

        queryId = "TEST_02"
        # parquet-s3, csv-s3
        query = """select sum(custKey)/count(custKey) from (select customer.c_custkey as custKey from (select min(o_custkey) as o_custkey from orders ) as tempOrders inner join customer on tempOrders.o_custkey = customer.c_custkey where customer.c_nationkey > 6) as joinedTables"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            True,
        )  # TODO: Change sum/count for avg KC

        queryId = "TEST_03"
        # csv-s3, parquet-hdfs
        query = """select o.o_orderkey, o.o_totalprice, l.l_partkey, l.l_returnflag from orders as o inner join lineitem as l on o.o_orderkey = l.l_orderkey where l.l_orderkey < 1000"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_04"
        # csv-local, parquet-local, parquet-s3
        query = """select sum(c.c_custkey)/count(c.c_custkey), sum(c.c_acctbal)/count(c.c_acctbal), n.n_nationkey, r.r_regionkey from customer as c inner join nation as n on c.c_nationkey = n.n_nationkey inner join region as r on r.r_regionkey = n.n_regionkey group by n.n_nationkey, r.r_regionkey"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            True,
        )  # TODO: Change sum/count for avg KC

        queryId = "TEST_05"
        # parquet-s3, csv-local
        query = """select c.c_custkey, c.c_nationkey, n.n_regionkey from customer as c inner join nation as n on c.c_nationkey = n.n_nationkey where n.n_regionkey = 1 and c.c_custkey < 50"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_06"
        # csv-hdfs, parquet-hdfs, parquet-s3, csv-local
        query = """select n1.n_nationkey as supp_nation, n2.n_nationkey as cust_nation, l.l_extendedprice * l.l_discount from supplier as s inner join lineitem as l on s.s_suppkey = l.l_suppkey inner join orders as o on o.o_orderkey = l.l_orderkey inner join customer as c on c.c_custkey = o.o_custkey inner join nation as n1 on s.s_nationkey = n1.n_nationkey inner join nation as n2 on c.c_nationkey = n2.n_nationkey where n1.n_nationkey = 1 and n2.n_nationkey = 2 and o.o_orderkey < 10000"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_07"
        # csv-local, parquet-local
        query = """with regionTemp as ( select r_regionkey, r_name from region where r_regionkey > 2 ), nationTemp as( select n_nationkey, n_regionkey as fkey, n_name from nation where n_nationkey > 3 order by n_nationkey ) select regionTemp.r_name, nationTemp.n_name from regionTemp inner join nationTemp on regionTemp.r_regionkey = nationTemp.fkey"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

    executionTest(queryType)

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, dir_data_file, bc, nRals):
    # Entry point for the COALESCE suite: exercises COALESCE over outer-join
    # results (literal fallbacks, column fallbacks, inside aggregates and
    # GROUP BY) per file format, validating against Drill.
    # Tests 05, 06 and 09 build their queries but have the run commented out
    # (presumably known failures — TODO confirm before re-enabling).
    start_mem = gpuMemory.capture_gpu_memory_usage()

    queryType = "Coalesce"

    def executionTest(queryType):
        tables = ["nation", "region", "customer", "orders", "lineitem"]
        # TODO json
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            # if fileSchemaType == DataType.PARQUET : print('Save file arrow')

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            query = """select n.n_nationkey, COALESCE(r.r_regionkey,-1) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey where n.n_nationkey < 10"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                print_result=True,
            )

            queryId = "TEST_02"
            query = """select COALESCE(orders.o_orderkey, 100), COALESCE(orders.o_totalprice, 0.01) from customer left outer join orders on customer.c_custkey = orders.o_custkey where customer.c_nationkey = 3 and customer.c_custkey < 500"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # COALESCE falling back to a column instead of a literal.
            queryId = "TEST_03"
            query = """select COALESCE(orders.o_orderkey, customer.c_custkey), COALESCE(orders.o_totalprice, customer.c_acctbal) from customer left outer join orders on customer.c_custkey = orders.o_custkey where customer.c_nationkey = 3 and customer.c_custkey < 500"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_04"
            query = """select customer.c_custkey, orders.o_orderkey, COALESCE(orders.o_custkey,123456) from customer left outer join orders on customer.c_custkey = orders.o_custkey where customer.c_nationkey = 3 and customer.c_custkey < 500"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Disabled: query kept for reference, run commented out.
            queryId = "TEST_05"
            query = """select COUNT(DISTINCT(COALESCE(n1.n_regionkey,32))), AVG(COALESCE(n1.n_regionkey,32)) from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6"""
            # runTest.run_query(bc, drill, query, queryId, queryType, worder,
            # '', acceptable_difference, use_percentage, fileSchemaType)

            # Disabled: query kept for reference, run commented out.
            queryId = "TEST_06"
            query = """select SUM(COALESCE(n2.n_nationkey, 100)), COUNT(DISTINCT(COALESCE(n1.n_nationkey,32))), n2.n_regionkey as n1key from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 GROUP BY n2.n_regionkey"""
            # runTest.run_query(bc, drill, query, queryId, queryType, worder,
            # '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = "TEST_07"
            query = """select MIN(COALESCE(n.n_nationkey, r.r_regionkey)), MAX(COALESCE(n.n_nationkey, 8)) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_08"
            query = """select AVG(CAST(COALESCE(n.n_nationkey, r.r_regionkey) AS DOUBLE)), MAX(COALESCE(n.n_nationkey, 8)), COUNT(COALESCE(n.n_nationkey, 12)), n.n_nationkey from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey GROUP BY n.n_nationkey"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # queryId = 'TEST_09'
            # query = "select SUM(COALESCE(n2.n_nationkey, 100)),
            # COUNT(DISTINCT(COALESCE(n1.n_nationkey,32))),
            # COALESCE(n2.n_regionkey, 100) as n1key from nation as n1
            # full outer join nation as n2
            # on n1.n_nationkey = n2.n_nationkey + 6
            # GROUP BY COALESCE(n2.n_regionkey, 100)"
            # runTest.run_query(bc, drill, query, queryId, queryType, worder,
            # '', acceptable_difference, use_percentage, fileSchemaType)

            # COALESCE over string columns.
            queryId = "TEST_10"
            query = "SELECT COALESCE(l_shipinstruct, l_comment) FROM lineitem"
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_11"
            query = """select n.n_nationkey, COALESCE(r.r_comment, n.n_comment) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_12"
            query = """SELECT COALESCE(l.l_shipinstruct, o.o_orderstatus) FROM lineitem l inner join orders o on l.l_orderkey = o.o_orderkey where o.o_totalprice < 1574.23"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_13"
            query = """ WITH t1_l AS ( SELECT * FROM orders ), t1_r AS ( SELECT * FROM customer ), main_lr AS( SELECT COALESCE(o.o_comment, c.c_comment) AS info FROM t1_l o FULL JOIN t1_r c ON o.o_custkey = c.c_custkey AND o.o_orderkey = c.c_nationkey ) SELECT * FROM main_lr """
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_14"
            query = """ WITH ltable3 AS ( select lineitem.l_orderkey as orderkey, lineitem.l_linestatus as linestatus from lineitem where mod(lineitem.l_orderkey, 2) = 0 ), rtable1 AS ( select lineitem.l_orderkey as orderkey, lineitem.l_linestatus as linestatus from lineitem where mod(lineitem.l_partkey, 6) = 0 ), rtable2 AS ( select lineitem.l_orderkey as orderkey, lineitem.l_linestatus as linestatus from lineitem where mod(lineitem.l_suppkey, 4) = 0 ), rtable3 AS ( select coalesce(l.orderkey, r.orderkey) as orderkey, coalesce(l.linestatus, r.linestatus) as linestatus from rtable1 l full join rtable2 r on l.orderkey = r.orderkey -- and l.linestatus = r.linestatus
 ), lastjoin AS ( select l.orderkey, coalesce(l.linestatus, r.linestatus) as linestatus from ltable3 l full join rtable3 r on l.orderkey = r.orderkey and l.linestatus = r.linestatus ) select * from lastjoin """
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # Generator mode only needs the first file format.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

    executionTest(queryType)

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, spark, dir_data_file, bc, nRals):
    # Entry point for the ORDER BY / LIMIT suite. Most queries validate
    # against Drill; for ORC, TEST_02 and TEST_07 switch to Spark with an
    # explicit NULLS LAST/FIRST variant because null ordering differs between
    # the engines.
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["orders", "customer", "partsupp", "lineitem"]
        data_types = [
            DataType.DASK_CUDF, DataType.CUDF, DataType.CSV, DataType.ORC,
            DataType.PARQUET
        ]  # TODO json

        #Create Tables ------------------------------------------------------------------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            #Run Query -----------------------------------------------------------------------------
            # worder is 0 here: these queries carry their own ORDER BY, so
            # results are compared without re-sorting (except where a literal
            # 1 is passed per test).
            worder = 0
            use_percentage = False
            acceptable_difference = 0.01

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_01'
            query = "select o_orderkey from orders order by 1 limit 10"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_02'
            query = "select o_orderdate, o_orderkey, o_clerk from orders order by o_orderdate, o_orderkey, o_custkey, o_orderstatus, o_clerk limit 1000"
            query_spark = "select o_orderdate, o_orderkey, o_clerk from orders order by o_orderdate nulls last, o_orderkey nulls last, o_custkey nulls last, o_orderstatus nulls last, o_clerk nulls last limit 1000"
            if fileSchemaType == DataType.ORC:
                runTest.run_query(bc, spark, query, queryId, queryType,
                                  worder, '', acceptable_difference,
                                  use_percentage, fileSchemaType,
                                  query_spark=query_spark)
            else:
                runTest.run_query(bc, drill, query, queryId, queryType,
                                  worder, '', acceptable_difference,
                                  use_percentage, fileSchemaType)

            queryId = 'TEST_03'
            query = """select o_orderkey from orders where o_custkey < 300 and o_orderdate >= '1990-08-01' order by o_orderkey limit 50"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_04'
            query = """select ps_partkey, ps_availqty from partsupp where ps_availqty < 3 and ps_availqty >= 1 order by ps_partkey, ps_availqty limit 50"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            # queryId = 'TEST_05'
            # query = """select o_orderkey, o_orderstatus from orders where o_custkey < 10 and o_orderstatus = 'O'
            # order by o_orderkey, o_orderstatus limit 50"""
            # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_06'
            query = """select orders.o_totalprice, customer.c_name from orders inner join customer on orders.o_custkey = customer.c_custkey order by customer.c_name, orders.o_orderkey limit 10"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            # UNION ALL of two differently-ordered LIMIT branches; compared
            # with ordering enabled (1) because the union's overall row order
            # is not deterministic.
            queryId = 'TEST_07'
            query = """(select l_shipdate, l_orderkey, l_linestatus from lineitem where l_linenumber = 1 order by 1, 2, 3, l_linenumber limit 10) union all (select l_shipdate, l_orderkey, l_linestatus from lineitem where l_linenumber = 1 order by 1 desc, 2, 3, l_linenumber limit 10)"""
            query_spark = """(select l_shipdate, l_orderkey, l_linestatus from lineitem where l_linenumber = 1 order by 1 nulls last, 2 nulls last, 3 nulls last, l_linenumber nulls last limit 10) union all (select l_shipdate, l_orderkey, l_linestatus from lineitem where l_linenumber = 1 order by 1 desc nulls first, 2 nulls last, 3 nulls last, l_linenumber nulls last limit 10)"""
            if fileSchemaType == DataType.ORC:
                runTest.run_query(bc, spark, query, queryId, queryType, 1,
                                  '', acceptable_difference, use_percentage,
                                  fileSchemaType, query_spark=query_spark)
            else:
                runTest.run_query(bc, drill, query, queryId, queryType, 1,
                                  '', acceptable_difference, use_percentage,
                                  fileSchemaType)

            # Intentionally empty result (c_custkey < 0).
            queryId = 'TEST_08'
            query = "select c_custkey from customer where c_custkey < 0 order by c_custkey limit 40"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_09'
            query = "select c_custkey, c_name from customer where c_custkey < 10 order by 1 limit 30"
            runTest.run_query(bc, drill, query, queryId, queryType, worder,
                              '', acceptable_difference, use_percentage,
                              fileSchemaType)

            # No ORDER BY: compare with ordering enabled (1).
            queryId = 'TEST_10'
            query = "select c_custkey, c_name from customer where c_custkey < 10 limit 30"
            runTest.run_query(bc, drill, query, queryId, queryType, 1, '',
                              acceptable_difference, use_percentage,
                              fileSchemaType)

            queryId = 'TEST_11'
            query = "select avg(CAST(c_custkey AS DOUBLE)), min(c_custkey) from customer limit 5"
            runTest.run_query(bc, drill, query, queryId, queryType, 1, '',
                              acceptable_difference, use_percentage,
                              fileSchemaType)

            # if Settings.execution_mode == ExecutionMode.GENERATOR:
            # print("==============================")
            # break

    executionTest()

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)
def main(dask_client, drill, spark, dir_data_file, bc, nRals):
    """Run the group-by / aggregation end-to-end suite.

    For every supported file schema, (re)creates the test tables and runs a
    fixed set of GROUP BY queries through BlazingSQL, comparing each result
    against a reference engine (Drill by default; Spark for the cases where
    Drill's null/date handling differs). GPU memory usage is sampled before
    and after the whole run and logged.
    """
    start_mem = gpuMemory.capture_gpu_memory_usage()

    def executionTest():
        tables = ["customer", "lineitem", "orders"]
        # TODO json
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]

        # Create tables, then run every query, once per file schema ---------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Comparison parameters: order result sets before comparing,
            # and allow a small absolute numeric difference.
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            def check(engine, test_id, sql):
                # One shared wrapper for the run_query boilerplate that is
                # identical across every test in this suite.
                runTest.run_query(
                    bc,
                    engine,
                    sql,
                    test_id,
                    queryType,
                    worder,
                    "",
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )

            check(drill, "TEST_01", """select count(c_custkey), sum(c_acctbal),
                    avg(c_acctbal), min(c_custkey), max(c_nationkey),
                    c_nationkey from customer group by c_nationkey""")

            check(drill, "TEST_02", """select count(c_custkey), sum(c_acctbal),
                    count(c_acctbal), avg(c_acctbal), min(c_custkey),
                    max(c_custkey), c_nationkey from customer
                    where c_custkey < 50 group by c_nationkey""")

            check(drill, "TEST_03", """select count(c_custkey) + sum(c_acctbal) +
                    avg(c_acctbal), min(c_custkey) - max(c_nationkey),
                    c_nationkey * 2 as key from customer
                    where c_nationkey * 2 < 40 group by c_nationkey * 2""")

            check(drill, "TEST_04", """select c_nationkey, count(c_acctbal)
                    from customer group by c_nationkey, c_custkey""")

            # NOTE(review): TEST_05/TEST_06 share the ORC skip (mirrored by
            # TEST_10/TEST_11 below) — confirm the guard scope is intended.
            if fileSchemaType != DataType.ORC:
                check(drill, "TEST_05", """select l.l_suppkey, l.l_linestatus,
                        min(l.l_commitdate), max(l.l_commitdate),
                        max(l.l_orderkey), count(l.l_orderkey)
                        FROM ( SELECT l_suppkey, l_linestatus, l_shipmode,
                        l_orderkey, l_commitdate from lineitem
                        WHERE l_linenumber = 6
                        and l_commitdate < DATE '1993-01-01' ) AS l
                        LEFT OUTER JOIN orders AS o
                        ON l.l_orderkey + 100 = o.o_orderkey
                        GROUP BY l.l_suppkey, l.l_linestatus
                        order by l.l_suppkey, l.l_linestatus
                        limit 10000""")

                check(drill, "TEST_06", """select l.l_suppkey, l.l_linestatus,
                        max(l.l_shipmode), max(l.l_orderkey),
                        count(l.l_orderkey) FROM lineitem AS l
                        LEFT OUTER JOIN orders AS o
                        ON l.l_orderkey + 100 = o.o_orderkey
                        GROUP BY l.l_suppkey, l.l_linestatus""")

            check(drill, "TEST_07", """SELECT count(distinct l_orderkey),
                    count(distinct l_partkey), count(distinct l_suppkey)
                    FROM lineitem
                    GROUP BY l_orderkey, l_partkey, l_suppkey""")

            # Failed test with nulls
            # Reported issue: https://github.com/BlazingDB/blazingsql/issues/1403
            testsWithNulls = Settings.data["RunSettings"]["testsWithNulls"]
            if testsWithNulls != "true":
                check(spark, "TEST_08", """SELECT count(distinct l_partkey,
                        l_suppkey) FROM lineitem
                        GROUP BY l_partkey, l_suppkey""")

            # Drill's date handling differs for ORC, so validate against
            # Spark for that schema only.
            query_09 = """select l_orderkey, l_extendedprice, l_shipdate
                    from lineitem where l_orderkey < 100
                    group by l_orderkey, l_extendedprice,
                    l_shipdate, l_linestatus"""
            if fileSchemaType == DataType.ORC:
                check(spark, "TEST_09", query_09)
            else:
                check(drill, "TEST_09", query_09)

            if fileSchemaType != DataType.ORC:
                check(drill, "TEST_10", """select l.l_suppkey, l.l_linestatus,
                        min(l.l_commitdate), max(l.l_commitdate),
                        max(l.l_orderkey), count(l.l_orderkey)
                        FROM ( SELECT l_suppkey, l_linestatus, l_shipmode,
                        l_orderkey, l_commitdate from lineitem
                        WHERE l_linenumber = 6
                        and l_commitdate < DATE '1993-01-01' ) AS l
                        RIGHT OUTER JOIN orders AS o
                        ON l.l_orderkey + 100 = o.o_orderkey
                        GROUP BY l.l_suppkey, l.l_linestatus
                        order by l.l_suppkey, l.l_linestatus
                        limit 10000""")

                check(drill, "TEST_11", """select l.l_suppkey, l.l_linestatus,
                        max(l.l_shipmode), max(l.l_orderkey),
                        count(l.l_orderkey) FROM lineitem AS l
                        RIGHT OUTER JOIN orders AS o
                        ON l.l_orderkey + 100 = o.o_orderkey
                        GROUP BY l.l_suppkey, l.l_linestatus""")

            # if Settings.execution_mode == ExecutionMode.GENERATOR:
            #     print("==============================")
            #     break

    executionTest()

    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)