def main(): print("**init end2end**") Execution.getArgs() nvmlInit() targetTestGroups = Settings.data["RunSettings"]["targetTestGroups"] # only innerJoinsTest will be with progress bar useProgressBar = False if "innerJoinsTest" in targetTestGroups: useProgressBar = True print("Using progress bar: ", useProgressBar) drill, spark = init_comparators() bc, dask_client = init_context(useProgressBar=useProgressBar) runE2ETest(bc, dask_client, drill, spark) return E2EResults()
# Default to plain string placeholders so main() can be called even when the
# comparison engines are not created (same pattern as the other runners).
drill = "drill"
spark = "spark"

compareResults = True
if "compare_results" in Settings.data["RunSettings"]:
    compareResults = Settings.data["RunSettings"]["compare_results"]

if ((Settings.execution_mode == ExecutionMode.FULL and compareResults == "true")
        or Settings.execution_mode == ExecutionMode.GENERATOR):
    # Create Table Drill ------------------------------------------------
    from pydrill.client import PyDrill

    drill = PyDrill(host="localhost", port=8047)
    cs.init_drill_schema(drill, Settings.data["TestSettings"]["dataDirectory"])

    # Create Table Spark -------------------------------------------------
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("timestampTest").getOrCreate()
    cs.init_spark_schema(spark, Settings.data["TestSettings"]["dataDirectory"])

# Create Context For BlazingSQL
bc, dask_client = init_context()

nRals = Settings.data["RunSettings"]["nRals"]

main(dask_client, drill, spark,
     Settings.data["TestSettings"]["dataDirectory"], bc, nRals)

if Settings.execution_mode != ExecutionMode.GENERATOR:
    runTest.save_log()
    gpuMemory.print_log_gpu_memory()
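# `init_context` is shared by all of these runners but its body is not shown
# in this excerpt. A minimal sketch, assuming a BlazingContext optionally
# backed by a dask-cuda cluster when more than one RAL is requested (the
# cluster wiring and the `enable_progress_bar` kwarg are assumptions):
def init_context(useProgressBar=False):
    from blazingsql import BlazingContext

    dask_client = None
    if int(Settings.data["RunSettings"]["nRals"]) > 1:
        # Distributed runs attach a local CUDA cluster.
        from dask.distributed import Client
        from dask_cuda import LocalCUDACluster

        dask_client = Client(LocalCUDACluster())

    bc = BlazingContext(dask_client=dask_client,
                        enable_progress_bar=useProgressBar)
    return bc, dask_client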
def executionTest(queryType):
    tables = cs.tpchTables
    data_types = [DataType.DASK_CUDF, DataType.CUDF, DataType.CSV,
                  DataType.ORC, DataType.PARQUET]  # TODO json

    bc, dask_client = init_context()

    # Create Tables -----------------------------------------------------
    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables)

        # Run Query -------------------------------------------------------
        worder = 1
        use_percentage = False
        acceptable_difference = 0.01

        queryType = 'Aggregations without group by Test'
        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_07'
        query = "select COUNT(n1.n_nationkey) as n1key, COUNT(DISTINCT(n2.n_nationkey + n1.n_nationkey)) as n2key from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 10"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryType = 'Coalesce Test'
        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_02'
        query = "select COALESCE(orders.o_orderkey, 100), COALESCE(orders.o_totalprice, 0.01) from customer left outer join orders on customer.c_custkey = orders.o_custkey where customer.c_nationkey = 3 and customer.c_custkey < 500"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_03'
        query = "select COALESCE(orders.o_orderkey, customer.c_custkey), COALESCE(orders.o_totalprice, customer.c_acctbal) from customer left outer join orders on customer.c_custkey = orders.o_custkey where customer.c_nationkey = 3 and customer.c_custkey < 500"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_05'
        query = "select COUNT(DISTINCT(COALESCE(n1.n_regionkey,32))), AVG(COALESCE(n1.n_regionkey,32)) from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)
        # WSM NEED TO REVISIT THIS

        queryId = 'TEST_06'
        query = "select SUM(COALESCE(n2.n_nationkey, 100)), COUNT(DISTINCT(COALESCE(n1.n_nationkey,32))), n2.n_regionkey as n1key from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 GROUP BY n2.n_regionkey"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)
        # WSM NEED TO REVISIT THIS

        queryId = 'TEST_07'
        query = "select MIN(COALESCE(n.n_nationkey, r.r_regionkey)), MAX(COALESCE(n.n_nationkey, 8)) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_08'
        query = "select AVG(COALESCE(n.n_nationkey, r.r_regionkey)), MAX(COALESCE(n.n_nationkey, 8)), COUNT(COALESCE(n.n_nationkey, 12)), n.n_nationkey from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey GROUP BY n.n_nationkey"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_09'
        query = "select SUM(COALESCE(n2.n_nationkey, 100)), COUNT(DISTINCT(COALESCE(n1.n_nationkey,32))), COALESCE(n2.n_regionkey, 100) as n1key from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 GROUP BY COALESCE(n2.n_regionkey, 100)"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)
        # WSM NEED TO REVISIT THIS

        queryType = 'Common Table Expressions Test'
        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_04'
        query = """with ordersTemp as (
                select min(orders.o_orderkey) as priorityKey, o_custkey
                from orders group by o_custkey
            ), ordersjoin as (
                select orders.o_orderkey,
                    orders.o_custkey/(orders.o_custkey + 1) as o_custkey,
                    (ordersTemp.priorityKey + 1) as priorityKey
                from orders
                inner join ordersTemp on (ordersTemp.priorityKey = orders.o_orderkey)
            )
            select (customer.c_custkey + 1)/(customer.c_custkey - customer.c_custkey + 1)
            from customer
            inner join ordersjoin on ordersjoin.o_custkey = customer.c_custkey
            where (customer.c_custkey > 1 or customer.c_custkey < 100)"""
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage, fileSchemaType)
        # WSM NEED TO REVISIT THIS

        queryType = 'Count Distinct Test'
        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_07'
        query = "select count(distinct(o_custkey)), count(distinct(o_totalprice)), sum(o_orderkey) from orders group by o_custkey"  # count(distinct(o_orderdate)),
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_08'
        query = "select COUNT(DISTINCT(n.n_nationkey)), AVG(r.r_regionkey) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', 0.01, use_percentage, fileSchemaType)

        queryId = 'TEST_09'
        query = "select MIN(n.n_nationkey), MAX(r.r_regionkey), COUNT(DISTINCT(n.n_nationkey + r.r_regionkey)) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_10'
        query = "select COUNT(DISTINCT(n1.n_nationkey)) as n1key, COUNT(DISTINCT(n2.n_nationkey)) as n2key from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_regionkey"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_11'
        query = "select r.r_regionkey, n.n_nationkey, COUNT(n.n_nationkey), COUNT(DISTINCT(r.r_regionkey)), SUM(DISTINCT(n.n_nationkey + r.r_regionkey)) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey GROUP BY r.r_regionkey, n.n_nationkey"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_12'
        query = "select n1.n_regionkey, n2.n_nationkey, MIN(n1.n_regionkey), MAX(n1.n_regionkey), AVG(n2.n_nationkey) from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 GROUP BY n1.n_regionkey, n2.n_nationkey"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryType = 'Count without group by Test'
        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_01'
        query = "select count(*), count(n_nationkey) from nation"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_02'
        query = "select count(n_nationkey), count(*) from nation group by n_nationkey"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryType = 'Predicates with nulls'
        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_06'
        query = """select COUNT(n.n_nationkey), AVG(r.r_regionkey)
            from nation as n left outer join region as r
            on n.n_nationkey = r.r_regionkey
            WHERE n.n_regionkey IS NULL"""
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage, fileSchemaType,
                          print_result=True)

        queryId = 'TEST_07'
        query = """select n.n_nationkey, n.n_name, r.r_regionkey, r.r_name
            from nation as n left outer join region as r
            on n.n_nationkey = r.r_regionkey
            WHERE r.r_name IS NULL"""
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_08'
        # Core dump when the query starts
        query = """select n.n_nationkey, n.n_name, r.r_regionkey, r.r_name
            from nation as n left outer join region as r
            on n.n_nationkey = r.r_regionkey
            WHERE n.n_name IS NOT NULL"""
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryType = 'Single Node From Local Test'
        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_01'
        query = "select count(*), count(n_nationkey) from nation"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_02'
        query = "select count(n_nationkey), count(*) from nation group by n_nationkey"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_04'
        query = """select count(c_custkey), sum(c_acctbal), avg(c_acctbal),
            min(c_custkey), max(c_nationkey),
            (max(c_nationkey) + min(c_nationkey))/2 c_nationkey
            from customer where c_custkey < 100 group by c_nationkey"""
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

        queryType = 'Tables From Pandas Test'
        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_04'
        query = "select count(c_custkey), sum(c_acctbal), avg(c_acctbal), min(c_custkey), max(c_nationkey), (max(c_nationkey) + min(c_nationkey))/2 c_nationkey from customer where c_custkey < 100 group by c_nationkey"
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, '', 0.01, use_percentage, fileSchemaType)

        queryType = 'Union Test'
        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_03'
        query = "(select o_orderkey, o_totalprice as key from orders where o_orderkey < 100) union all (select o_orderkey, o_custkey as key from orders where o_orderkey < 300 and o_orderkey >= 200)"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage, fileSchemaType)

        queryType = 'Where clause Test'
        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_10'
        query = "select c_custkey, c_nationkey as nkey from customer where -c_nationkey + c_acctbal > 750.3"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_11'
        query = "select c_custkey, c_nationkey as nkey from customer where -c_nationkey + c_acctbal > 750"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_09'
        query = """select o_orderkey as okey, o_custkey as ckey, o_orderdate as odate
            from orders
            where o_orderstatus = 'O' and o_orderpriority = '1-URGENT'
            order by okey"""
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_10'
        query = """select max(o_totalprice) as max_price, min(o_orderdate) as min_orderdate
            from orders where o_orderdate = '1998-08-01'"""
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage, fileSchemaType)

        queryId = 'TEST_10'
        query = """select max(o_totalprice) as max_price, min(o_orderdate) as min_orderdate
            from orders where o_orderdate > '1998-08-01'"""
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage, fileSchemaType,
                          print_result=True)

        queryType = 'New Queries'
        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_12'
        query = "select count(n1.n_nationkey) as n1key, count(n2.n_nationkey) as n2key, count(*) as cstar from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage, fileSchemaType)

        queryType = 'Concat Test'
        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_09'
        query = """select o.o_orderkey, c.c_name || '-' || (c.c_custkey + 1),
            o.o_orderstatus
            from orders o
            inner join customer c on o.o_custkey = c.c_custkey
            where c.c_custkey < 20"""

        queryId = 'TEST_04'
        query = """select c_custkey, SUBSTRING(c_name, 1, 8) from customer
            where c_name between 'Customer#000000009' and 'Customer#0000000011'"""
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage, fileSchemaType)
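# In these suites, executionTest is normally wrapped by a module-level main()
# that records GPU memory around the run. A sketch of that wrapper, assuming
# the gpuMemory helpers used elsewhere in this framework and that
# executionTest reads drill/dir_data_file/nRals as module globals (the
# queryType label here is illustrative):
def main(dask_client, drill, dir_data_file, bc, nRals):
    queryType = 'Smoke Test'

    start_mem = gpuMemory.capture_gpu_memory_usage()
    executionTest(queryType)
    end_mem = gpuMemory.capture_gpu_memory_usage()
    gpuMemory.log_memory_usage(queryType, start_mem, end_mem)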
def main(): print("**init end2end**") Execution.getArgs() nvmlInit() dir_data_file = Settings.data["TestSettings"]["dataDirectory"] nRals = Settings.data["RunSettings"]["nRals"] drill = "drill" spark = "spark" compareResults = True if "compare_results" in Settings.data["RunSettings"]: compareResults = Settings.data["RunSettings"]["compare_results"] if (Settings.execution_mode == ExecutionMode.FULL and compareResults == "true") or Settings.execution_mode == ExecutionMode.GENERATOR: # Create Table Drill ----------------------------------------- from pydrill.client import PyDrill drill = PyDrill(host="localhost", port=8047) createSchema.init_drill_schema( drill, Settings.data["TestSettings"]["dataDirectory"], bool_test=True) createSchema.init_drill_schema( drill, Settings.data["TestSettings"]["dataDirectory"], smiles_test=True, fileSchemaType=DataType.PARQUET) # Create Table Spark ------------------------------------------------- from pyspark.sql import SparkSession spark = SparkSession.builder.appName("allE2ETest").getOrCreate() createSchema.init_spark_schema( spark, Settings.data["TestSettings"]["dataDirectory"]) createSchema.init_spark_schema( spark, Settings.data["TestSettings"]["dataDirectory"], smiles_test=True, fileSchemaType=DataType.PARQUET) targetTestGroups = Settings.data["RunSettings"]["targetTestGroups"] # only innerJoinsTest will be with progress bar useProgressBar = False if "innerJoinsTest" in targetTestGroups: useProgressBar = True print("Using progress bar: ", useProgressBar) # Create Context For BlazingSQL bc, dask_client = init_context(useProgressBar=useProgressBar) runAllTests = ( len(targetTestGroups) == 0 ) # if targetTestGroups was empty the user wants to run all the tests if runAllTests or ("hiveFileTest" in targetTestGroups): hiveFileTest.main(dask_client, spark, dir_data_file, bc, nRals) if runAllTests or ("aggregationsWithoutGroupByTest" in targetTestGroups): aggregationsWithoutGroupByTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("coalesceTest" in targetTestGroups): coalesceTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("columnBasisTest" in targetTestGroups): columnBasisTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("commonTableExpressionsTest" in targetTestGroups): commonTableExpressionsTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("countDistinctTest" in targetTestGroups): countDistinctTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("countWithoutGroupByTest" in targetTestGroups): countWithoutGroupByTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("dateTest" in targetTestGroups): dateTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("timestampTest" in targetTestGroups): timestampTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("toTimestampTest" in targetTestGroups): toTimestampTest.main(dask_client, spark, dir_data_file, bc, nRals) if runAllTests or ("dayOfWeekTest" in targetTestGroups): dayOfWeekTest.main(dask_client, spark, dir_data_file, bc, nRals) if runAllTests or ("fullOuterJoinsTest" in targetTestGroups): fullOuterJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("groupByTest" in targetTestGroups): groupByTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("GroupByWitoutAggregations" in targetTestGroups): GroupByWitoutAggregations.main(dask_client, drill, dir_data_file, 
bc, nRals) if runAllTests or ("innerJoinsTest" in targetTestGroups): innerJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("crossJoinsTest" in targetTestGroups): crossJoinsTest.main(dask_client, spark, dir_data_file, bc, nRals) if runAllTests or ("leftOuterJoinsTest" in targetTestGroups): leftOuterJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("nonEquiJoinsTest" in targetTestGroups): nonEquiJoinsTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) # loadDataTest.main(dask_client, bc) #check this if runAllTests or ("nestedQueriesTest" in targetTestGroups): nestedQueriesTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("orderbyTest" in targetTestGroups): orderbyTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("predicatesWithNulls" in targetTestGroups): predicatesWithNulls.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("stringTests" in targetTestGroups): stringTests.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("tablesFromPandasTest" in targetTestGroups): tablesFromPandasTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("unaryOpsTest" in targetTestGroups): unaryOpsTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("unifyTablesTest" in targetTestGroups): unifyTablesTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("unionTest" in targetTestGroups): unionTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("useLimitTest" in targetTestGroups): useLimitTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("whereClauseTest" in targetTestGroups): whereClauseTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("bindableAliasTest" in targetTestGroups): bindableAliasTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("booleanTest" in targetTestGroups): booleanTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("caseTest" in targetTestGroups): caseTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("castTest" in targetTestGroups): castTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("concatTest" in targetTestGroups): concatTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("literalTest" in targetTestGroups): literalTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("dirTest" in targetTestGroups): dirTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) # HDFS is not working yet # fileSystemHdfsTest.main(dask_client, drill, dir_data_file, bc) # HDFS is not working yet # mixedFileSystemTest.main(dask_client, drill, dir_data_file, bc) if runAllTests or ("likeTest" in targetTestGroups): likeTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("substringTest" in targetTestGroups): substringTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("stringCaseTest" in targetTestGroups): stringCaseTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("wildCardTest" in targetTestGroups): wildCardTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("tpchQueriesTest" in targetTestGroups): tpchQueriesTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("roundTest" in 
targetTestGroups): roundTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("fileSystemLocalTest" in targetTestGroups): fileSystemLocalTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("messageValidationTest" in targetTestGroups): messageValidationTest.main(dask_client, drill, dir_data_file, bc, nRals) testsWithNulls = Settings.data["RunSettings"]["testsWithNulls"] if testsWithNulls != "true": if Settings.execution_mode != ExecutionMode.GPUCI: if runAllTests or ("fileSystemS3Test" in targetTestGroups): fileSystemS3Test.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("fileSystemGSTest" in targetTestGroups): fileSystemGSTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("loggingTest" in targetTestGroups): loggingTest.main(dask_client, dir_data_file, bc, nRals) # timestampdiffTest.main(dask_client, spark, dir_data_file, bc, nRals) #TODO re enable this test once we have the new version of dask # https://github.com/dask/distributed/issues/4645 # https://github.com/rapidsai/cudf/issues/7773 #if runAllTests or ("smilesTest" in targetTestGroups): # smilesTest.main(dask_client, spark, dir_data_file, bc, nRals) if testsWithNulls != "true": if runAllTests or ("jsonTest" in targetTestGroups): jsonTest.main(dask_client, drill, dir_data_file, bc, nRals) if runAllTests or ("windowFunctionTest" in targetTestGroups): windowFunctionTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if runAllTests or ("windowNoPartitionTest" in targetTestGroups): windowNoPartitionTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if testsWithNulls != "true": if runAllTests or ("concurrentTest" in targetTestGroups): concurrentTest.main(dask_client, drill, dir_data_file, bc, nRals) if testsWithNulls == "true": if Settings.execution_mode != ExecutionMode.GPUCI: if runAllTests or ("tablesFromSQL" in targetTestGroups): tablesFromSQL.main(dask_client, drill, dir_data_file, bc, nRals) # WARNING!!! This Test must be the last one to test ------------------------------------------------------------------------------------------------------------------------------------------- if runAllTests or ("configOptionsTest" in targetTestGroups): configOptionsTest.main(dask_client, drill, spark, dir_data_file, bc, nRals) if Settings.execution_mode != ExecutionMode.GENERATOR: result, error_msgs = runTest.save_log( Settings.execution_mode == ExecutionMode.GPUCI) max = 0 for i in range(0, len(Settings.memory_list)): if (Settings.memory_list[i].delta) > max: max = Settings.memory_list[i].delta print("MAX DELTA: " + str(max)) print("""*********************************************************** ********************""") for i in range(0, len(Settings.memory_list)): print(Settings.memory_list[i].name + ":" + " Start Mem: " + str(Settings.memory_list[i].start_mem) + " End Mem: " + str(Settings.memory_list[i].end_mem) + " Diff: " + str(Settings.memory_list[i].delta)) return result, error_msgs return True, []
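# Every dispatch above repeats `runAllTests or (name in targetTestGroups)`.
# A small refactoring sketch (the helper name is an assumption and is not
# part of the original runner):
def should_run(name, targetTestGroups):
    # An empty targetTestGroups list means "run everything".
    return len(targetTestGroups) == 0 or name in targetTestGroups

# Usage:
# if should_run("innerJoinsTest", targetTestGroups):
#     innerJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals)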
def executionTest(queryType, setInd, config_options):
    bc, dask_client = init_context(config_options)

    tables = [
        "nation", "region", "customer", "lineitem",
        "orders", "supplier", "part", "partsupp",
    ]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.PARQUET,
    ]  # TODO orc, json

    # Create Tables ------------------------------------------------------
    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        # Run Query ------------------------------------------------------
        worder = 1  # Whether the result sets must be ordered before comparing them
        use_percentage = False
        acceptable_difference = 0.001

        print("==============================")
        print(queryType)
        print("Test set: " + str(setInd + 1) + " Options: " + str(config_options))
        print("==============================")

        queryId = "TEST_01"
        query = tpch.get_tpch_query(queryId)
        runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                          acceptable_difference, use_percentage, fileSchemaType)

        queryId = "TEST_02"
        query = tpch.get_tpch_query(queryId)
        runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                          acceptable_difference, use_percentage, fileSchemaType)

        queryId = "TEST_03"
        query = tpch.get_tpch_query(queryId)
        runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                          0.1, use_percentage, fileSchemaType)

        queryId = "TEST_04"
        query = tpch.get_tpch_query(queryId)
        if fileSchemaType == DataType.ORC:
            runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)
        else:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                              acceptable_difference, True, fileSchemaType)

        queryId = "TEST_05"
        query = tpch.get_tpch_query(queryId)
        if fileSchemaType == DataType.ORC:
            runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)
        else:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)

        queryId = "TEST_06"
        query = tpch.get_tpch_query(queryId)
        runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                          acceptable_difference, True, fileSchemaType)

        queryId = "TEST_07"
        query = tpch.get_tpch_query(queryId)
        if fileSchemaType == DataType.ORC:
            runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)
        else:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)

        queryId = "TEST_08"
        query = tpch.get_tpch_query(queryId)
        if fileSchemaType == DataType.ORC:
            runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)
        else:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)

        queryId = "TEST_09"
        query = tpch.get_tpch_query(queryId)
        if fileSchemaType == DataType.ORC:
            runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                              acceptable_difference, True, fileSchemaType)
        else:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                              acceptable_difference, True, fileSchemaType)

        queryId = "TEST_10"
        query = tpch.get_tpch_query(queryId)
        if fileSchemaType == DataType.ORC:
            runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)
        else:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)

        queryId = "TEST_11"
        query = tpch.get_tpch_query(queryId)
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
        #                   acceptable_difference, use_percentage, fileSchemaType)

        queryId = "TEST_12"
        query = tpch.get_tpch_query(queryId)
        if fileSchemaType == DataType.ORC:
            runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)
        else:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)

        queryId = "TEST_13"
        query = tpch.get_tpch_query(queryId)
        runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                          acceptable_difference, use_percentage, fileSchemaType)

        queryId = "TEST_14"
        query = tpch.get_tpch_query(queryId)
        if fileSchemaType == DataType.ORC:
            runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)
        else:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)

        queryId = "TEST_15"
        query = tpch.get_tpch_query(queryId)
        runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                          acceptable_difference, use_percentage, fileSchemaType)

        queryId = "TEST_16"
        query = tpch.get_tpch_query(queryId)
        runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                          acceptable_difference, use_percentage, fileSchemaType)

        queryId = "TEST_17"
        query = tpch.get_tpch_query(queryId)
        runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                          acceptable_difference, use_percentage, fileSchemaType)

        queryId = "TEST_18"
        query = tpch.get_tpch_query(queryId)
        if fileSchemaType == DataType.ORC:
            runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)
        else:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                              acceptable_difference, use_percentage,
                              fileSchemaType)

        queryId = "TEST_19"
        query = tpch.get_tpch_query(queryId)
        runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
                          acceptable_difference, True, fileSchemaType)

        queryId = "TEST_20"
        query = tpch.get_tpch_query(queryId)
        runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                          acceptable_difference, True, fileSchemaType)

        queryId = "TEST_21"
        query = tpch.get_tpch_query(queryId)
        runTest.run_query(bc, spark, query, queryId, queryType, worder, "",
                          acceptable_difference, use_percentage, fileSchemaType)

        queryId = "TEST_22"
        query = tpch.get_tpch_query(queryId)
        # runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
        #                   acceptable_difference, use_percentage, fileSchemaType)

    # Tear down the context so each configuration set starts clean.
    if dask_client is not None:
        dask_client.run(gc.collect)
        dask_client.run_on_scheduler(gc.collect)
        dask_client.close()
        dask_client.shutdown()
        del dask_client
    del bc
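# Every TPC-H query above validates ORC inputs against Spark and everything
# else against Drill. A sketch of a helper that would collapse that repeated
# branch (the helper name is an assumption, not part of the original test):
def pick_engine(fileSchemaType, drill, spark):
    # Drill is the default comparison engine; Spark is used for ORC.
    return spark if fileSchemaType == DataType.ORC else drill

# Usage:
# runTest.run_query(bc, pick_engine(fileSchemaType, drill, spark), query,
#                   queryId, queryType, worder, "", acceptable_difference,
#                   use_percentage, fileSchemaType)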
def main():
    print('**init end2end**')
    Execution.getArgs()
    nvmlInit()

    dir_data_file = Settings.data['TestSettings']['dataDirectory']
    nRals = Settings.data['RunSettings']['nRals']

    drill = "drill"
    spark = "spark"
    compareResults = True
    if 'compare_results' in Settings.data['RunSettings']:
        compareResults = Settings.data['RunSettings']['compare_results']

    if ((Settings.execution_mode == ExecutionMode.FULL and compareResults == "true")
            or Settings.execution_mode == ExecutionMode.GENERATOR):
        # Create Table Drill ----------------------------------------------
        from pydrill.client import PyDrill

        drill = PyDrill(host='localhost', port=8047)
        createSchema.init_drill_schema(
            drill,
            Settings.data['TestSettings']['dataDirectory'],
            bool_test=True)

        # Create Table Spark ----------------------------------------------
        from pyspark.sql import SparkSession

        spark = SparkSession.builder.appName("allE2ETest").getOrCreate()
        createSchema.init_spark_schema(
            spark, Settings.data['TestSettings']['dataDirectory'])

    # Create Context For BlazingSQL
    bc, dask_client = init_context()

    targetTestGroups = Settings.data['RunSettings']['targetTestGroups']

    # An empty targetTestGroups list means the user wants to run all the tests.
    runAllTests = len(targetTestGroups) == 0

    if runAllTests or ("aggregationsWithoutGroupByTest" in targetTestGroups):
        aggregationsWithoutGroupByTest.main(dask_client, drill, dir_data_file,
                                            bc, nRals)
    if runAllTests or ("coalesceTest" in targetTestGroups):
        coalesceTest.main(dask_client, drill, dir_data_file, bc, nRals)  # we are not supporting coalesce yet
    if runAllTests or ("columnBasisTest" in targetTestGroups):
        columnBasisTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("commonTableExpressionsTest" in targetTestGroups):
        commonTableExpressionsTest.main(dask_client, drill, dir_data_file,
                                        bc, nRals)
    # countDistinctTest.main(dask_client, drill, dir_data_file, bc)  # we are not supporting count distinct yet
    if runAllTests or ("countWithoutGroupByTest" in targetTestGroups):
        countWithoutGroupByTest.main(dask_client, drill, dir_data_file,
                                     bc, nRals)
    if runAllTests or ("dateTest" in targetTestGroups):
        dateTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)
    if runAllTests or ("timestampTest" in targetTestGroups):
        timestampTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)
    if runAllTests or ("fullOuterJoinsTest" in targetTestGroups):
        fullOuterJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("groupByTest" in targetTestGroups):
        groupByTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("GroupByWitoutAggregations" in targetTestGroups):
        GroupByWitoutAggregations.main(dask_client, drill, dir_data_file,
                                       bc, nRals)
    if runAllTests or ("innerJoinsTest" in targetTestGroups):
        innerJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("leftOuterJoinsTest" in targetTestGroups):
        leftOuterJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("nonEquiJoinsTest" in targetTestGroups):
        nonEquiJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals)
    # loadDataTest.main(dask_client, bc)  # check this
    if runAllTests or ("nestedQueriesTest" in targetTestGroups):
        nestedQueriesTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("orderbyTest" in targetTestGroups):
        orderbyTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("predicatesWithNulls" in targetTestGroups):
        predicatesWithNulls.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("stringTests" in targetTestGroups):
        stringTests.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("tablesFromPandasTest" in targetTestGroups):
        tablesFromPandasTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("unaryOpsTest" in targetTestGroups):
        unaryOpsTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("unifyTablesTest" in targetTestGroups):
        unifyTablesTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("unionTest" in targetTestGroups):
        unionTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)
    if runAllTests or ("useLimitTest" in targetTestGroups):
        useLimitTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)
    if runAllTests or ("whereClauseTest" in targetTestGroups):
        whereClauseTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("bindableAliasTest" in targetTestGroups):
        bindableAliasTest.main(dask_client, drill, spark, dir_data_file,
                               bc, nRals)
    if runAllTests or ("booleanTest" in targetTestGroups):
        booleanTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("caseTest" in targetTestGroups):
        caseTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)
    if runAllTests or ("castTest" in targetTestGroups):
        castTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)
    if runAllTests or ("concatTest" in targetTestGroups):
        concatTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)
    if runAllTests or ("literalTest" in targetTestGroups):
        literalTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)
    if runAllTests or ("dirTest" in targetTestGroups):
        dirTest.main(dask_client, drill, dir_data_file, bc, nRals)
    # fileSystemHdfsTest.main(dask_client, drill, dir_data_file, bc)  # HDFS is not working yet
    # mixedFileSystemTest.main(dask_client, drill, dir_data_file, bc)  # HDFS is not working yet
    if runAllTests or ("likeTest" in targetTestGroups):
        likeTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("simpleDistributionTest" in targetTestGroups):
        simpleDistributionTest.main(dask_client, drill, spark, dir_data_file,
                                    bc, nRals)
    if runAllTests or ("substringTest" in targetTestGroups):
        substringTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("wildCardTest" in targetTestGroups):
        wildCardTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("tpchQueriesTest" in targetTestGroups):
        tpchQueriesTest.main(dask_client, drill, spark, dir_data_file,
                             bc, nRals)
    if runAllTests or ("roundTest" in targetTestGroups):
        roundTest.main(dask_client, drill, dir_data_file, bc, nRals)
    if runAllTests or ("fileSystemLocalTest" in targetTestGroups):
        fileSystemLocalTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if Settings.execution_mode != ExecutionMode.GPUCI:
        if runAllTests or ("fileSystemS3Test" in targetTestGroups):
            fileSystemS3Test.main(dask_client, drill, dir_data_file, bc, nRals)
        if runAllTests or ("fileSystemGSTest" in targetTestGroups):
            fileSystemGSTest.main(dask_client, drill, dir_data_file, bc, nRals)

    # timestampdiffTest.main(dask_client, spark, dir_data_file, bc, nRals)

    if Settings.execution_mode != ExecutionMode.GENERATOR:
        result, error_msgs = runTest.save_log()

        max_delta = 0
        for i in range(0, len(Settings.memory_list)):
            if Settings.memory_list[i].delta > max_delta:
                max_delta = Settings.memory_list[i].delta
        print("MAX DELTA: " + str(max_delta))
        print('*' * 79)

        for i in range(0, len(Settings.memory_list)):
            print(Settings.memory_list[i].name + ":" +
                  " Start Mem: " + str(Settings.memory_list[i].start_mem) +
                  " End Mem: " + str(Settings.memory_list[i].end_mem) +
                  " Diff: " + str(Settings.memory_list[i].delta))

        return result, error_msgs

    return True, []
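# A typical entry point for these runners; a minimal sketch, assuming the
# script is executed directly and that a failed result should produce a
# non-zero exit code for CI:
if __name__ == '__main__':
    import sys

    result, error_msgs = main()
    if not result:
        for msg in error_msgs:
            print(msg)
        sys.exit(1)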