Example #1
def main():
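    # End-to-end driver: build the Drill schema over the test data directory,
    # run the CSV/Parquet suites against local, S3, and HDFS storage, save
    # the log, and print the per-suite memory deltas recorded in
    # Settings.memory_list. Known failures are noted inline.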
    print("**init end2end**")

    Execution.getArgs()

    dir_data_file = Settings.data["TestSettings"]["dataDirectory"]

    # Create Table Drill -----------------------------------------
    drill = PyDrill(host="localhost", port=8047)
    createSchema.init_drill_schema(drill, dir_data_file)

    # All tests pass only with the 100 MB dataset
    csvFromLocalTest.main(drill, dir_data_file)

    csvFromS3Test.main(
        drill, dir_data_file
    )  # AttributeError: 'NoneType' object has no attribute '_cols'

    # vector::_M_range_check: __n
    # (which is 18446744073709551615) >= this->size() (which is 2)
    csvFromHdfsTest.main(
        drill, dir_data_file
    )

    parquetFromLocalTest.main(
        drill, dir_data_file
    )  # All tests pass only with the 100 MB dataset

    # All tests pass with the 100 MB dataset; with multiple files per
    # table they do not, because not all of the files load correctly.
    parquetFromS3Test.main(
        drill, dir_data_file
    )

    parquetFromHdfsTest.main(
        drill, dir_data_file
    )  # Hangs while reading the data

    runTest.save_log()

    for entry in Settings.memory_list:
        print(
            entry.name
            + ":   Start Mem: "
            + str(entry.start_mem)
            + "   End Mem: "
            + str(entry.end_mem)
            + "   Diff: "
            + str(entry.delta)
        )
Example #2
def main():
    print('**init performance test**')
    Execution.getArgs()

    dir_data_file = Settings.data['TestSettings']['dataDirectory']

    # Create Table Drill -----------------------------------------
    drill = PyDrill(host='localhost', port=8047)
    createSchema.init_drill_schema(drill, dir_data_file)

    jobId = 1
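    # Default job id; when the MySQL connection is enabled, a real job id is
    # fetched so save_log() can tag each iteration's results.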

    if Settings.data['MysqlConnection']['connectEnabled']:
        from DataBase import mysqlDatabaseManager as msqldb
        jobId = msqldb.getJobId()

    for x in range(10):
        performanceTest.main(drill, dir_data_file)
        runTest.save_log(job_id=jobId)
Example #3
def E2EResults():
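    # Summarize the run: save the test log (save_log() is told whether this
    # is a GPUCI run), print the largest per-suite memory delta, and dump
    # GPU memory stats. GENERATOR mode compares nothing, so it trivially
    # succeeds.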
    if Settings.execution_mode != ExecutionMode.GENERATOR:

        result, error_msgs = runTest.save_log(
            Settings.execution_mode == ExecutionMode.GPUCI)

        max_delta = 0

        for entry in Settings.memory_list:
            if entry.delta > max_delta:
                max_delta = entry.delta

        print("MAX DELTA: " + str(max_delta))
        print("*" * 79)

        gpuMemory.print_log_gpu_memory()

        return result, error_msgs
    return True, []
Example #4
    if ((Settings.execution_mode == ExecutionMode.FULL
         and compareResults == "true")
            or Settings.execution_mode == ExecutionMode.GENERATOR):
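        # Drill and Spark serve as reference engines, so they are only set up
        # when results will be compared (FULL mode with compareResults) or
        # generated (GENERATOR mode); the full script presumably pre-sets
        # drill and spark to placeholders, as in Example #5, so the call to
        # main() below still works otherwise.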
        # Create Table Drill ------------------------------------------------
        from pydrill.client import PyDrill

        drill = PyDrill(host="localhost", port=8047)
        cs.init_drill_schema(drill,
                             Settings.data["TestSettings"]["dataDirectory"])

        # Create Table Spark -------------------------------------------------
        from pyspark.sql import SparkSession

        spark = SparkSession.builder.appName("timestampTest").getOrCreate()
        cs.init_spark_schema(spark,
                             Settings.data["TestSettings"]["dataDirectory"])

    # Create Context For BlazingSQL

    bc, dask_client = init_context()

    nRals = Settings.data["RunSettings"]["nRals"]

    main(dask_client, drill, spark,
         Settings.data["TestSettings"]["dataDirectory"], bc, nRals)

    if Settings.execution_mode != ExecutionMode.GENERATOR:
        runTest.save_log()
        gpuMemory.print_log_gpu_memory()
Example #5
def main():
    print("**init end2end**")
    Execution.getArgs()
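    # nvmlInit() (presumably from pynvml) starts NVML so GPU memory can be
    # sampled during the run; see gpuMemory.print_log_gpu_memory() below.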
    nvmlInit()
    dir_data_file = Settings.data["TestSettings"]["dataDirectory"]
    nRals = Settings.data["RunSettings"]["nRals"]

    drill = "drill"
    spark = "spark"
    compareResults = True
    if "compare_results" in Settings.data["RunSettings"]:
        compareResults = Settings.data["RunSettings"]["compare_results"]

    if ((Settings.execution_mode == ExecutionMode.FULL
         and compareResults == "true")
            or Settings.execution_mode == ExecutionMode.GENERATOR):

        # Create Table Drill -----------------------------------------
        from pydrill.client import PyDrill

        drill = PyDrill(host="localhost", port=8047)
        createSchema.init_drill_schema(
            drill,
            Settings.data["TestSettings"]["dataDirectory"],
            bool_test=True)
        createSchema.init_drill_schema(
            drill,
            Settings.data["TestSettings"]["dataDirectory"],
            smiles_test=True,
            fileSchemaType=DataType.PARQUET)

        # Create Table Spark -------------------------------------------------
        from pyspark.sql import SparkSession

        spark = SparkSession.builder.appName("allE2ETest").getOrCreate()
        createSchema.init_spark_schema(
            spark, Settings.data["TestSettings"]["dataDirectory"])
        createSchema.init_spark_schema(
            spark,
            Settings.data["TestSettings"]["dataDirectory"],
            smiles_test=True,
            fileSchemaType=DataType.PARQUET)

    targetTestGroups = Settings.data["RunSettings"]["targetTestGroups"]

    # Only innerJoinsTest runs with a progress bar
    useProgressBar = False
    if "innerJoinsTest" in targetTestGroups:
        useProgressBar = True

    print("Using progress bar: ", useProgressBar)

    # Create Context For BlazingSQL
    bc, dask_client = init_context(useProgressBar=useProgressBar)

    runAllTests = (
        len(targetTestGroups) == 0
    )  # if targetTestGroups is empty, the user wants to run all the tests

    if runAllTests or ("hiveFileTest" in targetTestGroups):
        hiveFileTest.main(dask_client, spark, dir_data_file, bc, nRals)

    if runAllTests or ("aggregationsWithoutGroupByTest" in targetTestGroups):
        aggregationsWithoutGroupByTest.main(dask_client, drill, dir_data_file,
                                            bc, nRals)

    if runAllTests or ("coalesceTest" in targetTestGroups):
        coalesceTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("columnBasisTest" in targetTestGroups):
        columnBasisTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("commonTableExpressionsTest" in targetTestGroups):
        commonTableExpressionsTest.main(dask_client, drill, dir_data_file, bc,
                                        nRals)

    if runAllTests or ("countDistinctTest" in targetTestGroups):
        countDistinctTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("countWithoutGroupByTest" in targetTestGroups):
        countWithoutGroupByTest.main(dask_client, drill, dir_data_file, bc,
                                     nRals)

    if runAllTests or ("dateTest" in targetTestGroups):
        dateTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("timestampTest" in targetTestGroups):
        timestampTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("toTimestampTest" in targetTestGroups):
        toTimestampTest.main(dask_client, spark, dir_data_file, bc, nRals)

    if runAllTests or ("dayOfWeekTest" in targetTestGroups):
        dayOfWeekTest.main(dask_client, spark, dir_data_file, bc, nRals)

    if runAllTests or ("fullOuterJoinsTest" in targetTestGroups):
        fullOuterJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("groupByTest" in targetTestGroups):
        groupByTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("GroupByWitoutAggregations" in targetTestGroups):
        GroupByWitoutAggregations.main(dask_client, drill, dir_data_file, bc,
                                       nRals)

    if runAllTests or ("innerJoinsTest" in targetTestGroups):
        innerJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("crossJoinsTest" in targetTestGroups):
        crossJoinsTest.main(dask_client, spark, dir_data_file, bc, nRals)

    if runAllTests or ("leftOuterJoinsTest" in targetTestGroups):
        leftOuterJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("nonEquiJoinsTest" in targetTestGroups):
        nonEquiJoinsTest.main(dask_client, drill, spark, dir_data_file, bc,
                              nRals)

    # loadDataTest.main(dask_client, bc)  # check this

    if runAllTests or ("nestedQueriesTest" in targetTestGroups):
        nestedQueriesTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("orderbyTest" in targetTestGroups):
        orderbyTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("predicatesWithNulls" in targetTestGroups):
        predicatesWithNulls.main(dask_client, drill, spark, dir_data_file, bc,
                                 nRals)

    if runAllTests or ("stringTests" in targetTestGroups):
        stringTests.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("tablesFromPandasTest" in targetTestGroups):
        tablesFromPandasTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("unaryOpsTest" in targetTestGroups):
        unaryOpsTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("unifyTablesTest" in targetTestGroups):
        unifyTablesTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("unionTest" in targetTestGroups):
        unionTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("useLimitTest" in targetTestGroups):
        useLimitTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("whereClauseTest" in targetTestGroups):
        whereClauseTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("bindableAliasTest" in targetTestGroups):
        bindableAliasTest.main(dask_client, drill, spark, dir_data_file, bc,
                               nRals)

    if runAllTests or ("booleanTest" in targetTestGroups):
        booleanTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("caseTest" in targetTestGroups):
        caseTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("castTest" in targetTestGroups):
        castTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("concatTest" in targetTestGroups):
        concatTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("literalTest" in targetTestGroups):
        literalTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("dirTest" in targetTestGroups):
        dirTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    # HDFS is not working yet
    # fileSystemHdfsTest.main(dask_client, drill, dir_data_file, bc)

    # HDFS is not working yet
    # mixedFileSystemTest.main(dask_client, drill, dir_data_file, bc)

    if runAllTests or ("likeTest" in targetTestGroups):
        likeTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("substringTest" in targetTestGroups):
        substringTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("stringCaseTest" in targetTestGroups):
        stringCaseTest.main(dask_client, drill, spark, dir_data_file, bc,
                            nRals)

    if runAllTests or ("wildCardTest" in targetTestGroups):
        wildCardTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("tpchQueriesTest" in targetTestGroups):
        tpchQueriesTest.main(dask_client, drill, spark, dir_data_file, bc,
                             nRals)

    if runAllTests or ("roundTest" in targetTestGroups):
        roundTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("fileSystemLocalTest" in targetTestGroups):
        fileSystemLocalTest.main(dask_client, drill, spark, dir_data_file, bc,
                                 nRals)

    if runAllTests or ("messageValidationTest" in targetTestGroups):
        messageValidationTest.main(dask_client, drill, dir_data_file, bc,
                                   nRals)

    testsWithNulls = Settings.data["RunSettings"]["testsWithNulls"]
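    # The S3 and GS file-system suites run only on non-null data and outside
    # GPUCI; jsonTest and concurrentTest are likewise limited to non-null
    # runs, while tablesFromSQL runs only with nulls (and outside GPUCI).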
    if testsWithNulls != "true":
        if Settings.execution_mode != ExecutionMode.GPUCI:
            if runAllTests or ("fileSystemS3Test" in targetTestGroups):
                fileSystemS3Test.main(dask_client, drill, dir_data_file, bc,
                                      nRals)

            if runAllTests or ("fileSystemGSTest" in targetTestGroups):
                fileSystemGSTest.main(dask_client, drill, dir_data_file, bc,
                                      nRals)

    if runAllTests or ("loggingTest" in targetTestGroups):
        loggingTest.main(dask_client, dir_data_file, bc, nRals)

    # timestampdiffTest.main(dask_client, spark, dir_data_file, bc, nRals)

    # TODO: re-enable this test once we have the new version of dask
    # https://github.com/dask/distributed/issues/4645
    # https://github.com/rapidsai/cudf/issues/7773
    #if runAllTests or ("smilesTest" in targetTestGroups):
    #    smilesTest.main(dask_client, spark, dir_data_file, bc, nRals)

    if testsWithNulls != "true":
        if runAllTests or ("jsonTest" in targetTestGroups):
            jsonTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("windowFunctionTest" in targetTestGroups):
        windowFunctionTest.main(dask_client, drill, spark, dir_data_file, bc,
                                nRals)

    if runAllTests or ("windowNoPartitionTest" in targetTestGroups):
        windowNoPartitionTest.main(dask_client, drill, spark, dir_data_file,
                                   bc, nRals)

    if testsWithNulls != "true":
        if runAllTests or ("concurrentTest" in targetTestGroups):
            concurrentTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if testsWithNulls == "true":
        if Settings.execution_mode != ExecutionMode.GPUCI:
            if runAllTests or ("tablesFromSQL" in targetTestGroups):
                tablesFromSQL.main(dask_client, drill, dir_data_file, bc,
                                   nRals)

    # WARNING!!! This test must be the last one to run ---------------------
    if runAllTests or ("configOptionsTest" in targetTestGroups):
        configOptionsTest.main(dask_client, drill, spark, dir_data_file, bc,
                               nRals)

    if Settings.execution_mode != ExecutionMode.GENERATOR:

        result, error_msgs = runTest.save_log(
            Settings.execution_mode == ExecutionMode.GPUCI)

        max_delta = 0
        for entry in Settings.memory_list:
            if entry.delta > max_delta:
                max_delta = entry.delta

        print("MAX DELTA: " + str(max_delta))
        print("*" * 79)

        for entry in Settings.memory_list:
            print(entry.name + ":   Start Mem: " +
                  str(entry.start_mem) + "   End Mem: " +
                  str(entry.end_mem) + "   Diff: " +
                  str(entry.delta))

        return result, error_msgs

    return True, []
Example #6
def main(dask_client, bc):
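    # Load-data scenario: five passes over the TPCH tables with varying file
    # counts and psv/parquet mixes, each validated against Drill.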

    # Create Table Drill ------------------------------------------------

    from pydrill.client import PyDrill

    drill = PyDrill(host="localhost", port=8047)

    dir_data_lc = Settings.data["TestSettings"]["dataDirectory"]

    for x in range(5):

        # [numberOfFiles, type_nation, type_region, type_supplier,
        #  type_customer, type_lineitem, type_orders]

        run = []

        if x == 0:
            run = [1, "psv", "psv", "psv", "psv", "psv", "psv"]
        elif x == 1:
            run = [
                2, "parquet", "parquet", "parquet", "parquet", "parquet",
                "parquet"
            ]
        elif x == 2:
            run = [6, "parquet", "psv", "parquet", "psv", "parquet", "psv"]
        elif x == 3:
            run = [10, "psv", "parquet", "psv", "parquet", "psv", "parquet"]
        elif x == 4:
            run = [12, "psv", "psv", "parquet", "parquet", "psv", "parquet"]

        print("============================================================")
        print("Running " + str(x + 1) + ":")
        print("Número de Archivos: " + str(run[0]))
        print("Type of files for Nation: " + run[1])
        print("Type of files for Region: " + run[2])
        print("Type of files for Supplier: " + run[3])
        print("Type of files for Customer: " + run[4])
        print("Type of files for Lineitem: " + run[5])
        print("Type of files for Orders: " + run[6])
        print("============================================================")
        print("1")
        num_files = run[0]
        print("2")
        cs.init_drill_schema(drill, dir_data_lc, n_files=num_files)
        print("3")
        # Read Data TPCH-----------------------------------------------------
        nation_files = cs.get_filenames_table("nation", dir_data_lc, num_files,
                                              run[1])
        bc.create_table(
            "nation",
            nation_files,
            delimiter="|",
            dtype=cs.get_dtypes("nation"),
            names=cs.get_column_names("nation"),
        )

        region_files = cs.get_filenames_table("region", dir_data_lc, num_files,
                                              run[2])
        bc.create_table(
            "region",
            region_files,
            delimiter="|",
            dtype=cs.get_dtypes("region"),
            names=cs.get_column_names("region"),
        )

        supplier_files = cs.get_filenames_table("supplier", dir_data_lc,
                                                num_files, run[3])
        bc.create_table(
            "supplier",
            supplier_files,
            delimiter="|",
            dtype=cs.get_dtypes("supplier"),
            names=cs.get_column_names("supplier"),
        )

        customer_files = cs.get_filenames_table("customer", dir_data_lc,
                                                num_files, run[4])
        bc.create_table(
            "customer",
            customer_files,
            delimiter="|",
            dtype=cs.get_dtypes("customer"),
            names=cs.get_column_names("customer"),
        )

        lineitem_files = cs.get_filenames_table("lineitem", dir_data_lc,
                                                num_files, run[5])
        bc.create_table(
            "lineitem",
            lineitem_files,
            delimiter="|",
            dtype=cs.get_dtypes("lineitem"),
            names=cs.get_column_names("lineitem"),
        )

        orders_files = cs.get_filenames_table("orders", dir_data_lc, num_files,
                                              run[6])
        bc.create_table(
            "orders",
            orders_files,
            delimiter="|",
            dtype=cs.get_dtypes("orders"),
            names=cs.get_column_names("orders"),
        )

        # Run Query ------------------------------------------------------
        # Parameter that indicates whether the result sets must be
        # sorted before comparing them
        worder = 1
        use_percentage = False
        acceptable_difference = 0.01
        queryType = "Load Data Test"

        print("==============================")
        print(queryType)
        print("==============================")

        queryId = "TEST_01"
        query = """select count(c_custkey) as c1, count(c_acctbal) as c2
                from customer"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_02"
        query = "select count(n_nationkey), count(n_regionkey) from nation"
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_03"
        query = "select count(s_suppkey), count(s_nationkey) from supplier"
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_04"
        query = """select count(c_custkey), sum(c_acctbal),
                    sum(c_acctbal)/count(c_acctbal), min(c_custkey),
                    max(c_nationkey),
                    (max(c_nationkey) + min(c_nationkey))/2 c_nationkey
                    from customer
                    where c_custkey < 100 group by c_nationkey"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            True,
        )  # TODO: Change sum/count for avg KC

        queryId = "TEST_05"
        query = """select c.c_custkey, c.c_nationkey, n.n_regionkey
                from customer as c inner join nation as n
                on c.c_nationkey = n.n_nationkey
                where n.n_regionkey = 1 and c.c_custkey < 50"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_06"
        query = """select c_custkey, c_nationkey, c_acctbal
                from customer order by c_nationkey, c_acctbal"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            0,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_07"
        query = """select c_custkey + c_nationkey, c_acctbal
                from customer order by 1, 2"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            0,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_08"
        query = """select n1.n_nationkey as supp_nation,
                n2.n_nationkey as cust_nation,
                l.l_extendedprice * l.l_discount
                from supplier as s
                inner join lineitem as l on s.s_suppkey = l.l_suppkey
                inner join orders as o on o.o_orderkey = l.l_orderkey
                inner join customer as c on c.c_custkey = o.o_custkey
                inner join nation as n1 on s.s_nationkey = n1.n_nationkey
                inner join nation as n2 on c.c_nationkey = n2.n_nationkey
                where n1.n_nationkey = 1
                and n2.n_nationkey = 2 and o.o_orderkey < 10000"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_09"
        query = """select c_custkey, c_nationkey as nkey
                from customer where c_custkey < 0 and c_nationkey >= 30"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_10"
        query = """select sin(c_acctbal), cos(c_acctbal),
                    asin(c_acctbal), acos(c_acctbal),
                    ln(c_acctbal), tan(c_acctbal),
                    atan(c_acctbal), floor(c_acctbal),
                    c_acctbal
                from customer"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_11"
        query = """select n1.n_nationkey as n1key,
                n2.n_nationkey as n2key, n1.n_nationkey + n2.n_nationkey
                from nation as n1 full outer join nation as n2
                on n1.n_nationkey = n2.n_nationkey + 6
                where n1.n_nationkey < 10 and n1.n_nationkey > 5"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_12"
        query = """select count(n1.n_nationkey) as n1key,
                    count(n2.n_nationkey) as n2key, count(*) as cstar
                from nation as n1 full outer join nation as n2
                on n1.n_nationkey = n2.n_nationkey + 6"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_13"
        query = """select o_orderkey, o_custkey from orders
                where o_orderkey < 10 and o_orderkey >= 1"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_14"
        query = """select 100168549 - sum(o_orderkey)/count(o_orderkey),
                    56410984 / sum(o_totalprice),
                    (123 - 945/max(o_orderkey)) / (sum(81619/o_orderkey) /
                    count(81619/o_orderkey))
                from orders"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            True,
        )  # TODO: Change sum/count for avg KC

        queryId = "TEST_15"
        query = """select o_orderkey, sum(o_totalprice)/count(o_orderstatus)
                from orders where o_custkey < 100
                group by o_orderstatus, o_orderkey"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_16"
        query = """select o_orderkey, o_orderstatus
                from orders where o_custkey < 10
                and o_orderstatus <> 'O'
                order by o_orderkey, o_orderstatus
                limit 50"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_17"
        query = """select count(o_orderstatus)
                from orders where o_orderstatus <> 'O'"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_18"
        query = """select count(o_orderkey), sum(o_orderkey), o_clerk
                from orders where o_custkey < 1000
                group by o_clerk, o_orderstatus"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_19"
        query = """select sum(o_orderkey)/count(o_orderkey)
                from orders group by o_orderstatus"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            True,
        )  # TODO: Change sum/count for avg KC

        queryId = "TEST_20"
        query = """select count(o_shippriority), sum(o_totalprice)
                from orders group by o_shippriority"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_21"
        query = """with regionTemp as ( select r_regionkey,
                 r_name from region where r_regionkey > 2 ),
                nationTemp as(select n_nationkey, n_regionkey as fkey,
                n_name from nation where n_nationkey > 3
                order by n_nationkey)
                select regionTemp.r_name, nationTemp.n_name
                from regionTemp inner join nationTemp on
                regionTemp.r_regionkey = nationTemp.fkey"""
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_22"
        query = """select o.o_totalprice, l.l_partkey
                from orders as o
                left outer join lineitem as l on o.o_custkey = l.l_linenumber
                and l.l_suppkey = o.o_orderkey where l.l_linenumber < 1000"""
        runTest.run_query_performance(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_23"
        query = """select o.o_orderkey, o.o_totalprice,
                l.l_partkey, l.l_returnflag from lineitem as l
                inner join orders as o on o.o_orderkey = l.l_orderkey
                inner join customer as c on c.c_custkey = o.o_custkey
                where c.c_custkey < 1000"""
        runTest.run_query_performance(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        queryId = "TEST_24"
        query = """select o.o_orderkey, o.o_totalprice,
                l.l_partkey, l.l_linestatus from orders as o
                full outer join lineitem as l on
                l.l_orderkey = o.o_orderkey where o.o_orderkey < 1000"""
        runTest.run_query_performance(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            # fileSchemaType,
        )

        runTest.save_log()
Example #7
def main():
    print('**init end2end**')
    Execution.getArgs()
    nvmlInit()
    dir_data_file = Settings.data['TestSettings']['dataDirectory']
    nRals = Settings.data['RunSettings']['nRals']

    drill = "drill"
    spark = "spark"

    compareResults = True
    if 'compare_results' in Settings.data['RunSettings']:
        compareResults = Settings.data['RunSettings']['compare_results']
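    # Note: compare_results arrives from the settings file as the string
    # "true"/"false", which is why the check below compares against "true".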

    if ((Settings.execution_mode == ExecutionMode.FULL
         and compareResults == "true")
            or Settings.execution_mode == ExecutionMode.GENERATOR):

        # Create Table Drill ------------------------------------------------
        from pydrill.client import PyDrill
        drill = PyDrill(host='localhost', port=8047)
        createSchema.init_drill_schema(
            drill,
            Settings.data['TestSettings']['dataDirectory'],
            bool_test=True)

        # Create Table Spark ------------------------------------------------
        from pyspark.sql import SparkSession

        spark = SparkSession.builder.appName("allE2ETest").getOrCreate()
        createSchema.init_spark_schema(
            spark, Settings.data['TestSettings']['dataDirectory'])

    # Create Context For BlazingSQL
    bc, dask_client = init_context()
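    # init_context() is assumed to return the BlazingContext used to create
    # tables and run queries plus the Dask client handed to every suite.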

    targetTestGroups = Settings.data['RunSettings']['targetTestGroups']
    runAllTests = (
        len(targetTestGroups) == 0
    )  # if targetTestGroups was empty the user wants to run all the tests

    if runAllTests or ("aggregationsWithoutGroupByTest" in targetTestGroups):
        aggregationsWithoutGroupByTest.main(dask_client, drill, dir_data_file,
                                            bc, nRals)

    if runAllTests or ("coalesceTest" in targetTestGroups):
        coalesceTest.main(dask_client, drill, dir_data_file, bc,
                          nRals)  # we are not supporting coalesce yet

    if runAllTests or ("columnBasisTest" in targetTestGroups):
        columnBasisTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("commonTableExpressionsTest" in targetTestGroups):
        commonTableExpressionsTest.main(dask_client, drill, dir_data_file, bc,
                                        nRals)

    # countDistinctTest.main(dask_client, drill, dir_data_file, bc)
    # (we are not supporting count distinct yet)

    if runAllTests or ("countWithoutGroupByTest" in targetTestGroups):
        countWithoutGroupByTest.main(dask_client, drill, dir_data_file, bc,
                                     nRals)

    if runAllTests or ("dateTest" in targetTestGroups):
        dateTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("timestampTest" in targetTestGroups):
        timestampTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("fullOuterJoinsTest" in targetTestGroups):
        fullOuterJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("groupByTest" in targetTestGroups):
        groupByTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("GroupByWitoutAggregations" in targetTestGroups):
        GroupByWitoutAggregations.main(dask_client, drill, dir_data_file, bc,
                                       nRals)

    if runAllTests or ("innerJoinsTest" in targetTestGroups):
        innerJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("" in targetTestGroups):
        leftOuterJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("nonEquiJoinsTest" in targetTestGroups):
        nonEquiJoinsTest.main(dask_client, drill, dir_data_file, bc, nRals)

    # loadDataTest.main(dask_client, bc)  # check this

    if runAllTests or ("nestedQueriesTest" in targetTestGroups):
        nestedQueriesTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("orderbyTest" in targetTestGroups):
        orderbyTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("predicatesWithNulls" in targetTestGroups):
        predicatesWithNulls.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("stringTests" in targetTestGroups):
        stringTests.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("tablesFromPandasTest" in targetTestGroups):
        tablesFromPandasTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("unaryOpsTest" in targetTestGroups):
        unaryOpsTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("unifyTablesTest" in targetTestGroups):
        unifyTablesTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("unionTest" in targetTestGroups):
        unionTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("useLimitTest" in targetTestGroups):
        useLimitTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("whereClauseTest" in targetTestGroups):
        whereClauseTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("bindableAliasTest" in targetTestGroups):
        bindableAliasTest.main(dask_client, drill, spark, dir_data_file, bc,
                               nRals)

    if runAllTests or ("booleanTest" in targetTestGroups):
        booleanTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("caseTest" in targetTestGroups):
        caseTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("castTest" in targetTestGroups):
        castTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("concatTest" in targetTestGroups):
        concatTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("literalTest" in targetTestGroups):
        literalTest.main(dask_client, drill, spark, dir_data_file, bc, nRals)

    if runAllTests or ("dirTest" in targetTestGroups):
        dirTest.main(dask_client, drill, dir_data_file, bc, nRals)

    # HDFS is not working yet
    # fileSystemHdfsTest.main(dask_client, drill, dir_data_file, bc)

    # HDFS is not working yet
    # mixedFileSystemTest.main(dask_client, drill, dir_data_file, bc)

    if runAllTests or ("likeTest" in targetTestGroups):
        likeTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("simpleDistributionTest" in targetTestGroups):
        simpleDistributionTest.main(dask_client, drill, spark, dir_data_file,
                                    bc, nRals)

    if runAllTests or ("substringTest" in targetTestGroups):
        substringTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("wildCardTest" in targetTestGroups):
        wildCardTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("tpchQueriesTest" in targetTestGroups):
        tpchQueriesTest.main(dask_client, drill, spark, dir_data_file, bc,
                             nRals)

    if runAllTests or ("roundTest" in targetTestGroups):
        roundTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if runAllTests or ("fileSystemLocalTest" in targetTestGroups):
        fileSystemLocalTest.main(dask_client, drill, dir_data_file, bc, nRals)

    if Settings.execution_mode != ExecutionMode.GPUCI:
        if runAllTests or ("fileSystemS3Test" in targetTestGroups):
            fileSystemS3Test.main(dask_client, drill, dir_data_file, bc, nRals)

        if runAllTests or ("fileSystemGSTest" in targetTestGroups):
            fileSystemGSTest.main(dask_client, drill, dir_data_file, bc, nRals)

    # timestampdiffTest.main(dask_client, spark, dir_data_file, bc, nRals)

    if Settings.execution_mode != ExecutionMode.GENERATOR:

        result, error_msgs = runTest.save_log()

        max_delta = 0
        for entry in Settings.memory_list:
            if entry.delta > max_delta:
                max_delta = entry.delta

        print("MAX DELTA: " + str(max_delta))
        print("*" * 79)

        for entry in Settings.memory_list:
            print(entry.name + ":   Start Mem: " +
                  str(entry.start_mem) + "   End Mem: " +
                  str(entry.end_mem) + "   Diff: " +
                  str(entry.delta))

        return result, error_msgs

    return True, []
Example #8
def main(dask_client, bc):
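    # Five TPCH load passes with varying file counts and psv/parquet mixes
    # per table, each validated against Drill via run_query.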

    # Create Table Drill ------------------------------------------------

    drill = PyDrill(host='localhost', port=8047)

    dir_data_lc = Settings.data['TestSettings']['dataDirectory']

    for x in range(5):

        # [numberOfFiles, type_nation, type_region, type_supplier,
        #  type_customer, type_lineitem, type_orders]

        run = []

        if x == 0:
            run = [1, 'psv', 'psv', 'psv', 'psv', 'psv', 'psv']
        elif x == 1:
            run = [
                2, 'parquet', 'parquet', 'parquet', 'parquet', 'parquet',
                'parquet'
            ]
        elif x == 2:
            run = [6, 'parquet', 'psv', 'parquet', 'psv', 'parquet', 'psv']
        elif x == 3:
            run = [10, 'psv', 'parquet', 'psv', 'parquet', 'psv', 'parquet']
        elif x == 4:
            run = [12, 'psv', 'psv', 'parquet', 'parquet', 'psv', 'parquet']

        print("=" * 70)
        print("Running " + str(x + 1) + ":")
        print("Number of files: " + str(run[0]))
        print("Type of files for Nation: " + run[1])
        print("Type of files for Region: " + run[2])
        print("Type of files for Supplier: " + run[3])
        print("Type of files for Customer: " + run[4])
        print("Type of files for Lineitem: " + run[5])
        print("Type of files for Orders: " + run[6])
        print("=" * 70)
        num_files = run[0]
        cs.init_drill_schema(drill, dir_data_lc, n_files=num_files)
        # Read Data TPCH -----------------------------------------------------
        nation_files = cs.get_filenames_table('nation', dir_data_lc, num_files,
                                              run[1])
        bc.create_table('nation',
                        nation_files,
                        delimiter='|',
                        dtype=cs.get_dtypes('nation'),
                        names=cs.get_column_names('nation'))

        region_files = cs.get_filenames_table('region', dir_data_lc, num_files,
                                              run[2])
        bc.create_table('region',
                        region_files,
                        delimiter='|',
                        dtype=cs.get_dtypes('region'),
                        names=cs.get_column_names('region'))

        supplier_files = cs.get_filenames_table('supplier', dir_data_lc,
                                                num_files, run[3])
        bc.create_table('supplier',
                        supplier_files,
                        delimiter='|',
                        dtype=cs.get_dtypes('supplier'),
                        names=cs.get_column_names('supplier'))

        customer_files = cs.get_filenames_table('customer', dir_data_lc,
                                                num_files, run[4])
        bc.create_table('customer',
                        customer_files,
                        delimiter='|',
                        dtype=cs.get_dtypes('customer'),
                        names=cs.get_column_names('customer'))

        lineitem_files = cs.get_filenames_table('lineitem', dir_data_lc,
                                                num_files, run[5])
        bc.create_table('lineitem',
                        lineitem_files,
                        delimiter='|',
                        dtype=cs.get_dtypes('lineitem'),
                        names=cs.get_column_names('lineitem'))

        orders_files = cs.get_filenames_table('orders', dir_data_lc, num_files,
                                              run[6])
        bc.create_table('orders',
                        orders_files,
                        delimiter='|',
                        dtype=cs.get_dtypes('orders'),
                        names=cs.get_column_names('orders'))

        # Run Query ------------------------------------------------------
        # Parameter that indicates whether the result sets must be
        # sorted before comparing them
        worder = 1
        use_percentage = False
        acceptable_difference = 0.01
        queryType = 'Load Data Test'

        print('==============================')
        print(queryType)
        print('==============================')

        queryId = 'TEST_01'
        query = "select count(c_custkey) as c1, count(c_acctbal) as c2 from customer"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_02'
        query = "select count(n_nationkey), count(n_regionkey) from nation"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_03'
        query = "select count(s_suppkey), count(s_nationkey) from supplier"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_04'
        query = "select count(c_custkey), sum(c_acctbal), sum(c_acctbal)/count(c_acctbal), min(c_custkey), max(c_nationkey), (max(c_nationkey) + min(c_nationkey))/2 c_nationkey from customer where c_custkey < 100 group by c_nationkey"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference,
                          True)  #TODO: Change sum/count for avg KC

        queryId = 'TEST_05'
        query = "select c.c_custkey, c.c_nationkey, n.n_regionkey from customer as c inner join nation as n on c.c_nationkey = n.n_nationkey where n.n_regionkey = 1 and c.c_custkey < 50"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_06'
        query = "select c_custkey, c_nationkey, c_acctbal from customer order by c_nationkey, c_acctbal"
        runTest.run_query(bc, drill, query, queryId, queryType, 0, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_07'
        query = "select c_custkey + c_nationkey, c_acctbal from customer order by 1, 2"
        runTest.run_query(bc, drill, query, queryId, queryType, 0, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_08'
        query = "select n1.n_nationkey as supp_nation, n2.n_nationkey as cust_nation, l.l_extendedprice * l.l_discount from supplier as s inner join lineitem as l on s.s_suppkey = l.l_suppkey inner join orders as o on o.o_orderkey = l.l_orderkey inner join customer as c on c.c_custkey = o.o_custkey inner join nation as n1 on s.s_nationkey = n1.n_nationkey inner join nation as n2 on c.c_nationkey = n2.n_nationkey where n1.n_nationkey = 1 and n2.n_nationkey = 2 and o.o_orderkey < 10000"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_09'
        query = "select c_custkey, c_nationkey as nkey from customer where c_custkey < 0 and c_nationkey >=30"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_10'
        query = "select sin(c_acctbal), cos(c_acctbal), asin(c_acctbal), acos(c_acctbal), ln(c_acctbal), tan(c_acctbal), atan(c_acctbal), floor(c_acctbal), c_acctbal from customer"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_11'
        query = "select n1.n_nationkey as n1key, n2.n_nationkey as n2key, n1.n_nationkey + n2.n_nationkey from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 where n1.n_nationkey < 10 and n1.n_nationkey > 5"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_12'
        query = "select count(n1.n_nationkey) as n1key, count(n2.n_nationkey) as n2key, count(*) as cstar from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_13'
        query = "select o_orderkey, o_custkey from orders where o_orderkey < 10 and o_orderkey >= 1"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_14'
        query = "select 100168549 - sum(o_orderkey)/count(o_orderkey), 56410984/sum(o_totalprice), (123 - 945/max(o_orderkey))/(sum(81619/o_orderkey)/count(81619/o_orderkey)) from orders"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference,
                          True)  #TODO: Change sum/count for avg KC

        queryId = 'TEST_15'
        query = "select o_orderkey, sum(o_totalprice)/count(o_orderstatus) from orders where o_custkey < 100 group by o_orderstatus, o_orderkey"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_16'
        query = "select o_orderkey, o_orderstatus from orders where o_custkey < 10 and o_orderstatus <> 'O' order by o_orderkey, o_orderstatus limit 50"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_17'
        query = "select count(o_orderstatus) from orders where o_orderstatus <> 'O'"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_18'
        query = "select count(o_orderkey), sum(o_orderkey), o_clerk from orders where o_custkey < 1000 group by o_clerk, o_orderstatus"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_19'
        query = "select sum(o_orderkey)/count(o_orderkey) from orders group by o_orderstatus"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference,
                          True)  #TODO: Change sum/count for avg KC

        queryId = 'TEST_20'
        query = "select count(o_shippriority), sum(o_totalprice) from orders group by o_shippriority"
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_21'
        query = """with regionTemp as ( select r_regionkey, r_name from region where r_regionkey > 2 ),
        nationTemp as(select n_nationkey, n_regionkey as fkey, n_name from nation where n_nationkey > 3 order by n_nationkey)
        select regionTemp.r_name, nationTemp.n_name from regionTemp inner join nationTemp on regionTemp.r_regionkey = nationTemp.fkey"""
        runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage)

        queryId = 'TEST_22'
        query = """select o.o_totalprice, l.l_partkey from orders as o
            left outer join lineitem as l on o.o_custkey = l.l_linenumber and l.l_suppkey = o.o_orderkey where l.l_linenumber < 1000"""
        runTest.run_query_performance(bc, drill, query, queryId, queryType,
                                      worder, '', acceptable_difference,
                                      use_percentage)

        queryId = 'TEST_23'
        query = """select o.o_orderkey, o.o_totalprice, l.l_partkey, l.l_returnflag from lineitem as l 
            inner join orders as o on o.o_orderkey = l.l_orderkey
            inner join customer as c on c.c_custkey = o.o_custkey where c.c_custkey < 1000"""
        runTest.run_query_performance(bc, drill, query, queryId, queryType,
                                      worder, '', acceptable_difference,
                                      use_percentage)

        queryId = 'TEST_24'
        query = """select o.o_orderkey, o.o_totalprice, l.l_partkey, l.l_linestatus from orders as o
            full outer join lineitem as l on l.l_orderkey = o.o_orderkey where o.o_orderkey < 1000"""
        runTest.run_query_performance(bc, drill, query, queryId, queryType,
                                      worder, '', acceptable_difference,
                                      use_percentage)

        runTest.save_log()