Beispiel #1
0
def createTestEleven(dataTable):
    """Generate test 11: two batched selects on col1 with non-overlapping ranges.

    The query script goes to the test file and the matching ``col4`` values,
    computed from ``dataTable``, go to the expected-output file.

    Args:
        dataTable: table indexed by column name and filtered with boolean
            masks (presumably a pandas DataFrame -- confirm with the caller).
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(
        11, TEST_DIR=TEST_BASE_DIR)

    # Header comments followed by the batched query script, in emission order.
    script_lines = (
        '--\n',
        '-- Testing for batching queries\n',
        '-- 2 queries with NO overlap\n',
        '--\n',
        '-- Query in SQL:\n',
        '-- SELECT col4 FROM tbl3_batch WHERE col1 >= 10 AND col1 < 20;\n',
        '-- SELECT col4 FROM tbl3_batch WHERE col1 >= 800 AND col1 < 830;\n',
        '--\n',
        '--\n',
        'batch_queries()\n',
        's1=select(db1.tbl3_batch.col1,10,20)\n',
        's2=select(db1.tbl3_batch.col1,800,830)\n',
        'batch_execute()\n',
        'f1=fetch(db1.tbl3_batch.col4,s1)\n',
        'f2=fetch(db1.tbl3_batch.col4,s2)\n',
        'print(f1)\n',
        'print(f2)\n',
    )
    for script_line in script_lines:
        output_file.write(script_line)

    # Expected results: col4 of the rows whose col1 lies in each half-open range.
    in_first_range = (dataTable['col1'] >= 10) & (dataTable['col1'] < 20)
    in_second_range = (dataTable['col1'] >= 800) & (dataTable['col1'] < 830)
    first_result = dataTable[in_first_range]['col4']
    second_result = dataTable[in_second_range]['col4']

    # The two result sets are separated by a blank line.
    exp_output_file.write(data_gen_utils.outputPrint(first_result))
    exp_output_file.write('\n\n')
    exp_output_file.write(data_gen_utils.outputPrint(second_result))
    exp_output_file.write('\n')

    data_gen_utils.closeFileHandles(output_file, exp_output_file)
Beispiel #2
0
def createTest29(dataTable, dataSize):
    """Generate test 29: two range selects on col3 of tbl4_clustered_btree.

    Two random half-open ranges on ``col3`` are chosen; for each, the script
    selects on ``col3``, fetches ``col1`` and prints it. The expected ``col1``
    values are computed from ``dataTable`` and written to the expected-output
    file.

    Args:
        dataTable: table indexed by column name and filtered with boolean
            masks (presumably a pandas DataFrame -- confirm with the caller).
        dataSize: size used to scale the range widths and the range of
            starting values (presumably the row count of ``dataTable``).
    """
    # Range widths grow with dataSize but never drop below 1 and 2.
    offset = np.max([1, int(dataSize / 5000)])
    offset2 = np.max([2, int(dataSize / 2500)])
    # np.random.randint needs low < high. For small dataSize the computed upper
    # bound is <= 0, so clamp it to 1 (the start value is then always 0)
    # instead of raising ValueError.
    val1 = np.random.randint(0, max(1, int((dataSize / 5) - offset)))
    val2 = np.random.randint(0, max(1, int((dataSize / 5) - offset2)))
    end1 = val1 + offset
    end2 = val2 + offset2

    # All random values are drawn before the files are opened, and the handles
    # are closed in ``finally`` so a failed write or lookup cannot leak them.
    output_file, exp_output_file = data_gen_utils.openFileHandles(
        29, TEST_DIR=TEST_BASE_DIR)
    try:
        # NOTE: the first two header lines repeat further down; they are kept
        # so the generated file is byte-identical to earlier output.
        output_file.write('--\n')
        output_file.write('-- Query in SQL:\n')
        output_file.write('--\n')
        output_file.write(
            '-- tbl4_clustered_btree has a secondary sorted index on col2, and a clustered b-tree index on col3\n'
        )
        output_file.write('-- testing for correctness\n')
        output_file.write('--\n')
        output_file.write('-- Query in SQL:\n')
        output_file.write(
            '-- SELECT col1 FROM tbl4_clustered_btree WHERE col3 >= {} and col3 < {};\n'
            .format(val1, end1))
        output_file.write(
            '-- SELECT col1 FROM tbl4_clustered_btree WHERE col3 >= {} and col3 < {};\n'
            .format(val2, end2))
        output_file.write('--\n')
        output_file.write(
            '-- since col3 has a clustered index, the index is expected to be used by the select operator\n'
        )
        output_file.write(
            's1=select(db1.tbl4_clustered_btree.col3,{},{})\n'.format(
                val1, end1))
        output_file.write('f1=fetch(db1.tbl4_clustered_btree.col1,s1)\n')
        output_file.write('print(f1)\n')
        output_file.write(
            's2=select(db1.tbl4_clustered_btree.col3,{},{})\n'.format(
                val2, end2))
        output_file.write('f2=fetch(db1.tbl4_clustered_btree.col1,s2)\n')
        output_file.write('print(f2)\n')

        # Expected results: col1 of the rows whose col3 lies in [val, end).
        dfSelectMask1 = (dataTable['col3'] >= val1) & (dataTable['col3'] < end1)
        dfSelectMask2 = (dataTable['col3'] >= val2) & (dataTable['col3'] < end2)
        output1 = dataTable[dfSelectMask1]['col1']
        output2 = dataTable[dfSelectMask2]['col1']
        exp_output_file.write(data_gen_utils.outputPrint(output1))
        exp_output_file.write('\n\n')
        exp_output_file.write(data_gen_utils.outputPrint(output2))
        exp_output_file.write('\n')
    finally:
        data_gen_utils.closeFileHandles(output_file, exp_output_file)
Beispiel #3
0
def createTestFifteen(dataTable):
    """Generate test 15: ten batched selects on col4 with nested ranges.

    Query ``i`` covers ``[base + 2*i, base + 60 - 2*i)`` for a random ``base``,
    so every range is contained in the one before it. The expected ``col1``
    values for each query are computed from ``dataTable``.

    Args:
        dataTable: table indexed by column name and filtered with boolean
            masks (presumably a pandas DataFrame -- confirm with the caller).
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(
        15, TEST_DIR=TEST_BASE_DIR)

    for comment_line in ('--\n',
                         '-- Testing for batching queries\n',
                         '-- Queries with full overlap (subsumption)\n',
                         '--\n'):
        output_file.write(comment_line)

    # One random base value; each successive query shrinks the range by 2 on
    # both ends.
    base = np.random.randint(1000, 9900)
    bounds = [(base + (2 * i), base + 60 - (2 * i)) for i in range(10)]

    for comment_line in ('-- Query in SQL:\n',
                         '-- 10 Queries of the type:\n',
                         '-- SELECT col1 FROM tbl3_batch WHERE col4 >= _ AND col4 < _;\n',
                         '--\n',
                         '--\n'):
        output_file.write(comment_line)

    # Selects are batched; fetches and prints follow after batch execution.
    output_file.write('batch_queries()\n')
    for query_id, (low, high) in enumerate(bounds):
        output_file.write('s{}=select(db1.tbl3_batch.col4,{},{})\n'.format(
            query_id, low, high))
    output_file.write('batch_execute()\n')
    for query_id in range(len(bounds)):
        output_file.write(
            'f{}=fetch(db1.tbl3_batch.col1,s{})\n'.format(query_id, query_id))
    for query_id in range(len(bounds)):
        output_file.write('print(f{})\n'.format(query_id))

    # Expected results, one block per query, each followed by a blank line.
    for low, high in bounds:
        in_range = (dataTable['col4'] >= low) & (dataTable['col4'] < high)
        expected = dataTable[in_range]['col1']
        exp_output_file.write(data_gen_utils.outputPrint(expected))
        exp_output_file.write('\n\n')

    data_gen_utils.closeFileHandles(output_file, exp_output_file)
Beispiel #4
0
def createTestFourteen(dataTable):
    """Generate test 14: ten batched selects on col4 with disjoint ranges.

    Query ``i`` covers ``[1000*i, 1000*i + 30)``. The expected ``col1`` values
    for each query are computed from ``dataTable``.

    Args:
        dataTable: table indexed by column name and filtered with boolean
            masks (presumably a pandas DataFrame -- confirm with the caller).
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(
        14, TEST_DIR=TEST_BASE_DIR)

    # Fixed, non-overlapping half-open ranges, one per query.
    bounds = [((1000 * i), (1000 * i) + 30) for i in range(10)]

    header = (
        '--\n',
        '-- Testing for batching queries\n',
        '-- Queries with no overlap\n',
        '--\n',
        '-- Query in SQL:\n',
        '-- 10 Queries of the type:\n',
        '-- SELECT col1 FROM tbl3_batch WHERE col4 >= _ AND col4 < _;\n',
        '--\n',
        '--\n',
    )
    for header_line in header:
        output_file.write(header_line)

    # Selects are batched; fetches and prints follow after batch execution.
    output_file.write('batch_queries()\n')
    for query_id, (low, high) in enumerate(bounds):
        output_file.write('s{}=select(db1.tbl3_batch.col4,{},{})\n'.format(
            query_id, low, high))
    output_file.write('batch_execute()\n')
    for query_id in range(len(bounds)):
        output_file.write(
            'f{}=fetch(db1.tbl3_batch.col1,s{})\n'.format(query_id, query_id))
    for query_id in range(len(bounds)):
        output_file.write('print(f{})\n'.format(query_id))

    # Expected results, one block per query, each followed by a blank line.
    for low, high in bounds:
        in_range = (dataTable['col4'] >= low) & (dataTable['col4'] < high)
        expected = dataTable[in_range]['col1']
        exp_output_file.write(data_gen_utils.outputPrint(expected))
        exp_output_file.write('\n\n')

    data_gen_utils.closeFileHandles(output_file, exp_output_file)
Beispiel #5
0
def createTest22(dataTable, dataSize):
    """Generate test 22: a select on col3 refined by a second predicate on col2.

    The script selects a random range on ``col3``, fetches ``col2`` for those
    positions, filters them by a second random range, then fetches ``col1``
    and prints both the values and their sum. The expected values and sum are
    computed from ``dataTable``.

    Args:
        dataTable: table indexed by column name and filtered with boolean
            masks (presumably a pandas DataFrame -- confirm with the caller).
        dataSize: size used to scale the width and starting value of the
            ``col3`` range (presumably the row count of ``dataTable``).
    """
    offset = np.max([1, int(dataSize / 10)])
    offset2 = 2000
    # np.random.randint needs low < high. For small dataSize the computed upper
    # bound is <= 0, so clamp it to 1 (the start value is then always 0)
    # instead of raising ValueError.
    val1 = np.random.randint(0, max(1, int((dataSize / 5) - offset)))
    val2 = np.random.randint(0, 8000)
    end1 = val1 + offset
    end2 = val2 + offset2

    # All random values are drawn before the files are opened, and the handles
    # are closed in ``finally`` so a failed write or lookup cannot leak them.
    output_file, exp_output_file = data_gen_utils.openFileHandles(
        22, TEST_DIR=TEST_BASE_DIR)
    try:
        output_file.write(
            '-- Test for a clustered index select followed by a second predicate\n')
        output_file.write('--\n')
        output_file.write('-- Query in SQL:\n')
        output_file.write(
            '-- SELECT sum(col1) FROM tbl4 WHERE (col3 >= {} and col3 < {}) AND (col2 >= {} and col2 < {});\n'
            .format(val1, end1, val2, end2))
        output_file.write('--\n')
        output_file.write('s1=select(db1.tbl4.col3,{},{})\n'.format(val1, end1))
        output_file.write('f1=fetch(db1.tbl4.col2,s1)\n')
        # Second select filters the fetched col2 values at the s1 positions.
        output_file.write('s2=select(s1,f1,{},{})\n'.format(val2, end2))
        output_file.write('f2=fetch(db1.tbl4.col1,s2)\n')
        output_file.write('print(f2)\n')
        output_file.write('a1=sum(f2)\n')
        output_file.write('print(a1)\n')

        # Expected results: rows satisfying both half-open range predicates.
        dfTotalMask = ((dataTable['col3'] >= val1)
                       & (dataTable['col3'] < end1)
                       & (dataTable['col2'] >= val2)
                       & (dataTable['col2'] < end2))
        values = dataTable[dfTotalMask]['col1']
        exp_output_file.write(data_gen_utils.outputPrint(values))
        exp_output_file.write('\n\n')
        exp_output_file.write(str(values.sum()) + '\n')
    finally:
        data_gen_utils.closeFileHandles(output_file, exp_output_file)
Beispiel #6
0
def createTests16And17(dataTable, dataSize):
    """Generate tests 16 and 17: the same 100 range selects, unbatched and batched.

    Test 16 issues the selects one by one; test 17 wraps the identical selects
    in ``batch_queries()`` / ``batch_execute()``. Both fetch and print ``col3``
    for every query, and both expected-output files receive the same results
    computed from ``dataTable``.

    Args:
        dataTable: table indexed by column name and filtered with boolean
            masks (presumably a pandas DataFrame -- confirm with the caller).
        dataSize: size used to scale the range width and the range of
            starting values (presumably the row count of ``dataTable``).
    """
    num_queries = 100
    # Narrow ranges so that most time is spent on scans and not on fetches or
    # prints.
    # NOTE(review): the original comment states that 1 / 1000 tuples should
    # qualify on average -- confirm against the dataSize / 5000 width used here.
    offset = np.max([1, int(dataSize / 5000)])
    # np.random.randint needs an upper bound above low: convert the float
    # quotient explicitly and clamp to 1 so dataSize < 8 does not raise
    # ValueError.
    query_starts = np.random.randint(
        0, max(1, int(dataSize / 8)), size=(num_queries))

    # Nested try/finally so each pair of handles is closed even if opening the
    # second pair, a write, or a column lookup fails.
    output_file16, exp_output_file16 = data_gen_utils.openFileHandles(
        16, TEST_DIR=TEST_BASE_DIR)
    try:
        output_file17, exp_output_file17 = data_gen_utils.openFileHandles(
            17, TEST_DIR=TEST_BASE_DIR)
        try:
            output_file16.write('--\n')
            output_file16.write('-- Control timing for without batching\n')
            output_file16.write('-- Queries for 16 and 17 are identical.\n')
            output_file16.write('-- Query in SQL:\n')
            output_file16.write('-- 100 Queries of the type:\n')
            output_file16.write(
                '-- SELECT col3 FROM tbl3_batch WHERE col2 >= _ AND col2 < _;\n')
            output_file16.write('--\n')
            output_file17.write('--\n')
            output_file17.write('-- Same queries with batching\n')
            output_file17.write('-- Queries for 16 and 17 are identical.\n')
            output_file17.write('--\n')
            output_file17.write('batch_queries()\n')

            # Identical script lines go to both files; only test 17 wraps the
            # selects in a batch.
            for i in range(num_queries):
                select_line = 's{}=select(db1.tbl3_batch.col2,{},{})\n'.format(
                    i, query_starts[i], query_starts[i] + offset)
                output_file16.write(select_line)
                output_file17.write(select_line)
            output_file17.write('batch_execute()\n')
            for i in range(num_queries):
                fetch_line = 'f{}=fetch(db1.tbl3_batch.col3,s{})\n'.format(i, i)
                output_file16.write(fetch_line)
                output_file17.write(fetch_line)
            for i in range(num_queries):
                print_line = 'print(f{})\n'.format(i)
                output_file16.write(print_line)
                output_file17.write(print_line)

            # Expected results: computed once per query, written to both files.
            for i in range(num_queries):
                dfSelectMask = (dataTable['col2'] >= query_starts[i]) & (
                    dataTable['col2'] < (query_starts[i] + offset))
                expected_text = data_gen_utils.outputPrint(
                    dataTable[dfSelectMask]['col3'])
                exp_output_file16.write(expected_text)
                exp_output_file16.write('\n\n')
                exp_output_file17.write(expected_text)
                exp_output_file17.write('\n\n')
        finally:
            data_gen_utils.closeFileHandles(output_file17, exp_output_file17)
    finally:
        data_gen_utils.closeFileHandles(output_file16, exp_output_file16)