def createTestEleven(dataTable):
    """Generate test 11: a batch of two range selects with no overlap.

    Writes the query script and the expected results through the handle pair
    returned by ``data_gen_utils.openFileHandles(11, TEST_DIR=TEST_BASE_DIR)``.

    Args:
        dataTable: Table holding the 'col1' and 'col4' columns. It is indexed
            by column name and filtered with boolean masks (presumably a
            pandas DataFrame -- confirm against the caller).
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(
        11, TEST_DIR=TEST_BASE_DIR)
    try:
        # prelude and query
        script_lines = (
            '--\n',
            '-- Testing for batching queries\n',
            '-- 2 queries with NO overlap\n',
            '--\n',
            '-- Query in SQL:\n',
            '-- SELECT col4 FROM tbl3_batch WHERE col1 >= 10 AND col1 < 20;\n',
            '-- SELECT col4 FROM tbl3_batch WHERE col1 >= 800 AND col1 < 830;\n',
            '--\n',
            '--\n',
            'batch_queries()\n',
            's1=select(db1.tbl3_batch.col1,10,20)\n',
            's2=select(db1.tbl3_batch.col1,800,830)\n',
            'batch_execute()\n',
            'f1=fetch(db1.tbl3_batch.col4,s1)\n',
            'f2=fetch(db1.tbl3_batch.col4,s2)\n',
            'print(f1)\n',
            'print(f2)\n',
        )
        for line in script_lines:
            output_file.write(line)

        # generate expected results (half-open ranges, matching the SQL above)
        dfSelectMask1 = (dataTable['col1'] >= 10) & (dataTable['col1'] < 20)
        dfSelectMask2 = (dataTable['col1'] >= 800) & (dataTable['col1'] < 830)
        output1 = dataTable[dfSelectMask1]['col4']
        output2 = dataTable[dfSelectMask2]['col4']
        exp_output_file.write(data_gen_utils.outputPrint(output1))
        exp_output_file.write('\n\n')
        exp_output_file.write(data_gen_utils.outputPrint(output2))
        exp_output_file.write('\n')
    finally:
        # Release both handles even if generation fails midway.
        data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest29(dataTable, dataSize):
    """Generate test 29: two range selects on col3 of tbl4_clustered_btree.

    Two random half-open ranges on 'col3' are queried and 'col1' is fetched
    for the qualifying rows. The script and the expected results are written
    through the handle pair returned by
    ``data_gen_utils.openFileHandles(29, TEST_DIR=TEST_BASE_DIR)``.

    Args:
        dataTable: Table holding the 'col1' and 'col3' columns. It is indexed
            by column name and filtered with boolean masks (presumably a
            pandas DataFrame -- confirm against the caller).
        dataSize: Number used to scale the range widths and the upper bound
            of the random range starts.
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(
        29, TEST_DIR=TEST_BASE_DIR)
    try:
        # NOTE(review): this header fragment is repeated a few lines below; it
        # is kept so the generated script stays byte-identical.
        output_file.write('--\n')
        output_file.write('-- Query in SQL:\n')

        # Range widths scale with dataSize but never drop below 1 and 2.
        offset = np.max([1, int(dataSize / 5000)])
        offset2 = np.max([2, int(dataSize / 2500)])
        # randint requires high > low; clamp so a tiny dataSize does not
        # raise ValueError.
        val1 = np.random.randint(0, max(1, int((dataSize / 5) - offset)))
        val2 = np.random.randint(0, max(1, int((dataSize / 5) - offset2)))
        ranges = ((val1, val1 + offset), (val2, val2 + offset2))

        # generate test 29
        script_lines = [
            '--\n',
            '-- tbl4_clustered_btree has a secondary sorted index on col2, and a clustered b-tree index on col3\n',
            '-- testing for correctness\n',
            '--\n',
            '-- Query in SQL:\n',
        ]
        for low, high in ranges:
            script_lines.append(
                '-- SELECT col1 FROM tbl4_clustered_btree WHERE col3 >= {} and col3 < {};\n'
                .format(low, high))
        script_lines.append('--\n')
        script_lines.append(
            '-- since col3 has a clustered index, the index is expected to be used by the select operator\n'
        )
        for query_id, (low, high) in enumerate(ranges, start=1):
            script_lines.append(
                's{}=select(db1.tbl4_clustered_btree.col3,{},{})\n'.format(
                    query_id, low, high))
            script_lines.append(
                'f{}=fetch(db1.tbl4_clustered_btree.col1,s{})\n'.format(
                    query_id, query_id))
            script_lines.append('print(f{})\n'.format(query_id))
        for line in script_lines:
            output_file.write(line)

        # generate expected results
        dfSelectMask1 = (dataTable['col3'] >= val1) & (
            dataTable['col3'] < (val1 + offset))
        dfSelectMask2 = (dataTable['col3'] >= val2) & (
            dataTable['col3'] < (val2 + offset2))
        output1 = dataTable[dfSelectMask1]['col1']
        output2 = dataTable[dfSelectMask2]['col1']
        exp_output_file.write(data_gen_utils.outputPrint(output1))
        exp_output_file.write('\n\n')
        exp_output_file.write(data_gen_utils.outputPrint(output2))
        exp_output_file.write('\n')
    finally:
        # Release both handles even if generation fails midway.
        data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestFifteen(dataTable):
    """Generate test 15: a batch of ten nested (fully overlapping) selects.

    Query ``i`` selects ``col4`` in ``[randomVal + 2*i, randomVal + 60 - 2*i)``,
    so each range lies inside the previous one. 'col1' is fetched and printed
    for every query. The script and the expected results are written through
    the handle pair returned by
    ``data_gen_utils.openFileHandles(15, TEST_DIR=TEST_BASE_DIR)``.

    Args:
        dataTable: Table holding the 'col1' and 'col4' columns. It is indexed
            by column name and filtered with boolean masks (presumably a
            pandas DataFrame -- confirm against the caller).
    """
    num_queries = 10
    output_file, exp_output_file = data_gen_utils.openFileHandles(
        15, TEST_DIR=TEST_BASE_DIR)
    try:
        randomVal = np.random.randint(1000, 9900)
        # Compute each range once so the script and the expected output
        # cannot drift apart.
        ranges = [(randomVal + (2 * i), randomVal + 60 - (2 * i))
                  for i in range(num_queries)]

        # prelude and query
        script_lines = [
            '--\n',
            '-- Testing for batching queries\n',
            '-- Queries with full overlap (subsumption)\n',
            '--\n',
            '-- Query in SQL:\n',
            '-- 10 Queries of the type:\n',
            '-- SELECT col1 FROM tbl3_batch WHERE col4 >= _ AND col4 < _;\n',
            '--\n',
            '--\n',
            'batch_queries()\n',
        ]
        for i, (low, high) in enumerate(ranges):
            script_lines.append(
                's{}=select(db1.tbl3_batch.col4,{},{})\n'.format(i, low, high))
        script_lines.append('batch_execute()\n')
        for i in range(num_queries):
            script_lines.append(
                'f{}=fetch(db1.tbl3_batch.col1,s{})\n'.format(i, i))
        for i in range(num_queries):
            script_lines.append('print(f{})\n'.format(i))
        for line in script_lines:
            output_file.write(line)

        # generate expected results
        for low, high in ranges:
            dfSelectMask = (dataTable['col4'] >= low) & (dataTable['col4'] < high)
            output = dataTable[dfSelectMask]['col1']
            exp_output_file.write(data_gen_utils.outputPrint(output))
            exp_output_file.write('\n\n')
    finally:
        # Release both handles even if generation fails midway.
        data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestFourteen(dataTable):
    """Generate test 14: a batch of ten selects whose ranges do not overlap.

    Query ``i`` selects ``col4`` in ``[1000*i, 1000*i + 30)``. 'col1' is
    fetched and printed for every query. The script and the expected results
    are written through the handle pair returned by
    ``data_gen_utils.openFileHandles(14, TEST_DIR=TEST_BASE_DIR)``.

    Args:
        dataTable: Table holding the 'col1' and 'col4' columns. It is indexed
            by column name and filtered with boolean masks (presumably a
            pandas DataFrame -- confirm against the caller).
    """
    num_queries = 10
    # Compute each range once so the script and the expected output cannot
    # drift apart.
    ranges = [((1000 * i), (1000 * i) + 30) for i in range(num_queries)]

    output_file, exp_output_file = data_gen_utils.openFileHandles(
        14, TEST_DIR=TEST_BASE_DIR)
    try:
        # prelude and query
        script_lines = [
            '--\n',
            '-- Testing for batching queries\n',
            '-- Queries with no overlap\n',
            '--\n',
            '-- Query in SQL:\n',
            '-- 10 Queries of the type:\n',
            '-- SELECT col1 FROM tbl3_batch WHERE col4 >= _ AND col4 < _;\n',
            '--\n',
            '--\n',
            'batch_queries()\n',
        ]
        for i, (low, high) in enumerate(ranges):
            script_lines.append(
                's{}=select(db1.tbl3_batch.col4,{},{})\n'.format(i, low, high))
        script_lines.append('batch_execute()\n')
        for i in range(num_queries):
            script_lines.append(
                'f{}=fetch(db1.tbl3_batch.col1,s{})\n'.format(i, i))
        for i in range(num_queries):
            script_lines.append('print(f{})\n'.format(i))
        for line in script_lines:
            output_file.write(line)

        # generate expected results
        for low, high in ranges:
            dfSelectMask = (dataTable['col4'] >= low) & (dataTable['col4'] < high)
            output = dataTable[dfSelectMask]['col1']
            exp_output_file.write(data_gen_utils.outputPrint(output))
            exp_output_file.write('\n\n')
    finally:
        # Release both handles even if generation fails midway.
        data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest22(dataTable, dataSize):
    """Generate test 22: a select on col3 refined by a second range on col2.

    The script selects a random half-open range on 'col3', fetches 'col2' for
    those rows, filters them by a second random range, then prints 'col1' of
    the survivors and its sum. The script and the expected results are
    written through the handle pair returned by
    ``data_gen_utils.openFileHandles(22, TEST_DIR=TEST_BASE_DIR)``.

    Args:
        dataTable: Table holding the 'col1', 'col2' and 'col3' columns. It is
            indexed by column name and filtered with boolean masks
            (presumably a pandas DataFrame -- confirm against the caller).
        dataSize: Number used to scale the width of the col3 range and the
            upper bound of its random start.
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(
        22, TEST_DIR=TEST_BASE_DIR)
    try:
        offset = np.max([1, int(dataSize / 10)])
        offset2 = 2000
        # randint requires high > low; clamp so a tiny dataSize does not
        # raise ValueError.
        val1 = np.random.randint(0, max(1, int((dataSize / 5) - offset)))
        val2 = np.random.randint(0, 8000)
        col3_low, col3_high = val1, val1 + offset
        col2_low, col2_high = val2, val2 + offset2

        script_lines = (
            '-- Test for a clustered index select followed by a second predicate\n',
            '--\n',
            '-- Query in SQL:\n',
            '-- SELECT sum(col1) FROM tbl4 WHERE (col3 >= {} and col3 < {}) AND (col2 >= {} and col2 < {});\n'
            .format(col3_low, col3_high, col2_low, col2_high),
            '--\n',
            's1=select(db1.tbl4.col3,{},{})\n'.format(col3_low, col3_high),
            'f1=fetch(db1.tbl4.col2,s1)\n',
            's2=select(s1,f1,{},{})\n'.format(col2_low, col2_high),
            'f2=fetch(db1.tbl4.col1,s2)\n',
            'print(f2)\n',
            'a1=sum(f2)\n',
            'print(a1)\n',
        )
        for line in script_lines:
            output_file.write(line)

        # generate expected results: both predicates must hold
        dfTotalMask = ((dataTable['col3'] >= col3_low)
                       & (dataTable['col3'] < col3_high)
                       & (dataTable['col2'] >= col2_low)
                       & (dataTable['col2'] < col2_high))
        values = dataTable[dfTotalMask]['col1']
        exp_output_file.write(data_gen_utils.outputPrint(values))
        exp_output_file.write('\n\n')
        exp_output_file.write(str(values.sum()) + '\n')
    finally:
        # Release both handles even if generation fails midway.
        data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTests16And17(dataTable, dataSize):
    """Generate tests 16 and 17: the same 100 selects, unbatched and batched.

    Both scripts run identical range selects on 'col2' and fetch/print
    'col3'. Test 17 additionally wraps the selects in
    ``batch_queries()`` / ``batch_execute()``. The expected output is the
    same for both tests.

    Args:
        dataTable: Table holding the 'col2' and 'col3' columns. It is indexed
            by column name and filtered with boolean masks (presumably a
            pandas DataFrame -- confirm against the caller).
        dataSize: Number used to scale the range width and the upper bound of
            the random range starts.
    """
    num_queries = 100
    # Original author's note: 1 / 1000 tuples should qualify on average, so
    # that most time is spent on scans and not on fetches or prints.
    # NOTE(review): this depends on the distribution of col2 -- confirm.
    offset = np.max([1, int(dataSize / 5000)])
    # Pass an explicit integer bound, clamped to 1, because randint requires
    # high > low.
    start_bound = max(1, int(dataSize / 8))
    query_starts = np.random.randint(0, start_bound, size=num_queries)
    ranges = [(start, start + offset) for start in query_starts]

    select_lines = [
        's{}=select(db1.tbl3_batch.col2,{},{})\n'.format(i, low, high)
        for i, (low, high) in enumerate(ranges)
    ]
    fetch_lines = [
        'f{}=fetch(db1.tbl3_batch.col3,s{})\n'.format(i, i)
        for i in range(num_queries)
    ]
    print_lines = ['print(f{})\n'.format(i) for i in range(num_queries)]

    header16 = [
        '--\n',
        '-- Control timing for without batching\n',
        '-- Queries for 16 and 17 are identical.\n',
        '-- Query in SQL:\n',
        '-- 100 Queries of the type:\n',
        '-- SELECT col3 FROM tbl3_batch WHERE col2 >= _ AND col2 < _;\n',
        '--\n',
    ]
    header17 = [
        '--\n',
        '-- Same queries with batching\n',
        '-- Queries for 16 and 17 are identical.\n',
        '--\n',
        'batch_queries()\n',
    ]
    script16 = header16 + select_lines + fetch_lines + print_lines
    script17 = (header17 + select_lines + ['batch_execute()\n']
                + fetch_lines + print_lines)

    output_file16, exp_output_file16 = data_gen_utils.openFileHandles(
        16, TEST_DIR=TEST_BASE_DIR)
    try:
        output_file17, exp_output_file17 = data_gen_utils.openFileHandles(
            17, TEST_DIR=TEST_BASE_DIR)
        try:
            for line in script16:
                output_file16.write(line)
            for line in script17:
                output_file17.write(line)

            # generate expected results
            for low, high in ranges:
                dfSelectMask = (dataTable['col2'] >= low) & (
                    dataTable['col2'] < high)
                output = dataTable[dfSelectMask]['col3']
                # Format once; both tests expect the same text.
                expected_text = data_gen_utils.outputPrint(output)
                exp_output_file16.write(expected_text)
                exp_output_file16.write('\n\n')
                exp_output_file17.write(expected_text)
                exp_output_file17.write('\n\n')
        finally:
            # Release test 17's handles even if generation fails midway.
            data_gen_utils.closeFileHandles(output_file17, exp_output_file17)
    finally:
        # Release test 16's handles even if opening test 17 fails.
        data_gen_utils.closeFileHandles(output_file16, exp_output_file16)