def createTests26And27(dataTable, dataSize):
    """Generate tests 26 (control scan) and 27 (B-tree) with identical avg queries."""
    output_file26, exp_output_file26 = data_gen_utils.openFileHandles(
        26, TEST_DIR=TEST_BASE_DIR)
    output_file27, exp_output_file27 = data_gen_utils.openFileHandles(
        27, TEST_DIR=TEST_BASE_DIR)
    offset = np.max([2, int(dataSize / 500)])
    output_file26.writelines([
        '-- Test for a non-clustered index select followed by an aggregate (control-test, many queries)\n',
        '-- Compare to test 27 for timing differences between B-tree and scan for highly selective queries\n',
        '--\n',
        '-- Query form in SQL:\n',
        '-- SELECT avg(col3) FROM tbl4_ctrl WHERE (col2 >= _ and col2 < _);\n',
        '--\n',
    ])
    output_file27.writelines([
        '-- Test for a non-clustered index select followed by an aggregate (many queries)\n',
        '--\n',
        '-- Query form in SQL:\n',
        '-- SELECT avg(col3) FROM tbl4 WHERE (col2 >= _ and col2 < _);\n',
        '--\n',
    ])
    for i in range(100):
        val1 = np.random.randint(0, int((dataSize / 5) - offset))
        hi = val1 + offset
        # Identical query sequence against the control table and the indexed table.
        for handle, tbl in ((output_file26, 'tbl4_ctrl'), (output_file27, 'tbl4')):
            handle.write('s{}=select(db1.{}.col2,{},{})\n'.format(i, tbl, val1, hi))
            handle.write('f{}=fetch(db1.{}.col3,s{})\n'.format(i, tbl, i))
            handle.write('a{}=avg(f{})\n'.format(i, i))
            handle.write('print(a{})\n'.format(i))
        # Expected output is the same for both tests.
        mask = (dataTable['col2'] >= val1) & (dataTable['col2'] < hi)
        mean_result = np.round(dataTable[mask]['col3'].mean(), PLACES_TO_ROUND)
        line = '0.00\n' if math.isnan(mean_result) else '{:0.2f}\n'.format(mean_result)
        exp_output_file26.write(line)
        exp_output_file27.write(line)
    data_gen_utils.closeFileHandles(output_file26, exp_output_file26)
    data_gen_utils.closeFileHandles(output_file27, exp_output_file27)
def createTests20And21(dataTable, dataSize):
    """Generate tests 20 (unindexed control) and 21 (clustered index) with identical range queries."""
    output_file20, exp_output_file20 = data_gen_utils.openFileHandles(20, TEST_DIR=TEST_BASE_DIR)
    output_file21, exp_output_file21 = data_gen_utils.openFileHandles(21, TEST_DIR=TEST_BASE_DIR)
    output_file20.write('--\n')
    output_file20.write('-- Query in SQL:\n')
    offset = np.max([1, int(dataSize / 5000)])
    offset2 = np.max([2, int(dataSize / 2500)])
    val1 = np.random.randint(0, int((dataSize / 5) - offset))
    val2 = np.random.randint(0, int((dataSize / 5) - offset2))
    hi1 = val1 + offset
    hi2 = val2 + offset2
    # Test 20: queries against the unindexed control table.
    output_file20.writelines([
        '-- SELECT col1 FROM tbl4_ctrl WHERE col3 >= {} and col3 < {};\n'.format(val1, hi1),
        '-- SELECT col1 FROM tbl4_ctrl WHERE col3 >= {} and col3 < {};\n'.format(val2, hi2),
        '--\n',
        's1=select(db1.tbl4_ctrl.col3,{},{})\n'.format(val1, hi1),
        'f1=fetch(db1.tbl4_ctrl.col1,s1)\n',
        'print(f1)\n',
        's2=select(db1.tbl4_ctrl.col3,{},{})\n'.format(val2, hi2),
        'f2=fetch(db1.tbl4_ctrl.col1,s2)\n',
        'print(f2)\n',
    ])
    # Test 21: same queries against tbl4, whose col3 has a clustered index.
    output_file21.writelines([
        '--\n',
        '-- tbl3 has a secondary b-tree tree index on col2, and a clustered index on col3 with the form of a sorted column\n',
        '-- testing for correctness\n',
        '--\n',
        '-- Query in SQL:\n',
        '-- SELECT col1 FROM tbl4 WHERE col3 >= {} and col3 < {};\n'.format(val1, hi1),
        '-- SELECT col1 FROM tbl4 WHERE col3 >= {} and col3 < {};\n'.format(val2, hi2),
        '--\n',
        '-- since col3 has a clustered index, the index is expected to be used by the select operator\n',
        's1=select(db1.tbl4.col3,{},{})\n'.format(val1, hi1),
        'f1=fetch(db1.tbl4.col1,s1)\n',
        'print(f1)\n',
        's2=select(db1.tbl4.col3,{},{})\n'.format(val2, hi2),
        'f2=fetch(db1.tbl4.col1,s2)\n',
        'print(f2)\n',
    ])
    # Expected output is identical for both tests.
    inRange1 = (dataTable['col3'] >= val1) & (dataTable['col3'] < hi1)
    inRange2 = (dataTable['col3'] >= val2) & (dataTable['col3'] < hi2)
    output1 = dataTable[inRange1]['col1']
    output2 = dataTable[inRange2]['col1']
    for exp_handle in (exp_output_file20, exp_output_file21):
        exp_handle.write(data_gen_utils.outputPrint(output1))
        exp_handle.write('\n\n')
        exp_handle.write(data_gen_utils.outputPrint(output2))
        exp_handle.write('\n')
    data_gen_utils.closeFileHandles(output_file20, exp_output_file20)
    data_gen_utils.closeFileHandles(output_file21, exp_output_file21)
def createTest19():
    """Generate test 19: create tbl4 with a clustered sorted index on col3 and a btree on col2."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(19, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '-- Test for creating table with indexes\n',
        '--\n',
        '-- Table tbl4 has a clustered index with col3 being the leading column.\n',
        '-- The clustered index has the form of a sorted column.\n',
        '-- The table also has a secondary btree index.\n',
        '--\n',
        '-- Loads data from: data4_btree.csv\n',
        '--\n',
        '-- Create Table\n',
        'create(tbl,"tbl4",db1,4)\n',
        'create(col,"col1",db1.tbl4)\n',
        'create(col,"col2",db1.tbl4)\n',
        'create(col,"col3",db1.tbl4)\n',
        'create(col,"col4",db1.tbl4)\n',
        '-- Create a clustered index on col3\n',
        'create(idx,db1.tbl4.col3,sorted,clustered)\n',
        '-- Create an unclustered btree index on col2\n',
        'create(idx,db1.tbl4.col2,btree,unclustered)\n',
        '--\n',
        '--\n',
        '-- Load data immediately in the form of a clustered index\n',
        'load("{}/data4_btree.csv")\n'.format(DOCKER_TEST_BASE_DIR),
        '--\n',
        '-- Testing that the data and their indexes are durable on disk.\n',
        'shutdown\n',
    ])
    # This test produces no expected output.
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest22(dataTable, dataSize):
    """Generate test 22: clustered index select refined by a second predicate, then sum."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(22, TEST_DIR=TEST_BASE_DIR)
    offset = np.max([1, int(dataSize / 10)])
    offset2 = 2000
    val1 = np.random.randint(0, int((dataSize / 5) - offset))
    val2 = np.random.randint(0, 8000)
    hi1 = val1 + offset
    hi2 = val2 + offset2
    output_file.writelines([
        '-- Test for a clustered index select followed by a second predicate\n',
        '--\n',
        '-- Query in SQL:\n',
        '-- SELECT sum(col1) FROM tbl4 WHERE (col3 >= {} and col3 < {}) AND (col2 >= {} and col2 < {});\n'.format(val1, hi1, val2, hi2),
        '--\n',
        's1=select(db1.tbl4.col3,{},{})\n'.format(val1, hi1),
        'f1=fetch(db1.tbl4.col2,s1)\n',
        's2=select(s1,f1,{},{})\n'.format(val2, hi2),
        'f2=fetch(db1.tbl4.col1,s2)\n',
        'print(f2)\n',
        'a1=sum(f2)\n',
        'print(a1)\n',
    ])
    # Expected results: rows satisfying both range predicates.
    inRange3 = (dataTable['col3'] >= val1) & (dataTable['col3'] < hi1)
    inRange2 = (dataTable['col2'] >= val2) & (dataTable['col2'] < hi2)
    values = dataTable[inRange3 & inRange2]['col1']
    exp_output_file.write(data_gen_utils.outputPrint(values))
    exp_output_file.write('\n\n')
    exp_output_file.write(str(values.sum()) + '\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest18():
    """Generate test 18: create and load the unindexed control table tbl4_ctrl."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(18, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '-- Create a control table that is identical to the one in test19.dsl, but\n',
        '-- without any indexes\n',
        '--\n',
        '-- Loads data from: data4_ctrl.csv\n',
        '--\n',
        '-- Create Table\n',
        'create(tbl,"tbl4_ctrl",db1,4)\n',
        'create(col,"col1",db1.tbl4_ctrl)\n',
        'create(col,"col2",db1.tbl4_ctrl)\n',
        'create(col,"col3",db1.tbl4_ctrl)\n',
        'create(col,"col4",db1.tbl4_ctrl)\n',
        '--\n',
        '-- Load data immediately\n',
        'load("{}/data4_ctrl.csv")\n'.format(DOCKER_TEST_BASE_DIR),
        '--\n',
        '-- Testing that the data and their indexes are durable on disk.\n',
        'shutdown\n',
    ])
    # This test produces no expected output.
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest30(dataTable, dataSize):
    """Generate test 30: five non-clustered index selects each followed by sum."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(30, TEST_DIR=TEST_BASE_DIR)
    offset = np.max([2, int(dataSize / 1000)])
    output_file.writelines([
        '-- Test for a non-clustered index select followed by an aggregate\n',
        '--\n',
        '-- Query form in SQL:\n',
        '-- SELECT sum(col3) FROM tbl4_clustered_btree WHERE (col2 >= _ and col2 < _);\n',
        '--\n',
    ])
    for i in range(5):
        val1 = np.random.randint(0, int((dataSize / 5) - offset))
        hi = val1 + offset
        output_file.write('s{}=select(db1.tbl4_clustered_btree.col2,{},{})\n'.format(i, val1, hi))
        output_file.write('f{}=fetch(db1.tbl4_clustered_btree.col3,s{})\n'.format(i, i))
        output_file.write('a{}=sum(f{})\n'.format(i, i))
        output_file.write('print(a{})\n'.format(i))
        # Expected result for this query; NaN falls back to 0.
        mask = (dataTable['col2'] >= val1) & (dataTable['col2'] < hi)
        total = dataTable[mask]['col3'].sum()
        exp_output_file.write('0\n' if math.isnan(total) else str(total) + '\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest31():
    """Generate test 31: create and load the fact and dimension tables for join tests."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(31, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '-- Creates tables for join tests\n',
        '-- without any indexes\n',
        'create(tbl,"tbl5_fact",db1,4)\n',
        'create(col,"col1",db1.tbl5_fact)\n',
        'create(col,"col2",db1.tbl5_fact)\n',
        'create(col,"col3",db1.tbl5_fact)\n',
        'create(col,"col4",db1.tbl5_fact)\n',
        'load("{}/data5_fact.csv")\n'.format(DOCKER_TEST_BASE_DIR),
        '--\n',
        'create(tbl,"tbl5_dim1",db1,3)\n',
        'create(col,"col1",db1.tbl5_dim1)\n',
        'create(col,"col2",db1.tbl5_dim1)\n',
        'create(col,"col3",db1.tbl5_dim1)\n',
        'load("{}/data5_dimension1.csv")\n'.format(DOCKER_TEST_BASE_DIR),
        '--\n',
        'create(tbl,"tbl5_dim2",db1,2)\n',
        'create(col,"col1",db1.tbl5_dim2)\n',
        'create(col,"col2",db1.tbl5_dim2)\n',
        'load("{}/data5_dimension2.csv")\n'.format(DOCKER_TEST_BASE_DIR),
        '-- Testing that the data and their indexes are durable on disk.\n',
        'shutdown\n',
    ])
    # This test produces no expected output.
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest37(factTable, dimTable1, dataSizeFact, dataSizeDim1, selectivityFact, selectivityDim1):
    """Generate test 37: select + hash join (many-many, larger selectivities) + sum/avg.

    Fix: the original never called data_gen_utils.closeFileHandles, unlike
    every sibling generator, so buffered output could be lost. The handles
    are now closed at the end.
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(37, TEST_DIR=TEST_BASE_DIR)
    # Predicate cutoffs used both in the DSL and in the expected results.
    factCutoff = int(selectivityFact * (dataSizeFact / 5))
    dimCutoff = int((dataSizeDim1 / 5) * selectivityDim1)
    output_file.writelines([
        '-- join test 4 - hashing many-many with larger selectivities.\n',
        '-- Select + Join + aggregation\n',
        '-- Query in SQL:\n',
        '-- SELECT sum(tbl5_fact.col2), avg(tbl5_dim1.col1) FROM tbl5_fact,tbl5_dim1 WHERE tbl5_fact.col1=tbl5_dim1.col1 AND tbl5_fact.col2 < {} AND tbl5_dim1.col3<{};\n'.format(factCutoff, dimCutoff),
        '--\n',
        '--\n',
        'p1=select(db1.tbl5_fact.col2,null, {})\n'.format(factCutoff),
        'p2=select(db1.tbl5_dim1.col3,null, {})\n'.format(dimCutoff),
        'f1=fetch(db1.tbl5_fact.col1,p1)\n',
        'f2=fetch(db1.tbl5_dim1.col1,p2)\n',
        't1,t2=join(f1,p1,f2,p2,hash)\n',
        'col2joined=fetch(db1.tbl5_fact.col2,t1)\n',
        'col1joined=fetch(db1.tbl5_dim1.col1,t2)\n',
        'a1=sum(col2joined)\n',
        'a2=avg(col1joined)\n',
        'print(a1,a2)\n',
    ])
    # Expected results: filter both sides, then an inner join on col1.
    dfFactTableMask = (factTable['col2'] < factCutoff)
    dfDimTableMask = (dimTable1['col3'] < dimCutoff)
    preJoinFact = factTable[dfFactTableMask]
    preJoinDim1 = dimTable1[dfDimTableMask]
    joinedTable = preJoinFact.merge(preJoinDim1, left_on='col1', right_on='col1', suffixes=('', '_right'))
    col2ValuesSum = joinedTable['col2'].sum()
    col1ValuesMean = joinedTable['col1'].mean()
    # An empty join yields NaN aggregates; fall back to zero values.
    if math.isnan(col2ValuesSum):
        exp_output_file.write('0,')
    else:
        exp_output_file.write('{},'.format(col2ValuesSum))
    if math.isnan(col1ValuesMean):
        exp_output_file.write('0.00\n')
    else:
        exp_output_file.write('{:0.2f}\n'.format(col1ValuesMean))
    # Previously missing: release the handles like the other generators do.
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestSix(dataTable, dataSizeTableTwo, approxNumOutputTuples):
    """Generate test 6: add two fetched columns over a range select."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(6, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Addition\n')
    output_file.write('--\n')
    # Pick a random range of width approxNumOutputTuples within col1's domain.
    offset = approxNumOutputTuples
    highestHighVal = int((dataSizeTableTwo / 2) - offset)
    selectValLess = np.random.randint(int(-1 * (dataSizeTableTwo / 2)), highestHighVal)
    selectValGreater = selectValLess + offset
    output_file.writelines([
        '-- SELECT col2+col3 FROM tbl2 WHERE col1 >= {} AND col1 < {};\n'.format(selectValLess, selectValGreater),
        's11=select(db1.tbl2.col1,{},{})\n'.format(selectValLess, selectValGreater),
        'f11=fetch(db1.tbl2.col2,s11)\n',
        'f12=fetch(db1.tbl2.col3,s11)\n',
        'a11=add(f11,f12)\n',
        'print(a11)\n',
    ])
    # Expected results: element-wise sum over qualifying rows.
    mask = (dataTable['col1'] >= selectValLess) & (dataTable['col1'] < selectValGreater)
    selected = dataTable[mask]
    result = selected['col3'] + selected['col2']
    exp_output_file.write(result.to_string(header=False, index=False))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestSeven(dataTable, dataSizeTableTwo, approxNumOutputTuples):
    """Generate test 7: subtract two fetched columns over a range select."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(7, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Subtraction\n')
    output_file.write('--\n')
    # Pick a random range of width approxNumOutputTuples within col1's domain.
    offset = approxNumOutputTuples
    highestHighVal = int((dataSizeTableTwo / 2) - offset)
    selectValLess = np.random.randint(int(-1 * (dataSizeTableTwo / 2)), highestHighVal)
    selectValGreater = selectValLess + offset
    output_file.writelines([
        '-- SELECT col3-col2 FROM tbl2 WHERE col1 >= {} AND col1 < {};\n'.format(selectValLess, selectValGreater),
        's21=select(db1.tbl2.col1,{},{})\n'.format(selectValLess, selectValGreater),
        'f21=fetch(db1.tbl2.col2,s21)\n',
        'f22=fetch(db1.tbl2.col3,s21)\n',
        's21=sub(f22,f21)\n',
        'print(s21)\n',
    ])
    # Expected results: element-wise difference over qualifying rows.
    mask = (dataTable['col1'] >= selectValLess) & (dataTable['col1'] < selectValGreater)
    selected = dataTable[mask]
    result = selected['col3'] - selected['col2']
    exp_output_file.write(result.to_string(header=False, index=False))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestFifteen(dataTable):
    """Generate test 15: ten batched queries with full overlap (subsumption)."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(15, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '--\n',
        '-- Testing for batching queries\n',
        '-- Queries with full overlap (subsumption)\n',
        '--\n',
    ])
    randomVal = np.random.randint(1000, 9900)
    output_file.writelines([
        '-- Query in SQL:\n',
        '-- 10 Queries of the type:\n',
        '-- SELECT col1 FROM tbl3_batch WHERE col4 >= _ AND col4 < _;\n',
        '--\n',
        '--\n',
        'batch_queries()\n',
    ])
    # Each query's range is strictly contained in the previous one's.
    bounds = [(randomVal + (2 * i), randomVal + 60 - (2 * i)) for i in range(10)]
    for i, (lo, hi) in enumerate(bounds):
        output_file.write('s{}=select(db1.tbl3_batch.col4,{},{})\n'.format(i, lo, hi))
    output_file.write('batch_execute()\n')
    for i in range(10):
        output_file.write('f{}=fetch(db1.tbl3_batch.col1,s{})\n'.format(i, i))
    for i in range(10):
        output_file.write('print(f{})\n'.format(i))
    # Expected output, one block per query.
    for lo, hi in bounds:
        mask = (dataTable['col4'] >= lo) & (dataTable['col4'] < hi)
        exp_output_file.write(data_gen_utils.outputPrint(dataTable[mask]['col1']))
        exp_output_file.write('\n\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestFourteen(dataTable):
    """Generate test 14: ten batched queries with no overlap."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(14, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '--\n',
        '-- Testing for batching queries\n',
        '-- Queries with no overlap\n',
        '--\n',
        '-- Query in SQL:\n',
        '-- 10 Queries of the type:\n',
        '-- SELECT col1 FROM tbl3_batch WHERE col4 >= _ AND col4 < _;\n',
        '--\n',
        '--\n',
        'batch_queries()\n',
    ])
    # Disjoint ranges: [0,30), [1000,1030), [2000,2030), ...
    bounds = [(1000 * i, (1000 * i) + 30) for i in range(10)]
    for i, (lo, hi) in enumerate(bounds):
        output_file.write('s{}=select(db1.tbl3_batch.col4,{},{})\n'.format(i, lo, hi))
    output_file.write('batch_execute()\n')
    for i in range(10):
        output_file.write('f{}=fetch(db1.tbl3_batch.col1,s{})\n'.format(i, i))
    for i in range(10):
        output_file.write('print(f{})\n'.format(i))
    # Expected output, one block per query.
    for lo, hi in bounds:
        mask = (dataTable['col4'] >= lo) & (dataTable['col4'] < hi)
        exp_output_file.write(data_gen_utils.outputPrint(dataTable[mask]['col1']))
        exp_output_file.write('\n\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestFive(dataTable, dataSizeTableTwo, approxSelectivity):
    """Generate test 5: sum over a range select plus a full-column sum."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(5, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Summation\n')
    output_file.write('--\n')
    # Range width chosen so roughly approxSelectivity of the table qualifies.
    offset = int(approxSelectivity * dataSizeTableTwo)
    highestHighVal = int((dataSizeTableTwo / 2) - offset)
    selectValLess = np.random.randint(int(-1 * (dataSizeTableTwo / 2)), highestHighVal)
    selectValGreater = selectValLess + offset
    output_file.writelines([
        '-- SELECT SUM(col3) FROM tbl2 WHERE col1 >= {} AND col1 < {};\n'.format(selectValLess, selectValGreater),
        's1=select(db1.tbl2.col1,{},{})\n'.format(selectValLess, selectValGreater),
        'f1=fetch(db1.tbl2.col3,s1)\n',
        'a1=sum(f1)\n',
        'print(a1)\n',
        '--\n',
        '-- SELECT SUM(col1) FROM tbl2;\n',
        'a2=sum(db1.tbl2.col1)\n',
        'print(a2)\n',
    ])
    # Expected results: the filtered sum, then the whole-column sum.
    mask = (dataTable['col1'] >= selectValLess) & (dataTable['col1'] < selectValGreater)
    exp_output_file.write(str(int(dataTable[mask]['col3'].sum())))
    exp_output_file.write('\n')
    exp_output_file.write(str(int(dataTable['col1'].sum())))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestTwo(dataTable):
    """Generate test 2: select + fetch with one-sided (null-bounded) ranges."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(2, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Test Select + Fetch\n')
    output_file.write('--\n')
    # Query 1: open lower bound, col1 < threshold.
    selectValLess = 20
    output_file.writelines([
        '-- SELECT col1 FROM tbl1 WHERE col1 < {};\n'.format(selectValLess),
        's1=select(db1.tbl1.col1,null,{})\n'.format(selectValLess),
        'f1=fetch(db1.tbl1.col1,s1)\n',
        'print(f1)\n',
        '--\n',
    ])
    below = dataTable[dataTable['col1'] < selectValLess]['col1']
    exp_output_file.write(below.to_string(header=False, index=False))
    exp_output_file.write('\n\n')
    # Query 2: open upper bound, col1 >= threshold.
    selectValGreater = 987
    output_file.writelines([
        '-- SELECT col2 FROM tbl1 WHERE col1 >= {};\n'.format(selectValGreater),
        's2=select(db1.tbl1.col1,{},null)\n'.format(selectValGreater),
        'f2=fetch(db1.tbl1.col2,s2)\n',
        'print(f2)\n',
    ])
    above = dataTable[dataTable['col1'] >= selectValGreater]['col2']
    exp_output_file.write(above.to_string(header=False, index=False))
    exp_output_file.write('\n\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestTen():
    """Generate test 10: create and load tbl3_batch for the batch-query tests."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(10, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '-- Load Test Data 2\n',
        '-- Create a table to run batch queries on\n',
        '--\n',
        '-- Loads data from: data3_batch.csv\n',
        '--\n',
        '-- Create Table\n',
        'create(tbl,"tbl3_batch",db1,4)\n',
        'create(col,"col1",db1.tbl3_batch)\n',
        'create(col,"col2",db1.tbl3_batch)\n',
        'create(col,"col3",db1.tbl3_batch)\n',
        'create(col,"col4",db1.tbl3_batch)\n',
        '--\n',
        '-- Load data immediately\n',
        'load("{}/data3_batch.csv")\n'.format(DOCKER_TEST_BASE_DIR),
        '--\n',
        '-- Testing that the data is durable on disk.\n',
        'shutdown\n',
    ])
    # This test produces no expected output.
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestNine(dataTable, dataSizeTableTwo, approxSelectivity):
    """Generate test 9 (milestone 1 boss test): two-stage select plus five aggregates.

    Fix: the NaN fallback for the final aggregate (out5) wrote '0,' — a
    trailing comma and no newline — while the success branch wrote '{}\\n'.
    The fallback now writes '0\\n' so the expected file is well-formed in
    either case.
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(9, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Big Bad Boss Test! Milestone 1\n')
    output_file.write('-- It\'s basically just the previous tests put together\n')
    output_file.write('-- But also, its.... Boss test!\n')
    output_file.write('\n')
    # Each subclause targets sqrt(selectivity) so the conjunction hits ~selectivity.
    approxSelectivityEachSubclause = np.sqrt(approxSelectivity)
    offset = int(approxSelectivityEachSubclause * dataSizeTableTwo)
    highestHighVal = int((dataSizeTableTwo / 2) - offset)
    selectValLess1 = np.random.randint(int(-1 * (dataSizeTableTwo / 2)), highestHighVal)
    selectValLess2 = np.random.randint(int(-1 * (dataSizeTableTwo / 2)), highestHighVal)
    selectValGreater1 = selectValLess1 + offset
    selectValGreater2 = selectValLess2 + offset
    output_file.write('-- SELECT avg(col1+col2), min(col2), max(col3), avg(col3-col2), sum(col3-col2) FROM tbl2 WHERE (col1 >= {} AND col1 < {}) AND (col2 >= {} AND col2 < {});\n'.format(selectValLess1, selectValGreater1, selectValLess2, selectValGreater2))
    output_file.write('s1=select(db1.tbl2.col1,{},{})\n'.format(selectValLess1, selectValGreater1))
    output_file.write('sf1=fetch(db1.tbl2.col2,s1)\n')
    output_file.write('s2=select(s1,sf1,{},{})\n'.format(selectValLess2, selectValGreater2))
    output_file.write('f1=fetch(db1.tbl2.col1,s2)\n')
    output_file.write('f2=fetch(db1.tbl2.col2,s2)\n')
    output_file.write('f3=fetch(db1.tbl2.col3,s2)\n')
    output_file.write('add12=add(f1,f2)\n')
    output_file.write('out1=avg(add12)\n')
    output_file.write('out2=min(f2)\n')
    output_file.write('out3=max(f3)\n')
    output_file.write('sub32=sub(f3,f2)\n')
    output_file.write('out4=avg(sub32)\n')
    output_file.write('out5=sum(sub32)\n')
    output_file.write('print(out1,out2,out3,out4,out5)\n')
    # Expected results: rows satisfying both range predicates.
    totalMask = ((dataTable['col1'] >= selectValLess1)
                 & (dataTable['col1'] < selectValGreater1)
                 & (dataTable['col2'] >= selectValLess2)
                 & (dataTable['col2'] < selectValGreater2))
    col1pluscol2 = dataTable[totalMask]['col1'] + dataTable[totalMask]['col2']
    col3minuscol2 = dataTable[totalMask]['col3'] - dataTable[totalMask]['col2']
    # Means are rounded to match the engine's printed precision.
    output1 = np.round(col1pluscol2.mean(), PLACES_TO_ROUND)
    if math.isnan(output1):
        exp_output_file.write('0.00,')
    else:
        exp_output_file.write('{:0.2f},'.format(output1))
    output2 = dataTable[totalMask]['col2'].min()
    output3 = dataTable[totalMask]['col3'].max()
    output4 = np.round(col3minuscol2.mean(), PLACES_TO_ROUND)
    output5 = col3minuscol2.sum()
    exp_output_file.write(str(output2) + ',')
    exp_output_file.write(str(output3) + ',')
    if math.isnan(output4):
        exp_output_file.write('0.00,')
    else:
        exp_output_file.write('{:0.2f},'.format(output4))
    if math.isnan(output5):
        # Fixed: the original wrote '0,' here, leaving the line unterminated.
        exp_output_file.write('0\n')
    else:
        exp_output_file.write('{}\n'.format(output5))
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest36(factTable, dimTable2, dataSizeFact, dataSizeDim2, selectivityFact, selectivityDim2):
    """Generate test 36: select + hash join (many-one, larger selectivities) + avg/sum.

    Fix: the original never called data_gen_utils.closeFileHandles, unlike
    every sibling generator, so buffered output could be lost. The handles
    are now closed at the end.
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(36, TEST_DIR=TEST_BASE_DIR)
    # Predicate cutoffs used both in the DSL and in the expected results.
    factCutoff = int((dataSizeFact / 5) * selectivityFact)
    dimCutoff = int(dataSizeDim2 * selectivityDim2)
    output_file.writelines([
        '-- join test 3 - hashing many-one with larger selectivities.\n',
        '-- Select + Join + aggregation\n',
        '-- Performs the join using hashing\n',
        '-- Query in SQL:\n',
        '-- SELECT avg(tbl5_fact.col2), sum(tbl5_dim2.col2) FROM tbl5_fact,tbl5_dim2 WHERE tbl5_fact.col4=tbl5_dim2.col1 AND tbl5_fact.col2 < {} AND tbl5_dim2.col1<{};\n'.format(factCutoff, dimCutoff),
        '--\n',
        '--\n',
        'p1=select(db1.tbl5_fact.col2,null, {})\n'.format(factCutoff),
        'p2=select(db1.tbl5_dim2.col1,null, {})\n'.format(dimCutoff),
        'f1=fetch(db1.tbl5_fact.col4,p1)\n',
        'f2=fetch(db1.tbl5_dim2.col1,p2)\n',
        't1,t2=join(f1,p1,f2,p2,hash)\n',
        'col2joined=fetch(db1.tbl5_fact.col2,t1)\n',
        'col2t2joined=fetch(db1.tbl5_dim2.col2,t2)\n',
        'a1=avg(col2joined)\n',
        'a2=sum(col2t2joined)\n',
        'print(a1,a2)\n',
    ])
    # Expected results: filter both sides, then join fact.col4 against dim.col1.
    dfFactTableMask = (factTable['col2'] < factCutoff)
    dfDimTableMask = (dimTable2['col1'] < dimCutoff)
    preJoinFact = factTable[dfFactTableMask]
    preJoinDim2 = dimTable2[dfDimTableMask]
    joinedTable = preJoinFact.merge(preJoinDim2, left_on='col4', right_on='col1', suffixes=('', '_right'))
    col2ValuesMean = joinedTable['col2'].mean()
    col3ValuesSum = joinedTable['col2_right'].sum()
    # An empty join yields NaN aggregates; fall back to zero values.
    if math.isnan(col2ValuesMean):
        exp_output_file.write('0.00,')
    else:
        exp_output_file.write('{:0.2f},'.format(col2ValuesMean))
    if math.isnan(col3ValuesSum):
        exp_output_file.write('0\n')
    else:
        exp_output_file.write('{}\n'.format(col3ValuesSum))
    # Previously missing: release the handles like the other generators do.
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestEleven(dataTable):
    """Generate test 11: two batched queries over disjoint ranges."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(11, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '--\n',
        '-- Testing for batching queries\n',
        '-- 2 queries with NO overlap\n',
        '--\n',
        '-- Query in SQL:\n',
        '-- SELECT col4 FROM tbl3_batch WHERE col1 >= 10 AND col1 < 20;\n',
        '-- SELECT col4 FROM tbl3_batch WHERE col1 >= 800 AND col1 < 830;\n',
        '--\n',
        '--\n',
        'batch_queries()\n',
        's1=select(db1.tbl3_batch.col1,10,20)\n',
        's2=select(db1.tbl3_batch.col1,800,830)\n',
        'batch_execute()\n',
        'f1=fetch(db1.tbl3_batch.col4,s1)\n',
        'f2=fetch(db1.tbl3_batch.col4,s2)\n',
        'print(f1)\n',
        'print(f2)\n',
    ])
    # Expected results for the two disjoint ranges.
    inFirst = (dataTable['col1'] >= 10) & (dataTable['col1'] < 20)
    inSecond = (dataTable['col1'] >= 800) & (dataTable['col1'] < 830)
    exp_output_file.write(data_gen_utils.outputPrint(dataTable[inFirst]['col4']))
    exp_output_file.write('\n\n')
    exp_output_file.write(data_gen_utils.outputPrint(dataTable[inSecond]['col4']))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTests16And17(dataTable, dataSize):
    """Generate tests 16 (unbatched control) and 17 (batched) with identical queries."""
    # ~1/1000 tuples qualify on average so time is dominated by scans,
    # not fetches or prints.
    offset = np.max([1, int(dataSize / 5000)])
    query_starts = np.random.randint(0, (dataSize / 8), size=(100))
    output_file16, exp_output_file16 = data_gen_utils.openFileHandles(16, TEST_DIR=TEST_BASE_DIR)
    output_file17, exp_output_file17 = data_gen_utils.openFileHandles(17, TEST_DIR=TEST_BASE_DIR)
    output_file16.writelines([
        '--\n',
        '-- Control timing for without batching\n',
        '-- Queries for 16 and 17 are identical.\n',
        '-- Query in SQL:\n',
        '-- 100 Queries of the type:\n',
        '-- SELECT col3 FROM tbl3_batch WHERE col2 >= _ AND col2 < _;\n',
        '--\n',
    ])
    output_file17.writelines([
        '--\n',
        '-- Same queries with batching\n',
        '-- Queries for 16 and 17 are identical.\n',
        '--\n',
        'batch_queries()\n',
    ])
    for i, start in enumerate(query_starts):
        line = 's{}=select(db1.tbl3_batch.col2,{},{})\n'.format(i, start, start + offset)
        output_file16.write(line)
        output_file17.write(line)
    output_file17.write('batch_execute()\n')
    for i in range(100):
        line = 'f{}=fetch(db1.tbl3_batch.col3,s{})\n'.format(i, i)
        output_file16.write(line)
        output_file17.write(line)
    for i in range(100):
        output_file16.write('print(f{})\n'.format(i))
        output_file17.write('print(f{})\n'.format(i))
    # Expected output is identical for both tests.
    for start in query_starts:
        mask = (dataTable['col2'] >= start) & (dataTable['col2'] < (start + offset))
        printed = data_gen_utils.outputPrint(dataTable[mask]['col3'])
        exp_output_file16.write(printed)
        exp_output_file16.write('\n\n')
        exp_output_file17.write(printed)
        exp_output_file17.write('\n\n')
    data_gen_utils.closeFileHandles(output_file16, exp_output_file16)
    data_gen_utils.closeFileHandles(output_file17, exp_output_file17)
def createTestEight(dataTable, dataSizeTableTwo, approxSelectivity):
    """Generate test 8: min/max aggregates over one range select on tbl2."""
    out_fh, exp_fh = data_gen_utils.openFileHandles(8, TEST_DIR=TEST_BASE_DIR)
    span = int(approxSelectivity * dataSizeTableTwo)
    upper_start = int((dataSizeTableTwo / 2) - span)
    lo = np.random.randint(int(-1 * (dataSizeTableTwo / 2)), upper_start)
    hi = lo + span
    # full query script assembled up-front, then written in one call
    script = [
        '-- Min,Max\n',
        '--\n',
        '-- Min\n',
        f'-- SELECT min(col1) FROM tbl2 WHERE col1 >= {lo} AND col1 < {hi};\n',
        f's1=select(db1.tbl2.col1,{lo},{hi})\n',
        'f1=fetch(db1.tbl2.col1,s1)\n',
        'm1=min(f1)\n',
        'print(m1)\n',
        '--\n',
        f'-- SELECT min(col2) FROM tbl2 WHERE col1 >= {lo} AND col1 < {hi};\n',
        'f2=fetch(db1.tbl2.col2,s1)\n',
        'm2=min(f2)\n',
        'print(m2)\n',
        '--\n',
        '--\n',
        '-- Max\n',
        f'-- SELECT max(col1) FROM tbl2 WHERE col1 >= {lo} AND col1 < {hi};\n',
        f's21=select(db1.tbl2.col1,{lo},{hi})\n',
        'f21=fetch(db1.tbl2.col1,s21)\n',
        'm21=max(f21)\n',
        'print(m21)\n',
        '--\n',
        f'-- SELECT max(col2) FROM tbl2 WHERE col1 >= {lo} AND col1 < {hi};\n',
        'f22=fetch(db1.tbl2.col2,s21)\n',
        'm22=max(f22)\n',
        'print(m22)\n',
    ]
    out_fh.write(''.join(script))
    # expected results: mirror the range select, then min/max each column
    in_range = (dataTable['col1'] >= lo) & (dataTable['col1'] < hi)
    selected = dataTable[in_range]
    for value in (selected['col1'].min(), selected['col2'].min(),
                  selected['col1'].max(), selected['col2'].max()):
        exp_fh.write(str(value) + '\n')
    data_gen_utils.closeFileHandles(out_fh, exp_fh)
def createTestFive(dataTable, dataSizeTableTwo, approxSelectivity):
    """Generate tests 101/102: 100 select+fetch+avg queries against
    tbl4_ctrl (file 101) and tbl4 (file 102).

    Range bounds are drawn from actual col2 values at random positions,
    so both files issue the same queries against the two tables.

    Args:
        dataTable: pandas DataFrame; col2 supplies the range endpoints.
        dataSizeTableTwo: row count used to size the selection window.
        approxSelectivity: fraction of rows each range should cover.

    Note: no expected results are produced; the .exp files stay empty
    (matching prior behavior). Leftover debug prints were removed.
    """
    output_file26, exp_output_file26 = data_gen_utils.openFileHandles(
        101, TEST_DIR=TEST_BASE_DIR)
    output_file27, exp_output_file27 = data_gen_utils.openFileHandles(
        102, TEST_DIR=TEST_BASE_DIR)
    offset = int(approxSelectivity * dataSizeTableTwo)
    highestHighVal = int((dataSizeTableTwo / 2) - offset)
    # Hoisted loop invariant: the original rebuilt np.array(dataTable['col2'])
    # four times per iteration; convert the column exactly once.
    col2_values = np.array(dataTable['col2'])
    for i in range(100):
        selectValLess = np.random.randint(0, highestHighVal)
        selectValGreater = selectValLess + offset
        low_val = col2_values[selectValLess]
        high_val = col2_values[selectValGreater]
        output_file26.write('s{}=select(db1.tbl4_ctrl.col2,{},{})\n'.format(
            i, low_val, high_val))
        output_file26.write('f{}=fetch(db1.tbl4_ctrl.col3,s{})\n'.format(i, i))
        output_file26.write('a{}=avg(f{})\n'.format(i, i))
        output_file26.write('print(a{})\n'.format(i))
        output_file27.write('s{}=select(db1.tbl4.col2,{},{})\n'.format(
            i, low_val, high_val))
        output_file27.write('f{}=fetch(db1.tbl4.col3,s{})\n'.format(i, i))
        output_file27.write('a{}=avg(f{})\n'.format(i, i))
        output_file27.write('print(a{})\n'.format(i))
    data_gen_utils.closeFileHandles(output_file26, exp_output_file26)
    data_gen_utils.closeFileHandles(output_file27, exp_output_file27)
def createTestOne(): # write out test output_file, exp_output_file = data_gen_utils.openFileHandles(1, TEST_DIR=TEST_BASE_DIR) output_file.write('-- Load+create Data and shut down of tbl1 which has 1 attribute only\n') output_file.write('create(db,\"db1\")\n') output_file.write('create(tbl,\"tbl1\",db1,2)\n') output_file.write('create(col,\"col1\",db1.tbl1)\n') output_file.write('create(col,\"col2\",db1.tbl1)\n') output_file.write('load(\"'+DOCKER_TEST_BASE_DIR+'/data1_generated.csv\")\n') output_file.write('shutdown\n') # generate expected results data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest43(dataTable): output_file, exp_output_file = data_gen_utils.openFileHandles( 43, TEST_DIR=TEST_BASE_DIR) output_file.write( '-- Scalability test: A large number of inserts, deletes and updates, followed by a number of queries\n' ) output_file.write('--\n') dataTable = createRandomInserts(dataTable, 100, output_file) dataTable = createRandomUpdates(dataTable, 100, output_file) dataTable = createRandomDeletes(dataTable, 100, output_file) createRandomSelects(dataTable, 5, output_file, exp_output_file) data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest32(factTable, dimTable2, dataSizeFact, dataSizeDim2,
                 selectivityFact, selectivityDim2):
    """Generate test 32: pre-join selects + nested-loop join + avg/sum.

    Args:
        factTable: DataFrame for tbl5_fact (col2, col3, col4 are read).
        dimTable2: DataFrame for tbl5_dim2 (col1 is read).
        dataSizeFact, dataSizeDim2: table sizes used to scale the predicates.
        selectivityFact, selectivityDim2: selectivity of the pre-join selects.

    Fixes vs. the previous version: leftover debug print() calls removed,
    and the file handles are now closed (they were leaked before; every
    sibling test generator calls closeFileHandles).
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(
        32, TEST_DIR=TEST_BASE_DIR)
    output_file.write(
        '-- First join test - nested-loop. Select + Join + aggregation\n')
    output_file.write('-- Performs the join using nested loops\n')
    output_file.write('-- Do this only on reasonable sized tables! (O(n^2))\n')
    output_file.write('-- Query in SQL:\n')
    output_file.write(
        '-- SELECT avg(tbl5_fact.col2), sum(tbl5_fact.col3) FROM tbl5_fact,tbl5_dim2 WHERE tbl5_fact.col4=tbl5_dim2.col1 AND tbl5_fact.col2 < {} AND tbl5_dim2.col1<{};\n'
        .format(int((dataSizeFact / 5) * selectivityFact),
                int(selectivityDim2 * dataSizeDim2)))
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('p1=select(db1.tbl5_fact.col2,null, {})\n'.format(
        int((dataSizeFact / 5) * selectivityFact)))
    output_file.write('p2=select(db1.tbl5_dim2.col1,null, {})\n'.format(
        int(dataSizeDim2 * selectivityDim2)))
    output_file.write('f1=fetch(db1.tbl5_fact.col4,p1)\n')
    output_file.write('f2=fetch(db1.tbl5_dim2.col1,p2)\n')
    output_file.write('t1,t2=join(f1,p1,f2,p2,nested-loop)\n')
    output_file.write('col2joined=fetch(db1.tbl5_fact.col2,t1)\n')
    output_file.write('col3joined=fetch(db1.tbl5_fact.col3,t2)\n')
    output_file.write('a1=avg(col2joined)\n')
    output_file.write('a2=sum(col3joined)\n')
    output_file.write('print(a1,a2)\n')
    # generate expected results by mirroring the selects + join in pandas
    dfFactTableMask = (factTable['col2'] < int(
        (dataSizeFact / 5) * selectivityFact))
    dfDimTableMask = (dimTable2['col1'] < int(dataSizeDim2 * selectivityDim2))
    preJoinFact = factTable[dfFactTableMask]
    preJoinDim2 = dimTable2[dfDimTableMask]
    joinedTable = preJoinFact.merge(preJoinDim2,
                                    left_on='col4',
                                    right_on='col1',
                                    suffixes=('', '_right'))
    col2ValuesMean = joinedTable['col2'].mean()
    col3ValuesSum = joinedTable['col3'].sum()
    # an empty join makes mean() return NaN; emit 0.00 / 0 in that case
    if math.isnan(col2ValuesMean):
        exp_output_file.write('0.00,')
    else:
        exp_output_file.write('{:0.2f},'.format(col2ValuesMean))
    if math.isnan(col3ValuesSum):
        exp_output_file.write('0\n')
    else:
        exp_output_file.write('{}\n'.format(col3ValuesSum))
    # BUG FIX: close both handles; the original returned without closing
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest42(dataTable): output_file, exp_output_file = data_gen_utils.openFileHandles( 42, TEST_DIR=TEST_BASE_DIR) output_file.write( '-- Correctness test: Delete values and run queries after inserts, updates, and deletes\n' ) output_file.write('--\n') output_file.write('-- DELETE FROM tbl5 WHERE col1 = -10;\n') output_file.write('-- DELETE FROM tbl5 WHERE col2 = -22;\n') output_file.write('-- DELETE FROM tbl5 WHERE col1 = -30;\n') output_file.write('-- DELETE FROM tbl5 WHERE col3 = -444;\n') output_file.write('-- DELETE FROM tbl5 WHERE col1 = -50;\n') output_file.write( '-- SELECT col1 FROM tbl5 WHERE col2 >= -100 AND col2 < 20;\n') output_file.write('--\n') output_file.write('d1=select(db1.tbl5.col1,-10,-9)\n') output_file.write('relational_delete(db1.tbl5,d1)\n') output_file.write('d2=select(db1.tbl5.col2,-22,-21)\n') output_file.write('relational_delete(db1.tbl5,d2)\n') output_file.write('d3=select(db1.tbl5.col1,-30,-29)\n') output_file.write('relational_delete(db1.tbl5,d3)\n') output_file.write('d4=select(db1.tbl5.col3,-444,-443)\n') output_file.write('relational_delete(db1.tbl5,d4)\n') output_file.write('d5=select(db1.tbl5.col1,-50,-49)\n') output_file.write('relational_delete(db1.tbl5,d5)\n') output_file.write('s1=select(db1.tbl5.col2,-100,20)\n') output_file.write('f1=fetch(db1.tbl5.col1,s1)\n') output_file.write('print(f1)\n') # update dataTable dataTable = dataTable[dataTable.col1 != -10] dataTable = dataTable[dataTable.col2 != -22] dataTable = dataTable[dataTable.col1 != -30] dataTable = dataTable[dataTable.col3 != -444] dataTable = dataTable[dataTable.col1 != -50] dfSelectMask1 = dataTable['col2'] >= -100 dfSelectMask2 = dataTable['col2'] < 20 output = dataTable[dfSelectMask1 & dfSelectMask2]['col1'] if len(output) > 0: exp_output_file.write(output.to_string(header=False, index=False)) exp_output_file.write('\n') data_gen_utils.closeFileHandles(output_file, exp_output_file) return dataTable
def createTest39(dataTable, approxSelectivity): output_file, exp_output_file = data_gen_utils.openFileHandles( 39, TEST_DIR=TEST_BASE_DIR) dataSize = len(dataTable) offset = int(approxSelectivity * dataSize) highestHighVal = int((dataSize / 2) - offset) selectValLess = np.random.randint(-55, -11) selectValGreater = selectValLess + offset selectValLess2 = np.random.randint(-10, 0) selectValGreater2 = selectValLess2 + offset output_file.write( '-- Correctness test: Test for updates on columns with index\n') output_file.write('--\n') output_file.write( '-- SELECT col1 FROM tbl5 WHERE col2 >= {} AND col2 < {};\n'.format( selectValLess, selectValGreater)) output_file.write('--\n') output_file.write('s1=select(db1.tbl5.col2,{},{})\n'.format( selectValLess, selectValGreater)) output_file.write('f1=fetch(db1.tbl5.col1,s1)\n') output_file.write('print(f1)\n') output_file.write('--\n') output_file.write( '-- SELECT col3 FROM tbl5 WHERE col1 >= {} AND col1 < {};\n'.format( selectValLess2, selectValGreater2)) output_file.write('--\n') output_file.write('s2=select(db1.tbl5.col1,{},{})\n'.format( selectValLess2, selectValGreater2)) output_file.write('f2=fetch(db1.tbl5.col3,s2)\n') output_file.write('print(f2)\n') # generate expected results dfSelectMaskGT = dataTable['col2'] >= selectValLess dfSelectMaskLT = dataTable['col2'] < selectValGreater output = dataTable[dfSelectMaskGT & dfSelectMaskLT]['col1'] if len(output) > 0: exp_output_file.write(output.to_string(header=False, index=False)) exp_output_file.write('\n\n') dfSelectMaskGT2 = dataTable['col1'] >= selectValLess2 dfSelectMaskLT2 = dataTable['col1'] < selectValGreater2 output = dataTable[dfSelectMaskGT2 & dfSelectMaskLT2]['col3'] if len(output) > 0: exp_output_file.write(output.to_string(header=False, index=False)) exp_output_file.write('\n') data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestThree(dataTable): # prelude output_file, exp_output_file = data_gen_utils.openFileHandles(3, TEST_DIR=TEST_BASE_DIR) output_file.write('-- Test Multiple Selects + Average\n') output_file.write('--\n') # query selectValLess = 956 selectValGreater = 972 output_file.write('-- SELECT avg(col2) FROM tbl1 WHERE col1 >= {} and col1 < {};\n'.format(selectValLess, selectValGreater)) output_file.write('s1=select(db1.tbl1.col1,956,972)\n') output_file.write('f1=fetch(db1.tbl1.col2,s1)\n') output_file.write('a1=avg(f1)\n') output_file.write('print(a1)\n') # generate expected result dfSelectMaskGT = dataTable['col1'] >= selectValLess dfSelectMaskLT = dataTable['col1'] < selectValGreater output = dataTable[dfSelectMaskGT & dfSelectMaskLT]['col2'] exp_output_file.write(str(np.round(output.mean(), PLACES_TO_ROUND))) exp_output_file.write('\n') data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTests40(dataTable): output_file, exp_output_file = data_gen_utils.openFileHandles( 40, TEST_DIR=TEST_BASE_DIR) output_file.write('-- Correctness test: Update values\n') output_file.write('--\n') output_file.write('-- UPDATE tbl5 SET col1 = -10 WHERE col1 = -1;\n') output_file.write('-- UPDATE tbl5 SET col1 = -20 WHERE col2 = -22;\n') output_file.write('-- UPDATE tbl5 SET col1 = -30 WHERE col1 = -3;\n') output_file.write('-- UPDATE tbl5 SET col1 = -40 WHERE col3 = -444;\n') output_file.write('-- UPDATE tbl5 SET col1 = -50 WHERE col1 = -5;\n') output_file.write('--\n') output_file.write('u1=select(db1.tbl5.col1,-1,0)\n') output_file.write('relational_update(db1.tbl5.col1,u1,-10)\n') output_file.write('u2=select(db1.tbl5.col2,-22,-21)\n') output_file.write('relational_update(db1.tbl5.col1,u2,-20)\n') output_file.write('u3=select(db1.tbl5.col1,-3,-2)\n') output_file.write('relational_update(db1.tbl5.col1,u3,-30)\n') output_file.write('u4=select(db1.tbl5.col3,-444,-443)\n') output_file.write('relational_update(db1.tbl5.col1,u4,-40)\n') output_file.write('u5=select(db1.tbl5.col1,-5,-4)\n') output_file.write('relational_update(db1.tbl5.col1,u5,-50)\n') output_file.write('shutdown\n') # update dataTable dfSelectMaskEq = dataTable['col1'] == -1 dataTable.loc[dfSelectMaskEq, 'col1'] = -10 dfSelectMaskEq = dataTable['col2'] == -22 dataTable.loc[dfSelectMaskEq, 'col1'] = -20 dfSelectMaskEq = dataTable['col1'] == -3 dataTable.loc[dfSelectMaskEq, 'col1'] = -30 dfSelectMaskEq = dataTable['col3'] == -444 dataTable.loc[dfSelectMaskEq, 'col1'] = -40 dfSelectMaskEq = dataTable['col1'] == -5 dataTable.loc[dfSelectMaskEq, 'col1'] = -50 # no expected results data_gen_utils.closeFileHandles(output_file, exp_output_file) return dataTable
def createTest41(dataTable): output_file, exp_output_file = data_gen_utils.openFileHandles( 41, TEST_DIR=TEST_BASE_DIR) selectValLess = np.random.randint(-200, -100) selectValGreater = np.random.randint(10, 100) output_file.write( '-- Correctness test: Run query after inserts and updates\n') output_file.write('--\n') output_file.write( '-- SELECT col1 FROM tbl5 WHERE col2 >= {} AND col2 < {};\n'.format( selectValLess, selectValGreater)) output_file.write('--\n') output_file.write('s1=select(db1.tbl5.col2,{},{})\n'.format( selectValLess, selectValGreater)) output_file.write('f1=fetch(db1.tbl5.col1,s1)\n') output_file.write('print(f1)\n') # generate expected results dfSelectMask = (dataTable['col2'] >= selectValLess) & (dataTable['col2'] < selectValGreater) output = dataTable[dfSelectMask]['col1'] exp_output_file.write(output.to_string(header=False, index=False)) data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestFour(dataTable): # prelude output_file, exp_output_file = data_gen_utils.openFileHandles(4, TEST_DIR=TEST_BASE_DIR) output_file.write('-- Load Test Data 2\n') output_file.write('--\n') output_file.write('-- Load+create+insert Data and shut down of tbl2 which has 4 attributes\n') output_file.write('create(tbl,\"tbl2\",db1,4)\n') output_file.write('create(col,\"col1\",db1.tbl2)\n') output_file.write('create(col,\"col2\",db1.tbl2)\n') output_file.write('create(col,\"col3\",db1.tbl2)\n') output_file.write('create(col,\"col4\",db1.tbl2)\n') output_file.write('load(\"'+DOCKER_TEST_BASE_DIR+'/data2_generated.csv\")\n') output_file.write('relational_insert(db1.tbl2,-1,-11,-111,-1111)\n') output_file.write('relational_insert(db1.tbl2,-2,-22,-222,-2222)\n') output_file.write('relational_insert(db1.tbl2,-3,-33,-333,-2222)\n') output_file.write('relational_insert(db1.tbl2,-4,-44,-444,-2222)\n') output_file.write('relational_insert(db1.tbl2,-5,-55,-555,-2222)\n') output_file.write('relational_insert(db1.tbl2,-6,-66,-666,-2222)\n') output_file.write('relational_insert(db1.tbl2,-7,-77,-777,-2222)\n') output_file.write('relational_insert(db1.tbl2,-8,-88,-888,-2222)\n') output_file.write('relational_insert(db1.tbl2,-9,-99,-999,-2222)\n') output_file.write('relational_insert(db1.tbl2,-10,-11,0,-34)\n') output_file.write('shutdown\n') # columns need to align for append to work deltaTable = pd.DataFrame([[-1, -11, -111, -1111], [-2, -22, -222, -2222], [-3, -33, -333, -2222], [-4, -44, -444, -2222], [-5, -55, -555, -2222], [-6, -66, -666, -2222], [-7, -77, -777, -2222], [-8, -88, -888, -2222], [-9, -99, -999, -2222], [-10, -11, 0, -34]], columns=['col1', 'col2', 'col3', 'col4']) dataTable = dataTable.append(deltaTable) data_gen_utils.closeFileHandles(output_file, exp_output_file) return dataTable