def createTests26And27(dataTable, dataSize):
    """Generate tests 26 (control scan) and 27 (B-tree) with identical avg queries."""
    output_file26, exp_output_file26 = data_gen_utils.openFileHandles(
        26, TEST_DIR=TEST_BASE_DIR)
    output_file27, exp_output_file27 = data_gen_utils.openFileHandles(
        27, TEST_DIR=TEST_BASE_DIR)
    offset = np.max([2, int(dataSize / 500)])
    output_file26.writelines([
        '-- Test for a non-clustered index select followed by an aggregate (control-test, many queries)\n',
        '-- Compare to test 27 for timing differences between B-tree and scan for highly selective queries\n',
        '--\n',
        '-- Query form in SQL:\n',
        '-- SELECT avg(col3) FROM tbl4_ctrl WHERE (col2 >= _ and col2 < _);\n',
        '--\n',
    ])
    output_file27.writelines([
        '-- Test for a non-clustered index select followed by an aggregate (many queries)\n',
        '--\n',
        '-- Query form in SQL:\n',
        '-- SELECT avg(col3) FROM tbl4 WHERE (col2 >= _ and col2 < _);\n',
        '--\n',
    ])
    for i in range(100):
        val1 = np.random.randint(0, int((dataSize / 5) - offset))
        hi = val1 + offset
        # Identical query sequence against the control table and the indexed table.
        for handle, tbl in ((output_file26, 'tbl4_ctrl'), (output_file27, 'tbl4')):
            handle.write('s{}=select(db1.{}.col2,{},{})\n'.format(i, tbl, val1, hi))
            handle.write('f{}=fetch(db1.{}.col3,s{})\n'.format(i, tbl, i))
            handle.write('a{}=avg(f{})\n'.format(i, i))
            handle.write('print(a{})\n'.format(i))
        # Expected output is the same for both tests.
        mask = (dataTable['col2'] >= val1) & (dataTable['col2'] < hi)
        mean_result = np.round(dataTable[mask]['col3'].mean(), PLACES_TO_ROUND)
        line = '0.00\n' if math.isnan(mean_result) else '{:0.2f}\n'.format(mean_result)
        exp_output_file26.write(line)
        exp_output_file27.write(line)
    data_gen_utils.closeFileHandles(output_file26, exp_output_file26)
    data_gen_utils.closeFileHandles(output_file27, exp_output_file27)
def createTests20And21(dataTable, dataSize):
    """Generate tests 20 (unindexed control) and 21 (clustered index) with identical range queries."""
    output_file20, exp_output_file20 = data_gen_utils.openFileHandles(20, TEST_DIR=TEST_BASE_DIR)
    output_file21, exp_output_file21 = data_gen_utils.openFileHandles(21, TEST_DIR=TEST_BASE_DIR)
    output_file20.write('--\n')
    output_file20.write('-- Query in SQL:\n')
    offset = np.max([1, int(dataSize / 5000)])
    offset2 = np.max([2, int(dataSize / 2500)])
    val1 = np.random.randint(0, int((dataSize / 5) - offset))
    val2 = np.random.randint(0, int((dataSize / 5) - offset2))
    hi1 = val1 + offset
    hi2 = val2 + offset2
    # Test 20: queries against the unindexed control table.
    output_file20.writelines([
        '-- SELECT col1 FROM tbl4_ctrl WHERE col3 >= {} and col3 < {};\n'.format(val1, hi1),
        '-- SELECT col1 FROM tbl4_ctrl WHERE col3 >= {} and col3 < {};\n'.format(val2, hi2),
        '--\n',
        's1=select(db1.tbl4_ctrl.col3,{},{})\n'.format(val1, hi1),
        'f1=fetch(db1.tbl4_ctrl.col1,s1)\n',
        'print(f1)\n',
        's2=select(db1.tbl4_ctrl.col3,{},{})\n'.format(val2, hi2),
        'f2=fetch(db1.tbl4_ctrl.col1,s2)\n',
        'print(f2)\n',
    ])
    # Test 21: same queries against tbl4, whose col3 has a clustered index.
    output_file21.writelines([
        '--\n',
        '-- tbl3 has a secondary b-tree tree index on col2, and a clustered index on col3 with the form of a sorted column\n',
        '-- testing for correctness\n',
        '--\n',
        '-- Query in SQL:\n',
        '-- SELECT col1 FROM tbl4 WHERE col3 >= {} and col3 < {};\n'.format(val1, hi1),
        '-- SELECT col1 FROM tbl4 WHERE col3 >= {} and col3 < {};\n'.format(val2, hi2),
        '--\n',
        '-- since col3 has a clustered index, the index is expected to be used by the select operator\n',
        's1=select(db1.tbl4.col3,{},{})\n'.format(val1, hi1),
        'f1=fetch(db1.tbl4.col1,s1)\n',
        'print(f1)\n',
        's2=select(db1.tbl4.col3,{},{})\n'.format(val2, hi2),
        'f2=fetch(db1.tbl4.col1,s2)\n',
        'print(f2)\n',
    ])
    # Expected output is identical for both tests.
    inRange1 = (dataTable['col3'] >= val1) & (dataTable['col3'] < hi1)
    inRange2 = (dataTable['col3'] >= val2) & (dataTable['col3'] < hi2)
    output1 = dataTable[inRange1]['col1']
    output2 = dataTable[inRange2]['col1']
    for exp_handle in (exp_output_file20, exp_output_file21):
        exp_handle.write(data_gen_utils.outputPrint(output1))
        exp_handle.write('\n\n')
        exp_handle.write(data_gen_utils.outputPrint(output2))
        exp_handle.write('\n')
    data_gen_utils.closeFileHandles(output_file20, exp_output_file20)
    data_gen_utils.closeFileHandles(output_file21, exp_output_file21)
def createTest19():
    """Generate test 19: create tbl4 with a clustered sorted index on col3 and a btree on col2."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(19, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '-- Test for creating table with indexes\n',
        '--\n',
        '-- Table tbl4 has a clustered index with col3 being the leading column.\n',
        '-- The clustered index has the form of a sorted column.\n',
        '-- The table also has a secondary btree index.\n',
        '--\n',
        '-- Loads data from: data4_btree.csv\n',
        '--\n',
        '-- Create Table\n',
        'create(tbl,"tbl4",db1,4)\n',
        'create(col,"col1",db1.tbl4)\n',
        'create(col,"col2",db1.tbl4)\n',
        'create(col,"col3",db1.tbl4)\n',
        'create(col,"col4",db1.tbl4)\n',
        '-- Create a clustered index on col3\n',
        'create(idx,db1.tbl4.col3,sorted,clustered)\n',
        '-- Create an unclustered btree index on col2\n',
        'create(idx,db1.tbl4.col2,btree,unclustered)\n',
        '--\n',
        '--\n',
        '-- Load data immediately in the form of a clustered index\n',
        'load("{}/data4_btree.csv")\n'.format(DOCKER_TEST_BASE_DIR),
        '--\n',
        '-- Testing that the data and their indexes are durable on disk.\n',
        'shutdown\n',
    ])
    # This test produces no expected output.
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest22(dataTable, dataSize):
    """Generate test 22: clustered index select refined by a second predicate, then sum."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(22, TEST_DIR=TEST_BASE_DIR)
    offset = np.max([1, int(dataSize / 10)])
    offset2 = 2000
    val1 = np.random.randint(0, int((dataSize / 5) - offset))
    val2 = np.random.randint(0, 8000)
    hi1 = val1 + offset
    hi2 = val2 + offset2
    output_file.writelines([
        '-- Test for a clustered index select followed by a second predicate\n',
        '--\n',
        '-- Query in SQL:\n',
        '-- SELECT sum(col1) FROM tbl4 WHERE (col3 >= {} and col3 < {}) AND (col2 >= {} and col2 < {});\n'.format(val1, hi1, val2, hi2),
        '--\n',
        's1=select(db1.tbl4.col3,{},{})\n'.format(val1, hi1),
        'f1=fetch(db1.tbl4.col2,s1)\n',
        's2=select(s1,f1,{},{})\n'.format(val2, hi2),
        'f2=fetch(db1.tbl4.col1,s2)\n',
        'print(f2)\n',
        'a1=sum(f2)\n',
        'print(a1)\n',
    ])
    # Expected results: rows satisfying both range predicates.
    inRange3 = (dataTable['col3'] >= val1) & (dataTable['col3'] < hi1)
    inRange2 = (dataTable['col2'] >= val2) & (dataTable['col2'] < hi2)
    values = dataTable[inRange3 & inRange2]['col1']
    exp_output_file.write(data_gen_utils.outputPrint(values))
    exp_output_file.write('\n\n')
    exp_output_file.write(str(values.sum()) + '\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest18():
    """Generate test 18: create and load the unindexed control table tbl4_ctrl."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(18, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '-- Create a control table that is identical to the one in test19.dsl, but\n',
        '-- without any indexes\n',
        '--\n',
        '-- Loads data from: data4_ctrl.csv\n',
        '--\n',
        '-- Create Table\n',
        'create(tbl,"tbl4_ctrl",db1,4)\n',
        'create(col,"col1",db1.tbl4_ctrl)\n',
        'create(col,"col2",db1.tbl4_ctrl)\n',
        'create(col,"col3",db1.tbl4_ctrl)\n',
        'create(col,"col4",db1.tbl4_ctrl)\n',
        '--\n',
        '-- Load data immediately\n',
        'load("{}/data4_ctrl.csv")\n'.format(DOCKER_TEST_BASE_DIR),
        '--\n',
        '-- Testing that the data and their indexes are durable on disk.\n',
        'shutdown\n',
    ])
    # This test produces no expected output.
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest30(dataTable, dataSize):
    """Generate test 30: five non-clustered index selects each followed by sum."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(30, TEST_DIR=TEST_BASE_DIR)
    offset = np.max([2, int(dataSize / 1000)])
    output_file.writelines([
        '-- Test for a non-clustered index select followed by an aggregate\n',
        '--\n',
        '-- Query form in SQL:\n',
        '-- SELECT sum(col3) FROM tbl4_clustered_btree WHERE (col2 >= _ and col2 < _);\n',
        '--\n',
    ])
    for i in range(5):
        val1 = np.random.randint(0, int((dataSize / 5) - offset))
        hi = val1 + offset
        output_file.write('s{}=select(db1.tbl4_clustered_btree.col2,{},{})\n'.format(i, val1, hi))
        output_file.write('f{}=fetch(db1.tbl4_clustered_btree.col3,s{})\n'.format(i, i))
        output_file.write('a{}=sum(f{})\n'.format(i, i))
        output_file.write('print(a{})\n'.format(i))
        # Expected result for this query; NaN falls back to 0.
        mask = (dataTable['col2'] >= val1) & (dataTable['col2'] < hi)
        total = dataTable[mask]['col3'].sum()
        exp_output_file.write('0\n' if math.isnan(total) else str(total) + '\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest31():
    """Generate test 31: create and load the fact and dimension tables for join tests."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(31, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '-- Creates tables for join tests\n',
        '-- without any indexes\n',
        'create(tbl,"tbl5_fact",db1,4)\n',
        'create(col,"col1",db1.tbl5_fact)\n',
        'create(col,"col2",db1.tbl5_fact)\n',
        'create(col,"col3",db1.tbl5_fact)\n',
        'create(col,"col4",db1.tbl5_fact)\n',
        'load("{}/data5_fact.csv")\n'.format(DOCKER_TEST_BASE_DIR),
        '--\n',
        'create(tbl,"tbl5_dim1",db1,3)\n',
        'create(col,"col1",db1.tbl5_dim1)\n',
        'create(col,"col2",db1.tbl5_dim1)\n',
        'create(col,"col3",db1.tbl5_dim1)\n',
        'load("{}/data5_dimension1.csv")\n'.format(DOCKER_TEST_BASE_DIR),
        '--\n',
        'create(tbl,"tbl5_dim2",db1,2)\n',
        'create(col,"col1",db1.tbl5_dim2)\n',
        'create(col,"col2",db1.tbl5_dim2)\n',
        'load("{}/data5_dimension2.csv")\n'.format(DOCKER_TEST_BASE_DIR),
        '-- Testing that the data and their indexes are durable on disk.\n',
        'shutdown\n',
    ])
    # This test produces no expected output.
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest37(factTable, dimTable1, dataSizeFact, dataSizeDim1, selectivityFact, selectivityDim1):
    """Generate test 37: select + hash join (many-many, larger selectivities) + sum/avg.

    Fix: the original never called data_gen_utils.closeFileHandles, unlike
    every sibling generator, so buffered output could be lost. The handles
    are now closed at the end.
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(37, TEST_DIR=TEST_BASE_DIR)
    # Predicate cutoffs used both in the DSL and in the expected results.
    factCutoff = int(selectivityFact * (dataSizeFact / 5))
    dimCutoff = int((dataSizeDim1 / 5) * selectivityDim1)
    output_file.writelines([
        '-- join test 4 - hashing many-many with larger selectivities.\n',
        '-- Select + Join + aggregation\n',
        '-- Query in SQL:\n',
        '-- SELECT sum(tbl5_fact.col2), avg(tbl5_dim1.col1) FROM tbl5_fact,tbl5_dim1 WHERE tbl5_fact.col1=tbl5_dim1.col1 AND tbl5_fact.col2 < {} AND tbl5_dim1.col3<{};\n'.format(factCutoff, dimCutoff),
        '--\n',
        '--\n',
        'p1=select(db1.tbl5_fact.col2,null, {})\n'.format(factCutoff),
        'p2=select(db1.tbl5_dim1.col3,null, {})\n'.format(dimCutoff),
        'f1=fetch(db1.tbl5_fact.col1,p1)\n',
        'f2=fetch(db1.tbl5_dim1.col1,p2)\n',
        't1,t2=join(f1,p1,f2,p2,hash)\n',
        'col2joined=fetch(db1.tbl5_fact.col2,t1)\n',
        'col1joined=fetch(db1.tbl5_dim1.col1,t2)\n',
        'a1=sum(col2joined)\n',
        'a2=avg(col1joined)\n',
        'print(a1,a2)\n',
    ])
    # Expected results: filter both sides, then an inner join on col1.
    dfFactTableMask = (factTable['col2'] < factCutoff)
    dfDimTableMask = (dimTable1['col3'] < dimCutoff)
    preJoinFact = factTable[dfFactTableMask]
    preJoinDim1 = dimTable1[dfDimTableMask]
    joinedTable = preJoinFact.merge(preJoinDim1, left_on='col1', right_on='col1', suffixes=('', '_right'))
    col2ValuesSum = joinedTable['col2'].sum()
    col1ValuesMean = joinedTable['col1'].mean()
    # An empty join yields NaN aggregates; fall back to zero values.
    if math.isnan(col2ValuesSum):
        exp_output_file.write('0,')
    else:
        exp_output_file.write('{},'.format(col2ValuesSum))
    if math.isnan(col1ValuesMean):
        exp_output_file.write('0.00\n')
    else:
        exp_output_file.write('{:0.2f}\n'.format(col1ValuesMean))
    # Previously missing: release the handles like the other generators do.
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestSix(dataTable, dataSizeTableTwo, approxNumOutputTuples):
    """Generate test 6: add two fetched columns over a range select."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(6, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Addition\n')
    output_file.write('--\n')
    # Pick a random range of width approxNumOutputTuples within col1's domain.
    offset = approxNumOutputTuples
    highestHighVal = int((dataSizeTableTwo / 2) - offset)
    selectValLess = np.random.randint(int(-1 * (dataSizeTableTwo / 2)), highestHighVal)
    selectValGreater = selectValLess + offset
    output_file.writelines([
        '-- SELECT col2+col3 FROM tbl2 WHERE col1 >= {} AND col1 < {};\n'.format(selectValLess, selectValGreater),
        's11=select(db1.tbl2.col1,{},{})\n'.format(selectValLess, selectValGreater),
        'f11=fetch(db1.tbl2.col2,s11)\n',
        'f12=fetch(db1.tbl2.col3,s11)\n',
        'a11=add(f11,f12)\n',
        'print(a11)\n',
    ])
    # Expected results: element-wise sum over qualifying rows.
    mask = (dataTable['col1'] >= selectValLess) & (dataTable['col1'] < selectValGreater)
    selected = dataTable[mask]
    result = selected['col3'] + selected['col2']
    exp_output_file.write(result.to_string(header=False, index=False))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestSeven(dataTable, dataSizeTableTwo, approxNumOutputTuples):
    """Generate test 7: subtract two fetched columns over a range select."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(7, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Subtraction\n')
    output_file.write('--\n')
    # Pick a random range of width approxNumOutputTuples within col1's domain.
    offset = approxNumOutputTuples
    highestHighVal = int((dataSizeTableTwo / 2) - offset)
    selectValLess = np.random.randint(int(-1 * (dataSizeTableTwo / 2)), highestHighVal)
    selectValGreater = selectValLess + offset
    output_file.writelines([
        '-- SELECT col3-col2 FROM tbl2 WHERE col1 >= {} AND col1 < {};\n'.format(selectValLess, selectValGreater),
        's21=select(db1.tbl2.col1,{},{})\n'.format(selectValLess, selectValGreater),
        'f21=fetch(db1.tbl2.col2,s21)\n',
        'f22=fetch(db1.tbl2.col3,s21)\n',
        's21=sub(f22,f21)\n',
        'print(s21)\n',
    ])
    # Expected results: element-wise difference over qualifying rows.
    mask = (dataTable['col1'] >= selectValLess) & (dataTable['col1'] < selectValGreater)
    selected = dataTable[mask]
    result = selected['col3'] - selected['col2']
    exp_output_file.write(result.to_string(header=False, index=False))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestFifteen(dataTable):
    """Generate test 15: ten batched queries with full overlap (subsumption)."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(15, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '--\n',
        '-- Testing for batching queries\n',
        '-- Queries with full overlap (subsumption)\n',
        '--\n',
    ])
    randomVal = np.random.randint(1000, 9900)
    output_file.writelines([
        '-- Query in SQL:\n',
        '-- 10 Queries of the type:\n',
        '-- SELECT col1 FROM tbl3_batch WHERE col4 >= _ AND col4 < _;\n',
        '--\n',
        '--\n',
        'batch_queries()\n',
    ])
    # Each query's range is strictly contained in the previous one's.
    bounds = [(randomVal + (2 * i), randomVal + 60 - (2 * i)) for i in range(10)]
    for i, (lo, hi) in enumerate(bounds):
        output_file.write('s{}=select(db1.tbl3_batch.col4,{},{})\n'.format(i, lo, hi))
    output_file.write('batch_execute()\n')
    for i in range(10):
        output_file.write('f{}=fetch(db1.tbl3_batch.col1,s{})\n'.format(i, i))
    for i in range(10):
        output_file.write('print(f{})\n'.format(i))
    # Expected output, one block per query.
    for lo, hi in bounds:
        mask = (dataTable['col4'] >= lo) & (dataTable['col4'] < hi)
        exp_output_file.write(data_gen_utils.outputPrint(dataTable[mask]['col1']))
        exp_output_file.write('\n\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestFourteen(dataTable):
    """Generate test 14: ten batched queries with no overlap."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(14, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '--\n',
        '-- Testing for batching queries\n',
        '-- Queries with no overlap\n',
        '--\n',
        '-- Query in SQL:\n',
        '-- 10 Queries of the type:\n',
        '-- SELECT col1 FROM tbl3_batch WHERE col4 >= _ AND col4 < _;\n',
        '--\n',
        '--\n',
        'batch_queries()\n',
    ])
    # Disjoint ranges: [0,30), [1000,1030), [2000,2030), ...
    bounds = [(1000 * i, (1000 * i) + 30) for i in range(10)]
    for i, (lo, hi) in enumerate(bounds):
        output_file.write('s{}=select(db1.tbl3_batch.col4,{},{})\n'.format(i, lo, hi))
    output_file.write('batch_execute()\n')
    for i in range(10):
        output_file.write('f{}=fetch(db1.tbl3_batch.col1,s{})\n'.format(i, i))
    for i in range(10):
        output_file.write('print(f{})\n'.format(i))
    # Expected output, one block per query.
    for lo, hi in bounds:
        mask = (dataTable['col4'] >= lo) & (dataTable['col4'] < hi)
        exp_output_file.write(data_gen_utils.outputPrint(dataTable[mask]['col1']))
        exp_output_file.write('\n\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestFive(dataTable, dataSizeTableTwo, approxSelectivity):
    """Generate test 5: sum over a range select plus a full-column sum."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(5, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Summation\n')
    output_file.write('--\n')
    # Range width chosen so roughly approxSelectivity of the table qualifies.
    offset = int(approxSelectivity * dataSizeTableTwo)
    highestHighVal = int((dataSizeTableTwo / 2) - offset)
    selectValLess = np.random.randint(int(-1 * (dataSizeTableTwo / 2)), highestHighVal)
    selectValGreater = selectValLess + offset
    output_file.writelines([
        '-- SELECT SUM(col3) FROM tbl2 WHERE col1 >= {} AND col1 < {};\n'.format(selectValLess, selectValGreater),
        's1=select(db1.tbl2.col1,{},{})\n'.format(selectValLess, selectValGreater),
        'f1=fetch(db1.tbl2.col3,s1)\n',
        'a1=sum(f1)\n',
        'print(a1)\n',
        '--\n',
        '-- SELECT SUM(col1) FROM tbl2;\n',
        'a2=sum(db1.tbl2.col1)\n',
        'print(a2)\n',
    ])
    # Expected results: the filtered sum, then the whole-column sum.
    mask = (dataTable['col1'] >= selectValLess) & (dataTable['col1'] < selectValGreater)
    exp_output_file.write(str(int(dataTable[mask]['col3'].sum())))
    exp_output_file.write('\n')
    exp_output_file.write(str(int(dataTable['col1'].sum())))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestTwo(dataTable):
    """Generate test 2: select + fetch with one-sided (null-bounded) ranges."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(2, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Test Select + Fetch\n')
    output_file.write('--\n')
    # Query 1: open lower bound, col1 < threshold.
    selectValLess = 20
    output_file.writelines([
        '-- SELECT col1 FROM tbl1 WHERE col1 < {};\n'.format(selectValLess),
        's1=select(db1.tbl1.col1,null,{})\n'.format(selectValLess),
        'f1=fetch(db1.tbl1.col1,s1)\n',
        'print(f1)\n',
        '--\n',
    ])
    below = dataTable[dataTable['col1'] < selectValLess]['col1']
    exp_output_file.write(below.to_string(header=False, index=False))
    exp_output_file.write('\n\n')
    # Query 2: open upper bound, col1 >= threshold.
    selectValGreater = 987
    output_file.writelines([
        '-- SELECT col2 FROM tbl1 WHERE col1 >= {};\n'.format(selectValGreater),
        's2=select(db1.tbl1.col1,{},null)\n'.format(selectValGreater),
        'f2=fetch(db1.tbl1.col2,s2)\n',
        'print(f2)\n',
    ])
    above = dataTable[dataTable['col1'] >= selectValGreater]['col2']
    exp_output_file.write(above.to_string(header=False, index=False))
    exp_output_file.write('\n\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestTen():
    """Generate test 10: create and load tbl3_batch for the batch-query tests."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(10, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '-- Load Test Data 2\n',
        '-- Create a table to run batch queries on\n',
        '--\n',
        '-- Loads data from: data3_batch.csv\n',
        '--\n',
        '-- Create Table\n',
        'create(tbl,"tbl3_batch",db1,4)\n',
        'create(col,"col1",db1.tbl3_batch)\n',
        'create(col,"col2",db1.tbl3_batch)\n',
        'create(col,"col3",db1.tbl3_batch)\n',
        'create(col,"col4",db1.tbl3_batch)\n',
        '--\n',
        '-- Load data immediately\n',
        'load("{}/data3_batch.csv")\n'.format(DOCKER_TEST_BASE_DIR),
        '--\n',
        '-- Testing that the data is durable on disk.\n',
        'shutdown\n',
    ])
    # This test produces no expected output.
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestNine(dataTable, dataSizeTableTwo, approxSelectivity):
    """Generate test 9 (milestone 1 boss test): two-stage select plus five aggregates.

    Fix: the NaN fallback for the final aggregate (out5) wrote '0,' — a
    trailing comma and no newline — while the success branch wrote '{}\\n'.
    The fallback now writes '0\\n' so the expected file is well-formed in
    either case.
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(9, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Big Bad Boss Test! Milestone 1\n')
    output_file.write('-- It\'s basically just the previous tests put together\n')
    output_file.write('-- But also, its.... Boss test!\n')
    output_file.write('\n')
    # Each subclause targets sqrt(selectivity) so the conjunction hits ~selectivity.
    approxSelectivityEachSubclause = np.sqrt(approxSelectivity)
    offset = int(approxSelectivityEachSubclause * dataSizeTableTwo)
    highestHighVal = int((dataSizeTableTwo / 2) - offset)
    selectValLess1 = np.random.randint(int(-1 * (dataSizeTableTwo / 2)), highestHighVal)
    selectValLess2 = np.random.randint(int(-1 * (dataSizeTableTwo / 2)), highestHighVal)
    selectValGreater1 = selectValLess1 + offset
    selectValGreater2 = selectValLess2 + offset
    output_file.write('-- SELECT avg(col1+col2), min(col2), max(col3), avg(col3-col2), sum(col3-col2) FROM tbl2 WHERE (col1 >= {} AND col1 < {}) AND (col2 >= {} AND col2 < {});\n'.format(selectValLess1, selectValGreater1, selectValLess2, selectValGreater2))
    output_file.write('s1=select(db1.tbl2.col1,{},{})\n'.format(selectValLess1, selectValGreater1))
    output_file.write('sf1=fetch(db1.tbl2.col2,s1)\n')
    output_file.write('s2=select(s1,sf1,{},{})\n'.format(selectValLess2, selectValGreater2))
    output_file.write('f1=fetch(db1.tbl2.col1,s2)\n')
    output_file.write('f2=fetch(db1.tbl2.col2,s2)\n')
    output_file.write('f3=fetch(db1.tbl2.col3,s2)\n')
    output_file.write('add12=add(f1,f2)\n')
    output_file.write('out1=avg(add12)\n')
    output_file.write('out2=min(f2)\n')
    output_file.write('out3=max(f3)\n')
    output_file.write('sub32=sub(f3,f2)\n')
    output_file.write('out4=avg(sub32)\n')
    output_file.write('out5=sum(sub32)\n')
    output_file.write('print(out1,out2,out3,out4,out5)\n')
    # Expected results: rows satisfying both range predicates.
    totalMask = ((dataTable['col1'] >= selectValLess1)
                 & (dataTable['col1'] < selectValGreater1)
                 & (dataTable['col2'] >= selectValLess2)
                 & (dataTable['col2'] < selectValGreater2))
    col1pluscol2 = dataTable[totalMask]['col1'] + dataTable[totalMask]['col2']
    col3minuscol2 = dataTable[totalMask]['col3'] - dataTable[totalMask]['col2']
    # Means are rounded to match the engine's printed precision.
    output1 = np.round(col1pluscol2.mean(), PLACES_TO_ROUND)
    if math.isnan(output1):
        exp_output_file.write('0.00,')
    else:
        exp_output_file.write('{:0.2f},'.format(output1))
    output2 = dataTable[totalMask]['col2'].min()
    output3 = dataTable[totalMask]['col3'].max()
    output4 = np.round(col3minuscol2.mean(), PLACES_TO_ROUND)
    output5 = col3minuscol2.sum()
    exp_output_file.write(str(output2) + ',')
    exp_output_file.write(str(output3) + ',')
    if math.isnan(output4):
        exp_output_file.write('0.00,')
    else:
        exp_output_file.write('{:0.2f},'.format(output4))
    if math.isnan(output5):
        # Fixed: the original wrote '0,' here, leaving the line unterminated.
        exp_output_file.write('0\n')
    else:
        exp_output_file.write('{}\n'.format(output5))
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest36(factTable, dimTable2, dataSizeFact, dataSizeDim2, selectivityFact, selectivityDim2):
    """Generate test 36: select + hash join (many-one, larger selectivities) + avg/sum.

    Fix: the original never called data_gen_utils.closeFileHandles, unlike
    every sibling generator, so buffered output could be lost. The handles
    are now closed at the end.
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(36, TEST_DIR=TEST_BASE_DIR)
    # Predicate cutoffs used both in the DSL and in the expected results.
    factCutoff = int((dataSizeFact / 5) * selectivityFact)
    dimCutoff = int(dataSizeDim2 * selectivityDim2)
    output_file.writelines([
        '-- join test 3 - hashing many-one with larger selectivities.\n',
        '-- Select + Join + aggregation\n',
        '-- Performs the join using hashing\n',
        '-- Query in SQL:\n',
        '-- SELECT avg(tbl5_fact.col2), sum(tbl5_dim2.col2) FROM tbl5_fact,tbl5_dim2 WHERE tbl5_fact.col4=tbl5_dim2.col1 AND tbl5_fact.col2 < {} AND tbl5_dim2.col1<{};\n'.format(factCutoff, dimCutoff),
        '--\n',
        '--\n',
        'p1=select(db1.tbl5_fact.col2,null, {})\n'.format(factCutoff),
        'p2=select(db1.tbl5_dim2.col1,null, {})\n'.format(dimCutoff),
        'f1=fetch(db1.tbl5_fact.col4,p1)\n',
        'f2=fetch(db1.tbl5_dim2.col1,p2)\n',
        't1,t2=join(f1,p1,f2,p2,hash)\n',
        'col2joined=fetch(db1.tbl5_fact.col2,t1)\n',
        'col2t2joined=fetch(db1.tbl5_dim2.col2,t2)\n',
        'a1=avg(col2joined)\n',
        'a2=sum(col2t2joined)\n',
        'print(a1,a2)\n',
    ])
    # Expected results: filter both sides, then join fact.col4 against dim.col1.
    dfFactTableMask = (factTable['col2'] < factCutoff)
    dfDimTableMask = (dimTable2['col1'] < dimCutoff)
    preJoinFact = factTable[dfFactTableMask]
    preJoinDim2 = dimTable2[dfDimTableMask]
    joinedTable = preJoinFact.merge(preJoinDim2, left_on='col4', right_on='col1', suffixes=('', '_right'))
    col2ValuesMean = joinedTable['col2'].mean()
    col3ValuesSum = joinedTable['col2_right'].sum()
    # An empty join yields NaN aggregates; fall back to zero values.
    if math.isnan(col2ValuesMean):
        exp_output_file.write('0.00,')
    else:
        exp_output_file.write('{:0.2f},'.format(col2ValuesMean))
    if math.isnan(col3ValuesSum):
        exp_output_file.write('0\n')
    else:
        exp_output_file.write('{}\n'.format(col3ValuesSum))
    # Previously missing: release the handles like the other generators do.
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestEleven(dataTable):
    """Generate test 11: two batched queries over disjoint ranges."""
    output_file, exp_output_file = data_gen_utils.openFileHandles(11, TEST_DIR=TEST_BASE_DIR)
    output_file.writelines([
        '--\n',
        '-- Testing for batching queries\n',
        '-- 2 queries with NO overlap\n',
        '--\n',
        '-- Query in SQL:\n',
        '-- SELECT col4 FROM tbl3_batch WHERE col1 >= 10 AND col1 < 20;\n',
        '-- SELECT col4 FROM tbl3_batch WHERE col1 >= 800 AND col1 < 830;\n',
        '--\n',
        '--\n',
        'batch_queries()\n',
        's1=select(db1.tbl3_batch.col1,10,20)\n',
        's2=select(db1.tbl3_batch.col1,800,830)\n',
        'batch_execute()\n',
        'f1=fetch(db1.tbl3_batch.col4,s1)\n',
        'f2=fetch(db1.tbl3_batch.col4,s2)\n',
        'print(f1)\n',
        'print(f2)\n',
    ])
    # Expected results for the two disjoint ranges.
    inFirst = (dataTable['col1'] >= 10) & (dataTable['col1'] < 20)
    inSecond = (dataTable['col1'] >= 800) & (dataTable['col1'] < 830)
    exp_output_file.write(data_gen_utils.outputPrint(dataTable[inFirst]['col4']))
    exp_output_file.write('\n\n')
    exp_output_file.write(data_gen_utils.outputPrint(dataTable[inSecond]['col4']))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTests16And17(dataTable, dataSize):
    """Generate tests 16 (unbatched control) and 17 (batched) with identical queries."""
    # ~1/1000 tuples qualify on average so time is dominated by scans,
    # not fetches or prints.
    offset = np.max([1, int(dataSize / 5000)])
    query_starts = np.random.randint(0, (dataSize / 8), size=(100))
    output_file16, exp_output_file16 = data_gen_utils.openFileHandles(16, TEST_DIR=TEST_BASE_DIR)
    output_file17, exp_output_file17 = data_gen_utils.openFileHandles(17, TEST_DIR=TEST_BASE_DIR)
    output_file16.writelines([
        '--\n',
        '-- Control timing for without batching\n',
        '-- Queries for 16 and 17 are identical.\n',
        '-- Query in SQL:\n',
        '-- 100 Queries of the type:\n',
        '-- SELECT col3 FROM tbl3_batch WHERE col2 >= _ AND col2 < _;\n',
        '--\n',
    ])
    output_file17.writelines([
        '--\n',
        '-- Same queries with batching\n',
        '-- Queries for 16 and 17 are identical.\n',
        '--\n',
        'batch_queries()\n',
    ])
    for i, start in enumerate(query_starts):
        line = 's{}=select(db1.tbl3_batch.col2,{},{})\n'.format(i, start, start + offset)
        output_file16.write(line)
        output_file17.write(line)
    output_file17.write('batch_execute()\n')
    for i in range(100):
        line = 'f{}=fetch(db1.tbl3_batch.col3,s{})\n'.format(i, i)
        output_file16.write(line)
        output_file17.write(line)
    for i in range(100):
        output_file16.write('print(f{})\n'.format(i))
        output_file17.write('print(f{})\n'.format(i))
    # Expected output is identical for both tests.
    for start in query_starts:
        mask = (dataTable['col2'] >= start) & (dataTable['col2'] < (start + offset))
        printed = data_gen_utils.outputPrint(dataTable[mask]['col3'])
        exp_output_file16.write(printed)
        exp_output_file16.write('\n\n')
        exp_output_file17.write(printed)
        exp_output_file17.write('\n\n')
    data_gen_utils.closeFileHandles(output_file16, exp_output_file16)
    data_gen_utils.closeFileHandles(output_file17, exp_output_file17)
def createTestEight(dataTable, dataSizeTableTwo, approxSelectivity):
    """Generate test 8: min/max aggregates over one range select on tbl2."""
    out_fh, exp_fh = data_gen_utils.openFileHandles(8, TEST_DIR=TEST_BASE_DIR)
    span = int(approxSelectivity * dataSizeTableTwo)
    upper_start = int((dataSizeTableTwo / 2) - span)
    lo = np.random.randint(int(-1 * (dataSizeTableTwo / 2)), upper_start)
    hi = lo + span
    # full query script assembled up-front, then written in one call
    script = [
        '-- Min,Max\n',
        '--\n',
        '-- Min\n',
        f'-- SELECT min(col1) FROM tbl2 WHERE col1 >= {lo} AND col1 < {hi};\n',
        f's1=select(db1.tbl2.col1,{lo},{hi})\n',
        'f1=fetch(db1.tbl2.col1,s1)\n',
        'm1=min(f1)\n',
        'print(m1)\n',
        '--\n',
        f'-- SELECT min(col2) FROM tbl2 WHERE col1 >= {lo} AND col1 < {hi};\n',
        'f2=fetch(db1.tbl2.col2,s1)\n',
        'm2=min(f2)\n',
        'print(m2)\n',
        '--\n',
        '--\n',
        '-- Max\n',
        f'-- SELECT max(col1) FROM tbl2 WHERE col1 >= {lo} AND col1 < {hi};\n',
        f's21=select(db1.tbl2.col1,{lo},{hi})\n',
        'f21=fetch(db1.tbl2.col1,s21)\n',
        'm21=max(f21)\n',
        'print(m21)\n',
        '--\n',
        f'-- SELECT max(col2) FROM tbl2 WHERE col1 >= {lo} AND col1 < {hi};\n',
        'f22=fetch(db1.tbl2.col2,s21)\n',
        'm22=max(f22)\n',
        'print(m22)\n',
    ]
    out_fh.write(''.join(script))
    # expected results: mirror the range select, then min/max each column
    in_range = (dataTable['col1'] >= lo) & (dataTable['col1'] < hi)
    selected = dataTable[in_range]
    for value in (selected['col1'].min(), selected['col2'].min(),
                  selected['col1'].max(), selected['col2'].max()):
        exp_fh.write(str(value) + '\n')
    data_gen_utils.closeFileHandles(out_fh, exp_fh)
def createTestFive(dataTable, dataSizeTableTwo, approxSelectivity):
    """Generate tests 101/102: 100 select+fetch+avg queries against
    tbl4_ctrl (file 101) and tbl4 (file 102).

    Range bounds are drawn from actual col2 values at random positions,
    so both files issue the same queries against the two tables.

    Args:
        dataTable: pandas DataFrame; col2 supplies the range endpoints.
        dataSizeTableTwo: row count used to size the selection window.
        approxSelectivity: fraction of rows each range should cover.

    Note: no expected results are produced; the .exp files stay empty
    (matching prior behavior). Leftover debug prints were removed.
    """
    output_file26, exp_output_file26 = data_gen_utils.openFileHandles(
        101, TEST_DIR=TEST_BASE_DIR)
    output_file27, exp_output_file27 = data_gen_utils.openFileHandles(
        102, TEST_DIR=TEST_BASE_DIR)
    offset = int(approxSelectivity * dataSizeTableTwo)
    highestHighVal = int((dataSizeTableTwo / 2) - offset)
    # Hoisted loop invariant: the original rebuilt np.array(dataTable['col2'])
    # four times per iteration; convert the column exactly once.
    col2_values = np.array(dataTable['col2'])
    for i in range(100):
        selectValLess = np.random.randint(0, highestHighVal)
        selectValGreater = selectValLess + offset
        low_val = col2_values[selectValLess]
        high_val = col2_values[selectValGreater]
        output_file26.write('s{}=select(db1.tbl4_ctrl.col2,{},{})\n'.format(
            i, low_val, high_val))
        output_file26.write('f{}=fetch(db1.tbl4_ctrl.col3,s{})\n'.format(i, i))
        output_file26.write('a{}=avg(f{})\n'.format(i, i))
        output_file26.write('print(a{})\n'.format(i))
        output_file27.write('s{}=select(db1.tbl4.col2,{},{})\n'.format(
            i, low_val, high_val))
        output_file27.write('f{}=fetch(db1.tbl4.col3,s{})\n'.format(i, i))
        output_file27.write('a{}=avg(f{})\n'.format(i, i))
        output_file27.write('print(a{})\n'.format(i))
    data_gen_utils.closeFileHandles(output_file26, exp_output_file26)
    data_gen_utils.closeFileHandles(output_file27, exp_output_file27)
def createTestOne(): # write out test output_file, exp_output_file = data_gen_utils.openFileHandles(1, TEST_DIR=TEST_BASE_DIR) output_file.write('-- Load+create Data and shut down of tbl1 which has 1 attribute only\n') output_file.write('create(db,\"db1\")\n') output_file.write('create(tbl,\"tbl1\",db1,2)\n') output_file.write('create(col,\"col1\",db1.tbl1)\n') output_file.write('create(col,\"col2\",db1.tbl1)\n') output_file.write('load(\"'+DOCKER_TEST_BASE_DIR+'/data1_generated.csv\")\n') output_file.write('shutdown\n') # generate expected results data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest43(dataTable): output_file, exp_output_file = data_gen_utils.openFileHandles( 43, TEST_DIR=TEST_BASE_DIR) output_file.write( '-- Scalability test: A large number of inserts, deletes and updates, followed by a number of queries\n' ) output_file.write('--\n') dataTable = createRandomInserts(dataTable, 100, output_file) dataTable = createRandomUpdates(dataTable, 100, output_file) dataTable = createRandomDeletes(dataTable, 100, output_file) createRandomSelects(dataTable, 5, output_file, exp_output_file) data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest32(factTable, dimTable2, dataSizeFact, dataSizeDim2,
                 selectivityFact, selectivityDim2):
    """Generate test 32: pre-join selects + nested-loop join + avg/sum.

    Args:
        factTable: DataFrame for tbl5_fact (col2, col3, col4 are read).
        dimTable2: DataFrame for tbl5_dim2 (col1 is read).
        dataSizeFact, dataSizeDim2: table sizes used to scale the predicates.
        selectivityFact, selectivityDim2: selectivity of the pre-join selects.

    Fixes vs. the previous version: leftover debug print() calls removed,
    and the file handles are now closed (they were leaked before; every
    sibling test generator calls closeFileHandles).
    """
    output_file, exp_output_file = data_gen_utils.openFileHandles(
        32, TEST_DIR=TEST_BASE_DIR)
    output_file.write(
        '-- First join test - nested-loop. Select + Join + aggregation\n')
    output_file.write('-- Performs the join using nested loops\n')
    output_file.write('-- Do this only on reasonable sized tables! (O(n^2))\n')
    output_file.write('-- Query in SQL:\n')
    output_file.write(
        '-- SELECT avg(tbl5_fact.col2), sum(tbl5_fact.col3) FROM tbl5_fact,tbl5_dim2 WHERE tbl5_fact.col4=tbl5_dim2.col1 AND tbl5_fact.col2 < {} AND tbl5_dim2.col1<{};\n'
        .format(int((dataSizeFact / 5) * selectivityFact),
                int(selectivityDim2 * dataSizeDim2)))
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('p1=select(db1.tbl5_fact.col2,null, {})\n'.format(
        int((dataSizeFact / 5) * selectivityFact)))
    output_file.write('p2=select(db1.tbl5_dim2.col1,null, {})\n'.format(
        int(dataSizeDim2 * selectivityDim2)))
    output_file.write('f1=fetch(db1.tbl5_fact.col4,p1)\n')
    output_file.write('f2=fetch(db1.tbl5_dim2.col1,p2)\n')
    output_file.write('t1,t2=join(f1,p1,f2,p2,nested-loop)\n')
    output_file.write('col2joined=fetch(db1.tbl5_fact.col2,t1)\n')
    output_file.write('col3joined=fetch(db1.tbl5_fact.col3,t2)\n')
    output_file.write('a1=avg(col2joined)\n')
    output_file.write('a2=sum(col3joined)\n')
    output_file.write('print(a1,a2)\n')
    # generate expected results by mirroring the selects + join in pandas
    dfFactTableMask = (factTable['col2'] < int(
        (dataSizeFact / 5) * selectivityFact))
    dfDimTableMask = (dimTable2['col1'] < int(dataSizeDim2 * selectivityDim2))
    preJoinFact = factTable[dfFactTableMask]
    preJoinDim2 = dimTable2[dfDimTableMask]
    joinedTable = preJoinFact.merge(preJoinDim2,
                                    left_on='col4',
                                    right_on='col1',
                                    suffixes=('', '_right'))
    col2ValuesMean = joinedTable['col2'].mean()
    col3ValuesSum = joinedTable['col3'].sum()
    # an empty join makes mean() return NaN; emit 0.00 / 0 in that case
    if math.isnan(col2ValuesMean):
        exp_output_file.write('0.00,')
    else:
        exp_output_file.write('{:0.2f},'.format(col2ValuesMean))
    if math.isnan(col3ValuesSum):
        exp_output_file.write('0\n')
    else:
        exp_output_file.write('{}\n'.format(col3ValuesSum))
    # BUG FIX: close both handles; the original returned without closing
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTest42(dataTable): output_file, exp_output_file = data_gen_utils.openFileHandles( 42, TEST_DIR=TEST_BASE_DIR) output_file.write( '-- Correctness test: Delete values and run queries after inserts, updates, and deletes\n' ) output_file.write('--\n') output_file.write('-- DELETE FROM tbl5 WHERE col1 = -10;\n') output_file.write('-- DELETE FROM tbl5 WHERE col2 = -22;\n') output_file.write('-- DELETE FROM tbl5 WHERE col1 = -30;\n') output_file.write('-- DELETE FROM tbl5 WHERE col3 = -444;\n') output_file.write('-- DELETE FROM tbl5 WHERE col1 = -50;\n') output_file.write( '-- SELECT col1 FROM tbl5 WHERE col2 >= -100 AND col2 < 20;\n') output_file.write('--\n') output_file.write('d1=select(db1.tbl5.col1,-10,-9)\n') output_file.write('relational_delete(db1.tbl5,d1)\n') output_file.write('d2=select(db1.tbl5.col2,-22,-21)\n') output_file.write('relational_delete(db1.tbl5,d2)\n') output_file.write('d3=select(db1.tbl5.col1,-30,-29)\n') output_file.write('relational_delete(db1.tbl5,d3)\n') output_file.write('d4=select(db1.tbl5.col3,-444,-443)\n') output_file.write('relational_delete(db1.tbl5,d4)\n') output_file.write('d5=select(db1.tbl5.col1,-50,-49)\n') output_file.write('relational_delete(db1.tbl5,d5)\n') output_file.write('s1=select(db1.tbl5.col2,-100,20)\n') output_file.write('f1=fetch(db1.tbl5.col1,s1)\n') output_file.write('print(f1)\n') # update dataTable dataTable = dataTable[dataTable.col1 != -10] dataTable = dataTable[dataTable.col2 != -22] dataTable = dataTable[dataTable.col1 != -30] dataTable = dataTable[dataTable.col3 != -444] dataTable = dataTable[dataTable.col1 != -50] dfSelectMask1 = dataTable['col2'] >= -100 dfSelectMask2 = dataTable['col2'] < 20 output = dataTable[dfSelectMask1 & dfSelectMask2]['col1'] if len(output) > 0: exp_output_file.write(output.to_string(header=False, index=False)) exp_output_file.write('\n') data_gen_utils.closeFileHandles(output_file, exp_output_file) return dataTable
def createTest39(dataTable, approxSelectivity): output_file, exp_output_file = data_gen_utils.openFileHandles( 39, TEST_DIR=TEST_BASE_DIR) dataSize = len(dataTable) offset = int(approxSelectivity * dataSize) highestHighVal = int((dataSize / 2) - offset) selectValLess = np.random.randint(-55, -11) selectValGreater = selectValLess + offset selectValLess2 = np.random.randint(-10, 0) selectValGreater2 = selectValLess2 + offset output_file.write( '-- Correctness test: Test for updates on columns with index\n') output_file.write('--\n') output_file.write( '-- SELECT col1 FROM tbl5 WHERE col2 >= {} AND col2 < {};\n'.format( selectValLess, selectValGreater)) output_file.write('--\n') output_file.write('s1=select(db1.tbl5.col2,{},{})\n'.format( selectValLess, selectValGreater)) output_file.write('f1=fetch(db1.tbl5.col1,s1)\n') output_file.write('print(f1)\n') output_file.write('--\n') output_file.write( '-- SELECT col3 FROM tbl5 WHERE col1 >= {} AND col1 < {};\n'.format( selectValLess2, selectValGreater2)) output_file.write('--\n') output_file.write('s2=select(db1.tbl5.col1,{},{})\n'.format( selectValLess2, selectValGreater2)) output_file.write('f2=fetch(db1.tbl5.col3,s2)\n') output_file.write('print(f2)\n') # generate expected results dfSelectMaskGT = dataTable['col2'] >= selectValLess dfSelectMaskLT = dataTable['col2'] < selectValGreater output = dataTable[dfSelectMaskGT & dfSelectMaskLT]['col1'] if len(output) > 0: exp_output_file.write(output.to_string(header=False, index=False)) exp_output_file.write('\n\n') dfSelectMaskGT2 = dataTable['col1'] >= selectValLess2 dfSelectMaskLT2 = dataTable['col1'] < selectValGreater2 output = dataTable[dfSelectMaskGT2 & dfSelectMaskLT2]['col3'] if len(output) > 0: exp_output_file.write(output.to_string(header=False, index=False)) exp_output_file.write('\n') data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestThree(dataTable): # prelude output_file, exp_output_file = data_gen_utils.openFileHandles(3, TEST_DIR=TEST_BASE_DIR) output_file.write('-- Test Multiple Selects + Average\n') output_file.write('--\n') # query selectValLess = 956 selectValGreater = 972 output_file.write('-- SELECT avg(col2) FROM tbl1 WHERE col1 >= {} and col1 < {};\n'.format(selectValLess, selectValGreater)) output_file.write('s1=select(db1.tbl1.col1,956,972)\n') output_file.write('f1=fetch(db1.tbl1.col2,s1)\n') output_file.write('a1=avg(f1)\n') output_file.write('print(a1)\n') # generate expected result dfSelectMaskGT = dataTable['col1'] >= selectValLess dfSelectMaskLT = dataTable['col1'] < selectValGreater output = dataTable[dfSelectMaskGT & dfSelectMaskLT]['col2'] exp_output_file.write(str(np.round(output.mean(), PLACES_TO_ROUND))) exp_output_file.write('\n') data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTests40(dataTable): output_file, exp_output_file = data_gen_utils.openFileHandles( 40, TEST_DIR=TEST_BASE_DIR) output_file.write('-- Correctness test: Update values\n') output_file.write('--\n') output_file.write('-- UPDATE tbl5 SET col1 = -10 WHERE col1 = -1;\n') output_file.write('-- UPDATE tbl5 SET col1 = -20 WHERE col2 = -22;\n') output_file.write('-- UPDATE tbl5 SET col1 = -30 WHERE col1 = -3;\n') output_file.write('-- UPDATE tbl5 SET col1 = -40 WHERE col3 = -444;\n') output_file.write('-- UPDATE tbl5 SET col1 = -50 WHERE col1 = -5;\n') output_file.write('--\n') output_file.write('u1=select(db1.tbl5.col1,-1,0)\n') output_file.write('relational_update(db1.tbl5.col1,u1,-10)\n') output_file.write('u2=select(db1.tbl5.col2,-22,-21)\n') output_file.write('relational_update(db1.tbl5.col1,u2,-20)\n') output_file.write('u3=select(db1.tbl5.col1,-3,-2)\n') output_file.write('relational_update(db1.tbl5.col1,u3,-30)\n') output_file.write('u4=select(db1.tbl5.col3,-444,-443)\n') output_file.write('relational_update(db1.tbl5.col1,u4,-40)\n') output_file.write('u5=select(db1.tbl5.col1,-5,-4)\n') output_file.write('relational_update(db1.tbl5.col1,u5,-50)\n') output_file.write('shutdown\n') # update dataTable dfSelectMaskEq = dataTable['col1'] == -1 dataTable.loc[dfSelectMaskEq, 'col1'] = -10 dfSelectMaskEq = dataTable['col2'] == -22 dataTable.loc[dfSelectMaskEq, 'col1'] = -20 dfSelectMaskEq = dataTable['col1'] == -3 dataTable.loc[dfSelectMaskEq, 'col1'] = -30 dfSelectMaskEq = dataTable['col3'] == -444 dataTable.loc[dfSelectMaskEq, 'col1'] = -40 dfSelectMaskEq = dataTable['col1'] == -5 dataTable.loc[dfSelectMaskEq, 'col1'] = -50 # no expected results data_gen_utils.closeFileHandles(output_file, exp_output_file) return dataTable
def createTest41(dataTable): output_file, exp_output_file = data_gen_utils.openFileHandles( 41, TEST_DIR=TEST_BASE_DIR) selectValLess = np.random.randint(-200, -100) selectValGreater = np.random.randint(10, 100) output_file.write( '-- Correctness test: Run query after inserts and updates\n') output_file.write('--\n') output_file.write( '-- SELECT col1 FROM tbl5 WHERE col2 >= {} AND col2 < {};\n'.format( selectValLess, selectValGreater)) output_file.write('--\n') output_file.write('s1=select(db1.tbl5.col2,{},{})\n'.format( selectValLess, selectValGreater)) output_file.write('f1=fetch(db1.tbl5.col1,s1)\n') output_file.write('print(f1)\n') # generate expected results dfSelectMask = (dataTable['col2'] >= selectValLess) & (dataTable['col2'] < selectValGreater) output = dataTable[dfSelectMask]['col1'] exp_output_file.write(output.to_string(header=False, index=False)) data_gen_utils.closeFileHandles(output_file, exp_output_file)
def createTestFour(dataTable): # prelude output_file, exp_output_file = data_gen_utils.openFileHandles(4, TEST_DIR=TEST_BASE_DIR) output_file.write('-- Load Test Data 2\n') output_file.write('--\n') output_file.write('-- Load+create+insert Data and shut down of tbl2 which has 4 attributes\n') output_file.write('create(tbl,\"tbl2\",db1,4)\n') output_file.write('create(col,\"col1\",db1.tbl2)\n') output_file.write('create(col,\"col2\",db1.tbl2)\n') output_file.write('create(col,\"col3\",db1.tbl2)\n') output_file.write('create(col,\"col4\",db1.tbl2)\n') output_file.write('load(\"'+DOCKER_TEST_BASE_DIR+'/data2_generated.csv\")\n') output_file.write('relational_insert(db1.tbl2,-1,-11,-111,-1111)\n') output_file.write('relational_insert(db1.tbl2,-2,-22,-222,-2222)\n') output_file.write('relational_insert(db1.tbl2,-3,-33,-333,-2222)\n') output_file.write('relational_insert(db1.tbl2,-4,-44,-444,-2222)\n') output_file.write('relational_insert(db1.tbl2,-5,-55,-555,-2222)\n') output_file.write('relational_insert(db1.tbl2,-6,-66,-666,-2222)\n') output_file.write('relational_insert(db1.tbl2,-7,-77,-777,-2222)\n') output_file.write('relational_insert(db1.tbl2,-8,-88,-888,-2222)\n') output_file.write('relational_insert(db1.tbl2,-9,-99,-999,-2222)\n') output_file.write('relational_insert(db1.tbl2,-10,-11,0,-34)\n') output_file.write('shutdown\n') # columns need to align for append to work deltaTable = pd.DataFrame([[-1, -11, -111, -1111], [-2, -22, -222, -2222], [-3, -33, -333, -2222], [-4, -44, -444, -2222], [-5, -55, -555, -2222], [-6, -66, -666, -2222], [-7, -77, -777, -2222], [-8, -88, -888, -2222], [-9, -99, -999, -2222], [-10, -11, 0, -34]], columns=['col1', 'col2', 'col3', 'col4']) dataTable = dataTable.append(deltaTable) data_gen_utils.closeFileHandles(output_file, exp_output_file) return dataTable