예제 #1
0
파일: bloom.py 프로젝트: limjiayi/Portfolio
def task3(configData):
    """Run a single bloom-filter trial for task 3 using type 2 hashes and c = 15.

    Overwrites configData['type'] and configData['n'] in place, builds a
    BloomFilter from the updated config, reads the integers stored in
    configData['inFileName'] and hands them to testBF_3 together with the
    filter and configData['m'].

    Args:
        configData: configuration dictionary. Reads 'm' and 'inFileName';
            writes 'n' and 'type'.

    Returns:
        None. Progress is reported with print().
    """
    # c is the multiplier applied to m to obtain the filter size n.
    c = 15
    configData['type'] = 2
    # Type 2 hashes require a prime filter size. m * c is a multiple of 15 and
    # so never prime; round it to a prime with the provided helper instead of
    # using the raw product.
    configData['n'] = util.findNextPrime(configData['m'] * c)

    # instantiate bloom filter object
    bf = BloomFilter(configData, c)

    # bfInputData holds the integers read from the input file.
    bfInputData = util.readIntFileDat(configData['inFileName'])
    if len(bfInputData) == 0:
        print('No Data to add to bloom filter')
        return
    print('bfInputData has ' + str(len(bfInputData)) + ' elements')

    # Per the original notes, testBF_3 inserts elements and tests membership.
    # Its return value is not used: unlike task 2, the results are neither
    # written to an output file nor compared against validation data here.
    testBF_3(bfInputData, bf, configData['m'])

    print('Task 3 complete')
예제 #2
0
def _collect_hashes(values, ht1, ht2):
    """Return (type-1 results, type-2 results) of getHashList for each value."""
    ht1list = []
    ht2list = []
    for value in values:
        ht1list.append(ht1.getHashList(value))
        ht2list.append(ht2.getHashList(value))
    return ht1list, ht2list


def _plot_hash_scatter(xs, hashLists, title):
    """Show one scatter plot of hash results against the inputs that produced them.

    Args:
        xs: input values, one per point.
        hashLists: the getHashList() result for each input, in the same order.
        title: title of the plot.
    """
    if len(xs) == 0:
        # max() below raises on an empty sequence, so there is nothing to draw.
        print('No data to plot for: ' + title)
        return
    plt.scatter(xs, hashLists, marker='.', s=1)
    # The y-limit is the first entry of the largest per-input result
    # (max() compares the results as sequences).
    plt.axis((0, max(xs), 0, max(hashLists)[0]))
    plt.title(title)
    plt.xlabel('input data value')
    plt.ylabel('hash value')
    plt.show()


def task1(configData):
    """Plot type 1 and type 2 hash values against their input values.

    Reads the integers stored in configData['inFileName'] and shows four
    scatter plots: both hash types over the first 10000 inputs, then both hash
    types over the even values found among the first 20000 inputs. If the file
    holds fewer values than that, whatever is available is used.

    No bloom filter is instantiated for this task.

    Args:
        configData: configuration dictionary. Reads 'inFileName' and is also
            passed to the HashType1 / HashType2 constructors.

    Returns:
        None. Plots are shown and a completion message is printed.
    """
    sampleSize = 10000
    evenWindow = 20000

    bfInputData = util.readIntFileDat(configData['inFileName'])
    # NOTE(review): the meaning of the second constructor argument (True) is
    # not visible from here.
    ht1 = HashType1(configData, True)
    ht2 = HashType2(configData, True)
    # k is forced to 1 on both hashers before hashing; presumably this makes
    # getHashList return a single hash per input - confirm against the class.
    ht1.k = 1
    ht2.k = 1

    # Slicing (rather than indexing a fixed range) keeps short inputs from
    # raising IndexError.
    x = bfInputData[:sampleSize]
    ht1list, ht2list = _collect_hashes(x, ht1, ht2)
    _plot_hash_scatter(x, ht1list, 'Type 1 Hash Function Values Mapped')
    _plot_hash_scatter(x, ht2list, 'Type 2 Hash Function Values Mapped')

    # Same plots restricted to the even inputs.
    x = [value for value in bfInputData[:evenWindow] if value % 2 == 0]
    ht1list, ht2list = _collect_hashes(x, ht1, ht2)
    _plot_hash_scatter(x, ht1list, 'Type 1 Hash Function Even Values Mapped')
    _plot_hash_scatter(x, ht2list, 'Type 2 Hash Function Even Values Mapped')

    print('Task 1 complete')
예제 #3
0
def _collect_hashes(values, ht1, ht2):
    """Return (type-1 results, type-2 results) of getHashList for each value."""
    ht1list = []
    ht2list = []
    for value in values:
        ht1list.append(ht1.getHashList(value))
        ht2list.append(ht2.getHashList(value))
    return ht1list, ht2list


def _plot_hash_scatter(xs, hashLists, title):
    """Show one scatter plot of hash results against the inputs that produced them.

    Args:
        xs: input values, one per point.
        hashLists: the getHashList() result for each input, in the same order.
        title: title of the plot.
    """
    if len(xs) == 0:
        # max() below raises on an empty sequence, so there is nothing to draw.
        print('No data to plot for: ' + title)
        return
    plt.scatter(xs, hashLists, marker='.', s=1)
    # The y-limit is the first entry of the largest per-input result
    # (max() compares the results as sequences).
    plt.axis((0, max(xs), 0, max(hashLists)[0]))
    plt.title(title)
    plt.xlabel('input data value')
    plt.ylabel('hash value')
    plt.show()


def task1(configData):
    """Plot type 1 and type 2 hash values against their input values.

    Reads the integers stored in configData['inFileName'] and shows four
    scatter plots: both hash types over the first 10000 inputs, then both hash
    types over the even values found among the first 20000 inputs. If the file
    holds fewer values than that, whatever is available is used.

    Args:
        configData: configuration dictionary. Reads 'inFileName' and is also
            passed to the HashType1 / HashType2 constructors.

    Returns:
        None. Plots are shown and a completion message is printed.
    """
    sampleSize = 10000
    evenWindow = 20000

    bfInputData = util.readIntFileDat(configData['inFileName'])
    # NOTE(review): the meaning of the second constructor argument (True) is
    # not visible from here.
    ht1 = HashType1(configData, True)
    ht2 = HashType2(configData, True)
    # k is forced to 1 on both hashers before hashing; presumably this makes
    # getHashList return a single hash per input - confirm against the class.
    ht1.k = 1
    ht2.k = 1

    # Slicing (rather than indexing a fixed range) keeps short inputs from
    # raising IndexError.
    x = bfInputData[:sampleSize]
    ht1list, ht2list = _collect_hashes(x, ht1, ht2)
    _plot_hash_scatter(x, ht1list, 'Type 1 Hash Function Values Mapped')
    _plot_hash_scatter(x, ht2list, 'Type 2 Hash Function Values Mapped')

    # Same plots restricted to the even inputs.
    x = [value for value in bfInputData[:evenWindow] if value % 2 == 0]
    ht1list, ht2list = _collect_hashes(x, ht1, ht2)
    _plot_hash_scatter(x, ht1list, 'Type 1 Hash Function Even Values Mapped')
    _plot_hash_scatter(x, ht2list, 'Type 2 Hash Function Even Values Mapped')

    print('Task 1 complete')
예제 #4
0
def task2(configData):
    """Build a bloom filter, run the membership test and validate the results.

    Args:
        configData: configuration dictionary. Reads 'inFileName', 'm' and
            'outFileName'; it is also passed whole to BloomFilter and to
            util.compareResults.

    Returns:
        None. Returns early, after printing a message, when the input file
        yields no values.
    """
    # The filter is created before the input is read, as in the original flow.
    bloom = BloomFilter(configData)

    values = util.readIntFileDat(configData['inFileName'])
    count = len(values)
    if count == 0:
        print('No Data to add to bloom filter')
        return
    print('bfInputData has ' + str(count) + ' elements')

    # Per the original notes, testBF inserts the first configData['m'] values
    # and tests all values for membership.
    results = testBF(values, bloom, configData['m'])

    # Persist the results, then compare them with the validation data.
    util.writeFileDat(configData['outFileName'], results)
    util.compareResults(results, configData)

    print('Task 2 complete')
예제 #5
0
def computeFalsePositive(configData, numTrials, k, c, hashType):
    """Return the false-positive figure averaged over numTrials bloom filters.

    For every trial a fresh BloomFilter is built from configData, the first
    configData['m'] input values are added, and each remaining input value is
    checked with contains(). The number of hits is divided by m, and the
    per-trial quotients are averaged.

    NOTE(review): the hit count is divided by m (the number of inserted
    values), not by the number of values probed. The two agree only when the
    input holds exactly 2 * m values - confirm which is intended.

    Args:
        configData: configuration dictionary. Reads 'm' and 'inFileName';
            'k', 'type' and 'n' are overwritten in place.
        numTrials: number of filters to build and test.
        k: value stored in configData['k'].
        c: multiplier; n is set to util.findNextPrime(c * m).
        hashType: value stored in configData['type'].

    Returns:
        The mean over all trials of (hits among non-inserted values) / m.

    Raises:
        ZeroDivisionError: if numTrials is 0 (or m is 0 with numTrials > 0).
        IndexError: if the input holds fewer than m values.
    """
    configData['k'] = k
    configData['type'] = hashType
    configData['n'] = util.findNextPrime(c * configData['m'])

    m = configData['m']
    bfInputData = None
    candidates = None
    sumFalsePositive = 0
    for _ in range(numTrials):
        # initialize bloom filter
        bf = BloomFilter(configData)
        if bfInputData is None:
            # The input does not depend on the trial: read it once, on the
            # first trial, instead of re-reading the file every iteration.
            bfInputData = util.readIntFileDat(configData['inFileName'])
            # Values after the first m were never inserted; any hit among
            # them counts as a false positive.
            candidates = bfInputData[m:]
        # add data to bloom filter
        for j in range(m):
            bf.add(bfInputData[j])
        # test false positive
        falsePositive = sum(1 for value in candidates if bf.contains(value))
        sumFalsePositive += falsePositive / float(m)
    return sumFalsePositive / numTrials