def task3(configData):
    """Run the task 3 trial: build a type 2 Bloom filter with c = 15 and test it.

    The config dictionary is updated in place: ``configData['n']`` is set to
    a prime filter size derived from ``c * configData['m']`` and
    ``configData['type']`` is set to 2.  The input integers are then read
    from ``configData['inFileName']`` and handed to ``testBF_3`` together
    with the filter and ``configData['m']``.

    Args:
        configData: configuration dictionary; must provide ``'m'`` and
            ``'inFileName'``.  Mutated as described above.

    Returns:
        None.  Progress is reported with ``print``.
    """
    # NOTE: task 3 requires remaking the bloom filter several times to run
    # the appropriate trials, either with a new constructor or by changing
    # the config dictionary to hold the k and n (filter size) values that
    # correspond to the chosen c value.
    c = 15

    # Type 2 hashes require n to be prime, so round c * m up with the
    # provided helper instead of using the (composite) product directly.
    configData['n'] = util.findNextPrime(configData['m'] * c)
    configData['type'] = 2

    # instantiate bloom filter object
    bf = BloomFilter(configData, c)

    # bfInputData holds a list of integers: the first configData['m'] of
    # them are inserted into the filter and all of them are tested.
    bfInputData = util.readIntFileDat(configData['inFileName'])
    if len(bfInputData) == 0:
        print('No Data to add to bloom filter')
        return
    print('bfInputData has ' + str(len(bfInputData)) + ' elements')

    # testBF_3 will insert elements and test membership
    outputResList = testBF_3(bfInputData, bf, configData['m'])

    # Writing and validating the results is left disabled for this task:
    # util.writeFileDat(configData['outFileName'], outputResList)
    # util.compareResults(outputResList, configData)
    print('Task 3 complete')
def _plotHashScatter(xValues, hashValues, title):
    """Show one scatter plot of hash value against input value."""
    plt.scatter(xValues, hashValues, marker='.', s=1)
    plt.axis((0, max(xValues), 0, max(hashValues)))
    plt.title(title)
    plt.xlabel('input data value')
    plt.ylabel('hash value')
    plt.show()


def task1(configData):
    """Plot the values produced by the type 1 and type 2 hash functions.

    Four scatter plots are shown, in order: type 1 and type 2 hashes of the
    first (up to) 10000 input values, then type 1 and type 2 hashes of the
    even values found among the first (up to) 20000 input values.

    NOTE : task 1 does not require you to instantiate a bloom filter.

    NOTE(review): a later definition named ``task1`` in this file rebinds
    the name, so only one of the two definitions should be kept.

    Args:
        configData: configuration dictionary; ``'inFileName'`` names the
            integer data file, and the dictionary is also passed to the
            hash constructors.

    Returns:
        None.
    """
    maxPlainSamples = 10000
    maxEvenScan = 20000

    bfInputData = util.readIntFileDat(configData['inFileName'])
    if len(bfInputData) == 0:
        # max() below would raise on an empty sequence.
        print('No Data to plot for task 1')
        return

    ht1 = HashType1(configData, True)
    ht2 = HashType2(configData, True)
    # A single hash per input value is enough for plotting.
    ht1.k = 1
    ht2.k = 1

    # Slicing (rather than indexing up to a fixed count) keeps this working
    # when the input file holds fewer values than the sample sizes.
    plainValues = bfInputData[:maxPlainSamples]
    evenValues = [v for v in bfInputData[:maxEvenScan] if v % 2 == 0]

    for xValues, label in ((plainValues, 'Values'), (evenValues, 'Even Values')):
        if len(xValues) == 0:
            print('No ' + label + ' to plot for task 1')
            continue

        ht1list = []
        ht2list = []
        for value in xValues:
            # getHashList returns a list; keep its single (k = 1) entry so
            # the plotted series and the axis limit are plain numbers.
            ht1list.append(ht1.getHashList(value)[0])
            ht2list.append(ht2.getHashList(value)[0])

        _plotHashScatter(xValues, ht1list,
                         'Type 1 Hash Function ' + label + ' Mapped')
        _plotHashScatter(xValues, ht2list,
                         'Type 2 Hash Function ' + label + ' Mapped')

    print('Task 1 complete')
def task1(configData):
    """Scatter-plot type 1 and type 2 hash values against their inputs.

    Two data sets are plotted, each once per hash type (four figures in
    total, type 1 before type 2): the first (up to) 10000 input values, and
    the even values among the first (up to) 20000 input values.

    NOTE(review): another ``task1`` is defined earlier in this file; this
    later definition is the one that replaces it.  One of the two should
    be removed.

    Args:
        configData: configuration dictionary; ``'inFileName'`` names the
            integer data file, and the dictionary is also passed to the
            hash constructors.

    Returns:
        None.
    """
    bfInputData = util.readIntFileDat(configData['inFileName'])
    ht1 = HashType1(configData, True)
    ht2 = HashType2(configData, True)
    # One hash value per input is all the plots need.
    ht1.k = 1
    ht2.k = 1

    # Slices tolerate input files shorter than the requested sample sizes,
    # where fixed-count index loops would raise IndexError.
    firstValues = bfInputData[:10000]
    evenValues = [v for v in bfInputData[:20000] if v % 2 == 0]
    dataSets = (
        (firstValues, 'Values Mapped'),
        (evenValues, 'Even Values Mapped'),
    )

    for xValues, titleSuffix in dataSets:
        if len(xValues) == 0:
            # Nothing to plot, and max() would raise on an empty sequence.
            print('No data to plot for: ' + titleSuffix)
            continue

        type1Hashes = []
        type2Hashes = []
        for value in xValues:
            # Store the single hash itself rather than a one-element list.
            type1Hashes.append(ht1.getHashList(value)[0])
            type2Hashes.append(ht2.getHashList(value)[0])

        for hashLabel, hashes in (('Type 1', type1Hashes),
                                  ('Type 2', type2Hashes)):
            plt.scatter(xValues, hashes, marker='.', s=1)
            plt.axis((0, max(xValues), 0, max(hashes)))
            plt.title(hashLabel + ' Hash Function ' + titleSuffix)
            plt.xlabel('input data value')
            plt.ylabel('hash value')
            plt.show()

    print('Task 1 complete')
def task2(configData):
    """Build a Bloom filter from the config, run the membership test, and
    write and validate the results.

    Args:
        configData: configuration dictionary; this function reads
            ``'inFileName'``, ``'m'`` and ``'outFileName'`` from it and
            passes the whole dictionary to ``BloomFilter`` and
            ``util.compareResults``.

    Returns:
        None.  Progress is reported with ``print``.
    """
    # The filter is created first, before any input is read.
    bloomFilter = BloomFilter(configData)

    # The input is a list of integers: the first configData['m'] values get
    # inserted, and every value gets tested for membership.
    inputValues = util.readIntFileDat(configData['inFileName'])
    numValues = len(inputValues)
    if numValues == 0:
        print('No Data to add to bloom filter')
        return
    print('bfInputData has ' + str(numValues) + ' elements')

    # testBF performs both the insertions and the membership tests.
    results = testBF(inputValues, bloomFilter, configData['m'])

    # Persist the results, then check them against the validation data for
    # the configured hash function.
    util.writeFileDat(configData['outFileName'], results)
    util.compareResults(results, configData)
    print('Task 2 complete')
def computeFalsePositive(configData, numTrials, k, c, hashType):
    """Return the false-positive rate of the Bloom filter averaged over trials.

    The config dictionary is updated in place with ``'k'``, ``'type'`` and a
    prime ``'n'`` derived from ``c * configData['m']``.  For each trial a
    fresh ``BloomFilter`` is built, the first ``configData['m']`` input
    values are added, and every remaining input value is queried; each
    positive answer for those remaining values is counted as a false
    positive (this assumes none of them duplicates an inserted value).

    Args:
        configData: configuration dictionary; must provide ``'m'`` and
            ``'inFileName'``.  Mutated as described above.
        numTrials: number of filters to build and measure; must be > 0.
        k: value stored in ``configData['k']``.
        c: multiplier applied to ``configData['m']`` to size the filter.
        hashType: value stored in ``configData['type']``.

    Returns:
        The mean, over all trials, of (false positives / values queried).
        If the input holds no values beyond the first ``m``, 0.0 is
        returned.

    Raises:
        ValueError: if ``numTrials`` is not positive.
        IndexError: if the input holds fewer than ``configData['m']`` values.
    """
    if numTrials <= 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError('numTrials must be a positive integer')

    configData['k'] = k
    configData['type'] = hashType
    configData['n'] = util.findNextPrime(c * configData['m'])
    numInserted = configData['m']

    # The input does not change between trials, so read it only once.
    bfInputData = util.readIntFileDat(configData['inFileName'])
    numTested = len(bfInputData) - numInserted

    sumFalsePositive = 0.0
    for _ in range(numTrials):
        bf = BloomFilter(configData)

        # Indexing (not slicing) keeps the IndexError on too-short input.
        for idx in range(numInserted):
            bf.add(bfInputData[idx])

        if numTested <= 0:
            # Nothing was queried, so this trial contributes a rate of 0.
            continue

        falsePositive = 0
        for idx in range(numInserted, len(bfInputData)):
            if bf.contains(bfInputData[idx]):
                falsePositive += 1

        # Normalise by the number of values actually queried; dividing by
        # the number inserted is only right when the two counts are equal.
        sumFalsePositive += falsePositive / float(numTested)

    return sumFalsePositive / numTrials