Exemple #1
0
 def __init__(self, config, genHashes):
     """
     Store the sizes, their derived primes and the (a, b) coefficient lists.

     Args:
         config (dictionary): configuration data for this hashing object; the
                              keys 'k', 'n' and 'N' are always read, 'a' and
                              'b' only when genHashes is false
         genHashes (boolean): when true, draw k new coefficient pairs with
                              random.randint; when false (task 2), take the
                              coefficients from config
     """
     self.k = config['k']
     self.n = config['n']
     self.N = config['N']
     self.Prime = util.findNextPrime(self.n)
     self.NPrime = util.findNextPrime(self.N)

     if not genHashes:
         # task 2: the coefficients come with the configuration
         self.a = config['a']
         self.b = config['b']
         return

     # NOTE: seeding from config['genSeed'] is currently switched off
     # random.seed(config['genSeed'])

     # Each pair draws a before b, so the random stream is consumed in the
     # order a0, b0, a1, b1, ... ; randint includes both of its bounds.
     pairs = [(random.randint(1, self.n), random.randint(0, self.n))
              for _ in range(self.k)]
     self.a = [mult for mult, _ in pairs]
     self.b = [offset for _, offset in pairs]
Exemple #2
0
    def __init__(self, config):
        """
        Create the hash-function object for this bloom filter and allocate
        its zero-filled storage array.

        Args:
            config (dictionary): Configuration of this bloom filter
            config['N'] (int) universe size
            config['m'] (int) number of elements to add to filter
            config['n'] (int) number of bits in bloom filter storage array
            config['k'] (int) number of hash functions to use
            config['task'] (int) the task this bloom filter is to perform (1,2,3)
            config['type'] (int) type of hash function (1, 2, -1==unknown type)
            if type 1 hash :
                config['seeds'] (list of k ints) : seed values for k hash functions for type 1 hash function
            if type 2 hash :
                config['a'] (list of k ints) : a coefficients for k hash functions for type 2 hash function
                config['b'] (list of k ints) : b coefficients for k hash functions for type 2 hash function

        Side effects:
            config is modified in place: config['n'] is rewritten as an int
            (replaced via util.findNextPrime when util.checkIfPrime rejects it)
            and config['P'] is added, derived the same way from config['N'].

        Raises:
            ValueError: if config['type'] is neither 1 nor 2
        """
        # task this bloom filter is performing
        self.task = config['task']
        # Whether the hash object should generate its own seeds/coefficients.
        # NOTE(review): the original notes say tasks 1 and 3 generate hashes and
        # task 2 uses the provided ones, yet this test disables generation only
        # for task 3 - confirm which is intended before changing it.
        genHashes = (self.task != 3)
        # type of hash for this bloom filter
        self.type = config['type']

        # keep n when it passes the primality check, otherwise replace it
        n = int(config['n'])
        if not util.checkIfPrime(n):
            config['n'] = util.findNextPrime(n + 1)
        else:
            config['n'] = n

        # same treatment for the universe size, stored under its own key 'P'
        P = int(config['N'])
        if not util.checkIfPrime(P):
            config['P'] = util.findNextPrime(P + 1)
        else:
            config['P'] = P

        if self.type == 1:
            self.hashFunc = HashType1(config, genHashes)
        elif self.type == 2:
            self.hashFunc = HashType2(config, genHashes)
        # further hash types would be dispatched here
        else:
            # Fail here with a clear message: without a hash object the
            # storage array below cannot be sized.
            raise ValueError('BloomFilter for task ' + str(self.task) +
                             ' ctor : Unknown Hash type : ' + str(self.type))

        # one zeroed slot per position the hash object can address
        self.bf_arr = [0] * self.hashFunc.n
Exemple #3
0
    def __init__(self, config, genHashes, c):
        """
        Record the hash sizes and obtain the coefficient collections a and b.

        Args:
            config (dictionary): configuration data for this hashing object;
                                 reads 'k', 'N' and 'task', then 'm' (task 3)
                                 or 'n' (other tasks), then 'genSeed' or
                                 'a'/'b' depending on genHashes
            genHashes (boolean): whether or not to generate new hash coefficients
                                 (task 2 provides them, tasks 1 and 3 make
                                 their own)
            c: multiplier applied to config['m'] to obtain n when
               config['task'] is 3
        """
        self.k = config['k']
        self.N = config['N']
        self.P = util.findNextPrime(self.N)

        # task 3 derives n from c and m; every other task takes n as configured
        self.n = c * config['m'] if config['task'] == 3 else config['n']

        if not genHashes:
            # task 2: use the coefficients already present in the configuration
            self.a = config['a']
            self.b = config['b']
            return

        # seed first so the coefficients drawn below are reproducible
        random.seed(config['genSeed'])

        # NOTE(review): the low/high/size keywords match numpy.random.randint,
        # not the stdlib random.randint - presumably `random` is numpy.random
        # in this file; confirm. If so, `high` is exclusive and a coefficient
        # of 0 can be drawn for a.
        upper = self.n - 1
        self.a = random.randint(low=0, high=upper, size=self.k)
        self.b = random.randint(low=0, high=upper, size=self.k)
Exemple #4
0
 def __init__(self, config, genHashes):
     """
     Store the sizes, their derived primes and the coefficient lists a and b.

     Args:
         config (dictionary): configuration data; 'k', 'n' and 'N' are always
                              read, 'a' and 'b' only when genHashes is false
         genHashes (boolean): when true, draw k new coefficient pairs with
                              random.randint; when false (task 2), take the
                              coefficients from config
     """
     self.k = config['k']
     self.n = config['n']
     self.N = config['N']
     self.prime = util.findNextPrime(self.n)
     self.NPrime = util.findNextPrime(self.N)

     if not genHashes:
         # task 2: hashes are the ones stored in the config dictionary
         self.a = config['a']
         self.b = config['b']
         return

     # NOTE: seeding from config['genSeed'] is currently switched off
     # random.seed(config['genSeed'])

     # One flat list of draws in the order a0, b0, a1, b1, ... : the lower
     # bound is 1 for an a coefficient and 0 for a b coefficient, the upper
     # bound (inclusive) is n - 1 for both.
     draws = [random.randint(lowest, self.n - 1)
              for _ in range(self.k)
              for lowest in (1, 0)]
     self.a = draws[0::2]
     self.b = draws[1::2]
Exemple #5
0
def computeFalsePositive(configData, numTrials, k, c, hashType):
    """
    Estimate the average false positive rate of a BloomFilter over several trials.

    configData is updated in place ('k', 'type' and 'n') before any filter is
    built. Every trial builds a new BloomFilter from configData, adds the
    first m values of the input data to it, and then queries each remaining
    value; a query the filter answers positively counts as a false positive.

    Args:
        configData (dictionary): bloom filter configuration; 'm' and
            'inFileName' are read, 'k', 'type' and 'n' are overwritten
        numTrials (int): number of filters to build and measure
        k: value stored in configData['k']
        c: multiplier; configData['n'] becomes util.findNextPrime(c * m)
        hashType: value stored in configData['type']

    Returns:
        float: mean over all trials of
            (false positives) / (number of values queried).
            A trial contributes 0.0 when the data holds no values beyond the
            first m.

    Raises:
        ZeroDivisionError: if numTrials is 0
        IndexError: if the input data holds fewer than m values
    """
    configData['k'] = k
    configData['type'] = hashType
    configData['n'] = util.findNextPrime(c * configData['m'])
    m = configData['m']

    # The same file is used for every trial, so it is read once up front.
    bfInputData = util.readIntFileDat(configData['inFileName'])
    # Values after the first m are the ones queried; the rate is measured
    # against this count, not against the number of inserted values.
    numQueries = len(bfInputData) - m

    sumFalsePositive = 0.0
    for _ in range(numTrials):
        # initialize bloom filter
        bf = BloomFilter(configData)
        # add data to bloom filter
        for j in range(m):
            bf.add(bfInputData[j])
        # test false positive
        falsePositive = sum(
            1 for j in range(m, len(bfInputData))
            if bf.contains(bfInputData[j])
        )
        if numQueries > 0:
            sumFalsePositive += falsePositive / float(numQueries)
    return sumFalsePositive / numTrials