Exemple #1
0
 def increment(self, element: object):
     """Record ``element`` by setting one bit in ``self.bit_map``.

     The element is converted with ``str()``, encoded as UTF-8 and hashed;
     the hash reduced modulo ``self.bitmap_size`` selects the bit to set.
     ``smhasher.murmur3_x64_128`` is used when ``self.hashing_function`` is
     ``'murmurhash3'``; any other value falls back to ``pyhash.lookup3``.
     """
     payload = str(element).encode('utf-8')
     if self.hashing_function != 'murmurhash3':
         # Fallback hasher for every non-murmurhash3 setting.
         digest = pyhash.lookup3()(payload)
     else:
         digest = smhasher.murmur3_x64_128(payload)
     self.bit_map[digest % self.bitmap_size] = True
Exemple #2
0
 def increment(self, element: object) -> None:
     """Record ``element`` in ``self.bitmap``.

     The element is converted with ``str()``, encoded as UTF-8 and hashed
     with the function selected by ``self.hashing_function``
     (``'murmurhash3'`` -> smhasher, ``'mmh3'`` -> mmh3, anything else ->
     pyhash lookup3), using ``self.seed``. The hash is reduced modulo
     ``self.hash_function_upper_bound`` and the index returned by
     ``self.count_trailing_zeros`` for that value is set to ``True``.
     """
     encoded = str(element).encode('utf-8')

     if self.hashing_function == 'murmurhash3':
         raw_hash = smhasher.murmur3_x64_128(encoded, self.seed)
     elif self.hashing_function == 'mmh3':
         raw_hash = mmh3.hash(encoded, seed=self.seed)
     else:
         # lookup3 receives the seed as a second positional string argument.
         raw_hash = pyhash.lookup3()(encoded, str(self.seed))

     bounded = raw_hash % self.hash_function_upper_bound
     position = self.count_trailing_zeros(bounded, self.sketch_size)
     self.bitmap[position] = True
Exemple #3
0
    def __init__(self, size=65536, k=7, name='bf', load=False):
        """Initialise the filter, either fresh or via ``self.load(name)``.

        Args:
            size: Number of bits in ``self.bitarray`` and number of sets in
                each table.
            k: Number of tables to allocate; must satisfy ``0 < k <= 18``
                (18 hash functions are registered in ``self.hashes``).
            name: Stored as ``self.name`` for a fresh filter, or passed to
                ``self.load`` when ``load`` is true.
            load: When true, ``self.load(name)`` is called and ``size`` and
                ``k`` are not used by this method.

        Raises:
            ValueError: If ``load`` is false and ``k <= 0`` or ``k > 18``.
        """
        if load:
            self.load(name)
        else:
            # Fail loudly instead of printing and returning: returning from
            # __init__ here would hand the caller an instance with no
            # bitarray, tables or hashes.
            if k > 18 or k <= 0:
                raise ValueError('k should be > 0 & <= 18')
            self.size = size
            self.k = k
            self.name = name
            self.bitarray = bitarray.bitarray('0' * self.size)
            # One table per hash function in use, each with `size` empty sets.
            self.tables = [[set() for _ in range(self.size)]
                           for _ in range(self.k)]

        def digest_as_int(algorithm):
            # Adapt a hashlib constructor into a str -> int hash function
            # (integer value of the hex digest of the UTF-8 encoded text).
            return lambda text: int(
                algorithm(text.encode('utf-8')).hexdigest(), 16)

        # Order matters: entries are addressed by position.
        self.hashes = [
            pyhash.fnv1_64(),
            pyhash.murmur2_x64_64a(),
            pyhash.murmur3_x64_128(),
            pyhash.lookup3(),
            pyhash.super_fast_hash(),
            pyhash.city_128(),
            pyhash.spooky_128(),
            pyhash.farm_128(),
            pyhash.metro_128(),
            pyhash.mum_64(),
            pyhash.t1_64(),
            pyhash.xx_64(),
            digest_as_int(hashlib.md5),
            digest_as_int(hashlib.sha1),
            digest_as_int(hashlib.sha224),
            digest_as_int(hashlib.sha256),
            digest_as_int(hashlib.sha384),
            digest_as_int(hashlib.sha512),
        ]
import pyhash
import sys
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

# Number of slots in the Bloom filter; hash values are reduced modulo this.
bloomFilterSize = 10
# Backing storage for the filter bits. It starts empty here -- presumably it
# is filled to bloomFilterSize entries elsewhere before being indexed
# (TODO confirm).
bit_vector = []

# Hash function instances provided by pyhash.
fnv = pyhash.fnv1a_32()
mur = pyhash.murmur3_32()
lookup = pyhash.lookup3()
super1 = pyhash.super_fast_hash()
city = pyhash.city_64()
spooky = pyhash.spooky_32()
farm = pyhash.farm_32()
metro = pyhash.metro_64()
mum = pyhash.mum_64()
xx = pyhash.xx_32()
# The 10 hash functions, in the order insertBloom iterates over them.
hashfuncs = [fnv, mur, lookup, super1, city, spooky, farm, metro, mum, xx]
#hash


def insertBloom(kmer, hashFuncCount):
    global bloomFilterSize
    global bit_vector
    index = 0
    for hf in hashfuncs:
        if (index <= hashFuncCount):
            if (bit_vector[hf(kmer) % bloomFilterSize] == 0):