class StoreBitArrayTests(unittest.TestCase):
    """Whenever we change a BloomFilter, ensure that we Memcache our changes.

    Each test mutates ``self.dilberts`` (or relies on its construction in
    ``setUp``), then builds a second BloomFilter with the same key and checks
    that both expose an equal ``_bit_array``.
    """

    def setUp(self):
        # Name the class explicitly: super(self.__class__, self) resolves to
        # the subclass when this test case is inherited from, which makes the
        # call recurse into itself forever.
        super(StoreBitArrayTests, self).setUp()
        self.dilberts = BloomFilter({'rajiv', 'raj'}, key='dilberts')

    def tearDown(self):
        # Remove the stored key so every test starts from a clean slate.
        self.dilberts.memcache.delete(self.dilberts.key)
        super(StoreBitArrayTests, self).tearDown()

    def test_init_gets_stored(self):
        """When we __init__() on an iterable, ensure we Memcache the bit array."""
        office_space = BloomFilter(key='dilberts')
        assert office_space._bit_array == self.dilberts._bit_array

    def test_add_gets_stored(self):
        """When we add() an element, ensure that we Memcache the bit array."""
        self.dilberts.add('dan')
        office_space = BloomFilter(key='dilberts')
        assert office_space._bit_array == self.dilberts._bit_array

    def test_update_gets_stored(self):
        """When we update() with elements, ensure that we Memcache the bit array."""
        self.dilberts.update({'dan', 'eric'})
        office_space = BloomFilter(key='dilberts')
        assert office_space._bit_array == self.dilberts._bit_array

    def test_clear_gets_stored(self):
        """When we clear() all elements, ensure that we Memcache the bit array."""
        self.dilberts.clear()
        office_space = BloomFilter(key='dilberts')
        assert office_space._bit_array == self.dilberts._bit_array
def main():
    """Greet, try to open a MongoDB client, then demo a BloomFilter lookup.

    A ``BloomFilter(20, 0.02)`` is created, the string "basu" is added to it
    twice, and a message is printed depending on whether "basu" is reported
    as a member.
    """
    print("Hello!")
    try:
        # NOTE(review): the URI appears to embed a user name and password;
        # consider loading it from configuration instead of source code.
        # NOTE(review): presumably MongoClient can raise ConnectionError
        # here - confirm which exception type is actually in scope.
        mongo_client = MongoClient("mongodb+srv://amartya:[email protected]/test?retryWrites=true&w=majority")
    except ConnectionError:
        print("Server not available")

    cache_filter = BloomFilter(20, 0.02)
    # The same key is inserted twice, exactly as the demo originally did.
    for _ in range(2):
        cache_filter.add("basu")

    if "basu" not in cache_filter:
        print("The searched item does NOT exist in the cache. Read from persistent datastore")
    else:
        print("Object exists")
def create_bloom(set, capacity=3000, error_rate=0.001):
    """Return a new BloomFilter containing every element of ``set``.

    Args:
        set: Iterable of elements to add to the filter.
        capacity: Forwarded to ``BloomFilter`` as ``capacity``.
        error_rate: Forwarded to ``BloomFilter`` as ``error_rate``.

    Returns:
        The populated BloomFilter.
    """
    bloom = BloomFilter(capacity=capacity, error_rate=error_rate)
    for member in set:
        bloom.add(member)
    return bloom
# User names that will be added to the filter
nombres_existentes = [
    'ironman', 'thor', 'american_captain', 'spiderman', 'loki', 'wolverine',
    'black_widow', 'hulk', 'deadpool', 'nick_fury', 'thanos', 'Dr_strange',
    'venon', 'odin', 'magneto', 'black_panter', 'rocket', 'gamora', 'ultron',
    'groot', 'ant_man'
]

# Names that are never added to the filter
nombres_no_existentes = [
    'superman', 'batman', 'wonder_woman', 'green_Lantern', 'he_Man',
    'batgirl', 'lion_O', 'shazam', 'aquaman', 'green_arrow', 'flash',
    'tygro', 'Cheetara'
]

# Populate the filter before anything is shuffled.
for item in nombres_existentes:
    bloom_filtro.add(item)

shuffle(nombres_existentes)
shuffle(nombres_no_existentes)

# Probe with ten of the added names plus every name that was not added.
prueba = nombres_existentes[:10] + nombres_no_existentes
shuffle(prueba)

print("=======================================================")
for word in prueba:
    if not bloom_filtro.check(word):
        print("'{}' definitivamente no existe!".format(word))
    elif word in nombres_no_existentes:
        # Reported as present although it was never added.
        print("'{}' es un falso positivo!".format(word))
    else:
        print("'{}' probablemente existe!".format(word))
print("Optimal number of Hash Functions:{}\n".format(bloom_obj.hash))

# Words that will be added to the filter
word_present = [
    'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'bloom',
    'blossom', 'bolster', 'bonny', 'bonus', 'bonuses'
]

# Words that are never added to the filter
word_absent = [
    'bluff', 'cheater', 'hate', 'war', 'humanity', 'racism', 'hurt', 'nuke',
    'gloomy', 'facebook'
]

# Populate the filter before anything is shuffled.
for item in word_present:
    bloom_obj.add(item)

shuffle(word_present)
shuffle(word_absent)

# Probe with ten of the added words plus every word that was not added.
test_words = word_present[:10] + word_absent
shuffle(test_words)

for word in test_words:
    if not bloom_obj.check(word):
        print("'{}' is definitely not present!".format(word))
    elif word in word_absent:
        # Reported as present although it was never added.
        print("'{}' is a false positive!".format(word))
    else:
        print("'{}' is probably present!".format(word))
def test_add(self):
    """Check membership and length after each add(), including re-adds.

    After every step the test checks, for each of the four names, whether it
    is reported as a member, and then checks ``len()`` of the filter.
    Re-adding 'rajiv' and 'raj' is expected to leave the length at 2.
    """
    roster = ('rajiv', 'raj', 'dan', 'eric')
    dilberts = BloomFilter()

    def verify(present, size):
        # Membership is checked in roster order, then the length.
        for name in roster:
            assert (name in dilberts) == (name in present)
        assert len(dilberts) == size

    verify((), 0)

    # (element to add, names expected afterwards, expected length)
    steps = (
        ('rajiv', ('rajiv',), 1),
        ('raj', ('rajiv', 'raj'), 2),
        ('rajiv', ('rajiv', 'raj'), 2),
        ('raj', ('rajiv', 'raj'), 2),
        ('dan', ('rajiv', 'raj', 'dan'), 3),
        ('eric', ('rajiv', 'raj', 'dan', 'eric'), 4),
    )
    for added, present, size in steps:
        dilberts.add(added)
        verify(present, size)
def test_item_in_filter(self):
    """An item added to a BloomFilter(10) is reported as a member."""
    bloom = BloomFilter(10)
    entry = "dog"
    bloom.add(entry)
    self.assertIn(entry, bloom)
def test():
    """Run a basic print-based smoke test of the Bloom filter classes.

    Nothing is asserted; every result is printed for manual inspection. The
    function exercises, in order: add/check, export/load through
    './dist/py_bloom.blm', intersection, union, the ``in`` operator,
    element estimation, the Jaccard index, a hex export/import round trip,
    and finally a BloomFilterOnDisk backed by './dist/py_ondisk.blm'.
    """
    # in-memory filter: add, check, export
    blm = BloomFilter()
    blm.init(10, 0.05)
    blm.add("this is a test")
    print(blm.check("this is a test"))
    print(blm.check("blah"))
    print(blm)
    print(blm.bloom_array)
    blm.export('./dist/py_bloom.blm')

    # load the exported file into a second filter
    print('\n\ncheck imported BloomFilter!')
    blm2 = BloomFilter()
    blm2.load('./dist/py_bloom.blm')
    print(blm2.check("this is a test"))
    print(blm2.check("blah"))
    print(blm2)
    print(blm2.bloom_array)
    blm2.add('yet another test')

    print("\n\ncheck intersection")
    blm3 = blm.intersection(blm2)
    print(blm3)
    print(blm3.check("this is a test"))
    print(blm3.check("yet another test"))

    print("\n\ncheck union")
    blm3 = blm.union(blm2)
    print(blm3)
    print(blm3.check("this is a test"))
    print('\n\ntest using `in`')
    print("this is a test" in blm3)
    print(blm3.check("yet another test"))
    print(blm3.estimate_elements())
    print(blm.jaccard_index(blm2))

    print('\n\nexport to hex')
    hex_out = blm.export_hex()
    print(hex_out)
    print('import hex')
    blm4 = BloomFilter()
    blm4.load_hex(hex_out)
    print(blm4)

    # on disk code check
    print('\n\nbloom filter on disk')
    blmd = BloomFilterOnDisk()
    blmd.initialize('./dist/py_ondisk.blm', 10, 0.05)
    try:
        blmd.add("this is a test")
        print(blmd.check('this is a test'))
        print('Check use of in keyword ("this is a test" in blmd): ', 'this is a test' in blmd)
        print(blmd.check('yet another test'))
        # blmd.union(blm4)
        # blmd.intersection(blm)
        # print(blmd.jaccard_index(blm2))
        print(blmd)
        # print ('\n\nexport to hex')
        # hex_out = blmd.export_hex()
        # print(hex_out)
    finally:
        # Always release the on-disk filter, even if a call above raises.
        blmd.close()
# NOTE(review): `size / MAX_SIZE` truncates under Python 2 when both operands
# are ints - confirm that is the intended ratio.
count = (size / MAX_SIZE) * log(2)
optimalSize = int(complex(size).real)
optimalCount = int(complex(count).real)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("%s %s" % (optimalSize, optimalCount))

bloom = BloomFilter(optimalSize, optimalCount)
falsePositive = False
inserted = 0
falseValue = ""

# Alternate between inserting a batch of generated values and probing with a
# batch of other values, until one probe is reported as a member.
while not falsePositive:
    # `//` keeps the batch size an int on both Python 2 and Python 3.
    for i in range(MAX_SIZE // 100):
        bloom.add(generateValue())
        inserted += 1
    print(bloom)
    for i in range(MAX_SIZE // 100):
        falseValue = generateOther()
        falsePositive = (falseValue in bloom)
        if falsePositive:
            break

print("=========================")
# The spacing reproduces what the Python 2 comma-separated print statement
# emitted (one space between every item).
print("False positive at ' %s ' after  %s  insertions" % (falseValue, inserted))