Example #1
0
class StoreBitArrayTests(unittest.TestCase):
    """Whenever we change a BloomFilter, ensure that we Memcache our changes.

    Every test mutates ``self.dilberts`` (or relies on its construction in
    ``setUp``), then builds a second ``BloomFilter`` with the same key and
    checks that both expose an equal ``_bit_array``.
    """

    def setUp(self):
        """Create the 'dilberts' filter that each test operates on."""
        # Name the class explicitly: super(self.__class__, self) resolves to
        # this very method again as soon as the test case is subclassed,
        # which ends in infinite recursion.
        super(StoreBitArrayTests, self).setUp()
        self.dilberts = BloomFilter({'rajiv', 'raj'}, key='dilberts')

    def tearDown(self):
        """Delete the filter's key from its memcache client."""
        self.dilberts.memcache.delete(self.dilberts.key)
        super(StoreBitArrayTests, self).tearDown()

    def _assert_stored(self):
        """Assert that a fresh filter with the same key has an equal bit array."""
        office_space = BloomFilter(key='dilberts')
        # assertEqual (rather than a bare assert) survives `python -O` and
        # reports both operands on failure.
        self.assertEqual(office_space._bit_array, self.dilberts._bit_array)

    def test_init_gets_stored(self):
        """When we __init__() on an iterable, ensure we Memcache the bit array"""
        self._assert_stored()

    def test_add_gets_stored(self):
        """When we add() an element, ensure that we Memcache the bit array"""
        self.dilberts.add('dan')
        self._assert_stored()

    def test_update_gets_stored(self):
        """When we update() with elements, ensure that we Memcache the bit array"""
        self.dilberts.update({'dan', 'eric'})
        self._assert_stored()

    def test_clear_gets_stored(self):
        """When we clear() all elements, ensure that we Memcache the bit array"""
        self.dilberts.clear()
        self._assert_stored()
Example #2
0
def main():
    """Greet, try to create a MongoClient, then demo a small BloomFilter.

    The Bloom filter part adds "basu" twice and prints whether a membership
    test for "basu" succeeds.
    """
    print("Hello!")

    mongo_uri = "mongodb+srv://amartya:[email protected]/test?retryWrites=true&w=majority"
    try:
        client = MongoClient(mongo_uri)
    except ConnectionError:
        print("Server not available")

    bloom_test = BloomFilter(20, 0.02)
    # The same key is inserted twice, exactly as in the original demo.
    for _ in range(2):
        bloom_test.add("basu")

    if "basu" not in bloom_test:
        print("The searched item does NOT exist in the cache. Read from persistent datastore")
    else:
        print("Object exists")
Example #3
0
def create_bloom(set, capacity=3000, error_rate=0.001):
    """Return a BloomFilter populated with every element of ``set``.

    ``capacity`` and ``error_rate`` are forwarded to the BloomFilter
    constructor as keyword arguments.
    """
    bloom = BloomFilter(capacity=capacity, error_rate=error_rate)
    for member in set:
        bloom.add(member)
    return bloom
Example #4
0
# User names that get added to the filter
nombres_existentes = [
    'ironman', 'thor', 'american_captain', 'spiderman',
    'loki', 'wolverine', 'black_widow', 'hulk',
    'deadpool', 'nick_fury', 'thanos', 'Dr_strange',
    'venon', 'odin', 'magneto', 'black_panter',
    'rocket', 'gamora', 'ultron', 'groot',
    'ant_man',
]

# Names that are never added to the filter
nombres_no_existentes = [
    'superman', 'batman', 'wonder_woman', 'green_Lantern',
    'he_Man', 'batgirl', 'lion_O', 'shazam',
    'aquaman', 'green_arrow', 'flash', 'tygro',
    'Cheetara',
]

for item in nombres_existentes:
    bloom_filtro.add(item)

shuffle(nombres_existentes)
shuffle(nombres_no_existentes)

# Test sample: ten of the added names plus every name that was not added.
prueba = nombres_existentes[:10]
prueba += nombres_no_existentes
shuffle(prueba)

print("=======================================================")
for word in prueba:
    if not bloom_filtro.check(word):
        print("'{}' definitivamente no existe!".format(word))
    elif word in nombres_no_existentes:
        # Reported as present although it was never added.
        print("'{}' es un falso positivo!".format(word))
    else:
        print("'{}' probablemente existe!".format(word))
Example #5
0
print("Optimal number of Hash Functions:{}\n".format(bloom_obj.hash))

# Words that get added to the filter
word_present = [
    'abound', 'abounds', 'abundance', 'abundant',
    'accessable', 'bloom', 'blossom', 'bolster',
    'bonny', 'bonus', 'bonuses',
]

# Words that are never added to the filter
word_absent = [
    'bluff', 'cheater', 'hate', 'war',
    'humanity', 'racism', 'hurt', 'nuke',
    'gloomy', 'facebook',
]

for item in word_present:
    bloom_obj.add(item)

shuffle(word_present)
shuffle(word_absent)

# Test sample: ten of the added words plus every word that was not added.
test_words = word_present[:10]
test_words += word_absent
shuffle(test_words)

for word in test_words:
    if not bloom_obj.check(word):
        print("'{}' is definitely not present!".format(word))
    elif word in word_absent:
        # Reported as present although it was never added.
        print("'{}' is a false positive!".format(word))
    else:
        print("'{}' is probably present!".format(word))
Example #6
0
    def test_add(self):
        """Check membership and len() after each add(), including repeats.

        Starting from an empty filter, names are added one at a time (two of
        them twice). After every step all four candidate names are probed and
        the length must equal the number of distinct names added so far.
        """
        candidates = ('rajiv', 'raj', 'dan', 'eric')
        dilberts = BloomFilter()
        added = set()

        def verify():
            # Same probe order as the spelled-out version: the four
            # membership checks first, then the length.
            for name in candidates:
                assert (name in dilberts) == (name in added)
            assert len(dilberts) == len(added)

        verify()
        for name in ('rajiv', 'raj', 'rajiv', 'raj', 'dan', 'eric'):
            dilberts.add(name)
            added.add(name)
            verify()
 def test_item_in_filter(self):
     """A word that was added is reported as contained in the filter."""
     bloom = BloomFilter(10)
     bloom.add("dog")
     self.assertIn("dog", bloom)
def test():
    """Exercise BloomFilter and BloomFilterOnDisk, printing each result.

    Steps, in order: build a filter and export it to './dist/py_bloom.blm',
    load it back into a second filter, print the intersection, union and
    Jaccard index of the two, round-trip the first filter through its hex
    export, and finally run a few checks against an on-disk filter stored at
    './dist/py_ondisk.blm'.

    Returns nothing; all results go to standard output.
    """
    blm = BloomFilter()
    blm.init(10, 0.05)
    blm.add("this is a test")
    print(blm.check("this is a test"))
    print(blm.check("blah"))
    print(blm)
    print(blm.bloom_array)
    blm.export('./dist/py_bloom.blm')

    print('\n\ncheck imported BloomFilter!')

    blm2 = BloomFilter()
    blm2.load('./dist/py_bloom.blm')
    print(blm2.check("this is a test"))
    print(blm2.check("blah"))
    print(blm2)
    print(blm2.bloom_array)

    # From here on blm2 holds one element that blm does not.
    blm2.add('yet another test')

    print("\n\ncheck intersection")
    blm3 = blm.intersection(blm2)
    print(blm3)
    print(blm3.check("this is a test"))
    print(blm3.check("yet another test"))

    print("\n\ncheck union")
    blm3 = blm.union(blm2)
    print(blm3)
    print(blm3.check("this is a test"))
    print('\n\ntest using `in`')
    print("this is a test" in blm3)
    print(blm3.check("yet another test"))
    print(blm3.estimate_elements())

    print(blm.jaccard_index(blm2))

    print('\n\nexport to hex')
    hex_out = blm.export_hex()
    print(hex_out)
    print('import hex')
    blm4 = BloomFilter()
    blm4.load_hex(hex_out)
    print(blm4)

    # on disk code check
    print('\n\nbloom filter on disk')
    blmd = BloomFilterOnDisk()
    blmd.initialize('./dist/py_ondisk.blm', 10, 0.05)
    # Close the on-disk filter even when one of the checks below raises, so
    # the handle is not left open on the failure path.
    try:
        blmd.add("this is a test")
        print(blmd.check('this is a test'))
        print('Check use of in keyword ("this is a test" in blmd): ',
              'this is a test' in blmd)
        print(blmd.check('yet another test'))
        # NOTE: union(), intersection(), jaccard_index() and export_hex()
        # are not exercised for the on-disk filter.
        print(blmd)
    finally:
        blmd.close()
Example #9
0
# Searches for the first false positive of a Bloom filter: values are inserted
# in batches and, after each batch, other values are probed until one of them
# is reported as present.
#
# NOTE(review): `size`, `MAX_SIZE`, `log`, `generateValue` and `generateOther`
# come from outside this excerpt. If `size` and `MAX_SIZE` are both ints, the
# `/` below floors on Python 2 but not on Python 3 -- confirm the intent.
count = (size / MAX_SIZE) * log(2)

# complex(...).real keeps only the real part before truncating to int.
optimalSize = int(complex(size).real)
optimalCount = int(complex(count).real)

# Parenthesised single-argument print() behaves the same on Python 2 and 3;
# the former `print a, b` statement is a syntax error on Python 3.
print("{0} {1}".format(optimalSize, optimalCount))

bloom = BloomFilter(optimalSize, optimalCount)

falsePositive = False
inserted = 0
falseValue = ""

# Floor division matches the Python 2 result of `MAX_SIZE / 100` for ints and
# yields the int that range() requires on Python 3.
batchSize = MAX_SIZE // 100

while not falsePositive:

    for _ in range(batchSize):
        bloom.add(generateValue())
        inserted += 1
    print(bloom)

    for _ in range(batchSize):
        falseValue = generateOther()
        falsePositive = (falseValue in bloom)
        if falsePositive:
            # Leaves only the probe loop; the while condition then ends the run.
            break

print("=========================")
# Joining with single spaces reproduces the spacing of the original
# comma-separated print statement.
print(" ".join(str(part) for part in (
    "False positive at '", falseValue, "' after ", inserted, " insertions")))