Ejemplo n.º 1
0
# Print the filter's `hash` attribute, labelled as the optimal number of
# hash functions.
print("Optimal number of Hash Functions:{}\n".format(bloom_obj.hash))

# Words that get inserted into the filter below.
word_present = [
    'abound', 'abounds', 'abundance', 'abundant',
    'accessable', 'bloom', 'blossom', 'bolster',
    'bonny', 'bonus', 'bonuses',
]

# Words that are never inserted; a positive check on one of these is a
# false positive.
word_absent = [
    'bluff', 'cheater', 'hate', 'war', 'humanity',
    'racism', 'hurt', 'nuke', 'gloomy', 'facebook',
]

for item in word_present:
    bloom_obj.add(item)

# Randomise both lists before sampling so the first ten "present" words
# differ from run to run.
shuffle(word_present)
shuffle(word_absent)

# Mix ten inserted words with all of the absent ones and shuffle again.
test_words = word_present[:10] + word_absent
shuffle(test_words)

for word in test_words:
    # Pick the message template first, then print once.
    if not bloom_obj.check(word):
        template = "'{}' is definitely not present!"
    elif word in word_absent:
        template = "'{}' is a false positive!"
    else:
        template = "'{}' is probably present!"
    print(template.format(word))
Ejemplo n.º 2
0
    'ironman', 'thor', 'american_captain', 'spiderman', 'loki', 'wolverine',
    'black_widow', 'hulk', 'deadpool', 'nick_fury', 'thanos', 'Dr_strange',
    'venon', 'odin', 'magneto', 'black_panter', 'rocket', 'gamora', 'ultron',
    'groot', 'ant_man'
]

# Names that are never inserted into the filter; a positive check on one of
# these is a false positive.
nombres_no_existentes = [
    'superman', 'batman', 'wonder_woman', 'green_Lantern',
    'he_Man', 'batgirl', 'lion_O', 'shazam', 'aquaman',
    'green_arrow', 'flash', 'tygro', 'Cheetara',
]

for item in nombres_existentes:
    bloom_filtro.add(item)

# Randomise both lists before sampling so the first ten inserted names
# differ from run to run.
shuffle(nombres_existentes)
shuffle(nombres_no_existentes)

# Mix ten inserted names with all of the absent ones and shuffle again.
prueba = nombres_existentes[:10] + nombres_no_existentes
shuffle(prueba)

print("=======================================================")
for word in prueba:
    # Pick the message template first, then print once.
    if not bloom_filtro.check(word):
        template = "'{}' definitivamente no existe!"
    elif word in nombres_no_existentes:
        template = "'{}' es un falso positivo!"
    else:
        template = "'{}' probablemente existe!"
    print(template.format(word))
print("=======================================================")
def test():
    """Smoke-test the Bloom filter API by printing the result of each call.

    Nothing is asserted; the function prints what each operation returns so
    the output can be inspected by eye. In order it exercises:

    * an in-memory ``BloomFilter``: ``init``, ``add``, ``check``, ``export``;
    * a second filter populated via ``load`` from the exported file;
    * ``intersection``, ``union``, the ``in`` operator,
      ``estimate_elements`` and ``jaccard_index``;
    * a hex round trip through ``export_hex`` / ``load_hex``;
    * a ``BloomFilterOnDisk``: ``initialize``, ``add``, ``check``, ``in``
      and ``close``.

    Side effects: writes ``./dist/py_bloom.blm`` and
    ``./dist/py_ondisk.blm``.
    NOTE(review): presumably requires ``./dist`` to exist already -- confirm.
    """
    # NOTE(review): the two arguments look like (estimated elements,
    # false-positive rate) -- confirm against the BloomFilter implementation.
    blm = BloomFilter()
    blm.init(10, 0.05)
    blm.add("this is a test")
    print(blm.check("this is a test"))
    print(blm.check("blah"))
    print(blm)
    print(blm.bloom_array)
    blm.export('./dist/py_bloom.blm')

    print('\n\ncheck imported BloomFilter!')

    # Load the file that was just exported into a fresh filter.
    blm2 = BloomFilter()
    blm2.load('./dist/py_bloom.blm')
    print(blm2.check("this is a test"))
    print(blm2.check("blah"))
    print(blm2)
    print(blm2.bloom_array)

    # Make blm2 differ from blm so the set operations below are non-trivial.
    blm2.add('yet another test')

    print("\n\ncheck intersection")
    blm3 = blm.intersection(blm2)
    print(blm3)
    print(blm3.check("this is a test"))
    print(blm3.check("yet another test"))

    print("\n\ncheck union")
    blm3 = blm.union(blm2)
    print(blm3)
    print(blm3.check("this is a test"))
    print('\n\ntest using `in`')
    print("this is a test" in blm3)
    print(blm3.check("yet another test"))
    print(blm3.estimate_elements())

    print(blm.jaccard_index(blm2))

    print('\n\nexport to hex')
    hex_out = blm.export_hex()
    print(hex_out)
    print('import hex')
    blm4 = BloomFilter()
    blm4.load_hex(hex_out)
    print(blm4)

    # on disk code check
    print('\n\nbloom filter on disk')
    blmd = BloomFilterOnDisk()
    blmd.initialize('./dist/py_ondisk.blm', 10, 0.05)
    # Close the on-disk filter even if one of the checks below raises;
    # otherwise the filter would be left open on any failure.
    try:
        blmd.add("this is a test")
        print(blmd.check('this is a test'))
        print('Check use of in keyword ("this is a test" in blmd): ',
              'this is a test' in blmd)
        print(blmd.check('yet another test'))
        # NOTE(review): the calls below were disabled in the original;
        # presumably they are not supported by the on-disk filter -- confirm
        # before re-enabling.
        # blmd.union(blm4)
        # blmd.intersection(blm)
        # print(blmd.jaccard_index(blm2))
        print(blmd)
        # print ('\n\nexport to hex')
        # hex_out = blmd.export_hex()
        # print(hex_out)
    finally:
        blmd.close()