def test__validate_hashs_failure():
    """Check that ``_validate_hashs`` rejects the hash list ``[6]``.

    The filter is built with no repository and no hash functions; only the
    bit-vector size matters for this check.
    """
    bloom_filter = BloomFilter(None, [], BIT_VECTOR_SIZE)
    # Equality (not ``is False`` / ``not``) is kept on purpose so the pass
    # condition is exactly the one the test has always enforced.
    assert bloom_filter._validate_hashs([6]) == False
def test__validate_hashs_success():
    """Check that ``_validate_hashs`` accepts a hash computed for DATA_KEY."""
    bf = BloomFilter(None, [], BIT_VECTOR_SIZE)
    key_hash = hashing_function.hash_key(DATA_KEY, BIT_VECTOR_SIZE)
    outcome = bf._validate_hashs([key_hash])
    assert outcome == True
def test_constructor(self):
    """Verify the constructor creates the requested hash functions and bits."""
    bloom = BloomFilter(10, 3)
    hash_fn_count = len(bloom.hash_fns)
    bit_count = len(bloom.bits)
    self.assertEqual(
        hash_fn_count,
        3,
        'has generated the instructed num of hash functions',
    )
    self.assertEqual(
        bit_count,
        10,
        'prepares an array of bits of instructed size',
    )
def test__update_bit_vector():
    """Check that ``_update_bit_vector([4])`` sets only index 4 of the vector."""
    bf = BloomFilter(None, [], BIT_VECTOR_SIZE)
    bf._update_bit_vector([4])
    expected_vector = [0, 0, 0, 0, 1]
    assert bf.bit_vector == expected_vector
def test_add_item_one_item(self):
    '''
    Adds a single item to the Bloom Filter and checks it is reported present.
    '''
    name = 'christian'
    bloom = BloomFilter(1, 0.05)
    bloom.add(name)
    self.assertTrue(bloom.check(name))
def test_num_hashes_calculates_the_correct_number_of_hash_functions(self):
    '''
    Checks the number of hash functions the bloom filter derives for several
    (item count, false positive probability) combinations.
    '''
    # (num_items, prob, expected num_hash_fns)
    cases = (
        (20, 0.05, 4),
        (1000, 0.25, 1),
        (10000, 0.02, 5),
    )
    for num_items, prob, expected in cases:
        bloom = BloomFilter(num_items, prob)
        self.assertEqual(bloom.num_hash_fns, expected)
def test_add_item_different_probability_one_item(self):
    '''
    Adds one item to Bloom Filters built with several false positive
    probabilities and checks it is reported present in each.
    '''
    name = 'christian'
    for prob in (0.10, 0.5, 0.9):
        bloom = BloomFilter(1, prob)
        bloom.add(name)
        self.assertTrue(bloom.check(name))
def test_bit_array_size_calculates_correct_array_size(self):
    '''
    Checks the bit array size the bloom filter derives for several
    (item count, false positive probability) combinations.
    '''
    # (num_items, prob, expected bit_array_size)
    cases = (
        (20, 0.05, 124),
        (1000, 0.25, 2885),
        (10000, 0.02, 81423),
    )
    for num_items, prob, expected in cases:
        bloom = BloomFilter(num_items, prob)
        self.assertEqual(bloom.bit_array_size, expected)
def test__check_key_existence_failure():
    """Check ``_check_key_existence`` against an all-zero bit vector.

    The repository holds DATA_KEY, but the bit vector is forced to all zeros,
    so the check on DATA_KEY's hash is expected to compare equal to False.
    """
    repo = BasicDBRepository()
    repo.data_map = {DATA_KEY: DATA_VALUE}
    bf = BloomFilter(repo, [], BIT_VECTOR_SIZE)
    bf.bit_vector = [0, 0, 0, 0, 0]
    key_hash = hashing_function.hash_key(DATA_KEY, BIT_VECTOR_SIZE)
    found = bf._check_key_existence([key_hash])
    assert found == False
def test_read_with_existing_key_successfully():
    """Check that ``get_data_with_key`` returns the stored value for DATA_KEY.

    The repository is pre-loaded with the key/value pair and the bit vector is
    set by hand to ``[0, 0, 0, 0, 1]`` before the lookup.
    """
    repo = BasicDBRepository()
    repo.data_map = {DATA_KEY: DATA_VALUE}
    bf = BloomFilter(repo, [hashing_function], BIT_VECTOR_SIZE)
    bf.bit_vector = [0, 0, 0, 0, 1]
    assert bf.get_data_with_key(DATA_KEY) == DATA_VALUE
def testFileCreation():
    """Construct a file-backed BloomFilter and report whether its file exists.

    Any exception raised by the constructor is reported through
    ``print_failure`` and then re-raised. If construction succeeds, the
    outcome of the ``os.path.exists`` check on the target path is reported
    through ``print_success`` / ``print_failure``.
    """
    import os

    path = './files/bands.bloom'
    try:
        BloomFilter(10, 0.00000001, path)
    except Exception as e:
        print_failure("Test failed with: %s" % str(e))
        # Bare ``raise`` re-raises the active exception with its original
        # traceback intact.
        raise
    else:
        if os.path.exists(path):
            print_success("File created successfully")
        else:
            print_failure("Failed to create file")
def test_insert_new_data_successfully():
    """
    Inserts one key/value pair into a fresh Bloom filter and checks both the
    resulting bit vector and the repository contents.
    """
    repo = BasicDBRepository()
    bf = BloomFilter(repo, [hashing_function], BIT_VECTOR_SIZE)

    bf.insert_new_data(DATA_KEY, DATA_VALUE)

    expected_vector = [0, 0, 0, 0, 1]
    assert bf.bit_vector == expected_vector
    expected_data = {DATA_KEY: DATA_VALUE}
    assert repo.show_data() == expected_data
def test_add_item_multiple_items(self):
    '''
    Adds several items to the Bloom Filter and checks each is reported
    present.
    '''
    names = ('christian', 'daniel', 'debra', 'charles-adrian')
    bloom = BloomFilter(4, 0.05)
    # Add everything first, then check, so later insertions are already in
    # place when the earlier items are looked up.
    for name in names:
        bloom.add(name)
    for name in names:
        self.assertTrue(bloom.check(name))
def test_add_item_different_probability_multiple_items(self):
    '''
    Adds several items to Bloom Filters built with different false positive
    probabilities and checks each item is reported present in each filter.
    '''
    names = ('christian', 'daniel', 'debra', 'charles-adrian')
    for prob in (0.10, 0.50, 0.9):
        bloom = BloomFilter(4, prob)
        # All additions happen before any check, for every probability.
        for name in names:
            bloom.add(name)
        for name in names:
            self.assertTrue(bloom.check(name))
def test_check_gives_response_regardless_of_key(self):
    '''
    Checks that the Bloom Filter answers for any key. For keys that were
    never added, the test only requires the answer to compare equal to True
    or False, because a false positive cannot be ruled out.
    '''
    bloom = BloomFilter(1, 0.05)
    bloom.add('christian')
    self.assertTrue(bloom.check('christian'))

    boolean_answers = (True, False)
    daniel_ok = bloom.check('daniel') in boolean_answers
    charles_ok = bloom.check('charles') in boolean_answers
    self.assertTrue(daniel_ok)
    self.assertTrue(charles_ok)
def testBasicBloomFilter():
    """Exercise membership checks on a small file-backed BloomFilter.

    Part one adds six words and reports, per word, whether it is found; any
    miss counts as a failure. Part two probes three words that were never
    added; any hit counts as a failure. Results are reported through
    ``print_success`` / ``print_failure`` only; nothing is raised or returned.
    """
    items = ['glass', 'moon', 'mouse', 'cat', 'extra', 'pot']
    filepath = './files/randomwords.bloom'
    bloomfilter = BloomFilter(len(items), 0.001, filepath)
    bloomfilter.add(items)
    nitems = ['Jackson', 'Inline', 'Whatever']

    # Part one: every inserted item must be reported as present.
    missed = 0
    for item in items:
        if item in bloomfilter:
            print_success("Found in bloomfilter: %s" % item)
        else:
            missed += 1
            print_failure("Did not find in bloomfilter: %s" % item)
    if missed != 0:
        print_failure(
            "\nFirst part of test failed with %d elements missed\n\n" % missed)
    else:
        print_success(
            "\nTest passed!, all elements of bloomfilter are found\n\n")

    # Part two: items that were never added should be absent, so a hit is
    # the failing outcome here and a miss is the passing one.
    false_positives = 0
    for item in nitems:
        if item in bloomfilter:
            false_positives += 1
            print_failure("Found in bloomfilter: %s" % item)
        else:
            print_success("Did not find in bloomfilter: %s" % item)
    if false_positives != 0:
        print_failure(
            "\nSecond part of test failed with %d elements found\n\n"
            % false_positives)
    else:
        print_success(
            "\nTest passed!, all elements that are not in the bloomfilter weren't found\n\n"
        )
def test_lookup(self):
    """Insert a value and verify ``lookup`` reports it."""
    value = 12
    bloom = BloomFilter(10, 3)
    bloom.insert(value)
    self.assertTrue(bloom.lookup(value), 'should detect it was inserted')
def test_insert(self):
    """Smoke test: inserting a value must not raise (no assertions made)."""
    bloom = BloomFilter(10, 3)
    value = 12
    bloom.insert(value)
def setUp(self):
    """Create a default-constructed BloomFilter for each test."""
    fresh_filter = BloomFilter()
    self.filter = fresh_filter
def test_add_word_with_value_out_of_bounds_of_bloom_array_is_oooook(self):
    """Smoke test: adding a 40-character word of 'z' must not raise."""
    long_word = 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
    self.filter = BloomFilter()
    self.filter.add_word(long_word)
small_n * 7, small_n * 8, small_n * 9, small_n * 10 ] for mxd in M_RANGE: m = int(mxd) search_times_bf = [] search_times_fl = [] initial_size_bf = [] size_after_insertion_bf = [] false_positives = [0] * ITER disk_access = [0] * ITER k = get_k(m, small_n) print( f'[STARTED AT] [{time.ctime(time.time())}] n: {big_n}, m: {m}, k: {k}' ) for i in range(ITER): bloom_filter = BloomFilter(m, k) initial_size_bf.append(get_deep_size(bloom_filter)) bloom_filter, L_file, universe_file = generate_files_and_insert_to_bloom_filter( bloom_filter, big_n, small_n) size_after_insertion_bf.append(get_deep_size(bloom_filter)) current_time_bf = [] current_time_fl = [] with open('experimentos/files/universe_file.txt', 'r') as universe_file: total_usernames_list = [ w.strip('\n') for w in universe_file.readlines() ] for username_query in total_usernames_list: ti_bf = time.time() username_might_be_in_file = bloom_filter.check( username_query)
def testOldBloomFilterFile():
    """Load the top-2M list into a file-backed BloomFilter and probe it.

    Part one counts how many of the loaded items are NOT reported as members
    of the filter; any miss is reported as a failure. Part two collects the
    hard-coded probe names that are not in the filter, prints them, and
    reports how many of the probes that is. Results are reported through
    ``print`` / ``print_success`` / ``print_failure`` only.
    """
    items = loadTop2MFile()
    bloomfilter = BloomFilter(len(items), 0.0001, './files/urls.bloom')
    bloomfilter.add(items)
    nitems = [
        "atapattu.net", "exyu.info", "sergioalbanese.it", "leshopping.com.br",
        "natura-naturans.de", "footbuddies.com", "sieuthimucin.net",
        "kuma-bus.co.jp", "biit.no", "kndkmc.or.kr", "flygo.net", "0552.ua",
        "tsukashin.com", "sites.google.com/a/marist.com", "ad-topia.com",
        "wpgogo.com", "meme-italia.it", "centrostudihelios.it",
        "lubimayaoptika.by", "thewandergirls.com", "vrapps.co", "oksdf.com",
        "whalesmall.co.kr", "bankbazar.com", "matthewsdiehl.com",
        "videoplaylistgallery.com", "bestmuseum.ru", "it-site.net",
        "taunigma.biz", "cdcpederzoli.it", "vikaskumrai.com",
        "lodensoftware.com", "barami.us", "tilsonhome.com",
        "proutene-kosiky.cz", "rboots.ru", "commercedc.com.my", "energia.fi",
        "budidayanews.blogspot.com", "aghed.blogfa.com",
        "petcareclinics.co.uk", "simonpattonracing.com", "alliebeth.com",
        "powershop.jp", "pismotechnic.com", "sacramentocoaches.org",
        "alloextra.com", "book-of-ra-deluxe.de", "beststyle.me", "btr-k.ru",
        "sceny-z-uzycia.blog.pl", "my7sports.com", "gazetaonline.in",
        "vicensvives.es", "etaal.gov.in", "site-pedia.ro", "sweatvac.com",
        "camberleywebdesign.com", "lettre-de-motivation-facile.com",
        "importir.org", "eapc.cat", "smartdiesel.ro", "onewideo.com",
        "goodrelaxation.com", "plasenta.com.tr", "openfilepro.com",
        "sempatiza.es", "hljfood.net", "stream2watch.eu",
        "pointofreturn.com", "certificadosprofesionalidad.com",
        "pavementresources.com", "daralab.com",
        "businesstechnologyguide.com.au", "sanostra.es", "view-events.com",
        "antiques-jp.com", "starina-mebel.ru", "chajian110.com",
        "eyworkflow.com", "keenitsolution.com", "vitadomo.ch",
        "kintek.com.au", "theship.co.uk", "mgs-subs.de", "innovservers.com",
        "kindersoaps.com", "hotelpitangueiras.com.br", "noweprzetargi.pl",
        "notebooksapp.com", "hrtrainingcenter.com", "sunshinenigeria.com",
        "x-sense.jp", "agenciafosforo.com.br", "topfriend.org",
        "paranoidprintco.bigcartel.com", "metropolitan.org.uk", "jpi.at",
        "fixplanta.net", "i2office.co.uk", "lisboatriathlon.com",
        "just2craft.fr", "downloaden.nl", "epharmadeals.gr", "bettd.com",
        "fix.no", "sumaprojektow.pl", "atcloudspeakers.co.uk",
        "fashionjudy.com", "dshop.vn", "tribesports.co.uk",
        "trinityviaggistudio.it", "encyclosights.com", "dhg-marketing.de",
        "xiayizhan.cn", "gfxtra2.net/user/Orten", "fetchrobotics.com",
        "max-joomla.ru", "lrt.ru", "turk-diziler14.ir",
        "eidos.ic.i.u-tokyo.ac.jp/~tau", "liberacionprofesional.com",
        "brainvoyager.com", "sms2greece.com", "mumok.at", "usa.edu.co",
        "it-nomikai.jp", "lmls.org", "corporatetrainerclub.com", "kho.fi",
        "miuu.de", "chelles-creations.com", "swimstyle.com", "4options.pt",
        "vod.blog.pl", "woodsplitterdirect.com", "ou.com",
        "21bitcoingratis.com", "tattoo-sprueche.de", "igpublish.com",
        "spreewald-unterkuenfte.de", "sanluissa.cl",
        "puppyleague.tumblr.com", "staybcnapart.com", "cpt.gov.vn",
        "greenhex.net", "dallastechnologies.in", "powerwerks.com",
        "santillana.com.uy", "prozonemarketing.com", "mybadges.com",
        "readysettroll.com", "prochoiceny.nationbuilder.com",
        "factsfromfiction.blogspot.com", "ktateb.com",
        "inmueblesbanorte.com.mx", "aliceinweddingland.co.uk",
        "tvdsb.on.ca", "reddit.com/user/amirarice", "mafiamax.com",
        "postgazetesi.com", "inventorum.com", "nonsologhiaccio.com",
        "citejournal.org", "facesculptormd.com", "wtop.blogspot.com.au",
        "elo-street.blogspot.co.uk", "ebarticles.com", "orexvideourok.ru",
        "forexstrategy.com", "othersdayquotesz.com", "ebdesignhelper.co.uk",
        "utterycolorfulkryptonite.tumblr.com", "arandz.com", "ighwoods.com",
        "ajdirabota.mk", "va.org.uk", "ankrecruitment.co.in",
        "acturacionlib-tol.com.mx", "h3technician.com",
        "otfreetraffic.net", "onottouch.org", "apenziraha.com", "uytra.com",
        "sc-lohfelden.de", "lberni.ca", "portaktiv.sk", "ichcoffee.ru",
        "ord-travel.com", "rdineavvocatitreviso.it", "upazena.com",
        "log-transmission-entreprise.com", "inoheld.de", "ukouri.com",
        "righttrac.com",
        "heteachertoolkit.com", "concepts-auto.com",
        "awrencevilleweather.com", "allouslab.net", "hirazi.blogfa.com",
        "60degree.blogsky.com", "omeditorial.com", "therside.su",
        "androvercentre.com", "olitape.ru", "sservatoreseriale.it",
        "lackjackround.com", "freeworks.com", "avenecommerce.com",
        "-bike.co.uk", "tntechnologies.in", "stutesol.com", "zoneclubs.com",
        "ath.umn.edu/~olver", "ncotexkkm.ru", "hubbybody.com",
        "kcthundernation.com", "ostmap.com", "ent-wifi.com",
        "killdrick.co.uk", "irtualeduca.org", "piritualite2000.com",
        "nvestingontrack.com", "utsuzunhikayeleri.tumblr.com", "kentoo.com",
        "raycor.com", "2volt.sk", "ornogameonline.com", "edicalyoga.in",
        "evue-amateur.com", "retaportobello.com", "abernacleatl.com",
        "en-mart.com", "rzanic.com", "ridgersteel.com", "sgguncesi.com",
        "lobemoving.net", "utodoplnky-prodej.cz", "35678.com",
        "objscott.com", "esource-auto.ru", "exnokom.com", "dainternet.com",
        "ions-net.com", "ostonmamas.com", "ecolafacil.com.br", "-daama.com",
        "kis.org.hk", "odrum-rentacar.com", "zieu.pl", "ookshopblog.com",
        "on-dualitypress.org", "lmedauniversity.org", "achelwcole.com",
        "omethingvalue.com", "luczykarze.pl", "asargrup.com",
        "ousageguide.com", "nexplainedstuff.com", "alifa.org",
        "reebtcd.org", "anbu-city.com", "uciemanet.fr", "iwoyuen.com",
        "ishes.dev", "ilgiyuvasi1.info", "xdcm.com", "ppincomereport.com",
        "hbs.in", "ecure128.com", "itpro.us", "rl.com.tw", "echgeeker.com",
        "naptech.com", "otalhorsechannel.com", "vantisport.de",
        "ictorfoo.com", "ost5.ir", "anouba-sugarfree.com", "xpomuseum.com",
        "oodvood.com", "anzaniaonline-cn.org", "cecat.tw",
        "enteparaviajar.com", "eadsplus.us", "ertificadoecool.com.br",
        "lp.co.jp", "elesur.sr", "stituto-besta.it", "adioboxapp.com.br",
        "nmaya0001.tumblr.com", "onnectedfamilies.org", "etzaehler.de",
        "espreslaporte.com", "remierescene.net", "obileaff.mobi",
        "eddingcollectibles.com", "ngular.dev", "huo-hot.com",
        "repaidstart.com", "n--b1agviax.xn--p1ai", "mooozesh.ir", "elun.com"
    ]

    # Part one: every loaded item was added, so none should be missing.
    missed = sum(1 for item in items if item not in bloomfilter)
    if missed != 0:
        print_failure(
            "\nFirst part of test failed with %d elements missed\n\n" % missed)
    else:
        print_success(
            "\nTest passed!, all elements of bloomfilter are found\n\n")

    # Part two: list the probe names the filter does not contain. The total
    # in the summary is the number of probes, since that is the population
    # this count is drawn from.
    not_found = [item for item in nitems if item not in bloomfilter]
    print(not_found)
    print_success(
        "%d elements are not malicious out of %d, and they are printed above"
        % (len(not_found), len(nitems)))
def setUp(self):
    """Build a BloomFilter(247, 5) pre-loaded with four sample words."""
    self.bloom_filter = BloomFilter(247, 5)
    self.words = [
        'hola',
        'como',
        'estan',
        'holis',
    ]
    self.bloom_filter.insert_list(self.words)
def setUp(self):
    """Create the BloomFilter(31) instance shared by the tests."""
    bloom = BloomFilter(31)
    self.bloom = bloom