Example #1
0
def test__validate_hashs_success():
    """A hash produced by ``hash_key`` for BIT_VECTOR_SIZE must validate."""
    filter_under_test = BloomFilter(None, [], BIT_VECTOR_SIZE)
    key_hash = hashing_function.hash_key(DATA_KEY, BIT_VECTOR_SIZE)

    is_valid = filter_under_test._validate_hashs([key_hash])

    assert is_valid == True
Example #2
0
def test__validate_hashs_failure():
    """Check that ``_validate_hashs`` rejects the hard-coded hash value 6.

    NOTE(review): 6 is presumably outside the valid index range for
    BIT_VECTOR_SIZE -- confirm against the constant's definition.
    """
    bloom_filter = BloomFilter(None, [], BIT_VECTOR_SIZE)

    assert bloom_filter._validate_hashs([6]) == False
Example #3
0
def testOldBloomFilterFile():
    """Exercise a BloomFilter built from the items of ``loadTop2MFile()``.

    The filter is constructed with the path './files/urls.bloom'. Every
    loaded item is added and then looked up again, and the number of misses
    is reported. A fixed list of additional host names is then probed; the
    ones the filter does not contain are printed together with a summary.
    """
    items = loadTop2MFile()

    bloomfilter = BloomFilter(len(items), 0.0001, './files/urls.bloom')

    bloomfilter.add(items)

    nitems = [
        "atapattu.net", "exyu.info", "sergioalbanese.it", "leshopping.com.br",
        "natura-naturans.de", "footbuddies.com", "sieuthimucin.net",
        "kuma-bus.co.jp", "biit.no", "kndkmc.or.kr", "flygo.net", "0552.ua",
        "tsukashin.com", "sites.google.com/a/marist.com", "ad-topia.com",
        "wpgogo.com", "meme-italia.it", "centrostudihelios.it",
        "lubimayaoptika.by", "thewandergirls.com", "vrapps.co", "oksdf.com",
        "whalesmall.co.kr", "bankbazar.com", "matthewsdiehl.com",
        "videoplaylistgallery.com", "bestmuseum.ru", "it-site.net",
        "taunigma.biz", "cdcpederzoli.it", "vikaskumrai.com",
        "lodensoftware.com", "barami.us", "tilsonhome.com",
        "proutene-kosiky.cz", "rboots.ru", "commercedc.com.my", "energia.fi",
        "budidayanews.blogspot.com", "aghed.blogfa.com",
        "petcareclinics.co.uk", "simonpattonracing.com", "alliebeth.com",
        "powershop.jp", "pismotechnic.com", "sacramentocoaches.org",
        "alloextra.com", "book-of-ra-deluxe.de", "beststyle.me", "btr-k.ru",
        "sceny-z-uzycia.blog.pl", "my7sports.com", "gazetaonline.in",
        "vicensvives.es", "etaal.gov.in", "site-pedia.ro", "sweatvac.com",
        "camberleywebdesign.com", "lettre-de-motivation-facile.com",
        "importir.org", "eapc.cat", "smartdiesel.ro", "onewideo.com",
        "goodrelaxation.com", "plasenta.com.tr", "openfilepro.com",
        "sempatiza.es", "hljfood.net", "stream2watch.eu", "pointofreturn.com",
        "certificadosprofesionalidad.com", "pavementresources.com",
        "daralab.com", "businesstechnologyguide.com.au", "sanostra.es",
        "view-events.com", "antiques-jp.com", "starina-mebel.ru",
        "chajian110.com", "eyworkflow.com", "keenitsolution.com",
        "vitadomo.ch", "kintek.com.au", "theship.co.uk", "mgs-subs.de",
        "innovservers.com", "kindersoaps.com", "hotelpitangueiras.com.br",
        "noweprzetargi.pl", "notebooksapp.com", "hrtrainingcenter.com",
        "sunshinenigeria.com", "x-sense.jp", "agenciafosforo.com.br",
        "topfriend.org", "paranoidprintco.bigcartel.com",
        "metropolitan.org.uk", "jpi.at", "fixplanta.net", "i2office.co.uk",
        "lisboatriathlon.com", "just2craft.fr", "downloaden.nl",
        "epharmadeals.gr", "bettd.com", "fix.no", "sumaprojektow.pl",
        "atcloudspeakers.co.uk", "fashionjudy.com", "dshop.vn",
        "tribesports.co.uk", "trinityviaggistudio.it", "encyclosights.com",
        "dhg-marketing.de", "xiayizhan.cn", "gfxtra2.net/user/Orten",
        "fetchrobotics.com", "max-joomla.ru", "lrt.ru", "turk-diziler14.ir",
        "eidos.ic.i.u-tokyo.ac.jp/~tau", "liberacionprofesional.com",
        "brainvoyager.com", "sms2greece.com", "mumok.at", "usa.edu.co",
        "it-nomikai.jp", "lmls.org", "corporatetrainerclub.com", "kho.fi",
        "miuu.de", "chelles-creations.com", "swimstyle.com", "4options.pt",
        "vod.blog.pl", "woodsplitterdirect.com", "ou.com",
        "21bitcoingratis.com", "tattoo-sprueche.de", "igpublish.com",
        "spreewald-unterkuenfte.de", "sanluissa.cl", "puppyleague.tumblr.com",
        "staybcnapart.com", "cpt.gov.vn", "greenhex.net",
        "dallastechnologies.in", "powerwerks.com", "santillana.com.uy",
        "prozonemarketing.com", "mybadges.com", "readysettroll.com",
        "prochoiceny.nationbuilder.com", "factsfromfiction.blogspot.com",
        "ktateb.com", "inmueblesbanorte.com.mx", "aliceinweddingland.co.uk",
        "tvdsb.on.ca", "reddit.com/user/amirarice", "mafiamax.com",
        "postgazetesi.com", "inventorum.com", "nonsologhiaccio.com",
        "citejournal.org", "facesculptormd.com", "wtop.blogspot.com.au",
        "elo-street.blogspot.co.uk", "ebarticles.com", "orexvideourok.ru",
        "forexstrategy.com", "othersdayquotesz.com", "ebdesignhelper.co.uk",
        "utterycolorfulkryptonite.tumblr.com", "arandz.com", "ighwoods.com",
        "ajdirabota.mk", "va.org.uk", "ankrecruitment.co.in",
        "acturacionlib-tol.com.mx", "h3technician.com", "otfreetraffic.net",
        "onottouch.org", "apenziraha.com", "uytra.com", "sc-lohfelden.de",
        "lberni.ca", "portaktiv.sk", "ichcoffee.ru", "ord-travel.com",
        "rdineavvocatitreviso.it", "upazena.com",
        "log-transmission-entreprise.com", "inoheld.de", "ukouri.com",
        "righttrac.com", "heteachertoolkit.com", "concepts-auto.com",
        "awrencevilleweather.com", "allouslab.net", "hirazi.blogfa.com",
        "60degree.blogsky.com", "omeditorial.com", "therside.su",
        "androvercentre.com", "olitape.ru", "sservatoreseriale.it",
        "lackjackround.com", "freeworks.com", "avenecommerce.com",
        "-bike.co.uk", "tntechnologies.in", "stutesol.com", "zoneclubs.com",
        "ath.umn.edu/~olver", "ncotexkkm.ru", "hubbybody.com",
        "kcthundernation.com", "ostmap.com", "ent-wifi.com", "killdrick.co.uk",
        "irtualeduca.org", "piritualite2000.com", "nvestingontrack.com",
        "utsuzunhikayeleri.tumblr.com", "kentoo.com", "raycor.com", "2volt.sk",
        "ornogameonline.com", "edicalyoga.in", "evue-amateur.com",
        "retaportobello.com", "abernacleatl.com", "en-mart.com", "rzanic.com",
        "ridgersteel.com", "sgguncesi.com", "lobemoving.net",
        "utodoplnky-prodej.cz", "35678.com", "objscott.com", "esource-auto.ru",
        "exnokom.com", "dainternet.com", "ions-net.com", "ostonmamas.com",
        "ecolafacil.com.br", "-daama.com", "kis.org.hk", "odrum-rentacar.com",
        "zieu.pl", "ookshopblog.com", "on-dualitypress.org",
        "lmedauniversity.org", "achelwcole.com", "omethingvalue.com",
        "luczykarze.pl", "asargrup.com", "ousageguide.com",
        "nexplainedstuff.com", "alifa.org", "reebtcd.org", "anbu-city.com",
        "uciemanet.fr", "iwoyuen.com", "ishes.dev", "ilgiyuvasi1.info",
        "xdcm.com", "ppincomereport.com", "hbs.in", "ecure128.com", "itpro.us",
        "rl.com.tw", "echgeeker.com", "naptech.com", "otalhorsechannel.com",
        "vantisport.de", "ictorfoo.com", "ost5.ir", "anouba-sugarfree.com",
        "xpomuseum.com", "oodvood.com", "anzaniaonline-cn.org", "cecat.tw",
        "enteparaviajar.com", "eadsplus.us", "ertificadoecool.com.br",
        "lp.co.jp", "elesur.sr", "stituto-besta.it", "adioboxapp.com.br",
        "nmaya0001.tumblr.com", "onnectedfamilies.org", "etzaehler.de",
        "espreslaporte.com", "remierescene.net", "obileaff.mobi",
        "eddingcollectibles.com", "ngular.dev", "huo-hot.com",
        "repaidstart.com", "n--b1agviax.xn--p1ai", "mooozesh.ir", "elun.com",
    ]

    # Phase 1: every item that was added must be reported as present.
    missed = sum(1 for item in items if item not in bloomfilter)

    if missed != 0:
        print_failure(
            "\nFirst part of test failed with %d elemented missed\n\n" % missed)
    else:
        print_success(
            "\nTest passed!, all elements of bloomfilter are found\n\n")

    # Phase 2: collect the probe names the filter does not contain.
    absent = [name for name in nitems if name not in bloomfilter]

    print(absent)
    # The count was taken over the probe list, so that is the denominator.
    print_success(
        "%d element are not malicious ouf of %d, and they are printed above" %
        (len(absent), len(nitems)))
Example #4
0
    def test_add_item_one_item(self):
        '''
        A single item added to the Bloom Filter is reported by check().
        '''
        name = 'christian'
        bloom = BloomFilter(1, 0.05)
        bloom.add(name)

        was_found = bloom.check(name)
        self.assertTrue(was_found)
Example #5
0
def test__update_bit_vector():
    """Updating with the single hash 4 must leave bit_vector as [0, 0, 0, 0, 1]."""
    filter_under_test = BloomFilter(None, [], BIT_VECTOR_SIZE)

    filter_under_test._update_bit_vector([4])

    expected_bits = [0, 0, 0, 0, 1]
    assert filter_under_test.bit_vector == expected_bits
Example #6
0
def test__check_key_existence_failure():
    """With an all-zero bit vector, ``_check_key_existence`` must give False.

    The repository does hold DATA_KEY, but the filter's bit vector is
    overwritten with zeros before the lookup.
    """
    repository = BasicDBRepository()
    repository.data_map = {DATA_KEY: DATA_VALUE}

    filter_under_test = BloomFilter(repository, [], BIT_VECTOR_SIZE)
    filter_under_test.bit_vector = [0, 0, 0, 0, 0]

    key_hash = hashing_function.hash_key(DATA_KEY, BIT_VECTOR_SIZE)
    exists = filter_under_test._check_key_existence([key_hash])

    assert exists == False
Example #7
0
def test_read_with_existing_key_successfully():
    """``get_data_with_key`` returns the stored value for a known key.

    The repository is seeded with DATA_KEY -> DATA_VALUE and the filter's
    bit vector is preset to [0, 0, 0, 0, 1] before the read.
    """
    repository = BasicDBRepository()
    repository.data_map = {DATA_KEY: DATA_VALUE}

    filter_under_test = BloomFilter(
        repository, [hashing_function], BIT_VECTOR_SIZE)
    filter_under_test.bit_vector = [0, 0, 0, 0, 1]

    assert filter_under_test.get_data_with_key(DATA_KEY) == DATA_VALUE
Example #8
0
def test_insert_new_data_successfully():
    """
    Inserting a key/value pair into a new Bloom filter must update both the
    bit vector and the backing repository.
    """
    repository = BasicDBRepository()
    filter_under_test = BloomFilter(
        repository, [hashing_function], BIT_VECTOR_SIZE)

    filter_under_test.insert_new_data(DATA_KEY, DATA_VALUE)

    expected_bits = [0, 0, 0, 0, 1]
    assert filter_under_test.bit_vector == expected_bits

    expected_data = {DATA_KEY: DATA_VALUE}
    assert repository.show_data() == expected_data
Example #9
0
class BloomFilterTestCase(unittest.TestCase):
    """Membership checks on a BloomFilter(247, 5) preloaded with four words."""

    def setUp(self):
        """Build the filter and insert the fixture words via insert_list()."""
        bloom = BloomFilter(247, 5)
        fixture_words = ['hola', 'como', 'estan', 'holis']
        self.bloom_filter = bloom
        self.words = fixture_words
        bloom.insert_list(fixture_words)

    def test_initial_conditions(self):
        """check() yields 1 for every word that was inserted."""
        for inserted in self.words:
            outcome = self.bloom_filter.check(inserted)
            self.assertEqual(outcome, 1)

    def test_word_i_know_not_in(self):
        """check() yields 0 for "xd", which was never inserted."""
        outcome = self.bloom_filter.check("xd")
        self.assertEqual(outcome, 0)
Example #10
0
    def test_constructor(self):
        """BloomFilter(10, 3) must expose 3 hash functions and 10 bits."""
        bloom = BloomFilter(10, 3)

        hash_fn_count = len(bloom.hash_fns)
        self.assertEqual(
            hash_fn_count, 3,
            'has generated the instructed num of hash functions')

        bit_count = len(bloom.bits)
        self.assertEqual(
            bit_count, 10,
            'prepares an array of bits of instructed size')
Example #11
0
    def test_num_hashes_calculates_the_correct_number_of_hash_functions(self):
        '''
        Checks ``num_hash_fns`` for several (num_items, prob) combinations.
        '''
        # Each row: (num_items, prob, expected num_hash_fns)
        scenarios = (
            (20, 0.05, 4),
            (1000, 0.25, 1),
            (10000, 0.02, 5),
        )
        for num_items, prob, expected_hashes in scenarios:
            bloom = BloomFilter(num_items, prob)
            self.assertEqual(bloom.num_hash_fns, expected_hashes)
Example #12
0
    def test_bit_array_size_calculates_correct_array_size(self):
        '''
        Checks ``bit_array_size`` for several (num_items, prob) combinations.
        '''
        # Each row: (num_items, prob, expected bit_array_size)
        scenarios = (
            (20, 0.05, 124),
            (1000, 0.25, 2885),
            (10000, 0.02, 81423),
        )
        for num_items, prob, expected_size in scenarios:
            bloom = BloomFilter(num_items, prob)
            self.assertEqual(bloom.bit_array_size, expected_size)
Example #13
0
def testBasicBloomFilter():
    """Add a handful of words to a BloomFilter and report lookup results.

    Phase 1 looks up every added word and counts the ones the filter fails
    to report. Phase 2 looks up words that were never added and counts the
    ones the filter does report. Results are printed through
    ``print_success`` / ``print_failure``; nothing is asserted or returned.
    """
    items = ['glass', 'moon', 'mouse', 'cat', 'extra', 'pot']

    filepath = './files/randomwords.bloom'

    bloomfilter = BloomFilter(len(items), 0.001, filepath)

    bloomfilter.add(items)

    nitems = ['Jackson', 'Inline', 'Whatever']

    # Phase 1: added words are expected to be found.
    missed = 0
    for word in items:
        if word in bloomfilter:
            print_success("Found in bloomfilter: %s" % word)
        else:
            missed += 1
            print_failure("Did not find in bloomfilter: %s" % word)

    if missed != 0:
        print_failure(
            "\nFirst part of test failed with %d elemented missed\n\n" %
            missed)
    else:
        print_success(
            "\nTest passed!, all elements of bloomfilter are found\n\n")

    # Phase 2: words that were never added are expected to be absent, so a
    # hit is the failure case here and a miss is the success case.
    unexpected = 0
    for word in nitems:
        if word in bloomfilter:
            unexpected += 1
            print_failure("Found in bloomfilter: %s" % word)
        else:
            print_success("Did not find in bloomfilter: %s" % word)

    if unexpected != 0:
        print_failure(
            "\nSecond part of test failed with %d elemented found\n\n" %
            unexpected)
    else:
        print_success(
            "\nTest passed!, all elements that are not in the bloomfilter weren't found\n\n"
        )
Example #14
0
def testFileCreation():
    """Construct a BloomFilter with a file path and report whether it exists.

    Any exception raised by the constructor is reported through
    ``print_failure`` and then re-raised.
    """
    bloom_path = './files/bands.bloom'
    try:
        BloomFilter(10, 0.00000001, bloom_path)
    except Exception as err:
        print_failure("Test failed with: %s" % str(err))
        raise err

    # Only reached when construction succeeded.
    import os
    if not os.path.exists(bloom_path):
        print_failure("Failed to crate file")
    else:
        print_success("File created successfully")
Example #15
0
def testBasicBloomFilter():
    """Add a handful of words to a BloomFilter and report lookup results.

    Phase 1 looks up every added word and counts the ones the filter fails
    to report. Phase 2 looks up words that were never added and counts the
    ones the filter does report. Results are printed through
    ``print_success`` / ``print_failure``; nothing is asserted or returned.
    """
    items = ['glass', 'moon', 'mouse', 'cat', 'extra', 'pot']

    filepath = './files/randomwords.bloom'

    bloomfilter = BloomFilter(len(items), 0.001, filepath)

    bloomfilter.add(items)

    nitems = ['Jackson', 'Inline', 'Whatever']

    # Phase 1: added words are expected to be found.
    missed = 0
    for word in items:
        if word in bloomfilter:
            print_success("Found in bloomfilter: %s" % word)
        else:
            missed += 1
            print_failure("Did not find in bloomfilter: %s" % word)

    if missed != 0:
        print_failure("\nFirst part of test failed with %d elemented missed\n\n" % missed)
    else:
        print_success("\nTest passed!, all elements of bloomfilter are found\n\n")

    # Phase 2: words that were never added are expected to be absent, so a
    # hit is the failure case here and a miss is the success case.
    unexpected = 0
    for word in nitems:
        if word in bloomfilter:
            unexpected += 1
            print_failure("Found in bloomfilter: %s" % word)
        else:
            print_success("Did not find in bloomfilter: %s" % word)

    if unexpected != 0:
        print_failure("\nSecond part of test failed with %d elemented found\n\n" % unexpected)
    else:
        print_success("\nTest passed!, all elements that are not in the bloomfilter weren't found\n\n")
Example #16
0
    def test_check_gives_response_regardless_of_key(self):
        '''
        check() must answer for any key, added or not. For keys that were
        never added only the answer's membership in (True, False) is
        verified, since a false positive cannot be ruled out.
        '''
        bloom = BloomFilter(1, 0.05)
        bloom.add('christian')

        self.assertTrue(bloom.check('christian'))

        allowed_answers = (True, False)
        daniel_answered = bloom.check('daniel') in allowed_answers
        charles_answered = bloom.check('charles') in allowed_answers

        self.assertTrue(daniel_answered)
        self.assertTrue(charles_answered)
Example #17
0
 def test_lookup(self):
     """A value passed to insert() must afterwards be reported by lookup()."""
     bloom = BloomFilter(10, 3)
     bloom.insert(12)
     was_found = bloom.lookup(12)
     self.assertTrue(was_found, 'should detect it was inserted')
Example #18
0
 def test_insert(self):
     """Call insert() on a BloomFilter(10, 3); nothing is asserted."""
     bloom = BloomFilter(10, 3)
     value = 12
     bloom.insert(value)
Example #19
0
 def test_add_word_with_value_out_of_bounds_of_bloom_array_is_oooook(self):
     """Call add_word() with a long run of 'z' characters; nothing is asserted."""
     long_word = 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
     self.filter = BloomFilter()
     self.filter.add_word(long_word)
Example #20
0
    def test_add_item_different_probability_multiple_items(self):
        '''
        For each of several false positive probabilities, adds four items
        to a fresh Bloom Filter and checks that all four are reported.
        '''
        names = ('christian', 'daniel', 'debra', 'charles-adrian')

        for prob in (0.10, 0.50, 0.9):
            bloom = BloomFilter(4, prob)
            for name in names:
                bloom.add(name)

            for name in names:
                self.assertTrue(bloom.check(name))
Example #21
0
    def test_add_item_different_probability_one_item(self):
        '''
        For each of several false positive probabilities, adds one item to
        a fresh Bloom Filter and checks that it is reported.
        '''
        name = 'christian'

        for prob in (0.10, 0.5, 0.9):
            bloom = BloomFilter(1, prob)
            bloom.add(name)

            self.assertTrue(bloom.check(name))
Example #22
0
    def test_add_item_multiple_items(self):
        '''
        Adds four items to one Bloom Filter and checks that every one of
        them is reported by check().
        '''
        names = ('christian', 'daniel', 'debra', 'charles-adrian')
        bloom = BloomFilter(4, 0.05)

        for name in names:
            bloom.add(name)

        for name in names:
            self.assertTrue(bloom.check(name))
Example #23
0
class MyTestCase(unittest.TestCase):
    """Tests for the word-oriented BloomFilter API.

    Covers validate(), add_word(), has_word() and load_wordlist().
    """

    def setUp(self):
        """Give every test its own default-constructed filter."""
        self.filter = BloomFilter()

    def test_validate_when_passed_a_letter_returns_its_place_in_alphabet(self):
        """validate() of a single lowercase letter gives its 1-based position."""
        for letter, position in (('a', 1), ('m', 13), ('z', 26)):
            self.assertEqual(position, self.filter.validate(letter))

    def test_validate_is_case_insensitive(self):
        """validate() gives the same result for a letter in either case."""
        for lower, upper in (('a', 'A'), ('m', 'M'), ('z', 'Z')):
            self.assertEqual(self.filter.validate(lower),
                             self.filter.validate(upper))

    def test_validate_when_passed_a_word_returns_sum_of_character_values(self):
        """validate() of whole words gives the expected fixed values."""
        for word, expected in (('cat', 1630), ('dog', 2468)):
            self.assertEqual(expected, self.filter.validate(word))

    def test_add_word_to_filter_toggles_flag_at_array_index_n(self):
        """A word passed to add_word() is afterwards reported by has_word()."""
        word = 'cat'
        self.filter.add_word(word)
        self.assertTrue(self.filter.has_word(word))

    def test_has_word_returns_false_for_not_found_words(self):
        """has_word() is falsy for a word that was never added."""
        is_present = self.filter.has_word('sup')
        self.assertFalse(is_present)

    def test_add_word_with_value_out_of_bounds_of_bloom_array_is_oooook(self):
        """Call add_word() with a long run of 'z'; nothing is asserted."""
        long_word = 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
        self.filter = BloomFilter()
        self.filter.add_word(long_word)

    def test_wordlist_loaded(self):
        """After load_wordlist(), bloom_array holds more than one entry."""
        self.filter.load_wordlist("../src/word_list.txt")
        array_length = len(self.filter.bloom_array)
        self.assertTrue(array_length > 1)

    def test_find_word_in_wordlist_returns_true(self):
        """has_word() is truthy for "A'asia" once the word list is loaded."""
        self.filter.load_wordlist("../src/word_list.txt")
        is_present = self.filter.has_word("A'asia")
        self.assertTrue(is_present)

    def test_find_word_not_in_wordlist_returns_false(self):
        """has_word() is falsy for "booooooo" once the word list is loaded."""
        self.filter.load_wordlist("../src/word_list.txt")
        is_present = self.filter.has_word("booooooo")
        self.assertFalse(is_present)
Example #24
0
 def setUp(self):
     """Store a default-constructed BloomFilter on ``self.filter``."""
     fresh_filter = BloomFilter()
     self.filter = fresh_filter
Example #25
0
 def setUp(self):
     """Store a ``BloomFilter(31)`` on ``self.bloom``."""
     fresh_bloom = BloomFilter(31)
     self.bloom = fresh_bloom
     small_n * 7, small_n * 8, small_n * 9, small_n * 10
 ]
 for mxd in M_RANGE:
     m = int(mxd)
     search_times_bf = []
     search_times_fl = []
     initial_size_bf = []
     size_after_insertion_bf = []
     false_positives = [0] * ITER
     disk_access = [0] * ITER
     k = get_k(m, small_n)
     print(
         f'[STARTED AT] [{time.ctime(time.time())}] n: {big_n}, m: {m}, k: {k}'
     )
     for i in range(ITER):
         bloom_filter = BloomFilter(m, k)
         initial_size_bf.append(get_deep_size(bloom_filter))
         bloom_filter, L_file, universe_file = generate_files_and_insert_to_bloom_filter(
             bloom_filter, big_n, small_n)
         size_after_insertion_bf.append(get_deep_size(bloom_filter))
         current_time_bf = []
         current_time_fl = []
         with open('experimentos/files/universe_file.txt',
                   'r') as universe_file:
             total_usernames_list = [
                 w.strip('\n') for w in universe_file.readlines()
             ]
             for username_query in total_usernames_list:
                 ti_bf = time.time()
                 username_might_be_in_file = bloom_filter.check(
                     username_query)
Example #27
0
def testOldBloomFilterFile():
    """Exercise a BloomFilter built from the items of ``loadTop2MFile()``.

    The filter is constructed with the path './files/urls.bloom'. Every
    loaded item is added and then looked up again, and the number of misses
    is reported. A fixed list of additional host names is then probed; the
    ones the filter does not contain are printed together with a summary.
    """
    items = loadTop2MFile()

    bloomfilter = BloomFilter(len(items), 0.0001, './files/urls.bloom')

    bloomfilter.add(items)

    nitems = [
        "atapattu.net", "exyu.info", "sergioalbanese.it", "leshopping.com.br",
        "natura-naturans.de", "footbuddies.com", "sieuthimucin.net",
        "kuma-bus.co.jp", "biit.no", "kndkmc.or.kr", "flygo.net", "0552.ua",
        "tsukashin.com", "sites.google.com/a/marist.com", "ad-topia.com",
        "wpgogo.com", "meme-italia.it", "centrostudihelios.it",
        "lubimayaoptika.by", "thewandergirls.com", "vrapps.co", "oksdf.com",
        "whalesmall.co.kr", "bankbazar.com", "matthewsdiehl.com",
        "videoplaylistgallery.com", "bestmuseum.ru", "it-site.net",
        "taunigma.biz", "cdcpederzoli.it", "vikaskumrai.com",
        "lodensoftware.com", "barami.us", "tilsonhome.com",
        "proutene-kosiky.cz", "rboots.ru", "commercedc.com.my", "energia.fi",
        "budidayanews.blogspot.com", "aghed.blogfa.com",
        "petcareclinics.co.uk", "simonpattonracing.com", "alliebeth.com",
        "powershop.jp", "pismotechnic.com", "sacramentocoaches.org",
        "alloextra.com", "book-of-ra-deluxe.de", "beststyle.me", "btr-k.ru",
        "sceny-z-uzycia.blog.pl", "my7sports.com", "gazetaonline.in",
        "vicensvives.es", "etaal.gov.in", "site-pedia.ro", "sweatvac.com",
        "camberleywebdesign.com", "lettre-de-motivation-facile.com",
        "importir.org", "eapc.cat", "smartdiesel.ro", "onewideo.com",
        "goodrelaxation.com", "plasenta.com.tr", "openfilepro.com",
        "sempatiza.es", "hljfood.net", "stream2watch.eu", "pointofreturn.com",
        "certificadosprofesionalidad.com", "pavementresources.com",
        "daralab.com", "businesstechnologyguide.com.au", "sanostra.es",
        "view-events.com", "antiques-jp.com", "starina-mebel.ru",
        "chajian110.com", "eyworkflow.com", "keenitsolution.com",
        "vitadomo.ch", "kintek.com.au", "theship.co.uk", "mgs-subs.de",
        "innovservers.com", "kindersoaps.com", "hotelpitangueiras.com.br",
        "noweprzetargi.pl", "notebooksapp.com", "hrtrainingcenter.com",
        "sunshinenigeria.com", "x-sense.jp", "agenciafosforo.com.br",
        "topfriend.org", "paranoidprintco.bigcartel.com",
        "metropolitan.org.uk", "jpi.at", "fixplanta.net", "i2office.co.uk",
        "lisboatriathlon.com", "just2craft.fr", "downloaden.nl",
        "epharmadeals.gr", "bettd.com", "fix.no", "sumaprojektow.pl",
        "atcloudspeakers.co.uk", "fashionjudy.com", "dshop.vn",
        "tribesports.co.uk", "trinityviaggistudio.it", "encyclosights.com",
        "dhg-marketing.de", "xiayizhan.cn", "gfxtra2.net/user/Orten",
        "fetchrobotics.com", "max-joomla.ru", "lrt.ru", "turk-diziler14.ir",
        "eidos.ic.i.u-tokyo.ac.jp/~tau", "liberacionprofesional.com",
        "brainvoyager.com", "sms2greece.com", "mumok.at", "usa.edu.co",
        "it-nomikai.jp", "lmls.org", "corporatetrainerclub.com", "kho.fi",
        "miuu.de", "chelles-creations.com", "swimstyle.com", "4options.pt",
        "vod.blog.pl", "woodsplitterdirect.com", "ou.com",
        "21bitcoingratis.com", "tattoo-sprueche.de", "igpublish.com",
        "spreewald-unterkuenfte.de", "sanluissa.cl", "puppyleague.tumblr.com",
        "staybcnapart.com", "cpt.gov.vn", "greenhex.net",
        "dallastechnologies.in", "powerwerks.com", "santillana.com.uy",
        "prozonemarketing.com", "mybadges.com", "readysettroll.com",
        "prochoiceny.nationbuilder.com", "factsfromfiction.blogspot.com",
        "ktateb.com", "inmueblesbanorte.com.mx", "aliceinweddingland.co.uk",
        "tvdsb.on.ca", "reddit.com/user/amirarice", "mafiamax.com",
        "postgazetesi.com", "inventorum.com", "nonsologhiaccio.com",
        "citejournal.org", "facesculptormd.com", "wtop.blogspot.com.au",
        "elo-street.blogspot.co.uk", "ebarticles.com", "orexvideourok.ru",
        "forexstrategy.com", "othersdayquotesz.com", "ebdesignhelper.co.uk",
        "utterycolorfulkryptonite.tumblr.com", "arandz.com", "ighwoods.com",
        "ajdirabota.mk", "va.org.uk", "ankrecruitment.co.in",
        "acturacionlib-tol.com.mx", "h3technician.com", "otfreetraffic.net",
        "onottouch.org", "apenziraha.com", "uytra.com", "sc-lohfelden.de",
        "lberni.ca", "portaktiv.sk", "ichcoffee.ru", "ord-travel.com",
        "rdineavvocatitreviso.it", "upazena.com",
        "log-transmission-entreprise.com", "inoheld.de", "ukouri.com",
        "righttrac.com", "heteachertoolkit.com", "concepts-auto.com",
        "awrencevilleweather.com", "allouslab.net", "hirazi.blogfa.com",
        "60degree.blogsky.com", "omeditorial.com", "therside.su",
        "androvercentre.com", "olitape.ru", "sservatoreseriale.it",
        "lackjackround.com", "freeworks.com", "avenecommerce.com",
        "-bike.co.uk", "tntechnologies.in", "stutesol.com", "zoneclubs.com",
        "ath.umn.edu/~olver", "ncotexkkm.ru", "hubbybody.com",
        "kcthundernation.com", "ostmap.com", "ent-wifi.com", "killdrick.co.uk",
        "irtualeduca.org", "piritualite2000.com", "nvestingontrack.com",
        "utsuzunhikayeleri.tumblr.com", "kentoo.com", "raycor.com", "2volt.sk",
        "ornogameonline.com", "edicalyoga.in", "evue-amateur.com",
        "retaportobello.com", "abernacleatl.com", "en-mart.com", "rzanic.com",
        "ridgersteel.com", "sgguncesi.com", "lobemoving.net",
        "utodoplnky-prodej.cz", "35678.com", "objscott.com", "esource-auto.ru",
        "exnokom.com", "dainternet.com", "ions-net.com", "ostonmamas.com",
        "ecolafacil.com.br", "-daama.com", "kis.org.hk", "odrum-rentacar.com",
        "zieu.pl", "ookshopblog.com", "on-dualitypress.org",
        "lmedauniversity.org", "achelwcole.com", "omethingvalue.com",
        "luczykarze.pl", "asargrup.com", "ousageguide.com",
        "nexplainedstuff.com", "alifa.org", "reebtcd.org", "anbu-city.com",
        "uciemanet.fr", "iwoyuen.com", "ishes.dev", "ilgiyuvasi1.info",
        "xdcm.com", "ppincomereport.com", "hbs.in", "ecure128.com", "itpro.us",
        "rl.com.tw", "echgeeker.com", "naptech.com", "otalhorsechannel.com",
        "vantisport.de", "ictorfoo.com", "ost5.ir", "anouba-sugarfree.com",
        "xpomuseum.com", "oodvood.com", "anzaniaonline-cn.org", "cecat.tw",
        "enteparaviajar.com", "eadsplus.us", "ertificadoecool.com.br",
        "lp.co.jp", "elesur.sr", "stituto-besta.it", "adioboxapp.com.br",
        "nmaya0001.tumblr.com", "onnectedfamilies.org", "etzaehler.de",
        "espreslaporte.com", "remierescene.net", "obileaff.mobi",
        "eddingcollectibles.com", "ngular.dev", "huo-hot.com",
        "repaidstart.com", "n--b1agviax.xn--p1ai", "mooozesh.ir", "elun.com",
    ]

    # Phase 1: every item that was added must be reported as present.
    missed = sum(1 for item in items if item not in bloomfilter)

    if missed != 0:
        print_failure(
            "\nFirst part of test failed with %d elemented missed\n\n" % missed)
    else:
        print_success(
            "\nTest passed!, all elements of bloomfilter are found\n\n")

    # Phase 2: collect the probe names the filter does not contain.
    absent = [name for name in nitems if name not in bloomfilter]

    print(absent)
    # The count was taken over the probe list, so that is the denominator.
    print_success(
        "%d element are not malicious ouf of %d, and they are printed above" %
        (len(absent), len(nitems)))
Example #28
0
 def setUp(self):
     """Build a BloomFilter(247, 5) and insert the fixture words into it."""
     bloom = BloomFilter(247, 5)
     fixture_words = ['hola', 'como', 'estan', 'holis']
     self.bloom_filter = bloom
     self.words = fixture_words
     bloom.insert_list(fixture_words)
Example #29
0
class TestBloomFilter(unittest.TestCase):
    """Tests for a BloomFilter(31): its two hash functions and add/is_value."""

    def setUp(self):
        """Create the filter under test."""
        self.bloom = BloomFilter(31)

    def tearDown(self):
        """Drop the filter created in setUp."""
        del self.bloom

    def test_hash1_function(self):
        """hash1() gives the expected fixed value for each sample string."""
        expectations = (
            (24, "0123456789"),
            (12, "5678901234"),
            (6, "8901234567"),
            (0, "BloomFilter"),
        )
        for expected, text in expectations:
            self.assertEqual(expected, self.bloom.hash1(text))

    def test_hash2_function(self):
        """hash2() gives the expected fixed value for each sample string."""
        expectations = (
            (26, "0123456789"),
            (0, "5678901234"),
            (13, "8901234567"),
            (15, "BloomFilter"),
        )
        for expected, text in expectations:
            self.assertEqual(expected, self.bloom.hash2(text))

    def test_add_check_value(self):
        """Added values are reported; selected other values are checked too."""
        # The ten rotations of the digit string are the values to add.
        digits = "0123456789"
        rotations = [digits[shift:] + digits[:shift] for shift in range(10)]
        for rotation in rotations:
            self.bloom.add(rotation)

        # Everything that was added must be reported.
        for rotation in rotations:
            self.assertTrue(self.bloom.is_value(rotation))

        # Values that were never added and are expected to be rejected.
        for absent in ("HelloWorld!", "Fake value", "000000000", "no_data"):
            self.assertFalse(self.bloom.is_value(absent))

        # A value that was never added but is expected to be reported anyway.
        self.assertTrue(self.bloom.is_value("111111111"))