Example #1
0
def test__validate_hashs_failure():
    """Check that ``_validate_hashs`` rejects the hash value 6.

    NOTE(review): 6 is presumably outside the range allowed by
    ``BIT_VECTOR_SIZE`` -- confirm against that constant's definition.
    """
    bloom_filter = BloomFilter(None, [], BIT_VECTOR_SIZE)

    # Explicit comparison kept on purpose: a ``None`` return must not pass.
    assert bloom_filter._validate_hashs([6]) == False
Example #2
0
def test__validate_hashs_success():
    """Check that ``_validate_hashs`` accepts the hash computed for ``DATA_KEY``."""
    filter_under_test = BloomFilter(None, [], BIT_VECTOR_SIZE)
    key_hash = hashing_function.hash_key(DATA_KEY, BIT_VECTOR_SIZE)

    is_valid = filter_under_test._validate_hashs([key_hash])

    assert is_valid == True
Example #3
0
    def test_constructor(self):
        # A filter built with (10, 3) must expose 3 hash functions and 10 bits.
        bloom = BloomFilter(10, 3)

        hash_fn_count = len(bloom.hash_fns)
        self.assertEqual(
            hash_fn_count, 3,
            'has generated the instructed num of hash functions')

        bit_count = len(bloom.bits)
        self.assertEqual(
            bit_count, 10,
            'prepares an array of bits of instructed size')
Example #4
0
def test__update_bit_vector():
    """Check that ``_update_bit_vector([4])`` leaves only index 4 set."""
    filter_under_test = BloomFilter(None, [], BIT_VECTOR_SIZE)
    hash_indexes = [4]

    filter_under_test._update_bit_vector(hash_indexes)

    expected_bits = [0, 0, 0, 0, 1]
    assert filter_under_test.bit_vector == expected_bits
Example #5
0
    def test_add_item_one_item(self):
        '''
        Checks that a single added item is afterwards reported by check().
        '''
        item = 'christian'
        bloom = BloomFilter(1, 0.05)

        bloom.add(item)
        was_found = bloom.check(item)

        self.assertTrue(was_found)
Example #6
0
    def test_num_hashes_calculates_the_correct_number_of_hash_functions(self):
        '''
        Checks the value of ``num_hash_fns`` for several combinations of
        item count and probability.
        '''
        # Each row: (num_items, prob, expected num_hash_fns).
        cases = (
            (20, 0.05, 4),
            (1000, 0.25, 1),
            (10000, 0.02, 5),
        )

        for num_items, prob, expected_hash_fns in cases:
            bf = BloomFilter(num_items, prob)
            self.assertEqual(bf.num_hash_fns, expected_hash_fns)
Example #7
0
    def test_add_item_different_probability_one_item(self):
        '''
        Checks that a single added item is reported as present for a
        range of false positive probability settings.
        '''
        item = 'christian'

        for prob in (0.10, 0.5, 0.9):
            # A fresh filter for each probability value.
            bf = BloomFilter(1, prob)
            bf.add(item)

            self.assertTrue(bf.check(item))
Example #8
0
    def test_bit_array_size_calculates_correct_array_size(self):
        '''
        Tests that the bloom filter calculates the correct bit array
        size for several combinations of item count and probability.
        '''
        # Each row: (num_items, prob, expected bit_array_size).
        cases = (
            (20, 0.05, 124),
            (1000, 0.25, 2885),
            (10000, 0.02, 81423),
        )

        for num_items, prob, expected_size in cases:
            bf = BloomFilter(num_items, prob)
            self.assertEqual(bf.bit_array_size, expected_size)
Example #9
0
def test__check_key_existence_failure():
    """``_check_key_existence`` must be false while every bit is still 0."""
    repository = BasicDBRepository()
    repository.data_map = {DATA_KEY: DATA_VALUE}
    filter_under_test = BloomFilter(repository, [], BIT_VECTOR_SIZE)
    # Force an all-zero vector so no hash can appear to be present.
    filter_under_test.bit_vector = [0] * 5

    key_hash = hashing_function.hash_key(DATA_KEY, BIT_VECTOR_SIZE)
    key_exists = filter_under_test._check_key_existence([key_hash])

    assert key_exists == False
Example #10
0
def test_read_with_existing_key_successfully():
    """Reading ``DATA_KEY`` returns the value held in the repository."""
    hash_fns = [hashing_function]
    repository = BasicDBRepository()
    repository.data_map = {DATA_KEY: DATA_VALUE}
    filter_under_test = BloomFilter(repository, hash_fns, BIT_VECTOR_SIZE)
    # Pre-set the vector with only index 4 raised.
    filter_under_test.bit_vector = [0, 0, 0, 0, 1]

    assert filter_under_test.get_data_with_key(DATA_KEY) == DATA_VALUE
Example #11
0
def testFileCreation():
    """Create a ``BloomFilter`` with a file path and report whether the file exists.

    Any exception raised by the constructor is reported through
    ``print_failure`` and then re-raised. A missing file is only
    reported, it does not raise.
    """
    import os

    path = './files/bands.bloom'
    try:
        BloomFilter(10, 0.00000001, path)
    except Exception as e:
        print_failure("Test failed with: %s" % str(e))
        # Bare ``raise`` re-raises the active exception with its traceback.
        raise

    if os.path.exists(path):
        print_success("File created successfully")
    else:
        print_failure("Failed to create file")
Example #12
0
def test_insert_new_data_successfully():
    """
    Inserting a key/value pair into a fresh filter must update both the
    bit vector and the backing repository.
    """
    repository = BasicDBRepository()
    filter_under_test = BloomFilter(
        repository, [hashing_function], BIT_VECTOR_SIZE)

    filter_under_test.insert_new_data(DATA_KEY, DATA_VALUE)

    expected_bits = [0, 0, 0, 0, 1]
    assert filter_under_test.bit_vector == expected_bits
    assert repository.show_data() == {DATA_KEY: DATA_VALUE}
Example #13
0
    def test_add_item_multiple_items(self):
        '''
        Checks that several items added to one filter are all reported
        as present afterwards.
        '''
        names = ('christian', 'daniel', 'debra', 'charles-adrian')
        bf = BloomFilter(4, 0.05)

        # Add everything first, then verify, so that later insertions
        # are in place before any lookup happens.
        for name in names:
            bf.add(name)

        for name in names:
            self.assertTrue(bf.check(name))
Example #14
0
    def test_add_item_different_probability_multiple_items(self):
        '''
        Checks that several added items are all reported as present for
        a range of false positive probability settings.
        '''
        names = ('christian', 'daniel', 'debra', 'charles-adrian')

        for prob in (0.10, 0.50, 0.9):
            # A fresh filter for each probability value.
            bf = BloomFilter(4, prob)
            for name in names:
                bf.add(name)

            for name in names:
                self.assertTrue(bf.check(name))
Example #15
0
    def test_check_gives_response_regardless_of_key(self):
        '''
        Checks that check() gives a True/False answer for keys that were
        never added. Nothing stronger can be asserted for those keys,
        because a false positive is always possible.
        '''
        bf = BloomFilter(1, 0.05)
        bf.add('christian')

        self.assertTrue(bf.check('christian'))

        # Query both unknown keys before asserting on either answer.
        unknown_keys = ('daniel', 'charles')
        answered = [bf.check(key) in (True, False) for key in unknown_keys]

        for gave_answer in answered:
            self.assertTrue(gave_answer)
Example #16
0
def testBasicBloomFilter():
    """Add a few words to a file-backed filter and report membership lookups.

    First part: every added word is looked up; misses are counted and
    reported as a failure.
    Second part: words that were never added are looked up; any that are
    found are counted and reported as a failure.

    Results are only printed through ``print_success`` / ``print_failure``;
    nothing is asserted or raised by this function itself.
    """
    items = ['glass', 'moon', 'mouse', 'cat', 'extra', 'pot']
    nitems = ['Jackson', 'Inline', 'Whatever']

    filepath = './files/randomwords.bloom'

    bloomfilter = BloomFilter(len(items), 0.001, filepath)

    # The whole list is handed to a single ``add`` call.
    bloomfilter.add(items)

    missed = 0
    for item in items:
        if item in bloomfilter:
            print_success("Found in bloomfilter: %s" % item)
        else:
            missed += 1
            print_failure("Did not find in bloomfilter: %s" % item)

    if missed != 0:
        print_failure(
            "\nFirst part of test failed with %d elements missed\n\n" % missed)
    else:
        print_success(
            "\nTest passed!, all elements of bloomfilter are found\n\n")

    found = 0
    for item in nitems:
        if item in bloomfilter:
            # A hit on a word that was never added counts against the test,
            # so it is reported as a failure, matching the summary below.
            found += 1
            print_failure("Found in bloomfilter: %s" % item)
        else:
            print_success("Did not find in bloomfilter: %s" % item)

    if found != 0:
        print_failure(
            "\nSecond part of test failed with %d elements found\n\n" % found)
    else:
        print_success(
            "\nTest passed!, all elements that are not in the bloomfilter weren't found\n\n"
        )
Example #17
0
 def test_lookup(self):
     # A value that was inserted must be reported as present by lookup().
     value = 12
     bloom = BloomFilter(10, 3)
     bloom.insert(value)

     was_found = bloom.lookup(value)
     self.assertTrue(was_found, 'should detect it was inserted')
Example #18
0
 def test_insert(self):
     # No assertion here: the test passes as long as insert() does not raise.
     value = 12
     bloom = BloomFilter(10, 3)
     bloom.insert(value)
Example #19
0
 def setUp(self):
     # Fixture: a filter built with no constructor arguments.
     new_filter = BloomFilter()
     self.filter = new_filter
Example #20
0
 def test_add_word_with_value_out_of_bounds_of_bloom_array_is_oooook(self):
     # No assertion here: the test passes as long as add_word() does not
     # raise for this long word.
     long_word = 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
     self.filter = BloomFilter()
     self.filter.add_word(long_word)
     small_n * 7, small_n * 8, small_n * 9, small_n * 10
 ]
 for mxd in M_RANGE:
     m = int(mxd)
     search_times_bf = []
     search_times_fl = []
     initial_size_bf = []
     size_after_insertion_bf = []
     false_positives = [0] * ITER
     disk_access = [0] * ITER
     k = get_k(m, small_n)
     print(
         f'[STARTED AT] [{time.ctime(time.time())}] n: {big_n}, m: {m}, k: {k}'
     )
     for i in range(ITER):
         bloom_filter = BloomFilter(m, k)
         initial_size_bf.append(get_deep_size(bloom_filter))
         bloom_filter, L_file, universe_file = generate_files_and_insert_to_bloom_filter(
             bloom_filter, big_n, small_n)
         size_after_insertion_bf.append(get_deep_size(bloom_filter))
         current_time_bf = []
         current_time_fl = []
         with open('experimentos/files/universe_file.txt',
                   'r') as universe_file:
             total_usernames_list = [
                 w.strip('\n') for w in universe_file.readlines()
             ]
             for username_query in total_usernames_list:
                 ti_bf = time.time()
                 username_might_be_in_file = bloom_filter.check(
                     username_query)
Example #22
0
def testOldBloomFilterFile():
    """Load a large item list into a file-backed filter and report lookups.

    First part: every loaded item is looked up; misses are counted and
    reported as a failure.
    Second part: a fixed list of extra domains (``nitems``) is looked up;
    those not found in the filter are printed, followed by a summary of
    how many of the ``nitems`` entries were not found.

    Results are only printed; nothing is asserted by this function itself.
    """
    items = loadTop2MFile()

    bloomfilter = BloomFilter(len(items), 0.0001, './files/urls.bloom')

    # The whole list is handed to a single ``add`` call.
    bloomfilter.add(items)

    nitems = [
        "atapattu.net", "exyu.info", "sergioalbanese.it", "leshopping.com.br",
        "natura-naturans.de", "footbuddies.com", "sieuthimucin.net",
        "kuma-bus.co.jp", "biit.no", "kndkmc.or.kr", "flygo.net", "0552.ua",
        "tsukashin.com", "sites.google.com/a/marist.com", "ad-topia.com",
        "wpgogo.com", "meme-italia.it", "centrostudihelios.it",
        "lubimayaoptika.by", "thewandergirls.com", "vrapps.co", "oksdf.com",
        "whalesmall.co.kr", "bankbazar.com", "matthewsdiehl.com",
        "videoplaylistgallery.com", "bestmuseum.ru", "it-site.net",
        "taunigma.biz", "cdcpederzoli.it", "vikaskumrai.com",
        "lodensoftware.com", "barami.us", "tilsonhome.com",
        "proutene-kosiky.cz", "rboots.ru", "commercedc.com.my", "energia.fi",
        "budidayanews.blogspot.com", "aghed.blogfa.com",
        "petcareclinics.co.uk", "simonpattonracing.com", "alliebeth.com",
        "powershop.jp", "pismotechnic.com", "sacramentocoaches.org",
        "alloextra.com", "book-of-ra-deluxe.de", "beststyle.me", "btr-k.ru",
        "sceny-z-uzycia.blog.pl", "my7sports.com", "gazetaonline.in",
        "vicensvives.es", "etaal.gov.in", "site-pedia.ro", "sweatvac.com",
        "camberleywebdesign.com", "lettre-de-motivation-facile.com",
        "importir.org", "eapc.cat", "smartdiesel.ro", "onewideo.com",
        "goodrelaxation.com", "plasenta.com.tr", "openfilepro.com",
        "sempatiza.es", "hljfood.net", "stream2watch.eu", "pointofreturn.com",
        "certificadosprofesionalidad.com", "pavementresources.com",
        "daralab.com", "businesstechnologyguide.com.au", "sanostra.es",
        "view-events.com", "antiques-jp.com", "starina-mebel.ru",
        "chajian110.com", "eyworkflow.com", "keenitsolution.com",
        "vitadomo.ch", "kintek.com.au", "theship.co.uk", "mgs-subs.de",
        "innovservers.com", "kindersoaps.com", "hotelpitangueiras.com.br",
        "noweprzetargi.pl", "notebooksapp.com", "hrtrainingcenter.com",
        "sunshinenigeria.com", "x-sense.jp", "agenciafosforo.com.br",
        "topfriend.org", "paranoidprintco.bigcartel.com",
        "metropolitan.org.uk", "jpi.at", "fixplanta.net", "i2office.co.uk",
        "lisboatriathlon.com", "just2craft.fr", "downloaden.nl",
        "epharmadeals.gr", "bettd.com", "fix.no", "sumaprojektow.pl",
        "atcloudspeakers.co.uk", "fashionjudy.com", "dshop.vn",
        "tribesports.co.uk", "trinityviaggistudio.it", "encyclosights.com",
        "dhg-marketing.de", "xiayizhan.cn", "gfxtra2.net/user/Orten",
        "fetchrobotics.com", "max-joomla.ru", "lrt.ru", "turk-diziler14.ir",
        "eidos.ic.i.u-tokyo.ac.jp/~tau", "liberacionprofesional.com",
        "brainvoyager.com", "sms2greece.com", "mumok.at", "usa.edu.co",
        "it-nomikai.jp", "lmls.org", "corporatetrainerclub.com", "kho.fi",
        "miuu.de", "chelles-creations.com", "swimstyle.com", "4options.pt",
        "vod.blog.pl", "woodsplitterdirect.com", "ou.com",
        "21bitcoingratis.com", "tattoo-sprueche.de", "igpublish.com",
        "spreewald-unterkuenfte.de", "sanluissa.cl", "puppyleague.tumblr.com",
        "staybcnapart.com", "cpt.gov.vn", "greenhex.net",
        "dallastechnologies.in", "powerwerks.com", "santillana.com.uy",
        "prozonemarketing.com", "mybadges.com", "readysettroll.com",
        "prochoiceny.nationbuilder.com", "factsfromfiction.blogspot.com",
        "ktateb.com", "inmueblesbanorte.com.mx", "aliceinweddingland.co.uk",
        "tvdsb.on.ca", "reddit.com/user/amirarice", "mafiamax.com",
        "postgazetesi.com", "inventorum.com", "nonsologhiaccio.com",
        "citejournal.org", "facesculptormd.com", "wtop.blogspot.com.au",
        "elo-street.blogspot.co.uk", "ebarticles.com", "orexvideourok.ru",
        "forexstrategy.com", "othersdayquotesz.com", "ebdesignhelper.co.uk",
        "utterycolorfulkryptonite.tumblr.com", "arandz.com", "ighwoods.com",
        "ajdirabota.mk", "va.org.uk", "ankrecruitment.co.in",
        "acturacionlib-tol.com.mx", "h3technician.com", "otfreetraffic.net",
        "onottouch.org", "apenziraha.com", "uytra.com", "sc-lohfelden.de",
        "lberni.ca", "portaktiv.sk", "ichcoffee.ru", "ord-travel.com",
        "rdineavvocatitreviso.it", "upazena.com",
        "log-transmission-entreprise.com", "inoheld.de", "ukouri.com",
        "righttrac.com", "heteachertoolkit.com", "concepts-auto.com",
        "awrencevilleweather.com", "allouslab.net", "hirazi.blogfa.com",
        "60degree.blogsky.com", "omeditorial.com", "therside.su",
        "androvercentre.com", "olitape.ru", "sservatoreseriale.it",
        "lackjackround.com", "freeworks.com", "avenecommerce.com",
        "-bike.co.uk", "tntechnologies.in", "stutesol.com", "zoneclubs.com",
        "ath.umn.edu/~olver", "ncotexkkm.ru", "hubbybody.com",
        "kcthundernation.com", "ostmap.com", "ent-wifi.com", "killdrick.co.uk",
        "irtualeduca.org", "piritualite2000.com", "nvestingontrack.com",
        "utsuzunhikayeleri.tumblr.com", "kentoo.com", "raycor.com", "2volt.sk",
        "ornogameonline.com", "edicalyoga.in", "evue-amateur.com",
        "retaportobello.com", "abernacleatl.com", "en-mart.com", "rzanic.com",
        "ridgersteel.com", "sgguncesi.com", "lobemoving.net",
        "utodoplnky-prodej.cz", "35678.com", "objscott.com", "esource-auto.ru",
        "exnokom.com", "dainternet.com", "ions-net.com", "ostonmamas.com",
        "ecolafacil.com.br", "-daama.com", "kis.org.hk", "odrum-rentacar.com",
        "zieu.pl", "ookshopblog.com", "on-dualitypress.org",
        "lmedauniversity.org", "achelwcole.com", "omethingvalue.com",
        "luczykarze.pl", "asargrup.com", "ousageguide.com",
        "nexplainedstuff.com", "alifa.org", "reebtcd.org", "anbu-city.com",
        "uciemanet.fr", "iwoyuen.com", "ishes.dev", "ilgiyuvasi1.info",
        "xdcm.com", "ppincomereport.com", "hbs.in", "ecure128.com", "itpro.us",
        "rl.com.tw", "echgeeker.com", "naptech.com", "otalhorsechannel.com",
        "vantisport.de", "ictorfoo.com", "ost5.ir", "anouba-sugarfree.com",
        "xpomuseum.com", "oodvood.com", "anzaniaonline-cn.org", "cecat.tw",
        "enteparaviajar.com", "eadsplus.us", "ertificadoecool.com.br",
        "lp.co.jp", "elesur.sr", "stituto-besta.it", "adioboxapp.com.br",
        "nmaya0001.tumblr.com", "onnectedfamilies.org", "etzaehler.de",
        "espreslaporte.com", "remierescene.net", "obileaff.mobi",
        "eddingcollectibles.com", "ngular.dev", "huo-hot.com",
        "repaidstart.com", "n--b1agviax.xn--p1ai", "mooozesh.ir", "elun.com"
    ]

    # First part: count loaded items the filter fails to report.
    missed = 0
    for item in items:
        if item not in bloomfilter:
            missed += 1

    if missed != 0:
        print_failure(
            "\nFirst part of test failed with %d elements missed\n\n" % missed)
    else:
        print_success(
            "\nTest passed!, all elements of bloomfilter are found\n\n")

    # Second part: collect the extra domains the filter does not report.
    not_found = [item for item in nitems if item not in bloomfilter]

    print(not_found)
    # The total is the number of domains checked here (nitems), not the
    # size of the loaded list.
    print_success(
        "%d element are not malicious out of %d, and they are printed above" %
        (len(not_found), len(nitems)))
Example #23
0
 def setUp(self):
     # Fixture: a filter pre-loaded with a fixed list of words.
     words = ['hola', 'como', 'estan', 'holis']
     bloom_filter = BloomFilter(247, 5)
     self.bloom_filter, self.words = bloom_filter, words
     bloom_filter.insert_list(words)
Example #24
0
 def setUp(self):
     # Fixture: a filter built with the single constructor argument 31.
     bloom = BloomFilter(31)
     self.bloom = bloom