Ejemplo n.º 1
0
    def setUp(self):
        """Build disjoint seen/unseen link sets and a Bloom filter of the seen ones.

        After this runs:
          * self.seen_links holds 100 distinct values from random_fullname().
          * self.unseen_links holds 100 distinct values, none of which is in
            self.seen_links.
          * self.recently_consumed is a BloomFilter stored under the key
            'recently-consumed', cleared and then loaded with self.seen_links.
        """
        # Zero-argument super(): the previous super(self.__class__, self) form
        # recurses forever as soon as this test case is subclassed, because
        # self.__class__ is then the subclass rather than this class.
        super().setUp()

        # Construct a set of links that the user has seen.
        self.seen_links = set()
        while len(self.seen_links) < 100:
            self.seen_links.add(self.random_fullname())

        # Construct a set of links that the user hasn't seen.  Ensure that
        # there's no intersection between the seen set and the unseen set.
        self.unseen_links = set()
        while len(self.unseen_links) < 100:
            fullname = self.random_fullname()
            if fullname not in self.seen_links:
                self.unseen_links.add(fullname)

        # Initialize the recently consumed Bloom filter on the seen set.
        self.recently_consumed = BloomFilter(
            num_values=1000,
            false_positives=0.001,
            key='recently-consumed',
        )
        # Start from an empty filter before loading the seen links.
        self.recently_consumed.clear()
        self.recently_consumed.update(self.seen_links)
Ejemplo n.º 2
0
 def calculation_label_prob(self, labels):
     """Store the relative frequency of every distinct label in self.label_prob.

     :param labels: sequence of hashable labels (they are used as dict keys).
     :return: None; self.label_prob[label] is set to count(label) / len(labels)
         for each distinct label.  An empty sequence leaves self.label_prob
         untouched.

     The distinct labels are found with an exact Counter instead of a Bloom
     filter: a Bloom filter may report a false positive for a label it has
     never seen, which silently left that label without a probability.  The
     single counting pass also replaces the quadratic labels.count() call
     that ran once per distinct label.
     """
     # Local import: the module's import block is not visible from this method.
     from collections import Counter

     labels_total = len(labels)
     for la, occurrences in Counter(labels).items():
         self.label_prob[la] = occurrences / labels_total
Ejemplo n.º 3
0
 def test_hash_consistent(self):
     """The hasher returns an int and gives the same value for the same key."""
     hasher = BloomFilter(10, 5).get_hasher()
     key = 'sdlksdlkf'
     first, second = hasher(key), hasher(key)
     self.assertEqual(type(first), int)
     self.assertEqual(first, second)
Ejemplo n.º 4
0
    def test_size_and_num_hashes(self):
        """size() and num_hashes() match the expected values for each sizing."""
        # (num_values, false_positives, expected size, expected num_hashes)
        cases = (
            (100, 0.1, 480, 3),
            (100, 0.01, 960, 7),
            (1000, 0.1, 4800, 3),
            (1000, 0.01, 9592, 7),
        )
        for num_values, false_positives, size, num_hashes in cases:
            dilberts = BloomFilter(
                num_values=num_values,
                false_positives=false_positives,
            )
            assert dilberts.size() == size
            assert dilberts.num_hashes() == num_hashes
Ejemplo n.º 5
0
 def test_init_with_iterable(self):
     """A filter built from an iterable contains exactly those elements."""
     dilberts = BloomFilter({'rajiv', 'raj'})
     # No key was passed, so the key carries the random-key prefix.
     assert dilberts.key.startswith(BloomFilter._RANDOM_KEY_PREFIX)
     for member in ('rajiv', 'raj'):
         assert member in dilberts
     for stranger in ('dan', 'eric'):
         assert stranger not in dilberts
     assert len(dilberts) == 2
Ejemplo n.º 6
0
 def test_init(self):
     """A filter built with no arguments has a random key and is empty."""
     dilberts = BloomFilter()
     assert dilberts.key.startswith(BloomFilter._RANDOM_KEY_PREFIX)
     for stranger in ('rajiv', 'raj', 'dan', 'eric'):
         assert stranger not in dilberts
     assert len(dilberts) == 0
Ejemplo n.º 7
0
 def testFalseNegativeNeverHappens(self):
     """After each insert, every value inserted so far must still be found."""
     n = 10
     bf = BloomFilter(n, n)
     for i in range(n):
         bf.insert(str(i))
         # Values 0..i have been inserted at this point; none may be missed.
         # NOTE(review): insert() receives str(i) while query() receives the
         # int — presumably query() stringifies its argument; confirm.
         for j in range(i + 1):
             self.assertTrue(bf.query(j))
Ejemplo n.º 8
0
 def testNumberOfSetBitsNeverDecreases(self):
     """The value of bf.count(True) must not go down across inserts."""
     n = 10
     bf = BloomFilter(n, n)
     previous = 0
     for i in range(n):
         bf.insert(str(i))
         current = bf.count(True)
         self.assertGreaterEqual(current, previous)
         previous = current
def get_dictionary_bloom(file_name):
    """
    Load a dictionary text file into a Bloom filter data structure.

    Every line of the file is stripped of surrounding whitespace and
    inserted into a fresh BloomFilter.

    :param str file_name: name of the file storing the dictionary
    :return BloomFilter: filter containing every stripped line
    """
    bloom = BloomFilter()
    with open(file_name) as handle:
        entries = (raw.strip() for raw in handle)
        for entry in entries:
            bloom.insert(entry)
    return bloom
Ejemplo n.º 10
0
def main():
    """Greet, create a MongoDB client, then demo a Bloom filter lookup.

    Prints "Server not available" if creating the client raises
    ConnectionError, and continues with the Bloom filter demo either way.
    """
    print("Hello!")
    try:
        # NOTE(review): the connection URI embeds credentials in source, and
        # `client` is never used after this point — confirm both are intended.
        client = MongoClient("mongodb+srv://amartya:[email protected]/test?retryWrites=true&w=majority")
    except ConnectionError:
        print("Server not available")

    bloom_test = BloomFilter(20, 0.02)
    # The same key is added twice on purpose, as in the original demo.
    for _ in range(2):
        bloom_test.add("basu")

    message = (
        "Object exists"
        if "basu" in bloom_test
        else "The searched item does NOT exist in the cache. Read from persistent datastore"
    )
    print(message)
Ejemplo n.º 11
0
    def test_clear(self):
        """clear() removes every member and resets the length to zero."""
        everyone = ('rajiv', 'raj', 'dan', 'eric')
        dilberts = BloomFilter({'rajiv', 'raj'})

        def check(members):
            # Membership of every known name, then the reported length.
            for name in everyone:
                assert (name in dilberts) == (name in members)
            assert len(dilberts) == len(members)

        check({'rajiv', 'raj'})

        dilberts.clear()
        check(set())
Ejemplo n.º 12
0
    def test_add(self):
        """add() inserts one element; re-adding an element changes nothing."""
        everyone = ('rajiv', 'raj', 'dan', 'eric')
        dilberts = BloomFilter()

        def check(members):
            # Membership of every known name, then the reported length.
            for name in everyone:
                assert (name in dilberts) == (name in members)
            assert len(dilberts) == len(members)

        check(set())

        # (name to add, members expected afterwards)
        steps = (
            ('rajiv', {'rajiv'}),
            ('raj', {'rajiv', 'raj'}),
            ('rajiv', {'rajiv', 'raj'}),
            ('raj', {'rajiv', 'raj'}),
            ('dan', {'rajiv', 'raj', 'dan'}),
            ('eric', {'rajiv', 'raj', 'dan', 'eric'}),
        )
        for name, members in steps:
            dilberts.add(name)
            check(members)
Ejemplo n.º 13
0
def test_bloom_filter(hash_fn, hash_size, hash_qty, lex):
    """
    Build a Bloom filter and measure how often it wrongly accepts a word.

    The filter is created from (hash_fn, hash_size, hash_qty) and loaded via
    load_words(wordfile); `wordfile` is a name taken from the enclosing
    scope.  10000 words from rand_word() are then looked up, and a lookup
    that succeeds for a word not in `lex` counts as a failure.

    Returns the number of failures divided by 10000.
    """
    trials = 10000
    bf = BloomFilter(hash_fn, hash_size, hash_qty)
    bf.load_words(wordfile)

    # check precision
    prec_fails = 0
    for _ in range(trials):
        candidate = rand_word()
        if not bf.lookup(candidate):
            continue
        if candidate not in lex:
            prec_fails += 1

    return prec_fails / trials
Ejemplo n.º 14
0
 def __init__(self,
              addrs,
              params=MAINNET,
              user_agent="/pyBitcoin:0.1/",
              max_connections=10):
     """Store the configuration, reset the bookkeeping and connect to peers.

     addrs, params, user_agent and max_connections are stored unchanged on
     the instance.  The peer list and the inventory, pending_txs and
     subscriptions maps start empty, and connect_to_peers() is called last.
     """
     # Caller-supplied configuration.
     self.addrs, self.params = addrs, params
     self.user_agent, self.max_connections = user_agent, max_connections

     # Bookkeeping, initially empty.
     self.peers = []
     self.inventory, self.pending_txs, self.subscriptions = {}, {}, {}

     # Third argument is a fresh random 32-bit value per instance.
     # NOTE(review): presumably (elements, false-positive rate, tweak, flags)
     # — confirm against the BloomFilter signature.
     tweak = random.getrandbits(32)
     self.bloom_filter = BloomFilter(3, 0.01, tweak, BloomFilter.UPDATE_NONE)

     self.connect_to_peers()
Ejemplo n.º 15
0
    def test_update(self):
        """update() takes any number of iterables and adds all their elements."""
        everyone = ('rajiv', 'raj', 'dan', 'eric', 'jenny', 'will', 'rhodes')
        dilberts = BloomFilter()

        def check(members):
            # Membership of every known name, then the reported length.
            for name in everyone:
                assert (name in dilberts) == (name in members)
            assert len(dilberts) == len(members)

        check(set())

        first_four = {'rajiv', 'raj', 'dan', 'eric'}
        first_six = first_four | {'jenny', 'will'}
        # (iterables passed to update(), members expected afterwards)
        steps = (
            (({'rajiv', 'raj'}, {'dan', 'eric'}), first_four),
            (({'eric', 'jenny', 'will'},), first_six),
            ((set(),), first_six),
        )
        for iterables, members in steps:
            dilberts.update(*iterables)
            check(members)
Ejemplo n.º 16
0
 def testInputIsFalsePositiveUntilInserted(self):
     """Once a value is a false positive it stays one until it is inserted.

     Values 0..n-1 are inserted one at a time.  After each insert, every
     value not yet inserted is queried: one that was already reported
     present must still be reported present, and newly reported ones are
     recorded.  At least one false positive must have occurred by the end.
     """
     n = 100
     # Floor division keeps the first argument an int on Python 3 as well;
     # n / 2 would pass the float 50.0 there.
     bf = BloomFilter(n // 2, n)
     fp = [False] * n
     inserted = [False] * n
     fp_cnt = 0
     for i in range(n):
         bf.insert(str(i))
         inserted[i] = True
         for j in range(n):
             if inserted[j]:
                 continue
             # It was false positive before, it must continue
             # to be (since it was not inserted).
             if fp[j]:
                 self.assertTrue(bf.query(j))
             # Update false positives list
             elif bf.query(j):
                 fp[j] = True
                 fp_cnt += 1
     # We're inserting more elements than the size of the array, so
     # there must be false positives.
     self.assertTrue(fp_cnt > 0)
Ejemplo n.º 17
0
def bloom_filter_run(n, m, k=None):
    """Insert 0..n-1 in random order and track the false-positive rate.

    A BloomFilter(m, n, k) is filled one key at a time.  After each insert,
    every key not yet inserted is queried, and the fraction reported present
    is recorded.

    :param n: number of keys to insert (keys are 0..n-1).
    :param m: first argument passed to BloomFilter.
    :param k: third argument passed to BloomFilter (None by default).
    :return: list of false-positive fractions, one per insert that still
        left at least one key not inserted (so the final insert adds none).
    """
    # random.shuffle needs a mutable sequence; a bare range() raises
    # TypeError on Python 3, so materialise it as a list first.
    keys = list(range(n))
    # Random sampling without replacement
    random.shuffle(keys)
    probs = []
    # Named `bloom` so the builtin filter() is not shadowed.
    bloom = BloomFilter(m, n, k)
    inserted = [False] * n

    for entry in keys:
        bloom.insert(str(entry))

        inserted[entry] = True
        false_positives, total = 0, 0
        # Compute false positives over the keys not inserted yet.
        for probe in range(n):
            if not inserted[probe]:
                if bloom.query(probe):
                    false_positives += 1
                total += 1
        if total != 0:
            probs.append(false_positives * 1.0 / total)
    return probs
Ejemplo n.º 18
0
 def setUp(self):
     """Create two BloomFilter objects that share the key 'dilberts'.

     self.thread1 is cleared right after construction; self.thread2 is
     constructed afterwards with the same key.
     """
     # Zero-argument super(): the previous super(self.__class__, self) form
     # recurses forever as soon as this test case is subclassed.
     super().setUp()
     self.thread1 = BloomFilter(key='dilberts')
     self.thread1.clear()
     self.thread2 = BloomFilter(key='dilberts')
Ejemplo n.º 19
0
 def test_clear_gets_stored(self):
     """When we clear() all elements, ensure that we Memcache the bit array."""
     self.dilberts.clear()
     # A second filter on the same key must see the same bit array.
     reloaded = BloomFilter(key='dilberts')
     assert reloaded._bit_array == self.dilberts._bit_array
Ejemplo n.º 20
0
 def test_update_gets_stored(self):
     """When we update() with elements, ensure that we Memcache the bit array."""
     newcomers = {'dan', 'eric'}
     self.dilberts.update(newcomers)
     # A second filter on the same key must see the same bit array.
     reloaded = BloomFilter(key='dilberts')
     assert reloaded._bit_array == self.dilberts._bit_array
Ejemplo n.º 21
0
from bloom import BloomFilter
from random import shuffle

# The two values the filter is built from (presumably the target
# false-positive rate and the expected number of entries — confirm against
# the BloomFilter signature).
error = 0.1
entries = 10

bloom_obj = BloomFilter(error, entries)

# Report the `bits` and `hash` attributes of the freshly built filter.
print("Size of vector array is:{}".format(bloom_obj.bits))
print("Optimal number of Hash Functions:{}\n".format(bloom_obj.hash))

# Words that are added to the filter.
# NOTE(review): this list holds 11 words while `entries` is 10 — confirm
# that adding more words than `entries` is intended.
word_present = [
    'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'bloom',
    'blossom', 'bolster', 'bonny', 'bonus', 'bonuses'
]

# Words that are never added to the filter.
word_absent = [
    'bluff', 'cheater', 'hate', 'war', 'humanity', 'racism', 'hurt', 'nuke',
    'gloomy', 'facebook'
]

# Add every word of word_present to the filter.
for item in word_present:
    bloom_obj.add(item)

# Shuffle both lists in place before sampling from them.
shuffle(word_present)
shuffle(word_absent)

# Ten of the added words plus all the absent ones, in random order.
test_words = word_present[:10] + word_absent
shuffle(test_words)
Ejemplo n.º 22
0
 def test_init_gets_stored(self):
     """When we __init__() on an iterable, ensure we Memcache the bit array."""
     # A second filter on the same key must see the same bit array.
     reloaded = BloomFilter(key='dilberts')
     assert reloaded._bit_array == self.dilberts._bit_array
Ejemplo n.º 23
0
from bloom import BloomFilter
from random import shuffle

n = 20  # number of elements to add
p = 0.05  # false-positive probability

bloom_filtro = BloomFilter(n, p)
# Report the size, fp_prob and hash_count attributes of the new filter.
print("Tamaño del array:{}".format(bloom_filtro.size))
print("Probabilidad de falsos positivos:{}".format(bloom_filtro.fp_prob))
print("Número de funciones hash:{}".format(bloom_filtro.hash_count))

# Usernames to be added to the filter.
# NOTE(review): this list holds 21 names while `n` is 20 — confirm that
# adding more names than `n` is intended.
nombres_existentes = [
    'ironman', 'thor', 'american_captain', 'spiderman', 'loki', 'wolverine',
    'black_widow', 'hulk', 'deadpool', 'nick_fury', 'thanos', 'Dr_strange',
    'venon', 'odin', 'magneto', 'black_panter', 'rocket', 'gamora', 'ultron',
    'groot', 'ant_man'
]

# Names that are never added to the filter.
nombres_no_existentes = [
    'superman', 'batman', 'wonder_woman', 'green_Lantern', 'he_Man', 'batgirl',
    'lion_O', 'shazam', 'aquaman', 'green_arrow', 'flash', 'tygro', 'Cheetara'
]

# Add every existing username to the filter.
for item in nombres_existentes:
    bloom_filtro.add(item)

# Shuffle both lists in place.
shuffle(nombres_existentes)
shuffle(nombres_no_existentes)
Ejemplo n.º 24
0
 def test_repr(self):
     """repr() of a keyed filter shows the class name and the key."""
     assert repr(BloomFilter(key='dilberts')) == '<BloomFilter key=dilberts>'
Ejemplo n.º 25
0
from hashlib import sha1
from bloom import BloomFilter
from bottle import route, request, run, error

# Module-level filter queried by the /check handler, built from the argument
# "pwned-passwords-1.0u2.bloom" (presumably a file path — confirm).
# NOTE(review): the name `filter` shadows the Python builtin.
filter = BloomFilter("pwned-passwords-1.0u2.bloom")


@route('/check')
def check():
    """Answer "BAD" if the `password` query parameter is in the filter, else "GOOD"."""
    candidate = request.query.password
    return "BAD" if filter.contains(candidate) else "GOOD"


# Start the bottle server on host 0.0.0.0, port 8080, with debug disabled.
# This runs at import time because the call sits at module level.
run(host='0.0.0.0', port=8080, debug=False)
Ejemplo n.º 26
0
 def test_repr(self):
     """repr() spells out the num_bits and num_hashers the filter was built with."""
     bloom = BloomFilter(num_bits=10, num_hashers=5)
     expected = "BloomFilter(num_bits=10, num_hashers=5)"
     self.assertEqual(repr(bloom), expected)
Ejemplo n.º 27
0
    plt.show()


def test_bloom():
    """Print the measured false-positive rate for several target rates.

    N distinct values are drawn from range(1_000_000).  For each target
    rate, a filter sized by BloomFilter.optimal_size is built from them and
    probed with 100_000 random values; only probes that were not inserted
    count towards the estimate.
    """
    # Draw N distinct values from range(1_000_000).
    N = 50_000
    s = set(np.random.choice(1_000_000, N, replace=False))
    print(f"Num inserted values: {N}, min: {min(s)}, max: {max(s)}")

    for p in [0.05, 0.1, 0.2]:
        M, d = BloomFilter.optimal_size(p, N)
        print(f"Desired FP rate: {100*p:.2f}%, size of Bloom filter: "
              f"{M} bits (with {d} hash functions)")

        bf = BloomFilter(M, d, s)

        # fp: probes wrongly reported present; tn: probes correctly rejected.
        fp, tn = 0, 0
        for x in np.random.choice(1_000_000, 100_000):
            if x in s:
                # Inserted values say nothing about false positives.
                continue
            if x in bf:
                fp += 1
            else:
                tn += 1
        print(
            f"Estimated FP rate: {100*fp/(fp + tn):.2f}% (from {fp + tn} negative samples)"
        )


if __name__ == '__main__':
Ejemplo n.º 28
0
 def test_add_gets_stored(self):
     """When we add() an element, ensure that we Memcache the bit array."""
     self.dilberts.add('dan')
     # A second filter on the same key must see the same bit array.
     reloaded = BloomFilter(key='dilberts')
     assert reloaded._bit_array == self.dilberts._bit_array
Ejemplo n.º 29
0
def create_bloom(set, capacity=3000, error_rate=0.001):
    """Return a new BloomFilter containing every element of `set`.

    `capacity` and `error_rate` are forwarded unchanged to the BloomFilter
    constructor.  The parameter name `set` shadows the builtin; it is kept
    because callers may pass it by keyword.
    """
    bloom = BloomFilter(capacity=capacity, error_rate=error_rate)
    for element in set:
        bloom.add(element)
    return bloom
Ejemplo n.º 30
0
 def setUp(self):
     """Create self.dilberts, a filter of {'rajiv', 'raj'} under the key 'dilberts'."""
     # Zero-argument super(): the previous super(self.__class__, self) form
     # recurses forever as soon as this test case is subclassed.
     super().setUp()
     self.dilberts = BloomFilter({'rajiv', 'raj'}, key='dilberts')