Esempio n. 1
0
    def test_raises_error_on_out_of_range_error_rate(self):
        '''BloomFilter() raises on out-of-range error rate'''
        # An error rate is a probability; anything below 0 or above 1
        # must be rejected at construction time.
        for bad_rate in (-1, 2):
            with self.assertRaises(ValueError):
                BloomFilter(5, bad_rate)
Esempio n. 2
0
    def test_creates_filter_with_non_integral_capacity(self):
        '''BloomFilter() creates filter with non-integral capacity'''
        # A float capacity must produce essentially the same geometry as
        # the equivalent integer capacity.
        float_filter = BloomFilter(capacity=1000.2, error_rate=1e-3)
        int_filter = BloomFilter(capacity=1000, error_rate=1e-3)

        reference_bits = int_filter.bit_count
        self.assertGreaterEqual(float_filter.bit_count, reference_bits)
        self.assertLess(float_filter.bit_count, reference_bits + 10)
        self.assertEqual(int_filter.hash_count, float_filter.hash_count)
Esempio n. 3
0
def _build_guided_bloom(prefixes, fpp, k, num_bits, root, fib, protocol='v4'):
    '''Returns a Bloom filter optimized for the `root` bin search tree,
        and `encoded_pref_lens` dict for looking up the BMP prefix length
        from hash-encoded bit sequence.
    '''
    max_shift = NUMBITS[protocol]
    pairs = prefixes['prefixes']
    total = len(pairs)  # hoisted: invariant across the loop

    # Use the default filter geometry derived from fpp unless an explicit
    # k / num_bits configuration was requested.
    if not (k or num_bits):
        bf = BloomFilter(fpp, total)
    else:
        bf = BloomFilter(fpp, total, k=k, num_bits=num_bits)

    for count, pair in enumerate(pairs):
        if count % 10000 == 0:  # report progress
            # fixed typo: "processsed" -> "processed"
            print('build processed %.3f of all prefixes' % (count / total))

        prefix, preflen = pair
        # BMP is an index, can recover prefix length using prefixes['ix2len']
        bmp, fib_val = _find_bmp(prefix,
                                 bf,
                                 root,
                                 fib,
                                 preflen - 1,
                                 prefixes['minn'],
                                 prefixes['len2ix'],
                                 prefixes['ix2len'],
                                 protocol=protocol)

        # Walk the binary search tree over prefix lengths, inserting a
        # marker at every node on the search path for this prefix.
        current = root
        count_hit = 0
        while current:
            if preflen < current.val:
                current = current.left
            elif preflen == current.val:
                # Exact-length node: insert using hash_1..hash_k.
                pref_encoded = encode_ip_prefix_pair(prefix, preflen, protocol)
                bf.insert(pref_encoded, hashes=_choose_hash_funcs(0, end=bf.k))
                break
            else:  # preflen > current.val
                # Mask the prefix down to this node's length and insert a
                # single-hash marker for the guided search.
                masked = (((1 << max_shift) - 1) <<
                          (max_shift - current.val)) & prefix
                pref_encoded = encode_ip_prefix_pair(masked, current.val,
                                                     protocol)
                bf.insert(pref_encoded, hashes=_choose_hash_funcs(0, end=1))
                count_hit += 1
                # insert pointers: extra hash bits encode the BMP index
                bf.insert(pref_encoded,
                          hashes=_choose_hash_funcs(count_hit, pattern=bmp))
                current = current.right
    return bf, root
Esempio n. 4
0
    def test_returns_positive_when_hashes_collide(self):
        '''BloomFilter.test_by_hash() returns True when hashes collide'''
        bloom_filter = BloomFilter(1000000, 1e-3)
        bloom_filter.add_by_hash('abc')

        # A unicode literal equal to the added str must hash to the
        # same buckets and therefore test positive.
        self.assertEqual(bloom_filter.test_by_hash(u'abc'), True)
Esempio n. 5
0
    def test_returns_true_positive_when_value_had_been_added(self):
        '''BloomFilter.test_by_hash() returns True after the item added'''
        bloom_filter = BloomFilter(1000000, 1e-3)

        # The exact value just added must always test positive.
        bloom_filter.add_by_hash('abc')
        self.assertEqual(bloom_filter.test_by_hash('abc'), True)
Esempio n. 6
0
def rappor(n, f, p, q, m):
    '''Encode `n` into a Bloom filter and apply RAPPOR-style noise.

    First applies the permanent randomized response (each bit flips with
    probability f/2), then the instantaneous randomized response (report 1
    with probability q for a set bit, probability p for a clear bit).
    '''
    bloom = BloomFilter()
    bits = bloom.add_data(str(n), m)

    # Permanent randomized response: flip each bit with probability f/2,
    # regardless of its current value.
    for i in range(len(bits)):
        draw = np.random.randint(0, totalnum)
        if draw / totalnum <= f / 2:
            bits[i] = 0 if bits[i] == 1 else 1

    # Instantaneous randomized response.
    for i in range(len(bits)):
        draw = np.random.randint(0, totalnum)
        if bits[i] == 1:
            if draw / totalnum <= 1 - q:
                bits[i] = 0
        elif draw / totalnum <= p:
            bits[i] = 1

    return bits
def build_bf(n, p, ref_fasta):
    '''Build a Bloom filter over the sequences of a two-line-per-record FASTA.

    Args:
        n: expected number of items (forwarded to BloomFilter).
        p: desired false-positive probability (forwarded to BloomFilter).
        ref_fasta: path to a FASTA file with alternating id/sequence lines.

    Returns:
        The populated BloomFilter. Sequences containing uncalled bases
        ('N') are skipped and counted separately.
    '''
    # call bloom filter class and output stats
    bloomf = BloomFilter(n, p)
    print("Size of bit array:{}".format(bloomf.size))
    print("False positive Probability:{}".format(bloomf.fp_prob))
    print("Number of hash functions:{}".format(bloomf.hash_count))

    N_count = 0
    read_count = 0
    # Fix: use a context manager so the file is closed even if add() raises.
    with open(ref_fasta, 'r') as mycoplasma_fasta:
        while True:
            name = mycoplasma_fasta.readline()  # read id
            if len(name) == 0:
                break  # end of file
            read = mycoplasma_fasta.readline().strip()
            if 'N' not in read:
                # do not add any uncalled bases
                bloomf.add(read)
                read_count += 1
            else:
                N_count += 1
    print('N_count = %s' % N_count)
    print('read_count = %s' % read_count)
    return bloomf
    def test_dumps(self):
        """Round-trip a populated filter through dumps()/loads()."""
        bloom_filter = BloomFilter(300, 0.0001, MURMUR128_MITZ_32)
        for value in range(100):
            bloom_filter.put(value)
        byte_array = bloom_filter.dumps()
        new_filter = BloomFilter.loads(byte_array)

        # Every externally observable property must survive the round trip.
        for attribute, description in (
            ("num_hash_functions", "num of hash functions"),
            ("strategy", "strategy"),
            ("data", "data"),
        ):
            self.assertEqual(
                getattr(new_filter, attribute),
                getattr(bloom_filter, attribute),
                "New filter's %s is expected to be the same as old filter's"
                % description,
            )
        self.assertEqual(
            new_filter.dumps(),
            byte_array,
            "New filter's dump is expected to be the same as old filter's",
        )
    def test_basic_functionality(self):
        """Smoke test: put()/might_contain()/__contains__ on ints and strings."""
        bloom_filter = BloomFilter(10000000, 0.001)
        for i in range(200):
            bloom_filter.put(i)

        for i in range(200):
            self.assertTrue(
                bloom_filter.might_contain(i),
                f"Number {i} is expected to be in bloomfilter",
            )
        # With capacity 10M and only 200 entries, false positives are
        # effectively impossible, so a strict assertFalse is safe here.
        for i in range(200, 500):
            self.assertFalse(
                bloom_filter.might_contain(i),
                f"Number {i} is NOT expected to be in bloomfilter",
            )

        words = ["hello", "world", "bloom", "filter"]
        for word in words:
            bloom_filter.put(word)

        for word in words:
            self.assertTrue(word in bloom_filter,
                            f"Word '{word}' is expected to be in bloomfilter")
        # Bug fix: the failure message previously claimed the word WAS
        # expected to be present, contradicting the assertFalse.
        self.assertFalse(
            "not_exist" in bloom_filter,
            "Word 'not_exist' is NOT expected to be in bloomfilter",
        )
Esempio n. 10
0
    def test_all_test_positive_when_hashes_collide(self):
        """BloomFilter.test_by_hash() returns False for a value never added."""
        bloom_filter = BloomFilter(1000000, 1e-3)
        bloom_filter.add_by_hash("abc")

        # "def" was never added; at this error rate it should not collide.
        self.assertEqual(bloom_filter.test_by_hash("def"), False)
Esempio n. 11
0
    def test_returns_positive_when_hashes_collide(self):
        """BloomFilter.test_by_hash() returns True when hashes collide."""
        bloom_filter = BloomFilter(1000000, 1e-3)
        bloom_filter.add_by_hash("abc")

        # A unicode literal equal to the added str hashes to the same
        # buckets, so the lookup must be positive.
        self.assertEqual(bloom_filter.test_by_hash(u"abc"), True)
Esempio n. 12
0
    def test_all_test_positive_when_hashes_collide(self):
        '''BloomFilter.test_by_hash() returns False for a value never added'''
        bloom_filter = BloomFilter(1000000, 1e-3)
        bloom_filter.add_by_hash('abc')

        # 'def' was never added; at this error rate it should not collide.
        self.assertEqual(bloom_filter.test_by_hash('def'), False)
 def _add_bloom(self):
     """Append a new, tighter-error BloomFilter and make it current."""
     # Each successive filter tightens the error rate geometrically so the
     # compound error of the whole chain stays bounded.
     tightened_error = (self.base_error *
                        self.error_tightening_ratio**len(self.bloom_filters))
     fresh_bloom = BloomFilter(self.capacity, tightened_error)
     self.bloom_filters.append(fresh_bloom)
     self.current_bloom = fresh_bloom
     return fresh_bloom
Esempio n. 14
0
def make_checker(word_file='notes/words.txt', force_lower=True):
    '''Return a checker correctly spelled words

        >>> 'army' in make_checker()
        True
        >>> 'ahmee' in make_checker()
        False

    '''
    # Bug fix: pickle data is binary. Opening the cache in text mode breaks
    # pickle.load/dump on Python 3, so use 'rb'/'wb'.
    try:
        with open('words.pickle', 'rb') as cache_file:
            return pickle.load(cache_file)
    except IOError:
        # No cache yet (or unreadable) -- rebuild from the word list.
        pass

    with open(word_file) as f:
        s = f.read()
    if force_lower:
        s = s.lower()
    bf = BloomFilter(s.split(), population=4000000, probes=12)

    with open('words.pickle', 'wb') as cache_file:
        pickle.dump(bf, cache_file)

    return bf
    def test_words(self):
        '''Ensure that strings work well'''
        vocabulary = self.load_words('words')
        test_words = self.load_words('testwords')
        bloom_filter = BloomFilter(100000, 1e-4)

        known = set(vocabulary) & set(test_words)

        # Populate the filter, counting any pre-insert collisions.
        setup_collision_count = 0
        for word in vocabulary:
            if not bloom_filter.test_by_hash(word):
                bloom_filter.add_by_hash(word)
            else:
                setup_collision_count += 1
        self.assertLess(setup_collision_count, 5)

        # Probe the test words and tally classification errors.
        false_positive_count = 0
        false_negative_count = 0
        for word in test_words:
            hit = bloom_filter.test_by_hash(word)
            if word in known and not hit:
                false_negative_count += 1
            elif word not in known and hit:
                false_positive_count += 1
        self.assertEqual(false_negative_count, 0)
        self.assertLessEqual(false_positive_count, 6)
Esempio n. 16
0
class newsSpider(scrapy.Spider):
    """Scrapy spider that crawls BBC News pages and yields paragraph text.

    A BloomFilter deduplicates outgoing links so the same URL is not
    followed twice; a class-level counter caps the number of pages crawled.
    """
    name = "news"
    start_urls = [
        'http://www.bbc.com/news',
    ]
    # Pages processed so far -- shared across all instances of the spider.
    count = 0
    n = 2000  #Number of bits
    p = 0.15  #falseProbabilityRate
    # Link-dedup filter, sized per n / p above.
    bloomf = BloomFilter(2000, 0.15)

    def parse(self, response, count=1):
        """Yield {'text': ...} items for each paragraph, then follow unseen
        links until the class-wide budget (5 pages) is exhausted."""
        mydiv = response.xpath('//div')
        for p in mydiv.xpath('.//p/text()').extract():
            # Strip mojibake characters left by a bad encoding pass upstream.
            p = p.replace(u"Â", u"").replace(u"â", u"")
            # Skip boilerplate/navigation paragraphs.
            if 'Email' in p or 'MMS' in p or 'Follow' in p or 'stories' in p or 'news' in p or 'world' in p:
                continue

            yield {'text': p}
        newsSpider.count = newsSpider.count + 1
        if newsSpider.count <= 5:
            URLlist = response.css('div a::attr("href")').extract()
            for next_page in URLlist:
                # A false positive here merely skips one unseen link --
                # acceptable for a best-effort crawl.
                if self.bloomf.check(next_page):
                    continue
                self.bloomf.add(next_page)
                newsSpider.count = newsSpider.count + 1
                if newsSpider.count >= 5:
                    break
                yield response.follow(next_page, self.parse)
Esempio n. 17
0
 def test_exercise_2(self):
     """SPV exercise: load a bloom filter on a testnet node and fetch a
     filtered (merkle) block containing the transaction to our address.

     NOTE(review): talks to the live node tbtc.programmingblockchain.com --
     requires network access to pass.
     """
     block_hash = bytes.fromhex(
         '0000000053787814ed9dd8c029d0a0a9af4ab8ec0591dc31bdc4ab31fae88ce9')
     passphrase = b'Jimmy Song Programming Blockchain'  # FILL THIS IN
     # Derive a deterministic testnet key/address from the passphrase.
     secret = little_endian_to_int(hash256(passphrase))
     private_key = PrivateKey(secret=secret)
     addr = private_key.point.address(testnet=True)
     filter_size = 30
     filter_num_functions = 5
     filter_tweak = 90210  # FILL THIS IN
     # Watch our address's hash160 via the bloom filter.
     h160 = decode_base58(addr)
     bf = BloomFilter(filter_size, filter_num_functions, filter_tweak)
     bf.add(h160)
     node = SimpleNode('tbtc.programmingblockchain.com',
                       testnet=True,
                       logging=False)
     node.handshake()
     node.send(bf.filterload())
     # Request the merkle-filtered version of the known block.
     getdata = GetDataMessage()
     getdata.add_data(FILTERED_BLOCK_DATA_TYPE, block_hash)
     node.send(getdata)
     mb = node.wait_for(MerkleBlock)
     tx = node.wait_for(Tx)
     # The matched transaction must serialize to the expected raw hex.
     self.assertEqual(
         tx.serialize().hex(),
         '0100000002a663815ab2b2ba5f53e442f9a2ea6cc11bbcd98fb1585e48a134bd870dbfbd6a000000006a47304402202151107dc2367cf5a9e2429cde0641c252374501214ce52069fbca1320180aa602201a43b5d4f91e48514c00c01521dc04f02c57f15305adc4eaad01c418f6e7a1180121031dbe3aff7b9ad64e2612b8b15e9f5e4a3130663a526df91abfb7b1bd16de5d6effffffff618b00a343488bd62751cf21f368ce3be76e3a0323fdc594a0d24f27a1155cd2000000006a473044022024c4dd043ab8637c019528b549e0b10333b2dfa83e7ca66776e401ad3fc31b6702207d4d1d73ac8940c59c57c0b7daf084953324154811c10d06d0563947a88f99b20121031dbe3aff7b9ad64e2612b8b15e9f5e4a3130663a526df91abfb7b1bd16de5d6effffffff0280969800000000001976a914ad346f8eb57dee9a37981716e498120ae80e44f788aca0ce6594000000001976a9146e13971913b9aa89659a9f53d327baa8826f2d7588ac00000000'
     )
Esempio n. 18
0
 def _sync(self):
     """Rebuild the bloom filter by scanning every key stored at self.path."""
     self.bf = BloomFilter(BF_SIZE, BF_HASH_COUNT)
     with kv_reader(self.path) as reader:
         # Only keys matter for membership; values are skipped unread.
         while reader.has_next():
             self.bf.add(reader.read_key())
             reader.skip_value()
Esempio n. 19
0
def main():
    '''Demo: populate a Bloom filter with "present" words, then probe a
    shuffled mix of present and absent words, labelling each outcome.'''
    capacity = 20
    fp_rate = 0.1
    bloom = BloomFilter(capacity, fp_rate)
    word_present = [
        'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'bloom',
        'blossom', 'bolster', 'bonny', 'bonus', 'bonuses', 'coherent',
        'cohesive', 'colorful', 'comely', 'comfort', 'gems', 'generosity',
        'generous', 'generously', 'genial'
    ]
    word_absent = [
        'bluff', 'cheater', 'hate', 'war', 'humanity', 'racism', 'hurt',
        'nuke', 'gloomy', 'facebook', 'geeksforgeeks', 'twitter'
    ]
    print('bloomfilter size: ', bloom.bit_size)
    print('false_positive_probability', bloom.false_positive_probability)
    print('hash_count: ', bloom.hash_count)

    for item in word_present:
        bloom.add(item)

    # Probe a shuffled sample of five present and five absent words.
    shuffle(word_present)
    shuffle(word_absent)
    random_list = word_present[:5] + word_absent[:5]
    shuffle(random_list)
    for word in random_list:
        print('word: ', word)
        if not bloom.check(word):
            print('word not present')
        elif word in word_absent:
            # Filter said "maybe" for a word we know was never added.
            print('false positive')
        else:
            print('word most likely member')
Esempio n. 20
0
 def create(cls, path, memtable):
     """Persist memtable entries to path, building a bloom filter of keys."""
     bloom = BloomFilter(BF_SIZE, BF_HASH_COUNT)
     with kv_writer(path) as writer:
         # Write each entry and record its key for fast membership checks.
         for key, value in memtable.entries():
             writer.write_entry(key, value)
             bloom.add(key)
     return cls(path, bloom)
 def test_exercise_4(self):
     """SPV spend exercise: sync headers from a known block, request
     filtered blocks matching our address, locate the funding output, and
     build/sign a spend that matches a known serialization.

     NOTE(review): talks to the live testnet node
     tbtc.programmingblockchain.com -- requires network access to pass.
     """
     last_block_hex = '000000000d65610b5af03d73ed67704713c9b734d87cf4b970d39a0416dd80f9'
     last_block = bytes.fromhex(last_block_hex)
     # Deterministic key/address derived from the passphrase.
     secret = little_endian_to_int(
         hash256(b'Jimmy Song Programming Blockchain'))
     private_key = PrivateKey(secret=secret)
     addr = private_key.point.address(testnet=True)
     h160 = decode_base58(addr)
     target_address = 'mwJn1YPMq7y5F8J3LkC5Hxg9PHyZ5K4cFv'
     self.assertEqual(addr, target_address)
     filter_size = 30
     filter_num_functions = 5
     filter_tweak = 90210  # FILL THIS IN
     target_h160 = decode_base58(target_address)
     target_script = p2pkh_script(target_h160)
     fee = 5000  # fee in satoshis
     node = SimpleNode('tbtc.programmingblockchain.com',
                       testnet=True,
                       logging=False)
     # Register a bloom filter watching our address's hash160.
     bf = BloomFilter(filter_size, filter_num_functions, filter_tweak)
     bf.add(h160)
     node.handshake()
     node.send(b'filterload', bf.filterload())
     # Fetch headers after last_block and verify the chain links up.
     getheaders_message = GetHeadersMessage(start_block=last_block)
     node.send(getheaders_message.command, getheaders_message.serialize())
     headers_envelope = node.wait_for_commands([HeadersMessage.command])
     stream = headers_envelope.stream()
     headers = HeadersMessage.parse(stream)
     get_data_message = GetDataMessage()
     for block in headers.blocks:
         self.assertTrue(block.check_pow())
         if last_block is not None:
             self.assertEqual(block.prev_block, last_block)
         last_block = block.hash()
         get_data_message.add_data(FILTERED_BLOCK_DATA_TYPE, last_block)
     node.send(get_data_message.command, get_data_message.serialize())
     # Scan merkleblock/tx messages until a tx paying our address appears.
     prev_tx = None
     while prev_tx is None:
         envelope = node.wait_for_commands([b'merkleblock', b'tx'])
         stream = envelope.stream()
         if envelope.command == b'merkleblock':
             mb = MerkleBlock.parse(stream)
             self.assertTrue(mb.is_valid())
         else:
             prev = Tx.parse(stream, testnet=True)
             for i, tx_out in enumerate(prev.tx_outs):
                 if tx_out.script_pubkey.address(testnet=True) == addr:
                     prev_tx = prev.hash()
                     prev_index = i
                     prev_amount = tx_out.amount
                     break
     # Spend the found output back to the target address, minus the fee.
     tx_in = TxIn(prev_tx, prev_index)
     output_amount = prev_amount - fee
     tx_out = TxOut(output_amount, target_script)
     tx_obj = Tx(1, [tx_in], [tx_out], 0, testnet=True)
     tx_obj.sign_input(0, private_key)
     self.assertEqual(
         tx_obj.serialize().hex(),
         '010000000194e631abb9e1079ec72a1616a3aa0111c614e65b96a6a4420e2cc6af9e6cc96e000000006a47304402203cc8c56abe1c0dd043afa9eb125dafbebdde2dd4cd7abf0fb1aae0667a22006e02203c95b74d0f0735bbf1b261d36e077515b6939fc088b9d7c1b7030a5e494596330121021cdd761c7eb1c90c0af0a5963e94bf0203176b4662778d32bd6d7ab5d8628b32ffffffff01f8829800000000001976a914ad346f8eb57dee9a37981716e498120ae80e44f788ac00000000'
     )
Esempio n. 22
0
 def __init__(self):
     """Connect to Cassandra and pre-load a Bloom filter with item dates."""
     self.cc = ConnectToCassandra()
     self.n, self.word_present = self.cc.get_id()  #no of items to add
     self.p = 0.05  #false positive probability
     self.bloomf = BloomFilter(self.n, self.p)
     for item in self.word_present:
         # NOTE(review): items appear to be datetime-like (they expose
         # .date()); each date is integer-encoded and stored as bytes.
         self.bloomf.add(bytes(to_integer(item.date())))
Esempio n. 23
0
 def test_byte_size_is_in_expected_range(self):
     '''BloomFilter.byte_size returns expected value'''
     # 1M items at 0.1% error -> 14377640 bits, 10 hashes, i.e. just
     # under 1.8 MB of backing storage.
     size = BloomFilter(1000000, 1e-3).byte_size
     self.assertLess(1797208, size)
     self.assertGreater(1800000, size)
Esempio n. 24
0
    def rabinKarp(self, patterns, txt):
        """Multi-pattern Rabin-Karp search.

        Hashes every (fixed-length) pattern, stores the hashes in a Bloom
        filter, then slides a rolling hash across `txt`; only windows whose
        hash may be present trigger an exact string comparison.

        Returns a dict mapping each pattern to a list of match offsets.
        Raises ValueError on empty input, a pattern longer than the text,
        or patterns of mixed lengths.
        """
        if not txt or not patterns:
            raise ValueError('Search requires text and a pattern')

        prime = 101  # a prime number (modulus)
        radix = 256  # alphabet size
        high_order = 1  # radix^(pattern_len-1) % prime, weight of the left char

        matches = {pattern: [] for pattern in patterns}

        pattern_len = len(next(iter(patterns)))  #length of first pattern
        txt_len = len(txt)

        if txt_len < pattern_len:
            raise ValueError(
                'A pattern longer than text to search cannot exist in the text.'
            )

        # high_order = pow(radix, pattern_len - 1) % prime
        for _ in range(pattern_len - 1):
            high_order = (high_order * radix) % prime

        if len(patterns) < 1:
            raise ValueError('Search requires a pattern')

        # Hash each distinct pattern, enforcing a uniform length.
        pattern_hashes = []
        for pattern in set(patterns):
            if len(pattern) != pattern_len:
                raise ValueError(
                    'Search only supports a fixed length pattern match.')
            value = 0
            for ch in pattern:
                value = (radix * value + ord(ch)) % prime
            pattern_hashes.append(value)
        bloomf = BloomFilter(pattern_hashes)

        # Hash of the first window of txt.
        window_hash = 0
        for ch in txt[:pattern_len]:
            window_hash = (radix * window_hash + ord(ch)) % prime

        # Slide through txt one character at a time.
        left, right = 0, pattern_len
        while right <= txt_len:
            if bloomf.contains(window_hash):
                candidate = txt[left:right]
                if candidate in patterns:
                    matches[candidate].append(left)
            if left + pattern_len < txt_len:
                # Roll: drop the leftmost char, append the next one.
                window_hash = (radix * (window_hash - ord(txt[left]) * high_order)
                               + ord(txt[left + pattern_len])) % prime
            left += 1
            right += 1

        return matches
Esempio n. 25
0
 def test_exercise_6(self):
     """SPV spend exercise (higher-level API): sync headers, request
     filtered blocks via a bloom filter, find the funding output, and
     build/sign a spend matching a known serialization.

     NOTE(review): talks to the live testnet node
     tbtc.programmingblockchain.com -- requires network access to pass.
     """
     last_block_hex = '000000000d65610b5af03d73ed67704713c9b734d87cf4b970d39a0416dd80f9'
     # Deterministic key/address derived from the passphrase.
     secret = little_endian_to_int(
         hash256(b'Jimmy Song Programming Blockchain'))
     private_key = PrivateKey(secret=secret)
     addr = private_key.point.address(testnet=True)
     h160 = decode_base58(addr)
     target_address = 'mwJn1YPMq7y5F8J3LkC5Hxg9PHyZ5K4cFv'
     self.assertEqual(addr, target_address)
     target_h160 = decode_base58(target_address)
     target_script = p2pkh_script(target_h160)
     fee = 5000
     node = SimpleNode('tbtc.programmingblockchain.com', testnet=True)
     # Register a bloom filter (size=30, hashes=5, tweak=90210) watching
     # our address's hash160.
     bf = BloomFilter(30, 5, 90210)
     bf.add(h160)
     node.handshake()
     node.send(bf.filterload())
     # Fetch headers after the known start block and verify the chain.
     start_block = bytes.fromhex(last_block_hex)
     getheaders = GetHeadersMessage(start_block=start_block)
     node.send(getheaders)
     headers = node.wait_for(HeadersMessage)
     last_block = None
     getdata = GetDataMessage()
     for b in headers.blocks:
         if not b.check_pow():
             raise RuntimeError('proof of work is invalid')
         if last_block is not None and b.prev_block != last_block:
             raise RuntimeError('chain broken')
         getdata.add_data(FILTERED_BLOCK_DATA_TYPE, b.hash())
         last_block = b.hash()
     node.send(getdata)
     # Scan merkleblock/tx messages until a tx paying our address appears.
     prev_tx, prev_index, prev_tx_obj = None, None, None
     while prev_tx is None:
         message = node.wait_for(MerkleBlock, Tx)
         if message.command == b'merkleblock':
             if not message.is_valid():
                 raise RuntimeError('invalid merkle proof')
         else:
             message.testnet = True
             for i, tx_out in enumerate(message.tx_outs):
                 if tx_out.script_pubkey.address(testnet=True) == addr:
                     prev_tx = message.hash()
                     prev_index = i
                     prev_amount = tx_out.amount
                     self.assertEqual(
                         message.id(),
                         '6ec96c9eafc62c0e42a4a6965be614c61101aaa316162ac79e07e1b9ab31e694'
                     )
                     self.assertEqual(i, 0)
                     break
     # Spend the found output back to the target address, minus the fee.
     tx_in = TxIn(prev_tx, prev_index)
     output_amount = prev_amount - fee
     tx_out = TxOut(output_amount, target_script)
     tx_obj = Tx(1, [tx_in], [tx_out], 0, testnet=True)
     tx_obj.sign_input(0, private_key)
     self.assertEqual(
         tx_obj.serialize().hex(),
         '010000000194e631abb9e1079ec72a1616a3aa0111c614e65b96a6a4420e2cc6af9e6cc96e000000006a47304402203cc8c56abe1c0dd043afa9eb125dafbebdde2dd4cd7abf0fb1aae0667a22006e02203c95b74d0f0735bbf1b261d36e077515b6939fc088b9d7c1b7030a5e494596330121021cdd761c7eb1c90c0af0a5963e94bf0203176b4662778d32bd6d7ab5d8628b32ffffffff01f8829800000000001976a914ad346f8eb57dee9a37981716e498120ae80e44f788ac00000000'
     )
Esempio n. 26
0
    def test_serializes_filter_serialize_without_line_feeds(self):
        '''BloomFilter serializes with base64 shield without line feeds'''
        bloom_filter = BloomFilter(100, 0.1)
        bloom_filter.add_by_hash('abcdef')

        # A newline anywhere in the payload would break single-line storage.
        self.assertEqual(bloom_filter.serialize().find('\n'), -1)
Esempio n. 27
0
 def test_bloomfilter(self):
     """Added keys probe positive; an unseen key probes negative."""
     bloom = BloomFilter(100)
     for value in xrange(50):
         bloom.add(str(value))
     # Spot-check a few members and one guaranteed non-member.
     for present in ("20", "25", "49"):
         assert present in bloom
     assert "50" not in bloom
Esempio n. 28
0
    def test_serializes_filter_serialize(self):
        '''BloomFilter can round trip serialize() -> deserialize()'''
        original = BloomFilter(100, 0.1)
        original.add_by_hash('abcdef')

        restored = BloomFilter.deserialize(original.serialize())

        # The raw bit data must survive the round trip unchanged.
        self.assertEqual(original.raw_data(), restored.raw_data())
Esempio n. 29
0
 def __init__(self, cnt, word_present):
     """Build a Bloom filter over date-encoded items of word_present."""
     self.n = cnt  # number of items to add
     self.word_present = word_present
     self.p = 0.05  # false positive probability
     self.bloomf = BloomFilter(self.n, self.p)
     for item in self.word_present:
         print(item)
         # Parse the YYYYMMDD string, then integer-encode it as bytes.
         parsed = datetime.datetime.strptime(item, '%Y%m%d')
         self.bloomf.add(bytes(to_integer(parsed)))
Esempio n. 30
0
def test():
    '''Pickle round-trip preserves membership results and hash seeds.'''
    original = BloomFilter(num_hashes=10, size_bytes=100)
    original.add('hello')

    clone = pickle.loads(pickle.dumps(original))

    assert 'hi' not in clone
    assert 'hello' in clone
    # Seed arrays must be element-wise identical after unpickling.
    assert (original.seeds == clone.seeds).all()