Esempio n. 1
0
class TestBloomFilter(unittest.TestCase):
    """Unit tests for BloomFilter membership lookups."""

    def setUp(self):
        # A large bit array with 7 hash functions keeps the
        # false-positive probability negligible for four items.
        self.size = 500000
        self.hash_count = 7

        self.bf = BloomFilter(self.size, self.hash_count)
        for item in ('abc', 'xyz', 'foo', 'bar'):
            self.bf.add(item)

    def _cleanup(self):
        # Release the (potentially large) filter between tests.
        # (`del self.bf` followed by reassignment was redundant.)
        if self.bf:
            self.bf = None

    def test_lookup_yes(self):
        # An inserted item must always report present.
        self.assertTrue(self.bf.lookup('foo'))

    def test_lookup_no(self):
        # With this sizing, an absent item should not collide.
        self.assertFalse(self.bf.lookup('hello'))

    def tearDown(self):
        self._cleanup()
def build_bf(n, p, ref_fasta):
    """Build a BloomFilter from reads in a 2-line-per-record FASTA file.

    Args:
        n: expected number of items (BloomFilter sizing parameter).
        p: desired false-positive probability.
        ref_fasta: path to the FASTA file (id line followed by sequence line).

    Returns:
        The populated BloomFilter.
    """
    # call bloom filter class and output stats
    bloomf = BloomFilter(n, p)
    print("Size of bit array:{}".format(bloomf.size))
    print("False positive Probability:{}".format(bloomf.fp_prob))
    print("Number of hash functions:{}".format(bloomf.hash_count))

    N_count = 0      # reads skipped because they contain uncalled bases
    read_count = 0   # reads actually inserted into the filter
    # Context manager guarantees the handle is closed on every exit path
    # (the original leaked the file object).
    with open(ref_fasta, 'r') as mycoplasma_fasta:
        while True:
            name = mycoplasma_fasta.readline()  # read id
            if len(name) == 0:
                break  # end of file
            read = mycoplasma_fasta.readline().strip()
            if 'N' not in read:
                # do not add any uncalled bases
                bloomf.add(read)
                read_count += 1
            else:
                N_count += 1
    print('N_count = %s' % N_count)
    print('read_count = %s' % read_count)
    return bloomf
Esempio n. 3
0
class BloomFilterMR(MRJob):
    """MapReduce job that keeps only temperature records matching a hot list.

    A Bloom filter is built once per mapper (mapper_init) from hot_list;
    the mapper then emits (city, (temp, timestamp)) only for temperatures
    the filter reports as present.
    """

    def __init__(self, *args, **kwargs):
        super(BloomFilterMR, self).__init__(*args, **kwargs)
        self.n = 20    # expected number of items in the filter
        self.p = 0.05  # acceptable false-positive probability
        self.hot_list = [1, 8, 14, 12, 23, 31, 55]  # values to match

    # defining steps
    def steps(self):
        # Single map-only step; no reducer is required.
        return [
            MRStep(mapper_init=self.mapper_init,
                   mapper=self.mapper)
        ]

    def mapper_init(self):
        # NOTE: the original body was indented with tabs while the rest of
        # the class used spaces — a TabError under Python 3. Normalized.
        self.bloomf = BloomFilter(self.n, self.p)
        for elem in self.hot_list:
            self.bloomf.add(str(elem))

    # MapReduce Phase 1 : convert temperature data into city,day,temp,temp_count
    def mapper(self, _, line):
        (city, temp, timestamp) = line.split('|')
        if self.bloomf.check(temp):
            yield city, (temp, timestamp)
Esempio n. 4
0
 def test_exercise_2(self):
     """Fetch one Bloom-filtered transaction for our testnet address.

     Loads a Bloom filter containing our address's hash160 onto a testnet
     node, requests a single filtered block, and checks the matched
     transaction's serialization.  Network test: requires
     tbtc.programmingblockchain.com to be reachable.
     """
     block_hash = bytes.fromhex(
         '0000000053787814ed9dd8c029d0a0a9af4ab8ec0591dc31bdc4ab31fae88ce9')
     passphrase = b'Jimmy Song Programming Blockchain'  # FILL THIS IN
     # Private key is derived deterministically from the passphrase hash.
     secret = little_endian_to_int(hash256(passphrase))
     private_key = PrivateKey(secret=secret)
     addr = private_key.point.address(testnet=True)
     filter_size = 30
     filter_num_functions = 5
     filter_tweak = 90210  # FILL THIS IN
     # The filter matches on the address's hash160, not the base58 string.
     h160 = decode_base58(addr)
     bf = BloomFilter(filter_size, filter_num_functions, filter_tweak)
     bf.add(h160)
     node = SimpleNode('tbtc.programmingblockchain.com',
                       testnet=True,
                       logging=False)
     node.handshake()
     # Install the Bloom filter on the remote node so it relays only
     # matching transactions.
     node.send(bf.filterload())
     getdata = GetDataMessage()
     # Request a filtered (merkle) block rather than the full block.
     getdata.add_data(FILTERED_BLOCK_DATA_TYPE, block_hash)
     node.send(getdata)
     mb = node.wait_for(MerkleBlock)
     tx = node.wait_for(Tx)
     self.assertEqual(
         tx.serialize().hex(),
         '0100000002a663815ab2b2ba5f53e442f9a2ea6cc11bbcd98fb1585e48a134bd870dbfbd6a000000006a47304402202151107dc2367cf5a9e2429cde0641c252374501214ce52069fbca1320180aa602201a43b5d4f91e48514c00c01521dc04f02c57f15305adc4eaad01c418f6e7a1180121031dbe3aff7b9ad64e2612b8b15e9f5e4a3130663a526df91abfb7b1bd16de5d6effffffff618b00a343488bd62751cf21f368ce3be76e3a0323fdc594a0d24f27a1155cd2000000006a473044022024c4dd043ab8637c019528b549e0b10333b2dfa83e7ca66776e401ad3fc31b6702207d4d1d73ac8940c59c57c0b7daf084953324154811c10d06d0563947a88f99b20121031dbe3aff7b9ad64e2612b8b15e9f5e4a3130663a526df91abfb7b1bd16de5d6effffffff0280969800000000001976a914ad346f8eb57dee9a37981716e498120ae80e44f788aca0ce6594000000001976a9146e13971913b9aa89659a9f53d327baa8826f2d7588ac00000000'
     )
Esempio n. 5
0
def main():
    """Demo: insert known words into a Bloom filter, then probe a mixed sample."""
    number_of_items = 20
    false_positive_probability = 0.1
    bloom = BloomFilter(number_of_items, false_positive_probability)
    word_present = [
        'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'bloom',
        'blossom', 'bolster', 'bonny', 'bonus', 'bonuses', 'coherent',
        'cohesive', 'colorful', 'comely', 'comfort', 'gems', 'generosity',
        'generous', 'generously', 'genial'
    ]
    word_absent = [
        'bluff', 'cheater', 'hate', 'war', 'humanity', 'racism', 'hurt',
        'nuke', 'gloomy', 'facebook', 'geeksforgeeks', 'twitter'
    ]
    print('bloomfilter size: ', bloom.bit_size)
    print('false_positive_probability', bloom.false_positive_probability)
    print('hash_count: ', bloom.hash_count)

    for entry in word_present:
        bloom.add(entry)
    shuffle(word_present)
    shuffle(word_absent)
    # Probe a shuffled mixture of five present and five absent words.
    random_list = word_present[:5] + word_absent[:5]
    shuffle(random_list)
    for word in random_list:
        print('word: ', word)
        if not bloom.check(word):
            print('word not present')
        elif word in word_absent:
            # Filter said "present" for a word we never added.
            print('false positive')
        else:
            print('word most likely member')
 def test_exercise_4(self):
     """SPV exercise: locate our UTXO via a Bloom filter and spend it.

     Loads a Bloom filter with our address hash onto a testnet node, walks
     the headers after last_block_hex, requests filtered blocks, finds the
     transaction paying our address, then builds, signs, and verifies the
     spending transaction.  Network test (tbtc.programmingblockchain.com).
     """
     last_block_hex = '000000000d65610b5af03d73ed67704713c9b734d87cf4b970d39a0416dd80f9'
     last_block = bytes.fromhex(last_block_hex)
     # Deterministic private key derived from the passphrase hash.
     secret = little_endian_to_int(
         hash256(b'Jimmy Song Programming Blockchain'))
     private_key = PrivateKey(secret=secret)
     addr = private_key.point.address(testnet=True)
     h160 = decode_base58(addr)
     target_address = 'mwJn1YPMq7y5F8J3LkC5Hxg9PHyZ5K4cFv'
     self.assertEqual(addr, target_address)
     filter_size = 30
     filter_num_functions = 5
     filter_tweak = 90210  # FILL THIS IN
     target_h160 = decode_base58(target_address)
     target_script = p2pkh_script(target_h160)
     fee = 5000  # fee in satoshis
     node = SimpleNode('tbtc.programmingblockchain.com',
                       testnet=True,
                       logging=False)
     bf = BloomFilter(filter_size, filter_num_functions, filter_tweak)
     bf.add(h160)
     node.handshake()
     # Install the Bloom filter so the node relays only matching txs.
     node.send(b'filterload', bf.filterload())
     getheaders_message = GetHeadersMessage(start_block=last_block)
     node.send(getheaders_message.command, getheaders_message.serialize())
     headers_envelope = node.wait_for_commands([HeadersMessage.command])
     stream = headers_envelope.stream()
     headers = HeadersMessage.parse(stream)
     get_data_message = GetDataMessage()
     for block in headers.blocks:
         # Every header must have valid proof-of-work and chain onto the
         # previous block's hash.
         self.assertTrue(block.check_pow())
         if last_block is not None:
             self.assertEqual(block.prev_block, last_block)
         last_block = block.hash()
         get_data_message.add_data(FILTERED_BLOCK_DATA_TYPE, last_block)
     node.send(get_data_message.command, get_data_message.serialize())
     prev_tx = None
     # Consume merkleblock/tx messages until an output paying us appears.
     while prev_tx is None:
         envelope = node.wait_for_commands([b'merkleblock', b'tx'])
         stream = envelope.stream()
         if envelope.command == b'merkleblock':
             mb = MerkleBlock.parse(stream)
             self.assertTrue(mb.is_valid())
         else:
             prev = Tx.parse(stream, testnet=True)
             for i, tx_out in enumerate(prev.tx_outs):
                 if tx_out.script_pubkey.address(testnet=True) == addr:
                     prev_tx = prev.hash()
                     prev_index = i
                     prev_amount = tx_out.amount
                     break
     # Spend the found UTXO: one input, one output, minus the fee.
     tx_in = TxIn(prev_tx, prev_index)
     output_amount = prev_amount - fee
     tx_out = TxOut(output_amount, target_script)
     tx_obj = Tx(1, [tx_in], [tx_out], 0, testnet=True)
     tx_obj.sign_input(0, private_key)
     self.assertEqual(
         tx_obj.serialize().hex(),
         '010000000194e631abb9e1079ec72a1616a3aa0111c614e65b96a6a4420e2cc6af9e6cc96e000000006a47304402203cc8c56abe1c0dd043afa9eb125dafbebdde2dd4cd7abf0fb1aae0667a22006e02203c95b74d0f0735bbf1b261d36e077515b6939fc088b9d7c1b7030a5e494596330121021cdd761c7eb1c90c0af0a5963e94bf0203176b4662778d32bd6d7ab5d8628b32ffffffff01f8829800000000001976a914ad346f8eb57dee9a37981716e498120ae80e44f788ac00000000'
     )
Esempio n. 7
0
def test_bloom():
    """Insert 100k unique UUID strings and report the filter's memory footprint."""
    data = (str(uuid.uuid1()) for i in range(100000))
    # Renamed from `filter`, which shadowed the builtin.
    bloom = BloomFilter(100000, 0.0001)
    for item in data:
        # `x not in y` is the idiomatic spelling of `not x in y`.
        if item not in bloom:
            bloom.add(item)
    # print() call works under both Python 2 and 3; the original used a
    # Python 2 print statement (a SyntaxError under Python 3).
    print("{name} costs {bytes} bytes.".format(name=sys._getframe().f_code.co_name, bytes=bloom.container_size()))
Esempio n. 8
0
 def create(cls, path, memtable):
     """Flush *memtable* to disk at *path*; return an instance with a fresh Bloom filter."""
     bloom = BloomFilter(BF_SIZE, BF_HASH_COUNT)
     with kv_writer(path) as writer:
         # Persist each entry and record its key in the filter as we go.
         for key, value in memtable.entries():
             writer.write_entry(key, value)
             bloom.add(key)
     return cls(path, bloom)
Esempio n. 9
0
 def test_exercise_6(self):
     """SPV exercise: find our UTXO using wait_for() helpers and spend it.

     Same flow as exercise 4 but driven through node.wait_for(...) message
     classes rather than raw command envelopes.  Network test.
     """
     last_block_hex = '000000000d65610b5af03d73ed67704713c9b734d87cf4b970d39a0416dd80f9'
     # Deterministic private key derived from the passphrase hash.
     secret = little_endian_to_int(
         hash256(b'Jimmy Song Programming Blockchain'))
     private_key = PrivateKey(secret=secret)
     addr = private_key.point.address(testnet=True)
     h160 = decode_base58(addr)
     target_address = 'mwJn1YPMq7y5F8J3LkC5Hxg9PHyZ5K4cFv'
     self.assertEqual(addr, target_address)
     target_h160 = decode_base58(target_address)
     target_script = p2pkh_script(target_h160)
     fee = 5000
     node = SimpleNode('tbtc.programmingblockchain.com', testnet=True)
     # 30-byte filter, 5 hash functions, tweak 90210.
     bf = BloomFilter(30, 5, 90210)
     bf.add(h160)
     node.handshake()
     node.send(bf.filterload())
     start_block = bytes.fromhex(last_block_hex)
     getheaders = GetHeadersMessage(start_block=start_block)
     node.send(getheaders)
     headers = node.wait_for(HeadersMessage)
     last_block = None
     getdata = GetDataMessage()
     for b in headers.blocks:
         if not b.check_pow():
             raise RuntimeError('proof of work is invalid')
         # Headers must chain: each prev_block must equal the last hash.
         if last_block is not None and b.prev_block != last_block:
             raise RuntimeError('chain broken')
         getdata.add_data(FILTERED_BLOCK_DATA_TYPE, b.hash())
         last_block = b.hash()
     node.send(getdata)
     prev_tx, prev_index, prev_tx_obj = None, None, None
     # Scan filtered results until an output paying our address appears.
     while prev_tx is None:
         message = node.wait_for(MerkleBlock, Tx)
         if message.command == b'merkleblock':
             if not message.is_valid():
                 raise RuntimeError('invalid merkle proof')
         else:
             message.testnet = True
             for i, tx_out in enumerate(message.tx_outs):
                 if tx_out.script_pubkey.address(testnet=True) == addr:
                     prev_tx = message.hash()
                     prev_index = i
                     prev_amount = tx_out.amount
                     self.assertEqual(
                         message.id(),
                         '6ec96c9eafc62c0e42a4a6965be614c61101aaa316162ac79e07e1b9ab31e694'
                     )
                     self.assertEqual(i, 0)
                     break
     # Build, sign, and verify the spending transaction.
     tx_in = TxIn(prev_tx, prev_index)
     output_amount = prev_amount - fee
     tx_out = TxOut(output_amount, target_script)
     tx_obj = Tx(1, [tx_in], [tx_out], 0, testnet=True)
     tx_obj.sign_input(0, private_key)
     self.assertEqual(
         tx_obj.serialize().hex(),
         '010000000194e631abb9e1079ec72a1616a3aa0111c614e65b96a6a4420e2cc6af9e6cc96e000000006a47304402203cc8c56abe1c0dd043afa9eb125dafbebdde2dd4cd7abf0fb1aae0667a22006e02203c95b74d0f0735bbf1b261d36e077515b6939fc088b9d7c1b7030a5e494596330121021cdd761c7eb1c90c0af0a5963e94bf0203176b4662778d32bd6d7ab5d8628b32ffffffff01f8829800000000001976a914ad346f8eb57dee9a37981716e498120ae80e44f788ac00000000'
     )
Esempio n. 10
0
 def test_bloomfilter(self):
     """Strings "0".."49" are inserted; membership must hold and "50" must miss."""
     bloom = BloomFilter(100)
     for value in xrange(50):
         bloom.add(str(value))
     # Spot-check a few inserted keys, then one never-inserted key.
     for present in ("20", "25", "49"):
         assert present in bloom
     assert "50" not in bloom
Esempio n. 11
0
 def test_bloomfilter(self):
     """Inserted keys "0".."49" must test positive; "50" must test negative."""
     bloom = BloomFilter(100)
     for n in xrange(50):
         bloom.add(str(n))
     # Sample inserted keys, then verify an absent one misses.
     for key in ("20", "25", "49"):
         assert key in bloom
     assert "50" not in bloom
Esempio n. 12
0
def test():
    """Bloom filter contents and seeds must survive a pickle round-trip."""
    bf = BloomFilter(num_hashes=10, size_bytes=100)
    bf.add('hello')
    # Serialize and immediately deserialize into a fresh object.
    clone = pickle.loads(pickle.dumps(bf))
    assert 'hi' not in clone
    assert 'hello' in clone
    assert (bf.seeds == clone.seeds).all()
Esempio n. 13
0
    def test_excluded(self):
        """Keys never added should (almost surely) fail at least one probe."""
        bf = BloomFilter()
        bf.add('t1')
        bf.add('t2')

        # Probe three distinct absent keys so a single false positive
        # cannot make the test fail spuriously.
        results = [bf.test(key) for key in ("t3", "t4", "t5")]
        self.assertFalse(all(results))
Esempio n. 14
0
def write_bloom_filter():
    """Populate a Bloom filter from the module-level word list and persist it.

    Relies on module-level names n, p, word_present, and filename.
    """
    bloomf = BloomFilter(n, p)
    print("Size of bit array:{}".format(bloomf.size))
    print("False positive Probability:{}".format(bloomf.fp_prob))
    print("Number of hash functions:{}".format(bloomf.hash_count))

    for word in word_present:
        bloomf.add(word)

    # Serialize the filter to disk in binary mode.
    with open(filename, "wb") as outfile:
        outfile.write(bloomf.prepare_bloom_filter_to_write())
    def create_bloomfilter_file(self):
        """Build a Bloom filter over the unique words and write it to disk."""
        bloomf = BloomFilter(self.unique_word_count,
                             self.false_positive_probability)
        try:
            for token in self.ta_words_unique:
                bloomf.add(token)
            bloomf.writetofile(self.bloom_file_path)
        except Exception:
            # Best-effort: report the failure but do not propagate it.
            print(traceback.format_exc())
Esempio n. 16
0
    def create_csv_bloomfilter_files(self):
        """Dump word counts to CSV while inserting each word into a Bloom filter."""
        falsepositive_probability = 0.001
        bloomf = BloomFilter(len(self.dict_tamil_word),
                             falsepositive_probability)

        with open(self.csv_file_path, "w") as f:
            # One "word,count" line per entry; the filter records the word.
            for word, count in self.dict_tamil_word.items():
                bloomf.add(word)
                f.write(word + "," + str(count) + os.linesep)

        bloomf.writetofile(self.bloomfilter_file_path)
Esempio n. 17
0
 def test_get_filtered_txs(self):
     """Fetch Bloom-filtered transactions for one block and verify their ids.

     Network test: requires tbtc.programmingblockchain.com.
     """
     from bloomfilter import BloomFilter
     # 30-byte filter, 5 hash functions, tweak 90210.
     bf = BloomFilter(30, 5, 90210)
     # Match on the address's hash160, not the base58 string.
     h160 = decode_base58('mseRGXB89UTFVkWJhTRTzzZ9Ujj4ZPbGK5')
     bf.add(h160)
     node = SimpleNode('tbtc.programmingblockchain.com', testnet=True)
     node.handshake()
     node.send(bf.filterload())
     block_hash = bytes.fromhex('00000000000377db7fde98411876c53e318a395af7304de298fd47b7c549d125')
     # get_filtered_txs returns the block's transactions that match the
     # loaded filter, in block order.
     txs = node.get_filtered_txs([block_hash])
     self.assertEqual(txs[0].id(), '0c024b9d3aa2ae8faae96603b8d40c88df2fc6bf50b3f446295206f70f3cf6ad')
     self.assertEqual(txs[1].id(), '0886537e27969a12478e0d33707bf6b9fe4fdaec8d5d471b5304453b04135e7e')
     self.assertEqual(txs[2].id(), '23d4effc88b80fb7dbcc2e6a0b0af9821c6fe3bb4c8dc3b61bcab7c45f0f6888')
def test_bloom_filter(num_of_items, fp_prob):
    """Demo a Bloom filter: insert known words, then probe a mixed sample.

    Args:
        num_of_items: expected number of items (sizing parameter).
        fp_prob: desired false-positive probability.
    """
    bloomf = BloomFilter(num_of_items, fp_prob)

    # words to be added
    word_present = ['abound', 'abounds', 'abundance', 'abundant', 'accessable',
                    'bloom', 'blossom', 'bolster', 'bonny', 'bonus', 'bonuses',
                    'coherent', 'cohesive', 'colorful', 'comely', 'comfort',
                    'gems', 'generosity', 'generous', 'generously', 'genial']

    # word not added
    word_absent = ['bluff', 'cheater', 'hate', 'war', 'humanity',
                   'racism', 'hurt', 'nuke', 'gloomy', 'facebook',
                   'geeksforgeeks', 'twitter']

    # NOTE: an 80-entry `top_passwords_last_years` list was defined here but
    # never read anywhere in the function; removed as dead code.

    for item in word_present:
        bloomf.add(item)

    shuffle(word_present)
    shuffle(word_absent)

    # Probe ten inserted words plus every absent word, in random order.
    test_words = word_present[:10] + word_absent
    shuffle(test_words)

    for word in test_words:
        if bloomf.check(word):
            if word in word_absent:
                print("'{}' is a false positive!".format(word))
            else:
                print("'{}' is probably present!".format(word))
        else:
            print("'{}' is definitely not present!".format(word))
Esempio n. 19
0
    def build_index(self, document_identifier, kpriv, list_of_words):
        """Build a searchable Bloom-filter index for one document.

        For every word, r HMAC-SHA1 trapdoors (x1..xr) are derived from the
        private keys in kpriv; each trapdoor is then hashed again, keyed on
        the document identifier, to produce document-specific codewords
        (y1..yr), which are inserted into a Bloom filter.  Random noise bits
        pad the filter so every document contributes the same number of
        insertions.

        Args:
            document_identifier: unique id string for the document.
            kpriv: list of r hex-encoded HMAC keys.
            list_of_words: the document's unique words.

        Returns:
            (document_identifier, bf) tuple.
        """
        # Trapdoors for each word (x1, x2, ..., xr)
        trapdoor = []
        # Codewords for each word (y1, y2, ..., yr)
        codewords = []

        for word in list_of_words:
            # Derive r trapdoors per unique word.
            w = bytes(word, 'utf-8')
            for i in range(0, self.r):
                # Convert kpriv[i] from hex to a bytes object — required by HMAC.
                key = bytes.fromhex(kpriv[i])
                trapdoor_digest = hmac.new(key, msg=w, digestmod=hashlib.sha1)
                trapdoor.append(trapdoor_digest.hexdigest())

        # Hash each trapdoor again, keyed on the trapdoor with the document
        # identifier as message, to generate y1, y2, ..., yr.
        d_id = bytes(document_identifier, 'utf-8')  # loop-invariant, hoisted
        for t in trapdoor:
            message = bytes(t, 'utf-8')
            codeword_digest = hmac.new(message, msg=d_id, digestmod=hashlib.sha1)
            codewords.append(codeword_digest.hexdigest())

        # Insert every codeword into a fresh Bloom filter.
        bf = BloomFilter()
        for codeword in codewords:
            bf.add(codeword)

        # Add noise: (unique_word_count - len(words)) * r random bits so all
        # document filters have the same number of insertions.
        for _ in range(0, (self.unique_word_count - len(list_of_words)) * self.r):
            # BUG FIX: randrange(0, bf.size - 1) could never select the last
            # bit index (bf.size - 1); randrange(bf.size) yields 0..size-1,
            # matching the stated intent "0 - bf.size".
            index = random.randrange(bf.size)
            bf.set_index(index)

        return (document_identifier, bf)
Esempio n. 20
0
class CreateBloomFilter():
    """Bloom filter over date values fetched from Cassandra."""

    def __init__(self):
        self.cc = ConnectToCassandra()
        # n: number of items to add; word_present: the items themselves.
        self.n, self.word_present = self.cc.get_id()
        self.p = 0.05  # false positive probability
        self.bloomf = BloomFilter(self.n, self.p)
        # Insert the date component of each item.
        for entry in self.word_present:
            self.bloomf.add(bytes(to_integer(entry.date())))

    def createfilter(self):
        """Insert the raw (non-date-truncated) items into the filter."""
        for entry in self.word_present:
            self.bloomf.add(bytes(to_integer(entry)))

    def testdate(self, todate):
        """Return 1 if *todate* is (probably) in the filter, else 0."""
        return 1 if self.bloomf.check(bytes(to_integer(todate))) else 0
Esempio n. 21
0
def encryptData(data, size, fp=0.01, bigrams=2, bpower=8, p=None):
    """Encode a string into a Bloom filter of its character n-grams.

    (Docstring translated from Portuguese: "Encrypts a string".)

    Args:
        data: value to encode (converted to str).
        size: size of the Bloom filter.
        fp: false-positive rate.
        bigrams: n-gram length (2 = bigrams).
        bpower: bit-power parameter forwarded to BloomFilter.
        p: optional hash-function parameter; applied when given.

    Returns:
        The populated BloomFilter.
    """
    bloomfilter = BloomFilter(size, fp, bfpower=bpower)
    # `is not None` (identity test) is the idiom, not `!= None`.
    if p is not None:
        bloomfilter.set_hashfunction_by_p(p)

    index = ngram.NGram(N=bigrams)
    # Pad the string and extract its n-grams (renamed from `bigrams`,
    # which shadowed the parameter).
    grams = list(index.ngrams(index.pad(str(data))))

    for gram in grams:
        bloomfilter.add(str(gram))

    return bloomfilter
class Document:
    """A document: tokenized terms plus a Bloom-filter signature."""

    def __init__(self, terms, doc_id):
        self.id = doc_id
        # Tokenize once and keep only the tokenized form.
        self.terms = tokenize_terms(terms)
        self.signature = BloomFilter()
        self.signature.add(self.terms)

    def verify(self, query):
        """Exact-check *query* (a term or a list of terms) against self.terms.

        Returns False for any other query type.
        """
        if isinstance(query, str):
            return query in self.terms
        if isinstance(query, list):
            return all(term in self.terms for term in query)
        return False
Esempio n. 23
0
 def test_example_5(self):
     """Locate a transaction paying a known address via Bloom-filtered blocks.

     Network test: requires tbtc.programmingblockchain.com.
     """
     last_block_hex = '00000000000538d5c2246336644f9a4956551afb44ba47278759ec55ea912e19'
     address = 'mwJn1YPMq7y5F8J3LkC5Hxg9PHyZ5K4cFv'
     # Filter matches on the address's hash160.
     h160 = decode_base58(address)
     node = SimpleNode('tbtc.programmingblockchain.com',
                       testnet=True,
                       logging=False)
     # 30-byte filter, 5 hash functions, tweak 90210.
     bf = BloomFilter(30, 5, 90210)
     bf.add(h160)
     node.handshake()
     node.send(b'filterload', bf.filterload())
     start_block = bytes.fromhex(last_block_hex)
     getheaders_message = GetHeadersMessage(start_block=start_block)
     node.send(b'getheaders', getheaders_message.serialize())
     headers_envelope = node.wait_for_commands({b'headers'})
     stream = headers_envelope.stream()
     headers = HeadersMessage.parse(stream)
     get_data_message = GetDataMessage()
     for b in headers.blocks:
         # Reject any header whose proof-of-work does not validate.
         if not b.check_pow():
             raise RuntimeError('proof of work is invalid')
         get_data_message.add_data(FILTERED_BLOCK_DATA_TYPE, b.hash())
     node.send(b'getdata', get_data_message.serialize())
     found = False
     # Consume merkleblock/tx messages until the paying tx is found.
     while not found:
         envelope = node.wait_for_commands({b'merkleblock', b'tx'})
         stream = envelope.stream()
         if envelope.command == b'merkleblock':
             mb = MerkleBlock.parse(stream)
             if not mb.is_valid():
                 raise RuntimeError('invalid merkle proof')
         else:
             prev_tx_obj = Tx.parse(stream, testnet=True)
             for i, tx_out in enumerate(prev_tx_obj.tx_outs):
                 if tx_out.script_pubkey.address(testnet=True) == address:
                     self.assertEqual(
                         prev_tx_obj.id(),
                         'e3930e1e566ca9b75d53b0eb9acb7607f547e1182d1d22bd4b661cfe18dcddf1'
                     )
                     self.assertEqual(i, 0)
                     found = True
                     break
Esempio n. 24
0
def bloom(word_present):
    """Insert *word_present* into a Bloom filter and print check results.

    Fix: the original mixed Python 2 print statements with print() calls,
    a SyntaxError under Python 3; all output now uses print().
    """
    n = 20  # no of items to add
    p = 0.05  # false positive probability

    bloomf = BloomFilter(n, p)
    print("Size of bit array:{}".format(bloomf.size))
    print("False positive Probability:{}".format(bloomf.fp_prob))
    print("Number of hash functions:{}".format(bloomf.hash_count))
    role = [
        'Financial analyst', 'Jr. Maintenance Engineer', 'Marketing manager',
        'Quantitative analyst', 'Sales Consultant', 'Sales Executive',
        'Sales Representative', 'Sr. Software engineer', 'Sr. Software tester',
        'Technical support', 'Web developer', 'Jr. Software engineer',
        'Jr. Software tester', 'Sr. Maintenance Engineer'
    ]

    # Roles that were NOT inserted into the filter.
    word_absent = [i for i in role if i not in word_present]

    for item in word_present:
        bloomf.add(item)

    shuffle(word_present)
    shuffle(word_absent)
    print(word_present)
    for word in word_present:
        print(word)
        if bloomf.check(word):
            if word in word_absent:
                print("'{}' is a false positive!".format(word))
            else:
                print("'{}' is probably present!".format(word))
        else:
            print("'{}' is definitely not present!".format(word))
Esempio n. 25
0
class DuplicatesPipeline(object):
    """Scrapy pipeline that drops already-seen URLs using a Bloom filter.

    New URLs are recorded to the filter, written to the `visitedsites`
    file, and added to the search index.
    """

    def __init__(self):
        # 10k expected items at a 0.01% false-positive rate, persisted
        # to 'filter.bloom'.
        self.bf = BloomFilter(10000, 0.0001, 'filter.bloom')
        self.f_write = open('visitedsites', 'w')
        self.si = SearchIndex()
        self.si.SearchInit()
        self.count_num = 0

    def process_item(self, item, spider):
        # Deduplicate on the URL with any query string stripped.
        # BUG FIX: the original used str1[:str1.find('?')], which silently
        # drops the LAST character of URLs without a '?' (find returns -1).
        # split('?')[0] returns the full URL in that case.
        str2 = item['url'].split('?')[0]
        if self.bf.lookup(str2):  # True if the URL was already seen
            raise DropItem("Duplicate item found: %s" % item)
        else:
            self.count_num += 1
            self.bf.add(str2)
            self.save_to_file(item['url'], item['title'])
            self.si.AddIndex(item)
            # print() works under Python 2 and 3 (was a py2 print statement).
            print(self.count_num)
            return item

    def save_to_file(self, url, utitle):
        # Tab-separated URL/title pairs, one per line.
        self.f_write.write(url)
        self.f_write.write('\t')
        self.f_write.write(utitle.encode('utf-8'))
        self.f_write.write('\n')

    def __del__(self):
        """docstring for __del__"""
        self.f_write.close()
        self.si.IndexDone()
Esempio n. 26
0
def dblookuptimetest():
    """Compare lookup latency: Bloom filter vs. linear scan over a dictionary file."""
    import datetime

    print("Testing DB lookup time using bloom filter\n")
    bf = BloomFilter(500000, 7)
    huge = []

    # `with` guarantees the dictionary file is closed (original leaked it).
    with open("/usr/share/dict/american-english") as dict_file:
        lines = dict_file.read().splitlines()
    for line in lines:
        bf.add(line)
        huge.append(line)

    start = datetime.datetime.now()
    bf.contains("google")
    finish = datetime.datetime.now()
    print('Checking "google" using bloom filter in dictionary\n')
    print((finish - start).microseconds)

    # Baseline: O(n) linear scan through the word list.
    start = datetime.datetime.now()
    for word in huge:
        if word == "google":
            break
    finish = datetime.datetime.now()
    print('Checking "google" without  using bloom filter in dictionary\n')
    print((finish - start).microseconds)

    print(bf.contains("Max"))
    print(bf.contains("mice"))
    print(bf.contains("3"))

    start = datetime.datetime.now()
    bf.contains("apple")
    finish = datetime.datetime.now()
    print((finish - start).microseconds)

    start = datetime.datetime.now()
    for word in huge:
        if word == "apple":
            break
    finish = datetime.datetime.now()
    print((finish - start).microseconds)
Esempio n. 27
0
class SSTable:
    """Represents a Sorted-String-Table (SSTable) on disk."""

    def __init__(self, path, bf=None):
        self.path = path
        self.bf = bf
        # Rebuild the Bloom filter from disk when none was supplied.
        if not self.bf:
            self._sync()

    def _sync(self):
        """Scan the on-disk table and repopulate the Bloom filter with its keys."""
        self.bf = BloomFilter(BF_SIZE, BF_HASH_COUNT)
        with kv_reader(self.path) as reader:
            while reader.has_next():
                self.bf.add(reader.read_key())
                reader.skip_value()

    @classmethod
    def create(cls, path, memtable):
        """Flush *memtable* to *path* and return the resulting SSTable."""
        bloom = BloomFilter(BF_SIZE, BF_HASH_COUNT)
        with kv_writer(path) as writer:
            for key, value in memtable.entries():
                writer.write_entry(key, value)
                bloom.add(key)
        return cls(path, bloom)

    def search(self, search_key):
        """Return the value stored under *search_key*, or None when absent."""
        # Bloom filter says definitely-absent: skip the disk scan entirely.
        if not self.bf.exists(search_key):
            return None
        with kv_reader(self.path) as reader:
            while reader.has_next():
                key = reader.read_key()
                if key > search_key:
                    # Keys are sorted; we have passed where it would sit.
                    return None
                if key == search_key:
                    return reader.read_value()
                reader.skip_value()
        return None
Esempio n. 28
0
 def merge(cls, sstables: List[SSTable]) -> SSTable:
     """K-way merge several SSTables into one compacted table.

     On each round the reader with the smallest current key wins (ties
     broken by sstable.index * -1 — presumably so the higher-index table
     takes precedence; confirm against the index assignment), duplicate
     keys in the other readers are skipped, and tombstoned entries are
     dropped.  Returns a new SSTable backed by a freshly built Bloom filter.
     """
     new_path = sstables[0].path.replace(".dat", "-compacted.dat")
     new_index = sstables[0].index
     new_bf = BloomFilter(BF_SIZE, BF_HASH_COUNT)
     # Only non-empty tables participate in the merge.
     readers = [cls.Entries(sstable) for sstable in sstables
                 if sstable.size > 0]
     with kv_writer(new_path) as writer:
         while readers:
             # Smallest key wins; index * -1 breaks ties between tables.
             min_reader = min(
                 readers,
                 key=lambda r: (r.current_pair[0], r.sstable.index * -1),
             )
             # Advance every other reader sitting on the same (shadowed) key.
             for reader in readers:
                 if reader is min_reader:
                     continue
                 if reader.current_pair[0] == min_reader.current_pair[0]:
                     reader.advance()
             # Tombstones mark deletions and are not carried forward.
             if min_reader.current_pair[1] is not TOMBSTONE:
                 writer.write_entry(*min_reader.current_pair)
                 new_bf.add(min_reader.current_pair[0])
             min_reader.advance()
             # Drop exhausted readers.
             readers = [reader for reader in readers if reader.has_next]
     return cls(new_path, new_index, new_bf)
Esempio n. 29
0
def main():
    """Compare Jaccard estimates from Bloom filters and MinHash to ground truth.

    NOTE(review): uses `izip` (Python 2 itertools) and module-level np /
    plt / MinHash / hamming imports not visible in this chunk.
    """
    m = 1000000  # max hash value
    h = 2000  # number of hash functions
    jaccard = 0.8
    N = np.linspace(10, 10**3, num=10).astype('int')

    jaccard_minhash = []
    jaccard_bloom = []
    jaccard_true = []

    for n in N:
        # Build two overlapping string sets; min_d2 offsets the second set
        # so their true Jaccard coefficient is approximately `jaccard`.
        d1 = set([str(x) for x in range(n)])
        min_d2 = int(n*(1.-jaccard)/(1. + jaccard))
        d2 = set([str(x) for x in range(min_d2, min_d2 + n)])

        b1 = BloomFilter(m, h)
        b2 = BloomFilter(m, h)

        mh1 = MinHash(h)
        mh2 = MinHash(h)

        for s1, s2 in izip(d1, d2):
            b1.add(s1)
            b2.add(s2)
        mh1.hash(d1)
        mh2.hash(d2)

        # MinHash estimate: fraction of matching signature positions.
        jaccard_minhash.append(1.-hamming(mh1.vec, mh2.vec))
        # Bloom estimate derived from bit-array overlap.
        jaccard_bloom.append(1-2*float(sum(np.not_equal(b1.bit_array, b2.bit_array)))/(sum(b1.bit_array) + sum(b2.bit_array)))
        # Exact Jaccard coefficient of the two sets.
        jaccard_true.append(float(len(d1.intersection(d2)))/len(d1.union(d2)))

    # One curve per estimator, as a function of set size.
    plt.plot(N, np.array([jaccard_bloom, jaccard_minhash, jaccard_true]).T)
    plt.legend(['Bloom Filter', 'MinHash', 'True'], loc='upper left')
    plt.xlabel('Number of strings')
    plt.ylabel('Jaccard Coefficient')
    plt.title('Jaccard Approximation Through Hashing')
    plt.show()
def test_bloom_filter():
    """Probe a Bloom filter with present and absent words; print each verdict."""
    bloomfilter = BloomFilter(NUM_KEYS, FALSE_POSITIVE_PROBABILITY)
    word_present = [
        'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'bloom',
        'blossom', 'bolster', 'bonny', 'bonus', 'bonuses', 'coherent',
        'cohesive', 'colorful', 'comely', 'comfort', 'gems', 'generosity',
        'generous', 'generously', 'genial'
    ]

    word_absent = ['facebook', 'twitter']

    for entry in word_present:
        bloomfilter.add(entry)

    # Probe ten inserted words plus the absent ones, in random order.
    test_words = word_present[:10] + word_absent
    shuffle(test_words)
    for word in test_words:
        if not bloomfilter.is_member(word):
            print(f"'{word}' is definitely not present!")
        elif word in word_absent:
            # The filter matched a word that was never inserted.
            print(f"'{word}' is a false positive!")
        else:
            print(f"'{word}' is probably present!")
Esempio n. 31
0
class CreateBloomFilter():
    """Wrap a BloomFilter whose members are dates, stored as the byte
    encoding of an integer timestamp (via the external `to_integer` helper)."""

    def __init__(self, cnt, word_present):
        self.n = cnt  # number of items the filter is sized for
        self.word_present = word_present
        self.p = 0.05  # false positive probability
        self.bloomf = BloomFilter(self.n, self.p)
        # Items arrive as 'YYYYMMDD' strings; parse and encode before adding.
        for item in self.word_present:
            print(item)
            stamp = datetime.datetime.strptime(item, '%Y%m%d')
            self.bloomf.add(bytes(to_integer(stamp)))

    def createfilter(self, cnt, word_present):
        """Rebuild the filter from items that are already datetime objects."""
        self.p = 0.05  # false positive probability
        self.bloomf = BloomFilter(cnt, self.p)
        for entry in word_present:
            self.bloomf.add(bytes(to_integer(entry)))

    def testdate(self, todate):
        """Return 1 if the 'YYYYMMDD' string may be in the filter, else 0."""
        stamp = to_integer(datetime.datetime.strptime(todate, '%Y%m%d'))
        return 1 if self.bloomf.check(bytes(stamp)) else 0
Esempio n. 32
0
# Build a bloom filter from one username list, then count how many usernames
# from a second list the filter reports as (probably) present.
# NOTE(review): lines keep their trailing '\n'; this is consistent between
# add() and check(), but confirm the two files use identical line endings.
word_present = []
# 'with' guarantees the handle is closed (the original leaked two open files).
with open("/Users/siddhartharoynandi/Desktop/listed_username_30.txt") as inFile:
    for line in inFile:
        word_present.append(line)

n = len(word_present)  # no of items to add
p = 0.05  # false positive probability

bloomf = BloomFilter(n, p)
#print("Size of bit array:{}".format(bloomf.size))
#print("False positive Probability:{}".format(bloomf.fp_prob))
#print("Number of hash functions:{}".format(bloomf.hash_count))

for item in word_present:
    bloomf.add(item)

word_tobe_tested = []
with open("/Users/siddhartharoynandi/Desktop/listed_username_365.txt") as inFile:
    for line in inFile:
        word_tobe_tested.append(line)

shuffle(word_present)
shuffle(word_tobe_tested)

count = 0
count1 = 0

# count1 = number of candidates the filter flags as possibly present.
for word in word_tobe_tested:
    if bloomf.check(word):
        count1 = count1 + 1
Esempio n. 33
0
                         )
                 except:  # invalid data or mistake
                     pass
         filter_visual_window.close()
         pass
     except:
         pass
 elif event == 'Insert new password':
     try:
         if len(values['-NEW-PASSWORD-']) != 0:
             if bloomf.check_if_add(values['-NEW-PASSWORD-']):
                 sg.PopupError("inserted not successfully  This word[ " +
                               str(values['-NEW-PASSWORD-']) +
                               " ] in bloom filter , Try Again!")
             else:
                 bloomf.add(values['-NEW-PASSWORD-'])
                 sg.PopupOK(
                     "The password has been inserted successfully [ we Found overlap "
                     + str(bloomf.c) + "bits]")
             if bloomf.c == sizeofhashs:
                 sg.popup_ok("ohh ! , we Found False positive ")
                 false_positive += 1
         else:
             sg.PopupError(
                 "inserted not successfully  Null input, Try Again!")
     except:
         pass
 elif event == 'Show complete password strength analysis':
     try:
         if len(values['-NEW-PASSWORD-']) == 0:
             sg.popup_error("Null input :(")
Esempio n. 34
0
#address = 'mwJn1YPMq7y5F8J3LkC5Hxg9PHyZ5K4cFv'

# our test

#last_block_hex = '0000000017e6fbd8931bce659d45d92040a4674950f2ae5416d0bf1a239641f9'
last_block_hex = '00000000970369111c044804ec0319792c9e1aa29f59a622c5d14b3544ae4eba'
#0000000017e6fbd8931bce659d45d92040a4674950f2ae5416d0bf1a239641f9
#last_block_hex = '0000000000000004fea90996fdf40772e2c2c76205a1fb57fae465194fdaffb9'
address = 'mvEg6eZ3sUApodedYQrkpEPMMALsr1K1k1'

# SPV-style scan: register a bloom filter for one address with a testnet
# node, then request filtered blocks for every header after last_block_hex.
h160 = decode_base58(address)  # payload decoded from the base58 address
node = SimpleNode('testnet.programmingbitcoin.com',
                  testnet=True,
                  logging=False)
bf = BloomFilter(size=30, function_count=5, tweak=90210)
bf.add(h160)  # watch for transactions touching this address
node.handshake()
node.send(bf.filterload())  # install the filter on the remote node
start_block = bytes.fromhex(last_block_hex)
getheaders = GetHeadersMessage(start_block=start_block)
node.send(getheaders)
print('ok2')
headers = node.wait_for(HeadersMessage)  # blocks until headers arrive
print('ok3')
getdata = GetDataMessage()
for b in headers.blocks:
    # Reject any header whose hash fails its proof-of-work check.
    if not b.check_pow():
        raise RuntimeError('proof of work is invalid')
    # Request the merkle (filtered) block for each validated header.
    getdata.add_data(FILTERED_BLOCK_DATA_TYPE, b.hash())
node.send(getdata)
found = False
if __name__ == '__main__':
    from random import sample
    from string import ascii_letters

    states = '''Alabama Alaska Arizona Arkansas California Colorado Connecticut
        Delaware Florida Georgia Hawaii Idaho Illinois Indiana Iowa Kansas
        Kentucky Louisiana Maine Maryland Massachusetts Michigan Minnesota
        Mississippi Missouri Montana Nebraska Nevada NewHampshire NewJersey
        NewMexico NewYork NorthCarolina NorthDakota Ohio Oklahoma Oregon
        Pennsylvania RhodeIsland SouthCarolina SouthDakota Tennessee Texas Utah
        Vermont Virginia Washington WestVirginia Wisconsin Wyoming'''.split()

    bf1 = BloomFilter(ideal_num_elements_n=100000, error_rate_p=0.001)
    for state in states:
        bf1.add(state)

    json_bf = bf1.toJSON()

    print "##################"
    print json_bf
    print "##################"

    len_json = len(json_bf)
    print "data size: %s bytes"%len_json

    bf2 = BloomFilter.fromJSON(json_bf)
    assertListEquals(bf1.data, bf2.data)

    new_data = bf2.get_data()