Example #1
0
    def test_bf_export_file(self):
        ''' test exporting bloom filter to file

            Adds one key to a small filter, exports it to ``test.blm`` and
            compares the MD5 of the written file with a known digest. '''
        filename = 'test.blm'
        md5_val = '7f590086f9b962387e145899dd001256'
        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add('this is a test')

        # Clean up in ``finally`` so a failed export or digest mismatch does
        # not leave test.blm behind for later tests to trip over.
        try:
            blm.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)
        finally:
            if os.path.exists(filename):
                os.remove(filename)
Example #2
0
    def test_bf_export_file(self):
        """ test exporting bloom filter to file

            The exported file's MD5 must equal the recorded digest for a
            filter holding the single key "this is a test". """
        filename = "test.blm"
        md5_val = "7f590086f9b962387e145899dd001256"
        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add("this is a test")

        try:
            blm.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)
        finally:
            # always delete the export, even when the assertion above fails;
            # the existence check covers an export that never created a file
            if os.path.exists(filename):
                os.remove(filename)
Example #3
0
    def test_bf_export_file(self):
        ''' test exporting bloom filter to file

            Exports a one-element filter and checks the file's MD5 against
            the expected value. '''
        filename = 'test.blm'
        md5_val = '7f590086f9b962387e145899dd001256'
        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add('this is a test')

        try:
            blm.export(filename)
            self.assertEqual(calc_file_md5(filename), md5_val)
        finally:
            # the original removed the file only on success, so a failing
            # assertion left test.blm in the working directory
            if os.path.exists(filename):
                os.remove(filename)
Example #4
0
    def test_bf_export_file(self):
        """test exporting bloom filter to file

        A filter containing one key is exported into a temporary ``.blm``
        file created in the current working directory; the MD5 of that file
        is then compared with the expected digest.
        """
        expected_digest = "8d27e30e1c5875b0edcf7413c7bdb221"

        bloom = BloomFilter(est_elements=10, false_positive_rate=0.05)
        bloom.add("this is a test")

        with NamedTemporaryFile(
            dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
        ) as handle:
            export_path = handle.name
            bloom.export(export_path)
            actual_digest = calc_file_md5(export_path)

        # compared only after the temporary file context has exited
        self.assertEqual(actual_digest, expected_digest)
Example #5
0
    def test_bf_load_file(self):
        """ test loading bloom filter from file

            Exports a filter with one key, rebuilds a second filter from
            that file and checks membership on the reloaded filter. """
        filename = "test.blm"

        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add("this is a test")

        # remove the export in ``finally`` so a failed load or assertion does
        # not leave test.blm behind
        try:
            blm.export(filename)
            blm2 = BloomFilter(filepath=filename)
            self.assertIn("this is a test", blm2)
            self.assertNotIn("this is not a test", blm2)
        finally:
            if os.path.exists(filename):
                os.remove(filename)
Example #6
0
    def test_bf_load_file(self):
        ''' test loading bloom filter from file

            The key added before export must be reported as present by the
            filter constructed from the exported file; an unrelated key
            must be reported as absent. '''
        filename = 'test.blm'

        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add('this is a test')

        try:
            blm.export(filename)
            blm2 = BloomFilter(filepath=filename)
            self.assertIn('this is a test', blm2)
            self.assertNotIn('this is not a test', blm2)
        finally:
            # runs on failure too; previously a failed assertion skipped the
            # os.remove call and leaked the file
            if os.path.exists(filename):
                os.remove(filename)
Example #7
0
    def test_bf_load_file(self):
        ''' test loading bloom filter from file

            Round-trips a one-key filter through ``test.blm`` and verifies
            membership answers on the loaded copy. '''
        filename = 'test.blm'

        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add('this is a test')

        try:
            blm.export(filename)
            loaded = BloomFilter(filepath=filename)
            self.assertIn('this is a test', loaded)
            self.assertNotIn('this is not a test', loaded)
        finally:
            # guarantee cleanup regardless of how the checks above end
            if os.path.exists(filename):
                os.remove(filename)
Example #8
0
    def test_bfod_load_on_disk(self):
        """ test loading a previously saved blm on disk

            Exports an in-memory filter to ``tmp.blm``, opens that file with
            BloomFilterOnDisk and checks membership on the on-disk filter. """
        filename = "tmp.blm"

        blm = BloomFilter(10, 0.05)
        blm.add("this is a test")

        try:
            blm.export(filename)
            blmd = BloomFilterOnDisk(filename)
            # close the on-disk filter before the file is removed, even when
            # an assertion fails; the original skipped both steps on failure
            try:
                self.assertIn("this is a test", blmd)
                self.assertNotIn("this is not a test", blmd)
            finally:
                blmd.close()
        finally:
            if os.path.exists(filename):
                os.remove(filename)
Example #9
0
    def test_bfod_load_on_disk(self):
        """test loading a previously saved blm on disk

        A one-key filter is exported into a temporary ``.blm`` file and
        reopened through BloomFilterOnDisk, whose membership answers are
        then checked.
        """
        with NamedTemporaryFile(dir=os.getcwd(),
                                suffix=".blm",
                                delete=DELETE_TEMP_FILES) as fobj:
            blm = BloomFilter(10, 0.05)
            blm.add("this is a test")
            blm.export(fobj.name)

            blmd = BloomFilterOnDisk(fobj.name)
            # close the on-disk filter even if an assertion fails, so it is
            # not left open while the temporary file context exits
            try:
                self.assertIn("this is a test", blmd)
                self.assertNotIn("this is not a test", blmd)
            finally:
                blmd.close()
Example #10
0
    def test_bf_load_file(self):
        """test loading bloom filter from file

        The filter is exported into a temporary ``.blm`` file and a second
        filter is constructed from that path while the file still exists;
        membership is checked on the second filter afterwards.
        """
        source = BloomFilter(est_elements=10, false_positive_rate=0.05)
        source.add("this is a test")

        with NamedTemporaryFile(
            dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
        ) as handle:
            path = handle.name
            source.export(path)
            reloaded = BloomFilter(filepath=path)

        # (key, expected membership) pairs, checked in the original order
        expectations = (
            ("this is a test", True),
            ("this is not a test", False),
        )
        for key, expected in expectations:
            self.assertEqual(key in reloaded, expected)
Example #11
0
    def test_bfod_load_on_disk(self):
        ''' test loading a previously saved blm on disk

            Writes a one-key filter to ``tmp.blm`` and verifies that
            BloomFilterOnDisk opened on that file reports the key as
            present and an unrelated key as absent. '''
        filename = 'tmp.blm'

        blm = BloomFilter(10, 0.05)
        blm.add('this is a test')

        try:
            blm.export(filename)
            blmd = BloomFilterOnDisk(filename)
            try:
                self.assertIn('this is a test', blmd)
                self.assertNotIn('this is not a test', blmd)
            finally:
                # close first, then delete: a failing assertion previously
                # skipped both the close and the file removal
                blmd.close()
        finally:
            if os.path.exists(filename):
                os.remove(filename)
Example #12
0
    def test_bfod_load_on_disk(self):
        ''' test loading a previously saved blm on disk

            The exported file is opened with BloomFilterOnDisk and queried
            for one key that was added and one that was not. '''
        filename = 'tmp.blm'

        blm = BloomFilter(10, 0.05)
        blm.add('this is a test')

        try:
            blm.export(filename)
            on_disk = BloomFilterOnDisk(filename)
            try:
                self.assertIn('this is a test', on_disk)
                self.assertNotIn('this is not a test', on_disk)
            finally:
                # always close the on-disk filter before the file is removed
                on_disk.close()
        finally:
            # cleanup must not depend on the assertions passing
            if os.path.exists(filename):
                os.remove(filename)
Example #13
0
    def test_another_hashing_algo(self):
        """ test defining a completely different hashing strategy

            Builds a filter around a custom hash function, checks that its
            exported file differs from the recorded default-hash digest,
            then checks element counting, membership and the raw hash
            values returned by ``blm.hashes``. """
        md5_val = "7f590086f9b962387e145899dd001256"  # for default hash used
        filename = "test.blm"
        results = [
            14409285476674975580,
            1383622036369840193,
            10825905054403519891,
            3456253732347153957,
            1494124715262089992,
        ]

        def my_hash(key, depth, encoding="utf-8"):
            """ my hashing strategy

                Returns ``depth`` values: the i-th is the SHA-512 of ``key``
                rotated left by i characters, reduced modulo
                ``UINT64_T_MAX + 1``. """
            max64mod = UINT64_T_MAX + 1
            rotations = (key[i:] + key[:i] for i in range(depth))
            return [
                int(hashlib.sha512(rot.encode(encoding)).hexdigest(), 16)
                % max64mod
                for rot in rotations
            ]

        blm = BloomFilter(est_elements=10,
                          false_positive_rate=0.05,
                          hash_function=my_hash)

        self.assertEqual(blm.elements_added, 0)
        blm.add("this is a test")

        # delete the export even if the digest comparison fails, so the file
        # is not left behind in the working directory
        try:
            blm.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertNotEqual(md5_out, md5_val)
        finally:
            if os.path.exists(filename):
                os.remove(filename)

        keys = ["this is a test {0}".format(i) for i in range(10)]
        for key in keys:
            blm.add(key)

        # one key added before the export plus the ten added above
        self.assertEqual(blm.elements_added, 11)

        for key in keys:
            self.assertTrue(blm.check(key))

        self.assertEqual(blm.hashes("this is a test", 5), results)
        res = blm.hashes("this is a test", 1)
        self.assertEqual(len(res), 1)
        self.assertEqual(res[0], results[0])
Example #14
0
    def test_bf_use_different_hash(self):
        """test that the different hash works as intended

        A filter is created with a SHA-512 based hash wrapped by
        ``hash_with_depth_int``. The test checks that its exported file does
        not match the recorded default-hash digest, that eleven elements are
        counted and found, and that ``hashes`` returns the expected values.
        """
        default_digest = "7f590086f9b962387e145899dd001256"  # for default hash used
        expected_hashes = [
            14409285476674975580,
            6203976290780191624,
            5074829385518853901,
            3953072760750514173,
            11782747630324011555,
        ]
        base_key = "this is a test"

        @hash_with_depth_int
        def my_hash(key, depth=1, encoding="utf-8"):
            """my hash function"""
            digest_hex = hashlib.sha512(key.encode(encoding)).hexdigest()
            modulus = UINT64_T_MAX + 1
            return int(digest_hex, 16) % modulus

        bloom = BloomFilter(
            est_elements=10, false_positive_rate=0.05, hash_function=my_hash
        )
        self.assertEqual(bloom.elements_added, 0)
        bloom.add(base_key)

        with NamedTemporaryFile(
            dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
        ) as handle:
            bloom.export(handle.name)
            exported_digest = calc_file_md5(handle.name)
        self.assertNotEqual(exported_digest, default_digest)

        # the same ten derived keys are first added, then looked up
        derived_keys = ["this is a test {0}".format(n) for n in range(10)]
        for derived in derived_keys:
            bloom.add(derived)

        self.assertEqual(bloom.elements_added, 11)

        for derived in derived_keys:
            self.assertTrue(bloom.check(derived))

        self.assertEqual(bloom.hashes(base_key, 5), expected_hashes)
        single = bloom.hashes(base_key, 1)
        self.assertEqual(len(single), 1)
        self.assertEqual(single[0], expected_hashes[0])
Example #15
0
    def test_another_hashing_algo(self):
        ''' test defining a completely different hashing strategy

            Uses a custom rotation-based SHA-512 hash, confirms the exported
            file differs from the recorded default-hash digest, and then
            checks counts, membership and the values from ``blm.hashes``. '''
        md5_val = '7f590086f9b962387e145899dd001256'  # for default hash used
        filename = 'test.blm'
        results = [14409285476674975580,
                   1383622036369840193,
                   10825905054403519891,
                   3456253732347153957,
                   1494124715262089992]

        def my_hash(key, depth, encoding='utf-8'):
            ''' my hashing strategy

                Produces one value per depth level by hashing ``key``
                rotated left by the level index with SHA-512 and reducing
                it modulo ``UINT64_T_MAX + 1``. '''
            max64mod = UINT64_T_MAX + 1
            # named ``hashed`` to avoid shadowing the outer expected list
            hashed = []
            for i in range(depth):
                rotated = key[i:] + key[:i]
                digest = hashlib.sha512(rotated.encode(encoding)).hexdigest()
                hashed.append(int(digest, 16) % max64mod)
            return hashed

        blm = BloomFilter(est_elements=10, false_positive_rate=0.05,
                          hash_function=my_hash)

        self.assertEqual(blm.elements_added, 0)
        blm.add('this is a test')

        try:
            blm.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertNotEqual(md5_out, md5_val)
        finally:
            # cleanup used to be skipped when the assertion above failed
            if os.path.exists(filename):
                os.remove(filename)

        for i in range(10):
            blm.add('this is a test {0}'.format(i))

        self.assertEqual(blm.elements_added, 11)

        for i in range(10):
            self.assertTrue(blm.check('this is a test {0}'.format(i)))

        self.assertEqual(blm.hashes('this is a test', 5), results)
        res = blm.hashes('this is a test', 1)
        self.assertEqual(len(res), 1)
        self.assertEqual(res[0], results[0])
Example #16
0
    def test_bf_use_different_hash(self):
        ''' test that the different hash works as intended

            Builds a filter around a SHA-512 hash wrapped with
            ``hash_with_depth_int`` and checks the export digest, element
            count, membership and the values returned by ``blm.hashes``. '''
        md5_val = '7f590086f9b962387e145899dd001256'  # for default hash used
        filename = 'test.blm'
        results = [14409285476674975580,
                   6203976290780191624,
                   5074829385518853901,
                   3953072760750514173,
                   11782747630324011555]

        @hash_with_depth_int
        def my_hash(key, encoding='utf-8'):
            ''' my hash function: SHA-512 of the encoded key, reduced
                modulo ``UINT64_T_MAX + 1`` '''
            max64mod = UINT64_T_MAX + 1
            val = int(hashlib.sha512(key.encode(encoding)).hexdigest(), 16)
            return val % max64mod

        blm = BloomFilter(est_elements=10, false_positive_rate=0.05,
                          hash_function=my_hash)
        self.assertEqual(blm.elements_added, 0)
        blm.add('this is a test')

        # the export must be removed whether or not the digest check passes
        try:
            blm.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertNotEqual(md5_out, md5_val)
        finally:
            if os.path.exists(filename):
                os.remove(filename)

        keys = ['this is a test {0}'.format(i) for i in range(10)]
        for key in keys:
            blm.add(key)

        self.assertEqual(blm.elements_added, 11)

        for key in keys:
            self.assertTrue(blm.check(key))

        self.assertEqual(blm.hashes('this is a test', 5), results)
        res = blm.hashes('this is a test', 1)
        self.assertEqual(len(res), 1)
        self.assertEqual(res[0], results[0])
Example #17
0
    def test_bf_use_different_hash(self):
        ''' test that the different hash works as intended

            A filter using a decorated SHA-512 hash must export a file that
            differs from the recorded default-hash digest, count eleven
            added elements, find each of them, and return the expected
            values from ``blm.hashes``. '''
        md5_val = '7f590086f9b962387e145899dd001256'  # for default hash used
        filename = 'test.blm'
        results = [
            14409285476674975580, 6203976290780191624, 5074829385518853901,
            3953072760750514173, 11782747630324011555
        ]

        @hash_with_depth_int
        def my_hash(key, encoding='utf-8'):
            ''' my hash function '''
            max64mod = UINT64_T_MAX + 1
            val = int(hashlib.sha512(key.encode(encoding)).hexdigest(), 16)
            return val % max64mod

        blm = BloomFilter(est_elements=10,
                          false_positive_rate=0.05,
                          hash_function=my_hash)
        self.assertEqual(blm.elements_added, 0)
        blm.add('this is a test')

        try:
            blm.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertNotEqual(md5_out, md5_val)
        finally:
            # previously a failing assertion skipped os.remove and left the
            # file on disk
            if os.path.exists(filename):
                os.remove(filename)

        for i in range(10):
            tmp = 'this is a test {0}'.format(i)
            blm.add(tmp)

        # the initial key plus the ten formatted keys
        self.assertEqual(blm.elements_added, 11)

        for i in range(10):
            tmp = 'this is a test {0}'.format(i)
            self.assertTrue(blm.check(tmp))

        self.assertEqual(blm.hashes('this is a test', 5), results)
        res = blm.hashes('this is a test', 1)
        self.assertEqual(len(res), 1)
        self.assertEqual(res[0], results[0])
Example #18
0
                    if check_stock_symbol in stock_symbol_dist.keys():
                        present_count = present_count + 1
                        stock_symbol_file.write(check_stock_symbol + "," + ' Traded\n')
                    else:
                        false_postive_count = false_postive_count + 1
                        stock_symbol_file.write(check_stock_symbol + "," + ' Traded - False Positive\n')
                else:
                    not_present_count = not_present_count + 1
                    stock_symbol_file.write(check_stock_symbol + "," + ' Defenitely Not Traded\n')
            stock_symbol_file.write('Probably Traded Count,' + str(prob_present_count) + "\n")
            stock_symbol_file.write('Traded Count,' + str(present_count) + "\n")
            stock_symbol_file.write('False +ve Traded Count,' + str(false_postive_count) + "\n")
            stock_symbol_file.write('Definitely Not Traded Count,' + str(not_present_count) + "\n")

            stock_trade_file.close()
            stock_exists_bf.export(stock_exists_bf_filename)

            timenow = datetime.datetime.now()
            bf_endtime = str(timenow.strftime("%x")) + ' ' + str((timenow.strftime("%X")))

            est_elemnts_added_bf = stock_exists_bf.elements_added
            est_elements_planned = stock_exists_bf.estimated_elements
            stocks_added = len(stock_symbol_dist)
            est_unique_elements = stock_exists_bf.estimate_elements()

            process_file.write(str(est_elemnts_added_bf))
            process_file.write("," + str(est_elements_planned))
            process_file.write("," + str(stocks_added))
            process_file.write("," + str(est_unique_elements))
            process_file.write("," + str(false_postive_count))
            process_file.write("," + str(false_postive_count/est_unique_elements))