def test_bf_export_file(self):
    """Export a one-element bloom filter and compare the file's MD5 to a known value."""
    filename = "test.blm"
    md5_val = "7f590086f9b962387e145899dd001256"
    blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
    blm.add("this is a test")
    try:
        blm.export(filename)
        md5_out = calc_file_md5(filename)
        self.assertEqual(md5_out, md5_val)
    finally:
        # Clean up even when the assertion fails so the exported file is
        # never left behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)
def test_bf_export_file(self):
    """Export a one-element bloom filter and compare the file's MD5 to a known value."""
    filename = "test.blm"
    md5_val = "7f590086f9b962387e145899dd001256"
    blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
    blm.add("this is a test")
    try:
        blm.export(filename)
        md5_out = calc_file_md5(filename)
        self.assertEqual(md5_out, md5_val)
    finally:
        # Clean up even when the assertion fails so the exported file is
        # never left behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)
def test_bf_export_file(self):
    """Export a one-element bloom filter and compare the file's MD5 to a known value."""
    filename = "test.blm"
    md5_val = "7f590086f9b962387e145899dd001256"
    blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
    blm.add("this is a test")
    try:
        blm.export(filename)
        md5_out = calc_file_md5(filename)
        self.assertEqual(md5_out, md5_val)
    finally:
        # Clean up even when the assertion fails so the exported file is
        # never left behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)
def test_bf_export_file(self):
    """Exporting a filter holding one key yields a file with the expected MD5."""
    expected_md5 = "8d27e30e1c5875b0edcf7413c7bdb221"

    bloom = BloomFilter(est_elements=10, false_positive_rate=0.05)
    bloom.add("this is a test")

    # The temporary file only supplies a path inside the current directory;
    # the filter itself writes the contents.
    tmp_options = {"dir": os.getcwd(), "suffix": ".blm", "delete": DELETE_TEMP_FILES}
    with NamedTemporaryFile(**tmp_options) as tmp:
        export_path = tmp.name
        bloom.export(export_path)
        actual_md5 = calc_file_md5(export_path)
    self.assertEqual(actual_md5, expected_md5)
def test_bf_load_file(self):
    """Export a bloom filter, reload it from the file, and check membership."""
    filename = "test.blm"
    blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
    blm.add("this is a test")
    try:
        blm.export(filename)
        blm2 = BloomFilter(filepath=filename)
        self.assertEqual("this is a test" in blm2, True)
        self.assertEqual("this is not a test" in blm2, False)
    finally:
        # Clean up even when an assertion fails so the exported file is
        # never left behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)
def test_bf_load_file(self):
    """Export a bloom filter, reload it from the file, and check membership."""
    filename = "test.blm"
    blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
    blm.add("this is a test")
    try:
        blm.export(filename)
        blm2 = BloomFilter(filepath=filename)
        self.assertEqual("this is a test" in blm2, True)
        self.assertEqual("this is not a test" in blm2, False)
    finally:
        # Clean up even when an assertion fails so the exported file is
        # never left behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)
def test_bf_load_file(self):
    """Export a bloom filter, reload it from the file, and check membership."""
    filename = "test.blm"
    blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
    blm.add("this is a test")
    try:
        blm.export(filename)
        blm2 = BloomFilter(filepath=filename)
        self.assertEqual("this is a test" in blm2, True)
        self.assertEqual("this is not a test" in blm2, False)
    finally:
        # Clean up even when an assertion fails so the exported file is
        # never left behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)
def test_bfod_load_on_disk(self):
    """Export an in-memory filter, open it with BloomFilterOnDisk, and check membership."""
    filename = "tmp.blm"
    blm = BloomFilter(10, 0.05)
    blm.add("this is a test")
    try:
        blm.export(filename)
        blmd = BloomFilterOnDisk(filename)
        try:
            self.assertEqual("this is a test" in blmd, True)
            self.assertEqual("this is not a test" in blmd, False)
        finally:
            # Close the on-disk filter before its backing file is removed,
            # including when an assertion above fails.
            blmd.close()
    finally:
        # Never leave the exported file behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)
def test_bfod_load_on_disk(self):
    """Export an in-memory filter, open it with BloomFilterOnDisk, and check membership."""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj:
        blm = BloomFilter(10, 0.05)
        blm.add("this is a test")
        blm.export(fobj.name)

        blmd = BloomFilterOnDisk(fobj.name)
        try:
            self.assertEqual("this is a test" in blmd, True)
            self.assertEqual("this is not a test" in blmd, False)
        finally:
            # Close the on-disk filter even when an assertion fails, so it
            # is released before the temporary file context exits.
            blmd.close()
def test_bf_load_file(self):
    """A filter rebuilt from an exported file reports the same membership."""
    original = BloomFilter(est_elements=10, false_positive_rate=0.05)
    original.add("this is a test")

    # The temporary file only supplies a path inside the current directory.
    tmp_options = {"dir": os.getcwd(), "suffix": ".blm", "delete": DELETE_TEMP_FILES}
    with NamedTemporaryFile(**tmp_options) as tmp:
        export_path = tmp.name
        original.export(export_path)
        reloaded = BloomFilter(filepath=export_path)
        has_added_key = "this is a test" in reloaded
        has_missing_key = "this is not a test" in reloaded
        self.assertEqual(has_added_key, True)
        self.assertEqual(has_missing_key, False)
def test_bfod_load_on_disk(self):
    """Export an in-memory filter, open it with BloomFilterOnDisk, and check membership."""
    filename = "tmp.blm"
    blm = BloomFilter(10, 0.05)
    blm.add("this is a test")
    try:
        blm.export(filename)
        blmd = BloomFilterOnDisk(filename)
        try:
            self.assertEqual("this is a test" in blmd, True)
            self.assertEqual("this is not a test" in blmd, False)
        finally:
            # Close the on-disk filter before its backing file is removed,
            # including when an assertion above fails.
            blmd.close()
    finally:
        # Never leave the exported file behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)
def test_bfod_load_on_disk(self):
    """Export an in-memory filter, open it with BloomFilterOnDisk, and check membership."""
    filename = "tmp.blm"
    blm = BloomFilter(10, 0.05)
    blm.add("this is a test")
    try:
        blm.export(filename)
        blmd = BloomFilterOnDisk(filename)
        try:
            self.assertEqual("this is a test" in blmd, True)
            self.assertEqual("this is not a test" in blmd, False)
        finally:
            # Close the on-disk filter before its backing file is removed,
            # including when an assertion above fails.
            blmd.close()
    finally:
        # Never leave the exported file behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)
def test_another_hashing_algo(self):
    """Use a custom multi-depth hash function and verify export, adds, checks and hashes."""
    md5_val = "7f590086f9b962387e145899dd001256"  # for default hash used
    filename = "test.blm"
    results = [
        14409285476674975580,
        1383622036369840193,
        10825905054403519891,
        3456253732347153957,
        1494124715262089992,
    ]

    def my_hash(key, depth, encoding="utf-8"):
        """Return `depth` hashes, each the SHA-512 of `key` rotated left by its index."""
        max64mod = UINT64_T_MAX + 1
        # Named differently from the outer `results` to avoid shadowing it.
        hashes = []
        for i in range(0, depth):
            tmp = key[i:] + key[:i]
            val = int(hashlib.sha512(tmp.encode(encoding)).hexdigest(), 16)
            hashes.append(val % max64mod)
        return hashes

    blm = BloomFilter(est_elements=10, false_positive_rate=0.05, hash_function=my_hash)
    self.assertEqual(blm.elements_added, 0)
    blm.add("this is a test")
    try:
        blm.export(filename)
        md5_out = calc_file_md5(filename)
        # A different hash function must yield a different exported file
        # than the one recorded for the default hash.
        self.assertNotEqual(md5_out, md5_val)
    finally:
        # Clean up even when the assertion fails so the exported file is
        # never left behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)

    for i in range(0, 10):
        tmp = "this is a test {0}".format(i)
        blm.add(tmp)
    self.assertEqual(blm.elements_added, 11)

    for i in range(0, 10):
        tmp = "this is a test {0}".format(i)
        self.assertTrue(blm.check(tmp))

    self.assertEqual(blm.hashes("this is a test", 5), results)
    res = blm.hashes("this is a test", 1)
    self.assertEqual(len(res), 1)
    self.assertEqual(res[0], results[0])
def test_bf_use_different_hash(self):
    """A filter built on a decorated custom hash exports, adds, checks and hashes as expected."""
    default_hash_md5 = "7f590086f9b962387e145899dd001256"  # for default hash used
    expected_hashes = [
        14409285476674975580,
        6203976290780191624,
        5074829385518853901,
        3953072760750514173,
        11782747630324011555,
    ]

    @hash_with_depth_int
    def my_hash(key, depth=1, encoding="utf-8"):
        """SHA-512 of the encoded key, reduced modulo UINT64_T_MAX + 1."""
        digest = hashlib.sha512(key.encode(encoding)).hexdigest()
        return int(digest, 16) % (UINT64_T_MAX + 1)

    bloom = BloomFilter(est_elements=10, false_positive_rate=0.05, hash_function=my_hash)
    self.assertEqual(bloom.elements_added, 0)
    bloom.add("this is a test")

    # The temporary file only supplies a path inside the current directory.
    tmp_options = {"dir": os.getcwd(), "suffix": ".blm", "delete": DELETE_TEMP_FILES}
    with NamedTemporaryFile(**tmp_options) as tmp:
        bloom.export(tmp.name)
        exported_md5 = calc_file_md5(tmp.name)
    self.assertNotEqual(exported_md5, default_hash_md5)

    extra_keys = ["this is a test {0}".format(idx) for idx in range(0, 10)]
    for extra_key in extra_keys:
        bloom.add(extra_key)
    self.assertEqual(bloom.elements_added, 11)

    for extra_key in extra_keys:
        self.assertTrue(bloom.check(extra_key))

    self.assertEqual(bloom.hashes("this is a test", 5), expected_hashes)
    single = bloom.hashes("this is a test", 1)
    self.assertEqual(len(single), 1)
    self.assertEqual(single[0], expected_hashes[0])
def test_another_hashing_algo(self):
    """Use a custom multi-depth hash function and verify export, adds, checks and hashes."""
    md5_val = "7f590086f9b962387e145899dd001256"  # for default hash used
    filename = "test.blm"
    results = [
        14409285476674975580,
        1383622036369840193,
        10825905054403519891,
        3456253732347153957,
        1494124715262089992,
    ]

    def my_hash(key, depth, encoding="utf-8"):
        """Return `depth` hashes, each the SHA-512 of `key` rotated left by its index."""
        max64mod = UINT64_T_MAX + 1
        # Named differently from the outer `results` to avoid shadowing it.
        hashes = []
        for i in range(0, depth):
            tmp = key[i:] + key[:i]
            val = int(hashlib.sha512(tmp.encode(encoding)).hexdigest(), 16)
            hashes.append(val % max64mod)
        return hashes

    blm = BloomFilter(est_elements=10, false_positive_rate=0.05, hash_function=my_hash)
    self.assertEqual(blm.elements_added, 0)
    blm.add("this is a test")
    try:
        blm.export(filename)
        md5_out = calc_file_md5(filename)
        # A different hash function must yield a different exported file
        # than the one recorded for the default hash.
        self.assertNotEqual(md5_out, md5_val)
    finally:
        # Clean up even when the assertion fails so the exported file is
        # never left behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)

    for i in range(0, 10):
        tmp = "this is a test {0}".format(i)
        blm.add(tmp)
    self.assertEqual(blm.elements_added, 11)

    for i in range(0, 10):
        tmp = "this is a test {0}".format(i)
        self.assertTrue(blm.check(tmp))

    self.assertEqual(blm.hashes("this is a test", 5), results)
    res = blm.hashes("this is a test", 1)
    self.assertEqual(len(res), 1)
    self.assertEqual(res[0], results[0])
def test_bf_use_different_hash(self):
    """Use a decorated custom hash function and verify export, adds, checks and hashes."""
    md5_val = "7f590086f9b962387e145899dd001256"  # for default hash used
    filename = "test.blm"
    results = [
        14409285476674975580,
        6203976290780191624,
        5074829385518853901,
        3953072760750514173,
        11782747630324011555,
    ]

    @hash_with_depth_int
    def my_hash(key, encoding="utf-8"):
        """Return the SHA-512 of the encoded key reduced modulo UINT64_T_MAX + 1."""
        max64mod = UINT64_T_MAX + 1
        val = int(hashlib.sha512(key.encode(encoding)).hexdigest(), 16)
        return val % max64mod

    blm = BloomFilter(est_elements=10, false_positive_rate=0.05, hash_function=my_hash)
    self.assertEqual(blm.elements_added, 0)
    blm.add("this is a test")
    try:
        blm.export(filename)
        md5_out = calc_file_md5(filename)
        # A different hash function must yield a different exported file
        # than the one recorded for the default hash.
        self.assertNotEqual(md5_out, md5_val)
    finally:
        # Clean up even when the assertion fails so the exported file is
        # never left behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)

    for i in range(0, 10):
        tmp = "this is a test {0}".format(i)
        blm.add(tmp)
    self.assertEqual(blm.elements_added, 11)

    for i in range(0, 10):
        tmp = "this is a test {0}".format(i)
        self.assertTrue(blm.check(tmp))

    self.assertEqual(blm.hashes("this is a test", 5), results)
    res = blm.hashes("this is a test", 1)
    self.assertEqual(len(res), 1)
    self.assertEqual(res[0], results[0])
def test_bf_use_different_hash(self):
    """Use a decorated custom hash function and verify export, adds, checks and hashes."""
    md5_val = "7f590086f9b962387e145899dd001256"  # for default hash used
    filename = "test.blm"
    results = [
        14409285476674975580,
        6203976290780191624,
        5074829385518853901,
        3953072760750514173,
        11782747630324011555,
    ]

    @hash_with_depth_int
    def my_hash(key, encoding="utf-8"):
        """Return the SHA-512 of the encoded key reduced modulo UINT64_T_MAX + 1."""
        max64mod = UINT64_T_MAX + 1
        val = int(hashlib.sha512(key.encode(encoding)).hexdigest(), 16)
        return val % max64mod

    blm = BloomFilter(est_elements=10, false_positive_rate=0.05, hash_function=my_hash)
    self.assertEqual(blm.elements_added, 0)
    blm.add("this is a test")
    try:
        blm.export(filename)
        md5_out = calc_file_md5(filename)
        # A different hash function must yield a different exported file
        # than the one recorded for the default hash.
        self.assertNotEqual(md5_out, md5_val)
    finally:
        # Clean up even when the assertion fails so the exported file is
        # never left behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)

    for i in range(0, 10):
        tmp = "this is a test {0}".format(i)
        blm.add(tmp)
    self.assertEqual(blm.elements_added, 11)

    for i in range(0, 10):
        tmp = "this is a test {0}".format(i)
        self.assertTrue(blm.check(tmp))

    self.assertEqual(blm.hashes("this is a test", 5), results)
    res = blm.hashes("this is a test", 1)
    self.assertEqual(len(res), 1)
    self.assertEqual(res[0], results[0])
if check_stock_symbol in stock_symbol_dist.keys(): present_count = present_count + 1 stock_symbol_file.write(check_stock_symbol + "," + ' Traded\n') else: false_postive_count = false_postive_count + 1 stock_symbol_file.write(check_stock_symbol + "," + ' Traded - False Positive\n') else: not_present_count = not_present_count + 1 stock_symbol_file.write(check_stock_symbol + "," + ' Defenitely Not Traded\n') stock_symbol_file.write('Probably Traded Count,' + str(prob_present_count) + "\n") stock_symbol_file.write('Traded Count,' + str(present_count) + "\n") stock_symbol_file.write('False +ve Traded Count,' + str(false_postive_count) + "\n") stock_symbol_file.write('Definitely Not Traded Count,' + str(not_present_count) + "\n") stock_trade_file.close() stock_exists_bf.export(stock_exists_bf_filename) timenow = datetime.datetime.now() bf_endtime = str(timenow.strftime("%x")) + ' ' + str((timenow.strftime("%X"))) est_elemnts_added_bf = stock_exists_bf.elements_added est_elements_planned = stock_exists_bf.estimated_elements stocks_added = len(stock_symbol_dist) est_unique_elements = stock_exists_bf.estimate_elements() process_file.write(str(est_elemnts_added_bf)) process_file.write("," + str(est_elements_planned)) process_file.write("," + str(stocks_added)) process_file.write("," + str(est_unique_elements)) process_file.write("," + str(false_postive_count)) process_file.write("," + str(false_postive_count/est_unique_elements))