def test_cms_export(self):
    """Export a count-min sketch and compare the file's MD5 to a known value."""
    expected_digest = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
    tmp_options = {"dir": os.getcwd(), "suffix": ".cms", "delete": DELETE_TEMP_FILES}
    with NamedTemporaryFile(**tmp_options) as tmp:
        export_path = tmp.name
        sketch = CountMinSketch(width=1000, depth=5)
        sketch.add("this is a test", 100)
        # Write the sketch over the temporary file, then hash what landed on disk.
        sketch.export(export_path)
        actual_digest = calc_file_md5(export_path)
        self.assertEqual(actual_digest, expected_digest)
def test_cms_export(self):
    ''' test exporting a count-min sketch

    The sketch is exported to ``test.cms`` in the current working
    directory and the exported file's MD5 is compared to a known value.
    The file is removed in a ``finally`` clause so that a failure while
    exporting or hashing does not leave it behind for later tests.
    '''
    md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
    filename = 'test.cms'
    cms = CountMinSketch(width=1000, depth=5)
    cms.add('this is a test', 100)
    try:
        cms.export(filename)
        md5_out = calc_file_md5(filename)
    finally:
        # Guard the removal: if export failed before creating the file,
        # os.remove would raise and mask the original error.
        if os.path.exists(filename):
            os.remove(filename)
    self.assertEqual(md5_out, md5_val)
def test_cms_load(self):
    """Round-trip a count-min sketch through a file and check its contents."""
    expected_digest = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
    tmp_options = {"dir": os.getcwd(), "suffix": ".cms", "delete": DELETE_TEMP_FILES}
    with NamedTemporaryFile(**tmp_options) as tmp:
        export_path = tmp.name

        original = CountMinSketch(width=1000, depth=5)
        added = original.add("this is a test", 100)
        self.assertEqual(added, 100)

        original.export(export_path)
        self.assertEqual(calc_file_md5(export_path), expected_digest)

        # Build a second sketch straight from the exported file.
        restored = CountMinSketch(filepath=export_path)
        self.assertEqual(restored.elements_added, 100)
        self.assertEqual(restored.check("this is a test"), 100)
def test_cms_load(self):
    ''' test loading a count-min sketch from file

    Exports a sketch to ``test.cms``, verifies the file's MD5, then
    builds a second sketch from that file and checks that the element
    count and the stored frequency are preserved. All work on the file
    happens inside ``try``/``finally`` so the file is removed even when
    an assertion fails.
    '''
    md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
    filename = 'test.cms'
    cms = CountMinSketch(width=1000, depth=5)
    self.assertEqual(cms.add('this is a test', 100), 100)
    try:
        cms.export(filename)
        md5_out = calc_file_md5(filename)
        self.assertEqual(md5_out, md5_val)

        # try loading directly from file!
        cms2 = CountMinSketch(filepath=filename)
        self.assertEqual(cms2.elements_added, 100)
        self.assertEqual(cms2.check('this is a test'), 100)
    finally:
        # Guard the removal: if export failed before creating the file,
        # os.remove would raise and mask the original error.
        if os.path.exists(filename):
            os.remove(filename)
def test_cms_load_diff_hash(self):
    """Load an exported count-min sketch using a different hash function."""
    expected_digest = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
    tmp_options = {"dir": os.getcwd(), "suffix": ".cms", "delete": DELETE_TEMP_FILES}
    with NamedTemporaryFile(**tmp_options) as tmp:
        export_path = tmp.name

        original = CountMinSketch(width=1000, depth=5)
        added = original.add("this is a test", 100)
        self.assertEqual(added, 100)

        original.export(export_path)
        self.assertEqual(calc_file_md5(export_path), expected_digest)

        reloaded = CountMinSketch(filepath=export_path, hash_function=different_hash)
        self.assertEqual(reloaded.elements_added, 100)

        # should not work since it is a different hash
        # NOTE(review): this queries the original sketch (not the reloaded
        # one) and compares the result against True -- confirm whether the
        # reloaded sketch was the intended target.
        original_count = original.check("this is a test")
        self.assertNotEqual(original_count, True)

        original_hashes = original.hashes("this is a test")
        reloaded_hashes = reloaded.hashes("this is a test")
        self.assertNotEqual(original_hashes, reloaded_hashes)
def test_cms_load_diff_hash(self):
    ''' test loading a count-min sketch from file with a different hash

    Exports a sketch to ``test.cms``, verifies the file's MD5, then
    reloads it with ``different_hash`` and checks that the two sketches
    produce different hashes for the same key. All work on the file
    happens inside ``try``/``finally`` so the file is removed even when
    an assertion fails.
    '''
    md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
    filename = 'test.cms'
    cms = CountMinSketch(width=1000, depth=5)
    self.assertEqual(cms.add('this is a test', 100), 100)
    try:
        cms.export(filename)
        md5_out = calc_file_md5(filename)
        self.assertEqual(md5_out, md5_val)

        cms2 = CountMinSketch(filepath=filename, hash_function=different_hash)
        self.assertEqual(cms2.elements_added, 100)
        # should not work since it is a different hash
        # NOTE(review): this queries `cms` (not `cms2`) and compares the
        # result against True -- confirm whether `cms2` was intended.
        self.assertNotEqual(cms.check('this is a test'), True)
        self.assertNotEqual(cms.hashes('this is a test'),
                            cms2.hashes('this is a test'))
    finally:
        # Guard the removal: if export failed before creating the file,
        # os.remove would raise and mask the original error.
        if os.path.exists(filename):
            os.remove(filename)
# Module-level sketch shared by main() and the reporting code below.
cms = CountMinSketch(width=1000, depth=4)


def main():
    """Feed 20 random integers from randint(0, 10), as strings, into ``cms``.

    Each value is printed after it is added, followed by a one-second
    pause. The loop is bounded so the frequency report after ``main()``
    gets a chance to run.
    """
    for _ in range(20):
        value = randint(0, 10)
        msg = str(value)
        cms.add(msg)
        print(msg)
        time.sleep(1)


if __name__ == "__main__":
    main()
    # Report the estimated frequency of a few sample values.
    print("frequency of 9 = ", cms.check('9'))
    print("frequency of 7 = ", cms.check('7'))
    print("frequency of 2 = ", cms.check('2'))
    # Persist the sketch to a file on disk.
    cms.export('cms_synopsis')
stock_symbol_freq_cms = stock_freq_cms.check(stock_symbol) sketch_endtime = time.process_time() sketch_qrytime = sketch_qrytime + (sketch_endtime - sketch_starttime) stock_symbol_freq = stock_symbol_dist[stock_symbol] accuracy = 1 - abs(stock_symbol_freq-stock_symbol_freq_cms)/stock_symbol_freq if accuracy < 0: accuracy = 0 total_accuracy = total_accuracy + accuracy stock_symbol_file.write(stock_symbol + "," + str(no_of_record)) stock_symbol_file.write("," + str(width) + "," + str(depth)) stock_symbol_file.write("," + str(stock_symbol_freq) + "," + str(stock_symbol_freq_cms)) stock_symbol_file.write("," + str(accuracy) + "\n") symbol_count = len(stock_symbol_dist) avg_accuracy = total_accuracy / symbol_count stock_freq_cms.export(cms_filename) stock_freq_cms.clear() stock_symbol_file.write("Averge," + str(no_of_record)) stock_symbol_file.write("," + str(width) + "," + str(depth)) stock_symbol_file.write(",,," + str(avg_accuracy) + "\n") timenow = datetime.datetime.now() cms_endtime = str(timenow.strftime("%x")) + ' ' + str((timenow.strftime("%X"))) process_file.write(str(no_of_record) + "," + str(symbol_count) + "," + str(width) + "," + str(depth)) process_file.write("," + str(avg_accuracy) + "," + str(os.path.getsize(cms_filename)/1024)) process_file.write("," + str(sketch_time) + "," + str(sketch_qrytime) + "," + cms_starttime + "," + cms_endtime + "\n") stock_trade_file.close() stock_symbol_file.close()