예제 #1
0
 def test_cms_export(self):
     """Export a count-min sketch and verify the MD5 digest of the written file."""
     expected_md5 = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
     with NamedTemporaryFile(dir=os.getcwd(), suffix=".cms", delete=DELETE_TEMP_FILES) as tmp_file:
         export_path = tmp_file.name
         sketch = CountMinSketch(width=1000, depth=5)
         sketch.add("this is a test", 100)
         sketch.export(export_path)
         actual_md5 = calc_file_md5(export_path)
     # The digest is compared only after the temporary-file context has exited.
     self.assertEqual(actual_md5, expected_md5)
예제 #2
0
    def test_cms_export(self):
        ''' test exporting a count-min sketch

            The sketch is written to ``test.cms`` in the current working
            directory and its MD5 digest is compared with a known value. The
            file is removed again even when exporting or hashing raises, so a
            failing run does not leave it behind.
        '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        cms = CountMinSketch(width=1000, depth=5)
        cms.add('this is a test', 100)
        try:
            cms.export(filename)
            md5_out = calc_file_md5(filename)
        finally:
            # export() may fail before the file exists; only remove what is
            # really there so the original error is not masked.
            if os.path.exists(filename):
                os.remove(filename)

        self.assertEqual(md5_out, md5_val)
예제 #3
0
    def test_cms_export(self):
        ''' test exporting a count-min sketch

            The sketch is written to ``test.cms`` in the current working
            directory and its MD5 digest is compared with a known value. The
            file is removed again even when exporting or hashing raises, so a
            failing run does not leave it behind.
        '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        cms = CountMinSketch(width=1000, depth=5)
        cms.add('this is a test', 100)
        try:
            cms.export(filename)
            md5_out = calc_file_md5(filename)
        finally:
            # export() may fail before the file exists; only remove what is
            # really there so the original error is not masked.
            if os.path.exists(filename):
                os.remove(filename)

        self.assertEqual(md5_out, md5_val)
예제 #4
0
    def test_cms_load(self):
        """Round-trip a count-min sketch through a file and verify the reloaded copy."""
        expected_md5 = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
        with NamedTemporaryFile(dir=os.getcwd(), suffix=".cms", delete=DELETE_TEMP_FILES) as tmp_file:
            export_path = tmp_file.name
            original = CountMinSketch(width=1000, depth=5)
            inserted = original.add("this is a test", 100)
            self.assertEqual(inserted, 100)
            original.export(export_path)
            self.assertEqual(calc_file_md5(export_path), expected_md5)

            # Build a second sketch straight from the exported file.
            reloaded = CountMinSketch(filepath=export_path)
            self.assertEqual(reloaded.elements_added, 100)
            self.assertEqual(reloaded.check("this is a test"), 100)
예제 #5
0
    def test_cms_load(self):
        ''' test loading a count-min sketch from file

            The sketch is exported to ``test.cms``, its MD5 digest is checked,
            and a second sketch is built from that file. Cleanup happens in a
            ``finally`` clause so the file is removed even when one of the
            assertions fails.
        '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        cms = CountMinSketch(width=1000, depth=5)
        self.assertEqual(cms.add('this is a test', 100), 100)
        try:
            cms.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)

            # try loading directly from the exported file
            cms2 = CountMinSketch(filepath=filename)
            self.assertEqual(cms2.elements_added, 100)
            self.assertEqual(cms2.check('this is a test'), 100)
        finally:
            # export() may fail before the file exists; only remove what is
            # really there so the original error is not masked.
            if os.path.exists(filename):
                os.remove(filename)
예제 #6
0
    def test_cms_load_diff_hash(self):
        """test loading a count-min sketch from file with a different hash function

        The sketch is exported, then reloaded with ``different_hash`` as its
        hash function. The element total must survive the reload, but looking
        the key up through the reloaded sketch must not return the stored
        count, and the two sketches must hash the key differently.
        """
        md5_val = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
        with NamedTemporaryFile(dir=os.getcwd(), suffix=".cms", delete=DELETE_TEMP_FILES) as fobj:
            cms = CountMinSketch(width=1000, depth=5)
            self.assertEqual(cms.add("this is a test", 100), 100)
            cms.export(fobj.name)
            md5_out = calc_file_md5(fobj.name)
            self.assertEqual(md5_out, md5_val)

            cms2 = CountMinSketch(filepath=fobj.name, hash_function=different_hash)
            self.assertEqual(cms2.elements_added, 100)
            # should not work since it is a different hash: query the reloaded
            # sketch (cms2) and compare against the stored count. Comparing
            # the original sketch's count with True could never fail.
            self.assertNotEqual(cms2.check("this is a test"), 100)
            self.assertNotEqual(cms.hashes("this is a test"), cms2.hashes("this is a test"))
예제 #7
0
    def test_cms_load(self):
        ''' test loading a count-min sketch from file

            The sketch is exported to ``test.cms``, its MD5 digest is checked,
            and a second sketch is built from that file. Cleanup happens in a
            ``finally`` clause so the file is removed even when one of the
            assertions fails.
        '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        cms = CountMinSketch(width=1000, depth=5)
        self.assertEqual(cms.add('this is a test', 100), 100)
        try:
            cms.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)

            # try loading directly from the exported file
            cms2 = CountMinSketch(filepath=filename)
            self.assertEqual(cms2.elements_added, 100)
            self.assertEqual(cms2.check('this is a test'), 100)
        finally:
            # export() may fail before the file exists; only remove what is
            # really there so the original error is not masked.
            if os.path.exists(filename):
                os.remove(filename)
예제 #8
0
    def test_cms_load_diff_hash(self):
        ''' test loading a count-min sketch from file with a different hash

            The sketch is exported to ``test.cms`` and reloaded with
            ``different_hash`` as its hash function. The element total must
            survive the reload, but looking the key up through the reloaded
            sketch must not return the stored count. The file is removed in a
            ``finally`` clause so a failing assertion does not leave it behind.
        '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        cms = CountMinSketch(width=1000, depth=5)
        self.assertEqual(cms.add('this is a test', 100), 100)
        try:
            cms.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)

            cms2 = CountMinSketch(filepath=filename,
                                  hash_function=different_hash)
            self.assertEqual(cms2.elements_added, 100)
            # should not work since it is a different hash: query the reloaded
            # sketch (cms2) and compare against the stored count. Comparing
            # the original sketch's count with True could never fail.
            self.assertNotEqual(cms2.check('this is a test'), 100)
            self.assertNotEqual(cms.hashes('this is a test'),
                                cms2.hashes('this is a test'))
        finally:
            # export() may fail before the file exists; only remove what is
            # really there so the original error is not masked.
            if os.path.exists(filename):
                os.remove(filename)
예제 #9
0
    def test_cms_load_diff_hash(self):
        ''' test loading a count-min sketch from file with a different hash

            The sketch is exported to ``test.cms`` and reloaded with
            ``different_hash`` as its hash function. The element total must
            survive the reload, but looking the key up through the reloaded
            sketch must not return the stored count. The file is removed in a
            ``finally`` clause so a failing assertion does not leave it behind.
        '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        cms = CountMinSketch(width=1000, depth=5)
        self.assertEqual(cms.add('this is a test', 100), 100)
        try:
            cms.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)

            cms2 = CountMinSketch(filepath=filename,
                                  hash_function=different_hash)
            self.assertEqual(cms2.elements_added, 100)
            # should not work since it is a different hash: query the reloaded
            # sketch (cms2) and compare against the stored count. Comparing
            # the original sketch's count with True could never fail.
            self.assertNotEqual(cms2.check('this is a test'), 100)
            self.assertNotEqual(cms.hashes('this is a test'),
                                cms2.hashes('this is a test'))
        finally:
            # export() may fail before the file exists; only remove what is
            # really there so the original error is not masked.
            if os.path.exists(filename):
                os.remove(filename)
예제 #10
0
# Module-level sketch shared by main() and the report statements below.
cms = CountMinSketch(width=1000, depth=4)


def main():
    """Add 20 values from randint(0, 10), as strings, to the shared sketch.

    Each value is printed as it is added, with a one-second pause between
    values. The loop is bounded so the report statements after the entry-point
    guard are reached.
    """
    for _ in range(20):
        msg = str(randint(0, 10))
        cms.add(msg)
        print(msg)
        time.sleep(1)


if __name__ == "__main__":
    main()

# NOTE(review): the statements below sit outside the __main__ guard, so they
# also run when this module is imported (reporting on and exporting a sketch
# that main() never filled) - confirm whether that is intended.
print("frequency of 9 = ", cms.check('9'))  # estimated count for '9'
print("frequency of 7 = ", cms.check('7'))
print("frequency of 2 = ", cms.check('2'))
cms.export('cms_synopsis')  # write the sketch to the file 'cms_synopsis'
예제 #11
0
                stock_symbol_freq_cms = stock_freq_cms.check(stock_symbol)
                sketch_endtime = time.process_time()
                sketch_qrytime = sketch_qrytime + (sketch_endtime - sketch_starttime)

                stock_symbol_freq = stock_symbol_dist[stock_symbol]
                accuracy = 1 - abs(stock_symbol_freq-stock_symbol_freq_cms)/stock_symbol_freq
                if accuracy < 0: accuracy = 0
                total_accuracy = total_accuracy + accuracy
                stock_symbol_file.write(stock_symbol + "," + str(no_of_record))
                stock_symbol_file.write("," + str(width) + "," + str(depth))
                stock_symbol_file.write("," + str(stock_symbol_freq) + "," + str(stock_symbol_freq_cms))
                stock_symbol_file.write("," + str(accuracy) + "\n")

            symbol_count = len(stock_symbol_dist)
            avg_accuracy = total_accuracy / symbol_count
            stock_freq_cms.export(cms_filename)
            stock_freq_cms.clear()

            stock_symbol_file.write("Averge," + str(no_of_record))
            stock_symbol_file.write("," + str(width) + "," + str(depth))
            stock_symbol_file.write(",,," + str(avg_accuracy) + "\n")

            timenow = datetime.datetime.now()
            cms_endtime = str(timenow.strftime("%x")) + ' ' + str((timenow.strftime("%X")))

            process_file.write(str(no_of_record) + "," + str(symbol_count) + "," + str(width) + "," + str(depth))
            process_file.write("," + str(avg_accuracy) + "," + str(os.path.getsize(cms_filename)/1024))
            process_file.write("," + str(sketch_time) + "," + str(sketch_qrytime) + "," + cms_starttime + "," + cms_endtime + "\n")

            stock_trade_file.close()
            stock_symbol_file.close()