-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
73 lines (59 loc) · 1.6 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""A comparation about """
from bloomfilter import BloomFilter
import sys
import functools
import time
import cPickle
import hashlib
import uuid
def timeit(func):
    """Decorate ``func`` so that every call prints how long it took.

    The returned wrapper forwards all positional and keyword arguments to
    ``func``, prints the elapsed wall-clock time (measured with
    ``time.time()``) as ``costs <seconds>s.`` and returns whatever ``func``
    returned.
    """
    @functools.wraps(func)
    def wrapper(*arg, **kw):
        start = time.time()
        # Forward the caller's arguments and keep the result; previously the
        # wrapper accepted arguments but silently dropped them, and always
        # returned None.
        result = func(*arg, **kw)
        end = time.time()
        # Single-argument parenthesised print gives the same output whether
        # print is a statement or a function.
        print("costs {time}s.".format(time=end - start))
        return result
    return wrapper
@timeit
def test_bloom():
    """Feed 100000 UUID strings into a BloomFilter and print its container size.

    The filter is created with the arguments ``(100000, 0.0001)``. A key is
    added only when the filter does not already report it as present.
    """
    bloom = BloomFilter(100000, 0.0001)
    for _ in range(100000):
        key = str(uuid.uuid1())
        if key in bloom:
            continue
        bloom.add(key)
    # The report is labelled with the name of the currently running function.
    own_name = sys._getframe().f_code.co_name
    print("{name} costs {bytes} bytes.".format(
        name=own_name,
        bytes=bloom.container_size(),
    ))
@timeit
def test_set():
    """Collect 100000 UUID strings in a set and print the size of its pickle.

    The reported figure is ``sys.getsizeof`` of the string produced by
    ``cPickle.dumps`` for the populated set.
    """
    seen = set()
    for _ in range(100000):
        seen.add(str(uuid.uuid1()))
    pickled = cPickle.dumps(seen)
    # The report is labelled with the name of the currently running function.
    own_name = sys._getframe().f_code.co_name
    print("{name} costs {bytes} bytes.".format(
        name=own_name,
        bytes=sys.getsizeof(pickled),
    ))
@timeit
def test_list():
    """Collect 100000 UUID strings in a list, skipping duplicates, and print
    the size of its pickle.

    The reported figure is ``sys.getsizeof`` of the string produced by
    ``cPickle.dumps`` for the populated list.
    """
    unique = []
    for _ in range(100000):
        candidate = str(uuid.uuid1())
        # Membership is checked by scanning the list itself; this list-based
        # approach is what the function measures, so it is kept as is.
        if candidate in unique:
            continue
        unique.append(candidate)
    pickled = cPickle.dumps(unique)
    # The report is labelled with the name of the currently running function.
    own_name = sys._getframe().f_code.co_name
    print("{name} costs {bytes} bytes.".format(
        name=own_name,
        bytes=sys.getsizeof(pickled),
    ))
if __name__ == "__main__":
    # Run the set, Bloom filter and list benchmarks in that order, printing
    # one blank line between consecutive reports.
    benchmarks = (test_set, test_bloom, test_list)
    for position, benchmark in enumerate(benchmarks):
        if position:
            print("")
        benchmark()
#output:
#
#test_set costs 4788979 bytes.
#costs 2.68518805504s.
#
#test_bloom costs 692464 bytes.
#costs 8.05900907516s.
#
#test_list costs 4788943 bytes.
#costs 67.8493359089s
#
#Apparently a Bloom filter is a worthwhile solution when the data becomes very large, because it saves a lot of space.