from cuckoovec import CuckooVector

# Build a CuckooVector from a small feature-name -> value mapping.
vec = CuckooVector({"f1": 3, "f2": 8, "f9": 2})

# Walk the (key, value) pairs and print only the key of each entry.
for feature, _value in vec.items():
    print(feature)
import string

import numpy as np

from cuckoovec import CuckooVector
from ngrams import *  # kept last so any names it exports still take precedence

# Vocabulary of single letters: uppercase first, then lowercase.
vocab = list(string.ascii_uppercase + string.ascii_lowercase)
n = 5  # number of sparse vectors generated per sequence
t = 2  # number of (word, weight) draws per sparse vector


def randsparse(t):
    """Return a random sparse vector as a ``{word: weight}`` dict.

    ``t`` words are drawn from the module-level ``vocab`` with
    ``np.random.choice`` and paired with ``t`` weights from
    ``np.random.randn``.  Words drawn more than once collapse into a single
    key (the last weight wins), so the result has at most ``t`` entries.
    """
    words = np.random.choice(vocab, t)
    weights = np.random.randn(t)
    return dict(zip(words, weights))


# Two independent sequences of n random sparse vectors each.
s1 = [randsparse(t) for _ in range(n)]
s2 = [randsparse(t) for _ in range(n)]

t1 = ngrams(s1)
print(t1)
t2 = ngrams(s2)

v1 = CuckooVector(t1)
v2 = CuckooVector(t2)

# These should be close to orthogonal.
print(v1.dot(v2))
def densify(m, d):
    """Expand the sparse mapping ``m`` into a dense 1-D array of length ``d``.

    Every key of ``m`` is converted with ``int()`` and used as the index at
    which its value is stored; all other positions remain zero.
    """
    dense = np.zeros((d,))
    for index, value in m.items():
        dense[int(index)] = value
    return dense


# Evaluate accuracy of low-dimensional linear algebra operations
d = 10000000  # high dimension
t = 500000  # number of actual features

# NOTE(review): randsparse is called with two arguments (d, t) here, which
# does not match the one-argument randsparse(t) defined earlier in this file;
# presumably a (dimension, count) variant with int-like keys is defined
# elsewhere -- TODO confirm.
m1 = randsparse(d, t)
m2 = randsparse(d, t)

# Dense reference vectors and their CuckooVector counterparts.
v1 = densify(m1, d)
v2 = densify(m2, d)
cv1 = CuckooVector(m1)
cv2 = CuckooVector(m2)

# For each vector, print the dense norm followed by the CuckooVector norm.
print("2-norms: ")
for dense_vec, cuckoo_vec in ((v1, cv1), (v2, cv2)):
    print(np.linalg.norm(dense_vec))
    print(cuckoo_vec.norm(2))

# Dense dot product followed by the CuckooVector dot product.
print("dots: ")
print(np.dot(v1, v2))
print(cv1.dot(cv2))

# Sum the two vectors in both representations.
v1 = v1 + v2
cv1.add(cv2)
print("2-norm of sums: ")