コード例 #1
0
ファイル: ngrams_test.py プロジェクト: cert-ace/cuckoovec
from cuckoovec import CuckooVector
import numpy as np
import string
from ngrams import *

# Vocabulary: every ASCII letter as a one-character string, uppercase first.
vocab = list(string.ascii_uppercase + string.ascii_lowercase)
n = 5  # how many sparse vectors go into each generated sequence
t = 2  # how many (word, weight) draws are made per sparse vector

# Build a random sparse vector out of at most t random words and weights.
def randsparse(t):
  """Return a dict mapping randomly chosen vocab words to random weights.

  Draws t words from the module-level ``vocab`` with ``np.random.choice``
  (which samples with replacement by default) and t weights with
  ``np.random.randn``. A word drawn more than once keeps only the weight
  paired with its last occurrence, so the result has between 1 and t
  entries for t >= 1.
  """
  words = np.random.choice(vocab, t)
  weights = np.random.randn(t)
  return {word: weight for word, weight in zip(words, weights)}
   
# Draw two independent sequences of n random sparse vectors each.
s1 = [randsparse(t) for _ in range(n)]
s2 = [randsparse(t) for _ in range(n)]

# Run each sequence through ngrams(); the first result is echoed for inspection.
t1 = ngrams(s1)
print(t1)
t2 = ngrams(s2)

# Wrap both ngrams() results as CuckooVectors.
v1, v2 = CuckooVector(t1), CuckooVector(t2)

# The two vectors come from independent random draws, so the author expects
# their dot product to be near zero (close to orthogonal).
print(v1.dot(v2))
コード例 #2
0
ファイル: example.py プロジェクト: cert-ace/cuckoovec
# Cross-check CuckooVector results against a dense NumPy reference built from
# the same sparse inputs.
d = 10000000  # high dimension
t = 500000  # number of actual features

# Two sparse inputs; each is expanded with densify() for the NumPy side and
# wrapped in a CuckooVector for the side under test.
m1, m2 = randsparse(d, t), randsparse(d, t)
v1, v2 = densify(m1, d), densify(m2, d)
cv1, cv2 = CuckooVector(m1), CuckooVector(m2)

# For each input, print the NumPy norm followed by the CuckooVector 2-norm.
print("2-norms: ")
for dense, sparse in ((v1, cv1), (v2, cv2)):
  print(np.linalg.norm(dense))
  print(sparse.norm(2))

print("dots: ")
print(np.dot(v1, v2))
print(cv1.dot(cv2))

# Sum the two vectors in both representations.
# NOTE(review): cv1.add(cv2) presumably updates cv1 in place, since its return
# value is discarded and cv1 is read right after -- confirm against CuckooVector.
v1 = v1 + v2
cv1.add(cv2)
print("2-norm of sums: ")
print(np.linalg.norm(v1))
print(cv1.norm(2))

# Accumulate the absolute difference between both sums over every key that
# appears in either input; keys are converted with int() to index the dense sum.
keys = set().union(m1, m2)
dist = 0
for k in keys:
  dist += abs(v1[int(k)] - cv1[k])
print("1-distance between sums: ")
print(dist)