Example 1
def test_svm():
    # Three toy documents as unigram feature vectors, labelled +1 / -1.
    trainingset = [ngrams(1, "foo foo bar baz"), ngrams(1, "foo foo bar bar baz baz"), ngrams(1, "foo foo bar baz")]
    labels = [1, -1, -1]
    lsc = LinearSVMClassifier()
    for vec, label in zip(trainingset, labels):
        lsc.addFeatureVector(vec, label)
    # classify two new documents
    print(lsc.classify(ngrams(1, "foo foo bar bar baz baz")))
    print(lsc.classify(ngrams(1, "foo foo foo bar baz")))
Example 2
def test_svm():
    # Same toy data as above, with the training set written one entry per line.
    trainingset = [
        ngrams(1, "foo foo bar baz"),
        ngrams(1, "foo foo bar bar baz baz"),
        ngrams(1, "foo foo bar baz")
    ]
    labels = [1, -1, -1]
    lsc = LinearSVMClassifier()
    for vec, label in zip(trainingset, labels):
        lsc.addFeatureVector(vec, label)
    print(lsc.classify(ngrams(1, "foo foo bar bar baz baz")))
    print(lsc.classify(ngrams(1, "foo foo foo bar baz")))
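
Neither snippet shows where `ngrams` or `LinearSVMClassifier` come from: both assume an `ngrams(n, text)` helper that turns raw text into a bag-of-n-grams feature mapping, plus a classifier exposing `addFeatureVector` and `classify`. As a rough sketch only (the whitespace tokenisation and the Counter return type are assumptions, not the actual module), such a helper could look like this:

from collections import Counter

def ngrams(n, text):
    # Whitespace-tokenise and count every contiguous run of n tokens,
    # e.g. ngrams(1, "foo foo bar") -> Counter({('foo',): 2, ('bar',): 1}).
    tokens = text.split()
    return Counter(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))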
Example 3
from cuckoovec import CuckooVector
import numpy as np
import string
from ngrams import *

vocab = list(string.ascii_letters)  # all upper- and lower-case letters
n = 5
t = 2

# form random sparse vectors from <= t random letters and weights
def randsparse(t):
    return dict(zip(np.random.choice(vocab, t), np.random.randn(t)))
   
s1 = [randsparse(t) for i in range(n)]
s2 = [randsparse(t) for i in range(n)]
t1 = ngrams(s1)
print(t1)
t2 = ngrams(s2)
v1 = CuckooVector(t1)
v2 = CuckooVector(t2)

# these should be close to orthogonal.
print(v1.dot(v2))
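
The final print is a quick sanity check that two independently generated random sparse vectors are nearly orthogonal. The same intuition can be checked without `CuckooVector` at all: with only t = 2 non-zero coordinates out of 52 letters, two random vectors rarely share support, so their dot product is usually exactly zero and small on average. A plain-Python version of that check, reusing `randsparse`, `t`, and `np` from above and an explicit sparse dot product rather than the cuckoo-hashed one:

def sparse_dot(a, b):
    # dot product of two {key: weight} sparse vectors
    return sum(w * b[k] for k, w in a.items() if k in b)

# average |dot| over many independent random pairs stays close to zero
dots = [abs(sparse_dot(randsparse(t), randsparse(t))) for _ in range(1000)]
print(np.mean(dots))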
Example 4

def hashindex(elements: List, hashsize: int, gramsize: int):
    return [hash(tp) % hashsize for tp in ngrams(elements, gramsize, True)]
Example 5

def _wordchar_to_idx(self, words: List, hashsize: int, gramsize: int):
    # collect the character n-grams of every word, then hash each one
    # into a bucket in [0, hashsize)
    chars = []
    for w in words:
        chars += [tp for tp in ngrams(list(w), gramsize, True)]
    return [hash(tp) % hashsize for tp in chars]
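
Both helpers apply the hashing trick to n-gram features: each n-gram tuple is reduced to a bucket index with Python's built-in `hash` modulo the table size, so no explicit vocabulary has to be stored. A self-contained sketch of the same idea, with its own tiny character-n-gram generator standing in for the `ngrams` module (which is not shown here):

def char_ngrams(word, n):
    # contiguous character n-grams of one word, as tuples
    chars = list(word)
    return [tuple(chars[i:i + n]) for i in range(len(chars) - n + 1)]

def hashed_indices(words, hashsize, gramsize):
    # map every character n-gram of every word to a bucket in [0, hashsize)
    return [hash(tp) % hashsize
            for w in words
            for tp in char_ngrams(w, gramsize)]

print(hashed_indices(["foo", "bar"], hashsize=1024, gramsize=2))

Note that Python randomises string hashing per process unless PYTHONHASHSEED is fixed, so these indices are stable within a run but not across runs; the original helpers above have the same property.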
Example 6
from ngrams import ngrams

sentence = 'this is a foo bar sentences and i want to ngramize it'

n = 6
sixgrams = ngrams(sentence.split(), n)

for grams in sixgrams:
    print(grams)
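
The call shape here, tokens first and the n-gram order second, is the same as `nltk.util.ngrams`, which could be dropped in instead of the local module. If neither is available, a minimal sliding-window version works for this example; this is a sketch under that assumed signature, not the module's actual code:

def ngrams(tokens, n):
    # yield every contiguous run of n tokens as a tuple, e.g.
    # ngrams("a b c".split(), 2) -> ('a', 'b'), ('b', 'c')
    return zip(*(tokens[i:] for i in range(n)))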