Ejemplo n.º 1
0
def test_countvectorizer_custom_vocabulary():
    """Check that a caller-supplied vocabulary drives the vectorizer.

    After fitting, the ``vocabulary`` attribute must contain exactly the
    supplied terms, and the transformed matrix must have one column per term.
    """
    favourites = ["pizza", "beer"]
    vectorizer = CountVectorizer(vocabulary=favourites)
    vectorizer.fit(JUNK_FOOD_DOCS)

    # Fitting must neither add terms to nor drop terms from the vocabulary.
    assert_equal(set(vectorizer.vocabulary), set(favourites))

    counts = vectorizer.transform(JUNK_FOOD_DOCS)
    n_columns = counts.shape[1]
    assert_equal(n_columns, len(favourites))
Ejemplo n.º 2
0
class SVM:
    """Text classifier: a fixed-vocabulary CountVectorizer feeding a LinearSVC."""

    def __init__(self, training, classes, vocabulary):
        """Load the stored data and fit the classifier.

        Args:
            training: Argument handed to ``load``; the loaded object's
                ``tolist()`` result is used as the training samples.
            classes: Argument handed to ``load``; the loaded object is used
                as the training targets.
            vocabulary: Argument handed to ``load``; the loaded object's
                ``tolist()`` result becomes the vectorizer vocabulary.

        NOTE(review): ``load`` is presumably ``numpy.load`` and the training
        samples are presumably already vectorized with the same vocabulary --
        confirm against the file's imports and the data files.
        """
        terms = load(vocabulary).tolist()
        self.cv = CountVectorizer(vocabulary=terms)

        self.samples = load(training).tolist()
        self.classes = load(classes)

        # The vectorizer is not used here: the classifier is fitted directly
        # on the loaded samples.
        self.classifier = LinearSVC()
        self.classifier.fit(self.samples, self.classes)

    def classify(self, text):
        """Return the predicted class for a single piece of text.

        The text is wrapped in a one-element list, vectorized, and the first
        (only) prediction is returned.
        """
        vectorized = self.cv.transform([text])
        predictions = self.classifier.predict(vectorized)
        return predictions[0]