예제 #1
0
    def test_transform(self):
        """Check ``transform`` output and its agreement with ``fit_transform``.

        A vectorizer limited to a single feature is fitted on ``self.docs``;
        the matrix returned by ``transform`` must equal ``[[0], [2], [0]]``.
        After switching ``binary`` on, ``fit_transform`` and ``transform``
        must yield equal matrices for the same documents.
        """
        vectorizer = CountVectorizer(
            max_doc_freq=2, min_freq=1, max_features=1
        )
        vectorizer.fit(self.docs)

        # transform() returns a pair; only the second item (the matrix) is
        # inspected in this test.
        _, counts = vectorizer.transform(self.docs)
        expected = np.asarray([[0], [2], [0]])
        # NOTE(review): `.A` looks like the dense-array shorthand of a SciPy
        # sparse matrix -- if so, newer SciPy releases drop it in favour of
        # `.toarray()`; confirm the return type before changing.
        npt.assert_array_equal(counts.A, expected)

        # With binary mode enabled, refitting and plain transforming must
        # agree on the resulting matrix.
        vectorizer.binary = True
        _, refit_matrix = vectorizer.fit_transform(self.docs)
        _, transform_matrix = vectorizer.transform(self.docs)
        npt.assert_array_equal(refit_matrix.A, transform_matrix.A)
예제 #2
0
    def test_with_special_tokens(self):
        """Check ``transform`` after swapping in a special-token vocabulary.

        The vectorizer is fitted on ``self.docs``, then its ``vocab`` is
        replaced by a ``Vocabulary`` built from the fitted ``idx2tok`` with
        ``use_special_tokens=True``. The matrix returned by ``transform``
        must equal ``[[0], [2], [0]]``.
        """
        vectorizer = CountVectorizer(
            max_doc_freq=2, min_freq=1, max_features=1
        )
        vectorizer.fit(self.docs)

        # Rebuild the vocabulary from the fitted index-to-token mapping, this
        # time with special tokens enabled, and install it on the vectorizer.
        fitted_idx2tok = vectorizer.vocab.idx2tok
        vectorizer.vocab = Vocabulary(fitted_idx2tok, use_special_tokens=True)

        # Only the matrix half of the returned pair is checked.
        _, counts = vectorizer.transform(self.docs)
        expected = np.asarray([[0], [2], [0]])
        # NOTE(review): `.A` looks like the dense-array shorthand of a SciPy
        # sparse matrix -- if so, newer SciPy releases drop it in favour of
        # `.toarray()`; confirm the return type before changing.
        npt.assert_array_equal(counts.A, expected)