def test_init_no_training(self):
    """A classifier built with no documents starts empty; training fills it."""
    untrained = MultinomialNB()
    # Fresh instance exposes empty vocabulary and label sets.
    assert untrained.vocabulary == set()
    assert untrained.labels == set()
    # After training on the fixture corpus, reuse the sibling checks.
    untrained.train(*self.training_docs)
    self.test_labels()
    self.test_vocabulary()
def setup(self):
    """Build the toy Chinese/Japan corpus and a classifier trained on it."""
    raw_docs = [
        ('Chinese Bejing Chinese', 'yes'),
        ('Chinese Chinese Shanghai', 'yes'),
        ('Chinese Macao', 'yes'),
        ('Tokyo Japan Chinese', 'no'),
    ]
    # Classifier expects pre-tokenized documents: (tokens, label) pairs.
    self.training_docs = [(text.split(), label) for text, label in raw_docs]
    self.classifier = MultinomialNB(*self.training_docs)
    self.make_snapshot()
def test_top_features(self):
    """With top_features set, only the most frequent words per label count."""
    docs = [
        (['happy', 'joy', 'smile'], 'positive'),
        (['happy', 'joy', 'frown'], 'positive'),
        (['sad', 'frown', 'tired'], 'negative'),
        (['sad', 'tired', 'bored'], 'negative'),
    ]
    classifier = MultinomialNB()
    classifier.top_features = 2
    classifier.train(*docs)
    # Per-label top-2 counters keep only the two most frequent words.
    assert classifier._most_common['positive'].store == {'happy': 2, 'joy': 2}
    assert classifier._most_common['negative'].store == {'sad': 2, 'tired': 2}
    # Words outside the top features must not influence the probabilities:
    # adding 'smile' (not in the positive top-2) changes nothing.
    first = classifier.prob_all(['happy', 'smile'])
    second = classifier.prob_all(['happy', 'smile', 'smile'])
    assert first == second, classifier._most_common
    # Likewise 'frown' is outside the negative top-2.
    first = classifier.prob_all(['sad', 'tired'])['negative']
    second = classifier.prob_all(['sad', 'tired', 'frown'])['negative']
    assert first == second, classifier._most_common
def test_ngrams_multinomialnb(self):
    """Integration test: n-gram extractor output feeds the Naive Bayes classifier."""
    nb = MultinomialNB()
    # Extract unigrams through trigrams from the fixture document.
    self.extractor.min_n = 1
    self.extractor.max_n = 3
    # Smoke check only: training on extracted features must not raise.
    nb.train([self.extractor.extract(self.document), 'positive'])
def test_train_many_document(self):
    """Training on the same document repeatedly must not duplicate vocabulary.

    The vocabulary is a set, so five copies of one document yield exactly
    the four distinct tokens.
    """
    doc = (['one', 'document', 'already', 'tokenized'], 'label')
    classifier = MultinomialNB(*([doc] * 5))
    # Set literal instead of set([...]) — flake8-comprehensions C405 idiom.
    assert classifier.vocabulary == {'one', 'document', 'already', 'tokenized'}