class HierarchicalClassifier(Experiment):
    """Two-stage classifier evaluated on a test set.

    Stage one (``subjective``) decides 'neutral' vs. 'subjective'; documents
    judged subjective are handed to stage two (``polarity``), which labels
    them 'positive' or 'negative'.
    """

    # Cap on total documents fed to the subjectivity model. Previously a
    # hard-coded literal inside _train; exposed as a class attribute so
    # experiments can tune it without changing behavior of existing code.
    subjective_train_cap = 8751

    def _setup(self):
        # One independent Naive Bayes model per stage of the hierarchy.
        self.subjective = MultinomialNB()
        self.polarity = MultinomialNB()

    def _predict(self, features):
        """Return a (label, probability) pair for *features*.

        Neutral predictions short-circuit at stage one; anything else is
        re-classified by the polarity model.
        """
        label, probability = self.subjective.classify(features)
        if label == 'neutral':
            return label, probability
        return self.polarity.classify(features)

    def _train(self, features, label):
        """Train both stages on one (features, label) document.

        Non-neutral documents train the polarity model with their original
        label, then are collapsed to 'subjective' for the stage-one model.
        """
        if label != 'neutral':
            assert label in ('positive', 'negative')
            self.polarity.train((features, label))
            label = 'subjective'
        assert label in ('neutral', 'subjective')
        # Stop feeding the subjectivity model once it has seen
        # subjective_train_cap documents (counted across all labels).
        seen = sum(self.subjective._label_count[x]
                   for x in self.subjective._label_count)
        if seen < self.subjective_train_cap:
            self.subjective.train((features, label))

    def pickle_dumps(self):
        """Serialize the extractor together with both trained models."""
        pickled = Pickled(self.extractor, (self.subjective, self.polarity))
        return pickle.dumps(pickled, pickle.HIGHEST_PROTOCOL)
def test_init_no_training(self):
    """A freshly constructed classifier starts empty; training fills it."""
    nb = MultinomialNB()
    # Before any training both the vocabulary and the label set are empty.
    assert nb.vocabulary == set()
    assert nb.labels == set()
    # Train on the shared fixture, then reuse the existing checks to
    # validate the populated state.
    nb.train(*self.training_docs)
    self.test_labels()
    self.test_vocabulary()
class SingleClassifier(Experiment):
    """Train one flat Naive Bayes model and evaluate it on a test set."""

    def _setup(self):
        # Single model: no subjectivity/polarity split.
        self.nb = MultinomialNB()

    def _predict(self, features):
        """Delegate classification straight to the underlying model."""
        return self.nb.classify(features)

    def _train(self, features, label):
        """Train on a single (features, label) document."""
        self.nb.train((features, label))

    def pickle_dumps(self):
        """Serialize the extractor and model at the highest pickle protocol."""
        return pickle.dumps(Pickled(self.extractor, self.nb),
                            pickle.HIGHEST_PROTOCOL)
def test_top_features(self):
    """With top_features set, only the most frequent terms per label count."""
    docs = [
        (['happy', 'joy', 'smile'], 'positive'),
        (['happy', 'joy', 'frown'], 'positive'),
        (['sad', 'frown', 'tired'], 'negative'),
        (['sad', 'tired', 'bored'], 'negative'),
    ]
    classifier = MultinomialNB()
    classifier.top_features = 2
    classifier.train(*docs)

    # Exactly the two most common features per label are retained.
    assert classifier._most_common['positive'].store == {'happy': 2, 'joy': 2}
    assert classifier._most_common['negative'].store == {'sad': 2, 'tired': 2}

    # Features outside the top set must not shift the probabilities:
    # repeating a non-top token leaves the distribution unchanged.
    before = classifier.prob_all(['happy', 'smile'])
    after = classifier.prob_all(['happy', 'smile', 'smile'])
    assert before == after, classifier._most_common
    before = classifier.prob_all(['sad', 'tired'])['negative']
    after = classifier.prob_all(['sad', 'tired', 'frown'])['negative']
    assert before == after, classifier._most_common
class OldClassifier(Experiment):
    """Hybrid experiment: new subjectivity model + legacy pickled sentiment
    classifier loaded from disk (old_classify pipeline)."""

    def _setup(self):
        import old_classify

        def extract(x):
            # NOTE(review): return values of filter_text/regularlize_text are
            # discarded — presumably they mutate x in place; confirm against
            # old_classify, otherwise these two calls are no-ops.
            old_classify.filter_text(x)
            old_classify.regularlize_text(x)
            features = old_classify.extract_features(x)
            return features

        # Replace the extractor's feature extraction with the legacy pipeline.
        self.extractor.extract = extract
        # tokens = old_classify.tokenizer.tokenize(x)
        # tokens = old_classify.regularlize_tokens(tokens)

        # Python 2/3 compatibility: prefer the C pickle implementation.
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        # Load the legacy sentiment model trained on 1,650,000 documents.
        # twitter-sentiment_classifier.1650000.pickle
        # with open(r"R:\_Other\Twitter\TwitterCorpus\results_sentiment\unbalanced_1-gram_stopword\twitter-sentiment_classifier.5000.pickle", mode='rb') as f:
        with open(r"R:\_Other\Twitter\TwitterCorpus\results_sentiment\unbalanced_1-gram_stopword\twitter-sentiment_classifier.1650000.pickle", mode='rb') as f:
            self.classifier = pickle.load(f)
        # Fresh subjectivity gate, trained in-process via _train_func.
        self.subjective = MultinomialNB()

    def _predict(self, features):
        """Return (label, probability): neutral via the subjectivity model,
        otherwise polarity via the legacy classifier's prob distribution."""
        label, probability = self.subjective.classify(features)
        if label == 'neutral':
            return label, probability
        sentiment = self.classifier.prob_classify(features)
        prob, neg, = sentiment.prob('pos'), sentiment.prob('neg')
        if prob > neg:
            return 'positive', prob
        else:
            return 'negative', neg

    # NOTE(review): sibling experiments name this hook _train, not
    # _train_func — confirm which name the Experiment base class invokes;
    # if it calls _train, the subjectivity model here never gets trained.
    def _train_func(self, features, label):
        """Collapse any non-neutral label to 'subjective' and train the
        subjectivity model only (the legacy polarity model is pre-trained)."""
        if label != 'neutral':
            label = 'subjective'
        assert label in set(['neutral', 'subjective'])
        self.subjective.train((features, label))
def test_ngrams_multinomialnb(self):
    """Integration test: n-gram features feed cleanly into MultinomialNB."""
    classifier = MultinomialNB()
    # Extract unigrams through trigrams from the sample document.
    self.extractor.min_n, self.extractor.max_n = 1, 3
    features = self.extractor.extract(self.document)
    # Pass the document as a (features, label) tuple, consistent with every
    # other train() call site (the original passed a list).
    classifier.train((features, "positive"))
    # The original asserted nothing; verify training registered the label.
    assert "positive" in classifier.labels