예제 #1
0
class HierarchicalClassifier(Experiment):
    """Two-stage experiment: subjectivity first, then polarity.

    One ``MultinomialNB`` decides between ``'neutral'`` and ``'subjective'``.
    Documents that are not judged neutral are handed to a second
    ``MultinomialNB`` that was trained on ``'positive'`` / ``'negative'``
    examples.
    """

    # Labels the polarity classifier may be trained on.
    _POLARITY_LABELS = frozenset(['positive', 'negative'])

    # The subjectivity classifier stops being trained once the sum of its
    # per-label counts reaches this value. The polarity classifier is not
    # capped.
    _SUBJECTIVE_TRAINING_CAP = 8751

    def _setup(self):
        """Create the two untrained classifiers."""
        self.subjective = MultinomialNB()
        self.polarity = MultinomialNB()
        # NOTE: setting `top = 100000` on both classifiers and forcing a
        # constant prior of 0.5 are currently disabled.

    def _predict(self, features):
        """Return the ``(label, probability)`` pair for *features*.

        A ``'neutral'`` verdict from the subjectivity classifier is returned
        as is; for any other verdict the polarity classifier's result is
        returned instead.
        """
        label, probability = self.subjective.classify(features)
        if label == 'neutral':
            return label, probability
        return self.polarity.classify(features)

    def _train(self, features, label):
        """Feed one labelled example to the appropriate classifier(s).

        Non-neutral examples train the polarity classifier with their own
        label and are presented to the subjectivity classifier as
        ``'subjective'``. Neutral examples only reach the subjectivity
        classifier.

        Raises:
            ValueError: if *label* is not 'neutral', 'positive' or
                'negative'.
        """
        if label != 'neutral':
            # Explicit check instead of `assert`, which is stripped when
            # Python runs with -O.
            if label not in self._POLARITY_LABELS:
                raise ValueError('unexpected label: %r' % (label,))
            self.polarity.train((features, label))
            label = 'subjective'
        # From here on `label` is either 'neutral' or 'subjective'.
        if self._subjective_count_total() < self._SUBJECTIVE_TRAINING_CAP:
            self.subjective.train((features, label))

    def _subjective_count_total(self):
        """Return the sum of the subjectivity classifier's per-label counts."""
        counts = self.subjective._label_count
        return sum(counts[name] for name in counts)

    def pickle_dumps(self):
        """Return the extractor and both classifiers pickled as one object.

        The classifiers are stored as a ``(subjective, polarity)`` tuple
        inside a ``Pickled`` wrapper, using the highest pickle protocol.
        """
        pickled = Pickled(self.extractor, (self.subjective, self.polarity))
        return pickle.dumps(pickled, pickle.HIGHEST_PROTOCOL)
예제 #2
0
 def test_init_no_training(self):
     """Check a new classifier is empty, then train it and rerun checks.

     After the emptiness checks the classifier is trained on
     ``self.training_docs`` and the label and vocabulary tests are invoked.
     """
     fresh = MultinomialNB()
     # Before any training both collections must compare equal to an
     # empty set.
     for attribute in ('vocabulary', 'labels'):
         assert getattr(fresh, attribute) == set()
     fresh.train(*self.training_docs)
     # Re-run the sibling tests once training has happened.
     self.test_labels()
     self.test_vocabulary()
예제 #3
0
class SingleClassifier(Experiment):
    """Experiment that trains and queries one ``MultinomialNB`` classifier."""

    def _setup(self):
        """Instantiate the classifier used by this experiment."""
        # NOTE: `self.nb.top = 100000` is currently disabled.
        self.nb = MultinomialNB()

    def _predict(self, features):
        """Classify *features* and pass the classifier's result through."""
        prediction = self.nb.classify(features)
        return prediction

    def _train(self, features, label):
        """Train the classifier on a single ``(features, label)`` example."""
        example = (features, label)
        self.nb.train(example)

    def pickle_dumps(self):
        """Return the extractor and classifier pickled inside a ``Pickled``.

        The highest available pickle protocol is used.
        """
        bundle = Pickled(self.extractor, self.nb)
        return pickle.dumps(bundle, pickle.HIGHEST_PROTOCOL)
예제 #4
0
    def test_top_features(self):
        """Check behaviour when ``top_features`` is limited to 2.

        First verifies which features are kept per label, then verifies
        that adding a token absent from those kept features does not change
        the computed probabilities.
        """
        training = [
            (['happy', 'joy', 'smile'], 'positive'),
            (['happy', 'joy', 'frown'], 'positive'),
            (['sad', 'frown', 'tired'], 'negative'),
            (['sad', 'tired', 'bored'], 'negative'),
        ]
        nb = MultinomialNB()
        nb.top_features = 2
        nb.train(*training)

        # Each label should keep only its two most frequent features.
        expected_stores = (
            ('positive', {'happy': 2, 'joy': 2}),
            ('negative', {'sad': 2, 'tired': 2}),
        )
        for label, expected in expected_stores:
            kept = nb._most_common[label].store
            assert kept == expected

        # Repeating 'smile' (not a kept feature) must not alter the result
        # for any label.
        baseline = nb.prob_all(['happy', 'smile'])
        with_extra = nb.prob_all(['happy', 'smile', 'smile'])
        assert baseline == with_extra, nb._most_common

        # Adding 'frown' (not a kept feature) must not alter the negative
        # probability.
        baseline = nb.prob_all(['sad', 'tired'])['negative']
        with_extra = nb.prob_all(['sad', 'tired', 'frown'])['negative']
        assert baseline == with_extra, nb._most_common
예제 #5
0
class OldClassifier(Experiment):
    """Experiment combining a pickled legacy classifier with a new one.

    A ``MultinomialNB`` trained during the experiment decides between
    ``'neutral'`` and ``'subjective'``. For non-neutral documents the
    classifier loaded from ``classifier_path`` supplies the
    positive/negative verdict.
    """

    # Location of the pickled legacy classifier. Kept as a class attribute
    # so a subclass can point at a different snapshot (a `.5000.pickle`
    # file in the same directory appears in a commented-out alternative).
    classifier_path = (
        r"R:\_Other\Twitter\TwitterCorpus\results_sentiment"
        r"\unbalanced_1-gram_stopword"
        r"\twitter-sentiment_classifier.1650000.pickle"
    )

    def _setup(self):
        """Patch the extractor, load the legacy classifier, create the new one.

        Replaces ``self.extractor.extract`` with a wrapper around the
        ``old_classify`` module, unpickles the legacy classifier into
        ``self.classifier`` and creates an untrained ``MultinomialNB`` as
        ``self.subjective``.
        """
        import old_classify

        def extract(x):
            # NOTE(review): the return values of these two calls are
            # discarded. If `x` is an immutable string and the helpers
            # return a cleaned copy, they have no effect here -- confirm
            # against old_classify before relying on them.
            old_classify.filter_text(x)
            old_classify.regularlize_text(x)
            return old_classify.extract_features(x)
        self.extractor.extract = extract

        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        # SECURITY: unpickling can execute arbitrary code; only load
        # classifier files that come from a trusted source.
        with open(self.classifier_path, mode='rb') as f:
            self.classifier = pickle.load(f)
        self.subjective = MultinomialNB()

    def _predict(self, features):
        """Return a ``(label, probability)`` pair for *features*.

        A ``'neutral'`` verdict from the subjectivity classifier is returned
        as is. Otherwise the legacy classifier's 'pos' and 'neg'
        probabilities are compared; 'positive' wins only when strictly
        greater, so ties are reported as 'negative'.
        """
        label, probability = self.subjective.classify(features)
        if label == 'neutral':
            return label, probability
        sentiment = self.classifier.prob_classify(features)
        pos, neg = sentiment.prob('pos'), sentiment.prob('neg')
        if pos > neg:
            return 'positive', pos
        return 'negative', neg

    def _train_func(self, features, label):
        """Train the subjectivity classifier on one example.

        Every label other than ``'neutral'`` is collapsed to
        ``'subjective'``; the legacy classifier is never trained.
        """
        if label != 'neutral':
            label = 'subjective'
        self.subjective.train((features, label))

    def _train(self, features, label):
        """Delegate to ``_train_func``.

        NOTE(review): the other experiment classes name this hook
        ``_train``; this method makes both names available -- confirm which
        one the ``Experiment`` base class actually calls.
        """
        self._train_func(features, label)
예제 #6
0
파일: nlp_test.py 프로젝트: bwbaugh/infer
 def test_ngrams_multinomialnb(self):
     """Train a ``MultinomialNB`` on features extracted from the fixture."""
     # Integration test with Naive Bayes classifier.
     classifier = MultinomialNB()
     # Presumably bounds the n-gram sizes produced by the extractor to
     # 1 through 3 -- confirm against the extractor implementation.
     self.extractor.min_n, self.extractor.max_n = 1, 3
     features = self.extractor.extract(self.document)
     # The single training example is passed as a two-element list of
     # [features, label].
     classifier.train([features, "positive"])