Beispiel #1
0
def main():
    '''
    Classify the text given as the first command-line argument.

    Entry point of the 'chitragoopt' executable script (defined in setup.py).

    The function:
      1. builds labelled features from the ``movie_reviews`` corpus and keeps
         the test portion returned by ``split_label_feats(..., split=0.75)``;
      2. turns the whitespace-separated words of ``sys.argv[1]`` into a
         feature set with ``bag_of_words``;
      3. loads a previously pickled classifier from ``my_classifier.pickle``
         in the current working directory;
      4. prints the classifier's accuracy on the test features, then prints
         the predicted label for the command-line text.

    Raises:
        IndexError: if no command-line argument was supplied.
        OSError: if ``my_classifier.pickle`` cannot be opened.
    '''

    lfeats = label_feats_from_corpus(movie_reviews)
    # Only the test portion is used: the classifier is loaded from disk
    # instead of being trained here, so the training portion is discarded.
    # (The original called split_label_feats twice with identical arguments;
    # a single call is assumed to be equivalent -- TODO confirm the helper
    # has no side effects.)
    _, test_feats = split_label_feats(lfeats, split=0.75)
    # nb_classifier = NaiveBayesClassifier.train(train_feats)
    words = sys.argv[1].split()
    print(words)
    negfeat = bag_of_words(words)

    # Pickle data is binary: the file must be opened with 'rb' (text mode
    # fails on Python 3). The context manager closes the file even if
    # loading raises.
    # SECURITY NOTE: pickle.load can execute arbitrary code; only load a
    # classifier file that comes from a trusted source.
    with open('my_classifier.pickle', 'rb') as f:
        nb_classifier = pickle.load(f)
    print(accuracy(nb_classifier, test_feats))
    print(nb_classifier.classify(negfeat))

    # The same classification is printed 50 more times, as in the original.
    # NOTE(review): purpose unclear -- presumably a smoke/timing check; confirm.
    for _ in range(50):
        print(nb_classifier.classify(negfeat))
Beispiel #2
0
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
from nltk.probability import DictionaryProbDist
from nltk.probability import LaplaceProbDist
from featx import label_feats_from_corpus, split_label_feats, bag_of_words  # featx.py debe estar en el mismo dir.
import time

# `movie_reviews` is used right below, but no import before this point binds
# the name, so the script would fail with NameError. Import it before first use.
from nltk.corpus import movie_reviews

# Show the category labels of the corpus.
print(movie_reviews.categories())
# ['neg', 'pos']

# Build the labelled feature mapping from the whole corpus.
lfeats = label_feats_from_corpus(movie_reviews)

print(lfeats.keys())
# dict_keys(['neg', 'pos'])

# Split the labelled features into a training and a test portion
# (presumably 75% / 25% given split=0.75 -- confirm against featx).
train_feats, test_feats = split_label_feats(lfeats, split=0.75)
print(len(train_feats))

print(len(test_feats))

# Train a Naive Bayes classifier on the training portion and list its labels.
nb_classifier = NaiveBayesClassifier.train(train_feats)
print(nb_classifier.labels())

# Classify two hand-written sample sentences.
negfeat = bag_of_words(['the', 'plot', 'was', 'ludicrous'])
print(nb_classifier.classify(negfeat))

posfeat = bag_of_words(['kate', 'winslet', 'is', 'accessible'])
print(nb_classifier.classify(posfeat))

# Report accuracy on the held-out test portion.
print(accuracy(nb_classifier, test_feats))
from featx import label_feats_from_corpus, split_label_feats, high_information_words, bag_of_words_in_set
from classification import precision_recall, MaxVoteClassifier    # classification.py debe estar en el mismo dir.
from nltk.corpus import movie_reviews
from nltk.classify.util import accuracy
from nltk.classify import NaiveBayesClassifier
from nltk.classify import MaxentClassifier
from nltk.classify import DecisionTreeClassifier
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.svm import LinearSVC

# Collect the corpus words per label and derive the high-information word set.
labels = movie_reviews.categories()
labeled_words = [
    (label, movie_reviews.words(categories=[label]))
    for label in labels
]
high_info_words = set(high_information_words(labeled_words))


def feat_det(words):
    """Return bag_of_words_in_set(words, high_info_words)."""
    return bag_of_words_in_set(words, high_info_words)


# Rebuild the labelled features with the restricted detector, then split them.
lfeats = label_feats_from_corpus(movie_reviews, feature_detector=feat_det)
train_feats, test_feats = split_label_feats(lfeats)

print("######################################################################")
# Train Naive Bayes and report accuracy, then per-label precision and recall.
nb_classifier = NaiveBayesClassifier.train(train_feats)
print(f"Accuracy Naive Bayes: {accuracy(nb_classifier, test_feats)}")
# Accuracy: 0.91
nb_precisions, nb_recalls = precision_recall(nb_classifier, test_feats)
print(f"Precisions Naive Bayes Pos: {nb_precisions['pos']}")
# Precisions Pos: 0.8988326848249028
print(f"Precisions Naive Bayes Neg: {nb_precisions['neg']}")
# Precisions Neg: 0.9218106995884774
print(f"Recalls Naive Bayes Pos: {nb_recalls['pos']}")
# Recalls Pos: 0.924
print(f"Recalls Naive Bayes Neg: {nb_recalls['neg']}")
# Recalls Neg: 0.896
print("######################################################################")