예제 #1
0
# Evaluate the already-trained NLTK classifier on the held-out tweets.
testing_set = nltk.classify.apply_features(extract_features, test_tweets)

for tweet, _label in test_tweets:
    prediction = classifier.classify(extract_features(tweet))
    print(prediction)

print(nltk.classify.accuracy(classifier, testing_set))

classifier.show_most_informative_features(5)

"""
pipeline = Pipeline([('tfidf', TfidfTransformer()),
                      ('chi2', SelectKBest(chi2, k='all')),
                      ('nb', MultinomialNB())])
"""
# TF-IDF weighting -> chi2 feature selection (k='all', so a pass-through)
# -> multinomial Naive Bayes, wrapped for NLTK below.
pipeline = Pipeline(
    [
        ('tfidf', TfidfTransformer()),
        ('chi2', SelectKBest(chi2, k='all')),
        ('nb', MultinomialNB()),
    ]
)

classif = SklearnClassifier(pipeline)
classif.train(training_set)

# Compare the scikit-learn pipeline against the same test tweets.
print(classif.labels())
for tweet, _label in test_tweets:
    print(classif.classify(extract_features(tweet)))

print(nltk.classify.accuracy(classif, testing_set))
예제 #2
0
                # Presumably closes the pickle file opened above for the
                # Naive Bayes classifier -- TODO confirm against the unseen
                # code preceding this fragment.
                save_classifier.close()
            except:
                # NOTE(review): bare except silently swallows the real error;
                # narrow it to the expected exception type when refactoring.
                print("Pb dans le NaiveBayesClassifier")

            # Train a scikit-learn logistic regression through NLTK's
            # SklearnClassifier wrapper on the same training_set.
            LogisticRegression_classifier = SklearnClassifier(
                LogisticRegression())
            LogisticRegression_classifier.train(training_set)
            print("sklearn classifier créer en LogisticRegression : \n",
                  LogisticRegression_classifier)
            #LogisticRegression_classifier.fit(training_set)
            #print(LogisticRegression_classifier)
            # Report held-out accuracy as a percentage.
            print("LogisticRegression_classifier accuracy percent:",
                  (nltk.classify.accuracy(LogisticRegression_classifier,
                                          testing_set)) * 100)

            print("Labels :", LogisticRegression_classifier.labels())
            print("Type : ", type(LogisticRegression_classifier))

            # Keep only the feature dicts for batch classification; assumes
            # testing_set holds (features, label) pairs -- TODO confirm.
            dictum = [tupl[0] for tupl in testing_set]
            print("dictum : ", dictum)
            try:
                print("classify many:",
                      LogisticRegression_classifier.classify_many(dictum))
            except:
                # Debug fallback: dump the input that made classify_many fail.
                print("classify many erreur \n", "Type testing_set: ",
                      type(dictum), "\n testing_set :", dictum)

            try:
                print("prob_classify_many:",
                      LogisticRegression_classifier.prob_classify_many(dictum))
                # (Truncated in this view) iterates the per-sample probability
                # distributions returned by prob_classify_many.
                for probdisti in LogisticRegression_classifier.prob_classify_many(
예제 #3
0
	# Python 2 fragment: build labelled feature sets from a Twitter test CSV,
	# train a logistic-regression classifier, and spot-check a few inputs.
	print "creating feature sets..."
	tweetlist = tweetTest.loadTwitterCSV('trainingandtestdata/testdata.csv')
	labeld_features = label_feats_from_tweets(tweetlist)
	#labeld_features = label_feats_from_corpus(movie_reviews)
	# Split the labelled features into training / test partitions.
	training_set, test_set = split_label_feats(labeld_features)

	# tweetlist = tweetTest.loadTwitterCSV('trainingandtestdata/training.1600000.processed.noemoticon.csv')
	# training_set = label_feats_from_tweets(tweetlist)
	# training_set, garbage = split_label_feats(training_set, 1.0)
	# test_set, garbage = split_label_feats(labeld_features, 1.0)

	print "training set length: %i  test set length: %i" % (len(training_set), len(test_set))
	print prettifyFeatureSet(test_set)
	print "training classifier..."
	#classifier = NaiveBayesClassifier.train(training_set)
	#classifier = MaxentClassifier.train(training_set, algorithm='iis', max_iter=99, min_lldelta=0.01)
	#classifier = MaxentClassifier.train(training_set)
	# Logistic regression via NLTK's scikit-learn wrapper; the commented-out
	# lines above appear to be earlier classifier experiments.
	classifier = SklearnClassifier(LogisticRegression()).train(training_set)
	print "calculating accuracy..."
	print 'accuracy:', nltk.classify.util.accuracy(classifier, test_set)
	#classifier.show_most_informative_features(30)

	# Sanity checks: one hand-built bag-of-words example, then two raw tweets.
	negfeat = bag_of_words(['the', 'plot', 'was', 'ludicrous'])
	print classifier.classify(negfeat)
	probdist =  classifier.prob_classify(negfeat)
	print "pos: ", probdist.prob('pos'), " neg: ", probdist.prob('neg')
	print classifier.labels()
	classify_tweet(classifier, "I love this movie!", True)
	classify_tweet(classifier, "!!!", True)

예제 #4
0
파일: train.py 프로젝트: Jasmeet107/serapis
    # Hold out the first 150 documents for testing; train on the rest.
    train_corpus = corpus[150:]
    test_corpus = corpus[:150]
    # Each item becomes (feature_dict, label); labelize presumably maps the
    # boolean "document has this category" to a label string -- TODO confirm.
    train_set = [(features(words), labelize(category in categories)) for (words, categories) in train_corpus]
    test_set = [(features(words), labelize(category in categories)) for (words, categories) in test_corpus]

    # train classifier
    # print "Training classifier for '%s'" % category
    # classifier = MaxentClassifier.train(train_set, max_iter= 3)
    # classifier = NaiveBayesClassifier.train(train_set)
    model = MultinomialNB()
    classifier = SklearnClassifier(model)

    # set priors
    # Pre-fit the wrapper's label encoder so the class order is fixed before
    # training. NOTE(review): this touches a private SklearnClassifier
    # attribute and may break across NLTK versions.
    classifier._encoder.fit([category, "no"])
    # [category, "no"] unless this is true then ["no", category]
    flip = classifier.labels()[0] == "no"
    # Fraction of the whole corpus carrying this category.
    categorized_proportion = len([words for (words, categories) in corpus if category in categories]) * 1.0 / len(corpus)
    # Order the class priors to match the encoder's label order found above;
    # must happen before classifier.train() fits the underlying model.
    if flip:
        model.class_prior = [1-categorized_proportion, categorized_proportion]
    else:
        model.class_prior = [categorized_proportion, 1-categorized_proportion]

    classifier.train(train_set)

    # test classifier
    # Predict every test document, then collect indices of predicted vs. gold
    # positives (pos_test_set / pos_ref_set) for downstream metric code.
    test_results = classifier.classify_many([feat for (feat, label) in test_set])
    pos_test_set = set(i for i, result in enumerate(test_results) if result == category)
    reference_values = [label for (feat, label) in test_set]
    pos_ref_set = set(i for i, (feat, label) in enumerate(test_set) if label == category)
    accuracy = scores.accuracy(reference_values, test_results)
    accuracies.append(accuracy)
예제 #5
0
               # (Truncated in this view) tail of a hand-labelled
               # (token_list, sentiment) tweet list whose opening
               # assignment lies before this fragment.
               (['not', 'like', 'that', 'man'], 'negative'),
               (['house', 'not', 'great'], 'negative'),
               (['your', 'song', 'annoying'], 'negative')]

# Score the trained NLTK classifier against the labelled test tweets.
testing_set = nltk.classify.apply_features(extract_features, test_tweets)

predictions = [classifier.classify(extract_features(t)) for t, _s in test_tweets]
for prediction in predictions:
    print(prediction)

print(nltk.classify.accuracy(classifier, testing_set))

classifier.show_most_informative_features(5)
"""
pipeline = Pipeline([('tfidf', TfidfTransformer()),
                      ('chi2', SelectKBest(chi2, k='all')),
                      ('nb', MultinomialNB())])
"""
# Same data, this time through a scikit-learn pipeline:
# TF-IDF -> chi2 selection (k='all' keeps everything) -> multinomial NB.
steps = [
    ('tfidf', TfidfTransformer()),
    ('chi2', SelectKBest(chi2, k='all')),
    ('nb', MultinomialNB()),
]
pipeline = Pipeline(steps)

classif = SklearnClassifier(pipeline)
classif.train(training_set)

print(classif.labels())
for tweet, _sentiment in test_tweets:
    print(classif.classify(extract_features(tweet)))

print(nltk.classify.accuracy(classif, testing_set))
예제 #6
0
파일: SVM.py 프로젝트: shreyg/GitFiles
# Python 2 script: train and evaluate a LinearSVC on pre-built feature sets.
# 3/4 of the positives go to training (integer floor division in Python 2).
poscutoff = len(posfeats)*3/4

cls_set = ['pos', 'neg']
 
# Peek at the first two negative feature sets for debugging.
for i in range(0,2):
	print negfeats[i]
	print '\n------------------------------------------------\n'

trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

print 'train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats))

# Linear SVM through NLTK's scikit-learn wrapper.
classif = SklearnClassifier(LinearSVC())
classif.train(trainfeats)
print classif.labels()
# Split the (features, label) test pairs into two parallel lists.
test_skl = []
t_test_skl = []
for d in testfeats:
 test_skl.append(d[0])
 t_test_skl.append(d[1])
 
print(set(t_test_skl))

# Classify test instances one at a time; classify_many could batch this.
result = []
for item in test_skl:
	p = classif.classify(item)
	result.append(p)
	
print len(result)
print len(t_test_skl)