Example #1
import random

import nltk.classify.util

# load_corpus, get_word_features and SvmClassifier are defined elsewhere in the project.
def classify():
    # Alternative corpora, kept for reference:
    # corpus = 'Cornell_text_polarity'
    # corpus = 'BingLiu_selected_sentences'
    corpus = 'Cornell_sentence_polarity'
    cases = load_corpus(corpus)
    features = get_word_features(cases)

    train_feats = []
    test_feats = []
    for polarity, feats in features.items():
        # Alternative: split each class down the middle instead of a fixed cutoff:
        # cutoff = len(feats) // 2
        cutoff = 1000
        print(polarity, 'number of training instances:', cutoff)
        # Shuffle a copy before splitting so the train/test split is random
        # rather than following corpus order.
        temp_feats = feats[:]
        random.shuffle(temp_feats)
        train_feats += temp_feats[:cutoff]
        test_feats += temp_feats[cutoff:]

    print('train on %d instances, test on %d instances' % (len(train_feats), len(test_feats)))

    classifier = SvmClassifier.train(train_feats)
    print('Test classify:', classifier.classify({'I': 0.0, 'love': 1.0, 'you': 0.0}))
    print('accuracy:', nltk.classify.util.accuracy(classifier, test_feats))
    classifier.show_most_informative_features()
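The helpers load_corpus and get_word_features are not shown in this example. A minimal sketch of what get_word_features could look like, assuming each corpus case is a raw labelled sentence and the dense word -> 1.0/0.0 feature dicts seen in the test call above; all names and details here are a hypothetical reconstruction, not the original implementation:

def get_word_features(cases):
    # Hypothetical: cases maps polarity -> list of raw sentences.
    # Build one shared vocabulary, then encode each sentence as a dense
    # word -> 1.0/0.0 dict paired with its label, matching the
    # (featureset, label) pairs that classify() feeds to SvmClassifier.train.
    vocabulary = set()
    for sentences in cases.values():
        for sentence in sentences:
            vocabulary.update(sentence.lower().split())

    features = {}
    for polarity, sentences in cases.items():
        features[polarity] = [
            ({word: (1.0 if word in set(sentence.lower().split()) else 0.0)
              for word in vocabulary}, polarity)
            for sentence in sentences
        ]
    return features

With feature dicts of this shape, the quick check classifier.classify({'I': 0.0, 'love': 1.0, 'you': 0.0}) in the example matches the training format.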
Example #2
# Filter the feature vectors:
fvecs = [(tweet_features.get_tweet_features(t, worstfeaturesfilter), s) for (t, s) in tweets]
v_train = fvecs[0:num_train]
# v_train = fvecs
v_test = fvecs[num_train:len(tweets)]

# Dump tweets for which our feature selector found nothing:
# for i in range(0, len(tweets)):
#     if tweet_features.is_zero_dict(fvecs[i][0]):
#         print(tweets[i][1] + ': ' + tweets[i][0])


# Train the classifier; alternatives tried earlier are kept for reference:
# classifier = nltk.NaiveBayesClassifier.train(v_train)
# classifier = nltk.classify.maxent.train_maxent_classifier_with_gis(v_train)
classifier = SvmClassifier.train(v_train)
# classifier = nltk.classify.maxent.train_maxent_classifier_with_gis(v_train, count_cutoff=2)
# classifier = nltk.classify.maxent.train_maxent_classifier_with_iis(v_train, count_cutoff=4)
# classifier = nltk.classify.maxent.train_maxent_classifier_with_scipy(v_train, algorithm='BFGS')

# Pickle files must be opened in binary mode.
with open('worstfeaturesfilter.pickle', 'wb') as f:
    pickle.dump(worstfeaturesfilter, f)
print('WARNING: NOT PICKLING CLASSIFIER ANYMORE')
# with open('classifier.pickle', 'wb') as f:
#     pickle.dump(classifier, f)

# Sanity-check classifications on sample tweets:
# print(classifier.classify(tweet_features.get_tweet_features("Christmas Eve without my, with cold feet and nobody to.", worstfeaturesfilter)))
# print(classifier.classify(tweet_features.get_tweet_features("Nochebuena sin mi @ tdomhan, con los pies fríos y nadie a quien abrazar.", worstfeaturesfilter)))
# print(classifier.classify(tweet_features.get_tweet_features("Nochebuena sin mi, con los pies fríos y nadie a quien abrazar.", worstfeaturesfilter)))
# print(classifier.explain(tweet_features.get_tweet_features("Nochebuena sin mi @ tdomhan, con los pies fríos y nadie a quien abrazar.", worstfeaturesfilter)))


# Classify and dump results for interpretation
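The original code opened the pickle file in text mode, which breaks on Python 3; pickle requires binary mode. A minimal round-trip sketch using only the standard-library pickle module, with the file name taken from the example above:

import pickle

# Write in binary mode ('wb'); text mode raises a TypeError on Python 3.
with open('worstfeaturesfilter.pickle', 'wb') as f:
    pickle.dump(worstfeaturesfilter, f)

# Later, e.g. in the script that classifies new tweets, restore it:
with open('worstfeaturesfilter.pickle', 'rb') as f:
    worstfeaturesfilter = pickle.load(f)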
Example #3
    # Inside the corpus-reading loop: collect each tweet and bucket it by label.
    tweets.append(item)
    if sentiment == "positive":
        pos_tweets.append(item)  # append, not +=, so each tweet stays one list element
    else:
        neg_tweets.append(item)

# Hold out 20% of each class for testing (integer division for Python 3).
negcutoff, poscutoff = len(neg_tweets) * 4 // 5, len(pos_tweets) * 4 // 5
pos_train, pos_test = pos_tweets[:poscutoff], pos_tweets[poscutoff:]
neg_train, neg_test = neg_tweets[:negcutoff], neg_tweets[negcutoff:]

neg_feats_train = get_train_features_from_tweets(neg_train, 'neg')
pos_feats_train = get_train_features_from_tweets(pos_train, 'pos')

train_feats = neg_feats_train + pos_feats_train

classifier = SvmClassifier.train(train_feats)
# classifier = nltk.NaiveBayesClassifier.train(train_feats)


# Evaluation on the held-out negative tweets
correct, wrong = 0, 0

for tweet in neg_test:
    features = get_features_from_tweet(tweet)
    result = classifier.classify(features)
    if result == "neg":
        correct += 1
    else:
        wrong += 1
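The loop above scores only the negative half of the test set. A short sketch of the symmetric positive-class loop and an overall accuracy, assuming the same get_features_from_tweet helper from the example:

# Evaluate the held-out positive tweets the same way.
for tweet in pos_test:
    features = get_features_from_tweet(tweet)
    if classifier.classify(features) == "pos":
        correct += 1
    else:
        wrong += 1

print('accuracy: %.3f (%d correct, %d wrong)' % (correct / (correct + wrong), correct, wrong))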