def test_aggregated_sentiments():
    """Classify the full dataset with the optimal subjectivity and polarity
    classifiers, temporally aggregate targets/predictions per day, and plot
    both aggregates.

    Side effects: trains two classifiers, and writes the plots
    "aggregated_subjectivity" and "aggregated_polarity" via `plotting`.
    Returns nothing.
    """
    sub_clf = classifier.get_optimal_subjectivity_classifier()
    pol_clf = classifier.get_optimal_polarity_classifier()
    tweets = utils.get_pickles(2)
    sentimentvalues = utils.get_sentimentvalues(2)
    # splitvalue=1.0 puts every tweet in the "train" partition, so the test
    # halves of each split (the discarded underscores) are empty here.
    sub_train_tweets, sub_train_targets, _, _, sub_train_sentiments, _ = \
        utils.make_subjectivity_train_and_test_and_targets(
            tweets, sentimentvalues, splitvalue=1.0
        )
    pol_train_tweets, pol_train_targets, _, _, pol_train_sentiments, _ = \
        utils.make_polarity_train_and_test_and_targets(
            tweets, sentimentvalues, splitvalue=1.0
        )
    sub_predictions = sub_clf.classify(sub_train_tweets, sub_train_sentiments)
    pol_predictions = pol_clf.classify(pol_train_tweets, pol_train_sentiments)
    # (Removed leftover debug `print pol_train_targets, pol_predictions`.)
    days, targets, predicts, total_frequencies = \
        utils.temporally_aggregate_subjectivity(
            sub_train_tweets, sub_predictions, targets=sub_train_targets
        )
    data = {
        "Targets": [days, targets],
        "Predictions": [days, predicts],
        "Frequencies": [days, total_frequencies],
    }
    plotting.plot_subjectivity_aggregates(data, "aggregated_subjectivity")
    days, targets, predicts, frequencies = \
        utils.temporally_aggregate_polarity(
            pol_train_tweets, pol_predictions, targets=pol_train_targets
        )
    # Normalize per-day polarity sums to per-day means, and express the
    # polarity frequency as a fraction of that day's overall (subjectivity)
    # tweet count.
    # NOTE(review): this indexes `total_frequencies` (from the subjectivity
    # aggregation) with the polarity day index — assumes both aggregations
    # produce the same day axis in the same order; confirm in `utils`.
    for i in range(len(days)):
        targets[i] = targets[i] * 1.0 / frequencies[i]
        predicts[i] = predicts[i] * 1.0 / frequencies[i]
        frequencies[i] = frequencies[i] * 1.0 / total_frequencies[i]
    data = {
        "Targets": [days, targets],
        "Predictions": [days, predicts],
        "Frequencies": [days, frequencies],
    }
    plotting.plot_polarity_aggregates(data, "aggregated_polarity")
def get_optimal_subjectivity_classifier():
    """Build, train, and return the optimal subjectivity classifier.

    Loads pickled tweets (dataset 3), derives subjectivity targets for them,
    and trains an SVM on the 'SA' feature set augmented with sentiment values.
    """
    raw_tweets = utils.get_pickles(3)
    labeled_tweets, subjectivity_targets = utils.make_subjectivity_targets(raw_tweets)

    # Count-vectorizer and TF-IDF transformer settings for the SVM pipeline.
    vectorizer_params = {
        'ngram_range': (1, 1),
        'max_df': 0.5,
    }
    tfidf_params = {
        'sublinear_tf': False,
        'use_idf': True,
        'smooth_idf': True,
    }

    svm = SVM(labeled_tweets, subjectivity_targets, vectorizer_params, tfidf_params)
    svm.set_feature_set('SA', utils.get_sentimentvalues(3))
    svm.train_on_feature_set()
    return svm