def show_all_scores():
    """Train and evaluate a classifier on several label groupings of the Sanders tweet corpus.

    Runs four binary tasks: positive vs. negative, sentiment vs. no sentiment,
    positive vs. rest, and negative vs. rest. Results are printed/recorded by
    `train_and_evaluate`; nothing is returned.
    """
    X_orig, Y_orig = load_sanders_data()
    unique_classes = np.unique(Y_orig)
    for c in unique_classes:
        # Per-class tweet counts so class imbalance is visible up front.
        print("#%s tweets: %i" % (c, sum(Y_orig == c)))

    print(120 * "#")
    # NOTE: the original mixed Python 2 print statements with print() calls;
    # all output is normalized to the print() function here.
    print("== Pos vs. neg ==")
    # Restrict to tweets labeled positive or negative for the two-class task.
    pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
    X = X_orig[pos_neg]
    Y = Y_orig[pos_neg]
    Y = tweak_labels(Y, ["positive"])  # 1 = positive, 0 = negative
    train_and_evaluate(X, Y, name="pos vs neg")

    print(120 * "#")
    print("== Pos/neg vs. irrelevant/neutral ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive", "negative"])  # 1 = carries sentiment
    train_and_evaluate(X, Y, name="sentiment vs rest")

    print(120 * "#")
    print("== Pos vs. rest ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive"])
    train_and_evaluate(X, Y, name="pos vs rest")

    print(120 * "#")
    print("== Neg vs. rest ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["negative"])
    train_and_evaluate(X, Y, name="neg vs rest")

    print(120 * "#")
def show_all_scores():
    """Train and evaluate the tuned classifier on several label groupings of the Sanders corpus.

    Same four binary tasks as the untuned version (pos vs. neg, sentiment vs.
    rest, pos vs. rest, neg vs. rest), but each is run through
    `train_and_evaluate_tuned_model`. Nothing is returned.
    """
    X_orig, Y_orig = load_sanders_data()
    unique_classes = np.unique(Y_orig)
    for c in unique_classes:
        # Per-class tweet counts so class imbalance is visible up front.
        print("#%s tweets: %i" % (c, sum(Y_orig == c)))

    print(120 * "#")
    # NOTE: the original mixed Python 2 print statements with print() calls;
    # all output is normalized to the print() function here.
    print("== Pos vs. neg ==")
    # Restrict to tweets labeled positive or negative for the two-class task.
    pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
    X = X_orig[pos_neg]
    Y = Y_orig[pos_neg]
    Y = tweak_labels(Y, ["positive"])  # 1 = positive, 0 = negative
    train_and_evaluate_tuned_model(X, Y, name="pos vs neg (tuned)")

    print(120 * "#")
    print("== Pos/neg vs. irrelevant/neutral ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive", "negative"])  # 1 = carries sentiment
    train_and_evaluate_tuned_model(X, Y, name="sentiment vs rest (tuned)")

    print(120 * "#")
    print("== Pos vs. rest ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive"])
    train_and_evaluate_tuned_model(X, Y, name="pos vs rest (tuned)")

    print(120 * "#")
    print("== Neg vs. rest ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["negative"])
    train_and_evaluate_tuned_model(X, Y, name="neg vs rest (tuned)")

    print(120 * "#")
def show_all_scores():
    """Train and evaluate the tuned classifier on several label groupings of the Sanders corpus.

    Runs the four binary tasks (pos vs. neg, sentiment vs. rest, pos vs. rest,
    neg vs. rest) through `train_and_evaluate_tuned_model`. Nothing is returned.
    """
    X_orig, Y_orig = load_sanders_data()
    unique_classes = np.unique(Y_orig)
    for c in unique_classes:
        # Per-class tweet counts so class imbalance is visible up front.
        print("#%s tweets: %i" % (c, sum(Y_orig == c)))

    print(120 * "#")
    # NOTE: the original mixed Python 2 print statements with print() calls;
    # all output is normalized to the print() function here.
    print("== Pos vs. neg ==")
    # Restrict to tweets labeled positive or negative for the two-class task.
    pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
    X = X_orig[pos_neg]
    Y = Y_orig[pos_neg]
    Y = tweak_labels(Y, ["positive"])  # 1 = positive, 0 = negative
    train_and_evaluate_tuned_model(X, Y, name="pos vs neg (tuned)")

    print(120 * "#")
    print("== Pos/neg vs. irrelevant/neutral ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive", "negative"])  # 1 = carries sentiment
    train_and_evaluate_tuned_model(X, Y, name="sentiment vs rest (tuned)")

    print(120 * "#")
    print("== Pos vs. rest ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive"])
    train_and_evaluate_tuned_model(X, Y, name="pos vs rest (tuned)")

    print(120 * "#")
    print("== Neg vs. rest ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["negative"])
    train_and_evaluate_tuned_model(X, Y, name="neg vs rest (tuned)")

    print(120 * "#")
vect__min_df=1, vect__stop_words=None, vect__smooth_idf=False, vect__use_idf=False, vect__sublinear_tf=True, vect__binary=False, clf__alpha=0.01, ) best_clf = create_ngram_model(best_params) return best_clf if __name__ == "__main__": X_orig, Y_orig = load_sanders_data() classes = np.unique(Y_orig) for c in classes: print("#%s: %i" % (c, sum(Y_orig == c))) print("== Pos vs. neg ==") pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative") X = X_orig[pos_neg] Y = Y_orig[pos_neg] Y = tweak_labels(Y, ["positive"]) train_model(get_best_model(), X, Y, name="pos vs neg", plot=True) print("== Pos/neg vs. irrelevant/neutral ==") X = X_orig Y = tweak_labels(Y_orig, ["positive", "negative"])
best_params = dict(vect__ngram_range=(1, 2), vect__min_df=1, vect__stop_words=None, vect__smooth_idf=False, vect__use_idf=False, vect__sublinear_tf=True, vect__binary=False, clf__alpha=0.01, ) best_clf = create_ngram_model(best_params) return best_clf if __name__ == "__main__": X_orig, Y_orig = load_sanders_data() classes = np.unique(Y_orig) for c in classes: print "#%s: %i" % (c, sum(Y_orig == c)) print "== Pos vs. neg ==" pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative") X = X_orig[pos_neg] Y = Y_orig[pos_neg] Y = tweak_labels(Y, ["positive"]) train_model(get_best_model(), X, Y, name="pos vs neg", plot=True) print "== Pos/neg vs. irrelevant/neutral ==" X = X_orig Y = tweak_labels(Y_orig, ["positive", "negative"])
def sentimentAndNoSentiment(X, Y):
    """Train 'sentiment vs. no sentiment'.

    Relabels Y to 1 for tweets that were positive or negative and 0 for
    neutral or irrelevant, prints the per-class counts, then trains the
    best union model via `train_model`.
    """
    Y = u.tweak_labels(Y, ["positive", "negative"])
    classes = np.unique(Y)
    for c in classes:
        # Show class balance of the relabeled target.
        print("#%s: %i" % (c, sum(Y == c)))
    train_model(Models.get_best_union_model, X, Y, "sentimentVsNoSentiment", True)


def posVsRest(X, Y):
    """Train 'positive vs. rest'.

    Relabels Y to 1 for positive tweets and 0 for negative, neutral or
    irrelevant, prints the per-class counts, then trains the n-gram model
    via `train_model`.
    """
    Y = u.tweak_labels(Y, ["positive"])
    classes = np.unique(Y)
    for c in classes:
        # Show class balance of the relabeled target.
        print("#%s: %i" % (c, sum(Y == c)))
    train_model(Models.create_ngram_model, X, Y, "posVsRest", True)


def findBestEstimator(X, Y):
    """Grid-search the n-gram model and return the best estimator found.

    Fix: the original computed `best_clf` and silently discarded it; the
    estimator is now returned so callers can use it (existing callers that
    ignore the return value are unaffected).
    """
    best_clf = gridSearchModel.grid_search_model(Models.create_ngram_model, X, Y)
    return best_clf


# Script entry: load the corpus and run the selected experiment.
X, Y = u.load_sanders_data()
runJustPosandNeg(X, Y)
# sentimentAndNoSentiment(X, Y)
# posVsRest(X, Y)

# find best estimator for sentiment vs no sentiment
# Y = u.tweak_labels(Y, ["positive", "negative"])
# findBestEstimator(X, Y)