def show_all_scores():
    """Load the Sanders tweet corpus and evaluate four binary setups.

    Runs train_and_evaluate on: pos vs. neg, pos/neg vs. irrelevant/neutral,
    pos vs. rest, and neg vs. rest.

    FIX: the original mixed Python 2 ``print "..."`` statements with Python 3
    ``print(...)`` calls, which is a SyntaxError under Python 3. All prints
    are now function calls (also valid single-argument prints under Python 2).
    """
    X_orig, Y_orig = load_sanders_data()
    unique_classes = np.unique(Y_orig)
    for c in unique_classes:
        # Per-class tweet counts expose the class imbalance of the corpus.
        print("#%s tweets: %i" % (c, sum(Y_orig == c)))

    print(120 * "#")
    print("== Pos vs. neg ==")
    # Keep only the tweets labeled positive or negative.
    pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
    X = X_orig[pos_neg]
    Y = Y_orig[pos_neg]
    # tweak_labels presumably maps the listed labels to 1 and the rest to 0
    # — confirm against its definition elsewhere in the project.
    Y = tweak_labels(Y, ["positive"])

    train_and_evaluate(X, Y, name="pos vs neg")
    print(120 * "#")

    print("== Pos/neg vs. irrelevant/neutral ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive", "negative"])
    train_and_evaluate(X, Y, name="sentiment vs rest")
    print(120 * "#")

    print("== Pos vs. rest ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive"])
    train_and_evaluate(X, Y, name="pos vs rest")
    print(120 * "#")

    print("== Neg vs. rest ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["negative"])
    train_and_evaluate(X, Y, name="neg vs rest")
    print(120 * "#")
def show_all_scores():
    """Load the Sanders tweet corpus and evaluate the tuned model on four
    binary setups: pos vs. neg, pos/neg vs. irrelevant/neutral, pos vs. rest,
    and neg vs. rest.

    FIX: the original mixed Python 2 ``print "..."`` statements with Python 3
    ``print(...)`` calls, which is a SyntaxError under Python 3. All prints
    are now function calls (also valid single-argument prints under Python 2).
    NOTE(review): this duplicates an earlier ``show_all_scores`` definition in
    the same file; only the last definition wins at import time.
    """
    X_orig, Y_orig = load_sanders_data()
    unique_classes = np.unique(Y_orig)
    for c in unique_classes:
        # Per-class tweet counts expose the class imbalance of the corpus.
        print("#%s tweets: %i" % (c, sum(Y_orig == c)))

    print(120 * "#")
    print("== Pos vs. neg ==")
    # Keep only the tweets labeled positive or negative.
    pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
    X = X_orig[pos_neg]
    Y = Y_orig[pos_neg]
    Y = tweak_labels(Y, ["positive"])

    # best_clf, best_score, best_params = grid_search_model(create_ngram_model, X, Y)
    train_and_evaluate_tuned_model(X, Y, name="pos vs neg (tuned)")
    print(120 * "#")

    print("== Pos/neg vs. irrelevant/neutral ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive", "negative"])
    train_and_evaluate_tuned_model(X, Y, name="sentiment vs rest (tuned)")
    print(120 * "#")

    print("== Pos vs. rest ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive"])
    train_and_evaluate_tuned_model(X, Y, name="pos vs rest (tuned)")
    print(120 * "#")

    print("== Neg vs. rest ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["negative"])
    train_and_evaluate_tuned_model(X, Y, name="neg vs rest (tuned)")
    print(120 * "#")
# --- Ejemplo n.º 3 (Example no. 3) — snippet separator from the code-search
# scrape, score 0; commented out so the file parses as Python ---
def show_all_scores():
    """Load the Sanders tweet corpus and evaluate the tuned model on four
    binary setups: pos vs. neg, pos/neg vs. irrelevant/neutral, pos vs. rest,
    and neg vs. rest.

    FIX: the original mixed Python 2 ``print "..."`` statements with Python 3
    ``print(...)`` calls, which is a SyntaxError under Python 3. All prints
    are now function calls (also valid single-argument prints under Python 2).
    NOTE(review): this duplicates two earlier ``show_all_scores`` definitions
    in the same file; only the last definition wins at import time.
    """
    X_orig, Y_orig = load_sanders_data()
    unique_classes = np.unique(Y_orig)
    for c in unique_classes:
        # Per-class tweet counts expose the class imbalance of the corpus.
        print("#%s tweets: %i" % (c, sum(Y_orig == c)))

    print(120 * "#")
    print("== Pos vs. neg ==")
    # Keep only the tweets labeled positive or negative.
    pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
    X = X_orig[pos_neg]
    Y = Y_orig[pos_neg]
    Y = tweak_labels(Y, ["positive"])

    # best_clf, best_score, best_params = grid_search_model(create_ngram_model, X, Y)
    train_and_evaluate_tuned_model(X, Y, name="pos vs neg (tuned)")
    print(120 * "#")

    print("== Pos/neg vs. irrelevant/neutral ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive", "negative"])
    train_and_evaluate_tuned_model(X, Y, name="sentiment vs rest (tuned)")
    print(120 * "#")

    print("== Pos vs. rest ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive"])
    train_and_evaluate_tuned_model(X, Y, name="pos vs rest (tuned)")
    print(120 * "#")

    print("== Neg vs. rest ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["negative"])
    train_and_evaluate_tuned_model(X, Y, name="neg vs rest (tuned)")
    print(120 * "#")
# --- Ejemplo n.º 4 (Example no. 4) — snippet separator from the code-search
# scrape, score 0; commented out so the file parses as Python ---
        # NOTE(review): fragment — the enclosing `def` and the opening of this
        # dict(...) of pipeline hyperparameters are not visible in this chunk.
        # The vect__/clf__ prefixes presumably address a vectorizer and a
        # classifier inside an sklearn Pipeline — confirm against the full file.
        vect__min_df=1,
        vect__stop_words=None,
        vect__smooth_idf=False,
        vect__use_idf=False,
        vect__sublinear_tf=True,
        vect__binary=False,
        clf__alpha=0.01,  # presumably the classifier's smoothing — confirm
    )

    # Build the model with the hand-tuned parameters above.
    best_clf = create_ngram_model(best_params)

    return best_clf


if __name__ == "__main__":
    # Script entry point: load the Sanders tweet corpus and report how many
    # tweets each sentiment class contains.
    X_orig, Y_orig = load_sanders_data()
    classes = np.unique(Y_orig)
    for c in classes:
        print("#%s: %i" % (c, sum(Y_orig == c)))

    print("== Pos vs. neg ==")
    # Keep only the tweets labeled positive or negative.
    pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
    X = X_orig[pos_neg]
    Y = Y_orig[pos_neg]
    # tweak_labels presumably maps "positive" to 1 and the rest to 0 —
    # confirm against its definition elsewhere in the project.
    Y = tweak_labels(Y, ["positive"])
    train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)

    print("== Pos/neg vs. irrelevant/neutral ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive", "negative"])
    # NOTE(review): this chunk is truncated here — the train_model call for
    # this second experiment is not visible in this view.
# --- Ejemplo n.º 5 (Example no. 5) — snippet separator from the code-search
# scrape, score 0; commented out so the file parses as Python ---
    # NOTE(review): fragment — the enclosing `def` is not visible in this
    # chunk. These hand-tuned hyperparameters mirror the dict(...) fragment
    # earlier in the file; the vect__/clf__ prefixes presumably address a
    # vectorizer and a classifier inside an sklearn Pipeline — confirm.
    best_params = dict(vect__ngram_range=(1, 2),  # unigrams and bigrams
                       vect__min_df=1,
                       vect__stop_words=None,
                       vect__smooth_idf=False,
                       vect__use_idf=False,
                       vect__sublinear_tf=True,
                       vect__binary=False,
                       clf__alpha=0.01,  # presumably smoothing — confirm
                       )

    # Build the model with the hand-tuned parameters above.
    best_clf = create_ngram_model(best_params)

    return best_clf

if __name__ == "__main__":
    # Script entry point: load the Sanders tweet corpus, report the class
    # distribution, then train the tuned model on the pos-vs-neg subset.
    # FIX: converted Python 2 ``print`` statements to ``print(...)`` calls —
    # the statement form is a SyntaxError under Python 3 and clashed with the
    # print() calls used elsewhere in this file.
    X_orig, Y_orig = load_sanders_data()
    classes = np.unique(Y_orig)
    for c in classes:
        print("#%s: %i" % (c, sum(Y_orig == c)))

    print("== Pos vs. neg ==")
    # Keep only the tweets labeled positive or negative.
    pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
    X = X_orig[pos_neg]
    Y = Y_orig[pos_neg]
    Y = tweak_labels(Y, ["positive"])
    train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)

    print("== Pos/neg vs. irrelevant/neutral ==")
    X = X_orig
    Y = tweak_labels(Y_orig, ["positive", "negative"])
    # NOTE(review): this chunk is truncated here — the train_model call for
    # this second experiment is not visible in this view.
# --- Ejemplo n.º 6 (Example no. 6) — snippet separator from the code-search
# scrape, score 0; commented out so the file parses as Python ---
def sentimentAndNoSentiment(X,Y):
    """Train the 'sentiment vs. no sentiment' classifier.

    Relabels Y so positive/negative tweets become 1 and neutral/irrelevant
    tweets become 0, prints the resulting class counts, then trains (and
    plots) the best union model.
    """
    Y = u.tweak_labels(Y, ["positive", "negative"])
    for label in np.unique(Y):
        print("#%s: %i" % (label, sum(Y == label)))
    train_model(Models.get_best_union_model, X, Y, "sentimentVsNoSentiment", True)

def posVsRest(X,Y):
    """Train the 'positive vs. everything else' classifier.

    Relabels Y so positive tweets become 1 and negative/neutral/irrelevant
    tweets become 0, prints the resulting class counts, then trains (and
    plots) the n-gram model.
    """
    Y = u.tweak_labels(Y, ["positive"])
    for label in np.unique(Y):
        print("#%s: %i" % (label, sum(Y == label)))
    train_model(Models.create_ngram_model, X, Y, "posVsRest", True)

def findBestEstimator(X,Y):
    """Grid-search the n-gram model over (X, Y) and return the best classifier.

    FIX: the original assigned ``best_clf`` but never returned it, so every
    caller received ``None``; the grid-search result was silently discarded.
    """
    best_clf = gridSearchModel.grid_search_model(Models.create_ngram_model, X, Y)
    return best_clf

# Script entry: load the Sanders tweet corpus and run one experiment.
X,Y = u.load_sanders_data()
# NOTE(review): runJustPosandNeg is not defined anywhere in this chunk —
# presumably defined elsewhere in the file; verify before running.
runJustPosandNeg(X,Y)
# Alternative experiments, disabled; enable one at a time.
#sentimentAndNoSentiment(X,Y)
#posVsRest(X,Y)

#find best estimator for sentiment vs no sentiment
#Y= u.tweak_labels(Y,["positive","negative"])
#findBestEstimator(X,Y)