Beispiel #1
0
def eval_model():
    """Cross-validate each model and their averaged ensemble, printing scores.

    Loads the extended data, then for each of 5 shuffle splits (20% held
    out) fits every model on the training part and prints the ``auc_score``
    of its second probability column on the held-out part. The per-model
    probabilities are averaged and the score of that average is printed as
    the "combined score". After all splits, the mean and standard deviation
    of the combined scores are printed.

    Returns nothing; all results go to stdout.
    """
    comments, labels = load_extended_data()

    # Built in the same order as before; edit this list to evaluate a subset.
    models = [
        build_base_model(),
        build_elasticnet_model(),
        build_stacked_model(),
        build_nltk_model(),
    ]
    cv = ShuffleSplit(len(comments),
                      n_iterations=5,
                      test_size=0.2,
                      indices=True)
    scores = []
    for train, test in cv:
        # The split is the same for every model, so slice once per fold.
        # NOTE(review): assumes comments/labels support index-array
        # indexing (e.g. NumPy arrays) -- confirm in load_extended_data.
        X_train, y_train = comments[train], labels[train]
        X_test, y_test = comments[test], labels[test]

        probs_common = np.zeros((len(test), 2))
        for clf in models:
            clf.fit(X_train, y_train)
            probs = clf.predict_proba(X_test)
            print("score: %f" % auc_score(y_test, probs[:, 1]))
            probs_common += probs
        # Divide by the real model count rather than a literal 4 so the
        # average stays correct when the model list is changed.
        probs_common /= len(models)
        scores.append(auc_score(y_test, probs_common[:, 1]))
        print("combined score: %f" % scores[-1])

    print(np.mean(scores), np.std(scores))
Beispiel #2
0
def explore_features():
    """Write the preprocessor's filtered words to ``filtered.txt``.

    Runs ``TextFeatureTransformer._preprocess`` on the extended comments,
    joins each entry of the returned filtered-word sequence with single
    spaces, and saves the joined strings with ``np.savetxt``.
    """
    comments, _labels = load_extended_data()
    transformer = TextFeatureTransformer()
    preprocessed = transformer._preprocess(comments)
    # Only the third of the four returned values is used here.
    _features, _flat_words_lower, filtered_words, _comments_filtered = \
            preprocessed
    joined_lines = list(map(" ".join, filtered_words))
    np.savetxt("filtered.txt", joined_lines, fmt="%s")
Beispiel #3
0
def eval_model():
    """Cross-validate each model and their averaged ensemble, printing scores.

    Loads the extended data, then for each of 5 shuffle splits (20% held
    out) fits every model on the training part and prints the ``auc_score``
    of its second probability column on the held-out part. The per-model
    probabilities are averaged and the score of that average is printed as
    the "combined score". After all splits, the mean and standard deviation
    of the combined scores are printed.

    Returns nothing; all results go to stdout.
    """
    comments, labels = load_extended_data()

    # Built in the same order as before; edit this list to evaluate a subset.
    models = [
        build_base_model(),
        build_elasticnet_model(),
        build_stacked_model(),
        build_nltk_model(),
    ]
    cv = ShuffleSplit(len(comments), n_iterations=5, test_size=0.2,
            indices=True)
    scores = []
    for train, test in cv:
        # The split is the same for every model, so slice once per fold.
        # NOTE(review): assumes comments/labels support index-array
        # indexing (e.g. NumPy arrays) -- confirm in load_extended_data.
        X_train, y_train = comments[train], labels[train]
        X_test, y_test = comments[test], labels[test]

        probs_common = np.zeros((len(test), 2))
        for clf in models:
            clf.fit(X_train, y_train)
            probs = clf.predict_proba(X_test)
            print("score: %f" % auc_score(y_test, probs[:, 1]))
            probs_common += probs
        # Divide by the real model count rather than a literal 4 so the
        # average stays correct when the model list is changed.
        probs_common /= len(models)
        scores.append(auc_score(y_test, probs_common[:, 1]))
        print("combined score: %f" % scores[-1])

    print(np.mean(scores), np.std(scores))
Beispiel #4
0
def explore_features():
    """Write the preprocessor's filtered words to ``filtered.txt``.

    Runs ``TextFeatureTransformer._preprocess`` on the extended comments,
    joins each entry of the returned filtered-word sequence with single
    spaces, and saves the joined strings with ``np.savetxt``.
    """
    comments, _labels = load_extended_data()
    transformer = TextFeatureTransformer()
    preprocessed = transformer._preprocess(comments)
    # Only the third of the four returned values is used here.
    _features, _flat_words_lower, filtered_words, _comments_filtered = \
            preprocessed
    joined_lines = list(map(" ".join, filtered_words))
    np.savetxt("filtered.txt", joined_lines, fmt="%s")
Beispiel #5
0
def apply_models():
    """Fit every model on the extended data and write test-set predictions.

    Each model is fitted on the full extended data and used to predict
    probabilities for the comments loaded from the verification-set CSV.
    The second probability column of model ``i`` is written to
    ``test_prediction_model_<i>.csv``, and the average over all models is
    written to ``test_prediction_combined.csv``.

    No score is computed: the scoring lines in the earlier version were
    commented out and referred to a ``labels_test`` that this function
    never loads.

    Returns nothing; results are written through ``write_test``.
    """
    # Single source for the dataset name used by load_test and write_test.
    test_file = "impermium_verification_set_.csv"

    comments, labels = load_extended_data()
    comments_test = load_test(test_file)

    models = [
        build_base_model(),
        build_elasticnet_model(),
        build_stacked_model(),
        build_nltk_model(),
    ]
    probs_common = np.zeros((len(comments_test), 2))
    for i, clf in enumerate(models):
        clf.fit(comments, labels)
        probs = clf.predict_proba(comments_test)
        probs_common += probs
        write_test(probs[:, 1], "test_prediction_model_%d.csv" % i,
                ds=test_file)
    # Divide by the real model count rather than a literal 4 so the written
    # average stays correct when the model list is changed.
    probs_common /= len(models)
    write_test(probs_common[:, 1], "test_prediction_combined.csv",
            ds=test_file)
Beispiel #6
0
def apply_models():
    """Fit every model on the extended data and write test-set predictions.

    Each model is fitted on the full extended data and used to predict
    probabilities for the comments loaded from the verification-set CSV.
    The second probability column of model ``i`` is written to
    ``test_prediction_model_<i>.csv``, and the average over all models is
    written to ``test_prediction_combined.csv``.

    No score is computed: the scoring lines in the earlier version were
    commented out and referred to a ``labels_test`` that this function
    never loads.

    Returns nothing; results are written through ``write_test``.
    """
    # Single source for the dataset name used by load_test and write_test.
    test_file = "impermium_verification_set_.csv"

    comments, labels = load_extended_data()
    comments_test = load_test(test_file)

    models = [
        build_base_model(),
        build_elasticnet_model(),
        build_stacked_model(),
        build_nltk_model(),
    ]
    probs_common = np.zeros((len(comments_test), 2))
    for i, clf in enumerate(models):
        clf.fit(comments, labels)
        probs = clf.predict_proba(comments_test)
        probs_common += probs
        write_test(probs[:, 1],
                   "test_prediction_model_%d.csv" % i,
                   ds=test_file)
    # Divide by the real model count rather than a literal 4 so the written
    # average stays correct when the model list is changed.
    probs_common /= len(models)
    write_test(probs_common[:, 1],
               "test_prediction_combined.csv",
               ds=test_file)