def eval_model():
    """Cross-validate the individual models and their averaged ensemble.

    Loads the extended data set, builds the four models, and for every
    train/test split produced by ``ShuffleSplit`` fits each model on the
    training part and prints its ``auc_score`` on the held-out part.  The
    per-model class probabilities are averaged and the score of that
    average is printed as the "combined score".  After the last split the
    mean and standard deviation of the combined scores are printed.

    Returns:
        None.  All results are reported via ``print``.
    """
    comments, labels = load_extended_data()

    models = [
        build_base_model(),
        build_elasticnet_model(),
        build_stacked_model(),
        build_nltk_model(),
    ]

    # NOTE(review): ``n_iterations`` / ``indices`` look like the legacy
    # scikit-learn ShuffleSplit signature -- confirm before upgrading.
    cv = ShuffleSplit(len(comments), n_iterations=5, test_size=0.2,
                      indices=True)

    scores = []
    for train, test in cv:
        # The slices depend only on the split, not on the classifier, so
        # take them once per split instead of once per model.
        X_train, y_train = comments[train], labels[train]
        X_test, y_test = comments[test], labels[test]

        # Running sum of the (n_test, 2) probability outputs of all models.
        probs_common = np.zeros((len(test), 2))
        for clf in models:
            clf.fit(X_train, y_train)
            probs = clf.predict_proba(X_test)
            print("score: %f" % auc_score(y_test, probs[:, 1]))
            probs_common += probs

        # Divide by the actual number of models rather than a literal 4 so
        # the average stays correct when the model list is edited.
        probs_common /= len(models)

        scores.append(auc_score(y_test, probs_common[:, 1]))
        print("combined score: %f" % scores[-1])

    print(np.mean(scores), np.std(scores))
def apply_models():
    """Fit every model on the full training data and write test predictions.

    Loads the extended training data and the comments of
    ``impermium_verification_set_.csv``, fits each of the four models on
    all training comments, and writes the second probability column of
    each model to ``test_prediction_model_<i>.csv``.  The probabilities of
    all models are averaged and the second column of that average is
    written to ``test_prediction_combined.csv``.

    No score is computed here: this function loads no labels for the test
    comments.

    Returns:
        None.  Results are written through ``write_test``.
    """
    # Single source for the data set name used for loading and writing.
    dataset = "impermium_verification_set_.csv"

    comments, labels = load_extended_data()
    comments_test = load_test(dataset)

    models = [
        build_base_model(),
        build_elasticnet_model(),
        build_stacked_model(),
        build_nltk_model(),
    ]

    # Running sum of the (n_test, 2) probability outputs of all models.
    probs_common = np.zeros((len(comments_test), 2))
    for i, clf in enumerate(models):
        clf.fit(comments, labels)
        probs = clf.predict_proba(comments_test)
        probs_common += probs
        write_test(probs[:, 1], "test_prediction_model_%d.csv" % i,
                   ds=dataset)

    # Divide by the actual number of models rather than a literal 4 so the
    # written probabilities stay correctly scaled if the list is edited.
    probs_common /= len(models)

    write_test(probs_common[:, 1], "test_prediction_combined.csv",
               ds=dataset)