def save_best_classifier(target, X, y, make_clf, verbose=False):
    """Cross-validate fresh classifiers and persist the best-scoring one.

    A previously stored result is looked up under the key ``'<target>_clf'``
    via ``load_shelve``.  One new classifier is then fitted per fold and
    scored with ``roc_auc_score`` on that fold's test split; a fold only
    replaces the current best when its score is strictly higher.

    Args:
        target: Name used to build the storage key ``'<target>_clf'``.
        X: Samples, indexed with the fold index arrays (``X[train_index]``).
        y: Labels, indexed the same way and passed to ``StratifiedKFold``.
        make_clf: Zero-argument callable returning ``(clf, name)``.  ``clf``
            must provide ``fit`` and ``predict_proba``; ``name`` is only used
            in the summary message.
        verbose: When true, print a classification report and the score for
            every fold.

    Returns:
        The best result as ``(score, clf, (train_index, test_index))``.  When
        nothing was stored and no fold beats ``-inf``, this is the placeholder
        ``(float('-inf'), False, (False, False))``.
    """
    key = '%s_clf' % target
    try:
        best = load_shelve(key)
    except Exception:
        # Nothing usable stored yet: start from a placeholder any real score
        # beats.  Deliberately not a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate.
        best = (float('-inf'), False, (False, False))

    new = False
    # NOTE(review): this call signature looks like the legacy
    # sklearn.cross_validation API (labels passed to the constructor,
    # ``n_folds``) -- confirm against the file's imports.
    cv = StratifiedKFold(y, n_folds=5)
    for train_index, test_index in cv:
        # A fresh, unfitted classifier per fold.
        clf, name = make_clf()
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        clf.fit(X_train, y_train)
        y_true, y_pred = y_test, clf.predict_proba(X_test)
        # Column 1 of predict_proba is used as the ranking score.
        curr_score = roc_auc_score(y_true, y_pred[:, 1])

        if verbose:
            print("Detailed classification report:")
            print()
            print("The model is trained on a folded development set.")
            print("The scores are computed on the full set.")
            print()
            # Probabilities are rounded to the nearest integer to obtain
            # hard labels for the report.
            print(classification_report(np.around(y_true),
                                        np.around(y_pred[:, 1])))
            print()
            print(curr_score)
            print()

        if curr_score > best[0]:
            new = True
            best = curr_score, clf, (train_index, test_index)

    if new:
        print("Saving the best classifier in %s as %s_clf with a score of %.2f"
              % (name, target, best[0]))
        insert_shelve(best, key)
    else:
        # ``name`` here comes from the last make_clf() call, not from the
        # stored result.
        print("Loaded the best classifier in %s as %s_clf with a score of %.2f"
              % (name, target, best[0]))
    return best
def save_best_classifier(target, X, y, make_clf, verbose=False):
    """Cross-validate fresh classifiers and persist the best-scoring one.

    A previously stored result is looked up under the key ``'<target>_clf'``
    via ``load_shelve``.  One new classifier is then fitted per fold and
    scored with ``roc_auc_score`` on that fold's test split; a fold only
    replaces the current best when its score is strictly higher.

    Args:
        target: Name used to build the storage key ``'<target>_clf'``.
        X: Samples, indexed with the fold index arrays (``X[train_index]``).
        y: Labels, indexed the same way and passed to ``StratifiedKFold``.
        make_clf: Zero-argument callable returning ``(clf, name)``.  ``clf``
            must provide ``fit`` and ``predict_proba``; ``name`` is only used
            in the summary message.
        verbose: When true, print a classification report and the score for
            every fold.

    Returns:
        The best result as ``(score, clf, (train_index, test_index))``.  When
        nothing was stored and no fold beats ``-inf``, this is the placeholder
        ``(float('-inf'), False, (False, False))``.
    """
    key = '%s_clf' % target
    try:
        best = load_shelve(key)
    except Exception:
        # Nothing usable stored yet: start from a placeholder any real score
        # beats.  Deliberately not a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate.
        best = (float('-inf'), False, (False, False))

    new = False
    # NOTE(review): this call signature looks like the legacy
    # sklearn.cross_validation API (labels passed to the constructor,
    # ``n_folds``) -- confirm against the file's imports.
    cv = StratifiedKFold(y, n_folds=5)
    for train_index, test_index in cv:
        # A fresh, unfitted classifier per fold.
        clf, name = make_clf()
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        clf.fit(X_train, y_train)
        y_true, y_pred = y_test, clf.predict_proba(X_test)
        # Column 1 of predict_proba is used as the ranking score.
        curr_score = roc_auc_score(y_true, y_pred[:, 1])

        if verbose:
            print("Detailed classification report:")
            print()
            print("The model is trained on a folded development set.")
            print("The scores are computed on the full set.")
            print()
            # Probabilities are rounded to the nearest integer to obtain
            # hard labels for the report.
            print(classification_report(np.around(y_true),
                                        np.around(y_pred[:, 1])))
            print()
            print(curr_score)
            print()

        if curr_score > best[0]:
            new = True
            best = curr_score, clf, (train_index, test_index)

    if new:
        print("Saving the best classifier in %s as %s_clf with a score of %.2f"
              % (name, target, best[0]))
        insert_shelve(best, key)
    else:
        # ``name`` here comes from the last make_clf() call, not from the
        # stored result.
        print("Loaded the best classifier in %s as %s_clf with a score of %.2f"
              % (name, target, best[0]))
    return best
def postprocess_data(target, X, y, paths, train=False, submission=False):
    """Run the stored classifier for ``target`` through the post-scoring step.

    The entry saved under ``'<target>_clf'`` is loaded and unpacked as a
    three-item tuple; only its classifier (second item) is used.  The
    classifier is handed to ``accumulate_scores`` together with ``X``, ``y``
    and ``paths``, and the resulting pair is forwarded to ``scores_for_post``.

    ``train`` and ``submission`` are accepted but not read by this function.
    Nothing is returned.
    """
    # The stored entry must unpack into exactly three items.
    _stored_score, clf, _stored_folds = load_shelve('%s_clf' % target)
    acc_X, acc_y = accumulate_scores(X, y, clf, paths)
    scores_for_post(acc_X, acc_y)
def postprocess_data(target, X, y, paths, train=False, submission=False):
    """Run the stored classifier for ``target`` through the post-scoring step.

    The entry saved under ``'<target>_clf'`` is loaded and unpacked as a
    three-item tuple; only its classifier (second item) is used.  The
    classifier is handed to ``accumulate_scores`` together with ``X``, ``y``
    and ``paths``, and the resulting pair is forwarded to ``scores_for_post``.

    ``train`` and ``submission`` are accepted but not read by this function.
    Nothing is returned.
    """
    # The stored entry must unpack into exactly three items.
    _stored_score, clf, _stored_folds = load_shelve('%s_clf' % target)
    acc_X, acc_y = accumulate_scores(X, y, clf, paths)
    scores_for_post(acc_X, acc_y)