def pick_best_params(model, X, y):
    """Grid-search the regularization strength C for *model* via 3-fold CV AUC.

    Scans C over np.arange(5.26, 6, 0.1) with ``dual`` fixed to False,
    mutating ``model.C`` and ``model.dual`` in place while searching.

    NOTE(review): the file defines a second ``pick_best_params`` below with a
    different C grid; the later definition shadows this one — confirm which
    version is intended to be live.

    Parameters:
        model: estimator exposing ``C`` and ``dual`` attributes and usable
               by ``my_cross_val_score`` (e.g. sklearn LogisticRegression).
        X, y:  training design matrix and labels.

    Returns:
        The C value whose 3-fold mean AUC was highest.
    """
    opt_scores = []
    for C in np.arange(5.26, 6, 0.1):
        for p in (False, ):
            model.C = C
            model.dual = p
            # Train and cross-validate (3 folds); only the scores are used.
            scores, _ = my_cross_val_score(model, X, y, auc_roc, 3)
            # Hoisted: original computed sp.mean(scores) twice per iteration.
            mean_auc = sp.mean(scores)
            # logging (lazy %-args) instead of print, for consistency with
            # the rest of the file.
            logging.debug("Mean AUC for p=%s, C=%f: %f", p, mean_auc and C, mean_auc)
            opt_scores.append((mean_auc, p, C))
    # Highest mean AUC first; C is the last element of each tuple.
    opt_scores.sort(reverse=True)
    logging.debug(opt_scores)
    return opt_scores[0][-1]
def pick_best_params(model, X, y):
    """Grid-search the regularization strength C for *model* via 3-fold CV AUC.

    Scans C over np.arange(0.5, 4, 0.3) with ``dual`` fixed to True,
    mutating ``model.C`` and ``model.dual`` in place while searching.

    Parameters:
        model: estimator exposing ``C`` and ``dual`` attributes and usable
               by ``my_cross_val_score`` (e.g. sklearn LogisticRegression).
        X, y:  training design matrix and labels.

    Returns:
        The C value whose 3-fold mean AUC was highest.
    """
    opt_scores = []
    for C in np.arange(0.5, 4, 0.3):
        for p in (True, ):
            model.C = C
            model.dual = p
            # Train and cross-validate (3 folds); only the scores are used.
            scores, _ = my_cross_val_score(model, X, y, auc_roc, 3)
            # Hoisted: original computed sp.mean(scores) twice per iteration.
            mean_auc = sp.mean(scores)
            # Lazy %-args: the string is only formatted if DEBUG is enabled.
            logging.debug("Mean AUC for p=%s, C=%f: %f", p, C, mean_auc)
            opt_scores.append((mean_auc, p, C))
    # Highest mean AUC first; C is the last element of each tuple.
    opt_scores.sort(reverse=True)
    logging.debug(opt_scores)
    return opt_scores[0][-1]
# NOTE(review): the lines below are the tail of a feature-selection call whose
# head (function name and first arguments) lies outside this chunk — presumably
# it returns the indices of the selected feature columns; confirm upstream.
        stop_val=feature_sel_stop, kfold=4, random_state=SEED*78,
        features_sel=preselected, remove_worst=feature_remove_worst)
# Keep only the selected feature columns in both train and test matrices.
X = X[:, features]
X_test = X_test[:, features]
logging.debug(X.shape)
logging.info("Encoding factors/transform data")
# encode_factors transforms both matrices consistently (defined elsewhere).
X, X_test = encode_factors(X, X_test)
logging.info("Find best params for model (C)")
# pick_best_params mutates model.C/model.dual during its grid search and
# returns the best C, which is re-assigned here.
model.C = pick_best_params(model, X, y)
# Get cross_val scores
logging.info("Cross-validate")
# 10-fold CV with a seeded split for reproducibility.
scores, _ = my_cross_val_score(model, X, y, auc_roc, 10, random_state=SEED*31)
#print str(scores)
logging.info("Mean AUC: %f" % (sp.mean(scores)))
# Train with whole dataset
logging.info("Train with the whole dataset")
model.fit(X, y)
# Probability of the positive class (column 1 of predict_proba).
preds = model.predict_proba(X_test)[:, 1]
# Save results
logging.info("Save results")
save_results(preds, "new_sal.csv")