import numpy as np
from sklearn import cross_validation
import libscores

def apply_cross_validation(X_train, Y_train, clf, clf_call, which):
    # Evaluate clf with k-fold cross-validation, scoring each fold with one of
    # the AutoML challenge metrics from libscores.
    kfold = cross_validation.KFold(len(X_train), n_folds=which['n_folds'])
    cross_vals = []
    for train, test in kfold:
        # clf_call is a string of method calls evaluated on clf (via eval) to
        # produce predictions for the current fold.
        XX = eval('clf.' + clf_call)
        YY = Y_train[test]
        [cXX, cYY] = libscores.normalize_array(XX, YY)
        if which['metric'] == 'bac_metric':
            cur = libscores.bac_metric(cXX[np.newaxis, :], cYY[np.newaxis, :])
        else:
            cur = libscores.auc_metric(cXX[np.newaxis, :], cYY[np.newaxis, :])
        cross_vals.append(cur)
    return np.mean(cross_vals)
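Because clf_call is run through eval, the caller supplies the fit/predict chain as a string. A minimal usage sketch follows, assuming X_train and Y_train are the training arrays; the clf_call string, the contents of the which dictionary, and the GaussianNB classifier are illustrative assumptions, not fixed by the function above.

from sklearn.naive_bayes import GaussianNB

# Hypothetical call: only the keys 'n_folds' and 'metric' are read by
# apply_cross_validation; everything else here is assumed for illustration.
which = {'n_folds': 5, 'metric': 'bac_metric'}
clf_call = "fit(X_train[train], Y_train[train]).predict(X_train[test])"
score = apply_cross_validation(X_train, Y_train, GaussianNB(), clf_call, which)
print score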
# Making classification predictions (the output is a vector of class IDs)
Ypred_tr = myclassifier.predict(D.data['X_train'])
Ypred_va = myclassifier.predict(D.data['X_valid'])
Ypred_te = myclassifier.predict(D.data['X_test'])
# Making probabilistic predictions (each line contains the probability of
# belonging to each class)
Yprob_tr = myclassifier.predict_proba(D.data['X_train'])
Yprob_va = myclassifier.predict_proba(D.data['X_valid'])
Yprob_te = myclassifier.predict_proba(D.data['X_test'])
# Training success rate and error bar:
# First the regular accuracy (fraction of correct classifications)
acc = accuracy_score(Ytrue_tr, Ypred_tr)
# Then two AutoML challenge metrics, which operate on the one-hot /
# probabilistic representation
auc = auc_metric(Yonehot_tr, Yprob_tr, task='multiclass.classification')
bac = bac_metric(Yonehot_tr, Yprob_tr, task='multiclass.classification')
# Note that the AutoML metrics are rescaled between 0 and 1.
print "%s\t%5.2f\t%5.2f\t%5.2f\t(%5.2f)" % (key, auc, bac, acc, ebar(acc, Ytrue_tr.shape[0]))
print "The error bar is valid for Acc only"
# Note: we do not know Ytrue_va and Ytrue_te
# See modelTest for a better evaluation using cross-validation
# Another useful tool is the confusion matrix
from sklearn.metrics import confusion_matrix
print "Confusion matrix for %s" % key
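The excerpt ends before the matrix itself is displayed; a plausible continuation, assuming the same Ytrue_tr and Ypred_tr arrays computed above, is:

# Assumed continuation: compute and print the confusion matrix on the training predictions
print confusion_matrix(Ytrue_tr, Ypred_tr)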
def auc_metric(self, y, y_pred):
    # Thin wrapper around the challenge AUC metric, using the task string
    # stored on the instance (e.g. 'multiclass.classification').
    return libscores.auc_metric(y, y_pred, self.task)
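A minimal sketch of how such a wrapper might be hosted; the ChallengeScorer class name and its default task value are assumptions, only the auc_metric method itself comes from the excerpt above.

import libscores

class ChallengeScorer:
    def __init__(self, task='multiclass.classification'):
        # Task string consumed by the libscores metrics (assumed default)
        self.task = task

    def auc_metric(self, y, y_pred):
        return libscores.auc_metric(y, y_pred, self.task)

# e.g. ChallengeScorer().auc_metric(Yonehot_tr, Yprob_tr)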