import numpy as np
from sklearn import cross_validation  # old scikit-learn API (pre-0.18 style)
import libscores


def apply_cross_validation(X_train, Y_train, clf, clf_call, which):
    # which['n_folds'] sets the number of folds; which['metric'] picks the score
    kfold = cross_validation.KFold(len(X_train), n_folds=which['n_folds'])

    cross_vals = []
    for train, test in kfold:
        # clf_call is a method-call string (e.g. "fit(...).predict(...)")
        # evaluated on clf with the fold index arrays `train` and `test` in scope
        XX = eval('clf.' + clf_call)
        YY = Y_train[test]
        # Normalize both arrays before scoring
        [cXX, cYY] = libscores.normalize_array(XX, YY)
        if which['metric'] == 'bac_metric':
            cur = libscores.bac_metric(cXX[np.newaxis, :], cYY[np.newaxis, :])
        else:
            cur = libscores.auc_metric(cXX[np.newaxis, :], cYY[np.newaxis, :])
        cross_vals.append(cur)
    # Return the score averaged over all folds
    return np.mean(cross_vals)
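
For context, a minimal usage sketch follows; the classifier, the clf_call string, and the which dictionary are illustrative assumptions, not taken from the surrounding code.

# Hypothetical invocation (illustrative values only):
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100)
which = {'n_folds': 5, 'metric': 'bac_metric'}
# The call string is eval'd with the fold arrays `train` and `test` in scope
clf_call = ('fit(X_train[train], Y_train[train])'
            '.predict_proba(X_train[test])[:, 1]')
mean_score = apply_cross_validation(X_train, Y_train, clf, clf_call, which)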
Example #2
        # (Fragment from a loop over candidate classifiers; `key` names the
        # current model and D holds the challenge data bundle.)
        # Making classification predictions (the output is a vector of class IDs)
        Ypred_tr = myclassifier.predict(D.data['X_train'])
        Ypred_va = myclassifier.predict(D.data['X_valid'])
        Ypred_te = myclassifier.predict(D.data['X_test'])

        # Making probabilistic predictions (each row gives the probability of belonging to each class)
        Yprob_tr = myclassifier.predict_proba(D.data['X_train'])
        Yprob_va = myclassifier.predict_proba(D.data['X_valid'])
        Yprob_te = myclassifier.predict_proba(D.data['X_test'])

        # Training success rate and error bar:
        # First the regular accuracy (fraction of correct classifications)
        acc = accuracy_score(Ytrue_tr, Ypred_tr)
        # Then two AutoML challenge metrics, which work on the one-hot/probability representation
        auc = auc_metric(Yonehot_tr,
                         Yprob_tr,
                         task='multiclass.classification')
        bac = bac_metric(Yonehot_tr,
                         Yprob_tr,
                         task='multiclass.classification')
        # Note that the AutoML metrics are rescaled between 0 and 1.

        print "%s\t%5.2f\t%5.2f\t%5.2f\t(%5.2f)" % (
            key, auc, bac, acc, ebar(acc, Ytrue_tr.shape[0]))
    print "The error bar is valid for Acc only"
    # Note: we do not know Ytrue_va and Ytrue_te
    # See modelTest for a better evaluation using cross-validation

    # Another useful tool is the confusion matrix
    from sklearn.metrics import confusion_matrix
    print "Confusion matrix for %s" % key
    print confusion_matrix(Ytrue_tr, Ypred_tr)

Example #3

    def auc_metric(self, y, y_pred):
        # Thin wrapper that forwards to the challenge's libscores scorer,
        # passing along the task type stored on the instance
        return libscores.auc_metric(y, y_pred, self.task)
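
The method above presumably belongs to a wrapper class that stores the challenge task type; a minimal sketch of such a class (the class name and constructor are assumed for illustration):

import libscores

class MetricWrapper(object):  # hypothetical name, not from the source
    def __init__(self, task='multiclass.classification'):
        self.task = task

    def auc_metric(self, y, y_pred):
        return libscores.auc_metric(y, y_pred, self.task)

    def bac_metric(self, y, y_pred):
        # Same pattern for the balanced-accuracy metric
        return libscores.bac_metric(y, y_pred, self.task)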