Example no. 1
0
def bac_metric_wrapper(y, y_pred):
    """Promote 1-D label vectors to column matrices, then score with bac_metric.

    Any input arriving as a flat vector of length n is reshaped to an
    (n, 1) column array so that ``bac_metric`` always receives 2-D inputs.
    """
    if y.ndim == 1:
        y = y.reshape((-1, 1))
    if y_pred.ndim == 1:
        y_pred = y_pred.reshape((-1, 1))
    return bac_metric(y, y_pred)
def apply_cross_validation(X_train, Y_train, n_folds, clf, clf_call):
    """Mean balanced-accuracy (BAC) score of `clf` over a K-fold split.

    Parameters
    ----------
    X_train : array-like
        Training inputs; only its length is used here, to size the folds.
    Y_train : indexable
        Training labels; indexed with each fold's test indices.
    n_folds : int
        Number of folds for the K-fold split.
    clf : estimator
        Classifier object that the evaluated expression is applied to.
    clf_call : str
        Python expression text evaluated as ``eval('clf.' + clf_call)``.
        It is expected to reference the local fold variables `train`/`test`
        to fit and predict.  NOTE(review): `eval` on a string is unsafe for
        untrusted input — confirm callers only pass trusted expressions.

    Returns
    -------
    float
        Mean of the per-fold BAC scores.
    """
    # NOTE(review): `cross_validation` is the pre-0.18 scikit-learn API;
    # modern releases expose this as `model_selection.KFold`.
    kfold = cross_validation.KFold(len(X_train), n_folds=n_folds)
    
    cross_vals = []
    for train, test in kfold:       
        # The evaluated expression sees `train`/`test` via eval's scope.
        XX = eval('clf.' + clf_call)
        YY = Y_train[test]
        # Normalize predictions and targets together before scoring.
        [cXX, cYY] = libscores.normalize_array(XX, YY)
        # bac_metric is fed 2-D inputs, hence the added leading axis.
        cur = (libscores.bac_metric(cXX[np.newaxis, :], cYY[np.newaxis, :]))
        cross_vals.append(cur)
    return np.mean(cross_vals)
def apply_cross_validation(X_train, Y_train, clf, clf_call, which):
    """Mean cross-validated score of `clf`, metric chosen by `which`.

    NOTE(review): this redefines `apply_cross_validation` with a different
    signature; if both definitions live in the same module, this one
    shadows the earlier one — confirm that is intended.

    Parameters
    ----------
    X_train : array-like
        Training inputs; only its length is used, to size the folds.
    Y_train : indexable
        Training labels; indexed with each fold's test indices.
    clf : estimator
        Classifier object that the evaluated expression is applied to.
    clf_call : str
        Expression text evaluated as ``eval('clf.' + clf_call)``; expected
        to use the local fold variables `train`/`test`.  NOTE(review):
        `eval` on a string is unsafe for untrusted input.
    which : dict
        Configuration with keys 'n_folds' (int) and 'metric' (str);
        'bac_metric' selects BAC, anything else selects AUC.

    Returns
    -------
    float
        Mean of the per-fold scores.
    """
    # NOTE(review): pre-0.18 scikit-learn API (`model_selection.KFold` now).
    kfold = cross_validation.KFold(len(X_train), n_folds=which['n_folds'])

    cross_vals = []
    for train, test in kfold:
        # The evaluated expression sees `train`/`test` via eval's scope.
        XX = eval('clf.' + clf_call)
        YY = Y_train[test]
        [cXX, cYY] = libscores.normalize_array(XX, YY)
        # Dispatch on the configured metric; both expect 2-D inputs.
        if which['metric'] == 'bac_metric':
            cur = libscores.bac_metric(cXX[np.newaxis, :], cYY[np.newaxis, :])
        else:
            cur = libscores.auc_metric(cXX[np.newaxis, :], cYY[np.newaxis, :])
        cross_vals.append(cur)
    return np.mean(cross_vals)
Example no. 4
0
        # Hard (label) predictions on the test split.
        Ypred_te = myclassifier.predict(D.data['X_test'])

        # Making probabilistic predictions (each line contains the proba of belonging in each class)
        Yprob_tr = myclassifier.predict_proba(D.data['X_train'])
        Yprob_va = myclassifier.predict_proba(D.data['X_valid'])
        Yprob_te = myclassifier.predict_proba(D.data['X_test'])

        # Training success rate and error bar:
        # First the regular accuracy (fraction of correct classifications)
        acc = accuracy_score(Ytrue_tr, Ypred_tr)
        # Then two AutoML challenge metrics, working on the other representation
        # (one-hot targets vs. class probabilities).
        auc = auc_metric(Yonehot_tr,
                         Yprob_tr,
                         task='multiclass.classification')
        bac = bac_metric(Yonehot_tr,
                         Yprob_tr,
                         task='multiclass.classification')
        # Note that the AutoML metrics are rescaled between 0 and 1.

        # Python 2 print statement: key, AUC, BAC, accuracy, and the
        # accuracy error bar (parenthesized).
        print "%s\t%5.2f\t%5.2f\t%5.2f\t(%5.2f)" % (
            key, auc, bac, acc, ebar(acc, Ytrue_tr.shape[0]))
    print "The error bar is valid for Acc only"
    # Note: we do not know Ytrue_va and Ytrue_te
    # See modelTest for a better evaluation using cross-validation

    # Another useful tool is the confusion matrix
    from sklearn.metrics import confusion_matrix
    print "Confusion matrix for %s" % key
    print confusion_matrix(Ytrue_tr, Ypred_tr)
    # It can also be visualized, see:
    # http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
Example no. 5
0
            #                scores = cross_validation.cross_val_score(clf, D.data['X_train'], D.data['Y_train'], cv=5)
            #                vprint( verbose,  "[+] SVM croos_val done! Mean = %0.2f" % (scores.mean()))

            # Random forest with a fixed seed for reproducibility; 4 worker jobs.
            clf = RForestClass(n_estimators, random_state=1, n_jobs=4)

            # NOTE(review): pre-0.18 scikit-learn KFold API (length + n_folds).
            kfold = cross_validation.KFold(len(nD.data['X_train']), n_folds=10)

            cross_vals = []
            for train, test in kfold:
                #print "dims"
                # Fit on the fold's training rows, keep the positive-class
                # probability column for the fold's test rows.
                XX = clf.fit(nD.data['X_train'][train],
                             D.data['Y_train'][train]).predict_proba(
                                 nD.data['X_train'][test])[:, 1]
                YY = D.data['Y_train'][test]
                # Normalize predictions and targets together before scoring.
                [cXX, cYY] = libscores.normalize_array(XX, YY)
                # bac_metric is fed 2-D inputs, hence the added leading axis.
                cur = (libscores.bac_metric(cXX[np.newaxis, :],
                                            cYY[np.newaxis, :]))
                cross_vals.append(cur)
            score_mean = np.mean(cross_vals)

            # `cycle` comes from an enclosing loop not visible here;
            # cycle 0 appears to be the no-feature-selection baseline.
            if cycle == 0:
                print("")
                print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
                print("")
                print("[+] RF croos_val w/o fs done! Mean = %0.2f" %
                      (score_mean))
            if cycle > 0:
                # NOTE(review): presumably 11 features are dropped per cycle
                # from an initial 100 — confirm against the enclosing loop.
                print(
                    "[+] RF croos_val with fs done (%i features)! Mean = %0.2f"
                    % (100 - cycle * 11, score_mean))
            if cycle == 8:
                print("")
Example no. 6
0
 def bac_metric(self, y, y_pred):
     """Delegate BAC scoring of (y, y_pred) to libscores for this object's task."""
     task = self.task
     return libscores.bac_metric(y, y_pred, task)