def AUC(targetVariable, allPredictions):
    """Return the area under the ROC curve for the rows that have a target.

    Rows whose entry in `targetVariable` is not finite (NaN or +/-inf) are
    dropped from both arrays before the curve is computed.
    # NOTE(review): presumably non-finite targets mark unlabelled (non-training)
    # rows -- confirm against the caller.

    targetVariable -- array of target values
    allPredictions -- array of predicted scores, indexable by the same mask
    """
    hasTarget = numpy.isfinite(targetVariable)
    FPR, TPR, _ = roc(targetVariable[hasTarget], allPredictions[hasTarget])
    return auc(FPR, TPR)
def AUC(targetVariable, allPredictions):
    """Return the area under the ROC curve, using rounded predictions.

    Rows whose entry in `targetVariable` is not finite (NaN or +/-inf) are
    dropped from both arrays, and the remaining predictions are rounded to
    AUC_DEC_PTS decimal places before the curve is computed.
    # NOTE(review): presumably non-finite targets mark unlabelled (non-training)
    # rows -- confirm against the caller.

    targetVariable -- array of target values
    allPredictions -- array of predicted scores, indexable by the same mask
    """
    # Decimal places kept in the predictions; per the original author the
    # rounding is there to speed up the AUC calculation.
    AUC_DEC_PTS = 3

    hasTarget = numpy.isfinite(targetVariable)
    roundedScores = numpy.round(allPredictions[hasTarget],
                                decimals=AUC_DEC_PTS)
    FPR, TPR, _ = roc(targetVariable[hasTarget], roundedScores)
    return auc(FPR, TPR)
def AUCkFoldLogisticRegression(regularization, inData, penalty, kFolds):
    """Return the k-fold cross-validated AUC of a logistic-regression model.

    A fresh LogisticRegression is fitted on each fold's training rows and
    scored on that fold's held-out rows. The held-out labels and scores of
    all folds are pooled, and the AUC of the pooled set is returned.
    Per-fold progress is printed to stdout.

    regularization -- passed to LogisticRegression as `C`
    inData         -- passed to getXYData() to obtain the (X, y) arrays
    penalty        -- passed to LogisticRegression as `penalty`
    kFolds         -- number of folds; must evenly divide the sample count

    Raises UserWarning when the sample count is not a multiple of kFolds.
    """
    # Each print below passes a single pre-formatted string in parentheses,
    # which produces the same text under the Python 2 print statement used by
    # this file and also parses as a call under Python 3.
    print("\n\tCalculating AUC for regularization %s using %s folds"
          % (regularization, kFolds))
    sys.stdout.flush()

    xData, yData = getXYData(inData)
    nSamples, nFeatures = xData.shape

    # Equal fold sizes are required. (For the original 5922-row data set the
    # usable fold counts were 2, 3, 7 and 9.)
    if nSamples % kFolds != 0:
        raise UserWarning(
            "Uneven fold sizes! kFolds (%s) must evenly divide the number "
            "of samples (%s)" % (kFolds, nSamples))

    crossValFolds = KFold(nSamples, kFolds)
    yTestDataAllFolds = array([])
    probasTestDataAllFolds = array([])

    # probas_ has one column of probabilities per class (0, 1).
    # NOTE(review): documented predict_proba output orders columns like the
    # classifier's class labels, which would make column 1 the class-1
    # column. The original author observed column 0 behaving as class 1 and
    # used it; that choice is preserved here -- confirm against the installed
    # library version, since the wrong column would invert the AUC.
    CLASS_1_COL = 0

    for foldNum, (train, test) in enumerate(crossValFolds):
        # Fit a new model on this fold's training rows and score its test rows.
        LRclassifier = LogisticRegression(C=regularization, penalty=penalty)
        fitted = LRclassifier.fit(xData[train], yData[train])
        probas_ = fitted.predict_proba(xData[test])
        foldScores = probas_[:, CLASS_1_COL]

        # Number of coefficients in the first row of coef_ that are non-zero.
        numNon0Coefs = sum([1 for coef in LRclassifier.coef_[0] if coef != 0])

        # ROC curve and area under it for this fold alone (reported only).
        FPR, TPR, _ = roc(yData[test], foldScores)
        roc_auc = auc(FPR, TPR)
        print("\tFold: %s  AUC: %s Non0Coefs: %s Reg: %s %s"
              % (foldNum, roc_auc, numNon0Coefs, regularization,
                 localTimeString()))
        sys.stdout.flush()

        # Pool the held-out labels and scores across folds.
        yTestDataAllFolds = numpy.concatenate(
            (yTestDataAllFolds, yData[test]))
        probasTestDataAllFolds = numpy.concatenate(
            (probasTestDataAllFolds, foldScores))

    # AUC over the pooled held-out predictions of every fold.
    FPRallFolds, TPRallFolds, _ = roc(yTestDataAllFolds,
                                      probasTestDataAllFolds)
    roc_auc_allFolds = auc(FPRallFolds, TPRallFolds)
    print("AUC_all_folds: %s Reg: %s Penalty: %s kFolds: %s %s"
          % (roc_auc_allFolds, regularization, penalty, kFolds,
             localTimeString()))
    return roc_auc_allFolds
def test_roc():
    """Test the receiver operating characteristic (ROC) curve.

    Builds the curve from the module-level labels `y[half:]` and the second
    column of the module-level `probas_`, then checks that its area equals
    0.80 to two decimal places.
    """
    false_pos_rate, true_pos_rate, _ = roc(y[half:], probas_[:, 1])
    area = auc(false_pos_rate, true_pos_rate)
    assert_array_almost_equal(area, 0.80, decimal=2)
# Shuffle the samples with a fixed seed and find the midpoint used to split
# them into a fitting half and an evaluation half.
n_samples, n_features = X.shape
p = list(range(n_samples))
random.seed(0)
random.shuffle(p)
X = X[p]
y = y[p]
half = n_samples // 2

# Add noisy features: 200 random columns per original feature.
# NOTE(review): only `random` is seeded above; unless np.random is seeded
# elsewhere, the noise columns differ from run to run.
X = np.hstack((X, np.random.randn(n_samples, 200 * n_features)))

# Run classifier: fit on the first half, predict probabilities on the second.
classifier = svm.SVC(kernel='linear', probability=True)
probas_ = classifier.fit(X[:half], y[:half]).predict_proba(X[half:])

# Compute ROC curve and area under the curve
fpr, tpr, thresholds = roc(y[half:], probas_[:, 1])
roc_auc = auc(fpr, tpr)
print("Area under the ROC curve : %f" % roc_auc)

# Plot ROC curve together with the diagonal reference line
pl.figure(-1)
pl.clf()
pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
pl.plot([0, 1], [0, 1], 'k--')
pl.xlim([0.0, 1.0])
pl.ylim([0.0, 1.0])
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.title('Receiver operating characteristic example')
pl.legend(loc="lower right")
pl.show()
################################################################################
# Classification and ROC analysis

# Run classifier with cross-validation and plot one ROC curve per fold
cv = StratifiedKFold(y, k=6)
classifier = svm.SVC(kernel='linear', probability=True)

# Accumulator for the fold TPR curves, all resampled onto a common FPR grid.
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
all_tpr = []

for i, (train, test) in enumerate(cv):
    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])

    # Compute ROC curve and area under the curve for this fold
    fpr, tpr, thresholds = roc(y[test], probas_[:, 1])
    roc_auc = auc(fpr, tpr)

    # Add this fold's TPR, interpolated at mean_fpr, to the running sum and
    # pin the first point of the summed curve to zero.
    mean_tpr += interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0

    pl.plot(fpr, tpr,
            label='ROC fold %d (area = %0.2f)' % (i, roc_auc),
            lw=1)

# Diagonal reference line
pl.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

# Turn the summed TPR into a mean over the folds and pin its last point to one.
mean_tpr /= len(cv)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
pl.plot(mean_fpr, mean_tpr, 'k--',
        label='Mean ROC (area = %0.2f)' % mean_auc,
        lw=2)

# Leave a small margin around the unit square.
pl.xlim([-0.05, 1.05])
pl.ylim([-0.05, 1.05])
def calcAUC(targetData):
    """Return the AUC obtained when `targetData` is scored at random.

    One score per target is drawn with numpy.random.random, and the area
    under the resulting ROC curve is returned. The result therefore varies
    from call to call unless numpy's random state is fixed.
    # NOTE(review): presumably a chance-level baseline -- confirm with callers.

    targetData -- sequence of target labels
    """
    randomScores = numpy.random.random(len(targetData))
    FPR, TPR, _ = roc(targetData, randomScores)
    return auc(FPR, TPR)