    # inside the loop over all test samples (index i): per-sample diagnostics of
    # queried features | acquisition cost | predicted label | predicted probability of the true label
        str(queriedFeatures) + " | " + str(acquiredFeaturesCost) + " | " +
        str(predictedLabel) + " | " + str(predictedTrueLabelProb))
    predictedTestLabels[i] = predictedLabel
    predictedTestTrueLabelProbs[i] = predictedTrueLabelProb
    totalTestFeatureCosts += acquiredFeaturesCost

# after the loop over test samples: per-fold runtime and cost statistics
runTime = (time.time() - startTime) / float(testData.shape[0])
print("runtime per test sample (in seconds) = " + str(runTime))
runTimesAllFolds[foldId] = runTime

avgTestFeatureCosts = totalTestFeatureCosts / float(testData.shape[0])

# threshold chosen so that the recall on the test fold reaches 95%
threshold_forExactRecall = evaluation.getThresholdFromPredictedProbabilities(
    testLabels, predictedTestTrueLabelProbs, targetRecall=0.95)
testRecallAllFolds_exactRecall[foldId] = evaluation.getRecall(
    testLabels, predictedTestTrueLabelProbs, threshold_forExactRecall)
testFDRAllFolds_exactRecall[foldId] = evaluation.getFDR(
    testLabels, predictedTestTrueLabelProbs, threshold_forExactRecall)

# total costs = misclassification costs + feature-acquisition costs
testTotalCostsAllFolds[foldId] = evaluation.getAverageTotalCosts(
    testLabels, predictedTestLabels, avgTestFeatureCosts, misclassificationCosts)
testMisClassificationCostsAllFolds[foldId] = evaluation.getAverageMisclassificationCosts(
    testLabels, predictedTestLabels, misclassificationCosts)
testFeatureCostsAllFolds[foldId] = avgTestFeatureCosts
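
# Illustrative sketch (assumption, not the original evaluation module):
# getThresholdFromPredictedProbabilities is used above to pick a probability threshold at
# which the recall on the positive class reaches the requested targetRecall. A minimal,
# self-contained version of that idea, assuming 0/1 labels in numpy arrays and that a
# higher predicted probability means "more likely positive"; name and signature are
# assumptions made here for clarity only.
import numpy


def thresholdForTargetRecall_sketch(labels, predictedProbs, targetRecall):
    # probabilities assigned to the true positives, highest first
    positiveProbs = numpy.sort(predictedProbs[labels == 1])[::-1]
    # number of positives that must be classified as positive to reach the target recall
    neededPositives = max(1, int(numpy.ceil(targetRecall * positiveProbs.shape[0])))
    # predicting "positive" for every probability >= this value recovers at least that many positives
    return positiveProbs[neededPositives - 1]
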
for i in range(len(allTargetRecalls)):
    testRecallAllFolds.append(numpy.zeros(NUMBER_OF_FOLDS))
    testSpecifityAllFolds.append(numpy.zeros(NUMBER_OF_FOLDS))
    testFDRAllFolds.append(numpy.zeros(NUMBER_OF_FOLDS))

for foldId in range(NUMBER_OF_FOLDS):
    FILENAME_STEM = BASEFOLDER + dataName + "_fold" + str(foldId) + "/" + "costs" + str(featureCosts) + "_"
    labelsValidation, allPredictedProbsValidation, covariateUsageValidation = getAllStatistics(
        FILENAME_STEM + "val.npy")
    labelsTest, allPredictedProbsTest, covariateUsageTest = getAllStatistics(
        FILENAME_STEM + "ts.npy")

    for i, targetRecall in enumerate(allTargetRecalls):
        # calibrate the threshold on the validation fold, then evaluate on the test fold
        threshold = evaluation.getThresholdFromPredictedProbabilities(
            labelsValidation, allPredictedProbsValidation, targetRecall)
        testRecallAllFolds[i][foldId] = evaluation.getRecall(
            labelsTest, allPredictedProbsTest, threshold)
        testSpecifityAllFolds[i][foldId] = evaluation.getSpecifity(
            labelsTest, allPredictedProbsTest, threshold)
        testFDRAllFolds[i][foldId] = evaluation.getFDR(
            labelsTest, allPredictedProbsTest, threshold)

    testAUCAllFolds[foldId] = sklearn.metrics.roc_auc_score(
        labelsTest, allPredictedProbsTest)
    # average number of acquired features per test sample
    testNrFeaturesAllFolds[foldId] = numpy.mean(
        numpy.sum(covariateUsageTest, axis=1))
    # print("featureCosts = ", featureCosts)
    # print("testAUCAllFolds[foldId] = ", testAUCAllFolds[foldId])
    # print("average number of features used = ", avgNumberOfUsedCovariates)
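
# Illustrative sketch (assumption): the getRecall / getSpecifity / getFDR helpers used
# above are assumed to threshold the predicted probabilities and compare the resulting
# 0/1 predictions against the true labels. A self-contained version of the three metrics,
# assuming numpy arrays of 0/1 labels; the function name is hypothetical.
import numpy


def metricsAtThreshold_sketch(labels, predictedProbs, threshold):
    predicted = (predictedProbs >= threshold).astype(int)
    tp = numpy.sum((predicted == 1) & (labels == 1))
    fp = numpy.sum((predicted == 1) & (labels == 0))
    fn = numpy.sum((predicted == 0) & (labels == 1))
    tn = numpy.sum((predicted == 0) & (labels == 0))
    recall = tp / float(tp + fn)        # true positive rate
    specificity = tn / float(tn + fp)   # true negative rate
    fdr = fp / float(fp + tp)           # false discovery rate
    return recall, specificity, fdr
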
for foldId in range(NUMBER_OF_FOLDS):

    if COST_TYPE == "recall":
        # penalty selection on the validation fold: for every candidate feature-cost
        # penalty, calibrate the threshold to the target recall and record the resulting
        # operation costs; keep the penalty with the lowest validation costs
        allOperationsCostsValidation = numpy.zeros(len(allRCOSTS))
        allThresholds = numpy.zeros(len(allRCOSTS))
        for penaltyId, featureCostsPenalty in enumerate(allRCOSTS):
            FILENAME_STEM = BASEFOLDER + dataName + "_fold" + str(foldId) + "/" + "costs" + str(featureCostsPenalty) + "_"
            validationLabels, allPredictedProbsValidation, covariateUsageValidation = getAllStatistics(
                FILENAME_STEM + "val.npy")
            avgValidationFeatureCosts = getAvgCovariateCosts(
                definedFeatureCosts, covariateUsageValidation)

            thresholdValidation = evaluation.getThresholdFromPredictedProbabilities(
                validationLabels, allPredictedProbsValidation, targetRecall)
            predictedValidationLabels = evaluation.getPredictedLabelsAtThreshold(
                thresholdValidation, allPredictedProbsValidation)
            allOperationsCostsValidation[penaltyId] = evaluation.getAverageOperationCosts(
                validationLabels, predictedValidationLabels,
                avgValidationFeatureCosts, falsePositiveCost)
            allThresholds[penaltyId] = thresholdValidation

        bestPenaltyId = numpy.argmin(allOperationsCostsValidation)
        bestFeatureCostsPenalty = allRCOSTS[bestPenaltyId]
        thresholdTest = allThresholds[bestPenaltyId]

        FILENAME_STEM = BASEFOLDER + dataName + "_fold" + str(foldId) + "/" + "costs" + str(bestFeatureCostsPenalty) + "_"
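
# Illustrative sketch (assumptions, not the original helpers): getAvgCovariateCosts is
# assumed to weight the 0/1 covariate-usage matrix by the per-feature costs, and
# getAverageOperationCosts is assumed to charge falsePositiveCost per false alarm on top
# of the average feature-acquisition costs -- the quantity minimised over allRCOSTS above.
# Both function names and signatures below are hypothetical.
import numpy


def avgCovariateCosts_sketch(definedFeatureCosts, covariateUsage):
    # covariateUsage: (nSamples, nFeatures) 0/1 matrix; definedFeatureCosts: (nFeatures,)
    return numpy.mean(numpy.dot(covariateUsage, definedFeatureCosts))


def averageOperationCosts_sketch(labels, predictedLabels, avgFeatureCosts, falsePositiveCost):
    falsePositives = numpy.sum((predictedLabels == 1) & (labels == 0))
    return falsePositiveCost * falsePositives / float(labels.shape[0]) + avgFeatureCosts
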
allThresholdLogistic = evaluation.getThresholdEstimate_pooled(
    allImputedTrainData, trainLabels, allBestHyperparameters,
    allTargetRecalls, modelType="logReg")
pooledPredictedProbs = evaluation.getPooledProbability(allModels, testData)

for i in range(len(allTargetRecalls)):
    # thresholds estimated on the (pooled) training data
    testRecallAllFoldsLogistic[i][foldId] = evaluation.getRecall(
        testLabels, pooledPredictedProbs, allThresholdLogistic[i])
    testFDRAllFoldsLogistic[i][foldId] = evaluation.getFDR(
        testLabels, pooledPredictedProbs, allThresholdLogistic[i])

    # oracle threshold that hits the target recall exactly on the test fold
    exactThreshold = evaluation.getThresholdFromPredictedProbabilities(
        testLabels, pooledPredictedProbs, allTargetRecalls[i])
    testRecallAllFoldsLogistic_exactRecall[i][foldId] = evaluation.getRecall(
        testLabels, pooledPredictedProbs, exactThreshold)
    testFDRAllFoldsLogistic_exactRecall[i][foldId] = evaluation.getFDR(
        testLabels, pooledPredictedProbs, exactThreshold)

testAUCAllFoldsLogistic[foldId] = sklearn.metrics.roc_auc_score(
    testLabels, pooledPredictedProbs)

else:
    assert False
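
# Illustrative sketch (assumption): getPooledProbability is assumed to average the
# positive-class probabilities of the models trained on the individual imputed training
# sets; the scikit-learn predict_proba interface is assumed for each model, and the
# function name below is hypothetical.
import numpy


def pooledProbability_sketch(allModels, testData):
    allProbs = numpy.asarray([model.predict_proba(testData)[:, 1] for model in allModels])
    return numpy.mean(allProbs, axis=0)
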