Example #1
                    # NOTE: this excerpt begins inside the per-test-sample loop; the
                    # opening of the print(...) call below is cut off in the original.
                        str(queriedFeatures) + " | " +
                        str(acquiredFeaturesCost) + " | " +
                        str(predictedLabel) + " | " +
                        str(predictedTrueLabelProb))
                    # store the prediction and accumulate the feature-acquisition cost
                    predictedTestLabels[i] = predictedLabel
                    predictedTestTrueLabelProbs[i] = predictedTrueLabelProb
                    totalTestFeatureCosts += acquiredFeaturesCost

                # average runtime per test sample for this fold
                runTime = (time.time() - startTime) / float(testData.shape[0])
                print("runtime per test sample (in seconds) = " + str(runTime))
                runTimesAllFolds[foldId] = runTime

                # average feature-acquisition cost per test sample
                avgTestFeatureCosts = totalTestFeatureCosts / float(
                    testData.shape[0])

            # pick the threshold that reaches the target recall of 0.95, then record
            # recall, FDR, and the cost metrics for this fold
            threshold_forExactRecall = evaluation.getThresholdFromPredictedProbabilities(
                testLabels, predictedTestTrueLabelProbs, targetRecall=0.95)
            testRecallAllFolds_exactRecall[foldId] = evaluation.getRecall(
                testLabels, predictedTestTrueLabelProbs,
                threshold_forExactRecall)
            testFDRAllFolds_exactRecall[foldId] = evaluation.getFDR(
                testLabels, predictedTestTrueLabelProbs,
                threshold_forExactRecall)
            testTotalCostsAllFolds[foldId] = evaluation.getAverageTotalCosts(
                testLabels, predictedTestLabels, avgTestFeatureCosts,
                misclassificationCosts)
            testMisClassificationCostsAllFolds[
                foldId] = evaluation.getAverageMisclassificationCosts(
                    testLabels, predictedTestLabels, misclassificationCosts)

            testFeatureCostsAllFolds[foldId] = avgTestFeatureCosts
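
All three examples lean on evaluation.getThresholdFromPredictedProbabilities and evaluation.getRecall, which are not shown in these excerpts. The following is only a minimal sketch, assuming the common convention of choosing the largest probability threshold that still reaches the target recall; the names and details are illustrative, not the project's actual implementation.

import numpy

def thresholdForTargetRecall(labels, predictedProbs, targetRecall=0.95):
    # pick the largest threshold whose recall still reaches targetRecall
    # (assumed behaviour of getThresholdFromPredictedProbabilities)
    labels = numpy.asarray(labels)
    positiveScores = numpy.sort(numpy.asarray(predictedProbs)[labels == 1])[::-1]
    k = int(numpy.ceil(targetRecall * positiveScores.size))
    return positiveScores[k - 1]

def recallAtThreshold(labels, predictedProbs, threshold):
    labels = numpy.asarray(labels)
    predictedPositive = numpy.asarray(predictedProbs) >= threshold
    return numpy.sum(predictedPositive & (labels == 1)) / float(numpy.sum(labels == 1))

# tiny usage example with synthetic scores
labels = numpy.array([1, 1, 1, 1, 0, 0, 0, 0])
probs = numpy.array([0.9, 0.8, 0.7, 0.2, 0.6, 0.4, 0.3, 0.1])
t = thresholdForTargetRecall(labels, probs, targetRecall=0.75)
print(t, recallAtThreshold(labels, probs, t))  # 0.7 0.75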
Example #2
    # one result array per target recall, each holding one value per fold
    for i in range(len(allTargetRecalls)):
        testRecallAllFolds.append(numpy.zeros(NUMBER_OF_FOLDS))
        testSpecifityAllFolds.append(numpy.zeros(NUMBER_OF_FOLDS))
        testFDRAllFolds.append(numpy.zeros(NUMBER_OF_FOLDS))

    for foldId in range(NUMBER_OF_FOLDS):
        # load the saved validation and test statistics for this fold
        FILENAME_STEM = BASEFOLDER + dataName + "_fold" + str(
            foldId) + "/" + "costs" + str(featureCosts) + "_"
        labelsValidation, allPredictedProbsValidation, covariateUsageValidation = getAllStatistics(
            FILENAME_STEM + "val.npy")
        labelsTest, allPredictedProbsTest, covariateUsageTest = getAllStatistics(
            FILENAME_STEM + "ts.npy")

        for i, targetRecall in enumerate(allTargetRecalls):
            # calibrate the threshold on validation data, then evaluate it on test data
            threshold = evaluation.getThresholdFromPredictedProbabilities(
                labelsValidation, allPredictedProbsValidation, targetRecall)
            testRecallAllFolds[i][foldId] = evaluation.getRecall(
                labelsTest, allPredictedProbsTest, threshold)
            testSpecifityAllFolds[i][foldId] = evaluation.getSpecifity(
                labelsTest, allPredictedProbsTest, threshold)
            testFDRAllFolds[i][foldId] = evaluation.getFDR(
                labelsTest, allPredictedProbsTest, threshold)

        # threshold-free AUC and the average number of acquired features per test sample
        testAUCAllFolds[foldId] = sklearn.metrics.roc_auc_score(
            labelsTest, allPredictedProbsTest)
        testNrFeaturesAllFolds[foldId] = numpy.mean(
            numpy.sum(covariateUsageTest, axis=1))

        # print("featureCosts = ", featureCosts)
        # print("testAUCAllFolds[foldId] = ", testAUCAllFolds[foldId])
        # print("average number of features used = ", avgNumberOfUsedCovariates)
Example #3
    for foldId in range(NUMBER_OF_FOLDS):

        if COST_TYPE == "recall":
            # evaluate every candidate feature-cost penalty on the validation data
            allOperationsCostsValidation = numpy.zeros(len(allRCOSTS))
            allThresholds = numpy.zeros(len(allRCOSTS))
            for penaltyId, featureCostsPenalty in enumerate(allRCOSTS):
                FILENAME_STEM = BASEFOLDER + dataName + "_fold" + str(
                    foldId) + "/" + "costs" + str(featureCostsPenalty) + "_"
                validationLabels, allPredictedProbsValidation, covariateUsageValidation = getAllStatistics(
                    FILENAME_STEM + "val.npy")
                avgValidationFeatureCosts = getAvgCovariateCosts(
                    definedFeatureCosts, covariateUsageValidation)

                thresholdValidation = evaluation.getThresholdFromPredictedProbabilities(
                    validationLabels, allPredictedProbsValidation,
                    targetRecall)
                predictedValidationLabels = evaluation.getPredictedLabelsAtThreshold(
                    thresholdValidation, allPredictedProbsValidation)
                allOperationsCostsValidation[
                    penaltyId] = evaluation.getAverageOperationCosts(
                        validationLabels, predictedValidationLabels,
                        avgValidationFeatureCosts, falsePositiveCost)
                allThresholds[penaltyId] = thresholdValidation

            # keep the penalty with the lowest validation operation cost
            bestPenaltyId = numpy.argmin(allOperationsCostsValidation)
            bestFeatureCostsPenalty = allRCOSTS[bestPenaltyId]
            thresholdTest = allThresholds[bestPenaltyId]

            FILENAME_STEM = BASEFOLDER + dataName + "_fold" + str(
                foldId) + "/" + "costs" + str(bestFeatureCostsPenalty) + "_"
            # NOTE: allImputedTrainData, trainLabels, allBestHyperparameters,
            # allModels, testData and testLabels are defined in parts of the
            # original script that are not included in this excerpt.
            allThresholdLogistic = evaluation.getThresholdEstimate_pooled(
                allImputedTrainData,
                trainLabels,
                allBestHyperparameters,
                allTargetRecalls,
                modelType="logReg")
            pooledPredictedProbs = evaluation.getPooledProbability(
                allModels, testData)

            for i in range(len(allTargetRecalls)):
                # recall and FDR with the threshold estimated on the training folds ...
                testRecallAllFoldsLogistic[i][foldId] = evaluation.getRecall(
                    testLabels, pooledPredictedProbs, allThresholdLogistic[i])
                testFDRAllFoldsLogistic[i][foldId] = evaluation.getFDR(
                    testLabels, pooledPredictedProbs, allThresholdLogistic[i])

                # ... and with the threshold that hits the target recall exactly on the test labels
                exactThreshold = evaluation.getThresholdFromPredictedProbabilities(
                    testLabels, pooledPredictedProbs, allTargetRecalls[i])
                testRecallAllFoldsLogistic_exactRecall[i][
                    foldId] = evaluation.getRecall(testLabels,
                                                   pooledPredictedProbs,
                                                   exactThreshold)
                testFDRAllFoldsLogistic_exactRecall[i][
                    foldId] = evaluation.getFDR(testLabels,
                                                pooledPredictedProbs,
                                                exactThreshold)

            testAUCAllFoldsLogistic[foldId] = sklearn.metrics.roc_auc_score(
                testLabels, pooledPredictedProbs)

        else:
            assert False, "unsupported COST_TYPE: " + str(COST_TYPE)
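
Example #3 selects the feature-cost penalty that minimizes an average operation cost on validation data, and Example #1 records average total and misclassification costs on test data. The evaluation module that computes these costs is not included in the excerpts, so the sketch below only shows one plausible way a per-sample total cost could combine a misclassification-cost matrix with the average feature-acquisition cost; the function names and the cost-matrix layout are assumptions.

import numpy

def averageMisclassificationCosts(trueLabels, predictedLabels, misclassificationCosts):
    # misclassificationCosts[trueLabel, predictedLabel] is the cost of predicting
    # predictedLabel when the truth is trueLabel (assumed layout)
    trueLabels = numpy.asarray(trueLabels, dtype=int)
    predictedLabels = numpy.asarray(predictedLabels, dtype=int)
    return float(numpy.mean(misclassificationCosts[trueLabels, predictedLabels]))

def averageTotalCosts(trueLabels, predictedLabels, avgFeatureCosts, misclassificationCosts):
    # per-sample total cost = feature-acquisition cost + misclassification cost
    return avgFeatureCosts + averageMisclassificationCosts(
        trueLabels, predictedLabels, misclassificationCosts)

# example: a false positive costs 1.0, a false negative costs 5.0
costs = numpy.array([[0.0, 1.0],
                     [5.0, 0.0]])
trueLabels = numpy.array([0, 1, 1, 0])
predictedLabels = numpy.array([0, 1, 0, 1])
print(averageTotalCosts(trueLabels, predictedLabels, 2.0, costs))  # 2.0 + 1.5 = 3.5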