import constants
import realdata
import sklearn.metrics

# NOTE(review): numpy, experimentHelper and evaluation are used below but are
# not imported in the visible part of this script -- confirm they are imported.

# Dataset to evaluate; swap in one of the commented alternatives to change it.
# dataName = "breastcancer_5foldCV"
# dataName = "pima_5foldCV"
# dataName = "pyhsioNetWithMissing_5foldCV"
dataName = "heartDiseaseWithMissing_5foldCV"

# The recorder is sized by the number of false-positive costs that are swept.
resultsRecorder = experimentHelper.ResultsRecorder(len(constants.allFalsePositiveCosts))
readyForSaving = False

definedFeatureCosts = realdata.getFeaturesCosts(dataName)

for falsePositiveCost in constants.allFalsePositiveCosts:
    # The false-negative cost is a fixed multiple of the false-positive cost.
    falseNegativeCost = falsePositiveCost * constants.FN_TO_FP_RATIO

    # Integer table with one row per fold and two columns; presumably it holds
    # the (bestLambdaId, bestTreeId) pair of every fold -- confirm further down.
    # The builtin int replaces the numpy.int alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    allBestSettings = numpy.zeros((constants.NUMBER_OF_FOLDS, 2), dtype=int)

    for testFoldId in range(constants.NUMBER_OF_FOLDS):
        print("*************************** GREEDY MISER *******************************************")
        # Select the Greedy Miser parameters for this fold under the current
        # asymmetric false-positive / false-negative costs.
        bestLambdaId, bestTreeId = evaluation.getBestParametersForGreedyMiser_asymmetric(
            dataName,
            definedFeatureCosts,
            testFoldId,
            falsePositiveCost,
            falseNegativeCost,
        )
# Experiment setup: builds the run identifier `infoStr` and the 2x2
# misclassification cost matrix used for each false-positive cost.
#
# * infoStr gets "_" + densityRegressionModelName appended when variationName
#   is DYNAMIC, a "_withUnlabeledData" / "_noUnlabeledData" suffix depending on
#   USE_UNLABELED_DATA, is replaced by FULL_MODEL when variationName is
#   FULL_MODEL (otherwise "_" + FEATURE_SELECTION_METHOD is appended), and gets
#   "_" + classificationModelName appended.
# * resultsRecorder is created with len(ALL_FALSE_POSITIVE_COSTS).
# * For every costId, falsePositiveCost is read from ALL_FALSE_POSITIVE_COSTS.
#   With COST_TYPE == "symmetricCost" the false-negative cost equals the
#   false-positive cost, dataName is asserted to be "pima_5foldCV" and
#   correctClassificationCost is -50.0.
# * misclassificationCosts is a 2x2 array: entry [0, 1] is falsePositiveCost,
#   entry [1, 0] is falseNegativeCost, and both diagonal entries are
#   correctClassificationCost.
#
# NOTE(review): the statements below are collapsed onto one physical line, so
# the original nesting (which statements belong to which if/else/for) cannot be
# read off here, and everything after the first '#' on that line is currently
# treated as a comment -- restore the original line breaks and indentation.
# NOTE(review): falseNegativeCost and correctClassificationCost are only
# assigned next to the "symmetricCost" test in this excerpt; presumably other
# COST_TYPE values are handled elsewhere -- confirm.
if variationName == DYNAMIC: infoStr += "_" + densityRegressionModelName if USE_UNLABELED_DATA: infoStr += "_withUnlabeledData" else: infoStr += "_noUnlabeledData" if variationName == FULL_MODEL: infoStr = FULL_MODEL else: infoStr += "_" + FEATURE_SELECTION_METHOD infoStr += "_" + classificationModelName resultsRecorder = experimentHelper.ResultsRecorder( len(ALL_FALSE_POSITIVE_COSTS)) for costId in range(len(ALL_FALSE_POSITIVE_COSTS)): falsePositiveCost = ALL_FALSE_POSITIVE_COSTS[costId] if COST_TYPE == "symmetricCost": falseNegativeCost = falsePositiveCost assert (dataName == "pima_5foldCV") correctClassificationCost = -50.0 # in order to align to the setting in (Ji and Carin, 2007; Dulac-Arnold et al., 2012) for Diabetes. misclassificationCosts = numpy.zeros((2, 2)) misclassificationCosts[0, 1] = falsePositiveCost misclassificationCosts[1, 0] = falseNegativeCost misclassificationCosts[0, 0] = correctClassificationCost misclassificationCosts[1, 1] = correctClassificationCost
dataName = "breastcancer_5foldCV" if dataName == "pima_5foldCV": sameClassCost = -50.0 # set to -50.0 in order to compare to Li and Carin work # assert(misclassificationCostsSymmetric == 400 or misclassificationCostsSymmetric == 800) else: sameClassCost = 0.0 NUMBER_OF_FOLDS = 5 allMisclassificationCostsSymmetric = [100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0, 900.0, 1000.0] resultsRecorder = experimentHelper.ResultsRecorder(len(allMisclassificationCostsSymmetric)) readyForSaving = False for misclassificationCostsSymmetric in allMisclassificationCostsSymmetric: validTotalCostsAllFolds = numpy.zeros(NUMBER_OF_FOLDS) validFeatureCostsAllFolds = numpy.zeros(NUMBER_OF_FOLDS) validMisClassificationCostsAllFolds = numpy.zeros(NUMBER_OF_FOLDS) validAccuracyAllFolds = numpy.zeros(NUMBER_OF_FOLDS) allTestFoldAvgBestTotalCostsValidResult = numpy.zeros(NUMBER_OF_FOLDS) # for analysis only allBestSettings = numpy.zeros((NUMBER_OF_FOLDS, 2), dtype = numpy.int) for testFoldId in range(NUMBER_OF_FOLDS):