def normalize(self):
    """Normalize the detectors' scores against the baseline and print them.

    Can only be called after the scoring step (i.e. runner.score()), because
    it reads the score CSVs listed in ``self.resultsFiles``.

    For each results file the last value of the "Score" column is taken as
    the detector's raw score and normalized as::

        (raw - base) * (100 / (44.0 - base))

    where ``base`` is the last "Score" value of the baseline detector's CSV
    for the same application profile, and 44.0 is the hard-coded perfect
    score (described by the original documentation as the number of TPs
    possible).  The results CSVs are only read, so they still contain the
    original, non-normalized scores.

    The normalized scores are printed and passed, as a
    ``{detector: {profile: score}}`` dict, to ``updateFinalResults`` together
    with the path ``<resultsDir>/final_results.json``.

    Raises:
        IOError: if ``<resultsDir>/baseline`` is not a directory.
        IndexError: if no profile name from ``self.profiles`` occurs in the
            path of a results file.
    """
    # Parenthesized single-argument print works as a statement (Python 2)
    # and as a function call (Python 3).
    print("\nRunning score normalization step")

    # Get baselines for each application profile.
    baselineDir = os.path.join(self.resultsDir, "baseline")
    if not os.path.isdir(baselineDir):
        raise IOError(
            "No results directory for baseline. You must "
            "run the baseline detector before normalizing scores.")

    baselines = {}
    # Only the profile names are needed; iterating the mapping directly
    # avoids the Python 2-only iteritems().
    for profileName in self.profiles:
        fileName = os.path.join(
            baselineDir, "baseline_" + profileName + "_scores.csv")
        with open(fileName) as f:
            results = pandas.read_csv(f)
        # The last row of the "Score" column is used as the total score.
        baselines[profileName] = results["Score"].iloc[-1]

    # Normalize the score from each results file.
    finalResults = {}
    for resultsFile in self.resultsFiles:
        # The profile is identified by substring match on the file path;
        # the first matching profile name wins.
        profileName = [k for k in baselines if k in resultsFile][0]
        base = baselines[profileName]

        with open(resultsFile) as f:
            results = pandas.read_csv(f)

        # Calculate score:
        perfect = 44.0 - base
        score = (-base + results["Score"].iloc[-1]) * (100 / perfect)

        # Add to results dict.  File names are expected to look like
        # "<detector>_<profile>_scores.<ext>"; basename() is used instead of
        # splitting on "/" so the platform's path separator is honoured.
        resultsInfo = os.path.basename(resultsFile).split('.')[0]
        detector = resultsInfo.split('_')[0]
        profile = resultsInfo.replace(detector + "_", "").replace("_scores", "")
        finalResults.setdefault(detector, {})[profile] = score

        print("Final score for '%s' detector on '%s' profile = %.2f"
              % (detector, profile, score))

    resultsPath = os.path.join(self.resultsDir, "final_results.json")
    updateFinalResults(finalResults, resultsPath)
    print("Final scores have been written to %s." % resultsPath)
def normalize(self):
    """Normalize the detectors' scores according to the baseline, and print
    them to the console.

    Function can only be called with the scoring step (i.e. runner.score())
    preceding it, since it reads the total score values from the results
    CSVs listed in ``self.resultsFiles``.

    The total score of a file is the last entry of its "Score" column.  The
    baseline total for the matching application profile is subtracted from
    it, and the difference is multiplied by ``100 / perfect`` where
    ``perfect = 44.0 - baseline`` (44.0 is the hard-coded perfect score; the
    original documentation describes it as the number of TPs possible).

    Note the results CSVs still contain the original scores, not normalized;
    this method only reads them.  The normalized values are collected per
    detector and profile and handed to ``updateFinalResults`` along with the
    path ``<resultsDir>/final_results.json``.

    Raises:
        IOError: when the ``baseline`` results directory does not exist.
        IndexError: when a results file path contains none of the profile
            names in ``self.profiles``.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("\nRunning score normalization step")

    # Get baselines for each application profile.
    baselineDir = os.path.join(self.resultsDir, "baseline")
    if not os.path.isdir(baselineDir):
        raise IOError("No results directory for baseline. You must "
                      "run the baseline detector before normalizing scores.")

    baselines = {}
    # Iterate over the profile names only (no Python 2-only iteritems()).
    for profileName in self.profiles:
        fileName = os.path.join(baselineDir,
                                "baseline_" + profileName + "_scores.csv")
        with open(fileName) as f:
            results = pandas.read_csv(f)
        baselines[profileName] = results["Score"].iloc[-1]

    # Normalize the score from each results file.
    finalResults = {}
    for resultsFile in self.resultsFiles:
        # First profile name found anywhere in the path selects the baseline.
        profileName = [k for k in baselines if k in resultsFile][0]
        base = baselines[profileName]

        with open(resultsFile) as f:
            results = pandas.read_csv(f)

        # Calculate score:
        perfect = 44.0 - base
        score = (-base + results["Score"].iloc[-1]) * (100 / perfect)

        # Add to results dict.  os.path.basename() replaces the hard-coded
        # "/" split so paths using the platform separator are handled too.
        resultsInfo = os.path.basename(resultsFile).split('.')[0]
        detector = resultsInfo.split('_')[0]
        profile = resultsInfo.replace(detector + "_", "").replace("_scores", "")
        if detector not in finalResults:
            finalResults[detector] = {}
        finalResults[detector][profile] = score

        print("Final score for '%s' detector on '%s' profile = %.2f"
              % (detector, profile, score))

    resultsPath = os.path.join(self.resultsDir, "final_results.json")
    updateFinalResults(finalResults, resultsPath)
    print("Final scores have been written to %s." % resultsPath)
def normalize(self):
    """Rescale every detector's total score relative to the null detector.

    Requires that the scoring step (i.e. runner.score()) has already run,
    because the totals are read from the results CSVs in
    ``self.resultsFiles``; those CSVs are only read, so they keep their
    original, non-normalized scores.

    For each results file the normalized score is::

        100 * (total - base) / (perfect - base)

    with ``total`` the last "Score" entry of the file, ``base`` the last
    "Score" entry of the null detector's file for the same profile, and
    ``perfect`` the number of labels found in ``self.labelPath`` times the
    profile's ``CostMatrix`` ``tpWeight``.

    Each score is printed, and the collected ``{detector: {profile: score}}``
    mapping is passed to ``updateFinalResults`` with the path of
    ``final_results.json`` inside ``self.resultsDir``.

    Raises:
        IOError: if the ``null`` results directory is missing.
        IndexError: if a results file path contains no known profile name.
    """
    print("\nRunning score normalization step")

    # The null detector's results define the baseline for every profile.
    nullDir = os.path.join(self.resultsDir, "null")
    if not os.path.isdir(nullDir):
        raise IOError("No results directory for null detector. You must "
                      "run the null detector before normalizing scores.")

    baselines = {}
    for name in self.profiles:
        nullScoresPath = os.path.join(nullDir, "null_" + name + "_scores.csv")
        with open(nullScoresPath) as nullFile:
            nullFrame = pandas.read_csv(nullFile)
        baselines[name] = nullFrame["Score"].iloc[-1]

    # Total number of TPs = number of labels over all entries of the file.
    with open(self.labelPath, "rb") as labelFile:
        labelsDict = json.load(labelFile)
    tpCount = sum(len(labels) for labels in labelsDict.values())

    # Normalize the total of each results file in turn.
    finalResults = {}
    for path in self.resultsFiles:
        # The first profile name occurring in the path picks the baseline.
        matching = [key for key in baselines if key in path]
        profileName = matching[0]
        base = baselines[profileName]

        with open(path) as scoresFile:
            frame = pandas.read_csv(scoresFile)
        total = frame["Score"].iloc[-1]

        tpWeight = self.profiles[profileName]["CostMatrix"]["tpWeight"]
        perfect = tpCount * tpWeight
        score = 100 * (total - base) / (perfect - base)

        # File names look like "<detector>_<profile>_scores.<ext>".
        stem = path.split(os.path.sep)[-1].split('.')[0]
        detector = stem.split('_')[0]
        profile = stem.replace(detector + "_", "").replace("_scores", "")

        finalResults.setdefault(detector, {})[profile] = score

        message = "Final score for \'%s\' detector on \'%s\' profile = %.2f"
        print(message % (detector, profile, score))

    resultsPath = os.path.join(self.resultsDir, "final_results.json")
    updateFinalResults(finalResults, resultsPath)
    print("Final scores have been written to %s." % resultsPath)
def normalize(self):
    """Normalize the detectors' scores according to the baseline defined by
    the null detector, and print them to the console.

    Function can only be called with the scoring step (i.e. runner.score())
    preceding it.  It reads the total score (last "Score" entry) from each
    results CSV in ``self.resultsFiles`` and subtracts the null detector's
    total for the same application profile.  The result is multiplied by 100
    and divided by the perfect score less the baseline, where the perfect
    score is the number of labels in ``self.labelPath`` multiplied by the
    profile's ``CostMatrix`` ``tpWeight``.

    Note the results CSVs still contain the original scores, not normalized;
    they are only read here.  The normalized scores are passed as a
    ``{detector: {profile: score}}`` dict to ``updateFinalResults`` together
    with the path ``<resultsDir>/final_results.json``.

    Raises:
        IOError: if ``<resultsDir>/null`` is not a directory.
        IndexError: if no profile name from ``self.profiles`` occurs in the
            path of a results file.
    """
    # Parenthesized single-argument print is valid on Python 2 and Python 3.
    print("\nRunning score normalization step")

    # Get baseline scores for each application profile.
    nullDir = os.path.join(self.resultsDir, "null")
    if not os.path.isdir(nullDir):
        raise IOError(
            "No results directory for null detector. You must "
            "run the null detector before normalizing scores."
        )

    baselines = {}
    # Only the profile names are used here, so iterate the mapping directly
    # rather than through the Python 2-only iteritems().
    for profileName in self.profiles:
        fileName = os.path.join(nullDir, "null_" + profileName + "_scores.csv")
        with open(fileName) as f:
            results = pandas.read_csv(f)
        baselines[profileName] = results["Score"].iloc[-1]

    # Get total number of TPs
    with open(self.labelPath, "rb") as f:
        labelsDict = json.load(f)
    tpCount = sum(len(labels) for labels in labelsDict.values())

    # Normalize the score from each results file.
    finalResults = {}
    for resultsFile in self.resultsFiles:
        # First profile name found anywhere in the path selects the baseline.
        profileName = [k for k in baselines if k in resultsFile][0]
        base = baselines[profileName]

        with open(resultsFile) as f:
            results = pandas.read_csv(f)

        # Calculate score.  The float literal keeps this a true division
        # even when every operand happens to be an integer under Python 2.
        perfect = tpCount * self.profiles[profileName]["CostMatrix"]["tpWeight"]
        score = 100.0 * (results["Score"].iloc[-1] - base) / (perfect - base)

        # Add to results dict.  File names are expected to look like
        # "<detector>_<profile>_scores.<ext>"; basename() is used instead of
        # splitting on "/" so the platform's path separator is honoured.
        resultsInfo = os.path.basename(resultsFile).split(".")[0]
        detector = resultsInfo.split("_")[0]
        profile = resultsInfo.replace(detector + "_", "").replace("_scores", "")
        finalResults.setdefault(detector, {})[profile] = score

        print("Final score for '%s' detector on '%s' profile = %.2f"
              % (detector, profile, score))

    resultsPath = os.path.join(self.resultsDir, "final_results.json")
    updateFinalResults(finalResults, resultsPath)
    print("Final scores have been written to %s." % resultsPath)