def score(self, detectorNames, thresholds):
    """Score the performance of the detectors.

    Function that must be called only after detection result files have been
    generated and thresholds have been optimized. For every detector/profile
    pair this scores the detector's results corpus with scoreCorpus and writes
    the returned table to a csv file named
    "%s_%s_scores.csv" % (detectorName, profileName) inside the detector's
    results directory. The written paths are collected in self.resultsFiles,
    which is reset at the start of every call.

    @param detectorNames  (list)    List of detector names.

    @param thresholds     (dict)    Dictionary of dictionaries with detector
                                    names then profile names as keys followed
                                    by another dictionary containing the score
                                    and the threshold used to obtain that
                                    score. Only the "threshold" entry is read
                                    here.
    """
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("\nRunning scoring step")

    scoreFlag = True
    self.resultsFiles = []

    for detectorName in detectorNames:
        resultsDetectorDir = os.path.join(self.resultsDir, detectorName)
        resultsCorpus = Corpus(resultsDetectorDir)

        # items() exists on both Python 2 and 3 dicts; iteritems() does not.
        for profileName, profile in self.profiles.items():
            threshold = thresholds[detectorName][profileName]["threshold"]

            # presumably a pandas DataFrame (it is written with to_csv below);
            # confirm against scoreCorpus.
            resultsDF = scoreCorpus(
                threshold,
                (
                    self.pool,
                    detectorName,
                    profileName,
                    profile["CostMatrix"],
                    resultsDetectorDir,
                    resultsCorpus,
                    self.corpusLabel,
                    self.probationaryPercent,
                    scoreFlag,
                ),
            )

            scorePath = os.path.join(
                resultsDetectorDir,
                "%s_%s_scores.csv" % (detectorName, profileName),
            )
            resultsDF.to_csv(scorePath, index=False)
            print("%s detector benchmark scores written to %s"
                  % (detectorName, scorePath))
            self.resultsFiles.append(scorePath)
def score(self, detectorNames, thresholds):
    """Score the performance of the detectors.

    Function that must be called only after detection result files have been
    generated and thresholds have been optimized. This looks at the result
    files and scores the performance of each detector specified, storing one
    csv file per detector/profile pair in that detector's results directory.

    Side effect: self.resultsFiles is replaced by the list of csv paths
    written during this call, in the order they were written.

    @param detectorNames  (list)    List of detector names.

    @param thresholds     (dict)    Dictionary of dictionaries with detector
                                    names then profile names as keys followed
                                    by another dictionary containing the score
                                    and the threshold used to obtain that
                                    score. Only the "threshold" entry is read
                                    here.
    """
    print("\nRunning scoring step")

    scoreFlag = True
    self.resultsFiles = []

    for detectorName in detectorNames:
        resultsDetectorDir = os.path.join(self.resultsDir, detectorName)
        resultsCorpus = Corpus(resultsDetectorDir)

        for profileName, profile in self.profiles.items():
            threshold = thresholds[detectorName][profileName]["threshold"]

            # Positional argument bundle consumed by scoreCorpus; the order
            # must stay exactly as scoreCorpus expects it.
            scoringArgs = (
                self.pool,
                detectorName,
                profileName,
                profile["CostMatrix"],
                resultsDetectorDir,
                resultsCorpus,
                self.corpusLabel,
                self.probationaryPercent,
                scoreFlag,
            )
            resultsDF = scoreCorpus(threshold, scoringArgs)

            scoreFileName = "%s_%s_scores.csv" % (detectorName, profileName)
            scorePath = os.path.join(resultsDetectorDir, scoreFileName)
            resultsDF.to_csv(scorePath, index=False)
            print("%s detector benchmark scores written to %s"
                  % (detectorName, scorePath))
            self.resultsFiles.append(scorePath)
def objectiveFunction(threshold, args):
    """Objective function that scores the corpus given a specific threshold.

    A threshold outside the closed interval [0, 1] is rejected without scoring
    anything and yields negative infinity.

    @param threshold  (float)   Threshold value to convert an anomaly score
                                value to a detection.

    @param args       (tuple)   Arguments forwarded unchanged to scoreCorpus.

    @return score     (float)   Last entry of the "Score" column returned by
                                scoreCorpus, or float("-inf") when threshold
                                is not within [0, 1].
    """
    # Chained comparison is kept so that a NaN threshold is also rejected.
    withinBounds = 0 <= threshold <= 1
    if withinBounds:
        corpusScores = scoreCorpus(threshold, args)
        finalScore = corpusScores["Score"].iloc[-1]
        return float(finalScore)

    return float("-inf")
def score(self, detectors, thresholds):
    """Score the performance of the detectors.

    Function that must be called only after detection result files have been
    generated and thresholds have been optimized. For each detector this
    scores its results corpus once per user profile, accumulates every
    returned row into a single table and stores that table as
    detector + "_scores.csv" in the detector's results directory.

    @param detectors   (list)    List of detector names.

    @param thresholds  (dict)    Dictionary of dictionaries with detector
                                 names then usernames as keys followed by
                                 another dictionary containing the score and
                                 the threshold used to obtain that score.
                                 Only the "threshold" entry is read here.
    """
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("\nObtaining Scores")

    # Column layout of the per-detector score table; it is the same for every
    # detector, so it is built once.
    columns = ("Detector", "Username", "File", "Threshold", "Score",
               "tp", "tn", "fp", "fn", "Total_Count")

    for detector in detectors:
        ans = pandas.DataFrame(columns=columns)

        resultsDetectorDir = os.path.join(self.resultsDir, detector)
        resultsCorpus = Corpus(resultsDetectorDir)

        # items() exists on both Python 2 and 3 dicts; iteritems() does not.
        for username, profile in self.profiles.items():
            costMatrix = profile["CostMatrix"]
            threshold = thresholds[detector][username]["threshold"]

            results = scoreCorpus(
                threshold,
                (
                    self.pool,
                    detector,
                    username,
                    costMatrix,
                    resultsCorpus,
                    self.corpusLabel,
                    self.probationaryPercent,
                ),
            )

            # presumably each row has one value per column above; confirm
            # against scoreCorpus. Rows are appended one at a time so the
            # resulting table (and csv) matches the established output.
            for row in results:
                ans.loc[len(ans)] = row

        scorePath = os.path.join(resultsDetectorDir, detector + "_scores.csv")
        ans.to_csv(scorePath, index=False)