def main(args):
  """Add a ground-truth "label" column to every data file in the corpus.

  Loads the corpus and its labels, builds a per-file pandas Series of label
  values, attaches it as a new column, and writes the augmented corpus to
  args.destDir.

  @param args  (namespace)  Parsed CLI arguments; must provide absolutePaths,
                            labelDir, dataDir, and destDir.
  """
  if not args.absolutePaths:
    # Resolve all user-supplied paths against the repository root.
    args.labelDir = os.path.join(root, args.labelDir)
    args.dataDir = os.path.join(root, args.dataDir)
    args.destDir = os.path.join(root, args.destDir)

  # Bail out early on invalid arguments (checkInputs reports the problem).
  if not checkInputs(args):
    return

  corpus = Corpus(args.dataDir)
  corpusLabel = CorpusLabel(args.labelDir, corpus=corpus)
  corpusLabel.getEverything()

  # One Series of label values per data file, keyed by relative path.
  columnData = {}
  for relativePath in corpusLabel.labels.keys():
    columnData[relativePath] = pandas.Series(
      corpusLabel.labels[relativePath]["label"])

  corpus.addColumn("label", columnData)

  corpus.copy(newRoot=args.destDir)

  # Fixed: was a Python 2 print statement; the rest of the file uses print().
  print("Done adding labels!")
def main(args):
  """Remove the named columns from every data file in the corpus.

  @param args  (namespace)  Parsed CLI arguments; must provide absolutePaths,
                            dataDir, destDir, and columnNames.
  """
  if not args.absolutePaths:
    # Relative paths are interpreted against the repository root; destDir is
    # optional and only resolved when provided.
    args.dataDir = os.path.join(root, args.dataDir)
    args.destDir = os.path.join(root, args.destDir) if args.destDir else args.destDir

  corpus = Corpus(args.dataDir)

  for columnName in args.columnNames:
    corpus.removeColumn(columnName, write=True, newRoot=args.destDir)
def initialize(self):
  """Set up the corpus, its labels, and the scoring profiles for this run."""
  self.corpus = Corpus(self.dataDir)

  self.corpusLabel = CorpusLabel(path=self.labelPath, corpus=self.corpus)

  # Scoring profiles (cost matrices) live in a JSON file on disk.
  with open(self.profilesPath) as profilesFile:
    self.profiles = json.load(profilesFile)
def optimize(self, detectorNames):
  """Optimize the threshold for each combination of detector and profile.

  @param detectorNames  (list)  List of detector names.

  @return thresholds    (dict)  Dictionary of dictionaries with detector names
                                then profile names as keys followed by another
                                dictionary containing the score and the
                                threshold used to obtain that score.
  """
  print("\nRunning optimize step")

  # Removed unused local `scoreFlag = False` -- nothing in this method read it.
  thresholds = {}

  for detectorName in detectorNames:
    resultsDetectorDir = os.path.join(self.resultsDir, detectorName)
    resultsCorpus = Corpus(resultsDetectorDir)

    thresholds[detectorName] = {}

    for profileName, profile in self.profiles.items():
      # optimizeThreshold takes a single args tuple (pool-friendly signature).
      thresholds[detectorName][profileName] = optimizeThreshold(
        (detectorName,
         profile["CostMatrix"],
         resultsCorpus,
         self.corpusLabel,
         self.probationaryPercent))

  updateThresholds(thresholds, self.thresholdPath)

  return thresholds
def main(args):
  """Combine raw labels into ground-truth anomaly windows and validate them.

  @param args  (namespace)  Parsed CLI arguments; must provide absolutePaths,
                            dataDir, labelDir, threshold, verbosity,
                            combinedLabelsPath, and combinedWindowsPath.
  """
  if not args.absolutePaths:
    dataDir = os.path.join(root, args.dataDir)
    labelDir = os.path.join(root, args.labelDir)
  else:
    dataDir = args.dataDir
    labelDir = args.labelDir

  # The following params are used in NAB scoring, but defined here because
  # they impact the labeling process -- i.e. windows cannot exist in the
  # probationary period.
  windowSize = 0.10
  probationaryPercent = 0.15

  # Fixed: this function used Python 2 print statements while other parts of
  # the codebase use the print() function.
  print("Getting Corpus")
  corpus = Corpus(dataDir)

  print("Creating LabelCombiner")
  labelCombiner = LabelCombiner(labelDir, corpus,
                                args.threshold, windowSize,
                                probationaryPercent, args.verbosity)

  print("Combining Labels")
  labelCombiner.combine()

  print("Writing combined labels files")
  labelCombiner.write(args.combinedLabelsPath, args.combinedWindowsPath)

  # Round-trip the freshly written windows file as a sanity check.
  print("Attempting to load objects as a test")
  corpusLabel = CorpusLabel(args.combinedWindowsPath, corpus)
  corpusLabel.validateLabels()

  print("Successfully combined labels!")
  print("Resulting windows stored in:", args.combinedWindowsPath)
def main(args):
  """Write an empty JSON label file covering every non-"Known" data file.

  @param args  (namespace)  Parsed CLI arguments; must provide labelFile and
                            dataDir.
  """
  root = recur(os.path.dirname, os.path.realpath(__file__), n=2)

  # Bug fix: the resolved path was previously assigned to args.labelDir while
  # everything below reads args.labelFile, so a relative labelFile was never
  # actually made absolute.
  if not os.path.isabs(args.labelFile):
    args.labelFile = os.path.join(root, args.labelFile)
  if not os.path.isabs(args.dataDir):
    args.dataDir = os.path.join(root, args.dataDir)

  corpus = Corpus(args.dataDir)

  # Data files whose path mentions "Known" are excluded from labeling.
  empty_labels = {
    p: [] for p in list(corpus.dataFiles.keys()) if "Known" not in p
  }

  with open(args.labelFile, "w") as outFile:
    outFile.write(
      json.dumps(empty_labels,
                 sort_keys=True, indent=4, separators=(',', ': ')))

  print("Empty label file written to", args.labelFile)
def main(args):
  """Run the requested application lifecycle and streaming actions in order.

  @param args  (namespace)  Parsed CLI arguments; flags create/start/stop/
                            file/stream/delete select the actions, and data/
                            labels/results supply the streaming inputs.
  """
  def loadCorpusAndLabels():
    # Both streaming modes need the corpus and its labels loaded the same way.
    streamCorpus = Corpus(args.data)
    streamLabels = CorpusLabel(path=args.labels, corpus=streamCorpus)
    return streamCorpus, streamLabels

  if args.create:
    createApplication()

  if args.start:
    startApplication()

  if args.stop:
    stopApplication()

  if args.file:
    streamCorpus, streamLabels = loadCorpusAndLabels()
    streamFile(streamCorpus, streamLabels, args.results, args.file)

  if args.stream:
    streamCorpus, streamLabels = loadCorpusAndLabels()
    streamAll(streamCorpus, streamLabels, args.results)

  if args.delete:
    deleteApplication()
def score(self, detectorNames, thresholds):
  """Score the performance of the detectors.

  Must be called only after detection result files have been generated and
  thresholds have been optimized. Reads the result files, scores each detector
  under every profile, and writes the per-detector scores to CSV files.

  @param detectorNames  (list)  List of detector names.

  @param thresholds     (dict)  Dictionary of dictionaries with detector names
                                then profile names as keys followed by another
                                dictionary containing the score and the
                                threshold used to obtain that score.
  """
  print("\nRunning scoring step")

  # Removed unused local `baselines = {}` -- nothing in this method read it.
  scoreFlag = True
  self.resultsFiles = []
  for detectorName in detectorNames:
    resultsDetectorDir = os.path.join(self.resultsDir, detectorName)
    resultsCorpus = Corpus(resultsDetectorDir)

    for profileName, profile in self.profiles.items():
      threshold = thresholds[detectorName][profileName]["threshold"]

      # scoreCorpus takes a single args tuple (pool-friendly signature).
      resultsDF = scoreCorpus(threshold,
                              (self.pool,
                               detectorName,
                               profileName,
                               profile["CostMatrix"],
                               resultsDetectorDir,
                               resultsCorpus,
                               self.corpusLabel,
                               self.probationaryPercent,
                               scoreFlag))

      scorePath = os.path.join(resultsDetectorDir, "%s_%s_scores.csv" %\
        (detectorName, profileName))
      resultsDF.to_csv(scorePath, index=False)
      print("%s detector benchmark scores written to %s" %\
        (detectorName, scorePath))
      self.resultsFiles.append(scorePath)
"Layer1_TM_entropy" ] pandaBaker.dataStreams = dict((name, cDataStream()) for name in streams) # create dicts for more comfortable code # could be also written like: pandaBaker.dataStreams["myStreamName"] = cDataStream() pandaBaker.PrepareDatabase() # WHILE USING PANDAVIS # SPECIFY HERE FOR WHAT DATA YOU WANT TO RUN THIS DETECTOR if PANDA_VIS_BAKE_DATA: import pandas as pd import os.path as path from nab.corpus import Corpus dataDir = path.abspath(path.join(__file__ ,"../../../..","data")) corpus = Corpus(dataDir) dataSet = corpus.dataFiles["artificialWithAnomaly/art_daily_flatmiddle.csv"] detector = HtmcoreDetector(dataSet=dataSet, probationaryPercent=0.15) detector.initialize() detector.run() pandaBaker.CommitBatch()