예제 #1
0
def main(args):
  """Add a "label" column to the corpus and write a copy to the destination.

  @param args   Parsed command-line arguments. Reads `absolutePaths`,
                `labelDir`, `dataDir` and `destDir`; when `absolutePaths` is
                falsy the three directory attributes are rewritten in place
                to be relative to the module-level `root`.
  """
  if not args.absolutePaths:
    args.labelDir = os.path.join(root, args.labelDir)
    args.dataDir = os.path.join(root, args.dataDir)
    args.destDir = os.path.join(root, args.destDir)

  # Nothing is read or written when the inputs are rejected.
  if not checkInputs(args):
    return

  corpus = Corpus(args.dataDir)

  corpusLabel = CorpusLabel(args.labelDir, corpus=corpus)
  corpusLabel.getEverything()

  # One Series per labelled file, keyed by the same relative path the labels
  # are keyed by.
  columnData = {
    relativePath: pandas.Series(labelEntry["label"])
    for relativePath, labelEntry in corpusLabel.labels.items()
  }

  corpus.addColumn("label", columnData)

  corpus.copy(newRoot=args.destDir)

  # Parenthesised single-argument form prints the same text on Python 2 and 3.
  print("Done adding labels!")
예제 #2
0
def main(args):
  """Remove the named columns from every file of the corpus.

  @param args   Parsed command-line arguments. Reads `absolutePaths`,
                `dataDir`, `destDir` and `columnNames`. When `absolutePaths`
                is falsy, `dataDir` (and `destDir`, if set) are rewritten in
                place to be relative to the module-level `root`.
  """
  resolveAgainstRoot = not args.absolutePaths

  if resolveAgainstRoot:
    args.dataDir = os.path.join(root, args.dataDir)
  if resolveAgainstRoot and args.destDir:
    # An unset destination is passed through untouched.
    args.destDir = os.path.join(root, args.destDir)

  sourceCorpus = Corpus(args.dataDir)

  for columnName in args.columnNames:
    sourceCorpus.removeColumn(columnName, write=True, newRoot=args.destDir)
예제 #3
0
def main(args):
    """Drop each requested column from the corpus, writing the result.

    @param args   Parsed command-line arguments. Reads `absolutePaths`,
                  `dataDir`, `destDir` and `columnNames`. Relative paths are
                  anchored at the module-level `root` unless `absolutePaths`
                  is set; the attributes on `args` are updated in place.
    """
    def underRoot(relativePath):
        # Anchor a relative path at the module-level root directory.
        return os.path.join(root, relativePath)

    if not args.absolutePaths:
        args.dataDir = underRoot(args.dataDir)
        if args.destDir:
            args.destDir = underRoot(args.destDir)

    targetCorpus = Corpus(args.dataDir)

    for columnName in args.columnNames:
        targetCorpus.removeColumn(columnName,
                                  write=True,
                                  newRoot=args.destDir)
예제 #4
0
파일: runner.py 프로젝트: pmenn36/NAB
  def initialize(self):
    """Build the corpus, its labels and the profiles needed for a run.

    Sets `self.corpus` from `self.dataDir`, `self.corpusLabel` from
    `self.labelPath` (bound to that corpus), and `self.profiles` from the JSON
    file at `self.profilesPath`.
    """
    dataCorpus = Corpus(self.dataDir)
    self.corpus = dataCorpus
    self.corpusLabel = CorpusLabel(path=self.labelPath, corpus=dataCorpus)

    with open(self.profilesPath) as profilesFile:
      loadedProfiles = json.load(profilesFile)
    self.profiles = loadedProfiles
예제 #5
0
파일: runner.py 프로젝트: pmenn36/NAB
  def optimize(self, detectorNames):
    """Optimize the threshold for each combination of detector and profile.

    The resulting thresholds are also handed to `updateThresholds` together
    with `self.thresholdPath` (presumably to persist them -- confirm against
    that helper).

    @param detectorNames  (list)  List of detector names.

    @return thresholds    (dict)  Dictionary of dictionaries with detector names
                                  then profile names as keys followed by another
                                  dictionary containing the score and the
                                  threshold used to obtain that score.
    """
    print("\nRunning optimize step")

    thresholds = {}

    for detectorName in detectorNames:
      # Each detector's results live in their own sub-directory of resultsDir.
      resultsDetectorDir = os.path.join(self.resultsDir, detectorName)
      resultsCorpus = Corpus(resultsDetectorDir)

      # optimizeThreshold takes its arguments packed into a single tuple.
      thresholds[detectorName] = {
        profileName: optimizeThreshold(
          (detectorName,
           profile["CostMatrix"],
           resultsCorpus,
           self.corpusLabel,
           self.probationaryPercent))
        for profileName, profile in self.profiles.items()
      }

    updateThresholds(thresholds, self.thresholdPath)

    return thresholds
예제 #6
0
def main(args):
  """Combine label files into combined labels/windows files and validate them.

  @param args   Parsed command-line arguments. Reads `absolutePaths`,
                `dataDir`, `labelDir`, `threshold`, `verbosity`,
                `combinedLabelsPath` and `combinedWindowsPath`. The data and
                label directories are taken relative to the module-level
                `root` unless `absolutePaths` is set.
  """
  if args.absolutePaths:
    dataDir = args.dataDir
    labelDir = args.labelDir
  else:
    dataDir = os.path.join(root, args.dataDir)
    labelDir = os.path.join(root, args.labelDir)

  # The following params are used in NAB scoring, but defined here because they
  # impact the labeling process -- i.e. windows cannot exist in the probationary
  # period.
  windowSize = 0.10
  probationaryPercent = 0.15

  # Single-argument print(...) calls emit the same text on Python 2 and 3.
  print("Getting Corpus")
  corpus = Corpus(dataDir)

  print("Creating LabelCombiner")
  labelCombiner = LabelCombiner(labelDir, corpus,
                                args.threshold, windowSize,
                                probationaryPercent, args.verbosity)

  print("Combining Labels")
  labelCombiner.combine()

  print("Writing combined labels files")
  labelCombiner.write(args.combinedLabelsPath, args.combinedWindowsPath)

  # Reload what was just written so a broken output file is caught right away.
  print("Attempting to load objects as a test")
  corpusLabel = CorpusLabel(args.combinedWindowsPath, corpus)
  corpusLabel.validateLabels()

  print("Successfully combined labels!")
  print("Resulting windows stored in: %s" % (args.combinedWindowsPath,))
def main(args):
    """Write a JSON label file holding an empty label list per data file.

    Data files whose relative path contains "Known" are left out of the
    output.

    @param args   Parsed command-line arguments. Reads `labelFile` and
                  `dataDir`; non-absolute values are rewritten in place to be
                  relative to the computed `root`.
    """
    # NOTE(review): presumably applies os.path.dirname twice to this file's
    # real path -- confirm against `recur`.
    root = recur(os.path.dirname, os.path.realpath(__file__), n=2)

    # The resolved path used to be stored in args.labelDir, which nothing
    # reads, so a relative labelFile was opened relative to the current
    # working directory instead of root.
    if not os.path.isabs(args.labelFile):
        args.labelFile = os.path.join(root, args.labelFile)

    if not os.path.isabs(args.dataDir):
        args.dataDir = os.path.join(root, args.dataDir)

    corpus = Corpus(args.dataDir)

    empty_labels = {
        p: []
        for p in corpus.dataFiles.keys() if "Known" not in p
    }

    with open(args.labelFile, "w") as outFile:
        outFile.write(
            json.dumps(empty_labels,
                       sort_keys=True,
                       indent=4,
                       separators=(',', ': ')))

    print("Empty label file written to", args.labelFile)
예제 #8
0
def main(args):
    """Attach a "label" column to the corpus and copy it to the destination.

    @param args   Parsed command-line arguments. Reads `absolutePaths`,
                  `labelDir`, `dataDir` and `destDir`; unless `absolutePaths`
                  is set, the three directories are rewritten in place to be
                  relative to the module-level `root`.
    """
    if not args.absolutePaths:
        args.labelDir = os.path.join(root, args.labelDir)
        args.dataDir = os.path.join(root, args.dataDir)
        args.destDir = os.path.join(root, args.destDir)

    # Bail out before touching any data when the inputs are rejected.
    if not checkInputs(args):
        return

    corpus = Corpus(args.dataDir)

    corpusLabel = CorpusLabel(args.labelDir, corpus=corpus)
    corpusLabel.getEverything()

    # Build the per-file column data, keyed by each file's relative path.
    columnData = {
        relativePath: pandas.Series(labelEntry["label"])
        for relativePath, labelEntry in corpusLabel.labels.items()
    }

    corpus.addColumn("label", columnData)

    corpus.copy(newRoot=args.destDir)

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Done adding labels!")
예제 #9
0
def main(args):
    """Run each application action whose flag is set on `args`.

    Actions run in a fixed order: create, start, stop, stream one file,
    stream everything, delete. Several flags may be set at once.

    @param args   Parsed command-line arguments. Reads `create`, `start`,
                  `stop`, `file`, `stream`, `delete`, and -- for the two
                  streaming actions -- `data`, `labels` and `results`.
    """
    def loadCorpusAndLabels():
        # Built fresh for each streaming action, as the actions are independent.
        dataCorpus = Corpus(args.data)
        corpusLabels = CorpusLabel(path=args.labels, corpus=dataCorpus)
        return dataCorpus, corpusLabels

    if args.create:
        createApplication()
    if args.start:
        startApplication()
    if args.stop:
        stopApplication()

    if args.file:
        dataCorpus, corpusLabels = loadCorpusAndLabels()
        streamFile(dataCorpus, corpusLabels, args.results, args.file)
    if args.stream:
        dataCorpus, corpusLabels = loadCorpusAndLabels()
        streamAll(dataCorpus, corpusLabels, args.results)

    if args.delete:
        deleteApplication()
예제 #10
0
파일: runner.py 프로젝트: pmenn36/NAB
  def score(self, detectorNames, thresholds):
    """Score the performance of the detectors.

    Function that must be called only after detection result files have been
    generated and thresholds have been optimized. This looks at the result files
    and scores the performance of each detector specified and stores these
    results in a csv file per detector/profile pair. The paths of the written
    files are collected in `self.resultsFiles`.

    @param detectorNames  (list)    List of detector names.

    @param thresholds     (dict)    Dictionary of dictionaries with detector
                                    names then profile names as keys followed by
                                    another dictionary containing the score and
                                    the threshold used to obtain that score.
    """
    print("\nRunning scoring step")

    scoreFlag = True

    self.resultsFiles = []
    for detectorName in detectorNames:
      # Each detector's results live in their own sub-directory of resultsDir.
      resultsDetectorDir = os.path.join(self.resultsDir, detectorName)
      resultsCorpus = Corpus(resultsDetectorDir)

      for profileName, profile in self.profiles.items():

        threshold = thresholds[detectorName][profileName]["threshold"]
        # scoreCorpus takes the threshold plus the remaining arguments packed
        # into a single tuple.
        resultsDF = scoreCorpus(threshold,
                                (self.pool,
                                 detectorName,
                                 profileName,
                                 profile["CostMatrix"],
                                 resultsDetectorDir,
                                 resultsCorpus,
                                 self.corpusLabel,
                                 self.probationaryPercent,
                                 scoreFlag))

        scoreFileName = "%s_%s_scores.csv" % (detectorName, profileName)
        scorePath = os.path.join(resultsDetectorDir, scoreFileName)

        resultsDF.to_csv(scorePath, index=False)
        print("%s detector benchmark scores written to %s" %
              (detectorName, scorePath))
        self.resultsFiles.append(scorePath)
예제 #11
0
                 "Layer1_TM_entropy"
                 ]

      pandaBaker.dataStreams = dict((name, cDataStream()) for name in streams)  # create dicts for more comfortable code
      # could be also written like: pandaBaker.dataStreams["myStreamName"] = cDataStream()

      pandaBaker.PrepareDatabase()



# When running with PandaVis, this block runs the detector on a single data
# file; change the key looked up in corpus.dataFiles below to pick another one.
if PANDA_VIS_BAKE_DATA:
  # NOTE(review): `pd` is not used in the visible part of this block.
  import pandas as pd
  import os.path as path
  from nab.corpus import Corpus
  # The "data" directory three levels above the directory holding this file.
  dataDir =  path.abspath(path.join(__file__ ,"../../../..","data"))

  corpus = Corpus(dataDir)

  # Data files are keyed by their path relative to dataDir.
  dataSet = corpus.dataFiles["artificialWithAnomaly/art_daily_flatmiddle.csv"]

  detector = HtmcoreDetector(dataSet=dataSet,
                  probationaryPercent=0.15)

  detector.initialize()

  detector.run()

  # NOTE(review): presumably flushes the data baked during the run to the
  # PandaVis database -- confirm against pandaBaker.
  pandaBaker.CommitBatch()