Esempio n. 1
0
    def score(self, detectorNames, thresholds):
        """Score every detector under every profile and write the scores to CSV.

        Function that must be called only after detection result files have
        been generated and thresholds have been optimized. For each
        detector/profile pair, scoreCorpus is invoked with the optimized
        threshold, the profile's cost matrix and a Corpus built from the
        detector's results directory; the returned table is written to
        "<detectorName>_<profileName>_scores.csv" inside that directory.

        Side effect: self.resultsFiles is reset to an empty list and then
        filled with the path of every CSV file written by this call.

        @param detectorNames  (list)    List of detector names.

        @param thresholds     (dict)    Dictionary of dictionaries with detector
                                        names then profile names as keys,
                                        followed by another dictionary
                                        containing the score and the threshold
                                        used to obtain that score. Only the
                                        "threshold" entry is read here.
        """
        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print("\nRunning scoring step")

        # Forwarded to scoreCorpus as the last element of its argument tuple.
        scoreFlag = True

        self.resultsFiles = []
        for detectorName in detectorNames:
            resultsDetectorDir = os.path.join(self.resultsDir, detectorName)
            resultsCorpus = Corpus(resultsDetectorDir)

            # .items() exists on both Python 2 and Python 3 dictionaries.
            for profileName, profile in self.profiles.items():
                threshold = thresholds[detectorName][profileName]["threshold"]
                resultsDF = scoreCorpus(
                    threshold,
                    (
                        self.pool,
                        detectorName,
                        profileName,
                        profile["CostMatrix"],
                        resultsDetectorDir,
                        resultsCorpus,
                        self.corpusLabel,
                        self.probationaryPercent,
                        scoreFlag,
                    ),
                )

                scorePath = os.path.join(
                    resultsDetectorDir,
                    "%s_%s_scores.csv" % (detectorName, profileName),
                )

                resultsDF.to_csv(scorePath, index=False)
                print("%s detector benchmark scores written to %s" % (detectorName, scorePath))
                self.resultsFiles.append(scorePath)
Esempio n. 2
0
  def score(self, detectorNames, thresholds):
    """Score the performance of the detectors.

    Function that must be called only after detection result files have been
    generated and thresholds have been optimized. For every detector and every
    profile, scoreCorpus is called with the optimized threshold and the
    returned table is stored in a csv file named
    "<detectorName>_<profileName>_scores.csv" in the detector's results
    directory.

    Side effect: self.resultsFiles is reset and then receives the path of each
    csv file written.

    @param detectorNames  (list)    List of detector names.

    @param thresholds     (dict)    Dictionary of dictionaries with detector
                                    names then profile names as keys followed by
                                    another dictionary containing the score and
                                    the threshold used to obtain that score.
                                    Only the "threshold" entry is read here.
    """
    print("\nRunning scoring step")

    # Forwarded to scoreCorpus as the last element of its argument tuple.
    scoreFlag = True

    self.resultsFiles = []
    for detectorName in detectorNames:
      resultsDetectorDir = os.path.join(self.resultsDir, detectorName)
      resultsCorpus = Corpus(resultsDetectorDir)

      for profileName, profile in self.profiles.items():

        threshold = thresholds[detectorName][profileName]["threshold"]
        resultsDF = scoreCorpus(threshold,
                                (self.pool,
                                 detectorName,
                                 profileName,
                                 profile["CostMatrix"],
                                 resultsDetectorDir,
                                 resultsCorpus,
                                 self.corpusLabel,
                                 self.probationaryPercent,
                                 scoreFlag))

        scoreFileName = "%s_%s_scores.csv" % (detectorName, profileName)
        scorePath = os.path.join(resultsDetectorDir, scoreFileName)

        resultsDF.to_csv(scorePath, index=False)
        print("%s detector benchmark scores written to %s" %
              (detectorName, scorePath))
        self.resultsFiles.append(scorePath)
Esempio n. 3
0
def objectiveFunction(threshold, args):
    """Score the corpus at a candidate threshold, for use as an objective.

    Thresholds that do not satisfy 0 <= threshold <= 1 are not scored; negative
    infinity is returned for them instead.

    @param threshold  (float)   Threshold value to convert an anomaly score
                                value to a detection.

    @param args       (tuple)   Arguments forwarded unchanged to scoreCorpus.

    @return score     (float)   Last entry of the "Score" column of the table
                                returned by scoreCorpus, or -inf when the
                                threshold fails the range check.
    """
    if 0 <= threshold <= 1:
        scoredCorpus = scoreCorpus(threshold, args)
        lastScore = scoredCorpus["Score"].iloc[-1]
        return float(lastScore)

    # Anything failing the range check (NaN included) is rejected outright.
    return float("-inf")
Esempio n. 4
0
def objectiveFunction(threshold, args):
  """Evaluate the corpus score obtained with the given threshold.

  A threshold for which 0 <= threshold <= 1 does not hold is rejected with a
  score of negative infinity and the corpus is not scored at all.

  @param threshold  (float)   Threshold value to convert an anomaly score value
                              to a detection.

  @param args       (tuple)   Arguments passed straight through to scoreCorpus.

  @return score     (float)   Final value in the "Score" column of the
                              scoreCorpus result, or -inf for a rejected
                              threshold.
  """
  thresholdInRange = 0 <= threshold <= 1
  if not thresholdInRange:
    return float("-inf")

  # The last row of the "Score" column is taken as the corpus score.
  return float(scoreCorpus(threshold, args)["Score"].iloc[-1])
Esempio n. 5
0
  def score(self, detectors, thresholds):
    """Score the performance of the detectors.

    Function that must be called only after detection result files have been
    generated and thresholds have been optimized. For each detector,
    scoreCorpus is called once per user profile with the optimized threshold;
    every item it returns is appended as one row of a table, and the table is
    stored in "<detector>_scores.csv" inside the detector's results directory.

    @param detectors      (list)    List of detector names.

    @param thresholds     (dict)    Dictionary of dictionaries with detector
                                    names then usernames as keys followed by
                                    another dictionary containing the score and
                                    the threshold used to obtain that score.
                                    Only the "threshold" entry is read here.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("\nObtaining Scores")

    # Column layout of the per-detector csv; identical for every detector.
    columns = ("Detector", "Username", "File",
               "Threshold", "Score", "tp", "tn", "fp", "fn", "Total_Count")

    for detector in detectors:
      ans = pandas.DataFrame(columns=columns)

      resultsDetectorDir = os.path.join(self.resultsDir, detector)
      resultsCorpus = Corpus(resultsDetectorDir)

      # .items() exists on both Python 2 and Python 3 dictionaries.
      for username, profile in self.profiles.items():

        costMatrix = profile["CostMatrix"]

        threshold = thresholds[detector][username]["threshold"]

        results = scoreCorpus(threshold,
                              (self.pool,
                               detector,
                               username,
                               costMatrix,
                               resultsCorpus,
                               self.corpusLabel,
                               self.probationaryPercent))

        # Append each result as the next row of the detector's table.
        for row in results:
          ans.loc[len(ans)] = row

      scorePath = os.path.join(resultsDetectorDir, detector + "_scores.csv")
      ans.to_csv(scorePath, index=False)