コード例 #1
0
    def testCalcSweepScoreWindowScoreInteraction(self):
        """Sweep scores must be positive inside windows and negative elsewhere.

        Feeds 100 rows that all carry anomaly score 1 through
        Sweeper.calcSweepScore with two labelled windows, then checks the
        number and type of the returned points, how many are marked
        probationary, how many are marked as in a window, and the sign of
        every sweep score.
        """
        rowCount = 100
        # Plain integers stand in for the dates that real data uses.
        timestamps = list(range(rowCount))
        anomalyScores = [1] * rowCount
        dataSetName = "TestDataSet"

        windows = [(30, 39), (75, 95)]
        # The test expects both end points of each window to be counted,
        # hence the +1 per window.
        expectedInWindowCount = sum(end - start + 1 for start, end in windows)

        # Standard profile
        sweeper = Sweeper(
            probationPercent=0.1,
            costMatrix={"tpWeight": 1.0, "fnWeight": 1.0, "fpWeight": 0.11})
        points = sweeper.calcSweepScore(
            timestamps, anomalyScores, windows, dataSetName)

        # One AnomalyPoint must come back per input row.
        assert len(points) == rowCount
        assert all(isinstance(point, AnomalyPoint) for point in points)

        def isInWindow(point):
            """Return True unless the point is probationary or unlabelled."""
            return point.windowName not in ("probationary", None)

        # The probationary count must agree with the sweeper's own figure.
        probationaryCount = sum(
            1 for point in points if point.windowName == "probationary")
        assert probationaryCount == sweeper._getProbationaryLength(rowCount)

        # Any window name other than "probationary" or None counts as
        # 'in window'.
        inWindowCount = sum(1 for point in points if isInWindow(point))
        assert inWindowCount == expectedInWindowCount

        # In-window points score positive; every other point scores negative.
        for point in points:
            if isInWindow(point):
                assert point.sweepScore > 0
            else:
                assert point.sweepScore < 0
コード例 #2
0
ファイル: sweeper_test.py プロジェクト: numenta/NAB
  def testCalcSweepScoreWindowScoreInteraction(self):
    """Check that only points inside a window receive a positive sweep score.

    All 100 rows get anomaly score 1. After scoring them against two
    windows, the test verifies the size and element type of the result, the
    probationary count, the in-window count, and the sign of each score.
    """
    total = 100
    scores = [1] * total
    stamps = list(range(total))  # Numbers here, although real data has dates
    name = "TestDataSet"

    firstWindow = (30, 39)
    secondWindow = (75, 95)
    allWindows = [firstWindow, secondWindow]

    # Both end points of a window are expected to count, hence the +1.
    expectedInside = 0
    for lower, upper in allWindows:
      expectedInside += upper - lower + 1

    # Standard profile
    weights = {
      "tpWeight": 1.0,
      "fnWeight": 1.0,
      "fpWeight": 0.11,
    }
    sweeper = Sweeper(probationPercent=0.1, costMatrix=weights)
    results = sweeper.calcSweepScore(stamps, scores, allWindows, name)

    # The sweeper must return one AnomalyPoint per row
    assert len(results) == total
    assert all(isinstance(p, AnomalyPoint) for p in results)

    # Window names that mean "this point is not inside a labelled window"
    outsideNames = ("probationary", None)

    # Number of probationary points must match the sweeper's own calculation
    onProbation = [p for p in results if p.windowName == "probationary"]
    assert len(onProbation) == sweeper._getProbationaryLength(total)

    # Number of points carrying a real window name
    inside = [p for p in results if p.windowName not in outsideNames]
    assert len(inside) == expectedInside

    # Outside points must score negative; inside points must score positive
    for p in results:
      if p.windowName in outsideNames:
        assert p.sweepScore < 0
      else:
        assert p.sweepScore > 0
コード例 #3
0
def optimizeThreshold(args):
    """Optimize the threshold for a given combination of detector and profile.

    Sweep scores are computed for every data file in the results corpus
    (entries whose path contains "_scores.csv" are skipped), the combined
    rows are scored per threshold, and the best-scoring threshold is
    printed and returned.

    @param args       (tuple)   Contains:

      detectorName        (string)            Name of detector.

      costMatrix          (dict)              Cost matrix to weight the
                                              true positives, false
                                              negatives, and false positives
                                              during scoring.
      resultsCorpus       (nab.Corpus)        Corpus object that holds the
                                              per record anomaly scores for
                                              a given detector.
      corpusLabel         (nab.CorpusLabel)   Ground truth anomaly labels
                                              for the NAB corpus.
      probationaryPercent (float)             Percent of each data file not
                                              to be considered during
                                              scoring.

    @return (dict) Contains:
      "threshold" (float)   Threshold that returns the largest score from
                            the objective function.

      "score"     (float)   The score from the objective function given the
                            threshold.
    """
    (detectorName, costMatrix, resultsCorpus, corpusLabel,
     probationaryPercent) = args

    sweeper = Sweeper(probationPercent=probationaryPercent,
                      costMatrix=costMatrix)

    # First, get the sweep-scores for each row in each data set.
    # items() is used instead of the Python 2-only iteritems() so the loop
    # runs on both Python 2 and Python 3 (assumes dataFiles is dict-like).
    allAnomalyRows = []
    for relativePath, dataSet in resultsCorpus.dataFiles.items():
        if "_scores.csv" in relativePath:
            continue

        # relativePath: raw dataset file,
        # e.g. 'artificialNoAnomaly/art_noisy.csv'
        relativePath = convertResultsPathToDataPath(
            os.path.join(detectorName, relativePath))

        windows = corpusLabel.windows[relativePath]
        labels = corpusLabel.labels[relativePath]
        timestamps = labels['timestamp']
        anomalyScores = dataSet.data["anomaly_score"]

        curAnomalyRows = sweeper.calcSweepScore(timestamps, anomalyScores,
                                                windows, relativePath)
        allAnomalyRows.extend(curAnomalyRows)

    # Get scores by threshold for the entire corpus; after the descending
    # sort the best-scoring entry is first.
    scoresByThreshold = sweeper.calcScoreByThreshold(allAnomalyRows)
    scoresByThreshold = sorted(scoresByThreshold,
                               key=lambda x: x.score,
                               reverse=True)
    bestParams = scoresByThreshold[0]

    print(
        "Optimizer found a max score of {} with anomaly threshold {}.".format(
            bestParams.score, bestParams.threshold))

    return {"threshold": bestParams.threshold, "score": bestParams.score}
コード例 #4
0
ファイル: optimizer.py プロジェクト: numenta/NAB
def optimizeThreshold(args):
  """Optimize the threshold for a given combination of detector and profile.

  Sweep scores are computed for every data file in the results corpus
  (entries whose path contains "_scores.csv" are skipped), the combined rows
  are scored per threshold, and the best-scoring threshold is printed and
  returned.

  @param args       (tuple)   Contains:

    detectorName        (string)                Name of detector.

    costMatrix          (dict)                  Cost matrix to weight the
                                                true positives, false negatives,
                                                and false positives during
                                                scoring.
    resultsCorpus       (nab.Corpus)            Corpus object that holds the per
                                                record anomaly scores for a
                                                given detector.
    corpusLabel         (nab.CorpusLabel)       Ground truth anomaly labels for
                                                the NAB corpus.
    probationaryPercent (float)                 Percent of each data file not
                                                to be considered during scoring.

  @return (dict) Contains:
        "threshold" (float)   Threshold that returns the largest score from the
                              Objective function.

        "score"     (float)   The score from the objective function given the
                              threshold.
  """
  (detectorName,
   costMatrix,
   resultsCorpus,
   corpusLabel,
   probationaryPercent) = args

  sweeper = Sweeper(
    probationPercent=probationaryPercent,
    costMatrix=costMatrix
  )

  # First, get the sweep-scores for each row in each data set.
  # items() is used instead of the Python 2-only iteritems() so the loop runs
  # on both Python 2 and Python 3 (assumes dataFiles is dict-like).
  allAnomalyRows = []
  for relativePath, dataSet in resultsCorpus.dataFiles.items():
    if "_scores.csv" in relativePath:
      continue

    # relativePath: raw dataset file,
    # e.g. 'artificialNoAnomaly/art_noisy.csv'
    relativePath = convertResultsPathToDataPath(
      os.path.join(detectorName, relativePath))

    windows = corpusLabel.windows[relativePath]
    labels = corpusLabel.labels[relativePath]
    timestamps = labels['timestamp']
    anomalyScores = dataSet.data["anomaly_score"]

    curAnomalyRows = sweeper.calcSweepScore(
      timestamps,
      anomalyScores,
      windows,
      relativePath
    )
    allAnomalyRows.extend(curAnomalyRows)

  # Get scores by threshold for the entire corpus; after the descending sort
  # the best-scoring entry is first.
  scoresByThreshold = sweeper.calcScoreByThreshold(allAnomalyRows)
  scoresByThreshold = sorted(
    scoresByThreshold, key=lambda x: x.score, reverse=True)
  bestParams = scoresByThreshold[0]

  print("Optimizer found a max score of {} with anomaly threshold {}.".format(
    bestParams.score, bestParams.threshold
  ))

  return {
    "threshold": bestParams.threshold,
    "score": bestParams.score
  }