Example #1
0
  def testFirstTruePositiveWithinWindow(self):
    """
    A single detection on the first record of the window should score
    approximately self.costMatrix["tpWeight"]; agreement to 4 decimal places
    is more than enough precision.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 10
    numWindows = 1
    windowSize = 2

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    predictions = pandas.Series([0]*length)

    # Flag only the record whose timestamp equals the first element of the
    # first window.
    index = timestamps[timestamps == windows[0][0]].index[0]
    predictions[index] = 1
    scorer = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score) = scorer.getScore()

    # assertAlmostEquals is a deprecated alias that was removed in Python 3.12.
    self.assertAlmostEqual(score, self.costMatrix["tpWeight"], 4)
    self._checkCounts(scorer.counts, length-windowSize*numWindows, 1, 0,
      windowSize*numWindows-1)
Example #2
0
    def testFalsePositiveMeansNegativeScore(self):
        """
        A false positive should make the score negative.

        The only detection is placed on record 0, and the final score is
        required to be strictly below zero.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 1000
        numWindows = 1
        windowSize = 10

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        predictions = pandas.Series([0] * length)

        predictions[0] = 1
        scorer = Scorer(timestamps,
                        predictions,
                        labels,
                        windows,
                        self.costMatrix,
                        probationaryPeriod=0)
        (_, score) = scorer.getScore()

        # assertLess reports the offending value on failure, which
        # assertTrue(score < 0) does not.
        self.assertLess(score, 0)
        self._checkCounts(scorer.counts, length - windowSize * numWindows - 1,
                          0, 1, windowSize * numWindows)
Example #3
0
    def testOneFalsePositiveNoWindow(self):
        """
        When there is no window (i.e. no anomaly), a false positive should
        still result in a negative score, specifically negative the FP weight.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 1000
        numWindows = 0
        windowSize = 10

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        predictions = pandas.Series([0] * length)

        predictions[0] = 1
        scorer = Scorer(timestamps,
                        predictions,
                        labels,
                        windows,
                        self.costMatrix,
                        probationaryPeriod=0)
        (_, score) = scorer.getScore()

        # assertEqual shows both values on failure, unlike assertTrue(a == b).
        self.assertEqual(score, -self.costMatrix["fpWeight"])
        self._checkCounts(scorer.counts, length - windowSize * numWindows - 1,
                          0, 1, windowSize * numWindows)
Example #4
0
  def testFalsePositiveMeansNegativeScore(self):
    """
    A false positive should make the score negative.

    Only record 0 carries a nonzero anomaly score (1, against a threshold of
    0.5), and the score of the matching row must be strictly below zero.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 1000
    numWindows = 1
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    anomalyScores = pandas.Series([0]*length)

    anomalyScores[0] = 1
    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    # Only the second element of the returned pair is inspected here.
    (_, matchingRow) = sweeper.scoreDataSet(
      timestamps,
      anomalyScores,
      windows,
      "testData",
      threshold
    )
    # assertLess reports the offending value on failure.
    self.assertLess(matchingRow.score, 0)
    self._checkCounts(matchingRow, length-windowSize*numWindows-1, 0, 1,
      windowSize*numWindows)
Example #5
0
  def testEarlierFalsePositiveAfterWindowIsBetter(self):
    """For two false positives A and B, where A occurs earlier than B, the
    score change due to A will be less than the score change due to B.

    A is placed one record after the second element of the window and B one
    record after A; the score with only A must exceed the score with only B.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 10
    numWindows = 1
    windowSize = 2

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    predictions1 = pandas.Series([0]*length)
    predictions2 = pandas.Series([0]*length)
    # Only the second element of the window pair is needed.
    _, t2 = windows[0]

    index1 = timestamps[timestamps == t2].index[0] + 1
    predictions1[index1] = 1
    scorer1 = Scorer(timestamps, predictions1, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score1) = scorer1.getScore()

    predictions2[index1+1] = 1
    scorer2 = Scorer(timestamps, predictions2, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score2) = scorer2.getScore()

    # assertGreater reports both scores on failure.
    self.assertGreater(score1, score2)
    self._checkCounts(scorer1.counts, length-windowSize*numWindows-1, 0, 1,
      windowSize*numWindows)
    self._checkCounts(scorer2.counts, length-windowSize*numWindows-1, 0, 1,
      windowSize*numWindows)
Example #6
0
  def test_oneFalsePositiveNoWindow(self):
    """
    When there is no window (meaning no anomaly), a false positive should still
    result in a negative score; the test requires it to equal the negated
    fpWeight of the local cost matrix.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 1000
    numWindows = 0
    windowSize = 10

    timestamps = generateTimestamps(start, increment, length)

    predictions = pandas.Series([0]*length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    costMatrix = {"tpWeight": 1.0,
    "fnWeight": 1.0,
    "fpWeight": 1.0,
    "tnWeight": 1.0}

    predictions[0] = 1

    scorer = Scorer(timestamps, predictions, labels, windows, costMatrix,
      probationaryPeriod=0)

    # assertEqual shows both values on failure, unlike assertTrue(a == b).
    self.assertEqual(scorer.getScore(), -costMatrix["fpWeight"])

    # Ensure counts are correct.
    self.assertEqual(scorer.counts['tn'], length-windowSize*numWindows-1)
    self.assertEqual(scorer.counts['tp'], 0)
    self.assertEqual(scorer.counts['fp'], 1)
    self.assertEqual(scorer.counts['fn'], windowSize*numWindows)
Example #7
0
  def testFourFalseNegatives(self):
    """
    With four windows and all-zero anomaly scores, the score of the matching
    row should be exactly four times the negated false negative weight.
    """
    numWindows = 4
    windowSize = 10
    length = 2000
    threshold = 1
    increment = datetime.timedelta(minutes=5)
    start = datetime.datetime.now()

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    # No record carries a nonzero anomaly score.
    anomalyScores = pandas.Series([0] * length)

    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, row = sweeper.scoreDataSet(timestamps, anomalyScores, windows,
                                  "testData", threshold)

    inWindowCount = windowSize * numWindows
    self.assertEqual(row.score, 4 * -self.costMatrix["fnWeight"])
    self._checkCounts(row, length - inWindowCount, 0, 0, inWindowCount)
    def testFourFalseNegatives(self):
        """
        A false negative with four windows should have four times the
        negative of the false negative score (checked to within 0.01).
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 2000
        numWindows = 4
        windowSize = 10

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        # No detections at all.
        predictions = pandas.Series([0] * length)

        scorer = Scorer(timestamps,
                        predictions,
                        labels,
                        windows,
                        self.costMatrix,
                        probationaryPeriod=0)
        (_, score) = scorer.getScore()

        # assertLess reports the actual deviation on failure, unlike
        # assertTrue(deviation < 0.01).
        self.assertLess(abs(score + 4 * self.costMatrix['fnWeight']), 0.01)
        self._checkCounts(scorer.counts, length - windowSize * numWindows, 0,
                          0, windowSize * numWindows)
Example #9
0
  def testRewardLowFalseNegatives(self):
    """
    With no detections at all, the score under self.costMatrix should be
    exactly half the score under a copy whose fnWeight is set to 2.0 (which
    presumes the standard fnWeight is 1.0 -- confirm against the fixture).
    """
    length = 100
    numWindows = 1
    windowSize = 10
    increment = datetime.timedelta(minutes=5)
    start = datetime.datetime.now()

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    predictions = pandas.Series([0]*length)

    # Modify a deep copy so self.costMatrix itself is not changed here.
    rewardLowFnMatrix = copy.deepcopy(self.costMatrix)
    rewardLowFnMatrix["fnWeight"] = 2.0
    rewardLowFnMatrix["fpWeight"] = 0.055

    standardScorer = Scorer(timestamps, predictions, labels, windows,
      self.costMatrix, probationaryPeriod=0)
    _, standardScore = standardScorer.getScore()
    rewardLowFnScorer = Scorer(timestamps, predictions, labels, windows,
      rewardLowFnMatrix, probationaryPeriod=0)
    _, rewardLowFnScore = rewardLowFnScorer.getScore()

    self.assertEqual(standardScore, 0.5*rewardLowFnScore)
    # Both scorers saw identical predictions, so the same counts are expected.
    for scorer in (standardScorer, rewardLowFnScorer):
      self._checkCounts(scorer.counts, length-windowSize*numWindows, 0, 0,
        windowSize*numWindows)
Example #10
0
  def testTwoFalsePositivesIsWorseThanOne(self):
    """
    For two false positives A and B in a file, the score given A and B should be
    more negative than the score given just A.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 1000
    numWindows = 1
    windowSize = 10

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    predictions = pandas.Series([0]*length)

    # First run: a single detection on record 0.
    predictions[0] = 1
    scorer1 = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score1) = scorer1.getScore()

    # Second run: the same series with an extra detection on record 1.
    predictions[1] = 1
    scorer2 = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score2) = scorer2.getScore()

    # assertLess reports both scores on failure.
    self.assertLess(score2, score1)
    self._checkCounts(scorer1.counts, length-windowSize*numWindows-1, 0, 1,
      windowSize*numWindows)
    self._checkCounts(scorer2.counts, length-windowSize*numWindows-2, 0, 2,
      windowSize*numWindows)
    def testScoringAllMetrics(self):
        """
        This tests an example set of detections, where all metrics have
        counts > 0.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 100
        numWindows = 2
        windowSize = 5
        threshold = 0.5

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        anomalyScores = pandas.Series([0] * length)

        index = timestamps[timestamps == windows[0][0]].index[0]
        # TP, add'l TP, and FP
        anomalyScores[index] = 1
        anomalyScores[index + 1] = 1
        anomalyScores[index + 7] = 1

        sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)

        # Only the second element of the returned pair is inspected here.
        (_, matchingRow) = sweeper.scoreDataSet(timestamps, anomalyScores,
                                                windows, "testData",
                                                threshold)

        # assertAlmostEquals is a deprecated alias removed in Python 3.12.
        self.assertAlmostEqual(matchingRow.score, -0.9540, 4)
        self._checkCounts(matchingRow, length - windowSize * numWindows - 1, 2,
                          1, 8)
Example #12
0
  def testOneFalsePositiveNoWindow(self):
    """
    With zero windows requested (i.e. no anomaly), a lone nonzero anomaly
    score on record 0 should yield a score equal to the negated FP weight.
    """
    numWindows = 0
    windowSize = 10
    length = 1000
    threshold = 0.5
    increment = datetime.timedelta(minutes=5)
    start = datetime.datetime.now()

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)

    anomalyScores = pandas.Series([0]*length)
    anomalyScores[0] = 1

    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, row = sweeper.scoreDataSet(timestamps, anomalyScores, windows,
      "testData", threshold)

    inWindowCount = windowSize*numWindows
    self.assertEqual(row.score, -self.costMatrix["fpWeight"])
    self._checkCounts(row, length-inWindowCount-1, 0, 1, inWindowCount)
Example #13
0
  def testOnlyScoreFirstTruePositiveWithinWindow(self):
    """
    Multiple detections inside one window (i.e. true positives) should be
    scored only for the earliest one: adding a second detection at the other
    window boundary must leave the score unchanged.
    """
    length = 10
    numWindows = 1
    windowSize = 2
    increment = datetime.timedelta(minutes=5)
    start = datetime.datetime.now()

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    predictions = pandas.Series([0]*length)
    t1, t2 = windows[0]

    # First run: detection at the record matching t1 only.
    firstIndex = timestamps[timestamps == t1].index[0]
    predictions[firstIndex] = 1
    scorer1 = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    _, score1 = scorer1.getScore()

    # Second run: additionally flag the record matching t2.
    secondIndex = timestamps[timestamps == t2].index[0]
    predictions[secondIndex] = 1
    scorer2 = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    _, score2 = scorer2.getScore()

    self.assertEqual(score1, score2)
    inWindowCount = windowSize*numWindows
    self._checkCounts(scorer1.counts, length-inWindowCount, 1, 0,
      inWindowCount-1)
    self._checkCounts(scorer2.counts, length-inWindowCount, 2, 0,
      inWindowCount-2)
Example #14
0
  def testRewardLowFalseNegatives(self):
    """
    With no detections at all, the score under self.costMatrix should be
    exactly half the score under a copy whose fnWeight is set to 2.0 (which
    presumes the standard fnWeight is 1.0 -- confirm against the fixture).
    """
    length = 100
    numWindows = 1
    windowSize = 10
    increment = datetime.timedelta(minutes=5)
    start = datetime.datetime.now()

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    predictions = pandas.Series([0]*length)

    # Modify a deep copy so self.costMatrix itself is not changed here.
    rewardLowFnMatrix = copy.deepcopy(self.costMatrix)
    rewardLowFnMatrix["fnWeight"] = 2.0
    rewardLowFnMatrix["fpWeight"] = 0.055

    standardScorer = Scorer(timestamps, predictions, labels, windows,
      self.costMatrix, probationaryPeriod=0)
    _, standardScore = standardScorer.getScore()
    rewardLowFnScorer = Scorer(timestamps, predictions, labels, windows,
      rewardLowFnMatrix, probationaryPeriod=0)
    _, rewardLowFnScore = rewardLowFnScorer.getScore()

    self.assertEqual(standardScore, 0.5*rewardLowFnScore)
    # Both scorers saw identical predictions, so the same counts are expected.
    for scorer in (standardScorer, rewardLowFnScorer):
      self._checkCounts(scorer.counts, length-windowSize*numWindows, 0, 0,
        windowSize*numWindows)
    def testOneFalsePositiveNoWindow(self):
        """
        With zero windows requested (i.e. no anomaly), a lone nonzero anomaly
        score on record 0 should yield a score equal to the negated FP weight.
        """
        numWindows = 0
        windowSize = 10
        length = 1000
        threshold = 0.5
        increment = datetime.timedelta(minutes=5)
        start = datetime.datetime.now()

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)

        anomalyScores = pandas.Series([0] * length)
        anomalyScores[0] = 1

        sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
        _, row = sweeper.scoreDataSet(
            timestamps,
            anomalyScores,
            windows,
            "testData",
            threshold,
        )

        inWindowCount = windowSize * numWindows
        self.assertEqual(row.score, -self.costMatrix["fpWeight"])
        self._checkCounts(row, length - inWindowCount - 1, 0, 1,
                          inWindowCount)
    def testEarlierFalsePositiveAfterWindowIsBetter(self):
        """For two false positives A and B, where A occurs earlier than B, the
        score change due to A will be less than the score change due to B.

        A is placed one record after the second element of the window and B
        one record after A; the score with only A must exceed the score with
        only B.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 10
        numWindows = 1
        windowSize = 2
        threshold = 0.5

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        anomalyScores1 = pandas.Series([0] * length)
        anomalyScores2 = pandas.Series([0] * length)
        # Only the second element of the window pair is needed.
        _, t2 = windows[0]

        index1 = timestamps[timestamps == t2].index[0] + 1
        anomalyScores1[index1] = 1
        sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
        (_, matchingRow1) = sweeper.scoreDataSet(timestamps,
                                                 anomalyScores1, windows,
                                                 "testData", threshold)

        anomalyScores2[index1 + 1] = 1
        sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
        (_, matchingRow2) = sweeper.scoreDataSet(timestamps,
                                                 anomalyScores2, windows,
                                                 "testData", threshold)

        # assertGreater reports both scores on failure.
        self.assertGreater(matchingRow1.score, matchingRow2.score)
        self._checkCounts(matchingRow1, length - windowSize * numWindows - 1,
                          0, 1, windowSize * numWindows)
        self._checkCounts(matchingRow2, length - windowSize * numWindows - 1,
                          0, 1, windowSize * numWindows)
Example #17
0
  def testScoringAllMetrics(self):
    """
    This tests an example set of detections, where all metrics have counts > 0.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 100
    numWindows = 2
    windowSize = 5
    threshold = 0.5

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    anomalyScores = pandas.Series([0]*length)

    index = timestamps[timestamps == windows[0][0]].index[0]
    # TP, add'l TP, and FP
    anomalyScores[index] = 1
    anomalyScores[index+1] = 1
    anomalyScores[index+7] = 1

    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)

    # Only the second element of the returned pair is inspected here.
    (_, matchingRow) = sweeper.scoreDataSet(
      timestamps,
      anomalyScores,
      windows,
      "testData",
      threshold
    )

    # assertAlmostEquals is a deprecated alias that was removed in Python 3.12.
    self.assertAlmostEqual(matchingRow.score, -0.9540, 4)
    self._checkCounts(matchingRow, length-windowSize*numWindows-1, 2, 1, 8)
Example #18
0
 def testScoringAllMetrics(self):
   """
   This tests an example set of detections, where all metrics have counts > 0.
   """
   start = datetime.datetime.now()
   increment = datetime.timedelta(minutes=5)
   length = 100
   numWindows = 2
   windowSize = 5

   timestamps = generateTimestamps(start, increment, length)
   windows = generateWindows(timestamps, numWindows, windowSize)
   labels = generateLabels(timestamps, windows)
   predictions = pandas.Series([0]*length)

   index = timestamps[timestamps == windows[0][0]].index[0]
   # TP, add'l TP, and FP
   predictions[index] = 1
   predictions[index+1] = 1
   predictions[index+7] = 1

   scorer = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
     probationaryPeriod=0)
   (_, score) = scorer.getScore()

   # assertAlmostEquals is a deprecated alias that was removed in Python 3.12.
   self.assertAlmostEqual(score, -0.9540, 4)
   self._checkCounts(scorer.counts, length-windowSize*numWindows-1, 2, 1, 8)
Example #19
0
  def test_firstTruePositiveWithinWindow(self):
    """
    First record within window has a score close to costMatrix["tpWeight"].
    Since we use Sigmoids, it will never be exactly 1.
    """

    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 10
    numWindows = 1
    windowSize = 2

    timestamps = generateTimestamps(start, increment, length)
    predictions = pandas.Series([0]*length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    costMatrix = {"tpWeight": 1.0,
                  "fnWeight": 2.0,
                  "fpWeight": 3.0,
                  "tnWeight": 4.0}

    # Flag only the record whose timestamp equals the first element of the
    # first window.
    index = timestamps[timestamps == windows[0][0]].index[0]
    predictions[index] = 1

    scorer = Scorer(timestamps, predictions, labels, windows, costMatrix,
      probationaryPeriod=0)

    # NOTE(review): a tolerance of 1 is loose for "close to tpWeight";
    # consider tightening it once the expected value is confirmed.
    # assertLessEqual reports the actual difference on failure.
    self.assertLessEqual(costMatrix["tpWeight"] - scorer.getScore(), 1)
Example #20
0
  def test_FourFalseNegatives(self):
    """
    A false negative with four windows should have four times the negative of
    the false negative score (checked to within 0.01).
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 2000
    numWindows = 4
    windowSize = 10

    timestamps = generateTimestamps(start, increment, length)
    # No detections at all.
    predictions = pandas.Series([0]*length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    costMatrix = {"tpWeight": 1.0,
                  "fnWeight": 2.0,
                  "fpWeight": 3.0,
                  "tnWeight": 4.0}

    scorer = Scorer(timestamps, predictions, labels, windows, costMatrix,
      probationaryPeriod=0)

    # assertLess reports the actual deviation on failure, unlike
    # assertTrue(deviation < 0.01).
    self.assertLess(abs(scorer.getScore() + 4*costMatrix['fnWeight']), 0.01)

    # Ensure counts are correct.
    self.assertEqual(scorer.counts['tn'], length-windowSize*numWindows)
    self.assertEqual(scorer.counts['tp'], 0)
    self.assertEqual(scorer.counts['fp'], 0)
    self.assertEqual(scorer.counts['fn'], windowSize*numWindows)
  def testFirstTruePositiveWithinWindow(self):
    """
    A single anomaly score of 1.0 on the record matching the first element of
    the window should give a matching-row score equal to
    self.costMatrix["tpWeight"].
    """
    length = 10
    numWindows = 1
    windowSize = 2
    threshold = 0.5
    increment = datetime.timedelta(minutes=5)
    start = datetime.datetime.now()

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    anomalyScores = pandas.Series([0] * length)

    # The only nonzero anomaly score sits on the record matching windows[0][0].
    windowStart = windows[0][0]
    index = timestamps[timestamps == windowStart].index[0]
    anomalyScores[index] = 1.0

    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, row = sweeper.scoreDataSet(timestamps, anomalyScores, windows,
                                  "testData", threshold)

    inWindowCount = windowSize * numWindows
    self.assertEqual(row.score, self.costMatrix["tpWeight"])
    self._checkCounts(row, length - inWindowCount, 1, 0, inWindowCount - 1)
    def testFirstTruePositiveWithinWindow(self):
        """
        A single detection on the first record of the window should score
        approximately self.costMatrix["tpWeight"]; agreement to 4 decimal
        places is more than enough precision.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 10
        numWindows = 1
        windowSize = 2

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        predictions = pandas.Series([0] * length)

        # Flag only the record whose timestamp equals the first element of
        # the first window.
        index = timestamps[timestamps == windows[0][0]].index[0]
        predictions[index] = 1
        scorer = Scorer(timestamps,
                        predictions,
                        labels,
                        windows,
                        self.costMatrix,
                        probationaryPeriod=0)
        (_, score) = scorer.getScore()

        # assertAlmostEquals is a deprecated alias removed in Python 3.12.
        self.assertAlmostEqual(score, self.costMatrix["tpWeight"], 4)
        self._checkCounts(scorer.counts, length - windowSize * numWindows, 1,
                          0, windowSize * numWindows - 1)
    def testTwoFalsePositivesIsWorseThanOne(self):
        """
        For two false positives A and B in a file, the score given A and B
        should be more negative than the score given just A.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 1000
        numWindows = 1
        windowSize = 10
        threshold = 0.5

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        anomalyScores = pandas.Series([0] * length)

        # First run: a single nonzero anomaly score on record 0.
        anomalyScores[0] = 1
        sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
        (_, matchingRow1) = sweeper.scoreDataSet(timestamps,
                                                 anomalyScores, windows,
                                                 "testData", threshold)

        # Second run: the same series with an extra nonzero score on record 1.
        anomalyScores[1] = 1
        sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
        (_, matchingRow2) = sweeper.scoreDataSet(timestamps,
                                                 anomalyScores, windows,
                                                 "testData", threshold)

        # assertLess reports both scores on failure.
        self.assertLess(matchingRow2.score, matchingRow1.score)
        self._checkCounts(matchingRow1, length - windowSize * numWindows - 1,
                          0, 1, windowSize * numWindows)
        self._checkCounts(matchingRow2, length - windowSize * numWindows - 2,
                          0, 2, windowSize * numWindows)
Example #24
0
    def testScoringAllMetrics(self):
        """
        This tests an example set of detections, where all metrics have
        counts > 0.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 100
        numWindows = 2
        windowSize = 5

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        predictions = pandas.Series([0] * length)

        index = timestamps[timestamps == windows[0][0]].index[0]
        # TP, add'l TP, and FP
        predictions[index] = 1
        predictions[index + 1] = 1
        predictions[index + 7] = 1

        scorer = Scorer(timestamps,
                        predictions,
                        labels,
                        windows,
                        self.costMatrix,
                        probationaryPeriod=0)
        (_, score) = scorer.getScore()

        # assertAlmostEquals is a deprecated alias removed in Python 3.12.
        self.assertAlmostEqual(score, -0.9540, 4)
        self._checkCounts(scorer.counts, length - windowSize * numWindows - 1,
                          2, 1, 8)
Example #25
0
  def test_falsePositiveMeansNegativeScore(self):
    """
    A false positive should make the score negative.

    The only detection is placed on record 0, and the score is required to be
    strictly below zero.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 1000
    numWindows = 1
    windowSize = 10

    timestamps = generateTimestamps(start, increment, length)
    predictions = pandas.Series([0]*length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    costMatrix = {"tpWeight": 1.0,
    "fnWeight": 1.0,
    "fpWeight": 1.0,
    "tnWeight": 1.0}

    predictions[0] = 1

    scorer = Scorer(timestamps, predictions, labels, windows, costMatrix,
      probationaryPeriod=0)
    score = scorer.getScore()

    # assertLess reports the offending value on failure, which
    # assertTrue(score < 0) does not.
    self.assertLess(score, 0)

    # Ensure counts are correct.
    self.assertEqual(scorer.counts['tn'], length-windowSize*numWindows-1)
    self.assertEqual(scorer.counts['tp'], 0)
    self.assertEqual(scorer.counts['fp'], 1)
    self.assertEqual(scorer.counts['fn'], windowSize*numWindows)
  def testTruePositivesWithDifferentWindowSizes(self):
    """
    A true positive at the left edge of a window should receive the same score
    regardless of the window's width (sizes 2 and 3 are compared).
    """
    length = 10
    numWindows = 1
    threshold = 0.5
    increment = datetime.timedelta(minutes=5)
    start = datetime.datetime.now()
    timestamps = generateTimestamps(start, increment, length)

    # Case 1: window of size 2, detection on the record matching its start.
    windowSize1 = 2
    windows1 = generateWindows(timestamps, numWindows, windowSize1)
    leftEdge1 = timestamps[timestamps == windows1[0][0]].index[0]
    anomalyScores1 = pandas.Series([0]*length)
    anomalyScores1[leftEdge1] = 1

    # Case 2: window of size 3, detection on the record matching its start.
    windowSize2 = 3
    windows2 = generateWindows(timestamps, numWindows, windowSize2)
    leftEdge2 = timestamps[timestamps == windows2[0][0]].index[0]
    anomalyScores2 = pandas.Series([0]*length)
    anomalyScores2[leftEdge2] = 1

    # The same sweeper instance scores both data sets.
    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, row1 = sweeper.scoreDataSet(timestamps, anomalyScores1, windows1,
      "testData", threshold)
    _, row2 = sweeper.scoreDataSet(timestamps, anomalyScores2, windows2,
      "testData", threshold)

    self.assertEqual(row1.score, row2.score)
    inWindow1 = windowSize1*numWindows
    self._checkCounts(row1, length-inWindow1, 1, 0, inWindow1-1)
    inWindow2 = windowSize2*numWindows
    self._checkCounts(row2, length-inWindow2, 1, 0, inWindow2-1)
    def testTruePositivesWithDifferentWindowSizes(self):
        """
        A true positive at the left edge of a window should receive the same
        score regardless of the window's width (sizes 2 and 3 are compared).
        """
        length = 10
        numWindows = 1
        increment = datetime.timedelta(minutes=5)
        start = datetime.datetime.now()
        timestamps = generateTimestamps(start, increment, length)

        # Case 1: window of size 2, detection on the record matching its start.
        windowSize1 = 2
        windows1 = generateWindows(timestamps, numWindows, windowSize1)
        labels1 = generateLabels(timestamps, windows1)
        leftEdge1 = timestamps[timestamps == windows1[0][0]].index[0]
        predictions1 = pandas.Series([0] * length)
        predictions1[leftEdge1] = 1

        # Case 2: window of size 3, detection on the record matching its start.
        windowSize2 = 3
        windows2 = generateWindows(timestamps, numWindows, windowSize2)
        labels2 = generateLabels(timestamps, windows2)
        leftEdge2 = timestamps[timestamps == windows2[0][0]].index[0]
        predictions2 = pandas.Series([0] * length)
        predictions2[leftEdge2] = 1

        scorer1 = Scorer(timestamps, predictions1, labels1, windows1,
                         self.costMatrix, probationaryPeriod=0)
        _, score1 = scorer1.getScore()
        scorer2 = Scorer(timestamps, predictions2, labels2, windows2,
                         self.costMatrix, probationaryPeriod=0)
        _, score2 = scorer2.getScore()

        self.assertEqual(score1, score2)
        inWindow1 = windowSize1 * numWindows
        self._checkCounts(scorer1.counts, length - inWindow1, 1, 0,
                          inWindow1 - 1)
        inWindow2 = windowSize2 * numWindows
        self._checkCounts(scorer2.counts, length - inWindow2, 1, 0,
                          inWindow2 - 1)
Example #28
0
  def test_earlierFalsePositiveAfterWindowIsBetter(self):
    """Imagine there are two false positives A and B that both occur right after
    a window. If A occurs earlier than B, then the score change due to A will be
    less than the score change due to B.

    A is placed one record after the second element of the window and B one
    record after A; the score with only A must exceed the score with only B.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 10
    numWindows = 1
    windowSize = 2

    timestamps = generateTimestamps(start, increment, length)

    predictions1 = pandas.Series([0]*length)
    predictions2 = pandas.Series([0]*length)

    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    # Only the second element of the window pair is needed.
    _, t2 = windows[0]

    costMatrix = {"tpWeight": 1.0,
                  "fnWeight": 1.0,
                  "fpWeight": 1.0,
                  "tnWeight": 1.0}

    index1 = timestamps[timestamps == t2].index[0] + 1
    predictions1[index1] = 1

    scorer1 = Scorer(timestamps, predictions1, labels, windows, costMatrix,
      probationaryPeriod=0)
    score1 = scorer1.getScore()

    predictions2[index1+1] = 1

    scorer2 = Scorer(timestamps, predictions2, labels, windows, costMatrix,
      probationaryPeriod=0)
    score2 = scorer2.getScore()

    # assertGreater reports both scores on failure.
    self.assertGreater(score1, score2)

    # Ensure counts are correct.
    self.assertEqual(scorer1.counts['tn'], length-windowSize*numWindows-1)
    self.assertEqual(scorer1.counts['tp'], 0)
    self.assertEqual(scorer1.counts['fp'], 1)
    self.assertEqual(scorer1.counts['fn'], windowSize*numWindows)

    self.assertEqual(scorer2.counts['tn'], length-windowSize*numWindows-1)
    self.assertEqual(scorer2.counts['tp'], 0)
    self.assertEqual(scorer2.counts['fp'], 1)
    self.assertEqual(scorer2.counts['fn'], windowSize*numWindows)
Example #29
0
  def testTruePositivesWithDifferentWindowSizes(self):
    """
    A true positive on a window's left edge scores the same for any window
    width.

    Inputs are prepared for window sizes 2 and 3 over one shared set of
    timestamps, each with a single detection at the first timestamp of its
    window. The cases are then scored in turn and the scores compared.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 10
    numWindows = 1
    timestamps = generateTimestamps(start, increment, length)

    windowSizes = (2, 3)

    # Prepare (predictions, labels, windows) for every window size.
    cases = []
    for size in windowSizes:
      windows = generateWindows(timestamps, numWindows, size)
      labels = generateLabels(timestamps, windows)
      leftEdge = timestamps[timestamps == windows[0][0]].index[0]
      predictions = pandas.Series([0]*length)
      predictions[leftEdge] = 1
      cases.append((predictions, labels, windows))

    # Score each prepared case, keeping the scorer for the count checks.
    scorers = []
    results = []
    for predictions, labels, windows in cases:
      scorer = Scorer(timestamps, predictions, labels, windows,
        self.costMatrix, probationaryPeriod=0)
      (_, value) = scorer.getScore()
      scorers.append(scorer)
      results.append(value)

    self.assertEqual(results[0], results[1])
    for scorer, size in zip(scorers, windowSizes):
      self._checkCounts(scorer.counts, length-size*numWindows, 1, 0,
        size*numWindows-1)
Example #30
0
  def test_twoFalsePositivesIsWorseThanOne(self):
    """False positives accumulate: a run containing two false positives, A and
    B, must score lower than a run containing A alone.

    One prediction series is scored twice: first with a detection at index 0
    only, then again after a second detection is added at index 1. All four
    cost-matrix weights are 1.0.
    """
    length = 1000
    numWindows = 1
    windowSize = 10

    timestamps = generateTimestamps(
      datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    predictions = pandas.Series([0]*length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    costMatrix = {
      "tpWeight": 1.0,
      "fnWeight": 1.0,
      "fpWeight": 1.0,
      "tnWeight": 1.0,
    }

    def assertCounts(scorer, tn, tp, fp, fn):
      # Compare every entry of scorer.counts against the expected value.
      self.assertEqual(scorer.counts['tn'], tn)
      self.assertEqual(scorer.counts['tp'], tp)
      self.assertEqual(scorer.counts['fp'], fp)
      self.assertEqual(scorer.counts['fn'], fn)

    # First pass: a single detection at index 0.
    predictions[0] = 1
    singleScorer = Scorer(timestamps, predictions, labels, windows,
      costMatrix, probationaryPeriod=0)
    singleScore = singleScorer.getScore()

    # Second pass: the same series with one more detection at index 1.
    predictions[1] = 1
    doubleScorer = Scorer(timestamps, predictions, labels, windows,
      costMatrix, probationaryPeriod=0)
    doubleScore = doubleScorer.getScore()

    self.assertTrue(singleScore > doubleScore)

    # Ensure counts are correct.
    windowTotal = windowSize*numWindows
    assertCounts(singleScorer, length-windowTotal-1, 0, 1, windowTotal)
    assertCounts(doubleScorer, length-windowTotal-2, 0, 2, windowTotal)
  def testTruePositiveAtRightEdgeOfWindow(self):
    """
    A true positive at the right edge of a window should score approximately
    zero: the scaled sigmoid scoring function is expected to cross zero
    between the window's last timestamp and the timestamp immediately
    following the window.

    The same series is swept twice at threshold 0.5 -- once with the single
    detection on the window's last timestamp, once with it moved one record
    later -- and the two row scores plus 1 must be within 3 decimal places
    of 0.
    """
    length = 1000
    numWindows = 1
    windowSize = 100
    threshold = 0.5

    timestamps = generateTimestamps(
      datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    anomalyScores = pandas.Series([0]*length)

    def sweep(sweeperInstance):
      # Score the current state of anomalyScores; keep only the matching row.
      (_, row) = sweeperInstance.scoreDataSet(
        timestamps, anomalyScores, windows, "testData", threshold)
      return row

    # Detection on the last timestamp of the window; TP
    windowEnd = windows[0][1]
    position = timestamps[timestamps == windowEnd].index[0]
    anomalyScores[position] = 1
    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    insideRow = sweep(sweeper)

    # Move the detection to the record just after the window; FP
    anomalyScores[position] = 0
    position += 1
    anomalyScores[position] = 1
    outsideRow = sweep(sweeper)

    # TP score + FP score + 1 should be very close to 0; the 1 is added to
    # account for the subsequent FN contribution.
    combined = insideRow.score + outsideRow.score + 1
    self.assertAlmostEqual(combined, 0.0, 3)

    windowTotal = windowSize*numWindows
    self._checkCounts(insideRow, length-windowTotal, 1, 0, windowTotal-1)
    self._checkCounts(outsideRow, length-windowTotal-1, 0, 1, windowTotal)
  def testEarlierTruePositiveIsBetter(self):
    """
    Of two detectors that each hit the same window once, the one that fires
    earlier in the window must receive the higher score.

    Detector 1 flags the window's first timestamp and detector 2 its last;
    both are swept with the same Sweeper (probationPercent=0) at a threshold
    of 0.5.
    """
    length = 10
    numWindows = 1
    windowSize = 2
    threshold = 0.5

    timestamps = generateTimestamps(
      datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    earlySeries = pandas.Series([0] * length)
    lateSeries = pandas.Series([0] * length)
    windowStart, windowEnd = windows[0]

    def positionOf(moment):
      # First index entry among the records whose timestamp equals `moment`.
      return timestamps[timestamps == moment].index[0]

    earlySeries[positionOf(windowStart)] = 1
    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    (_, earlyRow) = sweeper.scoreDataSet(
      timestamps, earlySeries, windows, "testData", threshold)

    lateSeries[positionOf(windowEnd)] = 1
    (_, lateRow) = sweeper.scoreDataSet(
      timestamps, lateSeries, windows, "testData", threshold)

    earlyScore, lateScore = earlyRow.score, lateRow.score
    failureMessage = ("The earlier TP score is not greater than "
      "the later TP. They are %f and %f, respectively."
      % (earlyScore, lateScore))
    self.assertTrue(earlyScore > lateScore, failureMessage)

    # Both runs are expected to report the same counts.
    windowTotal = windowSize*numWindows
    for row in (earlyRow, lateRow):
      self._checkCounts(row, length-windowTotal, 1, 0, windowTotal-1)
    def testTruePositiveAtRightEdgeOfWindow(self):
        """
        True positives at the right edge of a window should yield a score of
        approximately zero; the scaled sigmoid scoring function crosses zero
        between a given window's last timestamp and the next timestamp
        (immediately following the window).

        The same prediction series is scored twice: first with its single
        detection on the window's last timestamp, then with that detection
        moved one record later. The two scores plus 1 must be within 3
        decimal places of 0.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 1000
        numWindows = 1
        windowSize = 100

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        predictions = pandas.Series([0] * length)

        # Make prediction at end of the window; TP
        index = timestamps[timestamps == windows[0][1]].index[0]
        predictions[index] = 1
        scorer1 = Scorer(timestamps,
                         predictions,
                         labels,
                         windows,
                         self.costMatrix,
                         probationaryPeriod=0)
        (_, score1) = scorer1.getScore()
        # Make prediction just after the window; FP
        predictions[index] = 0
        index += 1
        predictions[index] = 1
        scorer2 = Scorer(timestamps,
                         predictions,
                         labels,
                         windows,
                         self.costMatrix,
                         probationaryPeriod=0)
        (_, score2) = scorer2.getScore()

        # TP score + FP score + 1 should be very close to 0; the 1 is added to
        # account for the subsequent FN contribution.
        # assertAlmostEqual is used instead of the deprecated alias
        # assertAlmostEquals, which newer unittest versions no longer provide.
        self.assertAlmostEqual(score1 + score2 + 1, 0.0, 3)
        self._checkCounts(scorer1.counts, length - windowSize * numWindows, 1,
                          0, windowSize * numWindows - 1)
        self._checkCounts(scorer2.counts, length - windowSize * numWindows - 1,
                          0, 1, windowSize * numWindows)
Example #34
0
  def testRewardLowFalseNegatives(self):
    """
    Compare the standard cost matrix against a copy whose fnWeight is raised
    to 2.0 (and fpWeight set to 0.055) on a series whose anomaly scores are
    all 0.

    Swept at a threshold of 0.5, the score under the standard matrix must
    equal exactly half of the score under the modified matrix, and both runs
    must report the same counts.
    """
    length = 100
    numWindows = 1
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(
      datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    anomalyScores = pandas.Series([0]*length)

    # Work on a deep copy so the shared cost matrix is left untouched.
    heavyFnMatrix = copy.deepcopy(self.costMatrix)
    heavyFnMatrix["fnWeight"] = 2.0
    heavyFnMatrix["fpWeight"] = 0.055

    sweepers = [
      Sweeper(probationPercent=0, costMatrix=matrix)
      for matrix in (self.costMatrix, heavyFnMatrix)
    ]

    matchingRows = []
    for sweeper in sweepers:
      (_, row) = sweeper.scoreDataSet(
        timestamps, anomalyScores, windows, "testData", threshold)
      matchingRows.append(row)
    standardRow, heavyFnRow = matchingRows

    self.assertEqual(standardRow.score, 0.5*heavyFnRow.score)

    windowTotal = windowSize*numWindows
    for row in (standardRow, heavyFnRow):
      self._checkCounts(row, length-windowTotal, 0, 0, windowTotal)
    def testEarlierTruePositiveIsBetter(self):
        """
        When two algorithms each produce one true positive inside the same
        window, the algorithm whose detection comes earlier must score higher.

        The first prediction series flags the window's first timestamp, the
        second flags its last timestamp; both are scored against the same
        labels and windows.
        """
        length = 10
        numWindows = 1
        windowSize = 2

        timestamps = generateTimestamps(datetime.datetime.now(),
                                        datetime.timedelta(minutes=5), length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        earlyPredictions = pandas.Series([0] * length)
        latePredictions = pandas.Series([0] * length)
        windowStart, windowEnd = windows[0]

        def scoreDetectionAt(moment, predictions):
            # Flag the record whose timestamp equals `moment`, then score.
            position = timestamps[timestamps == moment].index[0]
            predictions[position] = 1
            scorer = Scorer(timestamps, predictions, labels, windows,
                            self.costMatrix, probationaryPeriod=0)
            (_, value) = scorer.getScore()
            return scorer, value

        earlyScorer, earlyScore = scoreDetectionAt(windowStart,
                                                   earlyPredictions)
        lateScorer, lateScore = scoreDetectionAt(windowEnd, latePredictions)

        failureMessage = ("The earlier TP score is not greater than "
                          "the later TP. They are %f and %f, respectively." %
                          (earlyScore, lateScore))
        self.assertTrue(earlyScore > lateScore, failureMessage)

        windowTotal = windowSize * numWindows
        for scorer in (earlyScorer, lateScorer):
            self._checkCounts(scorer.counts, length - windowTotal, 1, 0,
                              windowTotal - 1)
  def testOnlyScoreFirstTruePositiveWithinWindow(self):
    """
    Several detections inside one window must be scored as if only the
    earliest of them had been made.

    The series is swept once with a detection on the window's first
    timestamp, then again after a second detection is added on the window's
    last timestamp. The two row scores must be equal, while the expected
    counts differ between the two runs.
    """
    length = 10
    numWindows = 1
    windowSize = 2
    threshold = 0.5

    timestamps = generateTimestamps(
      datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    anomalyScores = pandas.Series([0] * length)
    windowStart, windowEnd = windows[0]

    def sweep(sweeperInstance):
      # Score the current state of anomalyScores; keep only the matching row.
      (_, row) = sweeperInstance.scoreDataSet(
        timestamps, anomalyScores, windows, "testData", threshold)
      return row

    # Score with a single true positive at start of window
    firstPosition = timestamps[timestamps == windowStart].index[0]
    anomalyScores[firstPosition] = 1
    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    singleRow = sweep(sweeper)

    # Add a second true positive to end of window
    lastPosition = timestamps[timestamps == windowEnd].index[0]
    anomalyScores[lastPosition] = 1
    doubleRow = sweep(sweeper)

    self.assertEqual(singleRow.score, doubleRow.score)

    windowTotal = windowSize*numWindows
    self._checkCounts(singleRow, length-windowTotal, 1, 0, windowTotal-1)
    self._checkCounts(doubleRow, length-windowTotal, 2, 0, windowTotal-2)
Example #37
0
  def testFalsePositiveScaling(self):
    """
    Test scaling the weight of false positives results in an approximate
    balance with the true positives.

    The contributions of TP and FP scores should approximately cancel; i.e.
    total score = 0. With x windows, this total score should on average
    decrease x/2 because of x FNs. Thus, the acceptable range for score should
    be centered about -x/2.

    Twenty series, each with 10 randomly placed detections, are swept at a
    threshold of 0.5; the mean of the 20 matching-row scores must lie in
    [-1.5, 0.5].
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 100
    numWindows = 1
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)

    # Scale for 10% = windowSize/length
    # NOTE(review): this mutates self.costMatrix in place; presumably the
    # fixture is rebuilt for every test -- confirm.
    self.costMatrix["fpWeight"] = 0.11
    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)

    # Make arbitrary detections, score, repeat.
    # The first element returned by scoreDataSet is discarded on purpose:
    # binding it to the accumulator's name would replace the list of per-run
    # scores on every iteration and corrupt the average computed below.
    runScores = []
    for _ in range(20):
      anomalyScores = pandas.Series([0]*length)
      indices = random.sample(range(length), 10)
      anomalyScores[indices] = 1
      (_, matchingRow) = sweeper.scoreDataSet(
        timestamps,
        anomalyScores,
        windows,
        "testData",
        threshold
      )
      runScores.append(matchingRow.score)

    avgScore = sum(runScores)/float(len(runScores))

    self.assertTrue(-1.5 <= avgScore <= 0.5, "The average score across 20 sets "
      "of random detections is %f, which is not within the acceptable range "
      "-1.5 to 0.5." % avgScore)
Example #38
0
    def testFalsePositiveScaling(self):
        """
        Test scaling the weight of false positives results in an approximate
        balance with the true positives.

        The contributions of TP and FP scores should approximately cancel;
        i.e. total score = 0. With x windows, this total score should on
        average decrease x/2 because of x FNs. Thus, the acceptable range for
        score should be centered about -x/2.

        Twenty prediction series, each with 10 randomly placed detections,
        are scored; the mean of the 20 scores must lie in [-1.5, 0.5].
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 100
        numWindows = 1
        windowSize = 10

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)

        # Scale for 10% = windowSize/length
        # NOTE(review): this mutates self.costMatrix in place; presumably the
        # fixture is rebuilt for every test -- confirm.
        self.costMatrix["fpWeight"] = 0.11

        # Make arbitrary detections, score, repeat.
        # range() is used rather than xrange() so the loop also runs on
        # Python 3; for 20 iterations the two behave the same on Python 2.
        scores = []
        for _ in range(20):
            predictions = pandas.Series([0] * length)
            indices = random.sample(range(length), 10)
            predictions[indices] = 1
            scorer = Scorer(timestamps,
                            predictions,
                            labels,
                            windows,
                            self.costMatrix,
                            probationaryPeriod=0)
            (_, score) = scorer.getScore()
            scores.append(score)

        avgScore = sum(scores) / float(len(scores))

        self.assertTrue(
            -1.5 <= avgScore <= 0.5, "The average score across 20 sets "
            "of random detections is %f, which is not within the acceptable range "
            "-1.5 to 0.5." % avgScore)
Example #39
0
  def test_secondTruePositiveWithinWindowIsIgnored(self):
    """
    When a window contains two true positives, only the earlier one may
    influence the score.

    A series with one detection on the window's first timestamp is scored,
    then scored again after a second detection is added on the window's last
    timestamp; both getScore() results must compare equal.
    """
    length = 10
    numWindows = 1
    windowSize = 2

    timestamps = generateTimestamps(
      datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    predictions = pandas.Series([0]*length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    windowStart, windowEnd = windows[0]

    costMatrix = {
      "tpWeight": 1.0,
      "fnWeight": 2.0,
      "fpWeight": 3.0,
      "tnWeight": 4.0,
    }

    def flagAndScore(moment):
      # Flag the record whose timestamp equals `moment`, then score the
      # series in its current state.
      position = timestamps[timestamps == moment].index[0]
      predictions[position] = 1
      scorer = Scorer(timestamps, predictions, labels, windows, costMatrix,
        probationaryPeriod=0)
      return scorer.getScore()

    firstResult = flagAndScore(windowStart)
    secondResult = flagAndScore(windowEnd)

    self.assertEqual(firstResult, secondResult)
Example #40
0
  def testEarlierFalsePositiveAfterWindowIsBetter(self):
    """For two false positives A and B placed after the window, with A earlier
    than B, the run containing A must score higher than the run containing B.

    A sits one record past the window's last timestamp and B two records
    past; each run is swept by its own Sweeper at a threshold of 0.5.
    """
    length = 10
    numWindows = 1
    windowSize = 2
    threshold = 0.5

    timestamps = generateTimestamps(
      datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    earlySeries = pandas.Series([0]*length)
    lateSeries = pandas.Series([0]*length)
    # Only the right edge of the window is needed here.
    _, windowEnd = windows[0]

    def sweepFresh(series):
      # Sweep `series` with a newly constructed Sweeper; keep the matching
      # row only.
      freshSweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
      (_, row) = freshSweeper.scoreDataSet(
        timestamps, series, windows, "testData", threshold)
      return row

    justAfter = timestamps[timestamps == windowEnd].index[0] + 1
    earlySeries[justAfter] = 1
    earlyRow = sweepFresh(earlySeries)

    lateSeries[justAfter+1] = 1
    lateRow = sweepFresh(lateSeries)

    self.assertTrue(earlyRow.score > lateRow.score)

    windowTotal = windowSize*numWindows
    for row in (earlyRow, lateRow):
      self._checkCounts(row, length-windowTotal-1, 0, 1, windowTotal)
    def testOnlyScoreFirstTruePositiveWithinWindow(self):
        """
        Several detections inside one window must be scored as if only the
        earliest of them had been made.

        The prediction series is scored with one detection on the window's
        first timestamp, then again after a second detection is added on the
        window's last timestamp. The two scores must be equal, while the
        expected counts differ between the runs.
        """
        length = 10
        numWindows = 1
        windowSize = 2

        timestamps = generateTimestamps(datetime.datetime.now(),
                                        datetime.timedelta(minutes=5), length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        predictions = pandas.Series([0] * length)
        windowStart, windowEnd = windows[0]

        def flagAndScore(moment):
            # Flag the record whose timestamp equals `moment`, then score the
            # series in its current state.
            position = timestamps[timestamps == moment].index[0]
            predictions[position] = 1
            scorer = Scorer(timestamps, predictions, labels, windows,
                            self.costMatrix, probationaryPeriod=0)
            (_, value) = scorer.getScore()
            return scorer, value

        singleScorer, singleScore = flagAndScore(windowStart)
        doubleScorer, doubleScore = flagAndScore(windowEnd)

        self.assertEqual(singleScore, doubleScore)

        windowTotal = windowSize * numWindows
        self._checkCounts(singleScorer.counts, length - windowTotal, 1, 0,
                          windowTotal - 1)
        self._checkCounts(doubleScorer.counts, length - windowTotal, 2, 0,
                          windowTotal - 2)
Example #42
0
  def test_earlierTruePositiveIsBetter(self):
    """
    When two algorithms each flag one true positive inside the same window,
    the one whose detection is earlier in the window must get the higher
    score.

    Series 1 flags the window's first timestamp and series 2 its last. Both
    are scored with the cost matrix tp=1.0, fn=2.0, fp=3.0, tn=4.0.
    """
    length = 10
    numWindows = 1
    windowSize = 2

    timestamps = generateTimestamps(
      datetime.datetime.now(), datetime.timedelta(minutes=5), length)

    earlyPredictions = pandas.Series([0]*length)
    latePredictions = pandas.Series([0]*length)

    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    windowStart, windowEnd = windows[0]

    costMatrix = {
      "tpWeight": 1.0,
      "fnWeight": 2.0,
      "fpWeight": 3.0,
      "tnWeight": 4.0,
    }

    def flagAndScore(moment, predictions):
      # Flag the record whose timestamp equals `moment`, then score.
      position = timestamps[timestamps == moment].index[0]
      predictions[position] = 1
      scorer = Scorer(timestamps, predictions, labels, windows, costMatrix,
        probationaryPeriod=0)
      return scorer.getScore()

    earlyResult = flagAndScore(windowStart, earlyPredictions)
    lateResult = flagAndScore(windowEnd, latePredictions)

    self.assertTrue(earlyResult > lateResult)
    def testFalsePositiveScaling(self):
        """
        Test scaling the weight of false positives results in an approximate
        balance with the true positives.

        The contributions of TP and FP scores should approximately cancel;
        i.e. total score = 0. With x windows, this total score should on
        average decrease x/2 because of x FNs. Thus, the acceptable range for
        score should be centered about -x/2.

        Twenty series, each with 10 randomly placed detections, are swept at
        a threshold of 0.5; the mean of the 20 matching-row scores must lie
        in [-1.5, 0.5].
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 100
        numWindows = 1
        windowSize = 10
        threshold = 0.5

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)

        # Scale for 10% = windowSize/length
        # NOTE(review): this mutates self.costMatrix in place; presumably the
        # fixture is rebuilt for every test -- confirm.
        self.costMatrix["fpWeight"] = 0.11
        sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)

        # Make arbitrary detections, score, repeat.
        # The first element returned by scoreDataSet is discarded on purpose:
        # binding it to the accumulator's name would replace the list of
        # per-run scores on every iteration and corrupt the average below.
        runScores = []
        for _ in range(20):
            anomalyScores = pandas.Series([0] * length)
            indices = random.sample(range(length), 10)
            anomalyScores[indices] = 1
            (_, matchingRow) = sweeper.scoreDataSet(timestamps, anomalyScores,
                                                    windows, "testData",
                                                    threshold)
            runScores.append(matchingRow.score)

        avgScore = sum(runScores) / float(len(runScores))

        self.assertTrue(
            -1.5 <= avgScore <= 0.5, "The average score across 20 sets "
            "of random detections is %f, which is not within the acceptable range "
            "-1.5 to 0.5." % avgScore)
Example #44
0
    def testEarlierFalsePositiveAfterWindowIsBetter(self):
        """For two false positives A and B placed after the window, with A
        earlier than B, the run containing A must score higher than the run
        containing B.

        A sits one record past the window's last timestamp and B two records
        past; both runs share the same labels, windows and cost matrix.
        """
        length = 10
        numWindows = 1
        windowSize = 2

        timestamps = generateTimestamps(datetime.datetime.now(),
                                        datetime.timedelta(minutes=5), length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        earlyPredictions = pandas.Series([0] * length)
        latePredictions = pandas.Series([0] * length)
        # Only the right edge of the window is needed here.
        _, windowEnd = windows[0]

        def scoreOf(predictions):
            # Score one prediction series; return the scorer as well so its
            # counts can be checked afterwards.
            scorer = Scorer(timestamps, predictions, labels, windows,
                            self.costMatrix, probationaryPeriod=0)
            (_, value) = scorer.getScore()
            return scorer, value

        justAfter = timestamps[timestamps == windowEnd].index[0] + 1
        earlyPredictions[justAfter] = 1
        earlyScorer, earlyScore = scoreOf(earlyPredictions)

        latePredictions[justAfter + 1] = 1
        lateScorer, lateScore = scoreOf(latePredictions)

        self.assertTrue(earlyScore > lateScore)

        windowTotal = windowSize * numWindows
        for scorer in (earlyScorer, lateScorer):
            self._checkCounts(scorer.counts, length - windowTotal - 1, 0, 1,
                              windowTotal)
Example #45
0
  def testTwoFalsePositivesIsWorseThanOne(self):
    """
    With two false positives A and B in a file, the score must be more
    negative than the score obtained with A alone.

    One series is swept twice at a threshold of 0.5, each time by a newly
    built Sweeper: first with a detection at index 0 only, then again after
    a detection is added at index 1.
    """
    length = 1000
    numWindows = 1
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(
      datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    anomalyScores = pandas.Series([0]*length)

    def flagAndSweep(position):
      # Flag `position`, then sweep the series in its current state with a
      # newly constructed Sweeper; keep the matching row only.
      anomalyScores[position] = 1
      freshSweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
      (_, row) = freshSweeper.scoreDataSet(
        timestamps, anomalyScores, windows, "testData", threshold)
      return row

    singleRow = flagAndSweep(0)
    doubleRow = flagAndSweep(1)

    self.assertTrue(doubleRow.score < singleRow.score)

    windowTotal = windowSize*numWindows
    self._checkCounts(singleRow, length-windowTotal-1, 0, 1, windowTotal)
    self._checkCounts(doubleRow, length-windowTotal-2, 0, 2, windowTotal)
Example #46
0
  def testTruePositiveAtRightEdgeOfWindow(self):
    """
    True positives at the right edge of a window should yield a score of
    approximately zero; the scaled sigmoid scoring function crosses zero
    between a given window's last timestamp and the next timestamp
    (immediately following the window).

    The same prediction series is scored twice: first with its single
    detection on the window's last timestamp, then with that detection moved
    one record later. The two scores plus 1 must be within 3 decimal places
    of 0.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 1000
    numWindows = 1
    windowSize = 100

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    predictions = pandas.Series([0]*length)

    # Make prediction at end of the window; TP
    index = timestamps[timestamps == windows[0][1]].index[0]
    predictions[index] = 1
    scorer1 = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score1) = scorer1.getScore()
    # Make prediction just after the window; FP
    predictions[index] = 0
    index += 1
    predictions[index] = 1
    scorer2 = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score2) = scorer2.getScore()

    # TP score + FP score + 1 should be very close to 0; the 1 is added to
    # account for the subsequent FN contribution.
    # assertAlmostEqual is used instead of the deprecated alias
    # assertAlmostEquals, which newer unittest versions no longer provide.
    self.assertAlmostEqual(score1 + score2 + 1, 0.0, 3)
    self._checkCounts(scorer1.counts, length-windowSize*numWindows, 1, 0,
      windowSize*numWindows-1)
    self._checkCounts(scorer2.counts, length-windowSize*numWindows-1, 0, 1,
      windowSize*numWindows)
Example #47
0
    def testTwoFalsePositivesIsWorseThanOne(self):
        """
        With two false positives A and B in a file, the score must be more
        negative than the score obtained with A alone.

        One prediction series is scored twice: first with a detection at
        index 0 only, then again after a detection is added at index 1.
        """
        length = 1000
        numWindows = 1
        windowSize = 10

        timestamps = generateTimestamps(datetime.datetime.now(),
                                        datetime.timedelta(minutes=5), length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        predictions = pandas.Series([0] * length)

        def flagAndScore(position):
            # Flag `position`, then score the series in its current state.
            predictions[position] = 1
            scorer = Scorer(timestamps, predictions, labels, windows,
                            self.costMatrix, probationaryPeriod=0)
            (_, value) = scorer.getScore()
            return scorer, value

        singleScorer, singleScore = flagAndScore(0)
        doubleScorer, doubleScore = flagAndScore(1)

        self.assertTrue(doubleScore < singleScore)

        windowTotal = windowSize * numWindows
        self._checkCounts(singleScorer.counts, length - windowTotal - 1, 0, 1,
                          windowTotal)
        self._checkCounts(doubleScorer.counts, length - windowTotal - 2, 0, 2,
                          windowTotal)
Example #48
0
  def testFalsePositiveMeansNegativeScore(self):
    """
    A single false positive must push the score below zero.

    The only detection is placed at index 0 of a 1000-record series with one
    window of size 10; the test expects it to be counted as a false positive.
    """
    length = 1000
    numWindows = 1
    windowSize = 10

    timestamps = generateTimestamps(
      datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    predictions = pandas.Series([0]*length)
    predictions[0] = 1

    scorer = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, finalScore) = scorer.getScore()

    self.assertTrue(finalScore < 0)

    windowTotal = windowSize*numWindows
    self._checkCounts(scorer.counts, length-windowTotal-1, 0, 1, windowTotal)
    def testFalsePositiveMeansNegativeScore(self):
        """
        A false positive should make the score negative.

        Gives record 0 an anomaly score of 1 (all others 0), scores the data
        set through Sweeper.scoreDataSet with a threshold of 0.5, and checks
        that the score on the returned row is below zero.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 1000
        numWindows = 1
        windowSize = 10
        threshold = 0.5

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        anomalyScores = pandas.Series([0] * length)

        # The only non-zero anomaly score is at the very first record.
        anomalyScores[0] = 1
        sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
        # Only the second element of the returned pair is inspected here.
        (_, matchingRow) = sweeper.scoreDataSet(timestamps, anomalyScores,
                                                windows, "testData",
                                                threshold)

        # assertLess reports the actual score when the check fails.
        self.assertLess(matchingRow.score, 0)

        # Expected counts; the positional order is presumably
        # (tn, tp, fp, fn) -- confirm against _checkCounts.
        self._checkCounts(matchingRow, length - windowSize * numWindows - 1, 0,
                          1, windowSize * numWindows)
Example #50
0
  def testFourFalseNegatives(self):
    """
    With four windows and no detections at all, the score should be four
    times the negative of the false negative weight.

    The comparison allows an absolute difference strictly below 0.01 rather
    than requiring exact equality.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 2000
    numWindows = 4
    windowSize = 10

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    # All-zero predictions: nothing is ever flagged.
    predictions = pandas.Series([0]*length)

    scorer = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score) = scorer.getScore()

    # score + 4*fnWeight is the distance from the expected -4*fnWeight;
    # assertLess reports that distance when the check fails.
    self.assertLess(abs(score + 4*self.costMatrix["fnWeight"]), 0.01)

    # Expected counts; the positional order is presumably (tn, tp, fp, fn)
    # -- confirm against _checkCounts.
    self._checkCounts(scorer.counts, length-windowSize*numWindows, 0, 0,
      windowSize*numWindows)
    def testRewardLowFalseNegatives(self):
        """
        Compare the score of an all-zero detection set under two cost
        matrices.

        The data set has one window and no anomaly score reaches the
        threshold. It is scored once with self.costMatrix and once with a
        copy whose "fnWeight" is 2.0 and "fpWeight" is 0.055. The test
        asserts that the first score is exactly half of the second, and that
        both runs produce the same counts.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 100
        numWindows = 1
        windowSize = 10
        threshold = 0.5

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        anomalyScores = pandas.Series([0] * length)

        # Deep copy so that the shared fixture matrix is left untouched.
        costMatrixFN = copy.deepcopy(self.costMatrix)
        costMatrixFN["fnWeight"] = 2.0
        costMatrixFN["fpWeight"] = 0.055

        sweeper1 = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
        sweeper2 = Sweeper(probationPercent=0, costMatrix=costMatrixFN)

        # Only the second element of each returned pair is inspected here.
        (_, matchingRow1) = sweeper1.scoreDataSet(timestamps,
                                                  anomalyScores, windows,
                                                  "testData", threshold)

        (_, matchingRow2) = sweeper2.scoreDataSet(timestamps,
                                                  anomalyScores, windows,
                                                  "testData", threshold)

        self.assertEqual(matchingRow1.score, 0.5 * matchingRow2.score)

        # The counts do not depend on the cost matrix, so both rows are
        # checked against the same expected values. The positional order is
        # presumably (tn, tp, fp, fn) -- confirm against _checkCounts.
        self._checkCounts(matchingRow1, length - windowSize * numWindows, 0, 0,
                          windowSize * numWindows)
        self._checkCounts(matchingRow2, length - windowSize * numWindows, 0, 0,
                          windowSize * numWindows)
Example #52
0
  def testEarlierTruePositiveIsBetter(self):
    """
    If two algorithms both get a true positive within a window, the algorithm
    with the earlier true positive (in the window) should get a higher score.

    One prediction series flags the record at the window's first timestamp,
    the other flags the record at the window's second timestamp; the first
    score must be strictly greater.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 10
    numWindows = 1
    windowSize = 2

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    predictions1 = pandas.Series([0]*length)
    predictions2 = pandas.Series([0]*length)
    # The two timestamps that make up the single window.
    t1, t2 = windows[0]

    # Detection at the record whose timestamp equals t1.
    index1 = timestamps[timestamps == t1].index[0]
    predictions1[index1] = 1
    scorer1 = Scorer(timestamps, predictions1, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score1) = scorer1.getScore()

    # Detection at the record whose timestamp equals t2.
    index2 = timestamps[timestamps == t2].index[0]
    predictions2[index2] = 1
    scorer2 = Scorer(timestamps, predictions2, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score2) = scorer2.getScore()

    self.assertGreater(score1, score2, "The earlier TP score is not greater than "
      "the later TP. They are %f and %f, respectively." % (score1, score2))

    # Expected counts are the same for both runs; the positional order is
    # presumably (tn, tp, fp, fn) -- confirm against _checkCounts.
    self._checkCounts(scorer1.counts, length-windowSize*numWindows, 1, 0,
      windowSize*numWindows-1)
    self._checkCounts(scorer2.counts, length-windowSize*numWindows, 1, 0,
      windowSize*numWindows-1)
    def testFourFalseNegatives(self):
        """
        With four windows and no detections at all, the score should be
        exactly four times the negative of the false negative weight.

        All anomaly scores are 0 and the threshold is 1, so no record is
        expected to count as a detection.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 2000
        numWindows = 4
        windowSize = 10

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        anomalyScores = pandas.Series([0] * length)
        threshold = 1

        sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
        # Only the second element of the returned pair is inspected here.
        (_, matchingRow) = sweeper.scoreDataSet(timestamps, anomalyScores,
                                                windows, "testData",
                                                threshold)

        self.assertEqual(matchingRow.score, 4 * -self.costMatrix["fnWeight"])

        # Expected counts; the positional order is presumably
        # (tn, tp, fp, fn) -- confirm against _checkCounts.
        self._checkCounts(matchingRow, length - windowSize * numWindows, 0, 0,
                          windowSize * numWindows)
Example #54
0
  def testOneFalsePositiveNoWindow(self):
    """
    When there is no window (i.e. no anomaly), a false positive should still
    result in a negative score, specifically negative the FP weight.

    Zero windows are generated and only record 0 is flagged as a detection.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 1000
    numWindows = 0
    windowSize = 10

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    predictions = pandas.Series([0]*length)

    # The only detection is at the very first record.
    predictions[0] = 1
    scorer = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score) = scorer.getScore()

    # assertEqual reports both values when they differ.
    self.assertEqual(score, -self.costMatrix["fpWeight"])

    # With numWindows == 0 the window terms below evaluate to 0. The
    # positional order is presumably (tn, tp, fp, fn) -- confirm against
    # _checkCounts.
    self._checkCounts(scorer.counts, length-windowSize*numWindows-1, 0, 1,
      windowSize*numWindows)