Ejemplo n.º 1
0
    def testOneFalsePositiveNoWindow(self):
        """
        With no anomaly windows at all, a lone detection is a false positive
        and must still produce a negative score: exactly the negated
        false-positive weight of the cost matrix.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 1000
        numWindows = 0
        windowSize = 10

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        predictions = pandas.Series([0] * length)

        # Flag the very first record; the counts check below expects it to be
        # the only false positive.
        predictions[0] = 1
        scorer = Scorer(timestamps,
                        predictions,
                        labels,
                        windows,
                        self.costMatrix,
                        probationaryPeriod=0)
        (_, score) = scorer.getScore()

        # assertEqual reports both operands on failure, whereas
        # assertTrue(a == b) only reports "False is not true".
        self.assertEqual(score, -self.costMatrix["fpWeight"])
        # NOTE(review): count arguments are presumably (tn, tp, fp, fn) --
        # confirm against _checkCounts.
        self._checkCounts(scorer.counts, length - windowSize * numWindows - 1,
                          0, 1, windowSize * numWindows)
Ejemplo n.º 2
0
  def testRewardLowFalsePositives(self):
    """
    Score one false positive under the default cost matrix and under a copy
    whose false-positive weight is raised to 2.0 (and false-negative weight
    lowered to 0.5).

    The test asserts that the default score equals half of the score obtained
    with the modified matrix, and that both scorers report the same counts.
    """
    length = 100
    numWindows = 0
    windowSize = 10

    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    length)
    noWindows = []
    labels = generateLabels(timestamps, noWindows)
    detections = pandas.Series([0] * length)

    # Modified profile: work on a deep copy so self.costMatrix stays intact.
    fpHeavyMatrix = copy.deepcopy(self.costMatrix)
    fpHeavyMatrix["fpWeight"] = 2.0
    fpHeavyMatrix["fnWeight"] = 0.5

    # With no windows, this single detection is a false positive.
    detections[0] = 1

    standardScorer = Scorer(timestamps, detections, labels, noWindows,
                            self.costMatrix, probationaryPeriod=0)
    _, standardScore = standardScorer.getScore()
    fpHeavyScorer = Scorer(timestamps, detections, labels, noWindows,
                           fpHeavyMatrix, probationaryPeriod=0)
    _, fpHeavyScore = fpHeavyScorer.getScore()

    self.assertEqual(standardScore, 0.5 * fpHeavyScore)

    # Both scorers saw identical detections, so the counts must match.
    expectedTN = length - windowSize * numWindows - 1
    for scorer in (standardScorer, fpHeavyScorer):
      self._checkCounts(scorer.counts, expectedTN, 0, 1, 0)
Ejemplo n.º 3
0
  def testRewardLowFalsePositives(self):
    """
    A single false positive is scored twice: once with self.costMatrix and
    once with a deep copy in which fpWeight is 2.0 and fnWeight is 0.5.

    Expected: the first score is 0.5 times the second, and the counts
    reported by both scorers are the same.
    """
    seriesLength = 100
    numWindows = 0
    windowSize = 10
    firstStamp = datetime.datetime.now()
    step = datetime.timedelta(minutes=5)

    timestamps = generateTimestamps(firstStamp, step, seriesLength)
    windows = []
    labels = generateLabels(timestamps, windows)
    predictions = pandas.Series([0] * seriesLength)

    # Alternate profile built from a copy, leaving self.costMatrix untouched.
    altMatrix = copy.deepcopy(self.costMatrix)
    altMatrix["fpWeight"] = 2.0
    altMatrix["fnWeight"] = 0.5

    # The only detection; there are no windows, so it counts as an FP.
    predictions[0] = 1

    baseScorer = Scorer(
      timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    baseScore = baseScorer.getScore()[1]
    altScorer = Scorer(
      timestamps, predictions, labels, windows, altMatrix,
      probationaryPeriod=0)
    altScore = altScorer.getScore()[1]

    self.assertEqual(baseScore, 0.5 * altScore)

    trueNegatives = seriesLength - windowSize * numWindows - 1
    self._checkCounts(baseScorer.counts, trueNegatives, 0, 1, 0)
    self._checkCounts(altScorer.counts, trueNegatives, 0, 1, 0)
Ejemplo n.º 4
0
    def testFirstTruePositiveWithinWindow(self):
        """
        A detection on the first record of a window should score approximately
        self.costMatrix["tpWeight"]; agreement to 4 decimal places is more
        than enough precision.
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 10
        numWindows = 1
        windowSize = 2

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        predictions = pandas.Series([0] * length)

        # Locate the row whose timestamp equals the first window's first
        # element and mark it as a detection.
        index = timestamps[timestamps == windows[0][0]].index[0]
        predictions[index] = 1
        scorer = Scorer(timestamps,
                        predictions,
                        labels,
                        windows,
                        self.costMatrix,
                        probationaryPeriod=0)
        (_, score) = scorer.getScore()

        # assertAlmostEquals is a deprecated alias (removed in Python 3.12);
        # assertAlmostEqual is the supported spelling with identical semantics.
        self.assertAlmostEqual(score, self.costMatrix["tpWeight"], 4)
        self._checkCounts(scorer.counts, length - windowSize * numWindows, 1,
                          0, windowSize * numWindows - 1)
Ejemplo n.º 5
0
  def test_oneFalsePositiveNoWindow(self):
    """
    When there is no window (meaning no anomaly), a false positive should still
    result in a negative score: the test expects exactly the negated
    false-positive weight.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 1000
    numWindows = 0
    windowSize = 10

    timestamps = generateTimestamps(start, increment, length)

    predictions = pandas.Series([0]*length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    costMatrix = {"tpWeight": 1.0,
                  "fnWeight": 1.0,
                  "fpWeight": 1.0,
                  "tnWeight": 1.0}

    # The only detection in the series; the counts below expect it to be an FP.
    predictions[0] = 1

    scorer = Scorer(timestamps, predictions, labels, windows, costMatrix,
      probationaryPeriod=0)

    # assertEqual shows both values when the check fails, unlike
    # assertTrue(a == b).
    self.assertEqual(scorer.getScore(), -costMatrix["fpWeight"])

    # Ensure counts are correct.
    self.assertEqual(scorer.counts['tn'], length-windowSize*numWindows-1)
    self.assertEqual(scorer.counts['tp'], 0)
    self.assertEqual(scorer.counts['fp'], 1)
    self.assertEqual(scorer.counts['fn'], windowSize*numWindows)
Ejemplo n.º 6
0
  def testFirstTruePositiveWithinWindow(self):
    """
    First record within window has a score approximately equal to
    self.costMatrix["tpWeight"]; within 4 decimal places is more than enough
    precision.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 10
    numWindows = 1
    windowSize = 2

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    predictions = pandas.Series([0]*length)

    # Mark the row whose timestamp equals the first window's first element.
    index = timestamps[timestamps == windows[0][0]].index[0]
    predictions[index] = 1
    scorer = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score) = scorer.getScore()

    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
    # which no longer exists in Python 3.12+.
    self.assertAlmostEqual(score, self.costMatrix["tpWeight"], 4)
    self._checkCounts(scorer.counts, length-windowSize*numWindows, 1, 0,
      windowSize*numWindows-1)
Ejemplo n.º 7
0
  def test_FourFalseNegatives(self):
    """
    Four windows and no detections at all: the score is expected to lie within
    0.01 of minus four times the false-negative weight.
    """
    length = 2000
    numWindows = 4
    windowSize = 10
    weights = {
      "tpWeight": 1.0,
      "fnWeight": 2.0,
      "fpWeight": 3.0,
      "tnWeight": 4.0,
    }

    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    length)
    # All-zero series: nothing is ever detected.
    silent = pandas.Series([0] * length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    scorer = Scorer(timestamps, silent, labels, windows, weights,
                    probationaryPeriod=0)

    deviation = abs(scorer.getScore() + 4 * weights['fnWeight'])
    self.assertTrue(deviation < 0.01)

    # Ensure counts are correct: every in-window record is a miss.
    windowRecords = windowSize * numWindows
    expectedCounts = (
      ('tn', length - windowRecords),
      ('tp', 0),
      ('fp', 0),
      ('fn', windowRecords),
    )
    for key, expected in expectedCounts:
      self.assertEqual(scorer.counts[key], expected)
Ejemplo n.º 8
0
 def testScoringAllMetrics(self):
   """
   This tests an example set of detections, where all metrics have counts > 0.

   Three detections are placed relative to the first window's first record:
   at offsets 0 and 1 (expected true positives) and at offset 7 (expected
   false positive). The resulting score is compared to a fixed reference
   value of -0.9540 to 4 decimal places.
   """
   start = datetime.datetime.now()
   increment = datetime.timedelta(minutes=5)
   length = 100
   numWindows = 2
   windowSize = 5

   timestamps = generateTimestamps(start, increment, length)
   windows = generateWindows(timestamps, numWindows, windowSize)
   labels = generateLabels(timestamps, windows)
   predictions = pandas.Series([0]*length)

   index = timestamps[timestamps == windows[0][0]].index[0]
   # TP, add'l TP, and FP
   predictions[index] = 1
   predictions[index+1] = 1
   predictions[index+7] = 1

   scorer = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
     probationaryPeriod=0)
   (_, score) = scorer.getScore()

   # assertAlmostEquals is a deprecated alias that was removed in
   # Python 3.12; assertAlmostEqual behaves identically.
   self.assertAlmostEqual(score, -0.9540, 4)
   self._checkCounts(scorer.counts, length-windowSize*numWindows-1, 2, 1, 8)
Ejemplo n.º 9
0
    def testFalsePositiveMeansNegativeScore(self):
        """
        A series whose only detection is a false positive must end up with a
        score below zero.
        """
        length = 1000
        numWindows = 1
        windowSize = 10

        timestamps = generateTimestamps(datetime.datetime.now(),
                                        datetime.timedelta(minutes=5),
                                        length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)

        # Single detection on the first record; the counts check below
        # expects it to be counted as a false positive.
        detections = pandas.Series([0] * length)
        detections[0] = 1

        scorer = Scorer(timestamps, detections, labels, windows,
                        self.costMatrix, probationaryPeriod=0)
        _, score = scorer.getScore()

        self.assertTrue(score < 0)

        windowRecords = windowSize * numWindows
        self._checkCounts(scorer.counts, length - windowRecords - 1, 0, 1,
                          windowRecords)
Ejemplo n.º 10
0
  def testOnlyScoreFirstTruePositiveWithinWindow(self):
    """
    Several detections inside one window are all counted as true positives,
    but only the earliest one should contribute to the score: adding a second
    detection at the window's other boundary must leave the score unchanged.
    """
    length = 10
    numWindows = 1
    windowSize = 2

    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    detections = pandas.Series([0] * length)
    windowStart, windowEnd = windows[0]

    # First run: one detection at the window's first boundary timestamp.
    startRows = timestamps[timestamps == windowStart]
    detections[startRows.index[0]] = 1
    scorerOne = Scorer(timestamps, detections, labels, windows,
                       self.costMatrix, probationaryPeriod=0)
    _, scoreOne = scorerOne.getScore()

    # Second run: the same series plus a detection at the second boundary.
    endRows = timestamps[timestamps == windowEnd]
    detections[endRows.index[0]] = 1
    scorerTwo = Scorer(timestamps, detections, labels, windows,
                       self.costMatrix, probationaryPeriod=0)
    _, scoreTwo = scorerTwo.getScore()

    self.assertEqual(scoreOne, scoreTwo)

    windowRecords = windowSize * numWindows
    self._checkCounts(scorerOne.counts, length - windowRecords, 1, 0,
                      windowRecords - 1)
    self._checkCounts(scorerTwo.counts, length - windowRecords, 2, 0,
                      windowRecords - 2)
Ejemplo n.º 11
0
  def testTwoFalsePositivesIsWorseThanOne(self):
    """
    Given false positives A and B in one file, the score with both A and B
    must be lower (more negative) than the score with A alone.
    """
    length = 1000
    numWindows = 1
    windowSize = 10

    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    detections = pandas.Series([0] * length)

    # Run with false positive A only.
    detections[0] = 1
    singleScorer = Scorer(timestamps, detections, labels, windows,
                          self.costMatrix, probationaryPeriod=0)
    _, singleScore = singleScorer.getScore()

    # Run with false positives A and B.
    detections[1] = 1
    doubleScorer = Scorer(timestamps, detections, labels, windows,
                          self.costMatrix, probationaryPeriod=0)
    _, doubleScore = doubleScorer.getScore()

    self.assertTrue(doubleScore < singleScore)

    windowRecords = windowSize * numWindows
    self._checkCounts(singleScorer.counts, length - windowRecords - 1, 0, 1,
                      windowRecords)
    self._checkCounts(doubleScorer.counts, length - windowRecords - 2, 0, 2,
                      windowRecords)
Ejemplo n.º 12
0
    def testFourFalseNegatives(self):
        """
        With four windows and not a single detection, the score should be
        within 0.01 of minus four times the false-negative weight.
        """
        length = 2000
        numWindows = 4
        windowSize = 10

        timestamps = generateTimestamps(datetime.datetime.now(),
                                        datetime.timedelta(minutes=5),
                                        length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        # No detections anywhere in the series.
        silent = pandas.Series([0] * length)

        scorer = Scorer(timestamps, silent, labels, windows, self.costMatrix,
                        probationaryPeriod=0)
        _, score = scorer.getScore()

        deviation = abs(score + 4 * self.costMatrix['fnWeight'])
        self.assertTrue(deviation < 0.01)

        windowRecords = windowSize * numWindows
        self._checkCounts(scorer.counts, length - windowRecords, 0, 0,
                          windowRecords)
Ejemplo n.º 13
0
  def test_firstTruePositiveWithinWindow(self):
    """
    A detection on the first record of a window should score close to
    costMatrix["tpWeight"]. The original author notes that, because sigmoids
    are used, the score is never exactly 1.
    """
    length = 10
    numWindows = 1
    windowSize = 2
    weights = {
      "tpWeight": 1.0,
      "fnWeight": 2.0,
      "fpWeight": 3.0,
      "tnWeight": 4.0,
    }

    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    length)
    detections = pandas.Series([0] * length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    # Mark the row whose timestamp equals the first window's first element.
    firstRows = timestamps[timestamps == windows[0][0]]
    detections[firstRows.index[0]] = 1

    scorer = Scorer(timestamps, detections, labels, windows, weights,
                    probationaryPeriod=0)

    # NOTE(review): this only requires score >= tpWeight - 1, which is much
    # looser than "close to tpWeight" -- confirm the intended tolerance.
    shortfall = weights["tpWeight"] - scorer.getScore()
    self.assertTrue(shortfall <= 1)
Ejemplo n.º 14
0
  def testEarlierFalsePositiveAfterWindowIsBetter(self):
    """
    Two false positives placed after the window are scored separately: A one
    record past the window's second boundary, B two records past it. The
    earlier one (A) must yield the higher score.
    """
    length = 10
    numWindows = 1
    windowSize = 2

    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    earlyDetections = pandas.Series([0] * length)
    lateDetections = pandas.Series([0] * length)
    _, windowEnd = windows[0]

    # Row immediately following the one that matches the window's second
    # boundary timestamp.
    afterWindow = timestamps[timestamps == windowEnd].index[0] + 1

    earlyDetections[afterWindow] = 1
    earlyScorer = Scorer(timestamps, earlyDetections, labels, windows,
                         self.costMatrix, probationaryPeriod=0)
    _, earlyScore = earlyScorer.getScore()

    lateDetections[afterWindow + 1] = 1
    lateScorer = Scorer(timestamps, lateDetections, labels, windows,
                        self.costMatrix, probationaryPeriod=0)
    _, lateScore = lateScorer.getScore()

    self.assertTrue(earlyScore > lateScore)

    # Both runs contain one false positive and miss the whole window.
    windowRecords = windowSize * numWindows
    for scorer in (earlyScorer, lateScorer):
      self._checkCounts(scorer.counts, length - windowRecords - 1, 0, 1,
                        windowRecords)
Ejemplo n.º 15
0
  def test_falsePositiveMeansNegativeScore(self):
    """
    A series whose only detection is a false positive must receive a score
    below zero.
    """
    length = 1000
    numWindows = 1
    windowSize = 10
    weights = {
      "tpWeight": 1.0,
      "fnWeight": 1.0,
      "fpWeight": 1.0,
      "tnWeight": 1.0,
    }

    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    length)
    detections = pandas.Series([0] * length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    # Single detection on the first record.
    detections[0] = 1

    scorer = Scorer(timestamps, detections, labels, windows, weights,
                    probationaryPeriod=0)
    result = scorer.getScore()

    self.assertTrue(result < 0)

    # Ensure counts are correct.
    windowRecords = windowSize * numWindows
    expectedCounts = (
      ('tn', length - windowRecords - 1),
      ('tp', 0),
      ('fp', 1),
      ('fn', windowRecords),
    )
    for key, expected in expectedCounts:
      self.assertEqual(scorer.counts[key], expected)
Ejemplo n.º 16
0
 def testScoringAllMetrics(self):
   """
   This tests an example set of detections, where all metrics have counts > 0.

   Detections are placed at offsets 0, 1 and 7 from the row matching the
   first window's first timestamp (expected: TP, additional TP, FP). The score
   is compared to the fixed reference value -0.9540 to 4 decimal places.
   """
   start = datetime.datetime.now()
   increment = datetime.timedelta(minutes=5)
   length = 100
   numWindows = 2
   windowSize = 5

   timestamps = generateTimestamps(start, increment, length)
   windows = generateWindows(timestamps, numWindows, windowSize)
   labels = generateLabels(timestamps, windows)
   predictions = pandas.Series([0]*length)

   index = timestamps[timestamps == windows[0][0]].index[0]
   # TP, add'l TP, and FP
   predictions[index] = 1
   predictions[index+1] = 1
   predictions[index+7] = 1

   scorer = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
     probationaryPeriod=0)
   (_, score) = scorer.getScore()

   # Use assertAlmostEqual: the assertAlmostEquals alias is deprecated and
   # was removed in Python 3.12.
   self.assertAlmostEqual(score, -0.9540, 4)
   self._checkCounts(scorer.counts, length-windowSize*numWindows-1, 2, 1, 8)
    def testTruePositivesWithDifferentWindowSizes(self):
        """
        A true positive on the left edge of a window should score the same no
        matter how wide the window is (window sizes 2 and 3 are compared).
        """
        length = 10
        numWindows = 1
        timestamps = generateTimestamps(datetime.datetime.now(),
                                        datetime.timedelta(minutes=5),
                                        length)

        # Case 1: window of size 2, detection on its first record.
        narrowSize = 2
        narrowWindows = generateWindows(timestamps, numWindows, narrowSize)
        narrowLabels = generateLabels(timestamps, narrowWindows)
        narrowRows = timestamps[timestamps == narrowWindows[0][0]]
        narrowDetections = pandas.Series([0] * length)
        narrowDetections[narrowRows.index[0]] = 1

        # Case 2: window of size 3, detection on its first record.
        wideSize = 3
        wideWindows = generateWindows(timestamps, numWindows, wideSize)
        wideLabels = generateLabels(timestamps, wideWindows)
        wideRows = timestamps[timestamps == wideWindows[0][0]]
        wideDetections = pandas.Series([0] * length)
        wideDetections[wideRows.index[0]] = 1

        narrowScorer = Scorer(timestamps, narrowDetections, narrowLabels,
                              narrowWindows, self.costMatrix,
                              probationaryPeriod=0)
        _, narrowScore = narrowScorer.getScore()
        wideScorer = Scorer(timestamps, wideDetections, wideLabels,
                            wideWindows, self.costMatrix,
                            probationaryPeriod=0)
        _, wideScore = wideScorer.getScore()

        self.assertEqual(narrowScore, wideScore)

        narrowRecords = narrowSize * numWindows
        self._checkCounts(narrowScorer.counts, length - narrowRecords, 1, 0,
                          narrowRecords - 1)
        wideRecords = wideSize * numWindows
        self._checkCounts(wideScorer.counts, length - wideRecords, 1, 0,
                          wideRecords - 1)
Ejemplo n.º 18
0
  def test_earlierFalsePositiveAfterWindowIsBetter(self):
    """
    Two false positives, A and B, each occur shortly after a window and are
    scored in separate runs. A is one record earlier than B, so the run with A
    must score higher than the run with B.
    """
    length = 10
    numWindows = 1
    windowSize = 2
    weights = {
      "tpWeight": 1.0,
      "fnWeight": 1.0,
      "fpWeight": 1.0,
      "tnWeight": 1.0,
    }

    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    length)
    earlyDetections = pandas.Series([0] * length)
    lateDetections = pandas.Series([0] * length)

    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    _, windowEnd = windows[0]

    # Row right after the one matching the window's second boundary.
    afterWindow = timestamps[timestamps == windowEnd].index[0] + 1

    earlyDetections[afterWindow] = 1
    earlyScorer = Scorer(timestamps, earlyDetections, labels, windows,
                         weights, probationaryPeriod=0)
    earlyScore = earlyScorer.getScore()

    lateDetections[afterWindow + 1] = 1
    lateScorer = Scorer(timestamps, lateDetections, labels, windows, weights,
                        probationaryPeriod=0)
    lateScore = lateScorer.getScore()

    self.assertTrue(earlyScore > lateScore)

    # Ensure counts are correct; both runs expect identical counts.
    windowRecords = windowSize * numWindows
    expectedCounts = (
      ('tn', length - windowRecords - 1),
      ('tp', 0),
      ('fp', 1),
      ('fn', windowRecords),
    )
    for scorer in (earlyScorer, lateScorer):
      for key, expected in expectedCounts:
        self.assertEqual(scorer.counts[key], expected)
Ejemplo n.º 19
0
  def testTruePositivesWithDifferentWindowSizes(self):
    """
    The score of a true positive at a window's left edge must not depend on
    the window width; windows of size 2 and 3 are compared.
    """
    length = 10
    numWindows = 1
    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    length)

    def buildCase(windowSize):
      # Windows, labels and one detection at the first window's first record.
      caseWindows = generateWindows(timestamps, numWindows, windowSize)
      caseLabels = generateLabels(timestamps, caseWindows)
      leftEdge = timestamps[timestamps == caseWindows[0][0]].index[0]
      caseDetections = pandas.Series([0] * length)
      caseDetections[leftEdge] = 1
      return caseWindows, caseLabels, caseDetections

    windowSize1 = 2
    windows1, labels1, predictions1 = buildCase(windowSize1)
    windowSize2 = 3
    windows2, labels2, predictions2 = buildCase(windowSize2)

    scorer1 = Scorer(timestamps, predictions1, labels1, windows1,
                     self.costMatrix, probationaryPeriod=0)
    _, score1 = scorer1.getScore()
    scorer2 = Scorer(timestamps, predictions2, labels2, windows2,
                     self.costMatrix, probationaryPeriod=0)
    _, score2 = scorer2.getScore()

    self.assertEqual(score1, score2)

    records1 = windowSize1 * numWindows
    self._checkCounts(scorer1.counts, length - records1, 1, 0, records1 - 1)
    records2 = windowSize2 * numWindows
    self._checkCounts(scorer2.counts, length - records2, 1, 0, records2 - 1)
Ejemplo n.º 20
0
  def test_twoFalsePositivesIsWorseThanOne(self):
    """False positives have an additive effect on the score. If there are two
    false positives, A and B, in a file, then the score given A and B should be
    lower (more negative) than the score given just A.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 1000
    numWindows = 1
    windowSize = 10

    timestamps = generateTimestamps(start, increment, length)
    predictions = pandas.Series([0]*length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    costMatrix = {"tpWeight": 1.0,
                  "fnWeight": 1.0,
                  "fpWeight": 1.0,
                  "tnWeight": 1.0}

    # First run: false positive A only.
    predictions[0] = 1

    scorer1 = Scorer(timestamps, predictions, labels, windows, costMatrix,
      probationaryPeriod=0)

    score1 = scorer1.getScore()

    # Second run: false positives A and B.
    predictions[1] = 1

    scorer2 = Scorer(timestamps, predictions, labels, windows, costMatrix,
      probationaryPeriod=0)

    score2 = scorer2.getScore()

    # assertGreater reports both scores on failure, unlike assertTrue(a > b).
    self.assertGreater(score1, score2)

    # Ensure counts are correct.
    self.assertEqual(scorer1.counts['tn'], length-windowSize*numWindows-1)
    self.assertEqual(scorer1.counts['tp'], 0)
    self.assertEqual(scorer1.counts['fp'], 1)
    self.assertEqual(scorer1.counts['fn'], windowSize*numWindows)

    self.assertEqual(scorer2.counts['tn'], length-windowSize*numWindows-2)
    self.assertEqual(scorer2.counts['tp'], 0)
    self.assertEqual(scorer2.counts['fp'], 2)
    self.assertEqual(scorer2.counts['fn'], windowSize*numWindows)
    def testTruePositiveAtRightEdgeOfWindow(self):
        """
        True positives at the right edge of a window should yield a score of
        approximately zero; the scaled sigmoid scoring function crosses zero
        between a given window's last timestamp and the next timestamp
        (immediately following the window).
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 1000
        numWindows = 1
        windowSize = 100

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        predictions = pandas.Series([0] * length)

        # Make prediction at end of the window; TP
        index = timestamps[timestamps == windows[0][1]].index[0]
        predictions[index] = 1
        scorer1 = Scorer(timestamps,
                         predictions,
                         labels,
                         windows,
                         self.costMatrix,
                         probationaryPeriod=0)
        (_, score1) = scorer1.getScore()
        # Make prediction just after the window; FP
        predictions[index] = 0
        index += 1
        predictions[index] = 1
        scorer2 = Scorer(timestamps,
                         predictions,
                         labels,
                         windows,
                         self.costMatrix,
                         probationaryPeriod=0)
        (_, score2) = scorer2.getScore()

        # TP score + FP score + 1 should be very close to 0; the 1 is added to
        # account for the subsequent FN contribution.
        # assertAlmostEqual replaces the deprecated assertAlmostEquals alias
        # (removed in Python 3.12).
        self.assertAlmostEqual(score1 + score2 + 1, 0.0, 3)
        self._checkCounts(scorer1.counts, length - windowSize * numWindows, 1,
                          0, windowSize * numWindows - 1)
        self._checkCounts(scorer2.counts, length - windowSize * numWindows - 1,
                          0, 1, windowSize * numWindows)
    def testEarlierTruePositiveIsBetter(self):
        """
        Two detectors each score one true positive in the same window; the
        one whose detection comes earlier in the window must receive the
        higher score.
        """
        length = 10
        numWindows = 1
        windowSize = 2

        timestamps = generateTimestamps(datetime.datetime.now(),
                                        datetime.timedelta(minutes=5),
                                        length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        earlyDetections = pandas.Series([0] * length)
        lateDetections = pandas.Series([0] * length)
        windowStart, windowEnd = windows[0]

        # Detector 1: detection at the window's first boundary timestamp.
        earlyRows = timestamps[timestamps == windowStart]
        earlyDetections[earlyRows.index[0]] = 1
        earlyScorer = Scorer(timestamps, earlyDetections, labels, windows,
                             self.costMatrix, probationaryPeriod=0)
        _, earlyScore = earlyScorer.getScore()

        # Detector 2: detection at the window's second boundary timestamp.
        lateRows = timestamps[timestamps == windowEnd]
        lateDetections[lateRows.index[0]] = 1
        lateScorer = Scorer(timestamps, lateDetections, labels, windows,
                            self.costMatrix, probationaryPeriod=0)
        _, lateScore = lateScorer.getScore()

        failureMessage = ("The earlier TP score is not greater than "
                          "the later TP. They are %f and %f, respectively." %
                          (earlyScore, lateScore))
        self.assertTrue(earlyScore > lateScore, failureMessage)

        # Each run has exactly one true positive inside the window.
        windowRecords = windowSize * numWindows
        for scorer in (earlyScorer, lateScorer):
            self._checkCounts(scorer.counts, length - windowRecords, 1, 0,
                              windowRecords - 1)
Ejemplo n.º 23
0
    def testFalsePositiveScaling(self):
        """
        Test scaling the weight of false positives results in an approximate
        balance with the true positives.

        The contributions of TP and FP scores should approximately cancel;
        i.e. total score = 0. With x windows, this total score should on
        average decrease x/2 because of x FNs. Thus, the acceptable range for
        score should be centered about -x/2.

        The detections are drawn with random.sample and are not seeded, so the
        check is statistical: the average over 20 runs must fall in
        [-1.5, 0.5].
        """
        start = datetime.datetime.now()
        increment = datetime.timedelta(minutes=5)
        length = 100
        numWindows = 1
        windowSize = 10

        timestamps = generateTimestamps(start, increment, length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)

        # Scale for 10% = windowSize/length. Work on a copy so the cost matrix
        # held on the test instance is not modified by this test.
        costMatrix = dict(self.costMatrix)
        costMatrix["fpWeight"] = 0.11

        # Make arbitrary detections, score, repeat
        scores = []
        # range (not xrange) keeps the loop valid on both Python 2 and 3.
        for _ in range(20):
            predictions = pandas.Series([0] * length)
            indices = random.sample(range(length), 10)
            predictions[indices] = 1
            scorer = Scorer(timestamps,
                            predictions,
                            labels,
                            windows,
                            costMatrix,
                            probationaryPeriod=0)
            (_, score) = scorer.getScore()
            scores.append(score)

        avgScore = sum(scores) / float(len(scores))

        self.assertTrue(
            -1.5 <= avgScore <= 0.5, "The average score across 20 sets "
            "of random detections is %f, which is not within the acceptable range "
            "-1.5 to 0.5." % avgScore)
Ejemplo n.º 24
0
  def test_secondTruePositiveWithinWindowIsIgnored(self):
    """
    When a window contains two true positives, only the earlier one should
    determine the score: adding the second detection must not change it.
    """
    length = 10
    numWindows = 1
    windowSize = 2
    weights = {
      "tpWeight": 1.0,
      "fnWeight": 2.0,
      "fpWeight": 3.0,
      "tnWeight": 4.0,
    }

    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    length)
    detections = pandas.Series([0] * length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    windowStart, windowEnd = windows[0]

    # First run: a single detection at the window's first boundary.
    startRows = timestamps[timestamps == windowStart]
    detections[startRows.index[0]] = 1
    firstScorer = Scorer(timestamps, detections, labels, windows, weights,
                         probationaryPeriod=0)
    firstScore = firstScorer.getScore()

    # Second run: additionally detect at the window's second boundary.
    endRows = timestamps[timestamps == windowEnd]
    detections[endRows.index[0]] = 1
    secondScorer = Scorer(timestamps, detections, labels, windows, weights,
                          probationaryPeriod=0)
    secondScore = secondScorer.getScore()

    self.assertEqual(firstScore, secondScore)
    def testOnlyScoreFirstTruePositiveWithinWindow(self):
        """
        Multiple detections inside a single window are all counted as true
        positives, yet only the earliest should influence the score: a second
        in-window detection must leave the score unchanged.
        """
        length = 10
        numWindows = 1
        windowSize = 2

        timestamps = generateTimestamps(datetime.datetime.now(),
                                        datetime.timedelta(minutes=5),
                                        length)
        windows = generateWindows(timestamps, numWindows, windowSize)
        labels = generateLabels(timestamps, windows)
        detections = pandas.Series([0] * length)
        windowStart, windowEnd = windows[0]

        # First run: one detection at the window's first boundary timestamp.
        startRows = timestamps[timestamps == windowStart]
        detections[startRows.index[0]] = 1
        firstScorer = Scorer(timestamps, detections, labels, windows,
                             self.costMatrix, probationaryPeriod=0)
        _, firstScore = firstScorer.getScore()

        # Second run: add a detection at the window's second boundary.
        endRows = timestamps[timestamps == windowEnd]
        detections[endRows.index[0]] = 1
        secondScorer = Scorer(timestamps, detections, labels, windows,
                              self.costMatrix, probationaryPeriod=0)
        _, secondScore = secondScorer.getScore()

        self.assertEqual(firstScore, secondScore)

        windowRecords = windowSize * numWindows
        self._checkCounts(firstScorer.counts, length - windowRecords, 1, 0,
                          windowRecords - 1)
        self._checkCounts(secondScorer.counts, length - windowRecords, 2, 0,
                          windowRecords - 2)
Ejemplo n.º 26
0
  def test_earlierTruePositiveIsBetter(self):
    """
    If two algorithms both get a true positive within a window, the algorithm
    that labeled a true positive earlier in the window will get a higher score.

    Two independent prediction series are scored against the same window and
    the same explicit cost matrix: the first flags t1 and the second flags t2,
    where (t1, t2) is the pair stored in windows[0].
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 10
    numWindows = 1
    windowSize = 2

    timestamps = generateTimestamps(start, increment, length)

    predictions1 = pandas.Series([0]*length)
    predictions2 = pandas.Series([0]*length)

    windows = generateWindows(timestamps, numWindows, windowSize)

    labels = generateLabels(timestamps, windows)
    t1, t2 = windows[0]

    costMatrix = {"tpWeight": 1.0,
                  "fnWeight": 2.0,
                  "fpWeight": 3.0,
                  "tnWeight": 4.0}

    # Algorithm 1: single detection at t1.
    index1 = timestamps[timestamps == t1].index[0]
    predictions1[index1] = 1

    scorer1 = Scorer(timestamps, predictions1, labels, windows, costMatrix,
      probationaryPeriod=0)
    score1 = scorer1.getScore()

    # Algorithm 2: single detection at t2.
    index2 = timestamps[timestamps == t2].index[0]
    predictions2[index2] = 1

    scorer2 = Scorer(timestamps, predictions2, labels, windows, costMatrix,
      probationaryPeriod=0)
    score2 = scorer2.getScore()

    # assertGreater reports both operands on failure, unlike assertTrue(a > b).
    self.assertGreater(score1, score2)
Ejemplo n.º 27
0
    def testEarlierFalsePositiveAfterWindowIsBetter(self):
        """
        Of two false positives after the window, the earlier one scores higher.

        The first run flags the row right after the second value of the
        window pair; the second run flags the row after that one. Each run
        holds exactly one detection, and both runs must report the same
        counts.
        """
        rowCount, windowCount, windowLength = 10, 1, 2

        timestamps = generateTimestamps(datetime.datetime.now(),
                                        datetime.timedelta(minutes=5),
                                        rowCount)
        windows = generateWindows(timestamps, windowCount, windowLength)
        labels = generateLabels(timestamps, windows)
        earlyRun = pandas.Series([0] * rowCount)
        lateRun = pandas.Series([0] * rowCount)
        _, windowEnd = windows[0]

        rowAfterWindow = timestamps[timestamps == windowEnd].index[0] + 1

        earlyRun[rowAfterWindow] = 1
        earlyScorer = Scorer(timestamps, earlyRun, labels, windows,
                             self.costMatrix, probationaryPeriod=0)
        _, earlyScore = earlyScorer.getScore()

        lateRun[rowAfterWindow + 1] = 1
        lateScorer = Scorer(timestamps, lateRun, labels, windows,
                            self.costMatrix, probationaryPeriod=0)
        _, lateScore = lateScorer.getScore()

        self.assertTrue(earlyScore > lateScore)

        # Both runs are expected to produce identical counts.
        windowRows = windowLength * windowCount
        expectedCounts = (rowCount - windowRows - 1, 0, 1, windowRows)
        self._checkCounts(earlyScorer.counts, *expectedCounts)
        self._checkCounts(lateScorer.counts, *expectedCounts)
Ejemplo n.º 28
0
    def testTwoFalsePositivesIsWorseThanOne(self):
        """
        A second false positive must push the score further down.

        The series is scored once with a detection at row 0 only and once
        more after row 1 is flagged as well; the second score has to be
        strictly lower than the first.
        """
        rowCount, windowCount, windowLength = 1000, 1, 10

        timestamps = generateTimestamps(datetime.datetime.now(),
                                        datetime.timedelta(minutes=5),
                                        rowCount)
        windows = generateWindows(timestamps, windowCount, windowLength)
        labels = generateLabels(timestamps, windows)
        detections = pandas.Series([0] * rowCount)

        # Single detection.
        detections[0] = 1
        singleScorer = Scorer(timestamps, detections, labels, windows,
                              self.costMatrix, probationaryPeriod=0)
        _, singleScore = singleScorer.getScore()

        # Same series with one more detection.
        detections[1] = 1
        doubleScorer = Scorer(timestamps, detections, labels, windows,
                              self.costMatrix, probationaryPeriod=0)
        _, doubleScore = doubleScorer.getScore()

        self.assertTrue(doubleScore < singleScore)

        windowRows = windowLength * windowCount
        self._checkCounts(singleScorer.counts,
                          rowCount - windowRows - 1, 0, 1, windowRows)
        self._checkCounts(doubleScorer.counts,
                          rowCount - windowRows - 2, 0, 2, windowRows)
Ejemplo n.º 29
0
  def testTruePositiveAtRightEdgeOfWindow(self):
    """
    True positives at the right edge of a window should yield a score of
    approximately zero; the scaled sigmoid scoring function crosses zero
    between a given window's last timestamp and the next timestamp (the one
    immediately following the window).

    The series is scored twice: once with a single detection on the window's
    last timestamp (a TP) and once with that detection moved one row later
    (an FP). The two scores plus one must be zero to three decimal places.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 1000
    numWindows = 1
    windowSize = 100

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    predictions = pandas.Series([0]*length)

    # Make prediction at end of the window; TP
    index = timestamps[timestamps == windows[0][1]].index[0]
    predictions[index] = 1
    scorer1 = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score1) = scorer1.getScore()
    # Make prediction just after the window; FP
    predictions[index] = 0
    index += 1
    predictions[index] = 1
    scorer2 = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score2) = scorer2.getScore()

    # TP score + FP score + 1 should be very close to 0; the 1 is added to
    # account for the subsequent FN contribution.
    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
    # which newer Python versions no longer provide.
    self.assertAlmostEqual(score1 + score2 + 1, 0.0, 3)
    self._checkCounts(scorer1.counts, length-windowSize*numWindows, 1, 0,
      windowSize*numWindows-1)
    self._checkCounts(scorer2.counts, length-windowSize*numWindows-1, 0, 1,
      windowSize*numWindows)
Ejemplo n.º 30
0
  def testFalsePositiveScaling(self):
    """
    Test scaling the weight of false positives results in an approximate
    balance with the true positives.

    The contributions of TP and FP scores should approximately cancel; i.e.
    total score = 0. With x windows, this total score should on average
    decrease x/2 because of x FNs. Thus, the acceptable range for score should
    be centered about -x/2.

    The detections are drawn with the module-level random generator, so the
    individual scores differ from run to run; only their average over 20
    draws is asserted.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 100
    numWindows = 1
    windowSize = 10

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)

    # Work on a copy so the reduced FP weight does not leak into
    # self.costMatrix, which other tests read.
    costMatrix = dict(self.costMatrix)
    # Scale for 10% = windowSize/length
    costMatrix["fpWeight"] = 0.11

    # Make arbitrary detections, score, repeat
    scores = []
    for _ in range(20):
      predictions = pandas.Series([0]*length)
      indices = random.sample(range(length), 10)
      predictions[indices] = 1
      scorer = Scorer(timestamps, predictions, labels, windows, costMatrix,
        probationaryPeriod=0)
      (_, score) = scorer.getScore()
      scores.append(score)

    avgScore = sum(scores)/float(len(scores))

    self.assertTrue(-1.5 <= avgScore <= 0.5, "The average score across 20 sets "
      "of random detections is %f, which is not within the acceptable range "
      "-1.5 to 0.5." % avgScore)
Ejemplo n.º 31
0
  def testFalsePositiveMeansNegativeScore(self):
    """
    A single detection at row 0, counted as a false positive, must produce a
    score below zero.
    """
    rowCount, windowCount, windowLength = 1000, 1, 10

    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    rowCount)
    windows = generateWindows(timestamps, windowCount, windowLength)
    labels = generateLabels(timestamps, windows)
    detections = pandas.Series([0] * rowCount)
    detections[0] = 1

    scorer = Scorer(timestamps, detections, labels, windows, self.costMatrix,
                    probationaryPeriod=0)
    _, total = scorer.getScore()

    self.assertTrue(total < 0)

    windowRows = windowLength * windowCount
    self._checkCounts(scorer.counts, rowCount - windowRows - 1, 0, 1,
                      windowRows)
Ejemplo n.º 32
0
  def testFourFalseNegatives(self):
    """
    With four windows and no detections at all, the score must lie within
    0.01 of minus four times the false negative weight.
    """
    rowCount, windowCount, windowLength = 2000, 4, 10

    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    rowCount)
    windows = generateWindows(timestamps, windowCount, windowLength)
    labels = generateLabels(timestamps, windows)
    detections = pandas.Series([0] * rowCount)

    scorer = Scorer(timestamps, detections, labels, windows, self.costMatrix,
                    probationaryPeriod=0)
    _, total = scorer.getScore()

    # Distance between the score and the expected -4 * fnWeight.
    deviation = abs(total + 4 * self.costMatrix['fnWeight'])
    self.assertTrue(deviation < 0.01)

    windowRows = windowLength * windowCount
    self._checkCounts(scorer.counts, rowCount - windowRows, 0, 0, windowRows)
Ejemplo n.º 33
0
  def testEarlierTruePositiveIsBetter(self):
    """
    Between two algorithms that each detect once inside the same window, the
    one detecting earlier must receive the strictly higher score. Both runs
    are expected to report the same counts.
    """
    rowCount, windowCount, windowLength = 10, 1, 2

    timestamps = generateTimestamps(datetime.datetime.now(),
                                    datetime.timedelta(minutes=5),
                                    rowCount)
    windows = generateWindows(timestamps, windowCount, windowLength)
    labels = generateLabels(timestamps, windows)
    earlyRun = pandas.Series([0] * rowCount)
    lateRun = pandas.Series([0] * rowCount)
    firstStamp, secondStamp = windows[0]

    # Run flagged on the first value of the window pair.
    earlyRun[timestamps[timestamps == firstStamp].index[0]] = 1
    earlyScorer = Scorer(timestamps, earlyRun, labels, windows,
                         self.costMatrix, probationaryPeriod=0)
    _, earlyScore = earlyScorer.getScore()

    # Run flagged on the second value of the window pair.
    lateRun[timestamps[timestamps == secondStamp].index[0]] = 1
    lateScorer = Scorer(timestamps, lateRun, labels, windows,
                        self.costMatrix, probationaryPeriod=0)
    _, lateScore = lateScorer.getScore()

    failureNote = ("The earlier TP score is not greater than "
                   "the later TP. They are %f and %f, respectively."
                   % (earlyScore, lateScore))
    self.assertTrue(earlyScore > lateScore, failureNote)

    windowRows = windowLength * windowCount
    expectedCounts = (rowCount - windowRows, 1, 0, windowRows - 1)
    self._checkCounts(earlyScorer.counts, *expectedCounts)
    self._checkCounts(lateScorer.counts, *expectedCounts)
Ejemplo n.º 34
0
  def testOneFalsePositiveNoWindow(self):
    """
    When there is no window (i.e. no anomaly), a false positive should still
    result in a negative score, specifically negative the FP weight.

    numWindows is 0, so the single detection placed at row 0 is the only
    contribution to the score.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 1000
    numWindows = 0
    windowSize = 10

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    labels = generateLabels(timestamps, windows)
    predictions = pandas.Series([0]*length)

    predictions[0] = 1
    scorer = Scorer(timestamps, predictions, labels, windows, self.costMatrix,
      probationaryPeriod=0)
    (_, score) = scorer.getScore()

    # assertEqual performs the same == comparison as before but reports both
    # values when it fails.
    self.assertEqual(score, -self.costMatrix["fpWeight"])
    self._checkCounts(scorer.counts, length-windowSize*numWindows-1, 0, 1,
      windowSize*numWindows)