Ejemplo n.º 1
0
    def testUpdateAnomalyLikelihoods(self):
        """
        A slightly more complex test. This calls estimateAnomalyLikelihoods
        to estimate the distribution on fake data, followed by several calls
        to updateAnomalyLikelihoods. Finally it checks that the moving
        average state built up incrementally matches the state computed from
        all of the data in one batch.
        """

        # ------------------------------------------
        # Step 1. Generate an initial estimate using fake distribution of anomaly
        # scores.
        data1 = _generateSampleData(mean=0.2)[0:1000]
        _, _, estimatorParams = an.estimateAnomalyLikelihoods(
            data1, averagingWindow=5)

        # ------------------------------------------
        # Step 2. Generate some new data with a higher average anomaly
        # score. Using the estimator from step 1, to compute likelihoods. Now we
        # should see a lot more anomalies.
        data2 = _generateSampleData(mean=0.6)[0:300]
        likelihoods2, avgRecordList2, estimatorParams2 = (
            an.updateAnomalyLikelihoods(data2, estimatorParams))
        self.assertEqual(len(likelihoods2), len(data2))
        self.assertEqual(len(avgRecordList2), len(data2))
        # Validate the params produced by this update, not the step 1 input.
        self.assertTrue(an.isValidEstimatorParams(estimatorParams2))

        # The new running total should be different
        self.assertNotEqual(estimatorParams2["movingAverage"]["total"],
                            estimatorParams["movingAverage"]["total"])

        # We should have many more samples where likelihood is < 0.01, but not all
        numAnomalous2 = numpy.sum(likelihoods2 < 0.01)
        self.assertGreaterEqual(numAnomalous2, 25)
        self.assertLessEqual(numAnomalous2, 250)

        # ------------------------------------------
        # Step 3. Generate some new data with the expected average anomaly score. We
        # should see fewer anomalies than in Step 2.
        data3 = _generateSampleData(mean=0.2)[0:1000]
        likelihoods3, avgRecordList3, estimatorParams3 = (
            an.updateAnomalyLikelihoods(data3, estimatorParams2))

        self.assertEqual(len(likelihoods3), len(data3))
        self.assertEqual(len(avgRecordList3), len(data3))
        self.assertTrue(an.isValidEstimatorParams(estimatorParams3))

        # The new running total should be different
        self.assertNotEqual(estimatorParams3["movingAverage"]["total"],
                            estimatorParams["movingAverage"]["total"])
        self.assertNotEqual(estimatorParams3["movingAverage"]["total"],
                            estimatorParams2["movingAverage"]["total"])

        # We should have a small number of samples where likelihood is < 0.01,
        # but at least one
        numAnomalous3 = numpy.sum(likelihoods3 < 0.01)
        self.assertGreaterEqual(numAnomalous3, 1)
        self.assertLessEqual(numAnomalous3, 100)

        # ------------------------------------------
        # Step 4. Validate that sending data incrementally is the same as sending
        # in one batch. Build a new list so data1 is not modified in place.
        allData = data1 + data2 + data3

        # Compute moving average of all the data and check it's the same
        _, historicalValuesAll, totalAll = an._anomalyScoreMovingAverage(
            allData, windowSize=5)
        self.assertEqual(
            sum(historicalValuesAll),
            sum(estimatorParams3["movingAverage"]["historicalValues"]))
        self.assertEqual(totalAll, estimatorParams3["movingAverage"]["total"])
    def testVeryFewScores(self):
        """
        Exercise estimateAnomalyLikelihoods and updateAnomalyLikelihoods with
        very few scores: first two records, then none at all.
        """
        # Sample data generated around 42.0 with a tiny variance
        samples = _generateSampleData(mean=42.0, variance=1e-10)

        # Two records must be enough to produce a valid estimate
        firstTwo = samples[0:2]
        _, _, estimatorParams = an.estimateAnomalyLikelihoods(firstTwo)
        self.assertTrue(an.isValidEstimatorParams(estimatorParams))

        # The estimated mean should match the score of the first record
        estimatedMean = estimatorParams["distribution"]["mean"]
        self.assertWithinEpsilon(estimatedMean, samples[0][2])

        noScores = numpy.zeros(0)

        # Estimating from an empty input must be rejected
        with self.assertRaises(ValueError):
            an.estimateAnomalyLikelihoods(noScores)

        # Updating with an empty input must be rejected as well
        with self.assertRaises(ValueError):
            an.updateAnomalyLikelihoods(noScores, estimatorParams)
    def testEstimateAnomalyLikelihoodsMalformedRecords(self):
        """
        Call estimateAnomalyLikelihoods with 1000 sample records followed by
        four malformed ones, and check that only the 1000 well-formed records
        show up in the results.
        """
        # Wrong-length tuples, an empty tuple, and a bare int ("(2)" is not a
        # tuple)
        malformed = [(2, 2)] + [(2, 2, 2, 2)] + [()] + [(2)]
        records = _generateSampleData(mean=0.2)[0:1000] + malformed

        likelihoods, avgRecordList, estimatorParams = (
            an.estimateAnomalyLikelihoods(records[0:1004]))

        # The four malformed records must not contribute any output
        self.assertEqual(len(likelihoods), 1000)
        self.assertEqual(len(avgRecordList), 1000)
        self.assertTrue(an.isValidEstimatorParams(estimatorParams))

        # Sum of the averaged scores (third field of every averaged record)
        total = sum(rec[2] for rec in avgRecordList)

        # NOTE(review): assertTrue takes (expr, msg), so this only checks that
        # the stored total is truthy; it does not compare it with `total`.
        # Confirm the intended comparison before tightening this assertion.
        self.assertTrue(estimatorParams["movingAverage"]["total"], total)

        # The estimated mean should equal the mean of the averaged scores
        expectedMean = total / float(len(avgRecordList))
        self.assertWithinEpsilon(estimatorParams["distribution"]["mean"],
                                 expectedMean)
    def testEstimateAnomalyLikelihoods(self):
        """
        Estimate the distribution from 1000 fake anomaly score records with
        estimateAnomalyLikelihoods and validate the returned values.
        """
        samples = _generateSampleData(mean=0.2)

        result = an.estimateAnomalyLikelihoods(samples[0:1000])
        likelihoods, avgRecordList, estimatorParams = result

        # One likelihood and one averaged record per input record
        self.assertEqual(len(likelihoods), 1000)
        self.assertEqual(len(avgRecordList), 1000)
        self.assertTrue(an.isValidEstimatorParams(estimatorParams))

        # Sum of the averaged scores (third field of every averaged record)
        total = sum(rec[2] for rec in avgRecordList)

        # NOTE(review): assertTrue takes (expr, msg), so this only checks that
        # the stored total is truthy; it does not compare it with `total`.
        # Confirm the intended comparison before tightening this assertion.
        self.assertTrue(estimatorParams["movingAverage"]["total"], total)

        # The estimated mean should equal the mean of the averaged scores
        expectedMean = total / float(len(avgRecordList))
        self.assertWithinEpsilon(estimatorParams["distribution"]["mean"],
                                 expectedMean)

        # The number of points below 2% likelihood should be small but
        # non-zero; an exact 2% cannot be asserted because the data is random.
        numUnlikely = numpy.sum(likelihoods < 0.02)
        self.assertLessEqual(numUnlikely, 50)
        self.assertGreaterEqual(numUnlikely, 1)
Ejemplo n.º 5
0
  def testVeryFewScores(self):
    """
    Check estimateAnomalyLikelihoods and updateAnomalyLikelihoods when they
    are given very few scores (two records) or no scores at all.
    """
    sampleRecords = _generateSampleData(mean=42.0, variance=1e-10)

    # An estimate built from just two records must still be valid
    _, _, estimatorParams = an.estimateAnomalyLikelihoods(sampleRecords[0:2])
    self.assertTrue(an.isValidEstimatorParams(estimatorParams))

    # The estimated mean should match the score of the first record
    distribution = estimatorParams["distribution"]
    firstScore = sampleRecords[0][2]
    self.assertWithinEpsilon(distribution["mean"], firstScore)

    # With no data points, neither estimating nor updating is possible
    emptyData = numpy.zeros(0)
    with self.assertRaises(ValueError):
      an.estimateAnomalyLikelihoods(emptyData)

    with self.assertRaises(ValueError):
      an.updateAnomalyLikelihoods(emptyData, estimatorParams)
Ejemplo n.º 6
0
  def testEstimateAnomalyLikelihoodsMalformedRecords(self):
    """
    This calls estimateAnomalyLikelihoods with malformed records, which should
    be quietly skipped.
    """

    # Generate a fake distribution of anomaly scores and append malformed
    # records. The well-formed data is truncated to 1000 records *before* the
    # malformed ones are appended, so the malformed records are really passed
    # to the estimator instead of being sliced away again.
    malformedRecords = [(2, 2), (2, 2, 2, 2), (), (2)]  # "(2)" is a bare int
    data1 = _generateSampleData(mean=0.2)[0:1000] + malformedRecords

    likelihoods, avgRecordList, estimatorParams = (
      an.estimateAnomalyLikelihoods(data1)
    )

    # Only the 1000 well-formed records may contribute to the results
    self.assertEqual(len(likelihoods), 1000)
    self.assertEqual(len(avgRecordList), 1000)
    self.assertTrue(an.isValidEstimatorParams(estimatorParams))

    # Sum of the averaged scores (third field of every averaged record)
    avgParams = estimatorParams["movingAverage"]
    total = sum(v[2] for v in avgRecordList)

    # NOTE(review): assertTrue takes (expr, msg), so this only checks that the
    # stored total is truthy; it does not compare it with `total`. Confirm the
    # intended comparison before tightening this assertion.
    self.assertTrue(avgParams["total"], total)

    # Check that the estimated mean is correct
    dParams = estimatorParams["distribution"]
    self.assertWithinEpsilon(dParams["mean"],
                             total / float(len(avgRecordList)))
Ejemplo n.º 7
0
  def testEstimateAnomalyLikelihoods(self):
    """
    Run estimateAnomalyLikelihoods on 1000 fake anomaly score records and
    check the shape and basic statistics of what it returns.
    """
    fakeScores = _generateSampleData(mean=0.2)[0:1000]

    likelihoods, avgRecordList, estimatorParams = (
      an.estimateAnomalyLikelihoods(fakeScores)
    )

    # One likelihood and one averaged record per input record
    for output in (likelihoods, avgRecordList):
      self.assertEqual(len(output), 1000)
    self.assertTrue(an.isValidEstimatorParams(estimatorParams))

    # Sum of the averaged scores (third field of every averaged record)
    total = 0
    for averaged in avgRecordList:
      total += averaged[2]

    # NOTE(review): assertTrue takes (expr, msg), so this only checks that the
    # stored total is truthy; it does not compare it with `total`. Confirm the
    # intended comparison before tightening this assertion.
    movingAverage = estimatorParams["movingAverage"]
    self.assertTrue(movingAverage["total"], total)

    # The estimated mean should equal the mean of the averaged scores
    distribution = estimatorParams["distribution"]
    meanOfAverages = total / float(len(avgRecordList))
    self.assertWithinEpsilon(distribution["mean"], meanOfAverages)

    # Points below 2% likelihood should be rare but present. The bounds are
    # loose because the data is random.
    lowLikelihoodCount = numpy.sum(likelihoods < 0.02)
    self.assertLessEqual(lowLikelihoodCount, 50)
    self.assertGreaterEqual(lowLikelihoodCount, 1)
Ejemplo n.º 8
0
  def testFlatAnomalyScores(self):
    """
    Call estimateAnomalyLikelihoods with flat (tiny variance) distributions
    and make sure nothing crashes and the likelihoods behave sensibly.
    """
    # --- Flat distribution around 42.0 ---
    flatData = _generateSampleData(mean=42.0, variance=1e-10)

    likelihoods, avgRecordList, estimatorParams = (
      an.estimateAnomalyLikelihoods(flatData[0:1000])
    )
    self.assertEqual(len(likelihoods), 1000)
    self.assertEqual(len(avgRecordList), 1000)
    self.assertTrue(an.isValidEstimatorParams(estimatorParams))

    # The estimated mean should match the score of the first record
    estimatedMean = estimatorParams["distribution"]["mean"]
    self.assertWithinEpsilon(estimatedMean, flatData[0][2])

    # Values that deviate even slightly from the mean should get a
    # likelihood of (nearly) zero almost immediately.
    shiftedData = _generateSampleData(mean=42.5, variance=1e-10)
    shiftedLikelihoods, _, _ = (
      an.updateAnomalyLikelihoods(shiftedData[0:10], estimatorParams)
    )
    self.assertLessEqual(shiftedLikelihoods.sum(), 0.01)

    # --- Edge case: anomaly scores very close to 0 ---
    # Here the likelihood is not allowed to get too low: an average score of
    # 0.1 should still be essentially zero, but an average of 0.05 should
    # yield a higher likelihood.
    nearZeroData = _generateSampleData(mean=0.01, variance=1e-6)
    _, _, estimatorParams3 = (
      an.estimateAnomalyLikelihoods(nearZeroData[0:1000])
    )

    data4 = _generateSampleData(mean=0.1, variance=1e-6)
    likelihoods4, _, estimatorParams4 = (
      an.updateAnomalyLikelihoods(data4[0:20], estimatorParams3)
    )

    # With an average of 0.1 the tail of the likelihoods should go to zero
    tailMean4 = likelihoods4[10:].mean()
    self.assertLessEqual(tailMean4, 0.002)

    data5 = _generateSampleData(mean=0.05, variance=1e-6)
    likelihoods5, _, _ = (
      an.updateAnomalyLikelihoods(data5[0:20], estimatorParams4)
    )

    # With an average of 0.05 the likelihoods should be low, but not near zero
    tailMean5 = likelihoods5[10:].mean()
    self.assertLessEqual(tailMean5, 0.28)
    self.assertGreater(tailMean5, 0.015)
Ejemplo n.º 9
0
  def testFlatAnomalyScores(self):
    """
    Feed flat distributions (tiny variance) to estimateAnomalyLikelihoods and
    updateAnomalyLikelihoods and verify that they cope with them.
    """
    # Part 1: a flat distribution around 42.0
    baseline = _generateSampleData(mean=42.0, variance=1e-10)
    baselineResult = an.estimateAnomalyLikelihoods(baseline[0:1000])
    likelihoods, avgRecordList, estimatorParams = baselineResult

    self.assertEqual(len(likelihoods), 1000)
    self.assertEqual(len(avgRecordList), 1000)
    self.assertTrue(an.isValidEstimatorParams(estimatorParams))

    # The estimated mean should match the score of the first record
    distribution = estimatorParams["distribution"]
    self.assertWithinEpsilon(distribution["mean"], baseline[0][2])

    # Part 2: slightly different values should get a probability of
    # (nearly) zero very quickly.
    offMean = _generateSampleData(mean=42.5, variance=1e-10)
    offMeanResult = an.updateAnomalyLikelihoods(offMean[0:10],
                                                estimatorParams)
    likelihoods2, _, _ = offMeanResult
    self.assertLessEqual(likelihoods2.sum(), 0.01)

    # Part 3: anomaly scores very close to 0. The likelihood is kept from
    # getting too low here: an average of 0.1 should be essentially zero,
    # whereas an average of 0.05 should come out higher.
    tinyScores = _generateSampleData(mean=0.01, variance=1e-6)
    _, _, paramsTiny = an.estimateAnomalyLikelihoods(tinyScores[0:1000])

    scoresAtTenth = _generateSampleData(mean=0.1, variance=1e-6)
    likelihoods4, _, paramsAfterTenth = an.updateAnomalyLikelihoods(
      scoresAtTenth[0:20], paramsTiny)

    # Average of 0.1: the later likelihoods should go to zero
    self.assertLessEqual(likelihoods4[10:].mean(), 0.002)

    scoresAtTwentieth = _generateSampleData(mean=0.05, variance=1e-6)
    likelihoods5, _, _ = an.updateAnomalyLikelihoods(
      scoresAtTwentieth[0:20], paramsAfterTenth)

    # Average of 0.05: the later likelihoods should be low but not near zero
    laterMean = likelihoods5[10:].mean()
    self.assertLessEqual(laterMean, 0.28)
    self.assertGreater(laterMean, 0.015)
Ejemplo n.º 10
0
    def testEstimateAnomalyLikelihoodsCategoryValues(self):
        """
        Call estimateAnomalyLikelihoods with records whose value field holds
        category strings instead of numbers, and check the output sizes.
        """
        numRecords = 10
        firstTimestamp = datetime.datetime(2017, 1, 1, 0, 0, 0)
        step = datetime.timedelta(minutes=5)

        # Ten records, five minutes apart: (timestamp, category, raw score)
        timestamps = [firstTimestamp + (idx * step)
                      for idx in xrange(numRecords)]
        categories = ["a", "b", "c", "d", "e"] * 2
        rawScores = [0.1 * idx for idx in xrange(numRecords)]
        records = zip(timestamps, categories, rawScores)

        likelihoods, avgRecordList, estimatorParams = (
            an.estimateAnomalyLikelihoods(records))

        self.assertEqual(len(likelihoods), 10)
        self.assertEqual(len(avgRecordList), 10)
        self.assertTrue(an.isValidEstimatorParams(estimatorParams))
Ejemplo n.º 11
0
  def testEstimateAnomalyLikelihoodsCategoryValues(self):
    """
    Check that estimateAnomalyLikelihoods accepts records whose value field
    is a category string and returns one result per record.
    """
    origin = datetime.datetime(2017, 1, 1, 0, 0, 0)
    interval = datetime.timedelta(minutes=5)

    # Build ten (timestamp, category, raw score) records, five minutes apart
    when = [origin + (n * interval) for n in xrange(10)]
    labels = ["a", "b", "c", "d", "e"] * 2
    scores = [0.1 * n for n in xrange(10)]
    data = zip(when, labels, scores)

    estimate = an.estimateAnomalyLikelihoods(data)
    likelihoods, avgRecordList, estimatorParams = estimate

    self.assertEqual(len(likelihoods), 10)
    self.assertEqual(len(avgRecordList), 10)
    self.assertTrue(an.isValidEstimatorParams(estimatorParams))
Ejemplo n.º 12
0
    def testBadParams(self):
        """
        Call updateAnomalyLikelihoods with invalid estimator params and check
        that each attempt raises ValueError.
        """
        samples = _generateSampleData(mean=42.0, variance=1e-10)

        # Sanity check: an estimate from a single data point is itself valid
        _, _, estimatorParams = an.estimateAnomalyLikelihoods(samples[0:1])
        self.assertTrue(an.isValidEstimatorParams(estimatorParams))

        # First a dict with the wrong structure, then something that is not
        # a dict at all; both must be rejected.
        for badParams in ({"haha": "heehee"}, 42.0):
            with self.assertRaises(ValueError):
                an.updateAnomalyLikelihoods(samples, badParams)
Ejemplo n.º 13
0
    def testBadParams(self):
        """
        Verify that updateAnomalyLikelihoods rejects bad estimator params.
        """
        scores = _generateSampleData(mean=42.0, variance=1e-10)

        # An estimate built from one data point should be valid
        oneRecord = scores[0:1]
        estimate = an.estimateAnomalyLikelihoods(oneRecord)
        _, _, estimatorParams = estimate
        self.assertTrue(an.isValidEstimatorParams(estimatorParams))

        # A dict that lacks the expected structure is rejected
        malformedParams = {"haha": "heehee"}
        with self.assertRaises(ValueError):
            an.updateAnomalyLikelihoods(scores, malformedParams)

        # A non-dict value is rejected as well
        notADict = 42.0
        with self.assertRaises(ValueError):
            an.updateAnomalyLikelihoods(scores, notADict)
Ejemplo n.º 14
0
    def testUpdateAnomalyLikelihoods(self):
        """
        A slightly more complex test. This calls estimateAnomalyLikelihoods
        to estimate the distribution on fake data, followed by several calls
        to updateAnomalyLikelihoods. The last step verifies that feeding the
        data incrementally leaves the same moving average state as feeding it
        all in one batch.
        """

        #------------------------------------------
        # Step 1. Generate an initial estimate using fake distribution of anomaly
        # scores.
        data1 = _generateSampleData(mean=0.2)[0:1000]
        _, _, estimatorParams = (an.estimateAnomalyLikelihoods(
            data1, averagingWindow=5))

        #------------------------------------------
        # Step 2. Generate some new data with a higher average anomaly
        # score. Using the estimator from step 1, to compute likelihoods. Now we
        # should see a lot more anomalies.
        data2 = _generateSampleData(mean=0.6)[0:300]
        likelihoods2, avgRecordList2, estimatorParams2 = (
            an.updateAnomalyLikelihoods(data2, estimatorParams))
        self.assertEqual(len(likelihoods2), len(data2))
        self.assertEqual(len(avgRecordList2), len(data2))
        # Check the params returned by the update rather than the ones passed in.
        self.assertTrue(an.isValidEstimatorParams(estimatorParams2))

        # The new running total should be different
        self.assertNotEqual(estimatorParams2["movingAverage"]["total"],
                            estimatorParams["movingAverage"]["total"])

        # We should have many more samples where likelihood is < 0.01, but not all
        lowCount2 = numpy.sum(likelihoods2 < 0.01)
        self.assertGreaterEqual(lowCount2, 25)
        self.assertLessEqual(lowCount2, 250)

        #------------------------------------------
        # Step 3. Generate some new data with the expected average anomaly score. We
        # should see fewer anomalies than in Step 2.
        data3 = _generateSampleData(mean=0.2)[0:1000]
        likelihoods3, avgRecordList3, estimatorParams3 = (
            an.updateAnomalyLikelihoods(data3, estimatorParams2))

        self.assertEqual(len(likelihoods3), len(data3))
        self.assertEqual(len(avgRecordList3), len(data3))
        self.assertTrue(an.isValidEstimatorParams(estimatorParams3))

        # The new running total should be different
        self.assertNotEqual(estimatorParams3["movingAverage"]["total"],
                            estimatorParams["movingAverage"]["total"])
        self.assertNotEqual(estimatorParams3["movingAverage"]["total"],
                            estimatorParams2["movingAverage"]["total"])

        # We should have a small number of samples where likelihood is < 0.01,
        # but at least one
        lowCount3 = numpy.sum(likelihoods3 < 0.01)
        self.assertGreaterEqual(lowCount3, 1)
        self.assertLessEqual(lowCount3, 100)

        #------------------------------------------
        # Step 4. Validate that sending data incrementally is the same as sending
        # in one batch. Concatenate into a new list so data1 stays untouched.
        allData = data1 + data2 + data3

        # Compute moving average of all the data and check it's the same
        _, historicalValuesAll, totalAll = (an._anomalyScoreMovingAverage(
            allData, windowSize=5))
        self.assertEqual(
            sum(historicalValuesAll),
            sum(estimatorParams3["movingAverage"]["historicalValues"]))
        self.assertEqual(totalAll, estimatorParams3["movingAverage"]["total"])