예제 #1
0
    def testUpdateAnomalyLikelihoods(self):
        """
        A slightly more complex test. This calls estimateAnomalyLikelihoods
        to estimate the distribution on fake data, followed by several calls
        to updateAnomalyLikelihoods. Finally it checks that the moving-average
        state accumulated across those calls matches one batch computation
        over the concatenated data.
        """

        # ------------------------------------------
        # Step 1. Generate an initial estimate using fake distribution of anomaly
        # scores.
        data1 = _generateSampleData(mean=0.2)[0:1000]
        _, _, estimatorParams = an.estimateAnomalyLikelihoods(data1, averagingWindow=5)

        # ------------------------------------------
        # Step 2. Generate some new data with a higher average anomaly
        # score. Using the estimator from step 1, to compute likelihoods. Now we
        # should see a lot more anomalies.
        data2 = _generateSampleData(mean=0.6)[0:300]
        likelihoods2, avgRecordList2, estimatorParams2 = an.updateAnomalyLikelihoods(data2, estimatorParams)
        self.assertEqual(len(likelihoods2), len(data2))
        self.assertEqual(len(avgRecordList2), len(data2))
        # The params passed in must still be valid after the update, and the
        # params handed back must be valid too (Step 3 feeds them to the next
        # update call).
        self.assertTrue(an.isValidEstimatorParams(estimatorParams))
        self.assertTrue(an.isValidEstimatorParams(estimatorParams2))

        # The new running total should be different
        self.assertNotEqual(estimatorParams2["movingAverage"]["total"], estimatorParams["movingAverage"]["total"])

        # We should have many more samples where likelihood is < 0.01, but not all
        numLowLikelihoods2 = numpy.sum(likelihoods2 < 0.01)
        self.assertGreaterEqual(numLowLikelihoods2, 25)
        self.assertLessEqual(numLowLikelihoods2, 250)

        # ------------------------------------------
        # Step 3. Generate some new data with the expected average anomaly score. We
        # should see fewer anomalies than in Step 2.
        data3 = _generateSampleData(mean=0.2)[0:1000]
        likelihoods3, avgRecordList3, estimatorParams3 = an.updateAnomalyLikelihoods(data3, estimatorParams2)

        self.assertEqual(len(likelihoods3), len(data3))
        self.assertEqual(len(avgRecordList3), len(data3))
        self.assertTrue(an.isValidEstimatorParams(estimatorParams3))

        # The new running total should be different
        self.assertNotEqual(estimatorParams3["movingAverage"]["total"], estimatorParams["movingAverage"]["total"])
        self.assertNotEqual(estimatorParams3["movingAverage"]["total"], estimatorParams2["movingAverage"]["total"])

        # We should have a small number of samples where likelihood is < 0.01,
        # but at least one
        numLowLikelihoods3 = numpy.sum(likelihoods3 < 0.01)
        self.assertGreaterEqual(numLowLikelihoods3, 1)
        self.assertLessEqual(numLowLikelihoods3, 100)

        # ------------------------------------------
        # Step 4. Validate that sending data incrementally is the same as sending
        # in one batch
        # Copy data1 instead of aliasing it so that building the combined batch
        # does not mutate the Step 1 input in place.
        allData = list(data1)
        allData.extend(data2)
        allData.extend(data3)

        # Compute moving average of all the data and check it's the same. The
        # window size matches the averagingWindow used in Step 1.
        _, historicalValuesAll, totalAll = an._anomalyScoreMovingAverage(allData, windowSize=5)
        self.assertEqual(sum(historicalValuesAll), sum(estimatorParams3["movingAverage"]["historicalValues"]))
        self.assertEqual(totalAll, estimatorParams3["movingAverage"]["total"])
    def testUpdateAnomalyLikelihoods(self):
        """
        A slightly more complex test. This calls estimateAnomalyLikelihoods
        to estimate the distribution on fake data, followed by several calls
        to updateAnomalyLikelihoods. The last step compares the moving-average
        state produced by the incremental calls against a single batch
        computation over all of the data.
        """

        #------------------------------------------
        # Step 1. Generate an initial estimate using fake distribution of anomaly
        # scores.
        data1 = _generateSampleData(mean=0.2)[0:1000]
        _, _, estimatorParams = (an.estimateAnomalyLikelihoods(
            data1, averagingWindow=5))

        #------------------------------------------
        # Step 2. Generate some new data with a higher average anomaly
        # score. Using the estimator from step 1, to compute likelihoods. Now we
        # should see a lot more anomalies.
        data2 = _generateSampleData(mean=0.6)[0:300]
        likelihoods2, avgRecordList2, estimatorParams2 = (
            an.updateAnomalyLikelihoods(data2, estimatorParams))
        self.assertEqual(len(likelihoods2), len(data2))
        self.assertEqual(len(avgRecordList2), len(data2))
        # Check the params that were passed in as well as the params that came
        # back; the returned ones are the input to the Step 3 update.
        self.assertTrue(an.isValidEstimatorParams(estimatorParams))
        self.assertTrue(an.isValidEstimatorParams(estimatorParams2))

        # The new running total should be different
        self.assertNotEqual(estimatorParams2["movingAverage"]["total"],
                            estimatorParams["movingAverage"]["total"])

        # We should have many more samples where likelihood is < 0.01, but not all
        lowCount2 = numpy.sum(likelihoods2 < 0.01)
        self.assertGreaterEqual(lowCount2, 25)
        self.assertLessEqual(lowCount2, 250)

        #------------------------------------------
        # Step 3. Generate some new data with the expected average anomaly score. We
        # should see fewer anomalies than in Step 2.
        data3 = _generateSampleData(mean=0.2)[0:1000]
        likelihoods3, avgRecordList3, estimatorParams3 = (
            an.updateAnomalyLikelihoods(data3, estimatorParams2))

        self.assertEqual(len(likelihoods3), len(data3))
        self.assertEqual(len(avgRecordList3), len(data3))
        self.assertTrue(an.isValidEstimatorParams(estimatorParams3))

        # The new running total should be different
        self.assertNotEqual(estimatorParams3["movingAverage"]["total"],
                            estimatorParams["movingAverage"]["total"])
        self.assertNotEqual(estimatorParams3["movingAverage"]["total"],
                            estimatorParams2["movingAverage"]["total"])

        # We should have a small number of samples where likelihood is < 0.01,
        # but at least one
        lowCount3 = numpy.sum(likelihoods3 < 0.01)
        self.assertGreaterEqual(lowCount3, 1)
        self.assertLessEqual(lowCount3, 100)

        #------------------------------------------
        # Step 4. Validate that sending data incrementally is the same as sending
        # in one batch
        # Start from a copy of data1: extending an alias would grow data1 itself.
        allData = list(data1)
        allData.extend(data2)
        allData.extend(data3)

        # Compute moving average of all the data and check it's the same. The
        # window size is the same value passed as averagingWindow in Step 1.
        _, historicalValuesAll, totalAll = (an._anomalyScoreMovingAverage(
            allData, windowSize=5))
        self.assertEqual(
            sum(historicalValuesAll),
            sum(estimatorParams3["movingAverage"]["historicalValues"]))
        self.assertEqual(totalAll, estimatorParams3["movingAverage"]["total"])