def test_threshold(self):
   """
   Test that score_threshold=0 yields exactly one anomaly for ``self.s1``.
   """
   anomalies = AnomalyDetector(self.s1, score_threshold=0).get_anomalies()
   # Check for None first so a missing result fails with a clear message
   # instead of a TypeError raised by len().
   self.assertIsNotNone(anomalies)
   self.assertEqual(len(anomalies), 1)
Example #2
0
 def test_anomaly_threshold(self):
   """
   Two detectors built with the same score_percentile_threshold but
   different algorithms are expected to report different anomalies.
   """
   shared_kwargs = {'score_percentile_threshold': 0.1}
   exp_avg = AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', **shared_kwargs)
   derivative = AnomalyDetector(self.s1, algorithm_name='derivative_detector', **shared_kwargs)
   derivative_anomalies = derivative.get_anomalies()
   exp_avg_anomalies = exp_avg.get_anomalies()
   self.assertNotEqual(derivative_anomalies, exp_avg_anomalies)
 def test_threshold(self):
     """
     Test that score_threshold=0 yields exactly one anomaly for ``self.s1``.
     """
     anomalies = AnomalyDetector(self.s1, score_threshold=0).get_anomalies()
     # Check for None first so a missing result fails with a clear message
     # instead of a TypeError raised by len().
     self.assertIsNotNone(anomalies)
     self.assertEqual(len(anomalies), 1)
 def test_score_only(self):
   """
   Test that score_only=True produces no anomalies, while the same
   algorithm without it still returns an anomaly list.
   """
   scoring_only = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
   regular = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
   self.assertIsNotNone(regular.get_anomalies())
   # assertEqual reports the actual length on failure, unlike assertTrue(... == 0).
   self.assertEqual(len(scoring_only.get_anomalies()), 0)
Example #5
0
 def test_score_only(self):
     """
     Test that score_only=True produces no anomalies, while the same
     algorithm without it still returns an anomaly list.
     """
     scoring_only = AnomalyDetector(self.s1,
                                    score_only=True,
                                    algorithm_name='derivative_detector')
     regular = AnomalyDetector(self.s1,
                               algorithm_name='derivative_detector')
     self.assertIsNotNone(regular.get_anomalies())
     # assertEqual reports the actual length on failure, unlike assertTrue(... == 0).
     self.assertEqual(len(scoring_only.get_anomalies()), 0)
 def test_score_only(self):
     """
     Test that the score_only parameter doesn't give anomalies, while the
     same algorithm without it still returns an anomaly list.
     """
     scoring_only = AnomalyDetector(self.s1,
                                    score_only=True,
                                    algorithm_name='derivative_detector')
     regular = AnomalyDetector(self.s1,
                               algorithm_name='derivative_detector')
     self.assertIsNotNone(regular.get_anomalies())
     # assertEqual reports the actual length on failure, unlike assertTrue(... == 0).
     self.assertEqual(len(scoring_only.get_anomalies()), 0)
Example #7
0
 def test_anomaly_threshold(self):
     """
     Two detectors built with the same score_percent_threshold but
     different algorithms are expected to report different anomalies.
     """
     threshold = 0.1
     exp_avg = AnomalyDetector(self.s1,
                               score_percent_threshold=threshold,
                               algorithm_name='exp_avg_detector')
     derivative = AnomalyDetector(self.s1,
                                  score_percent_threshold=threshold,
                                  algorithm_name='derivative_detector')
     derivative_anomalies = derivative.get_anomalies()
     exp_avg_anomalies = exp_avg.get_anomalies()
     self.assertNotEqual(derivative_anomalies, exp_avg_anomalies)
    def test_sign_test_algorithm_interface(self):
        """
        Test "sign test" algorithm with a threshold of 0% and an offset of 2.

        Both the upper-bound and the lower-bound configuration are expected
        to report exactly one anomaly.
        """
        # Flat baseline of 1 for timestamps 1..29.
        bs = {t: 1 for t in range(1, 30)}

        # Upper bound: 3.1 is above baseline (1) + offset (2).
        algorithm_params = {
            'percent_threshold_upper': 0,
            'offset': 2,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts = dict(bs)
        ts.update((t, 3.1) for t in range(1, 21))

        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()

        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 1)

        # Lower bound: 2.9 is below baseline (1) + offset (2).
        algorithm_params = {
            'percent_threshold_lower': 0,
            'offset': 2,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts = dict(bs)
        ts.update((t, 2.9) for t in range(1, 25))

        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()

        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 1)
 def test_custom_algorithm(self):
   """
   Test passing a custom algorithm class via ``algorithm_class``.
   """
   algorithm_params = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
   detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_class=CustomAlgo,
                              algorithm_params=algorithm_params)
   anomalies = detector.get_anomalies()
   self.assertIsNotNone(anomalies)
   # assertGreater reports the actual count on failure.
   self.assertGreater(len(anomalies), 0)
 def test_diff_percent_threshold_algorithm(self):
   """
   Test "diff percent threshold" algorithm with a threshold of 20%.

   Also checks that omitting algorithm_params raises
   RequiredParametersNotPassed.
   """
   algorithm_params = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
   detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_name='diff_percent_threshold',
                              algorithm_params=algorithm_params)
   anomalies = detector.get_anomalies()
   self.assertIsNotNone(anomalies)
   self.assertGreater(len(anomalies), 0)
   # Constructing the detector without thresholds must fail.
   with self.assertRaises(exceptions.RequiredParametersNotPassed):
     AnomalyDetector(self.s1, baseline_time_series=self.s2,
                     algorithm_name='diff_percent_threshold')
 def test_absolute_threshold_algorithm(self):
   """
   Test "absolute threshold" algorithm with an upper and lower threshold of 0.2.

   Also checks that omitting algorithm_params raises
   RequiredParametersNotPassed.
   """
   algorithm_params = {'absolute_threshold_value_upper': 0.2,
                       'absolute_threshold_value_lower': 0.2}
   detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                              algorithm_params=algorithm_params)
   anomalies = detector.get_anomalies()
   self.assertIsNotNone(anomalies)
   self.assertGreater(len(anomalies), 0)
   # Constructing the detector without thresholds must fail.
   with self.assertRaises(exceptions.RequiredParametersNotPassed):
     AnomalyDetector(self.s1, algorithm_name='absolute_threshold')
Example #12
0
  def test_sign_test_algorithm_interface(self):
    """
    Test "sign test" algorithm with a threshold of 0% and an offset of 2.

    Both the upper-bound and the lower-bound configuration are expected
    to report exactly one anomaly.
    """
    # Flat baseline of 1 for timestamps 1..29.
    bs = {t: 1 for t in range(1, 30)}

    # Upper bound: 3.1 is above baseline (1) + offset (2).
    algorithm_params = {'percent_threshold_upper': 0,
                        'offset': 2,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    ts.update((t, 3.1) for t in range(1, 21))

    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()

    self.assertIsNotNone(anomalies)
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(len(anomalies), 1)

    # Lower bound: 2.9 is below baseline (1) + offset (2).
    algorithm_params = {'percent_threshold_lower': 0,
                        'offset': 2,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    ts.update((t, 2.9) for t in range(1, 25))

    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()

    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)
Example #13
0
 def isAnomaly(self, data):
     """
     Report whether the last data point of ``data`` is an anomaly.

     Runs the 'exp_avg_detector' algorithm over ``data`` keyed by position,
     using ``self.score_threshold`` as the score threshold.

     Returns a ``(flag, anomalies)`` tuple. ``flag`` is True when some
     anomaly's ``exact_timestamp`` equals the last index of ``data``;
     ``anomalies`` is the detector output. If detection fails for any
     reason, ``(False, [])`` is returned.
     """
     length = len(data)
     try:
         detector = AnomalyDetector({i: data[i]
                                     for i in range(length)},
                                    algorithm_name='exp_avg_detector',
                                    score_threshold=self.score_threshold)
         anomalies = detector.get_anomalies()
         last_is_anomalous = any(anomaly.exact_timestamp == length - 1
                                 for anomaly in anomalies)
     except Exception:
         # Best-effort: treat any detection failure as "no anomaly". Catching
         # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
         # propagate.
         return False, []
     return last_is_anomalous, anomalies
Example #14
0
 def test_custom_algorithm(self):
     """
     Test passing a custom algorithm class via ``algorithm_class``.
     """
     algorithm_params = {
         'percent_threshold_upper': 20,
         'percent_threshold_lower': -20
     }
     detector = AnomalyDetector(self.s1,
                                baseline_time_series=self.s2,
                                algorithm_class=CustomAlgo,
                                algorithm_params=algorithm_params)
     anomalies = detector.get_anomalies()
     self.assertIsNotNone(anomalies)
     # assertGreater reports the actual count on failure.
     self.assertGreater(len(anomalies), 0)
def get_anoms(anomalous_ts):
    """
    Return the anomalies AnomalyDetector finds in ``anomalous_ts``.

    ``anomalous_ts`` is converted with ``dict()`` before detection, which
    runs with a score threshold of 1.5. An empty or None input returns
    ``[]`` without running detection. If detection raises, the traceback
    is logged and ``[]`` is returned.
    """
    if not anomalous_ts:
        return []

    anomalies = []
    try:
        anomaly_ts_dict = dict(anomalous_ts)
        my_detector = AnomalyDetector(anomaly_ts_dict, score_threshold=1.5)
        anomalies = my_detector.get_anomalies()
    except Exception:
        # Best-effort: log and fall through to the empty result. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        logger.error(traceback.format_exc())
        logger.error('error :: AnomalyDetector')
    return anomalies
Example #16
0
def get_anomalies(series: pd.Series,
                  algorithm: str = "bitmap_detector") -> List[dict]:
    """
    Run an AnomalyDetector algorithm over ``series`` and describe each anomaly.

    ``algorithm`` must be one of "bitmap_detector", "derivative_detector" or
    "exp_avg_detector" (checked with an assert). The series is passed to the
    detector as ``series.to_dict()``.

    Returns one dict per anomaly with the keys "start_time", "end_time",
    "top_score_time" and "score".
    """
    supported = ["bitmap_detector", "derivative_detector", "exp_avg_detector"]
    assert algorithm in supported

    detector = AnomalyDetector(series.to_dict(), algorithm_name=algorithm)

    records = []
    for anomaly in detector.get_anomalies():
        records.append({
            "start_time": anomaly.start_timestamp,
            "end_time": anomaly.end_timestamp,
            "top_score_time": anomaly.exact_timestamp,
            "score": anomaly.anomaly_score,
        })
    return records
Example #17
0
 def test_absolute_threshold_algorithm(self):
     """
     Test "absolute threshold" algorithm with an upper and lower threshold of 0.2.

     Also checks that omitting algorithm_params raises
     RequiredParametersNotPassed.
     """
     algorithm_params = {
         'absolute_threshold_value_upper': 0.2,
         'absolute_threshold_value_lower': 0.2
     }
     detector = AnomalyDetector(self.s1,
                                algorithm_name='absolute_threshold',
                                algorithm_params=algorithm_params)
     anomalies = detector.get_anomalies()
     self.assertIsNotNone(anomalies)
     self.assertGreater(len(anomalies), 0)
     # Constructing the detector without thresholds must fail.
     with self.assertRaises(exceptions.RequiredParametersNotPassed):
         AnomalyDetector(self.s1, algorithm_name='absolute_threshold')
Example #18
0
def get_anoms(anomalous_ts):
    """
    Return the anomalies AnomalyDetector finds in ``anomalous_ts``.

    ``anomalous_ts`` is converted with ``dict()`` before detection, which
    runs with a score threshold of 1.5. An empty or None input is logged
    as an error and returns ``[]``. If detection raises, the traceback is
    logged and ``[]`` is returned.
    """
    logger = logging.getLogger(skyline_app_logger)

    if not anomalous_ts:
        logger.error('error :: get_anoms :: no anomalous_ts')
        return []

    anomalies = []
    try:
        anomaly_ts_dict = dict(anomalous_ts)
        my_detector = AnomalyDetector(anomaly_ts_dict, score_threshold=1.5)
        anomalies = my_detector.get_anomalies()
    except Exception:
        # Best-effort: log and fall through to the empty result. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        logger.error(traceback.format_exc())
        logger.error('error :: get_anoms :: AnomalyDetector')
    return anomalies
Example #19
0
 def test_diff_percent_threshold_algorithm(self):
     """
     Test "diff percent threshold" algorithm with a threshold of 20%.

     Also checks that omitting algorithm_params raises
     RequiredParametersNotPassed.
     """
     algorithm_params = {
         'percent_threshold_upper': 20,
         'percent_threshold_lower': -20
     }
     detector = AnomalyDetector(self.s1,
                                baseline_time_series=self.s2,
                                algorithm_name='diff_percent_threshold',
                                algorithm_params=algorithm_params)
     anomalies = detector.get_anomalies()
     self.assertIsNotNone(anomalies)
     self.assertGreater(len(anomalies), 0)
     # Constructing the detector without thresholds must fail.
     with self.assertRaises(exceptions.RequiredParametersNotPassed):
         AnomalyDetector(self.s1,
                         baseline_time_series=self.s2,
                         algorithm_name='diff_percent_threshold')
Example #20
0
def pointsOfCorrelation(ts1, ts2, thresholdVal):
    """
    Find anomalous periods of ``ts2`` during which ``ts1`` and ``ts2`` correlate.

    Anomalies are detected on ``ts2`` with the 'exp_avg_detector' algorithm
    and ``thresholdVal`` as the score threshold. For each anomaly window a
    Correlator over ``ts1`` and ``ts2`` is built; windows that pass
    ``is_correlated(threshold=0.8)`` are reported.

    Returns a list of ``[time_period, coefficient, score]`` entries, where
    ``time_period`` is a ``(start, end)`` tuple of local-time strings in
    '%Y-%m-%d %H:%M:%S' format, and the coefficient and anomaly score are
    rounded to two decimals. Windows whose correlation or formatting raises
    are skipped.
    """
    corrPoints = []

    # Run anomaly detection on ts2 only.
    detector = AnomalyDetector(ts2,
                               score_threshold=thresholdVal,
                               algorithm_name="exp_avg_detector")

    for a in detector.get_anomalies():
        time_window = a.get_time_window()

        try:
            my_correlator = Correlator(ts1, ts2, time_window)
            if not my_correlator.is_correlated(threshold=0.8):
                continue

            correlatorResultObj = my_correlator.get_correlation_result()

            # Change time period to human readable format.
            start = strftime('%Y-%m-%d %H:%M:%S',
                             localtime(a.start_timestamp))
            end = strftime('%Y-%m-%d %H:%M:%S', localtime(a.end_timestamp))

            # Report anomalous time period, correlation coefficient and
            # anomaly score.
            corrPoints.append([
                (start, end),
                round(correlatorResultObj.coefficient, 2),
                round(a.anomaly_score, 2)
            ])
        except Exception:
            # Best-effort: skip windows that cannot be processed. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            continue

    return corrPoints
class TestAnomalyDetector(unittest.TestCase):
    """
    Unit tests for AnomalyDetector: algorithm selection, algorithm
    parameters, score thresholds, and the threshold-based and "sign test"
    algorithms.
    """

    def setUp(self):
        """Create two small time series and a default detector for each."""
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
        self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}

        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def _sign_test_anomalies(self, ts, bs, algorithm_params):
        """Run 'sign_test' on ts against baseline bs and return its non-None anomalies."""
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        return anomalies

    def _assert_time_window(self, anomaly, start, end):
        """Assert that anomaly.get_time_window() equals (start, end)."""
        s, e = anomaly.get_time_window()
        self.assertEqual(s, start)
        self.assertEqual(e, end)

    def _assert_score_between(self, anomaly, low, high):
        """Assert that low < anomaly.anomaly_score < high."""
        self.assertGreater(anomaly.anomaly_score, low)
        self.assertLess(anomaly.anomaly_score, high)

    def test_custom_algorithm(self):
        """
        Test passing a custom algorithm class
        """
        algorithm_params = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20
        }
        detector = AnomalyDetector(self.s1,
                                   baseline_time_series=self.s2,
                                   algorithm_class=CustomAlgo,
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)

    def test_diff_percent_threshold_algorithm(self):
        """
        Test "diff percent threshold" algorithm with a threshold of 20%
        """
        algorithm_params = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20
        }
        detector = AnomalyDetector(self.s1,
                                   baseline_time_series=self.s2,
                                   algorithm_name='diff_percent_threshold',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        # Constructing the detector without thresholds must fail.
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1,
                            baseline_time_series=self.s2,
                            algorithm_name='diff_percent_threshold')

    def test_sign_test_algorithm(self):
        """
        Test "sign test" algorithm with a threshold of 20%
        """
        # Flat baseline of 1 for timestamps 1..99; ts starts as a copy of it.
        bs = {t: 1 for t in range(1, 100)}
        ts = dict(bs)

        # test missing parameters
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1,
                            baseline_time_series=self.s2,
                            algorithm_name='sign_test')

        # test over specified
        # NOTE(review): algorithm_params is built here but never passed to the
        # detector, so this repeats the "missing parameters" check above.
        # Confirm that sign_test rejects having both thresholds set before
        # passing algorithm_params=algorithm_params.
        algorithm_params = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20,
            'scan_window': 24,
            'confidence': 0.01
        }
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1,
                            baseline_time_series=self.s2,
                            algorithm_name='sign_test')

        # Simple tests
        algorithm_params = {'percent_threshold_upper': 20, 'scan_window': 24}

        # first no anomalies
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 0)

        # Next one anomaly exactly equal to scan window
        ts.update((t, 1.200001) for t in range(10, 34))
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]

        # note the anomaly is larger than scan window
        self._assert_time_window(anomaly, 4, 39)

        # score should be roughly 98.5
        self._assert_score_between(anomaly, 98, 99)

        # anomaly below baseline
        algorithm_params = {
            'percent_threshold_lower': -20,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts.update((t, 0.799999) for t in range(10, 34))

        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        self._assert_time_window(anomaly, 4, 39)

        # score should be roughly 98.5
        self._assert_score_between(anomaly, 98, 99)

        # anomalies separated by big gap
        ts.update(bs)
        ts.update((t, 0.799999) for t in range(1, 25))
        ts.update((t, 0.799999) for t in range(60, 84))
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 2)
        anomaly = anomalies[0]
        self._assert_time_window(anomaly, 1, 30)

        # score ~ 99.9
        self.assertGreater(anomaly.anomaly_score, 99)

        anomaly = anomalies[1]
        self._assert_time_window(anomaly, 54, 89)

        # score should be roughly 98.5
        self._assert_score_between(anomaly, 98, 99)

        # anomalies separated by small gap
        algorithm_params = {'percent_threshold_upper': 20, 'scan_window': 24}
        ts.update(bs)
        ts.update((t, 1.21) for t in range(1, 25))
        ts.update((t, 1.21) for t in range(30, 40))
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)

        # just one
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        self._assert_time_window(anomaly, 1, 40)

        # score ~ 99.9
        self.assertGreater(anomaly.anomaly_score, 99)

        # try noisy data
        ts.update(bs)
        ts.update((t, 1.21) for t in range(1, 25))
        ts.update((t, 1.19) for t in range(1, 25, 6))
        algorithm_params = {
            'percent_threshold_upper': 20,
            'scan_window': 24,
            'confidence': 0.01
        }

        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)

        # now decrease sensitivity
        algorithm_params = {
            'percent_threshold_upper': 20,
            'scan_window': 24,
            'confidence': 0.0001
        }

        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 0)

    def test_sign_test_algorithm_interface(self):
        """
        Test "sign test" algorithm with a threshold of 0% and an offset of 2
        """
        # Flat baseline of 1 for timestamps 1..29.
        bs = {t: 1 for t in range(1, 30)}

        # Upper bound: 3.1 is above baseline (1) + offset (2).
        algorithm_params = {
            'percent_threshold_upper': 0,
            'offset': 2,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts = dict(bs)
        ts.update((t, 3.1) for t in range(1, 21))

        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)

        # Lower bound: 2.9 is below baseline (1) + offset (2).
        algorithm_params = {
            'percent_threshold_lower': 0,
            'offset': 2,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts = dict(bs)
        ts.update((t, 2.9) for t in range(1, 25))

        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)

    def test_sign_test_algorithm_with_shift(self):
        """
        Test "sign test" algorithm with a non-zero offset parameter
        """
        # Flat baseline of 1 for timestamps 1..99.
        bs = {t: 1 for t in range(1, 100)}

        # Simple tests
        algorithm_params = {
            'percent_threshold_upper': 10,
            'offset': 1,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts = dict(bs)
        # bigger than 10 percent but below bias
        ts.update((t, 1.2) for t in range(10, 34))

        # first no anomalies
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 0)

        # Next one anomaly exactly equal to scan window
        # uses bias
        ts.update((t, 2.100001) for t in range(10, 34))
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]

        # note the anomaly is larger than scan window
        self._assert_time_window(anomaly, 4, 39)

        # score should be roughly 98.5
        self._assert_score_between(anomaly, 98, 99)

        # anomaly below baseline but not below baseline with shift
        algorithm_params = {
            'percent_threshold_lower': -20,
            'offset': -0.1,
            'scan_window': 24
        }

        ts.update((t, 0.799999) for t in range(10, 34))
        # no anomalies
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 0)

        # lower the time series by 0.1
        ts.update((t, 0.699999) for t in range(10, 34))

        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        self._assert_time_window(anomaly, 4, 39)

        # score should be roughly 98.5
        self._assert_score_between(anomaly, 98, 99)

    def test_absolute_threshold_algorithm(self):
        """
        Test "absolute threshold" algorithm with an upper and lower threshold of 0.2
        """
        algorithm_params = {
            'absolute_threshold_value_upper': 0.2,
            'absolute_threshold_value_lower': 0.2
        }
        detector = AnomalyDetector(self.s1,
                                   algorithm_name='absolute_threshold',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        # Constructing the detector without thresholds must fail.
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1, algorithm_name='absolute_threshold')

    def test_threshold(self):
        """
        Test score threshold=0
        """
        anomalies = AnomalyDetector(self.s1, score_threshold=0).get_anomalies()
        # Check for None first so a missing result fails with a clear message
        # instead of a TypeError raised by len().
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 1)

    def test_score_only(self):
        """
        Test that score_only parameter doesn't give anomalies
        """
        detector = AnomalyDetector(self.s1,
                                   score_only=True,
                                   algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1,
                                    algorithm_name='derivative_detector')
        self.assertIsNotNone(detector2.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        self.assertEqual(len(self.detector1.get_all_scores()),
                         len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test that get_anomalies returns a result (not None).
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Test that naming 'default_detector' explicitly gives the same scores
        as a detector built without an algorithm name.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps,
                         self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values,
                         self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        # A non-dict algorithm_params value must be rejected.
        with self.assertRaises(ValueError):
            AnomalyDetector(self.s1,
                            algorithm_name='exp_avg_detector',
                            algorithm_params='0')
        detector = AnomalyDetector(self.s1,
                                   algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values,
                            detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percent_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1,
                                   score_percent_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1,
                                    score_percent_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(),
                            detector.get_anomalies())
Example #22
0
volume = 0
box_difference = []
lts = {}

# Walk the buy/sell frames in lockstep by position, recording the price
# difference and the running net volume for every row.
for i in df_buy['VALUE']:
    container.append(counter)
    difference = df_buy['PRICE'][counter] - df_sell['PRICE'][counter]
    volume = volume - df_buy['VOLUME'][counter] + df_sell['VOLUME'][counter]
    box_difference.append(difference)
    box_volume.append(volume)
    # Series handed to the anomaly detector: position -> price difference.
    lts[counter] = difference
    counter += 1
plt.plot(container, box_difference)

detector = AnomalyDetector(lts)
anomalies = detector.get_anomalies()

# Overlay each anomalous window on the price-difference plot.
for anomaly in anomalies:
    time_period = anomaly.get_time_window()
    container_anomalies = []
    box_difference_anomalies = []
    i = time_period[0]
    while i <= time_period[1]:
        # Record the point before advancing so the window is covered
        # inclusively from its start to its end, and no index past the
        # window end is read.
        container_anomalies.append(i)
        difference = df_buy['PRICE'][i] - df_sell['PRICE'][i]
        box_difference_anomalies.append(difference)
        i += 1
    plt.plot(container_anomalies, box_difference_anomalies)
    def test_sign_test_algorithm(self):
        """
        Test "sign test" algorithm with a threshold of 20%

        Walks through parameter validation, a series identical to its
        baseline, excursions above and below the baseline, excursions
        separated by a big and by a small gap, and noisy data at two
        confidence levels.
        """
        bs = dict()
        bs.update((t, 1) for t in range(1, 100))
        ts = dict(bs)

        def find_anomalies(params):
            # ts is mutated in place between scenarios, so it is looked up
            # at call time rather than captured by value.
            detector = AnomalyDetector(ts,
                                       baseline_time_series=bs,
                                       algorithm_name='sign_test',
                                       algorithm_params=params)
            return detector.get_anomalies()

        def check_window(anomaly, start, end):
            s, e = anomaly.get_time_window()
            self.assertEqual(s, start)
            self.assertEqual(e, end)

        def check_score_about_98_5(anomaly):
            # score should be roughly 98.5
            self.assertGreater(anomaly.anomaly_score, 98)
            self.assertLess(anomaly.anomaly_score, 99)

        # test missing parameters
        self.assertRaises(
            exceptions.RequiredParametersNotPassed,
            lambda: AnomalyDetector(self.s1,
                                    baseline_time_series=self.s2,
                                    algorithm_name='sign_test'))

        # test over specified: both an upper and a lower threshold are given.
        # The parameters must actually be handed to the detector, otherwise
        # this only repeats the missing-parameters check above.
        algorithm_params = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20,
            'scan_window': 24,
            'confidence': 0.01
        }
        self.assertRaises(
            exceptions.RequiredParametersNotPassed,
            lambda: AnomalyDetector(self.s1,
                                    baseline_time_series=self.s2,
                                    algorithm_name='sign_test',
                                    algorithm_params=algorithm_params))

        # Simple tests
        algorithm_params = {'percent_threshold_upper': 20, 'scan_window': 24}

        # first no anomalies
        anomalies = find_anomalies(algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 0)

        # Next one anomaly exactly equal to scan window
        ts.update((t, 1.200001) for t in range(10, 34))
        anomalies = find_anomalies(algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        # note the anomaly is larger than scan window
        check_window(anomaly, 4, 39)
        check_score_about_98_5(anomaly)

        # anomaly below baseline
        algorithm_params = {
            'percent_threshold_lower': -20,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts.update((t, 0.799999) for t in range(10, 34))
        anomalies = find_anomalies(algorithm_params)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        check_window(anomaly, 4, 39)
        check_score_about_98_5(anomaly)

        # anomalies separated by big gap
        ts.update(bs)
        ts.update((t, 0.799999) for t in range(1, 25))
        ts.update((t, 0.799999) for t in range(60, 84))
        anomalies = find_anomalies(algorithm_params)
        self.assertEqual(len(anomalies), 2)
        anomaly = anomalies[0]
        check_window(anomaly, 1, 30)
        # score ~ 99.9
        self.assertGreater(anomaly.anomaly_score, 99)

        anomaly = anomalies[1]
        check_window(anomaly, 54, 89)
        check_score_about_98_5(anomaly)

        # anomalies separated by small gap
        algorithm_params = {'percent_threshold_upper': 20, 'scan_window': 24}
        ts.update(bs)
        ts.update((t, 1.21) for t in range(1, 25))
        ts.update((t, 1.21) for t in range(30, 40))
        anomalies = find_anomalies(algorithm_params)

        # just one
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        check_window(anomaly, 1, 40)
        # score ~ 99.9
        self.assertGreater(anomaly.anomaly_score, 99)

        # try noisy data
        ts.update(bs)
        ts.update((t, 1.21) for t in range(1, 25))
        ts.update((t, 1.19) for t in range(1, 25, 6))
        algorithm_params = {
            'percent_threshold_upper': 20,
            'scan_window': 24,
            'confidence': 0.01
        }
        anomalies = find_anomalies(algorithm_params)
        self.assertEqual(len(anomalies), 1)

        # now decrease sensitivity
        algorithm_params = {
            'percent_threshold_upper': 20,
            'scan_window': 24,
            'confidence': 0.0001
        }
        anomalies = find_anomalies(algorithm_params)
        self.assertEqual(len(anomalies), 0)
    def test_sign_test_algorithm_with_shift(self):
        """
        Test the "sign test" algorithm when an 'offset' is supplied.

        With an upper threshold of 10% and an offset of 1, a rise to 1.2 is
        expected to produce no anomaly while a rise to 2.100001 produces one.
        With a lower threshold of -20% and an offset of -0.1, a drop to
        0.799999 is expected to produce no anomaly while a drop to 0.699999
        produces one.
        """
        baseline = dict()
        baseline.update((t, 1) for t in range(1, 100))
        series = dict(baseline)
        bump = range(10, 34)

        def run(params):
            # series is modified in place between scenarios
            return AnomalyDetector(series,
                                   baseline_time_series=baseline,
                                   algorithm_name='sign_test',
                                   algorithm_params=params).get_anomalies()

        def expect_single_wide_anomaly(found):
            self.assertEqual(len(found), 1)
            only = found[0]
            first, last = only.get_time_window()
            # the reported window is wider than the scan window
            self.assertEqual(first, 4)
            self.assertEqual(last, 39)
            # score should be roughly 98.5
            self.assertGreater(only.anomaly_score, 98)
            self.assertLess(only.anomaly_score, 99)

        # Upper threshold with a positive offset
        upper_params = {
            'percent_threshold_upper': 10,
            'offset': 1,
            'scan_window': 24,
            'confidence': 0.01
        }

        # bigger than 10 percent but below bias: no anomalies
        series.update((t, 1.2) for t in bump)
        found = run(upper_params)
        self.assertIsNotNone(found)
        self.assertEqual(len(found), 0)

        # one anomaly exactly equal to scan window, above the bias
        series.update((t, 2.100001) for t in bump)
        found = run(upper_params)
        self.assertIsNotNone(found)
        expect_single_wide_anomaly(found)

        # Lower threshold with a negative offset
        lower_params = {
            'percent_threshold_lower': -20,
            'offset': -0.1,
            'scan_window': 24
        }

        # below baseline but not below baseline with shift: no anomalies
        series.update((t, 0.799999) for t in bump)
        found = run(lower_params)
        self.assertIsNotNone(found)
        self.assertEqual(len(found), 0)

        # lower the time series by 0.1
        series.update((t, 0.699999) for t in bump)
        expect_single_wide_anomaly(run(lower_params))
Example #25
0
  def test_sign_test_algorithm_with_shift(self):
    """
    Test the "sign test" algorithm when a 'shift' is supplied.

    With an upper threshold of 10% and a shift of 1, a rise to 1.2 is
    expected to produce no anomaly while a rise to 2.100001 produces one.
    With a lower threshold of -20% and a shift of -0.1, a drop to 0.799999
    is expected to produce no anomaly while a drop to 0.699999 produces one.
    """
    bs = {t: 1 for t in range(1, 100)}

    # Simple tests
    algorithm_params = {'percent_threshold_upper': 10,
                        'shift': 1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    # bigger than 10 percent but below bias
    ts.update({t: 1.2 for t in range(10, 34)})

    # first no anomalies
    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()

    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    # uses bias
    ts.update({t: 2.100001 for t in range(10, 34)})
    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()

    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()

    # note the anomaly is larger than scan window
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomaly below baseline but not below baseline with shift
    algorithm_params = {'percent_threshold_lower': -20,
                        'shift': -0.1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts.update({t: 0.799999 for t in range(10, 34)})
    # no anomalies
    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()

    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # lower the time series by 0.1
    ts.update({t: 0.699999 for t in range(10, 34)})

    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)
    './SAR-device.sdb.await__DCU1923ZeroOrder(4_8)2017-11-07 11_56_54_491016.csv',
    #score_threshold=0.1, #1.0,
    score_threshold=4.703433200392526,
    algorithm_name='derivative_detector'
)  #derivative_detector'exp_avg_detector#'bitmap_detector)#, algorithm_params = {'smoothing factor': 0.2, 'lag_window_size': 64 })

# Fetch the per-timestamp anomaly scores computed by the detector.
score = my_detector.get_all_scores()

# Append every score and its timestamp to the asData / asTime lists.
for timestamp, value in score.iteritems():
    asData.append(value)
    #asTime.append(pd.to_datetime(timestamp))
    asTime.append(timestamp)
    #print(timestamp, value)

# Collection of detected anomalies
asAnomal = my_detector.get_anomalies()
#for a in asAnomal:
#    print(a)

# NOTE(review): 1664 is a hard-coded cap on the number of plotted points;
# presumably the length of the input sample - confirm.
asData = asData[:1664]
pylab.figure(figsize=(32, 16))
pylab.subplot(311)
#asData = asData[:582]
# 1-based positions used as the x axis.
x = np.arange(1, len(asData) + 1, 1)
pylab.plot(x, asData)  # measured values (the contents of asData)
pylab.grid(True)
"""
#####################
if asAnomal:
    time_period = asAnomal[0].get_time_window()
    correlator = Correlator(time_series_a='./SAR-device.sdb.await__研发中心波形_高阻接地_00025_20171025_201648_049_F__U0.csv', 
Example #27
0
class RCA(object):
    """
    Detects anomalies in one metric and correlates each anomaly window with
    a list of related metrics.
    """

    def __init__(self, metrix, related_metrices):
        """
        Run anomaly detection on ``metrix`` and then the correlation analysis.

        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
        :param list related_metrices: the metrics to correlate against; each
            entry is passed to Correlator unchanged and is also used as a key
            of ``output_by_name``.
        """
        self.metrix = self._load(metrix)
        self.anomaly_detector = AnomalyDetector(metrix)
        self.related_metrices = related_metrices
        self.anomalies = self.anomaly_detector.get_anomalies()
        self._analyze()

    def _load(self, metrix):
        """
        Normalise the input into a TimeSeries.

        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
        :return TimeSeries: the TimeSeries itself when one is given, otherwise
            one built from the dictionary or from the csv contents.
        """
        if isinstance(metrix, TimeSeries):
            return metrix
        raw = metrix if isinstance(metrix, dict) else utils.read_csv(metrix)
        return TimeSeries(raw)

    def _analyze(self):
        """
        Correlate every detected anomaly with every related metric.

        Results are stored on the instance:
        ``output`` maps the anomaly's exact timestamp to a list of
        ``(start, end, correlation_attributes, related_metric)`` tuples, and
        ``output_by_name`` maps each related metric to a list of
        ``(start, end, correlation_attributes)`` tuples.  Related metrics for
        which Correlator raises NotEnoughDataPoints are skipped.
        """
        by_timestamp = defaultdict(list)
        by_name = defaultdict(list)
        all_scores = self.anomaly_detector.get_all_scores()

        for anomaly in self.anomalies or ():
            window_start, window_end = anomaly.get_time_window()
            anomaly_t = anomaly.exact_timestamp

            # Pad the window by half its length on each side; fall back to 30
            # when the window has zero length.
            margin = (window_end - window_start) / 2 or 30
            padded_start = window_start - margin
            padded_end = window_end + margin
            cropped = all_scores.crop(padded_start, padded_end)

            # Keep widening until at least two score points are covered.
            while len(cropped) < 2:
                padded_start -= margin
                padded_end += margin
                cropped = all_scores.crop(padded_start, padded_end)

            # Correlate with other metrics
            for related in self.related_metrices:
                try:
                    correlation = Correlator(
                        self.metrix,
                        related,
                        time_period=(padded_start, padded_end),
                        use_anomaly_score=True).get_correlation_result()
                    details = correlation.__dict__
                    by_timestamp[anomaly_t].append(
                        (padded_start, padded_end, details, related))
                    by_name[related].append(
                        (padded_start, padded_end, details))
                except exceptions.NotEnoughDataPoints:
                    pass

        self.output = by_timestamp
        self.output_by_name = by_name
Example #28
0
class TestAnomalyDetector(unittest.TestCase):
  """
  Tests for AnomalyDetector: algorithm selection, algorithm parameters,
  thresholds and score access.
  """

  def setUp(self):
    self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
    self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}

    self.detector1 = AnomalyDetector(self.s1)
    self.detector2 = AnomalyDetector(self.s2)

  def _sign_test_anomalies(self, ts, bs, algorithm_params):
    """
    Run the 'sign_test' algorithm on ts against baseline bs and return the anomalies.
    """
    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    return detector.get_anomalies()

  def _assert_window(self, anomaly, start, end):
    """
    Assert that the anomaly's time window is exactly (start, end).
    """
    s, e = anomaly.get_time_window()
    self.assertEqual(s, start)
    self.assertEqual(e, end)

  def _assert_score_about_98_5(self, anomaly):
    """
    Assert that the anomaly score lies strictly between 98 and 99.
    """
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

  def test_custom_algorithm(self):
    """
    Test passing a custom algorithm class
    """
    detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_class=CustomAlgo,
                               algorithm_params={'percent_threshold_upper': 20, 'percent_threshold_lower': -20})
    anomalies = detector.get_anomalies()
    self.assertTrue(anomalies is not None)
    self.assertTrue(len(anomalies) > 0)

  def test_diff_percent_threshold_algorithm(self):
    """
    Test "diff percent threshold" algorithm with a threshold of 20%
    """
    detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_name='diff_percent_threshold',
                               algorithm_params={'percent_threshold_upper': 20, 'percent_threshold_lower': -20})
    anomalies = detector.get_anomalies()
    self.assertTrue(anomalies is not None)
    self.assertTrue(len(anomalies) > 0)
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                              algorithm_name='diff_percent_threshold'))

  def test_sign_test_algorithm(self):
    """
    Test "sign test" algorithm with a threshold of 20%

    Covers missing parameters, a series identical to its baseline,
    excursions above and below the baseline, and excursions separated by a
    big and by a small gap.
    """
    bs = {t: 1 for t in range(1, 100)}
    ts = dict(bs)

    # test missing parameters
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                              algorithm_name='sign_test'))

    # Simple tests
    algorithm_params = {'percent_threshold_upper': 20,
                        'percent_threshold_lower': -20,
                        'scan_window': 24,
                        'confidence': 0.01}

    # first no anomalies
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    ts.update({t: 1.200001 for t in range(10, 34)})
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    # note the anomaly is larger than scan window
    self._assert_window(anomaly, 4, 39)
    # score should be roughly 98.5
    self._assert_score_about_98_5(anomaly)

    # anomaly below baseline
    ts.update({t: 0.799999 for t in range(10, 34)})
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    self._assert_window(anomaly, 4, 39)
    # score should be roughly 98.5
    self._assert_score_about_98_5(anomaly)

    # anomalies separated by big gap
    ts.update(bs)
    ts.update({t: 0.799999 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(60, 84)})
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
    self.assertEqual(len(anomalies), 2)
    anomaly = anomalies[0]
    self._assert_window(anomaly, 1, 30)
    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)

    anomaly = anomalies[1]
    self._assert_window(anomaly, 54, 89)
    # score should be roughly 98.5
    self._assert_score_about_98_5(anomaly)

    # anomalies separated by small gap
    ts.update(bs)
    ts.update({t: 1.21 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(30, 40)})
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)

    # just one
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    self._assert_window(anomaly, 1, 40)
    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)

  def test_sign_test_algorithm_with_shift(self):
    """
    Test the "sign test" algorithm when a 'shift' is supplied.

    With an upper threshold of 10% and a shift of 1, a rise to 1.2 is
    expected to produce no anomaly while a rise to 2.100001 produces one.
    With a lower threshold of -20% and a shift of -0.1, a drop to 0.799999
    is expected to produce no anomaly while a drop to 0.699999 produces one.
    """
    bs = {t: 1 for t in range(1, 100)}

    # Simple tests
    algorithm_params = {'percent_threshold_upper': 10,
                        'shift': 1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    # bigger than 10 percent but below bias
    ts.update({t: 1.2 for t in range(10, 34)})

    # first no anomalies
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    # uses bias
    ts.update({t: 2.100001 for t in range(10, 34)})
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    # note the anomaly is larger than scan window
    self._assert_window(anomaly, 4, 39)
    # score should be roughly 98.5
    self._assert_score_about_98_5(anomaly)

    # anomaly below baseline but not below baseline with shift
    algorithm_params = {'percent_threshold_lower': -20,
                        'shift': -0.1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts.update({t: 0.799999 for t in range(10, 34)})
    # no anomalies
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # lower the time series by 0.1
    ts.update({t: 0.699999 for t in range(10, 34)})
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    self._assert_window(anomaly, 4, 39)
    # score should be roughly 98.5
    self._assert_score_about_98_5(anomaly)

  def test_absolute_threshold_algorithm(self):
    """
    Test "absolute threshold" algorithm with a upper and lower threshold of 0.2
    """
    detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                               algorithm_params={'absolute_threshold_value_upper': 0.2,
                                                 'absolute_threshold_value_lower': 0.2})
    anomalies = detector.get_anomalies()
    self.assertTrue(anomalies is not None)
    self.assertTrue(len(anomalies) > 0)
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      lambda: AnomalyDetector(self.s1, algorithm_name='absolute_threshold'))

  def test_threshold(self):
    """
    Test score threshold=0
    """
    detector = AnomalyDetector(self.s1, score_threshold=0)
    self.assertTrue(len(detector.get_anomalies()) == 1)
    self.assertTrue(detector.get_anomalies() is not None)

  def test_score_only(self):
    """
    Test that score_only parameter doesn't give anomalies
    """
    detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
    detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
    self.assertTrue(detector2.get_anomalies() is not None)
    self.assertTrue(len(detector.get_anomalies()) == 0)

  def test_get_all_scores(self):
    """
    Test if function get_all_scores works as expected.
    """
    self.assertTrue(isinstance(self.detector1.get_all_scores(), TimeSeries))
    self.assertEqual(len(self.detector1.get_all_scores()), len(self.detector1.time_series))

  def test_get_anomalies(self):
    """
    Test if anomaly is found as expected.
    """
    self.assertTrue(self.detector1.get_anomalies() is not None)

  def test_algorithm_DefaultDetector(self):
    """
    Test if optional parameter algorithm works as expected.
    """
    detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
    self.assertEqual(detector.get_all_scores().timestamps, self.detector1.get_all_scores().timestamps)
    self.assertEqual(detector.get_all_scores().values, self.detector1.get_all_scores().values)

  def test_algorithm(self):
    """
    Test if exception AlgorithmNotFound is raised as expected.
    """
    self.assertRaises(exceptions.AlgorithmNotFound, lambda: AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm'))

  def test_algorithm_params(self):
    """
    Test if optional parameter algorithm_params works as expected.
    """
    self.assertRaises(ValueError, lambda: AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0'))
    detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector", algorithm_params={'smoothing_factor': 0.3})
    self.assertNotEqual(self.detector1.get_all_scores().values, detector.get_all_scores().values)

  def test_anomaly_threshold(self):
    """
    Test if score_percent_threshold works as expected.
    """
    detector = AnomalyDetector(self.s1, score_percent_threshold=0.1, algorithm_name='exp_avg_detector')
    detector1 = AnomalyDetector(self.s1, score_percent_threshold=0.1, algorithm_name='derivative_detector')
    self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
Example #29
0
class TestAnomalyDetector(unittest.TestCase):
  """
  Tests for AnomalyDetector thresholds, score access and algorithm selection.
  """

  def setUp(self):
    self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0.5, 5: 1, 6: 1, 7: 1, 8: 0}
    self.s2 = {0: 0, 1: 0.5, 2: 1, 3: 1, 4: 1, 5: 0, 6: 0, 7: 0, 8: 0}
    self.detector1 = AnomalyDetector(self.s1)
    self.detector2 = AnomalyDetector(self.s2)

  def test_threshold(self):
    """
    A score_threshold of 0 is expected to yield exactly one anomaly.
    """
    zero_threshold = AnomalyDetector(self.s1, score_threshold=0)
    self.assertEqual(len(zero_threshold.get_anomalies()), 1)
    self.assertIsNotNone(zero_threshold.get_anomalies())

  def test_score_only(self):
    """
    With score_only=True the detector is expected to report None as anomalies.
    """
    scores_only = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
    with_anomalies = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
    self.assertIsNotNone(with_anomalies.get_anomalies())
    self.assertIsNone(scores_only.get_anomalies())

  def test_get_all_scores(self):
    """
    get_all_scores returns a TimeSeries as long as the input series.
    """
    self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
    self.assertEqual(len(self.detector1.get_all_scores()), len(self.detector1.time_series))

  def test_get_anomalies(self):
    """
    get_anomalies on the default detector does not return None.
    """
    self.assertIsNotNone(self.detector1.get_anomalies())

  def test_algorithm_DefaultDetector(self):
    """
    Naming 'default_detector' explicitly gives the same scores as naming nothing.
    """
    explicit = AnomalyDetector(self.s1, algorithm_name='default_detector')
    self.assertEqual(explicit.get_all_scores().timestamps, self.detector1.get_all_scores().timestamps)
    self.assertEqual(explicit.get_all_scores().values, self.detector1.get_all_scores().values)

  def test_algorithm(self):
    """
    An unknown algorithm name raises AlgorithmNotFound.
    """
    def build_with_unknown_algorithm():
      return AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    self.assertRaises(exceptions.AlgorithmNotFound, build_with_unknown_algorithm)

  def test_algorithm_params(self):
    """
    Non-dict algorithm_params raise InvalidDataFormat; valid ones change the scores.
    """
    def build_with_bad_params():
      return AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')

    self.assertRaises(exceptions.InvalidDataFormat, build_with_bad_params)
    tuned = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector", algorithm_params={'smoothing_factor': 0.3})
    self.assertNotEqual(self.detector1.get_all_scores().values, tuned.get_all_scores().values)

  def test_anomaly_threshold(self):
    """
    The same score_percentile_threshold gives different anomalies for different algorithms.
    """
    common = dict(score_percentile_threshold=0.1)
    exp_avg = AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', **common)
    derivative = AnomalyDetector(self.s1, algorithm_name='derivative_detector', **common)
    self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
Example #30
0
                                       function=InterpolateFunction.LINEAR))

    query.set_transformation_filter(tf)

    series_list = svc.query(query)
    for series in series_list:
        metric_id = '- %s %s' % (series.metric, print_tags(series.tags))
        log('\t' + metric_id)
        # exclude empty series for specific tags
        if len(series.data) > 2:
            ts = {int(sample.t / 1000): sample.v for sample in series.data}

            detector = AnomalyDetector(ts, score_threshold=args.min_score)

            anomalies = []
            for anomaly in detector.get_anomalies():
                if time.mktime(now.timetuple()) - args.last_hours * 3600 <= anomaly.exact_timestamp:
                    anomalies.append(anomaly)

            if anomalies:
                message.append(metric_id)
                for anomaly in anomalies:
                    t_start, t_end = format_t(anomaly.start_timestamp), format_t(anomaly.end_timestamp)
                    t_exact = format_t(anomaly.exact_timestamp)
                    anomaly_msg = '\tAnomaly from %s to %s with score %s: %s, %s' % (
                        t_start, t_end, anomaly.anomaly_score, t_exact, ts[anomaly.exact_timestamp])
                    message.append(anomaly_msg)

msg = '\n'.join(message)
message_service.insert(Message('anomaly_detection', 'python_script', 'anomaly', now, 'INFO', {}, msg))
print(msg)
Example #31
0
from luminol.anomaly_detector import AnomalyDetector
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pandas as pd
import numpy as np
# Load the article sample and keep the rows for location 'Belarus',
# 'quater' 4 and keyword 'КАМАЗ'. Vectorised boolean masks replace the
# original row-by-row DataFrame.apply.
_df = pd.read_csv('article_sample.csv')
_ts = _df[(_df['location'] == 'Belarus') & (_df['quater'] == 4) & (_df['keyword'] == 'КАМАЗ')]
# Keep only the rows whose date string contains '2018'.
_ts = _ts[_ts['date'].apply(lambda x: '2018' in x)]


def _day_index(date_string):
    """
    Map a '-'-separated date string to (second field - 10) * 30 + third field.

    NOTE(review): assumes dates look like 'YYYY-MM-DD', which makes the
    result an approximate day offset counted from month 10 - confirm.
    """
    parts = [int(part) for part in date_string.split('-')[1:]]
    return (parts[0] - 10) * 30 + parts[1]


# Work on an explicit copy: assigning a column into a slice of another frame
# triggers pandas' SettingWithCopyWarning and may not write where expected.
ts1 = _ts[['date', 'polarity']].copy()
ts1['date'] = ts1['date'].apply(_day_index)

# Mean polarity per day index, as a frame (for plotting) and as a dict
# (the time series handed to the detector).
_ts1 = ts1.groupby('date')[['polarity']].mean().sort_index()
ts1 = _ts1['polarity'].to_dict()

my_detector = AnomalyDetector(ts1, score_threshold=1)
score = my_detector.get_all_scores()
anomalies = my_detector.get_anomalies()

fig = plt.figure(figsize=(15, 10))
ax = fig.subplots(1)
ax.plot(_ts1, linestyle='--', marker='o')
# Outline every anomaly window with a randomly coloured rectangle that spans
# the full plotted y-range (-1.5 .. 1.5).
for _index, _ano in enumerate(anomalies):
    _c = np.random.rand(3)
    _width = _ano.end_timestamp - _ano.start_timestamp
    rect = patches.Rectangle((_ano.start_timestamp, -1.5), _width, 3, linewidth=2,
                             edgecolor=_c, facecolor='none',
                             label='Anomaly #{}'.format(_index))
    ax.add_patch(rect)

ax.legend()
ax.set_ylim([-1.5, 1.5])
fig.tight_layout()
fig.savefig('temp.png')
fig.show()
Example #32
0
  def test_sign_test_algorithm(self):
    """
    Test "sign test" algorithm with a threshold of 20%

    A flat baseline (value 1 for t in 1..99) is compared with copies that
    deviate by slightly more than 20% over chosen ranges; the test checks
    the number, the time windows and the scores of the reported anomalies.
    """
    bs = {t: 1 for t in range(1, 100)}
    ts = dict(bs)

    # test missing parameters
    with self.assertRaises(exceptions.RequiredParametersNotPassed):
      AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_name='sign_test')

    # Simple tests
    algorithm_params = {'percent_threshold_upper': 20,
                        'percent_threshold_lower': -20,
                        'scan_window': 24,
                        'confidence': 0.01}

    def detect():
      # Run the sign test on the current contents of ts against the baseline.
      detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                                 algorithm_params=algorithm_params)
      return detector.get_anomalies()

    # first no anomalies
    anomalies = detect()
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    ts.update({t: 1.200001 for t in range(10, 34)})
    anomalies = detect()
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()

    # note the anomaly is larger than scan window
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomaly below baseline
    ts.update({t: 0.799999 for t in range(10, 34)})
    anomalies = detect()
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomalies separated by big gap
    ts.update(bs)
    ts.update({t: 0.799999 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(60, 84)})
    anomalies = detect()
    self.assertEqual(len(anomalies), 2)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 1)
    self.assertEqual(e, 30)

    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)

    anomaly = anomalies[1]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 54)
    self.assertEqual(e, 89)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomalies separated by small gap
    ts.update(bs)
    ts.update({t: 1.21 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(30, 40)})
    anomalies = detect()

    # just one
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 1)
    self.assertEqual(e, 40)

    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)
class TestAnomalyDetector(unittest.TestCase):
  """
  Tests for AnomalyDetector on two small hand-written series.
  """

  def setUp(self):
    """
    Build the two sample series and a default detector for each.
    """
    self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
    self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
    self.detector1 = AnomalyDetector(self.s1)
    self.detector2 = AnomalyDetector(self.s2)

  def test_custom_algorithm(self):
    """
    Test passing a custom algorithm class
    """
    detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_class=CustomAlgo,
                               algorithm_params={'percent_threshold_upper': 20, 'percent_threshold_lower': -20})
    anomalies = detector.get_anomalies()
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)

  def test_diff_percent_threshold_algorithm(self):
    """
    Test "diff percent threshold" algorithm with a threshold of 20%
    """
    detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_name='diff_percent_threshold',
                               algorithm_params={'percent_threshold_upper': 20, 'percent_threshold_lower': -20})
    anomalies = detector.get_anomalies()
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)
    # Without algorithm_params the required thresholds are missing.
    with self.assertRaises(exceptions.RequiredParametersNotPassed):
      AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_name='diff_percent_threshold')

  def test_absolute_threshold_algorithm(self):
    """
    Test "absolute threshold" algorithm with an upper and lower threshold of 0.2
    """
    detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                               algorithm_params={'absolute_threshold_value_upper': 0.2,
                                                 'absolute_threshold_value_lower': 0.2})
    anomalies = detector.get_anomalies()
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)
    # Without algorithm_params the required thresholds are missing.
    with self.assertRaises(exceptions.RequiredParametersNotPassed):
      AnomalyDetector(self.s1, algorithm_name='absolute_threshold')

  def test_threshold(self):
    """
    Test score threshold=0
    """
    anomalies = AnomalyDetector(self.s1, score_threshold=0).get_anomalies()
    # Check for None first: len(None) would raise before a later None check
    # could ever report the problem.
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)

  def test_score_only(self):
    """
    Test that score_only parameter doesn't give anomalies
    """
    detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
    detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
    self.assertIsNotNone(detector2.get_anomalies())
    self.assertEqual(len(detector.get_anomalies()), 0)

  def test_get_all_scores(self):
    """
    Test if function get_all_scores works as expected.
    """
    scores = self.detector1.get_all_scores()
    self.assertIsInstance(scores, TimeSeries)
    self.assertEqual(len(scores), len(self.detector1.time_series))

  def test_get_anomalies(self):
    """
    Test if anomaly is found as expected.
    """
    self.assertIsNotNone(self.detector1.get_anomalies())

  def test_algorithm_DefaultDetector(self):
    """
    Test if optional parameter algorithm works as expected.
    """
    detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
    self.assertEqual(detector.get_all_scores().timestamps, self.detector1.get_all_scores().timestamps)
    self.assertEqual(detector.get_all_scores().values, self.detector1.get_all_scores().values)

  def test_algorithm(self):
    """
    Test if exception AlgorithmNotFound is raised as expected.
    """
    with self.assertRaises(exceptions.AlgorithmNotFound):
      AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

  def test_algorithm_params(self):
    """
    Test if optional parameter algorithm_params works as expected.
    """
    # algorithm_params must not be accepted as a plain string.
    with self.assertRaises(ValueError):
      AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')
    detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector", algorithm_params={'smoothing_factor': 0.3})
    self.assertNotEqual(self.detector1.get_all_scores().values, detector.get_all_scores().values)

  def test_anomaly_threshold(self):
    """
    Test if score_percent_threshold works as expected.
    """
    detector = AnomalyDetector(self.s1, score_percent_threshold=0.1, algorithm_name='exp_avg_detector')
    detector1 = AnomalyDetector(self.s1, score_percent_threshold=0.1, algorithm_name='derivative_detector')
    self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
Example #34
0
        # Keep only the rows whose 'stage_parallel' equals the selected stage.
        ts = ts.loc[
            ts['stage_parallel'] ==
            'Puding Mixing #1']  # 'Puding Mixing #1', 'Sterilization #111', 'Storage tank #1'
        value_col = 'sensor_value'
        # TODO: temporary placeholder imputation - missing sensor values are
        # replaced with the column median. Remove later.
        ts[value_col] = ts[value_col].transform(
            lambda x: x.fillna(x.median(), inplace=False))
    else:
        raise Exception('Unknown example.')

    # run anomaly detection algorithm
    # Build the {epoch: value} dictionary handed to the detector.
    keys = ts['epoch']
    values = ts[value_col]
    ts_dict = dict(zip(keys, values))

    algorithm_name = 'exp_avg_detector'

    anomaly_detector = AnomalyDetector(ts_dict, algorithm_name=algorithm_name)
    anomalies = anomaly_detector.get_anomalies()
    anomaly_scores = anomaly_detector.get_all_scores()

    # plot results
    # NOTE(review): `dir` is a hard-coded path on one developer's machine.
    plot_ts_and_anomalies(ts,
                          value_col,
                          anomalies,
                          anomaly_scores,
                          ts_only=False,
                          dir='/Users/yuval/Desktop/',
                          show=True,
                          plotly=True)
Example #35
0
class TestAnomalyDetector(unittest.TestCase):
    """
    Tests for AnomalyDetector on two small hand-written series.
    """

    def setUp(self):
        """
        Build the two sample series and a default detector for each.
        """
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0.5, 5: 1, 6: 1, 7: 1, 8: 0}
        self.s2 = {0: 0, 1: 0.5, 2: 1, 3: 1, 4: 1, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_threshold(self):
        """
        Test score threshold=0
        """
        anomalies = AnomalyDetector(self.s1, score_threshold=0).get_anomalies()
        # Check for None first: len(None) would raise before a later None
        # check could ever report the problem.
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 1)

    def test_score_only(self):
        """
        Test that score_only parameter doesn't give anomalies
        """
        detector = AnomalyDetector(self.s1,
                                   score_only=True,
                                   algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1,
                                    algorithm_name='derivative_detector')
        self.assertIsNotNone(detector2.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        scores = self.detector1.get_all_scores()
        self.assertIsInstance(scores, TimeSeries)
        self.assertEqual(len(scores), len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps,
                         self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values,
                         self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        # algorithm_params must not be accepted as a plain string.
        with self.assertRaises(ValueError):
            AnomalyDetector(self.s1,
                            algorithm_name='exp_avg_detector',
                            algorithm_params='0')
        detector = AnomalyDetector(self.s1,
                                   algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values,
                            detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percent_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1,
                                   score_percent_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1,
                                    score_percent_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(),
                            detector.get_anomalies())
Example #36
0
import csv
import time
import luminol
from luminol.anomaly_detector import AnomalyDetector

# Time series read from a two-column CSV: column 0 is the key (kept as the
# string found in the file), column 1 the value converted to float.
bandwidth_up_timeseries = {}
# newline='' leaves line-ending handling to the csv module, as its
# documentation requires for files passed to csv.reader.
with open('data/synthetic/tv.WAN.uncontrolled.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for row in csv_reader:
        # csv.reader yields [] for blank lines; skip them instead of
        # failing with an IndexError.
        if not row:
            continue
        bandwidth_up_timeseries[row[0]] = float(row[1])

# Score the series with the bitmap detector; only anomalies scoring at
# least score_threshold are reported.
bandwidth_up_detector = AnomalyDetector(bandwidth_up_timeseries,
                                        algorithm_name="bitmap_detector",
                                        score_threshold=10,
                                        algorithm_params={"precision": 10})
anomalies = bandwidth_up_detector.get_anomalies()
# One line per anomaly: window start, window end, score.
for anomaly in anomalies:
    print(anomaly.start_timestamp, anomaly.end_timestamp,
          anomaly.anomaly_score)
Example #37
0
def print_plot_of_qty(csv_path,
                      group_by,
                      qty_interest,
                      include_extremes=False,
                      extreme_threshold=400000):
    """
    Aggregate one quantity over time, detect its anomalies and plot them.

    @param csv_path (str)           :   path to the CSV file
    @param group_by (str)           :   a one-character long string (either "W", "D", "M") specifying in which units of time to
                                        group the data by (either weeks, days, or months, respectively)
    @param qty_interest (str)       :   the identifier of the quantity to be plotted
    @param include_extremes (bool)  :   whether or not to include extreme outliers. Defaults to false.
    @param extreme_threshold (float):   the threshold to cut off extreme outliers (using qty_interest). Exclusive.
    """

    # Ingest the CSV file
    df = ingest_csv(csv_path)
    # Sum the quantity of interest per time bucket of the requested frequency
    group = df.groupby(pd.Grouper(freq=str(group_by).upper()))[[
        qty_interest
    ]].sum().apply(list).to_dict()[qty_interest]
    # Time series keyed by whole seconds since 1970-01-01
    epoch = datetime(1970, 1, 1)
    timeseries = {
        int((stamp - epoch).total_seconds()): total
        for stamp, total in group.items()
    }
    # Number of time buckets; sizes the bitmap detector windows below
    n_points = len(group)

    # Default algorithm properties
    algo_name = 'derivative_detector'
    algo_params = {
        'smoothing_factor': 0.2,
    }
    algo_threshold = 2

    # For any extreme anomalies
    extreme_anomalies = None
    if not include_extremes:
        # We ignore extremes by default: keep values below the threshold
        timeseries = {
            k: v
            for k, v in timeseries.items() if v < extreme_threshold
        }
    else:
        # The extremes are exactly the values the default branch drops.
        # (The original compared with `<=`, which selected the non-extreme
        # values instead.)
        extreme_anomalies = {
            k: v
            for k, v in timeseries.items() if v >= extreme_threshold
        }

        algo_name = 'bitmap_detector'
        # The original sized the windows with `len(keys)`, an undefined
        # name that raised NameError whenever this branch ran.
        window_size = int(0.30 * n_points)
        algo_params = {
            'precision': 10,
            'lag_window_size': window_size,
            'future_window_size': window_size,
            'chunk_size': 2,
        }

    # Detector for anomalies
    detector = AnomalyDetector(time_series=timeseries,
                               algorithm_name=algo_name,
                               algorithm_params=algo_params,
                               score_threshold=algo_threshold)
    # Value of the series at the exact timestamp of every anomaly found
    anomalies = {
        anomaly.exact_timestamp: timeseries[anomaly.exact_timestamp]
        for anomaly in detector.get_anomalies()
    }

    # Plot and print the graph, and anomalies (and include extremes if necessary)
    plot_anomalies(timeseries, anomalies, extreme_anomalies)
class TestAnomalyDetector(unittest.TestCase):
    """
    Tests for AnomalyDetector on two small hand-written series.
    """

    def setUp(self):
        """
        Build the two sample series and a default detector for each.
        """
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
        self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_diff_percent_threshold_algorithm(self):
        """
        Test "diff percent threshold" algorithm with a threshold of 20%
        """
        detector = AnomalyDetector(self.s1,
                                   baseline_time_series=self.s2,
                                   algorithm_name='diff_percent_threshold',
                                   algorithm_params={
                                       'percent_threshold_upper': 20,
                                       'percent_threshold_lower': -20
                                   })
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        # Without algorithm_params the required thresholds are missing.
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1,
                            baseline_time_series=self.s2,
                            algorithm_name='diff_percent_threshold')

    def test_absolute_threshold_algorithm(self):
        """
        Test "absolute threshold" algorithm with an upper and lower threshold of 0.2
        """
        detector = AnomalyDetector(self.s1,
                                   algorithm_name='absolute_threshold',
                                   algorithm_params={
                                       'absolute_threshold_value_upper': 0.2,
                                       'absolute_threshold_value_lower': 0.2
                                   })
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        # Without algorithm_params the required thresholds are missing.
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1, algorithm_name='absolute_threshold')

    def test_threshold(self):
        """
        Test score threshold=0
        """
        anomalies = AnomalyDetector(self.s1, score_threshold=0).get_anomalies()
        # Check for None first: len(None) would raise before a later None
        # check could ever report the problem.
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 1)

    def test_score_only(self):
        """
        Test that score_only parameter doesn't give anomalies
        """
        detector = AnomalyDetector(self.s1,
                                   score_only=True,
                                   algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1,
                                    algorithm_name='derivative_detector')
        self.assertIsNotNone(detector2.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        scores = self.detector1.get_all_scores()
        self.assertIsInstance(scores, TimeSeries)
        self.assertEqual(len(scores), len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps,
                         self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values,
                         self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        # algorithm_params must not be accepted as a plain string.
        with self.assertRaises(ValueError):
            AnomalyDetector(self.s1,
                            algorithm_name='exp_avg_detector',
                            algorithm_params='0')
        detector = AnomalyDetector(self.s1,
                                   algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values,
                            detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percent_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1,
                                   score_percent_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1,
                                    score_percent_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(),
                            detector.get_anomalies())
Example #39
0
def job():
    """
    Run one anomaly-detection pass over recent ADT_A31 message counts.

    Counts ADT_A31 messages per INTERVAL-second bucket (the newest LIMIT
    buckets) in ../hl7-combined.db, scores the counts with the
    ALGORITH_NAME detector, prints every score and every anomaly, and
    calls quit() on the first anomaly whose score reaches THRESHOLD.
    Nothing is scored or printed after the start line when the query
    returns no rows.
    """
    print("Run start: " +
          datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    # Connect to either the normal, anomaly or combined database. Note for SQLlite, the detect types
    # line converts from SQLlite datatypes(typically text) to Python native datatypes
    conn = sqlite3.connect('../hl7-combined.db',
                           detect_types=sqlite3.PARSE_DECLTYPES
                           | sqlite3.PARSE_COLNAMES)
    try:
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()

        # Group messages into X second intervals
        cur.execute(
            "select messages.MSGTYPE, datetime((strftime('%s', messages.QueueTime) /"
            + INTERVAL + ") * " + INTERVAL +
            ", 'unixepoch') interval, count(*)  count from messages"
            " where msgtype = 'ADT_A31' group by interval order by interval desc limit "
            + LIMIT)

        rows = cur.fetchall()
    finally:
        # The fetched rows are all that is needed from here on; close the
        # connection on every path, including errors and the quit() below.
        conn.close()

    if not rows:
        return

    # Luminol library requires a 2 column unix timestamp + count
    data = {}
    for row in rows:
        obs_timestamp = time.mktime(
            datetime.datetime.strptime(row["interval"],
                                       "%Y-%m-%d %H:%M:%S").timetuple())
        data[obs_timestamp] = row["count"]

    print(data)
    # DETECTOR TYPE - see https://github.com/linkedin/luminol/tree/master/src/luminol/algorithms/anomaly_detector_algorithms
    detector = AnomalyDetector(data,
                               algorithm_name=ALGORITH_NAME,
                               score_threshold=THRESHOLD)

    all_scores = detector.get_all_scores()
    anom_score = []
    for (timestamp, value) in all_scores.iteritems():
        t_str = time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(timestamp))
        anom_score.append([t_str, value])

    print("----- ALL SCORES ----- ")
    for entry in anom_score:
        print(entry)

    anomalies = detector.get_anomalies()

    for anomaly in anomalies:
        # The original left the score on a line of its own, where it was a
        # no-op expression and never printed; it is now part of the line.
        print("       match: " + time.strftime(
            '%Y-%m-%d %H:%M:%S', time.localtime(anomaly.exact_timestamp)) +
              " " + str(anomaly.anomaly_score))

        # NOTE(review): the original also required
        # `anomalies.count > int(LIMIT)`, which compares the bound list
        # method (not a length) with an int: always true on Python 2, a
        # TypeError on Python 3. The clause is dropped to keep the Python 2
        # behaviour; presumably a length check was meant - confirm the rule.
        if anomaly.anomaly_score >= int(THRESHOLD):
            print("ANOMALY DETECTED - NOTIFYING ADMINISTRATOR / CALLING WEBSERVICE ETC")
            quit()

    print("Run End: " +
          datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "\n")
Example #40
0
def main(argv):
    """
    Find anomalies in one log and report where a second log correlates.

    Options are read from sys.argv[1:] (the argv parameter is accepted but
    not used, as in the original): -i/--input names the log that is
    searched for anomalies, -c/--correlate the log it is correlated with,
    -h/--help prints the usage text and -v is accepted.

    Both logs are parsed into {timestamp * 1000000: ID} dictionaries. For
    every anomaly found in the input log, a line is printed when the two
    series correlate over the anomaly's time window with a threshold of 0.8.
    """
    try:
        # The original list was missing a comma, which fused "input=" and
        # "correlate=" into the single long option "input=correlate=".
        opts, args = getopt.getopt(sys.argv[1:], "hi:c:v",
                                   ["help", "input=", "correlate="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)
    input_path = None
    correlate_path = None
    for o, a in opts:
        if o == "-v":
            # Accepted for compatibility; this script has no verbose output.
            pass
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-i", "--input"):
            input_path = a
        elif o in ("-c", "--correlate"):
            correlate_path = a
        else:
            assert False, "unhandled option"

    # Both files are required; without them open(None) would fail below.
    if input_path is None or correlate_path is None:
        usage()
        sys.exit(2)

    regex = r"(\s+)([+-]?\d*\.\d+)(?![-+0-9\.])(\s+)(\d+)(\s+)(\w+)(\s+)(\w+)(\s+)([a-z])(\s+)(\d+)(?:(?!X).)*(Length = )(\d+)(\s+)(BitCount = )(\d+)(\s+)(ID = )(\d+)"

    # Parse each log into {timestamp * 1000000: ID}; group 2 of a match is
    # the timestamp, group 20 the decimal ID.
    parsed = []
    for path in (input_path, correlate_path):
        with open(path) as log_file:
            text = log_file.read()
        events = {}
        for match in re.finditer(regex, text):
            events[float(match.group(2)) * 1000000] = match.group(20)
        parsed.append(events)
    mydict1, mydict2 = parsed

    my_detector1 = AnomalyDetector(mydict1,
                                   algorithm_name=("exp_avg_detector"))

    anomalies = my_detector1.get_anomalies()
    for a in anomalies:
        time_period = a.get_time_window()
        my_correlator = Correlator(mydict1, mydict2, time_period)

        # The threshold is passed positionally: the original keyword was
        # misspelled `treshold`.
        if my_correlator.is_correlated(0.8):
            print("mydict2 correlate with mydict at time period (%d, %d)" % time_period)
Example #41
0
def main(argv):
    """
    Print the anomalous events found in a CANoe ASCII (.asc) log.

    Options are read from sys.argv[1:] (the argv parameter is accepted but
    not used, as in the original): -i/--input names the log file,
    -h/--help prints the usage text, and -v prints every anomaly instead
    of only those with a score above 3.4.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:v", ["help", "input="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))
        usage()
        sys.exit(2)
    input_path = None
    verbose = False
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-i", "--input"):
            input_path = a
        else:
            assert False, "unhandled option"

    # The input file is required; without it open(None) would fail below.
    if input_path is None:
        usage()
        sys.exit(2)

    # this is the regular expression used to parse CANoe logs in ASCII format (.asc)
    regex = r"(\s+)([+-]?\d*\.\d+)(?![-+0-9\.])(\s+)(\d+)(\s+)(\w+)(\s+)(\w+)(\s+)([a-z])(\s+)(\d+)(?:(?!X).)*(Length = )(\d+)(\s+)(BitCount = )(\d+)(\s+)(ID = )(\d+)"

    with open(input_path) as log_file:
        inputfile = log_file.read()

    # event_dict stores the values (timestamp + CAN-ID) extracted from the logs
    event_dict = {}
    for match in re.finditer(regex, inputfile):
        # group 2 is the time since engine start in seconds, scaled by 1000000
        my_time = float(match.group(2)) * 1000000
        # match.group(20) is ID of CAN event in decimal
        event_dict[my_time] = match.group(20)

    my_detector = AnomalyDetector(event_dict,
                                  algorithm_name=("exp_avg_detector"))

    # filter events in time series for anomalies
    anomalies = my_detector.get_anomalies()

    # Blank line before the report. The original bare `print` statement
    # only did this on Python 2 and was a no-op on Python 3.
    print("")

    for attack in anomalies:
        if attack.exact_timestamp not in event_dict:
            continue
        # With "-v" every anomaly is printed, otherwise only those with a
        # score above 3.4.
        if verbose or attack.anomaly_score > 3.4:
            print("{timestamp} - ID: {id} - Score: {value}".format(
                timestamp=attack.exact_timestamp,
                id=event_dict[attack.exact_timestamp],
                value=attack.anomaly_score))
Example #42
0
 def test_score_only(self):
   """
   With score_only=True get_anomalies() must return None, while a detector
   built without it must return something.
   """
   algorithm = 'derivative_detector'
   scores_only = AnomalyDetector(self.s1, score_only=True, algorithm_name=algorithm)
   regular = AnomalyDetector(self.s1, algorithm_name=algorithm)
   regular_anomalies = regular.get_anomalies()
   self.assertTrue(regular_anomalies is not None)
   scores_only_anomalies = scores_only.get_anomalies()
   self.assertTrue(scores_only_anomalies is None)
Example #43
0
class RCA(object):
  """
  Detect anomalies in one time series and correlate the window around each
  anomaly with a list of related time series.

  The analysis runs during construction; afterwards the results are exposed as:
    output: dict mapping an anomaly's exact timestamp to a list of
      (window_start, window_end, correlation_result_dict, related_entry) tuples.
    output_by_name: dict mapping a related entry to a list of
      (window_start, window_end, correlation_result_dict) tuples.
  """

  def __init__(self, metrix, related_metrices):
    """
    Initializer
    :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
    :param list related_metrices: a list of time series. Each entry is handed to
      Correlator and is also used as a key of output_by_name, so it must be hashable.
    """
    self.metrix = self._load(metrix)
    # The detector receives the raw argument (not the loaded TimeSeries), as before.
    self.anomaly_detector = AnomalyDetector(metrix)
    self.related_metrices = related_metrices
    self.anomalies = self.anomaly_detector.get_anomalies()
    self._analyze()

  def _load(self, metrix):
    """
    Load time series.
    :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
    :return TimeSeries: a TimeSeries object.
    """
    if isinstance(metrix, TimeSeries):
      return metrix
    if isinstance(metrix, dict):
      return TimeSeries(metrix)
    # Anything else is treated as a csv path.
    return TimeSeries(utils.read_csv(metrix))

  def _analyze(self):
    """
    Correlate the metrix with every related metric around each detected anomaly.

    For each anomaly the anomaly's time window is widened by half its length on
    both sides (30 when the window has zero length), then widened further until
    it covers at least two anomaly-score points. Each related metric is then
    correlated with the metrix over that window. Related metrics for which the
    correlation raises NotEnoughDataPoints are skipped.

    Results are stored in self.output and self.output_by_name.
    """
    output = defaultdict(list)
    output_by_name = defaultdict(list)
    scores = self.anomaly_detector.get_all_scores()

    if self.anomalies:
      # A window can never contain more points than the score series itself, so
      # cap the requirement; demanding 2 points from a shorter series would make
      # the expansion loop below spin forever.
      required_points = min(2, len(scores))

      for anomaly in self.anomalies:
        start_t, end_t = anomaly.get_time_window()
        t = anomaly.exact_timestamp

        # Compute extended start timestamp and extended end timestamp.
        room = (end_t - start_t) / 2
        if not room:
          room = 30
        extended_start_t = start_t - room
        extended_end_t = end_t + room
        scores_cropped = scores.crop(extended_start_t, extended_end_t)

        # Adjust the two timestamps if not enough data points are included.
        while len(scores_cropped) < required_points:
          extended_start_t = extended_start_t - room
          extended_end_t = extended_end_t + room
          scores_cropped = scores.crop(extended_start_t, extended_end_t)

        # Correlate with other metrics
        for entry in self.related_metrices:
          try:
            entry_correlation_result = Correlator(self.metrix, entry, time_period=(extended_start_t, extended_end_t),
                                                  use_anomaly_score=True).get_correlation_result()
          except exceptions.NotEnoughDataPoints:
            # Best effort: a related metric without enough data in the window
            # simply contributes no record.
            continue
          record = extended_start_t, extended_end_t, entry_correlation_result.__dict__, entry
          record_by_name = extended_start_t, extended_end_t, entry_correlation_result.__dict__
          output[t].append(record)
          output_by_name[entry].append(record_by_name)

    self.output = output
    self.output_by_name = output_by_name
                                       function=InterpolateFunction.LINEAR))

    query.set_transformation_filter(tf)

    series_list = svc.query(query)
    # For every returned series: log it, run anomaly detection on series with
    # more than two samples, and append a report line to `message` for each
    # anomaly that falls inside the last `args.last_hours` hours.
    for series in series_list:
        metric_id = '- %s %s' % (series.metric, print_tags(series.tags))
        log('\t' + metric_id)

        # exclude empty series for specific tags
        if len(series.data) <= 2:
            continue

        # Key each sample value by int(sample.t / 1000)
        # (presumably milliseconds -> seconds; confirm against the data source).
        ts = {int(sample.t / 1000): sample.v for sample in series.data}
        detector = AnomalyDetector(ts, score_threshold=args.min_score)

        # Keep only anomalies whose exact timestamp is not older than the cutoff.
        anomalies = [
            anomaly for anomaly in detector.get_anomalies()
            if time.mktime(now.timetuple()) - args.last_hours * 3600 <= anomaly.exact_timestamp
        ]
        if not anomalies:
            continue

        # Header line for the series, followed by one line per recent anomaly.
        message.append(metric_id)
        for anomaly in anomalies:
            message.append('\tAnomaly from %s to %s with score %s: %s, %s' % (
                format_t(anomaly.start_timestamp),
                format_t(anomaly.end_timestamp),
                anomaly.anomaly_score,
                format_t(anomaly.exact_timestamp),
                ts[anomaly.exact_timestamp]))

msg = '\n'.join(message)
message_service.insert(Message('anomaly_detection', 'python_script', 'anomaly', now, 'INFO', {}, msg))
print(msg)