def test_threshold(self):
    """
    Verify that a score threshold of 0 yields exactly one anomaly for s1.
    """
    detector = AnomalyDetector(self.s1, score_threshold=0)
    anomaly_count = len(detector.get_anomalies())
    self.assertEqual(anomaly_count, 1)
    self.assertIsNotNone(detector.get_anomalies())
def test_anomaly_threshold(self):
    """
    Verify that two algorithms given the same score_percentile_threshold
    report different anomalies.
    """
    shared_kwargs = {'score_percentile_threshold': 0.1}
    exp_avg = AnomalyDetector(self.s1, algorithm_name='exp_avg_detector',
                              **shared_kwargs)
    derivative = AnomalyDetector(self.s1, algorithm_name='derivative_detector',
                                 **shared_kwargs)
    self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
def test_threshold(self):
    """
    With score_threshold=0 the detector must return a one-element,
    non-None anomaly collection for s1.
    """
    zero_threshold_detector = AnomalyDetector(self.s1, score_threshold=0)
    self.assertEqual(1, len(zero_threshold_detector.get_anomalies()))
    self.assertIsNot(zero_threshold_detector.get_anomalies(), None)
def test_score_only(self):
    """
    Verify that score_only=True produces no anomalies, while the same
    algorithm without it still returns an anomaly collection.
    """
    algorithm = 'derivative_detector'
    scores_only = AnomalyDetector(self.s1, score_only=True,
                                  algorithm_name=algorithm)
    full_detection = AnomalyDetector(self.s1, algorithm_name=algorithm)
    self.assertIsNotNone(full_detection.get_anomalies())
    self.assertEqual(len(scores_only.get_anomalies()), 0)
def test_score_only(self):
    """
    A detector built with score_only=True must report zero anomalies; a
    regular detector on the same series must return a non-None result.
    """
    common = dict(algorithm_name='derivative_detector')
    quiet = AnomalyDetector(self.s1, score_only=True, **common)
    regular = AnomalyDetector(self.s1, **common)
    self.assertIsNot(regular.get_anomalies(), None)
    self.assertEqual(0, len(quiet.get_anomalies()))
def test_score_only(self):
    """
    Check the score_only flag: no anomalies are exposed when it is set.
    """
    score_only_detector = AnomalyDetector(
        self.s1,
        score_only=True,
        algorithm_name='derivative_detector',
    )
    default_detector = AnomalyDetector(
        self.s1,
        algorithm_name='derivative_detector',
    )
    default_anomalies = default_detector.get_anomalies()
    self.assertIsNotNone(default_anomalies)
    score_only_anomalies = score_only_detector.get_anomalies()
    self.assertEqual(len(score_only_anomalies), 0)
def test_anomaly_threshold(self):
    """
    Verify that two algorithms given the same score_percent_threshold
    report different anomalies.
    """
    threshold = {'score_percent_threshold': 0.1}
    exp_avg = AnomalyDetector(self.s1, algorithm_name='exp_avg_detector',
                              **threshold)
    derivative = AnomalyDetector(self.s1, algorithm_name='derivative_detector',
                                 **threshold)
    self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
def test_sign_test_algorithm_interface(self):
    """
    Exercise the "sign test" algorithm with a 0% threshold and an offset
    of 2, once through the upper bound and once through the lower bound.
    """
    baseline = {t: 1 for t in range(1, 30)}

    def detect(series, params):
        # Every case here shares the same baseline and algorithm name.
        return AnomalyDetector(series,
                               baseline_time_series=baseline,
                               algorithm_name='sign_test',
                               algorithm_params=params).get_anomalies()

    # Upper bound: the first 20 points are raised to 3.1.
    upper_params = {
        'percent_threshold_upper': 0,
        'offset': 2,
        'scan_window': 24,
        'confidence': 0.01,
    }
    raised = dict(baseline)
    raised.update((t, 3.1) for t in range(1, 21))
    found = detect(raised, upper_params)
    self.assertIsNotNone(found)
    self.assertEqual(len(found), 1)

    # Lower bound: the first 24 points are set to 2.9.
    lower_params = {
        'percent_threshold_lower': 0,
        'offset': 2,
        'scan_window': 24,
        'confidence': 0.01,
    }
    lowered = dict(baseline)
    lowered.update((t, 2.9) for t in range(1, 25))
    found = detect(lowered, lower_params)
    self.assertIsNotNone(found)
    self.assertEqual(len(found), 1)
def test_custom_algorithm(self):
    """
    Verify that a custom algorithm class can be supplied through
    algorithm_class and that it reports at least one anomaly.
    """
    thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
    found = AnomalyDetector(self.s1,
                            baseline_time_series=self.s2,
                            algorithm_class=CustomAlgo,
                            algorithm_params=thresholds).get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)
def test_diff_percent_threshold_algorithm(self):
    """
    Check the "diff percent threshold" algorithm with a +/-20% band, and
    that constructing it without parameters is rejected.
    """
    thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
    found = AnomalyDetector(self.s1,
                            baseline_time_series=self.s2,
                            algorithm_name='diff_percent_threshold',
                            algorithm_params=thresholds).get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)

    # No algorithm_params at all must raise.
    with self.assertRaises(exceptions.RequiredParametersNotPassed):
        AnomalyDetector(self.s1,
                        baseline_time_series=self.s2,
                        algorithm_name='diff_percent_threshold')
def test_absolute_threshold_algorithm(self):
    """
    Check the "absolute threshold" algorithm with upper and lower
    thresholds of 0.2, and that omitting its parameters is rejected.
    """
    bounds = {
        'absolute_threshold_value_upper': 0.2,
        'absolute_threshold_value_lower': 0.2,
    }
    found = AnomalyDetector(self.s1,
                            algorithm_name='absolute_threshold',
                            algorithm_params=bounds).get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)

    # No algorithm_params at all must raise.
    with self.assertRaises(exceptions.RequiredParametersNotPassed):
        AnomalyDetector(self.s1, algorithm_name='absolute_threshold')
def test_sign_test_algorithm_interface(self):
    """
    Test "sign test" algorithm with a threshold of 0% and an offset of 2,
    once for the upper bound and once for the lower bound.
    """
    bs = dict()
    bs.update((t, 1) for t in range(1, 30))

    # Upper bound: the first 20 points are raised to 3.1; one anomaly expected.
    algorithm_params = {'percent_threshold_upper': 0,
                        'offset': 2,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    ts.update((t, 3.1) for t in range(1, 21))
    detector = AnomalyDetector(ts, baseline_time_series=bs,
                               algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()
    self.assertTrue(anomalies is not None)
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(len(anomalies), 1)

    # Lower bound: the first 24 points are set to 2.9; one anomaly expected.
    algorithm_params = {'percent_threshold_lower': 0,
                        'offset': 2,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    ts.update((t, 2.9) for t in range(1, 25))
    detector = AnomalyDetector(ts, baseline_time_series=bs,
                               algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 1)
def isAnomaly(self, data):
    """
    Report whether the last data point is flagged as an anomaly.

    The values are keyed by their position (0 .. len(data) - 1) and scored
    with the 'exp_avg_detector' algorithm using ``self.score_threshold``.

    :param data: indexable sequence of values.
    :return: tuple ``(is_anomaly, anomalies)``. ``(False, [])`` is returned
        when detection fails for any reason (best effort).
    """
    length = len(data)
    try:
        detector = AnomalyDetector({i: data[i] for i in range(length)},
                                   algorithm_name='exp_avg_detector',
                                   score_threshold=self.score_threshold)
        anomalies = detector.get_anomalies()
        for anomaly in anomalies:
            if anomaly.exact_timestamp == length - 1:
                return True, anomalies
    except Exception:
        # Deliberately best effort, but no longer a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return False, []
    return False, anomalies
def test_custom_algorithm(self):
    """
    A user-supplied algorithm class (CustomAlgo) must be accepted via
    algorithm_class and yield a non-empty anomaly list.
    """
    custom_detector = AnomalyDetector(
        self.s1,
        baseline_time_series=self.s2,
        algorithm_class=CustomAlgo,
        algorithm_params=dict(percent_threshold_upper=20,
                              percent_threshold_lower=-20),
    )
    result = custom_detector.get_anomalies()
    self.assertIsNot(result, None)
    self.assertGreater(len(result), 0)
def get_anoms(anomalous_ts):
    """
    Run AnomalyDetector over a time series and return the anomalies found.

    :param anomalous_ts: anything ``dict()`` accepts; a falsy value
        short-circuits to ``[]``.
    :return: the detector's anomalies, or ``[]`` on empty input or when
        detection fails (the failure is logged).
    """
    if not anomalous_ts:
        return []

    anomalies = []
    try:
        anomaly_ts_dict = dict(anomalous_ts)
        my_detector = AnomalyDetector(anomaly_ts_dict, score_threshold=1.5)
        anomalies = my_detector.get_anomalies()
    except Exception:
        # Best effort: log and fall through with an empty result. Narrowed
        # from a bare ``except`` so KeyboardInterrupt / SystemExit propagate.
        logger.error(traceback.format_exc())
        logger.error('error :: AnomalyDetector')
    return anomalies
def get_anomalies(series: pd.Series, algorithm: str = "bitmap_detector") -> List[dict]:
    """
    Detect anomalies in a pandas Series and describe each one as a dict.

    :param series: input series; converted with ``to_dict()`` before detection.
    :param algorithm: one of "bitmap_detector", "derivative_detector" or
        "exp_avg_detector".
    :return: one dict per anomaly with the keys "start_time", "end_time",
        "top_score_time" and "score".
    """
    supported = ["bitmap_detector", "derivative_detector", "exp_avg_detector"]
    assert algorithm in supported

    detector = AnomalyDetector(series.to_dict(), algorithm_name=algorithm)
    report = []
    for anomaly in detector.get_anomalies():
        report.append({
            "start_time": anomaly.start_timestamp,
            "end_time": anomaly.end_timestamp,
            "top_score_time": anomaly.exact_timestamp,
            "score": anomaly.anomaly_score,
        })
    return report
def test_absolute_threshold_algorithm(self):
    """
    The "absolute threshold" algorithm with both thresholds at 0.2 must
    find anomalies in s1 and must refuse to run without parameters.
    """
    algorithm = 'absolute_threshold'
    threshold_detector = AnomalyDetector(
        self.s1,
        algorithm_name=algorithm,
        algorithm_params=dict(absolute_threshold_value_upper=0.2,
                              absolute_threshold_value_lower=0.2),
    )
    result = threshold_detector.get_anomalies()
    self.assertIsNot(result, None)
    self.assertGreater(len(result), 0)

    with self.assertRaises(exceptions.RequiredParametersNotPassed):
        AnomalyDetector(self.s1, algorithm_name=algorithm)
def get_anoms(anomalous_ts):
    """
    Run AnomalyDetector over a time series and return the anomalies found.

    :param anomalous_ts: anything ``dict()`` accepts; a falsy value is
        logged as an error and short-circuits to ``[]``.
    :return: the detector's anomalies, or ``[]`` on empty input or when
        detection fails (the failure is logged).
    """
    logger = logging.getLogger(skyline_app_logger)

    if not anomalous_ts:
        logger.error('error :: get_anoms :: no anomalous_ts')
        return []

    anomalies = []
    try:
        anomaly_ts_dict = dict(anomalous_ts)
        my_detector = AnomalyDetector(anomaly_ts_dict, score_threshold=1.5)
        anomalies = my_detector.get_anomalies()
    except Exception:
        # Best effort: log and fall through with an empty result. Narrowed
        # from a bare ``except`` so KeyboardInterrupt / SystemExit propagate.
        logger.error(traceback.format_exc())
        logger.error('error :: get_anoms :: AnomalyDetector')
    return anomalies
def test_diff_percent_threshold_algorithm(self):
    """
    The "diff percent threshold" algorithm with 20% / -20% thresholds must
    find anomalies in s1 against s2 and must refuse to run without
    parameters.
    """
    common = dict(baseline_time_series=self.s2,
                  algorithm_name='diff_percent_threshold')
    percent_detector = AnomalyDetector(
        self.s1,
        algorithm_params=dict(percent_threshold_upper=20,
                              percent_threshold_lower=-20),
        **common
    )
    result = percent_detector.get_anomalies()
    self.assertIsNot(result, None)
    self.assertGreater(len(result), 0)

    with self.assertRaises(exceptions.RequiredParametersNotPassed):
        AnomalyDetector(self.s1, **common)
def pointsOfCorrelation(ts1, ts2, thresholdVal):
    """
    Find anomalous windows of ``ts2`` during which ``ts1`` and ``ts2`` correlate.

    Anomalies are detected on ``ts2`` with the 'exp_avg_detector' algorithm,
    using ``thresholdVal`` as the score threshold. For each anomaly window the
    two series are correlated, and windows that pass ``is_correlated`` with a
    threshold of 0.8 are reported.

    :param ts1: first time series, handed to Correlator unchanged.
    :param ts2: second time series; anomaly detection runs on this one.
    :param thresholdVal: score threshold handed to AnomalyDetector.
    :return: list of ``[(start, end), coefficient, anomaly_score]`` entries.
        ``start``/``end`` are local-time strings in '%Y-%m-%d %H:%M:%S'
        format; both numbers are rounded to two decimals.
    """
    corrPoints = []

    # Run anomaly detection on ts2 only.
    detector = AnomalyDetector(ts2, score_threshold=thresholdVal,
                               algorithm_name="exp_avg_detector")
    anomalies = detector.get_anomalies()

    # For each anomalous window of ts2, keep it if ts1 correlates with ts2 there.
    for a in anomalies:
        time_period = a.get_time_window()
        try:
            my_correlator = Correlator(ts1, ts2, time_period)
            if my_correlator.is_correlated(threshold=0.8):
                correlatorResultObj = my_correlator.get_correlation_result()

                # Switch the window to a human-readable form.
                start = strftime('%Y-%m-%d %H:%M:%S', localtime(a.start_timestamp))
                end = strftime('%Y-%m-%d %H:%M:%S', localtime(a.end_timestamp))
                time_period = (start, end)

                # Anomalous period, correlation coefficient and anomaly score.
                result = [
                    time_period,
                    round(correlatorResultObj.coefficient, 2),
                    round(a.anomaly_score, 2),
                ]
                corrPoints.append(result)
        except Exception:
            # Best effort: a window that cannot be correlated is skipped.
            # Narrowed from a bare ``except`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            continue
    return corrPoints
class TestAnomalyDetector(unittest.TestCase):
    """
    Tests for AnomalyDetector: named algorithms, a custom algorithm class,
    score thresholds, score-only mode and parameter validation.
    """

    def setUp(self):
        """Build two small 9-point series and a default detector for each."""
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
        self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def _sign_test_anomalies(self, ts, bs, algorithm_params):
        """Run the 'sign_test' algorithm on ts against bs; return its anomalies."""
        detector = AnomalyDetector(ts, baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        return detector.get_anomalies()

    def test_custom_algorithm(self):
        """
        Test passing a custom algorithm class
        """
        algorithm_params = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20
        }
        detector = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                   algorithm_class=CustomAlgo,
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertTrue(len(anomalies) > 0)

    def test_diff_percent_threshold_algorithm(self):
        """
        Test "diff percent threshold" algorithm with a threshold of 20%
        """
        algorithm_params = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20
        }
        detector = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                   algorithm_name='diff_percent_threshold',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertTrue(len(anomalies) > 0)
        # Omitting algorithm_params must be rejected.
        self.assertRaises(
            exceptions.RequiredParametersNotPassed,
            lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                    algorithm_name='diff_percent_threshold'))

    def test_sign_test_algorithm(self):
        """
        Test "sign test" algorithm with a threshold of 20%
        """
        bs = dict()
        bs.update((t, 1) for t in range(1, 100))
        ts = dict(bs)

        # test missing parameters
        self.assertRaises(
            exceptions.RequiredParametersNotPassed,
            lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                    algorithm_name='sign_test'))

        # test over specified (upper and lower threshold at once)
        # NOTE(review): these params used to be built but never passed, so the
        # check merely repeated the missing-parameters case above. Confirm that
        # sign_test rejects both thresholds with RequiredParametersNotPassed.
        algorithm_params = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20,
            'scan_window': 24,
            'confidence': 0.01
        }
        self.assertRaises(
            exceptions.RequiredParametersNotPassed,
            lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                    algorithm_name='sign_test',
                                    algorithm_params=algorithm_params))

        # Simple tests
        algorithm_params = {'percent_threshold_upper': 20, 'scan_window': 24}

        # first no anomalies
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 0)

        # Next one anomaly exactly equal to scan window
        ts.update((t, 1.200001) for t in range(10, 34))
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        # note the anomaly is larger than scan window
        self.assertEqual(s, 4)
        self.assertEqual(e, 39)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomaly below baseline
        algorithm_params = {
            'percent_threshold_lower': -20,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts.update((t, 0.799999) for t in range(10, 34))
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 4)
        self.assertEqual(e, 39)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomalies separated by big gap
        ts.update(bs)
        ts.update((t, 0.799999) for t in range(1, 25))
        ts.update((t, 0.799999) for t in range(60, 84))
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 2)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 1)
        self.assertEqual(e, 30)
        # score ~ 99.9
        self.assertGreater(anomaly.anomaly_score, 99)
        anomaly = anomalies[1]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 54)
        self.assertEqual(e, 89)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomalies separated by small gap
        algorithm_params = {'percent_threshold_upper': 20, 'scan_window': 24}
        ts.update(bs)
        ts.update((t, 1.21) for t in range(1, 25))
        ts.update((t, 1.21) for t in range(30, 40))
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        # just one
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 1)
        self.assertEqual(e, 40)
        # score ~ 99.9
        self.assertGreater(anomaly.anomaly_score, 99)

        # try noisy data
        ts.update(bs)
        ts.update((t, 1.21) for t in range(1, 25))
        ts.update((t, 1.19) for t in range(1, 25, 6))
        algorithm_params = {
            'percent_threshold_upper': 20,
            'scan_window': 24,
            'confidence': 0.01
        }
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)

        # now decrease sensitivity
        algorithm_params = {
            'percent_threshold_upper': 20,
            'scan_window': 24,
            'confidence': 0.0001
        }
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 0)

    def test_sign_test_algorithm_interface(self):
        """
        Test "sign test" algorithm with a threshold of 0%
        """
        bs = dict()
        bs.update((t, 1) for t in range(1, 30))

        # Upper bound: the first 20 points are raised to 3.1; one anomaly expected.
        algorithm_params = {
            'percent_threshold_upper': 0,
            'offset': 2,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts = dict(bs)
        ts.update((t, 3.1) for t in range(1, 21))
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 1)

        # Lower bound: the first 24 points are set to 2.9; one anomaly expected.
        algorithm_params = {
            'percent_threshold_lower': 0,
            'offset': 2,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts = dict(bs)
        ts.update((t, 2.9) for t in range(1, 25))
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 1)

    def test_sign_test_algorithm_with_shift(self):
        """
        Test "sign test" algorithm combined with an offset
        """
        bs = dict()
        bs.update((t, 1) for t in range(1, 100))

        # Simple tests
        algorithm_params = {
            'percent_threshold_upper': 10,
            'offset': 1,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts = dict(bs)
        # bigger than 10 percent but below bias
        ts.update((t, 1.2) for t in range(10, 34))

        # first no anomalies
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 0)

        # Next one anomaly exactly equal to scan window
        # uses bias
        ts.update((t, 2.100001) for t in range(10, 34))
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        # note the anomaly is larger than scan window
        self.assertEqual(s, 4)
        self.assertEqual(e, 39)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomaly below baseline but not below baseline with shift
        algorithm_params = {
            'percent_threshold_lower': -20,
            'offset': -0.1,
            'scan_window': 24
        }
        ts.update((t, 0.799999) for t in range(10, 34))
        # no anomalies
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 0)

        # lower the time series by 0.1
        ts.update((t, 0.699999) for t in range(10, 34))
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 4)
        self.assertEqual(e, 39)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

    def test_absolute_threshold_algorithm(self):
        """
        Test "absolute threshold" algorithm with a upper and lower threshold of 0.2
        """
        algorithm_params = {
            'absolute_threshold_value_upper': 0.2,
            'absolute_threshold_value_lower': 0.2
        }
        detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertTrue(len(anomalies) > 0)
        # Omitting algorithm_params must be rejected.
        self.assertRaises(
            exceptions.RequiredParametersNotPassed,
            lambda: AnomalyDetector(self.s1, algorithm_name='absolute_threshold'))

    def test_threshold(self):
        """
        Test score threshold=0
        """
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertTrue(len(detector.get_anomalies()) == 1)
        self.assertTrue(detector.get_anomalies() is not None)

    def test_score_only(self):
        """
        Test that score_only parameter doesn't give anomalies
        """
        detector = AnomalyDetector(self.s1, score_only=True,
                                   algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertTrue(detector2.get_anomalies() is not None)
        self.assertTrue(len(detector.get_anomalies()) == 0)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        self.assertTrue(isinstance(self.detector1.get_all_scores(), TimeSeries))
        self.assertEqual(len(self.detector1.get_all_scores()),
                         len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        self.assertTrue(self.detector1.get_anomalies() is not None)

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps,
                         self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values,
                         self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        self.assertRaises(
            exceptions.AlgorithmNotFound,
            lambda: AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm'))

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        # A non-dict algorithm_params value must be rejected.
        self.assertRaises(
            ValueError,
            lambda: AnomalyDetector(self.s1, algorithm_name='exp_avg_detector',
                                    algorithm_params='0'))
        detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values,
                            detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percent_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
# Build the buy/sell price-difference series, plot it, then overlay every
# anomalous window reported by AnomalyDetector.
volume = 0
box_difference = []
lts = {}  # index -> price difference; this is the detector's input
for _ in df_buy['VALUE']:
    container.append(counter)
    difference = df_buy['PRICE'][counter] - df_sell['PRICE'][counter]
    # Running volume: sold volume added, bought volume subtracted.
    volume = volume - df_buy['VOLUME'][counter] + df_sell['VOLUME'][counter]
    box_difference.append(difference)
    box_volume.append(volume)
    lts[counter] = difference
    counter += 1
plt.plot(container, box_difference)

detector = AnomalyDetector(lts)
anomalies = detector.get_anomalies()
k = 0
while k < len(anomalies):
    time_period = anomalies[k].get_time_window()
    container_anomalies = []
    box_difference_anomalies = []
    i = time_period[0]
    while i <= time_period[1]:
        # Use i before advancing it. Incrementing first skipped the window's
        # first point and read one index past its end.
        container_anomalies.append(i)
        difference = df_buy['PRICE'][i] - df_sell['PRICE'][i]
        box_difference_anomalies.append(difference)
        i += 1
    k += 1
    plt.plot(container_anomalies, box_difference_anomalies)
def test_sign_test_algorithm(self):
    """
    Test "sign test" algorithm with a threshold of 20%
    """
    bs = dict()
    bs.update((t, 1) for t in range(1, 100))
    ts = dict(bs)

    def detect(algorithm_params):
        # Run sign_test on the current state of ts against the baseline bs.
        detector = AnomalyDetector(ts, baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        return detector.get_anomalies()

    # test missing parameters
    self.assertRaises(
        exceptions.RequiredParametersNotPassed,
        lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                algorithm_name='sign_test'))

    # test over specified (upper and lower threshold at once)
    # NOTE(review): these params used to be built but never passed, so the
    # check merely repeated the missing-parameters case above. Confirm that
    # sign_test rejects both thresholds with RequiredParametersNotPassed.
    algorithm_params = {
        'percent_threshold_upper': 20,
        'percent_threshold_lower': -20,
        'scan_window': 24,
        'confidence': 0.01
    }
    self.assertRaises(
        exceptions.RequiredParametersNotPassed,
        lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                algorithm_name='sign_test',
                                algorithm_params=algorithm_params))

    # Simple tests
    algorithm_params = {'percent_threshold_upper': 20, 'scan_window': 24}

    # first no anomalies
    anomalies = detect(algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    ts.update((t, 1.200001) for t in range(10, 34))
    anomalies = detect(algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    # note the anomaly is larger than scan window
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)
    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomaly below baseline
    algorithm_params = {
        'percent_threshold_lower': -20,
        'scan_window': 24,
        'confidence': 0.01
    }
    ts.update((t, 0.799999) for t in range(10, 34))
    anomalies = detect(algorithm_params)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)
    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomalies separated by big gap
    ts.update(bs)
    ts.update((t, 0.799999) for t in range(1, 25))
    ts.update((t, 0.799999) for t in range(60, 84))
    anomalies = detect(algorithm_params)
    self.assertEqual(len(anomalies), 2)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 1)
    self.assertEqual(e, 30)
    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)
    anomaly = anomalies[1]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 54)
    self.assertEqual(e, 89)
    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomalies separated by small gap
    algorithm_params = {'percent_threshold_upper': 20, 'scan_window': 24}
    ts.update(bs)
    ts.update((t, 1.21) for t in range(1, 25))
    ts.update((t, 1.21) for t in range(30, 40))
    anomalies = detect(algorithm_params)
    # just one
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 1)
    self.assertEqual(e, 40)
    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)

    # try noisy data
    ts.update(bs)
    ts.update((t, 1.21) for t in range(1, 25))
    ts.update((t, 1.19) for t in range(1, 25, 6))
    algorithm_params = {
        'percent_threshold_upper': 20,
        'scan_window': 24,
        'confidence': 0.01
    }
    anomalies = detect(algorithm_params)
    self.assertEqual(len(anomalies), 1)

    # now decrease sensitivity
    algorithm_params = {
        'percent_threshold_upper': 20,
        'scan_window': 24,
        'confidence': 0.0001
    }
    anomalies = detect(algorithm_params)
    self.assertEqual(len(anomalies), 0)
def test_sign_test_algorithm_with_shift(self):
    """
    Exercise the "sign test" algorithm together with an offset: first a 10%
    upper threshold with offset 1, then a -20% lower threshold with
    offset -0.1.
    """
    baseline = {t: 1 for t in range(1, 100)}
    series = dict(baseline)
    window = range(10, 34)

    def detect(params):
        # Score the current state of `series` against the flat baseline.
        return AnomalyDetector(series,
                               baseline_time_series=baseline,
                               algorithm_name='sign_test',
                               algorithm_params=params).get_anomalies()

    def check_single_anomaly(found):
        # Each positive case expects one anomaly spanning 4..39 with a
        # score strictly between 98 and 99.
        self.assertEqual(len(found), 1)
        start, end = found[0].get_time_window()
        self.assertEqual(start, 4)
        self.assertEqual(end, 39)
        self.assertGreater(found[0].anomaly_score, 98)
        self.assertLess(found[0].anomaly_score, 99)

    upper_params = {
        'percent_threshold_upper': 10,
        'offset': 1,
        'scan_window': 24,
        'confidence': 0.01,
    }

    # 1.2 over the window: nothing may be reported.
    series.update((t, 1.2) for t in window)
    found = detect(upper_params)
    self.assertIsNotNone(found)
    self.assertEqual(len(found), 0)

    # 2.100001 over the same window: exactly one anomaly.
    series.update((t, 2.100001) for t in window)
    found = detect(upper_params)
    self.assertIsNotNone(found)
    check_single_anomaly(found)

    lower_params = {
        'percent_threshold_lower': -20,
        'offset': -0.1,
        'scan_window': 24,
    }

    # 0.799999 over the window: nothing may be reported.
    series.update((t, 0.799999) for t in window)
    found = detect(lower_params)
    self.assertIsNotNone(found)
    self.assertEqual(len(found), 0)

    # 0.1 lower again: exactly one anomaly.
    series.update((t, 0.699999) for t in window)
    check_single_anomaly(detect(lower_params))
def test_sign_test_algorithm_with_shift(self):
    """
    Test "sign test" algorithm combined with a shift: a 10% upper threshold
    with shift 1, then a -20% lower threshold with shift -0.1.
    """
    bs = dict()
    bs.update({t: 1 for t in range(1, 100)})

    # Simple tests
    algorithm_params = {'percent_threshold_upper': 10,
                        'shift': 1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    # bigger than 10 percent but below bias
    ts.update({t: 1.2 for t in range(10, 34)})

    # first no anomalies
    detector = AnomalyDetector(ts, baseline_time_series=bs,
                               algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()
    self.assertTrue(anomalies is not None)
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    # uses bias
    ts.update({t: 2.100001 for t in range(10, 34)})
    detector = AnomalyDetector(ts, baseline_time_series=bs,
                               algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    # note the anomaly is larger than scan window
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)
    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomaly below baseline but not below baseline with shift
    algorithm_params = {'percent_threshold_lower': -20,
                        'shift': -0.1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts.update({t: 0.799999 for t in range(10, 34)})
    # no anomalies
    detector = AnomalyDetector(ts, baseline_time_series=bs,
                               algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # lower the time series by 0.1
    ts.update({t: 0.699999 for t in range(10, 34)})
    detector = AnomalyDetector(ts, baseline_time_series=bs,
                               algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)
    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)
'./SAR-device.sdb.await__DCU1923ZeroOrder(4_8)2017-11-07 11_56_54_491016.csv', #score_threshold=0.1, #1.0, score_threshold=4.703433200392526, algorithm_name='derivative_detector' ) #derivative_detector'exp_avg_detector#'bitmap_detector)#, algorithm_params = {'smoothing factor': 0.2, 'lag_window_size': 64 }) score = my_detector.get_all_scores() for timestamp, value in score.iteritems(): asData.append(value) #asTime.append(pd.to_datetime(timestamp)) asTime.append(timestamp) #print(timestamp, value) # 异常点集合 asAnomal = my_detector.get_anomalies() #for a in asAnomal: # print(a) asData = asData[:1664] pylab.figure(figsize=(32, 16)) pylab.subplot(311) #asData = asData[:582] x = np.arange(1, len(asData) + 1, 1) pylab.plot(x, asData) #测值 pylab.grid(True) """ ##################### if asAnomal: time_period = asAnomal[0].get_time_window() correlator = Correlator(time_series_a='./SAR-device.sdb.await__研发中心波形_高阻接地_00025_20171025_201648_049_F__U0.csv',
class RCA(object):
    """
    Root-cause helper: detects anomalies in one time series and correlates
    each anomaly window with a list of related time series.
    """

    def __init__(self, metrix, related_metrices):
        """
        Initializer; runs the whole analysis on construction.
        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
        :param list related_metrices: a list of time series.
        """
        self.metrix = self._load(metrix)
        # The detector receives the raw argument, not the loaded TimeSeries.
        self.anomaly_detector = AnomalyDetector(metrix)
        self.related_metrices = related_metrices
        self.anomalies = self.anomaly_detector.get_anomalies()
        self._analyze()

    def _load(self, metrix):
        """
        Normalize the input into a TimeSeries.
        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
        :return TimeSeries: a TimeSeries object.
        """
        if isinstance(metrix, TimeSeries):
            loaded = metrix
        elif isinstance(metrix, dict):
            loaded = TimeSeries(metrix)
        else:
            loaded = TimeSeries(utils.read_csv(metrix))
        return loaded

    def _analyze(self):
        """
        For every detected anomaly, widen its time window and correlate the
        main series with each related series over that window.

        Results are stored in ``self.output`` (keyed by the anomaly's exact
        timestamp) and ``self.output_by_name`` (keyed by the related entry).
        Related entries raising NotEnoughDataPoints are skipped.
        """
        by_timestamp = defaultdict(list)
        by_entry = defaultdict(list)
        all_scores = self.anomaly_detector.get_all_scores()

        for anomaly in (self.anomalies or ()):
            window_start, window_end = anomaly.get_time_window()
            exact_t = anomaly.exact_timestamp

            # Pad the window by half its width on both sides (30 when the
            # window has zero width).
            padding = (window_end - window_start) / 2 or 30
            lower = window_start - padding
            upper = window_end + padding
            cropped = all_scores.crop(lower, upper)

            # Keep widening until at least two score points are covered.
            while len(cropped) < 2:
                lower -= padding
                upper += padding
                cropped = all_scores.crop(lower, upper)

            # Correlate with other metrics
            for related in self.related_metrices:
                try:
                    correlator = Correlator(
                        self.metrix, related,
                        time_period=(lower, upper),
                        use_anomaly_score=True)
                    result = correlator.get_correlation_result()
                    by_timestamp[exact_t].append(
                        (lower, upper, result.__dict__, related))
                    by_entry[related].append(
                        (lower, upper, result.__dict__))
                except exceptions.NotEnoughDataPoints:
                    pass

        self.output = by_timestamp
        self.output_by_name = by_entry
class TestAnomalyDetector(unittest.TestCase):
    """Tests for AnomalyDetector: algorithm selection, parameters and thresholds."""

    def setUp(self):
        # s2 carries the same bump as s1, three timestamps earlier.
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
        self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def _sign_test_anomalies(self, ts, bs, params):
        """Run the 'sign_test' algorithm on ``ts`` against baseline ``bs``."""
        detector = AnomalyDetector(ts, baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=params)
        return detector.get_anomalies()

    def _assert_window(self, anomaly, start, end):
        """Assert that ``anomaly`` spans exactly (start, end)."""
        s, e = anomaly.get_time_window()
        self.assertEqual(s, start)
        self.assertEqual(e, end)

    def test_custom_algorithm(self):
        """
        Test passing a custom algorithm class
        """
        detector = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                   algorithm_class=CustomAlgo,
                                   algorithm_params={'percent_threshold_upper': 20,
                                                     'percent_threshold_lower': -20})
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)

    def test_diff_percent_threshold_algorithm(self):
        """
        Test "diff percent threshold" algorithm with a threshold of 20%
        """
        detector = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                   algorithm_name='diff_percent_threshold',
                                   algorithm_params={'percent_threshold_upper': 20,
                                                     'percent_threshold_lower': -20})
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        # The thresholds are mandatory for this algorithm.
        self.assertRaises(exceptions.RequiredParametersNotPassed,
                          AnomalyDetector, self.s1,
                          baseline_time_series=self.s2,
                          algorithm_name='diff_percent_threshold')

    def test_sign_test_algorithm(self):
        """
        Test "sign test" algorithm with a threshold of 20%
        """
        bs = {t: 1 for t in range(1, 100)}
        ts = dict(bs)

        # test missing parameters
        self.assertRaises(exceptions.RequiredParametersNotPassed,
                          AnomalyDetector, self.s1,
                          baseline_time_series=self.s2,
                          algorithm_name='sign_test')

        # Simple tests
        algorithm_params = {'percent_threshold_upper': 20,
                            'percent_threshold_lower': -20,
                            'scan_window': 24,
                            'confidence': 0.01}

        # first no anomalies
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 0)

        # Next one anomaly exactly equal to scan window
        ts.update({t: 1.200001 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        # note the anomaly is larger than scan window
        self._assert_window(anomaly, 4, 39)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomaly below baseline
        ts.update({t: 0.799999 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        self._assert_window(anomaly, 4, 39)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomalies separated by big gap
        ts.update(bs)
        ts.update({t: 0.799999 for t in range(1, 25)})
        ts.update({t: 1.21 for t in range(60, 84)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 2)
        anomaly = anomalies[0]
        self._assert_window(anomaly, 1, 30)
        # score ~ 99.9
        self.assertGreater(anomaly.anomaly_score, 99)
        anomaly = anomalies[1]
        self._assert_window(anomaly, 54, 89)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomalies separated by small gap
        ts.update(bs)
        ts.update({t: 1.21 for t in range(1, 25)})
        ts.update({t: 1.21 for t in range(30, 40)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        # just one
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        self._assert_window(anomaly, 1, 40)
        # score ~ 99.9
        self.assertGreater(anomaly.anomaly_score, 99)

    def test_sign_test_algorithm_with_shift(self):
        """
        Test "sign test" algorithm with a ``shift`` applied: a 10% upper
        threshold with shift=1, then a -20% lower threshold with shift=-0.1.
        """
        bs = {t: 1 for t in range(1, 100)}

        # Simple tests
        algorithm_params = {'percent_threshold_upper': 10,
                            'shift': 1,
                            'scan_window': 24,
                            'confidence': 0.01}
        ts = dict(bs)

        # bigger than 10 percent but below bias: first no anomalies
        ts.update({t: 1.2 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 0)

        # Next one anomaly exactly equal to scan window; uses bias
        ts.update({t: 2.100001 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        # note the anomaly is larger than scan window
        self._assert_window(anomaly, 4, 39)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomaly below baseline but not below baseline with shift
        algorithm_params = {'percent_threshold_lower': -20,
                            'shift': -0.1,
                            'scan_window': 24,
                            'confidence': 0.01}
        ts.update({t: 0.799999 for t in range(10, 34)})
        # no anomalies
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 0)

        # lower the time series by 0.1
        ts.update({t: 0.699999 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        self._assert_window(anomaly, 4, 39)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

    def test_absolute_threshold_algorithm(self):
        """
        Test "absolute threshold" algorithm with a upper and lower threshold of 0.2
        """
        detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                                   algorithm_params={'absolute_threshold_value_upper': 0.2,
                                                     'absolute_threshold_value_lower': 0.2})
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        # The thresholds are mandatory for this algorithm.
        self.assertRaises(exceptions.RequiredParametersNotPassed,
                          AnomalyDetector, self.s1,
                          algorithm_name='absolute_threshold')

    def test_threshold(self):
        """
        Test score threshold=0
        """
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertIsNotNone(detector.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 1)

    def test_score_only(self):
        """
        Test that score_only parameter doesn't give anomalies
        """
        detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertIsNotNone(detector2.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        self.assertEqual(len(self.detector1.get_all_scores()), len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps, self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values, self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        self.assertRaises(exceptions.AlgorithmNotFound,
                          AnomalyDetector, self.s1,
                          algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        # algorithm_params must not be a plain string.
        self.assertRaises(ValueError,
                          AnomalyDetector, self.s1,
                          algorithm_name='exp_avg_detector',
                          algorithm_params='0')
        detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values, detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percent_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
class TestAnomalyDetector(unittest.TestCase):
    """Tests for AnomalyDetector: scores, thresholds and algorithm selection."""

    def setUp(self):
        # s2 carries the same bump as s1, three timestamps earlier.
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0.5, 5: 1, 6: 1, 7: 1, 8: 0}
        self.s2 = {0: 0, 1: 0.5, 2: 1, 3: 1, 4: 1, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_threshold(self):
        """
        Test score threshold=0
        """
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertIsNotNone(detector.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 1)

    def test_score_only(self):
        """
        Test that score_only makes get_anomalies() return None.
        """
        detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertIsNotNone(detector2.get_anomalies())
        self.assertIsNone(detector.get_anomalies())

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        self.assertEqual(len(self.detector1.get_all_scores()), len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps, self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values, self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        self.assertRaises(exceptions.AlgorithmNotFound,
                          AnomalyDetector, self.s1,
                          algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        # algorithm_params must not be a plain string.
        self.assertRaises(exceptions.InvalidDataFormat,
                          AnomalyDetector, self.s1,
                          algorithm_name='exp_avg_detector',
                          algorithm_params='0')
        detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values, detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percentile_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1, score_percentile_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1, score_percentile_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
function=InterpolateFunction.LINEAR)) query.set_transformation_filter(tf) series_list = svc.query(query) for series in series_list: metric_id = '- %s %s' % (series.metric, print_tags(series.tags)) log('\t' + metric_id) # exclude empty series for specific tags if len(series.data) > 2: ts = {int(sample.t / 1000): sample.v for sample in series.data} detector = AnomalyDetector(ts, score_threshold=args.min_score) anomalies = [] for anomaly in detector.get_anomalies(): if time.mktime(now.timetuple()) - args.last_hours * 3600 <= anomaly.exact_timestamp: anomalies.append(anomaly) if anomalies: message.append(metric_id) for anomaly in anomalies: t_start, t_end = format_t(anomaly.start_timestamp), format_t(anomaly.end_timestamp) t_exact = format_t(anomaly.exact_timestamp) anomaly_msg = '\tAnomaly from %s to %s with score %s: %s, %s' % ( t_start, t_end, anomaly.anomaly_score, t_exact, ts[anomaly.exact_timestamp]) message.append(anomaly_msg) msg = '\n'.join(message) message_service.insert(Message('anomaly_detection', 'python_script', 'anomaly', now, 'INFO', {}, msg)) print(msg)
from luminol.anomaly_detector import AnomalyDetector
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pandas as pd
import numpy as np


def _date_to_offset(date_str):
    """Map a '-'-separated date string to (month - 10) * 30 + day."""
    parts = [int(_) for _ in date_str.split('-')[1:]]
    return (parts[0] - 10) * 30 + parts[1]


# Keep only the rows for Belarus, quarter 4 and the 'КАМАЗ' keyword.
_df = pd.read_csv('article_sample.csv')
_mask = (_df['location'] == 'Belarus') & (_df['quater'] == 4) & (_df['keyword'] == 'КАМАЗ')
_ts = _df[_mask]
_ts = _ts[_ts['date'].apply(lambda x: '2018' in x)]

# Work on an explicit copy so the column assignment below never targets a
# view of the original frame.
ts1 = _ts[['date', 'polarity']].copy()
ts1['date'] = ts1['date'].apply(_date_to_offset)

# Mean polarity per offset, ordered by offset, then as {offset: polarity}.
_ts1 = ts1.groupby('date')['polarity'].mean().reset_index().set_index('date').sort_index()
ts1 = _ts1.to_dict()['polarity']

my_detector = AnomalyDetector(ts1, score_threshold=1)
score = my_detector.get_all_scores()
anomalies = my_detector.get_anomalies()

fig = plt.figure(figsize=(15, 10))
ax = fig.subplots(1)
ax.plot(_ts1, linestyle='--', marker='o')

# Outline every anomaly window with a randomly coloured rectangle.
for _index, _ano in enumerate(anomalies):
    _c = np.random.rand(1, 3)[0]
    _width = _ano.end_timestamp - _ano.start_timestamp
    rect = patches.Rectangle((_ano.start_timestamp, -1.5), _width, 3,
                             linewidth=2, edgecolor=_c, facecolor='none',
                             label='Anomaly #{}'.format(_index))
    ax.add_patch(rect)

ax.legend()
ax.set_ylim([-1.5, 1.5])
fig.tight_layout()
fig.savefig('temp.png')
fig.show()
def test_sign_test_algorithm(self):
    """
    Test "sign test" algorithm with a threshold of 20%

    The baseline is a flat series of 1s over timestamps 1..99; the tested
    series is modified step by step and the detected windows and scores
    are checked after each step.
    """
    def detect(series):
        # Run the sign test of `series` against the flat baseline.
        detector = AnomalyDetector(series, baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        return detector.get_anomalies()

    bs = {t: 1 for t in range(1, 100)}
    ts = dict(bs)

    # test missing parameters
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      AnomalyDetector, self.s1,
                      baseline_time_series=self.s2,
                      algorithm_name='sign_test')

    # Simple tests
    algorithm_params = {'percent_threshold_upper': 20,
                        'percent_threshold_lower': -20,
                        'scan_window': 24,
                        'confidence': 0.01}

    # first no anomalies
    anomalies = detect(ts)
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    ts.update({t: 1.200001 for t in range(10, 34)})
    anomalies = detect(ts)
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    # note the anomaly is larger than scan window
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)
    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomaly below baseline
    ts.update({t: 0.799999 for t in range(10, 34)})
    anomalies = detect(ts)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)
    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomalies separated by big gap
    ts.update(bs)
    ts.update({t: 0.799999 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(60, 84)})
    anomalies = detect(ts)
    self.assertEqual(len(anomalies), 2)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 1)
    self.assertEqual(e, 30)
    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)
    anomaly = anomalies[1]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 54)
    self.assertEqual(e, 89)
    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomalies separated by small gap
    ts.update(bs)
    ts.update({t: 1.21 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(30, 40)})
    anomalies = detect(ts)
    # just one
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 1)
    self.assertEqual(e, 40)
    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)
class TestAnomalyDetector(unittest.TestCase):
    """Tests for AnomalyDetector: algorithm selection, parameters and thresholds."""

    def setUp(self):
        # s2 carries the same bump as s1, three timestamps earlier.
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
        self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_custom_algorithm(self):
        """
        Test passing a custom algorithm class
        """
        detector = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                   algorithm_class=CustomAlgo,
                                   algorithm_params={'percent_threshold_upper': 20,
                                                     'percent_threshold_lower': -20})
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)

    def test_diff_percent_threshold_algorithm(self):
        """
        Test "diff percent threshold" algorithm with a threshold of 20%
        """
        detector = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                   algorithm_name='diff_percent_threshold',
                                   algorithm_params={'percent_threshold_upper': 20,
                                                     'percent_threshold_lower': -20})
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        # The thresholds are mandatory for this algorithm.
        self.assertRaises(exceptions.RequiredParametersNotPassed,
                          AnomalyDetector, self.s1,
                          baseline_time_series=self.s2,
                          algorithm_name='diff_percent_threshold')

    def test_absolute_threshold_algorithm(self):
        """
        Test "absolute threshold" algorithm with a upper and lower threshold of 0.2
        """
        detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                                   algorithm_params={'absolute_threshold_value_upper': 0.2,
                                                     'absolute_threshold_value_lower': 0.2})
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        # The thresholds are mandatory for this algorithm.
        self.assertRaises(exceptions.RequiredParametersNotPassed,
                          AnomalyDetector, self.s1,
                          algorithm_name='absolute_threshold')

    def test_threshold(self):
        """
        Test score threshold=0
        """
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertIsNotNone(detector.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 1)

    def test_score_only(self):
        """
        Test that score_only parameter doesn't give anomalies
        """
        detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertIsNotNone(detector2.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        self.assertEqual(len(self.detector1.get_all_scores()), len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps, self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values, self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        self.assertRaises(exceptions.AlgorithmNotFound,
                          AnomalyDetector, self.s1,
                          algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        # algorithm_params must not be a plain string.
        self.assertRaises(ValueError,
                          AnomalyDetector, self.s1,
                          algorithm_name='exp_avg_detector',
                          algorithm_params='0')
        detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values, detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percent_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
ts = ts.loc[ ts['stage_parallel'] == 'Puding Mixing #1'] # 'Puding Mixing #1', 'Sterilization #111', 'Storage tank #1' value_col = 'sensor_value' # TODO: ADDING ZEROS JUST FOR PLAYING. REMOVE LATER ts[value_col] = ts[value_col].transform( lambda x: x.fillna(x.median(), inplace=False)) else: raise Exception('Unknown example.') # run anomaly detection algorithm keys = ts['epoch'] values = ts[value_col] ts_dict = dict(zip(keys, values)) algorithm_name = 'exp_avg_detector' anomaly_detector = AnomalyDetector(ts_dict, algorithm_name=algorithm_name) anomalies = anomaly_detector.get_anomalies() anomaly_scores = anomaly_detector.get_all_scores() # plot results plot_ts_and_anomalies(ts, value_col, anomalies, anomaly_scores, ts_only=False, dir='/Users/yuval/Desktop/', show=True, plotly=True)
class TestAnomalyDetector(unittest.TestCase):
    """Tests for AnomalyDetector: scores, thresholds and algorithm selection."""

    def setUp(self):
        # s2 carries the same bump as s1, three timestamps earlier.
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0.5, 5: 1, 6: 1, 7: 1, 8: 0}
        self.s2 = {0: 0, 1: 0.5, 2: 1, 3: 1, 4: 1, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_threshold(self):
        """
        Test score threshold=0
        """
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertIsNotNone(detector.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 1)

    def test_score_only(self):
        """
        Test that score_only parameter doesn't give anomalies
        """
        detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertIsNotNone(detector2.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        self.assertEqual(len(self.detector1.get_all_scores()), len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps, self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values, self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        self.assertRaises(exceptions.AlgorithmNotFound,
                          AnomalyDetector, self.s1,
                          algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        # algorithm_params must not be a plain string.
        self.assertRaises(ValueError,
                          AnomalyDetector, self.s1,
                          algorithm_name='exp_avg_detector',
                          algorithm_params='0')
        detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values, detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percent_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
import csv
import time
import luminol
from luminol.anomaly_detector import AnomalyDetector

# Read the two-column CSV into {first column: float(second column)}.
# NOTE(review): keys stay as the raw strings read from the file - confirm
# that the detector accepts non-numeric timestamps.
with open('data/synthetic/tv.WAN.uncontrolled.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    bandwidth_up_timeseries = {row[0]: float(row[1]) for row in csv_reader}

# Score the series with the bitmap detector and report every anomaly window.
bandwidth_up_detector = AnomalyDetector(
    bandwidth_up_timeseries,
    algorithm_name="bitmap_detector",
    score_threshold=10,
    algorithm_params={"precision": 10},
)
anomalies = bandwidth_up_detector.get_anomalies()
for anomaly in anomalies:
    print(anomaly.start_timestamp, anomaly.end_timestamp, anomaly.anomaly_score)
def print_plot_of_qty(csv_path, group_by, qty_interest, include_extremes=False, extreme_threshold=400000):
    """
    Group a quantity from a CSV file by time period, detect anomalies in
    the resulting time series and plot them.

    @param csv_path (str) : path to the CSV file
    @param group_by (str) : a one-character long string (either "W", "D", "M")
                            specifying in which units of time to group the data by
                            (either weeks, days, or months, respectively)
    @param qty_interest (str) : the identifier of the quantity to be plotted
    @param include_extremes (bool) : whether or not to include extreme outliers.
                                     Defaults to false.
    @param extreme_threshold (float): the threshold to cut off extreme outliers
                                      (using qty_interest). Exclusive.
    """
    # Ingest the CSV file
    df = ingest_csv(csv_path)

    # Group by a specific quantity, over a specific frequency
    group = df.groupby(pd.Grouper(freq=str(group_by).upper()))[[
        qty_interest
    ]].sum().apply(list).to_dict()[qty_interest]

    # Get the time (dates) as whole seconds since 1970-01-01
    gkeys = [
        int((x - datetime(1970, 1, 1)).total_seconds())
        for x in list(group.keys())
    ]

    # Get the values for each time
    gvals = list(group.values())

    # Make the timeseries
    timeseries = dict(zip(gkeys, gvals))

    # Default algorithm properties
    algo_name = 'derivative_detector'
    algo_params = {
        'smoothing_factor': 0.2,
    }
    algo_threshold = 2

    # For any extreme anomalies
    extreme_anomalies = None

    if not include_extremes:
        # We ignore extremes by default
        timeseries = {
            k: v
            for k, v in timeseries.items() if v < extreme_threshold
        }
    else:
        # NOTE(review): this keeps values at or BELOW the threshold, which
        # looks inverted for a dict of "extreme" points - confirm whether
        # `>=` was intended. Left unchanged here.
        extreme_anomalies = {
            k: v
            for k, v in timeseries.items() if v <= extreme_threshold
        }
        algo_name = 'bitmap_detector'
        # Window sizes are 30% of the number of grouped points. The original
        # referenced an undefined name `keys` here and raised NameError.
        algo_params = {
            'precision': 10,
            'lag_window_size': int(0.30 * len(gkeys)),
            'future_window_size': int(0.30 * len(gkeys)),
            'chunk_size': 2,
        }

    # Detector for anomalies
    detector = AnomalyDetector(time_series=timeseries,
                               algorithm_name=algo_name,
                               algorithm_params=algo_params,
                               score_threshold=algo_threshold)

    # Map each anomaly's exact timestamp to the value observed there
    anomalies = {
        anomaly.exact_timestamp: timeseries[anomaly.exact_timestamp]
        for anomaly in detector.get_anomalies()
    }

    # Plot and print the graph, and anomalies (and include extremes if necessary)
    plot_anomalies(timeseries, anomalies, extreme_anomalies)
class TestAnomalyDetector(unittest.TestCase):
    """Tests for AnomalyDetector: algorithm selection, parameters and thresholds."""

    def setUp(self):
        # s2 carries the same bump as s1, three timestamps earlier.
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
        self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_diff_percent_threshold_algorithm(self):
        """
        Test "diff percent threshold" algorithm with a threshold of 20%
        """
        detector = AnomalyDetector(self.s1,
                                   baseline_time_series=self.s2,
                                   algorithm_name='diff_percent_threshold',
                                   algorithm_params={
                                       'percent_threshold_upper': 20,
                                       'percent_threshold_lower': -20
                                   })
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        # The thresholds are mandatory for this algorithm.
        self.assertRaises(exceptions.RequiredParametersNotPassed,
                          AnomalyDetector, self.s1,
                          baseline_time_series=self.s2,
                          algorithm_name='diff_percent_threshold')

    def test_absolute_threshold_algorithm(self):
        """
        Test "absolute threshold" algorithm with a upper and lower threshold of 0.2
        """
        detector = AnomalyDetector(self.s1,
                                   algorithm_name='absolute_threshold',
                                   algorithm_params={
                                       'absolute_threshold_value_upper': 0.2,
                                       'absolute_threshold_value_lower': 0.2
                                   })
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        # The thresholds are mandatory for this algorithm.
        self.assertRaises(exceptions.RequiredParametersNotPassed,
                          AnomalyDetector, self.s1,
                          algorithm_name='absolute_threshold')

    def test_threshold(self):
        """
        Test score threshold=0
        """
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertIsNotNone(detector.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 1)

    def test_score_only(self):
        """
        Test that score_only parameter doesn't give anomalies
        """
        detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertIsNotNone(detector2.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        self.assertEqual(len(self.detector1.get_all_scores()), len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps, self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values, self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        self.assertRaises(exceptions.AlgorithmNotFound,
                          AnomalyDetector, self.s1,
                          algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        # algorithm_params must not be a plain string.
        self.assertRaises(ValueError,
                          AnomalyDetector, self.s1,
                          algorithm_name='exp_avg_detector',
                          algorithm_params='0')
        detector = AnomalyDetector(self.s1,
                                   algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values, detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percent_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1,
                                   score_percent_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1,
                                    score_percent_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
def job():
    """
    Run one anomaly-detection pass over the ADT_A31 message counts.

    Reads per-interval message counts from ``../hl7-combined.db``, scores them
    with ``AnomalyDetector`` (configured through the module-level
    ``ALGORITH_NAME`` and ``THRESHOLD``), prints every score and every
    anomaly, and terminates the process as soon as an anomaly reaches the
    threshold.

    :raises SystemExit: when an anomaly with a score >= THRESHOLD is found.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    print("Run start: " + datetime.datetime.now().strftime(stamp_format))

    # Connect to either the normal, anomaly or combined database. Note for SQLite, the detect types
    # argument converts from SQLite datatypes (typically text) to Python native datatypes.
    conn = sqlite3.connect('../hl7-combined.db',
                           detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
    data = {}
    try:
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()
        # Group messages into X second intervals.
        # INTERVAL and LIMIT are spliced into the statement as text; they are
        # module-level settings, not user input.
        cur.execute(
            "select messages.MSGTYPE, datetime((strftime('%s', messages.QueueTime) /" + INTERVAL + ") * " + INTERVAL +
            ", 'unixepoch') interval, count(*) count from messages"
            " where msgtype = 'ADT_A31' group by interval order by interval desc limit " + LIMIT)
        for row in cur.fetchall():
            # Luminol library requires a 2 column unix timestamp + count
            obs_timestamp = time.mktime(
                datetime.datetime.strptime(row["interval"], stamp_format).timetuple())
            data[obs_timestamp] = row["count"]
    finally:
        # The original never released the connection (not even on the exit path).
        conn.close()

    if data:
        print(data)

        # DETECTOR TYPE - see https://github.com/linkedin/luminol/tree/master/src/luminol/algorithms/anomaly_detector_algorithms
        detector = AnomalyDetector(data, algorithm_name=ALGORITH_NAME, score_threshold=THRESHOLD)

        anom_score = []
        for timestamp, value in detector.get_all_scores().iteritems():
            t_str = time.strftime(stamp_format, time.localtime(timestamp))
            anom_score.append([t_str, value])

        print("----- ALL SCORES ----- ")
        for entry in anom_score:
            print(entry)

        for anomaly in detector.get_anomalies():
            print(" match: %s %s" % (
                time.strftime(stamp_format, time.localtime(anomaly.exact_timestamp)),
                anomaly.anomaly_score))

            # NOTE(review): the original also required `anomalies.count > int(LIMIT)`.
            # `anomalies.count` is a bound method, so that clause was always True on
            # Python 2 and a TypeError on Python 3. It is dropped here, which keeps
            # the Python 2 behaviour; confirm what the second condition was meant
            # to check.
            if anomaly.anomaly_score >= int(THRESHOLD):
                print("ANOMALY DETECTED - NOTIFYING ADMINISTRATOR / CALLING WEBSERVICE ETC")
                # Same effect as the interactive helper quit(), without
                # depending on the site module.
                raise SystemExit

    print("Run End: " + datetime.datetime.now().strftime(stamp_format) + "\n")
def main(argv):
    """
    Correlate the anomalies of one CAN log with a second CAN log.

    Command line options (read from ``sys.argv``; ``argv`` is kept for
    interface compatibility and is not consulted):

    * ``-i`` / ``--input``: log file whose anomalies are detected
    * ``-c`` / ``--correlate``: log file to correlate against
    * ``-v``: accepted, currently has no effect
    * ``-h`` / ``--help``: print usage and exit

    For every anomaly found in the input log, a line is printed when the
    second log correlates with it over the anomaly's time window.
    """
    try:
        # The original list read ["help", "input=" "correlate="]: without the
        # comma the two literals fuse into the single option "input=correlate=".
        opts, args = getopt.getopt(sys.argv[1:], "hi:c:v", ["help", "input=", "correlate="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    input_path = None
    correlate_path = None
    for o, a in opts:
        if o == "-v":
            continue  # verbosity flag is parsed but unused in this script
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-i", "--input"):
            input_path = a
        elif o in ("-c", "--correlate"):
            correlate_path = a
        else:
            # Raised explicitly so the check survives `python -O`.
            raise AssertionError("unhandled option")

    if input_path is None or correlate_path is None:
        # Both files are required; previously this ended in open(None).
        usage()
        sys.exit(2)

    regex = r"(\s+)([+-]?\d*\.\d+)(?![-+0-9\.])(\s+)(\d+)(\s+)(\w+)(\s+)(\w+)(\s+)([a-z])(\s+)(\d+)(?:(?!X).)*(Length = )(\d+)(\s+)(BitCount = )(\d+)(\s+)(ID = )(\d+)"
    pattern = re.compile(regex, re.UNICODE)

    def load_events(path):
        """Return {timestamp * 1000000: ID string} for every matching line of *path*."""
        with open(path) as handle:
            content = handle.read()
        events = {}
        for match in pattern.finditer(content):
            # group(2) is the timestamp column, group(20) the number after "ID = "
            events[float(match.group(2)) * 1000000] = match.group(20)
        return events

    mydict1 = load_events(input_path)
    mydict2 = load_events(correlate_path)

    my_detector1 = AnomalyDetector(mydict1, algorithm_name=("exp_avg_detector"))
    for anomaly in my_detector1.get_anomalies():
        time_period = anomaly.get_time_window()
        my_correlator = Correlator(mydict1, mydict2, time_period)
        # Keyword was misspelled "treshold" in the original call.
        if my_correlator.is_correlated(threshold=0.8):
            print("mydict2 correlate with mydict at time period (%d, %d)" % time_period)
def main(argv):
    """
    Detect anomalies in a CANoe ASCII (.asc) log and print them.

    Command line options (read from ``sys.argv``; ``argv`` is kept for
    interface compatibility and is not consulted):

    * ``-i`` / ``--input``: log file to analyse
    * ``-v``: print every anomaly instead of only those scoring above 3.4
    * ``-h`` / ``--help``: print usage and exit
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:v", ["help", "input="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))
        usage()
        sys.exit(2)

    input_path = None
    verbose = False
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-i", "--input"):
            input_path = a
        else:
            # Raised explicitly so the check survives `python -O`.
            raise AssertionError("unhandled option")

    if input_path is None:
        # An input file is required; previously this ended in open(None).
        usage()
        sys.exit(2)

    # this is the regular expression used to parse CANoe logs in ASCII format (.asc)
    regex = r"(\s+)([+-]?\d*\.\d+)(?![-+0-9\.])(\s+)(\d+)(\s+)(\w+)(\s+)(\w+)(\s+)([a-z])(\s+)(\d+)(?:(?!X).)*(Length = )(\d+)(\s+)(BitCount = )(\d+)(\s+)(ID = )(\d+)"
    pattern = re.compile(regex, re.UNICODE)

    with open(input_path) as handle:
        content = handle.read()

    # event_dict stores the values (timestamp + CAN-ID) extracted from the logs
    event_dict = {}
    for match in pattern.finditer(content):
        # Scale the timestamp (seconds since engine start) by 1000000 and store
        # it as an int. The original kept the float product, so a value such as
        # 1234566.9999999998 could never equal the timestamp reported for an
        # anomaly and the event was silently skipped below.
        # NOTE(review): presumably luminol stores integer timestamps - confirm.
        event_time = int(round(float(match.group(2)) * 1000000))
        # match.group(20) is ID of CAN event in decimal
        event_dict[event_time] = match.group(20)

    my_detector = AnomalyDetector(event_dict, algorithm_name=("exp_avg_detector"))

    # Without "-v" only anomalies scoring above this value are reported.
    score_cutoff = 3.4

    print("")
    # filter events in time series for anomalies
    for attack in my_detector.get_anomalies():
        if attack.exact_timestamp not in event_dict:
            continue
        if verbose or attack.anomaly_score > score_cutoff:
            print("{timestamp} - ID: {id} - Score: {value}".format(
                timestamp=attack.exact_timestamp,
                id=event_dict[attack.exact_timestamp],
                value=attack.anomaly_score))
def test_score_only(self):
    """
    Check that a detector built with score_only=True returns None from
    get_anomalies(), while a detector built without it returns a result.
    """
    algorithm = 'derivative_detector'
    scoring_detector = AnomalyDetector(self.s1, score_only=True, algorithm_name=algorithm)
    regular_detector = AnomalyDetector(self.s1, algorithm_name=algorithm)
    self.assertIsNotNone(regular_detector.get_anomalies())
    self.assertIsNone(scoring_detector.get_anomalies())
class RCA(object):
    """
    Detect anomalies in one metric and correlate the window around each
    anomaly with a list of related metrics.

    After construction the results are available as:

    * ``output``: anomaly exact timestamp -> list of
      ``(start, end, correlation result dict, related metric)`` tuples
    * ``output_by_name``: related metric -> list of
      ``(start, end, correlation result dict)`` tuples
    """

    def __init__(self, metrix, related_metrices):
        """
        Initializer
        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
        :param list related_metrices: a list of time series.
        """
        self.metrix = self._load(metrix)
        # Hand over the already loaded series so a csv path is not read twice.
        self.anomaly_detector = AnomalyDetector(self.metrix)
        self.related_metrices = related_metrices
        self.anomalies = self.anomaly_detector.get_anomalies()
        self._analyze()

    def _load(self, metrix):
        """
        Load time series.
        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
        :return TimeSeries: a TimeSeries object.
        """
        if isinstance(metrix, TimeSeries):
            return metrix
        if isinstance(metrix, dict):
            return TimeSeries(metrix)
        return TimeSeries(utils.read_csv(metrix))

    def _analyze(self):
        """
        Correlate every detected anomaly with the related metrics.

        For each anomaly the time window is widened by half its length on both
        sides (30 when that half is zero) and then correlated with every related
        metric. Results are stored in ``self.output`` and ``self.output_by_name``;
        related metrics without enough data points are skipped.
        """
        output = defaultdict(list)
        output_by_name = defaultdict(list)
        scores = self.anomaly_detector.get_all_scores()

        for anomaly in self.anomalies or []:
            start_t, end_t = anomaly.get_time_window()
            t = anomaly.exact_timestamp

            # Compute extended start timestamp and extended end timestamp.
            room = (end_t - start_t) / 2
            if not room:
                room = 30
            extended_start_t = start_t - room
            extended_end_t = end_t + room
            scores_cropped = scores.crop(extended_start_t, extended_end_t)

            # Adjust the two timestamps if not enough data points are included.
            # Never ask for more points than the series holds, otherwise the
            # window would be widened forever.
            wanted = min(2, len(scores))
            while len(scores_cropped) < wanted:
                extended_start_t = extended_start_t - room
                extended_end_t = extended_end_t + room
                scores_cropped = scores.crop(extended_start_t, extended_end_t)

            # Correlate with other metrics
            for entry in self.related_metrices:
                try:
                    correlation = Correlator(
                        self.metrix, entry,
                        time_period=(extended_start_t, extended_end_t),
                        use_anomaly_score=True).get_correlation_result()
                except exceptions.NotEnoughDataPoints:
                    # Best effort: a related metric that cannot be correlated
                    # is simply left out of the report.
                    continue
                output[t].append(
                    (extended_start_t, extended_end_t, correlation.__dict__, entry))
                output_by_name[entry].append(
                    (extended_start_t, extended_end_t, correlation.__dict__))

        self.output = output
        self.output_by_name = output_by_name
function=InterpolateFunction.LINEAR)) query.set_transformation_filter(tf) series_list = svc.query(query) for series in series_list: metric_id = '- %s %s' % (series.metric, print_tags(series.tags)) log('\t' + metric_id) # exclude empty series for specific tags if len(series.data) > 2: ts = {int(sample.t / 1000): sample.v for sample in series.data} detector = AnomalyDetector(ts, score_threshold=args.min_score) anomalies = [] for anomaly in detector.get_anomalies(): if time.mktime(now.timetuple()) - args.last_hours * 3600 <= anomaly.exact_timestamp: anomalies.append(anomaly) if anomalies: message.append(metric_id) for anomaly in anomalies: t_start, t_end = format_t(anomaly.start_timestamp), format_t(anomaly.end_timestamp) t_exact = format_t(anomaly.exact_timestamp) anomaly_msg = '\tAnomaly from %s to %s with score %s: %s, %s' % ( t_start, t_end, anomaly.anomaly_score, t_exact, ts[anomaly.exact_timestamp]) message.append(anomaly_msg) msg = '\n'.join(message) message_service.insert(Message('anomaly_detection', 'python_script', 'anomaly', now, 'INFO', {}, msg)) print(msg)