def test_algorithm_params(self):
    """
    Check that the optional ``algorithm_params`` argument is validated and used.

    Passing the string '0' must raise ``InvalidDataFormat``; passing a custom
    smoothing factor must yield scores that differ from ``self.detector1``'s.
    """
    with self.assertRaises(exceptions.InvalidDataFormat):
        AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')

    tuned = AnomalyDetector(
        self.s1,
        algorithm_name="exp_avg_detector",
        algorithm_params={'smoothing_factor': 0.3},
    )
    reference_values = self.detector1.get_all_scores().values
    tuned_values = tuned.get_all_scores().values
    self.assertNotEqual(reference_values, tuned_values)
def test_algorithm_DefaultDetector(self):
    """
    Check that naming 'default_detector' explicitly gives the same timestamps
    and score values as ``self.detector1``, which was built without an
    algorithm name.
    """
    explicit = AnomalyDetector(self.s1, algorithm_name='default_detector')
    for attribute in ('timestamps', 'values'):
        self.assertEqual(
            getattr(explicit.get_all_scores(), attribute),
            getattr(self.detector1.get_all_scores(), attribute),
        )
def clean_anomaly2(df, index_name="15分钟段", var_name="实际功率", anomaly_limit=1.5, base_p=0.3):
    """
    Remove outliers from ``df[var_name]``, one ``index_name`` group at a time.

    For each group of ``df.groupby(index_name)``:

    1. Rows outside a percentile band are dropped. The band is [2, 99] when
       the group median is above ``base_p`` and [1, 98] otherwise.
    2. The remaining values are z-score normalised and scored with the
       'exp_avg_detector' algorithm of ``AnomalyDetector``; only rows whose
       score is below ``anomaly_limit`` are kept.

    Groups whose remaining values have no positive standard deviation
    (constant or empty) skip step 2 and are kept unchanged.

    :param df: DataFrame containing the ``index_name`` and ``var_name`` columns.
    :param str index_name: name of the column to group by.
    :param str var_name: name of the column holding the values to clean.
    :param float anomaly_limit: rows are kept only while their score is below this.
    :param float base_p: median level that selects the percentile band.
    :return: new DataFrame with the surviving rows of every group and a fresh
        index; an empty frame with the same columns when ``df`` has no groups.
    """
    cleaned_groups = []
    for _, group in df.groupby(index_name):
        # reset_index returns a new frame, so the caller's data is never touched.
        temp = group.reset_index(drop=True)
        if np.median(temp[var_name]) > base_p:
            limit_low, limit_up = np.percentile(temp[var_name], [2, 99])
        else:
            limit_low, limit_up = np.percentile(temp[var_name], [1, 98])
        in_band = (temp[var_name] <= limit_up) & (temp[var_name] >= limit_low)
        temp = temp[in_band].reset_index(drop=True)

        ts = temp[var_name]
        ts_std = np.std(ts)
        if ts_std > 0:
            # Normalise only once the deviation is known to be positive, so a
            # constant group never triggers a division by zero.
            normalised = (ts - np.mean(ts)) / ts_std
            my_detector = AnomalyDetector(normalised.to_dict(), algorithm_name='exp_avg_detector')
            score = my_detector.get_all_scores()
            cleaned_groups.append(temp[np.array(score.values) < anomaly_limit])
        else:
            cleaned_groups.append(temp)

    if not cleaned_groups:
        # pd.concat raises on an empty list; hand back an empty frame instead.
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(cleaned_groups, ignore_index=True)
def test_anomaly_threshold(self):
    """
    Check ``score_percentile_threshold``: with the same threshold of 0.1, the
    'exp_avg_detector' and 'derivative_detector' algorithms must report
    different anomalies for ``self.s1``.
    """
    shared_options = {'score_percentile_threshold': 0.1}
    exp_avg = AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', **shared_options)
    derivative = AnomalyDetector(self.s1, algorithm_name='derivative_detector', **shared_options)
    self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
def test_threshold(self):
    """
    With ``score_threshold=0`` the detector must report exactly one anomaly
    for ``self.s1``.
    """
    zero_threshold = AnomalyDetector(self.s1, score_threshold=0)
    self.assertEqual(len(zero_threshold.get_anomalies()), 1)
    self.assertIsNotNone(zero_threshold.get_anomalies())
def test_score_only(self):
    """
    A detector built with ``score_only=True`` must return an empty anomaly
    collection, while the same detector without the flag still returns one.
    """
    algorithm = 'derivative_detector'
    scores_only = AnomalyDetector(self.s1, score_only=True, algorithm_name=algorithm)
    regular = AnomalyDetector(self.s1, algorithm_name=algorithm)
    self.assertIsNotNone(regular.get_anomalies())
    self.assertEqual(len(scores_only.get_anomalies()), 0)
def test_custom_algorithm(self):
    """
    Run the detector with a caller-supplied algorithm class (``CustomAlgo``)
    and expect at least one anomaly.
    """
    thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
    detector = AnomalyDetector(
        self.s1,
        baseline_time_series=self.s2,
        algorithm_class=CustomAlgo,
        algorithm_params=thresholds,
    )
    found = detector.get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)
def __init__(self, metrix, related_metrices):
    """
    Initializer: load the metric, run anomaly detection on it and start the analysis.

    :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
    :param list related_metrices: a list of time series.
    """
    self.metrix = self._load(metrix)
    # NOTE(review): the detector receives the raw `metrix` argument rather than
    # the loaded self.metrix — confirm this is intended.
    self.anomaly_detector = AnomalyDetector(metrix)
    self.related_metrices = related_metrices
    self.anomalies = self.anomaly_detector.get_anomalies()
    # _analyze() runs last, after every attribute above has been set.
    self._analyze()
def get_anomaly_series(series: pd.Series, algorithm: str = "bitmap_detector") -> pd.Series:
    """
    Score every point of ``series`` with the chosen luminol algorithm.

    :param series: input values; converted to a dict with ``Series.to_dict``.
    :param algorithm: one of "bitmap_detector", "derivative_detector" or
        "exp_avg_detector" (checked with an assertion).
    :return: a new Series of anomaly scores with a default integer index.
    """
    supported = ["bitmap_detector", "derivative_detector", "exp_avg_detector"]
    assert algorithm in supported

    detector = AnomalyDetector(series.to_dict(), algorithm_name=algorithm)
    score_values = []
    for _timestamp, score in detector.get_all_scores().iteritems():
        score_values.append(score)
    return pd.Series(score_values)
def test_absolute_threshold_algorithm(self):
    """
    Exercise the 'absolute_threshold' algorithm with upper and lower
    thresholds of 0.2, and check that omitting the thresholds is rejected.
    """
    thresholds = {
        'absolute_threshold_value_upper': 0.2,
        'absolute_threshold_value_lower': 0.2,
    }
    detector = AnomalyDetector(
        self.s1, algorithm_name='absolute_threshold', algorithm_params=thresholds)
    found = detector.get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)

    with self.assertRaises(exceptions.RequiredParametersNotPassed):
        AnomalyDetector(self.s1, algorithm_name='absolute_threshold')
def test_diff_percent_threshold_algorithm(self):
    """
    Exercise the 'diff_percent_threshold' algorithm with thresholds of
    +20 / -20 against the baseline ``self.s2``, and check that omitting the
    thresholds is rejected.
    """
    thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
    detector = AnomalyDetector(
        self.s1,
        baseline_time_series=self.s2,
        algorithm_name='diff_percent_threshold',
        algorithm_params=thresholds,
    )
    found = detector.get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)

    with self.assertRaises(exceptions.RequiredParametersNotPassed):
        AnomalyDetector(self.s1, baseline_time_series=self.s2,
                        algorithm_name='diff_percent_threshold')
def isAnomaly(self, data):
    """
    Tell whether the last data point of ``data`` is an anomaly.

    ``data`` is indexed by position (0 .. len(data) - 1) and scored with the
    'exp_avg_detector' algorithm using ``self.score_threshold``.

    :param data: indexable sequence of values.
    :return: tuple ``(is_anomaly, anomalies)``. ``is_anomaly`` is True when an
        anomaly's ``exact_timestamp`` is the last position. If detection fails
        for any reason, ``(False, [])`` is returned.
    """
    length = len(data)
    try:
        detector = AnomalyDetector({i: data[i] for i in range(length)},
                                   algorithm_name='exp_avg_detector',
                                   score_threshold=self.score_threshold)
        anomalies = detector.get_anomalies()
        last_is_anomaly = any(
            anomaly.exact_timestamp == length - 1 for anomaly in anomalies)
    except Exception:
        # Best effort: a failed detection counts as "no anomaly". Unlike a bare
        # `except:`, KeyboardInterrupt and SystemExit still propagate.
        return False, []
    return last_is_anomaly, anomalies
def get_anoms(anomalous_ts):
    """
    Return the anomalies that ``AnomalyDetector`` finds in ``anomalous_ts``.

    :param anomalous_ts: anything ``dict()`` accepts, e.g. a list of
        (timestamp, value) pairs; converted with ``dict()`` before detection.
    :return: the detector's anomalies, or an empty list when the input is
        empty or detection fails (the failure is logged, not raised).
    """
    if not anomalous_ts:
        return []

    anomalies = []
    try:
        anomaly_ts_dict = dict(anomalous_ts)
        my_detector = AnomalyDetector(anomaly_ts_dict, score_threshold=1.5)
        anomalies = my_detector.get_anomalies()
    except Exception:
        # Log and fall through to the empty result. Unlike a bare `except:`,
        # KeyboardInterrupt and SystemExit are not swallowed.
        logger.error(traceback.format_exc())
        logger.error('error :: AnomalyDetector')
    return anomalies
def test_custom_algorithm(self):
    """
    Run the detector with a caller-supplied algorithm class (``CustomAlgo``)
    and expect at least one anomaly.
    """
    thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
    detector = AnomalyDetector(
        self.s1,
        baseline_time_series=self.s2,
        algorithm_class=CustomAlgo,
        algorithm_params=thresholds,
    )
    found = detector.get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)
def test_algorithm_params(self):
    """
    Check that the optional ``algorithm_params`` argument is validated and used.

    Passing the string '0' must raise ``ValueError``; passing a custom
    smoothing factor must yield scores that differ from ``self.detector1``'s.
    """
    with self.assertRaises(ValueError):
        AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')

    tuned = AnomalyDetector(
        self.s1,
        algorithm_name="exp_avg_detector",
        algorithm_params={'smoothing_factor': 0.3},
    )
    reference_values = self.detector1.get_all_scores().values
    tuned_values = tuned.get_all_scores().values
    self.assertNotEqual(reference_values, tuned_values)
def detect(self, ts):
    """
    Score ``ts`` with ``AnomalyDetector`` and key the scores by readable time.

    :param ts: time series input handed to ``AnomalyDetector`` unchanged.
    :return: dict mapping each timestamp, formatted in local time as
        'dd-Mon-YYYY HH:MM:SS', to its anomaly score. Timestamps that format
        to the same string keep the last score.
    """
    all_scores = AnomalyDetector(ts).get_all_scores()
    return {
        time.strftime('%d-%b-%Y %H:%M:%S', time.localtime(timestamp)): value
        for timestamp, value in all_scores.iteritems()
    }
def get_anomalies(series: pd.Series, algorithm: str = "bitmap_detector") -> List[dict]:
    """
    Detect anomalies in ``series`` and describe each one as a plain dict.

    :param series: input values; converted to a dict with ``Series.to_dict``.
    :param algorithm: one of "bitmap_detector", "derivative_detector" or
        "exp_avg_detector" (checked with an assertion).
    :return: one dict per anomaly with the keys "start_time", "end_time",
        "top_score_time" and "score".
    """
    supported = ["bitmap_detector", "derivative_detector", "exp_avg_detector"]
    assert algorithm in supported

    detector = AnomalyDetector(series.to_dict(), algorithm_name=algorithm)
    described = []
    for anomaly in detector.get_anomalies():
        described.append({
            "start_time": anomaly.start_timestamp,
            "end_time": anomaly.end_timestamp,
            "top_score_time": anomaly.exact_timestamp,
            "score": anomaly.anomaly_score,
        })
    return described
def __init__(self, ts=None, param=None):
    """
    Build a detector that uses luminol's 'diff_percent_threshold' algorithm.

    :param ts: raw time series, converted with ``self.cvtTimeSeries``;
        defaults to an empty dict.
    :param param: value passed through as ``algorithm_params``.
    """
    super(DiffPercentD, self).__init__()
    # A fresh dict per call avoids sharing one mutable default between instances.
    converted = self.cvtTimeSeries({} if ts is None else ts)
    # Function-call form of print works on both Python 2 and Python 3.
    print(converted)
    # NOTE(review): no baseline_time_series is passed here — confirm that
    # 'diff_percent_threshold' can run without one.
    self.detector = AnomalyDetector(
        converted,
        algorithm_name='diff_percent_threshold',
        algorithm_params=param)
def test_absolute_threshold_algorithm(self):
    """
    Exercise the 'absolute_threshold' algorithm with upper and lower
    thresholds of 0.2, and check that omitting the thresholds is rejected.
    """
    thresholds = {
        'absolute_threshold_value_upper': 0.2,
        'absolute_threshold_value_lower': 0.2,
    }
    detector = AnomalyDetector(
        self.s1, algorithm_name='absolute_threshold', algorithm_params=thresholds)
    found = detector.get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)

    with self.assertRaises(exceptions.RequiredParametersNotPassed):
        AnomalyDetector(self.s1, algorithm_name='absolute_threshold')
def test_algorithm(self):
    """
    An unknown ``algorithm_name`` must raise ``AlgorithmNotFound``.
    """
    with self.assertRaises(exceptions.AlgorithmNotFound):
        AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')
def get_anoms(anomalous_ts):
    """
    Return the anomalies that ``AnomalyDetector`` finds in ``anomalous_ts``.

    :param anomalous_ts: anything ``dict()`` accepts, e.g. a list of
        (timestamp, value) pairs; converted with ``dict()`` before detection.
    :return: the detector's anomalies, or an empty list when the input is
        empty or detection fails (both cases are logged, not raised).
    """
    logger = logging.getLogger(skyline_app_logger)

    if not anomalous_ts:
        logger.error('error :: get_anoms :: no anomalous_ts')
        return []

    anomalies = []
    try:
        anomaly_ts_dict = dict(anomalous_ts)
        my_detector = AnomalyDetector(anomaly_ts_dict, score_threshold=1.5)
        anomalies = my_detector.get_anomalies()
    except Exception:
        # Log and fall through to the empty result. Unlike a bare `except:`,
        # KeyboardInterrupt and SystemExit are not swallowed.
        logger.error(traceback.format_exc())
        logger.error('error :: get_anoms :: AnomalyDetector')
    return anomalies
def test_diff_percent_threshold_algorithm(self):
    """
    Exercise the 'diff_percent_threshold' algorithm with thresholds of
    +20 / -20 against the baseline ``self.s2``, and check that omitting the
    thresholds is rejected.
    """
    thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
    detector = AnomalyDetector(
        self.s1,
        baseline_time_series=self.s2,
        algorithm_name='diff_percent_threshold',
        algorithm_params=thresholds,
    )
    found = detector.get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)

    with self.assertRaises(exceptions.RequiredParametersNotPassed):
        AnomalyDetector(self.s1, baseline_time_series=self.s2,
                        algorithm_name='diff_percent_threshold')
def cleanData(df, index_name="15分钟段", var_name="实际功率", limit=0.5):
    """
    Remove outliers from ``df[var_name]``, one ``index_name`` group at a time.

    For each group of ``df.groupby(index_name)``:

    1. Only rows strictly between the group's 5th and 95th percentile are kept.
    2. The remaining values are z-score normalised and scored with the
       'exp_avg_detector' algorithm of ``AnomalyDetector``; only rows whose
       score is below ``limit`` are kept.

    Groups whose remaining values have no positive standard deviation
    (constant or empty) skip step 2 and are kept unchanged.

    :param df: DataFrame containing the ``index_name`` and ``var_name`` columns.
    :param str index_name: name of the column to group by.
    :param str var_name: name of the column holding the values to clean.
    :param float limit: rows are kept only while their score is below this.
    :return: new DataFrame with the surviving rows of every group and a fresh
        index; an empty frame with the same columns when ``df`` has no groups.
    """
    cleaned_groups = []
    for _, group in df.groupby(index_name):
        # reset_index returns a new frame, so the caller's data is never touched.
        temp = group.reset_index(drop=True)
        limit_low, limit_up = np.percentile(temp[var_name], [5, 95])
        in_band = (temp[var_name] < limit_up) & (temp[var_name] > limit_low)
        temp = temp[in_band].reset_index(drop=True)

        ts = temp[var_name]
        ts_std = np.std(ts)
        if ts_std > 0:
            # Normalise only once the deviation is known to be positive, so a
            # constant group never triggers a division by zero.
            normalised = (ts - np.mean(ts)) / ts_std
            my_detector = AnomalyDetector(normalised.to_dict(), algorithm_name='exp_avg_detector')
            score = my_detector.get_all_scores()
            cleaned_groups.append(temp[np.array(score.values) < limit])
        else:
            cleaned_groups.append(temp)

    if not cleaned_groups:
        # pd.concat raises on an empty list; hand back an empty frame instead.
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(cleaned_groups, ignore_index=True)
def pointsOfCorrelation(ts1, ts2, thresholdVal):
    """
    Find anomalous periods of ``ts2`` during which ``ts1`` and ``ts2`` correlate.

    Anomalies are detected on ``ts2`` with the 'exp_avg_detector' algorithm.
    For each anomaly window a ``Correlator`` is built over both series, and
    the window is reported when ``is_correlated(threshold=0.8)`` holds.

    :param ts1: first time series, used only for correlation.
    :param ts2: second time series; anomaly detection runs on this one.
    :param thresholdVal: ``score_threshold`` passed to ``AnomalyDetector``.
    :return: list of ``[(start, end), coefficient, anomaly_score]`` entries.
        ``start`` and ``end`` are local-time strings 'YYYY-MM-DD HH:MM:SS';
        both numbers are rounded to 2 decimals.
    """
    corrPoints = []

    detector = AnomalyDetector(ts2, score_threshold=thresholdVal, algorithm_name="exp_avg_detector")
    anomalies = detector.get_anomalies()

    for a in anomalies:
        time_period = a.get_time_window()
        try:
            my_correlator = Correlator(ts1, ts2, time_period)
            if not my_correlator.is_correlated(threshold=0.8):
                continue
            correlatorResultObj = my_correlator.get_correlation_result()

            # Convert the anomaly window to a human-readable form.
            start = strftime('%Y-%m-%d %H:%M:%S', localtime(a.start_timestamp))
            end = strftime('%Y-%m-%d %H:%M:%S', localtime(a.end_timestamp))
            time_period = (start, end)

            corrPoints.append([
                time_period,
                round(correlatorResultObj.coefficient, 2),
                round(a.anomaly_score, 2),
            ])
        except Exception:
            # Best effort: skip windows that cannot be correlated. Unlike a bare
            # `except:`, KeyboardInterrupt and SystemExit still propagate.
            continue

    return corrPoints
def __init__(self, X_tr_red, X_te_red, shift_detector, level):
    """
    Score the test data with luminol, using the training data as baseline.

    Each row of ``X_tr_red`` / ``X_te_red`` is reduced to the negated row-wise
    maximum. Both sequences are keyed by position and handed to
    ``AnomalyDetector``, and the resulting per-position scores are passed on
    to the parent initializer.
    """
    logger.info("Run Luminol")

    X_tr_odim = (-np.amax(X_tr_red, axis=1)).tolist()
    X_te_odim = (-np.amax(X_te_red, axis=1)).tolist()

    test_series = dict(enumerate(X_te_odim))
    baseline_series = dict(enumerate(X_tr_odim))
    detector_params = {
        'precision': 10,
        'lag_window_size': 0.1,
        'future_window_size': 0.1,
        'chunk_size': 2,
    }
    my_detector = AnomalyDetector(
        test_series,
        baseline_time_series=baseline_series,
        algorithm_params=detector_params,
    )
    all_scores = my_detector.get_all_scores()

    score = [all_scores[position] for position in range(len(X_te_odim))]
    assert len(score) == len(X_te_odim), (len(score), len(X_te_odim))

    super().__init__(X_tr_odim, X_te_odim, score, shift_detector, level)
def f1_metrics(self):
    """
    Evaluate luminol anomaly scores against the known abnormal points.

    Reads ``self.y`` (values), ``self.ts`` (their index) and ``self.abn_pt``
    (positions labelled abnormal), and stores the 0/1 label vector in
    ``self.y_true``. Candidate thresholds are quantiles of the scores taken at
    ``log10(arange(0, 9, .1) + 1)``; the one with the best F1 score produces
    the returned prediction.

    :return: tuple ``(average_precision, y_true, y_score)``.
    """
    values = self.y
    detector = AnomalyDetector(pd.Series(values, index=self.ts).to_dict())
    scores = detector.get_all_scores()

    labels = np.zeros(values.size)
    for position in self.abn_pt:
        labels[position] = 1
    self.y_true = labels

    np_score = [score for _, score in scores.iteritems()]
    score_series = pd.Series(data=np_score)
    ap = average_precision_score(labels, np_score)

    def predict(quantile):
        # Mark every position whose score exceeds the given quantile.
        threshold = np.quantile(np_score, quantile)
        flagged = score_series[score_series.values > threshold].index.values
        prediction = np.zeros(values.size)
        for position in flagged:
            prediction[position] = 1
        return prediction

    quantiles = np.log10(np.arange(0, 9, .1) + 1)
    f1 = [f1_score(labels, predict(quantile)) for quantile in quantiles]
    best_quantile = quantiles[np.argmax(f1)]
    return ap, labels, predict(best_quantile)
def test_sign_test_algorithm_interface(self):
    """
    Test the "sign test" algorithm with a percent threshold of 0 and an offset of 2.

    Exactly one anomaly is expected both when only an upper threshold is
    configured and when only a lower threshold is configured.
    """
    bs = dict()
    bs.update((t, 1) for t in range(1, 30))

    # Upper bound only.
    algorithm_params = {'percent_threshold_upper': 0,
                        'offset': 2,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    # Timestamps 1..20 are raised to 3.1 (the baseline value is 1, the offset is 2).
    ts.update((t, 3.1) for t in range(1, 21))

    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()
    self.assertTrue(anomalies is not None)
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(len(anomalies), 1)

    # Lower bound only.
    algorithm_params = {'percent_threshold_lower': 0,
                        'offset': 2,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    # Timestamps 1..24 are set to 2.9, just under baseline + offset.
    ts.update((t, 2.9) for t in range(1, 25))

    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 1)
def test_score_only(self):
    """
    A detector built with ``score_only=True`` must return an empty anomaly
    collection, while the same detector without the flag still returns one.
    """
    algorithm = 'derivative_detector'
    scores_only = AnomalyDetector(self.s1, score_only=True, algorithm_name=algorithm)
    regular = AnomalyDetector(self.s1, algorithm_name=algorithm)
    self.assertIsNotNone(regular.get_anomalies())
    self.assertEqual(len(scores_only.get_anomalies()), 0)
def anomalies(self, metrics):
    """
    Score a metric series whose keys are 'YYYY-MM-DD HH:MM:SS' strings.

    Each key is parsed and converted to an integer epoch timestamp, the values
    are scored with ``AnomalyDetector``, and the scores are mapped back to the
    original key strings.

    :param metrics: mapping of timestamp string to metric value.
    :return: dict of timestamp string to anomaly score, or ``False`` when
        ``metrics`` is empty or the detector returns no scores.
    """
    series = {}
    labels = {}
    for stamp_text in metrics:
        parsed = datetime.datetime.strptime(stamp_text, "%Y-%m-%d %H:%M:%S")
        epoch = int(parsed.timestamp())
        series[epoch] = metrics[stamp_text]
        labels[epoch] = stamp_text

    if not series:
        return False

    all_scores = AnomalyDetector(series).get_all_scores()
    if not all_scores:
        return False

    return {labels[timestamp]: value for timestamp, value in all_scores.iteritems()}
def test_anomaly_threshold(self):
    """
    Check ``score_percent_threshold``: with the same threshold of 0.1, the
    'exp_avg_detector' and 'derivative_detector' algorithms must report
    different anomalies for ``self.s1``.
    """
    shared_options = {'score_percent_threshold': 0.1}
    exp_avg = AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', **shared_options)
    derivative = AnomalyDetector(self.s1, algorithm_name='derivative_detector', **shared_options)
    self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
def test_sign_test_algorithm_interface(self):
    """
    Exercise the 'sign_test' algorithm with a percent threshold of 0 and an
    offset of 2: one anomaly is expected with only an upper threshold, and one
    with only a lower threshold.
    """
    baseline = {t: 1 for t in range(1, 30)}

    def detect(series, params):
        # Run 'sign_test' on `series` against the shared baseline.
        detector = AnomalyDetector(series, baseline_time_series=baseline,
                                   algorithm_name='sign_test', algorithm_params=params)
        return detector.get_anomalies()

    # Upper bound only: timestamps 1..20 raised to 3.1.
    upper_params = {
        'percent_threshold_upper': 0,
        'offset': 2,
        'scan_window': 24,
        'confidence': 0.01,
    }
    raised = dict(baseline)
    raised.update((t, 3.1) for t in range(1, 21))
    found = detect(raised, upper_params)
    self.assertIsNotNone(found)
    self.assertEqual(len(found), 1)

    # Lower bound only: timestamps 1..24 set to 2.9.
    lower_params = {
        'percent_threshold_lower': 0,
        'offset': 2,
        'scan_window': 24,
        'confidence': 0.01,
    }
    lowered = dict(baseline)
    lowered.update((t, 2.9) for t in range(1, 25))
    found = detect(lowered, lower_params)
    self.assertIsNotNone(found)
    self.assertEqual(len(found), 1)
def test_sign_test_algorithm_with_shift(self):
    """
    Test the "sign test" algorithm combined with the ``shift`` parameter.

    Upper case: threshold of 10% with a shift of 1.
    Lower case: threshold of -20% with a shift of -0.1.
    In both cases a deviation that stays inside the shifted band must give no
    anomaly, and a slightly larger deviation must give exactly one.
    """
    bs = dict()
    bs.update({t: 1 for t in range(1, 100)})

    def detect(series, params):
        # Run 'sign_test' on `series` against the baseline `bs`.
        detector = AnomalyDetector(series, baseline_time_series=bs, algorithm_name='sign_test',
                                   algorithm_params=params)
        return detector.get_anomalies()

    algorithm_params = {'percent_threshold_upper': 10,
                        'shift': 1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    # 1.2 is more than 10% above the baseline, but no anomaly is expected with shift=1.
    ts.update({t: 1.2 for t in range(10, 34)})
    anomalies = detect(ts, algorithm_params)
    self.assertTrue(anomalies is not None)
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(len(anomalies), 0)

    # Raising the same 24 points (one scan window) to 2.100001 gives one anomaly.
    ts.update({t: 2.100001 for t in range(10, 34)})
    anomalies = detect(ts, algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    # The reported window (4..39) is wider than the 24 modified points.
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)
    # The score should be roughly 98.5.
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # 0.799999 is below the baseline, but no anomaly is expected with shift=-0.1.
    algorithm_params = {'percent_threshold_lower': -20,
                        'shift': -0.1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts.update({t: 0.799999 for t in range(10, 34)})
    anomalies = detect(ts, algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # Lowering the series by another 0.1 gives one anomaly.
    ts.update({t: 0.699999 for t in range(10, 34)})
    anomalies = detect(ts, algorithm_params)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)
    # The score should be roughly 98.5.
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)
class TestAnomalyDetector(unittest.TestCase):
    """Tests for ``AnomalyDetector`` and the algorithms it can be configured with."""

    def setUp(self):
        # Two small series over timestamps 0..8; s2 serves as baseline for s1.
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
        self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def _sign_test_anomalies(self, ts, bs, algorithm_params):
        """Run the 'sign_test' algorithm on ``ts`` against baseline ``bs`` and return the anomalies."""
        detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        return detector.get_anomalies()

    def _assert_window(self, anomaly, start, end):
        """Assert that ``anomaly`` covers exactly the time window ``(start, end)``."""
        s, e = anomaly.get_time_window()
        self.assertEqual(s, start)
        self.assertEqual(e, end)

    def test_custom_algorithm(self):
        """
        Test passing a custom algorithm class
        """
        detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_class=CustomAlgo,
                                   algorithm_params={'percent_threshold_upper': 20,
                                                     'percent_threshold_lower': -20})
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertTrue(len(anomalies) > 0)

    def test_diff_percent_threshold_algorithm(self):
        """
        Test "diff percent threshold" algorithm with a threshold of 20%
        """
        detector = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                   algorithm_name='diff_percent_threshold',
                                   algorithm_params={'percent_threshold_upper': 20,
                                                     'percent_threshold_lower': -20})
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertTrue(len(anomalies) > 0)
        self.assertRaises(exceptions.RequiredParametersNotPassed,
                          lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                                  algorithm_name='diff_percent_threshold'))

    def test_sign_test_algorithm(self):
        """
        Test "sign test" algorithm with a threshold of 20%
        """
        bs = dict()
        bs.update({t: 1 for t in range(1, 100)})
        ts = dict(bs)

        # The algorithm must refuse to run without its parameters.
        self.assertRaises(exceptions.RequiredParametersNotPassed,
                          lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                                  algorithm_name='sign_test'))

        algorithm_params = {'percent_threshold_upper': 20,
                            'percent_threshold_lower': -20,
                            'scan_window': 24,
                            'confidence': 0.01}

        # Series identical to the baseline: no anomalies.
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertTrue(anomalies is not None)
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(len(anomalies), 0)

        # One anomaly whose raised points span exactly one scan window.
        ts.update({t: 1.200001 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        # The reported window (4..39) is wider than the 24 modified points.
        self._assert_window(anomaly, 4, 39)
        # The score should be roughly 98.5.
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # The same points, now below the baseline.
        ts.update({t: 0.799999 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        self._assert_window(anomaly, 4, 39)
        # The score should be roughly 98.5.
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # Two anomalies separated by a big gap.
        ts.update(bs)
        ts.update({t: 0.799999 for t in range(1, 25)})
        ts.update({t: 1.21 for t in range(60, 84)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 2)
        anomaly = anomalies[0]
        self._assert_window(anomaly, 1, 30)
        # The score should be about 99.9.
        self.assertGreater(anomaly.anomaly_score, 99)
        anomaly = anomalies[1]
        self._assert_window(anomaly, 54, 89)
        # The score should be roughly 98.5.
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # Two deviations separated by a small gap are reported as one anomaly.
        ts.update(bs)
        ts.update({t: 1.21 for t in range(1, 25)})
        ts.update({t: 1.21 for t in range(30, 40)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        self._assert_window(anomaly, 1, 40)
        # The score should be about 99.9.
        self.assertGreater(anomaly.anomaly_score, 99)

    def test_sign_test_algorithm_with_shift(self):
        """
        Test "sign test" algorithm with the ``shift`` parameter
        (10% upper threshold with shift 1, then -20% lower threshold with shift -0.1)
        """
        bs = dict()
        bs.update({t: 1 for t in range(1, 100)})

        algorithm_params = {'percent_threshold_upper': 10,
                            'shift': 1,
                            'scan_window': 24,
                            'confidence': 0.01}
        ts = dict(bs)
        # 1.2 is more than 10% above the baseline, but no anomaly is expected with shift=1.
        ts.update({t: 1.2 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 0)

        # Raising the same 24 points (one scan window) to 2.100001 gives one anomaly.
        ts.update({t: 2.100001 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        # The reported window (4..39) is wider than the 24 modified points.
        self._assert_window(anomaly, 4, 39)
        # The score should be roughly 98.5.
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # 0.799999 is below the baseline, but no anomaly is expected with shift=-0.1.
        algorithm_params = {'percent_threshold_lower': -20,
                            'shift': -0.1,
                            'scan_window': 24,
                            'confidence': 0.01}
        ts.update({t: 0.799999 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 0)

        # Lowering the series by another 0.1 gives one anomaly.
        ts.update({t: 0.699999 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        self._assert_window(anomaly, 4, 39)
        # The score should be roughly 98.5.
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

    def test_absolute_threshold_algorithm(self):
        """
        Test "absolute threshold" algorithm with a upper and lower threshold of 0.2
        """
        detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                                   algorithm_params={'absolute_threshold_value_upper': 0.2,
                                                     'absolute_threshold_value_lower': 0.2})
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertTrue(len(anomalies) > 0)
        self.assertRaises(exceptions.RequiredParametersNotPassed,
                          lambda: AnomalyDetector(self.s1, algorithm_name='absolute_threshold'))

    def test_threshold(self):
        """
        Test score threshold=0
        """
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertTrue(len(detector.get_anomalies()) == 1)
        self.assertTrue(detector.get_anomalies() is not None)

    def test_score_only(self):
        """
        Test that score_only parameter doesn't give anomalies
        """
        detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertTrue(detector2.get_anomalies() is not None)
        self.assertTrue(len(detector.get_anomalies()) == 0)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        self.assertTrue(isinstance(self.detector1.get_all_scores(), TimeSeries))
        self.assertEqual(len(self.detector1.get_all_scores()), len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        self.assertTrue(self.detector1.get_anomalies() is not None)

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps, self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values, self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        self.assertRaises(exceptions.AlgorithmNotFound,
                          lambda: AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm'))

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        self.assertRaises(ValueError,
                          lambda: AnomalyDetector(self.s1, algorithm_name='exp_avg_detector',
                                                  algorithm_params='0'))
        detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values, detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percent_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
class TestAnomalyDetector(unittest.TestCase):
    """Tests for ``AnomalyDetector`` and the algorithms it can be configured with."""

    def setUp(self):
        # Two small series over timestamps 0..8; s2 serves as baseline for s1.
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
        self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_custom_algorithm(self):
        """
        Run the detector with a caller-supplied algorithm class and expect anomalies.
        """
        thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
        detector = AnomalyDetector(
            self.s1,
            baseline_time_series=self.s2,
            algorithm_class=CustomAlgo,
            algorithm_params=thresholds,
        )
        found = detector.get_anomalies()
        self.assertIsNotNone(found)
        self.assertGreater(len(found), 0)

    def test_diff_percent_threshold_algorithm(self):
        """
        Exercise 'diff_percent_threshold' with thresholds of +20 / -20, and
        check that omitting the thresholds is rejected.
        """
        thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
        detector = AnomalyDetector(
            self.s1,
            baseline_time_series=self.s2,
            algorithm_name='diff_percent_threshold',
            algorithm_params=thresholds,
        )
        found = detector.get_anomalies()
        self.assertIsNotNone(found)
        self.assertGreater(len(found), 0)

        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1, baseline_time_series=self.s2,
                            algorithm_name='diff_percent_threshold')

    def test_absolute_threshold_algorithm(self):
        """
        Exercise 'absolute_threshold' with upper and lower thresholds of 0.2,
        and check that omitting the thresholds is rejected.
        """
        thresholds = {
            'absolute_threshold_value_upper': 0.2,
            'absolute_threshold_value_lower': 0.2,
        }
        detector = AnomalyDetector(
            self.s1, algorithm_name='absolute_threshold', algorithm_params=thresholds)
        found = detector.get_anomalies()
        self.assertIsNotNone(found)
        self.assertGreater(len(found), 0)

        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1, algorithm_name='absolute_threshold')

    def test_threshold(self):
        """
        With ``score_threshold=0`` exactly one anomaly must be reported.
        """
        zero_threshold = AnomalyDetector(self.s1, score_threshold=0)
        self.assertEqual(len(zero_threshold.get_anomalies()), 1)
        self.assertIsNotNone(zero_threshold.get_anomalies())

    def test_score_only(self):
        """
        With ``score_only=True`` the anomaly collection must be empty.
        """
        algorithm = 'derivative_detector'
        scores_only = AnomalyDetector(self.s1, score_only=True, algorithm_name=algorithm)
        regular = AnomalyDetector(self.s1, algorithm_name=algorithm)
        self.assertIsNotNone(regular.get_anomalies())
        self.assertEqual(len(scores_only.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        ``get_all_scores`` must return a ``TimeSeries`` as long as the input series.
        """
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        score_count = len(self.detector1.get_all_scores())
        self.assertEqual(score_count, len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        ``get_anomalies`` must not return None.
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Naming 'default_detector' explicitly must match the detector built
        without an algorithm name.
        """
        explicit = AnomalyDetector(self.s1, algorithm_name='default_detector')
        for attribute in ('timestamps', 'values'):
            self.assertEqual(
                getattr(explicit.get_all_scores(), attribute),
                getattr(self.detector1.get_all_scores(), attribute),
            )

    def test_algorithm(self):
        """
        An unknown ``algorithm_name`` must raise ``AlgorithmNotFound``.
        """
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        ``algorithm_params`` given as the string '0' must raise ``ValueError``;
        a custom smoothing factor must change the scores.
        """
        with self.assertRaises(ValueError):
            AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')

        tuned = AnomalyDetector(
            self.s1,
            algorithm_name="exp_avg_detector",
            algorithm_params={'smoothing_factor': 0.3},
        )
        reference_values = self.detector1.get_all_scores().values
        tuned_values = tuned.get_all_scores().values
        self.assertNotEqual(reference_values, tuned_values)

    def test_anomaly_threshold(self):
        """
        With the same ``score_percent_threshold`` two different algorithms
        must report different anomalies.
        """
        shared_options = {'score_percent_threshold': 0.1}
        exp_avg = AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', **shared_options)
        derivative = AnomalyDetector(self.s1, algorithm_name='derivative_detector', **shared_options)
        self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
from luminol.anomaly_detector import AnomalyDetector
import time

# Score every point of the series stored in Turbidity.csv and print each
# timestamp (local time, two-digit year) whose anomaly score is positive.
all_scores = AnomalyDetector('Turbidity.csv').get_all_scores()
for timestamp, value in all_scores.iteritems():
    local_time = time.localtime(timestamp)
    t_str = time.strftime('%y-%m-%d %H:%M:%S', local_time)
    if value > 0:
        print(f'{t_str}, {value}')
def test_score_only(self):
    """
    Check get_anomalies() with and without score_only.

    A derivative detector built with score_only=True is expected to return
    None from get_anomalies(); one built without it is expected to return
    something other than None.
    """
    algorithm = 'derivative_detector'
    scores_only = AnomalyDetector(self.s1, score_only=True, algorithm_name=algorithm)
    with_anomalies = AnomalyDetector(self.s1, algorithm_name=algorithm)
    self.assertTrue(with_anomalies.get_anomalies() is not None)
    self.assertTrue(scores_only.get_anomalies() is None)
def test_sign_test_algorithm(self):
    """
    Test the "sign test" algorithm with a threshold of 20%.

    A flat baseline (value 1 for timestamps 1..99) is compared against copies
    of itself that are pushed just beyond +/-20% over chosen ranges; the
    number of anomalies, their time windows and their scores are checked.
    """
    bs = {t: 1 for t in range(1, 100)}
    ts = dict(bs)

    # test missing parameters
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                              algorithm_name='sign_test'))

    algorithm_params = {'percent_threshold_upper': 20,
                        'percent_threshold_lower': -20,
                        'scan_window': 24,
                        'confidence': 0.01}

    def detect(series):
        # Run the sign test for `series` against the flat baseline.
        detector = AnomalyDetector(series, baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        return detector.get_anomalies()

    def assert_window(anomaly, start, end):
        # Check the (start, end) time window reported for one anomaly.
        s, e = anomaly.get_time_window()
        self.assertEqual(s, start)
        self.assertEqual(e, end)

    # first no anomalies
    anomalies = detect(ts)
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    ts.update({t: 1.200001 for t in range(10, 34)})
    anomalies = detect(ts)
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    # note the anomaly is larger than scan window
    assert_window(anomaly, 4, 39)
    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomaly below baseline
    ts.update({t: 0.799999 for t in range(10, 34)})
    anomalies = detect(ts)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    assert_window(anomaly, 4, 39)
    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomalies separated by big gap
    ts.update(bs)
    ts.update({t: 0.799999 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(60, 84)})
    anomalies = detect(ts)
    self.assertEqual(len(anomalies), 2)
    anomaly = anomalies[0]
    assert_window(anomaly, 1, 30)
    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)
    anomaly = anomalies[1]
    assert_window(anomaly, 54, 89)
    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomalies separated by small gap
    ts.update(bs)
    ts.update({t: 1.21 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(30, 40)})
    anomalies = detect(ts)
    # just one
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    assert_window(anomaly, 1, 40)
    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)
def main(argv):
    """
    Score the CAN events of a CANoe ASCII log (.asc) and print the anomalies.

    Command-line options (read from ``sys.argv``):
      -i, --input FILE  log file to analyse (required)
      -v                print every anomaly instead of only those whose
                        score is above 3.4
      -h, --help        show usage and exit

    :param argv: not used; options are always taken from ``sys.argv[1:]``.
        The parameter is kept so existing callers keep working.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:v", ["help", "input="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))
        usage()
        sys.exit(2)

    input_path = None
    verbose = False
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-i", "--input"):
            input_path = a
        else:
            assert False, "unhandled option"

    if input_path is None:
        # Without -i there is nothing to read; open(None) would only fail
        # with an unhelpful error.
        usage()
        sys.exit(2)

    # this is the regular expression used to parse CANoe logs in ASCII format (.asc)
    regex = r"(\s+)([+-]?\d*\.\d+)(?![-+0-9\.])(\s+)(\d+)(\s+)(\w+)(\s+)(\w+)(\s+)([a-z])(\s+)(\d+)(?:(?!X).)*(Length = )(\d+)(\s+)(BitCount = )(\d+)(\s+)(ID = )(\d+)"
    pattern = re.compile(regex, re.UNICODE)

    with open(input_path) as log_file:
        log_text = log_file.read()

    # event_dict maps the scaled event time to the CAN ID extracted from the log
    event_dict = {}
    for match in pattern.finditer(log_text):
        # group(2) is the event time (seconds since engine start according to
        # the original author); it is scaled by 1e6 and stays a float.
        event_time = float(match.group(2)) * 1000000
        # group(20) is the ID of the CAN event in decimal
        event_dict[event_time] = match.group(20)

    my_detector = AnomalyDetector(event_dict, algorithm_name=("exp_avg_detector"))
    # NOTE(review): the scores are not used below; the call is kept in case it
    # has side effects - confirm and remove.
    score = my_detector.get_all_scores()
    # filter events in time series for anomalies
    anomalies = my_detector.get_anomalies()

    # Only anomalies scoring above this are shown unless -v is given.
    min_score = 3.4

    print("")
    for attack in anomalies:
        if attack.exact_timestamp not in event_dict:
            continue
        if verbose or attack.anomaly_score > min_score:
            print("{timestamp} - ID: {id} - Score: {value}".format(
                timestamp=attack.exact_timestamp,
                id=event_dict[attack.exact_timestamp],
                value=attack.anomaly_score))
class RCA(object):
    """
    Detect anomalies in one time series and correlate each anomaly window
    against a list of related time series.
    """

    def __init__(self, metrix, related_metrices):
        """
        Load the series, detect its anomalies and run the correlation pass.

        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
        :param list related_metrices: a list of time series.
        """
        self.metrix = self._load(metrix)
        detector = AnomalyDetector(metrix)
        self.anomaly_detector = detector
        self.related_metrices = related_metrices
        self.anomalies = detector.get_anomalies()
        self._analyze()

    def _load(self, metrix):
        """
        Turn the input into a TimeSeries.

        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
        :return TimeSeries: the input itself when it already is a TimeSeries,
            otherwise a TimeSeries built from the dictionary or from the csv
            content.
        """
        if isinstance(metrix, TimeSeries):
            return metrix
        source = metrix if isinstance(metrix, dict) else utils.read_csv(metrix)
        return TimeSeries(source)

    def _analyze(self):
        """
        Correlate every detected anomaly with each related series.

        Results are stored in ``self.output`` (keyed by the anomaly's exact
        timestamp) and ``self.output_by_name`` (keyed by the related entry).
        Both stay empty when no anomaly was detected.
        """
        by_timestamp = defaultdict(list)
        by_entry = defaultdict(list)
        all_scores = self.anomaly_detector.get_all_scores()

        # `or []` skips the loop entirely when there are no anomalies.
        for anomaly in self.anomalies or []:
            window_start, window_end = anomaly.get_time_window()
            anomaly_time = anomaly.exact_timestamp

            # Pad the window by half its length on each side (30 when the
            # window has zero length).
            margin = (window_end - window_start) / 2 or 30
            lower = window_start - margin
            upper = window_end + margin

            # Keep widening until at least two score points are covered.
            # NOTE(review): this looks unbounded if the score series holds
            # fewer than two points overall - confirm crop() semantics.
            while True:
                cropped_scores = all_scores.crop(lower, upper)
                if len(cropped_scores) >= 2:
                    break
                lower = lower - margin
                upper = upper + margin

            # Correlate with other metrics
            for entry in self.related_metrices:
                try:
                    correlator = Correlator(self.metrix, entry,
                                            time_period=(lower, upper),
                                            use_anomaly_score=True)
                    details = correlator.get_correlation_result().__dict__
                    by_timestamp[anomaly_time].append((lower, upper, details, entry))
                    by_entry[entry].append((lower, upper, details))
                except exceptions.NotEnoughDataPoints:
                    # Entries without enough data in the window are skipped.
                    continue

        self.output = by_timestamp
        self.output_by_name = by_entry
class TestAnomalyDetector(unittest.TestCase):
    """Exercise AnomalyDetector's constructor options and its score/anomaly accessors."""

    def setUp(self):
        """Build two small series (keys 0..8) and a default detector for each."""
        self.s1 = dict(enumerate([0, 0, 0, 0, 0.5, 1, 1, 1, 0]))
        self.s2 = dict(enumerate([0, 0.5, 1, 1, 1, 0, 0, 0, 0]))
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_threshold(self):
        """With score_threshold=0 exactly one anomaly is expected for s1."""
        detector = AnomalyDetector(self.s1, score_threshold=0)
        anomaly_count = len(detector.get_anomalies())
        self.assertTrue(anomaly_count == 1)
        self.assertTrue(detector.get_anomalies() is not None)

    def test_score_only(self):
        """score_only=True is expected to make get_anomalies() return None."""
        algorithm = 'derivative_detector'
        scores_only = AnomalyDetector(self.s1, score_only=True, algorithm_name=algorithm)
        with_anomalies = AnomalyDetector(self.s1, algorithm_name=algorithm)
        self.assertTrue(with_anomalies.get_anomalies() is not None)
        self.assertTrue(scores_only.get_anomalies() is None)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        is_time_series = isinstance(self.detector1.get_all_scores(), TimeSeries)
        self.assertTrue(is_time_series)
        score_count = len(self.detector1.get_all_scores())
        point_count = len(self.detector1.time_series)
        self.assertEqual(score_count, point_count)

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        found = self.detector1.get_anomalies()
        self.assertTrue(found is not None)

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        named_timestamps = detector.get_all_scores().timestamps
        implicit_timestamps = self.detector1.get_all_scores().timestamps
        self.assertEqual(named_timestamps, implicit_timestamps)
        named_values = detector.get_all_scores().values
        implicit_values = self.detector1.get_all_scores().values
        self.assertEqual(named_values, implicit_values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        # A non-dict algorithm_params is expected to be rejected.
        with self.assertRaises(exceptions.InvalidDataFormat):
            AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')
        detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        default_values = self.detector1.get_all_scores().values
        tuned_values = detector.get_all_scores().values
        self.assertNotEqual(default_values, tuned_values)

    def test_anomaly_threshold(self):
        """
        Test if score_percentile_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1, score_percentile_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1, score_percentile_threshold=0.1,
                                    algorithm_name='derivative_detector')
        derivative_anomalies = detector1.get_anomalies()
        exp_avg_anomalies = detector.get_anomalies()
        self.assertNotEqual(derivative_anomalies, exp_avg_anomalies)
class RCA(object):
    """
    Detect anomalies in one time series and correlate each anomaly window
    against a list of related time series.
    """

    def __init__(self, metrix, related_metrices):
        """
        Load the series, detect its anomalies and run the correlation pass.

        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
        :param list related_metrices: a list of time series.
        """
        self.metrix = self._load(metrix)
        detector = AnomalyDetector(metrix)
        self.anomaly_detector = detector
        self.related_metrices = related_metrices
        self.anomalies = detector.get_anomalies()
        self._analyze()

    def _load(self, metrix):
        """
        Turn the input into a TimeSeries.

        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
        :return TimeSeries: the input itself when it already is a TimeSeries,
            otherwise a TimeSeries built from the dictionary or from the csv
            content.
        """
        if isinstance(metrix, TimeSeries):
            return metrix
        source = metrix if isinstance(metrix, dict) else utils.read_csv(metrix)
        return TimeSeries(source)

    def _analyze(self):
        """
        Correlate every detected anomaly with each related series.

        Results are stored in ``self.output`` (keyed by the anomaly's exact
        timestamp) and ``self.output_by_name`` (keyed by the related entry).
        Both stay empty when no anomaly was detected.
        """
        by_timestamp = defaultdict(list)
        by_entry = defaultdict(list)
        all_scores = self.anomaly_detector.get_all_scores()

        # `or []` skips the loop entirely when there are no anomalies.
        for anomaly in self.anomalies or []:
            window_start, window_end = anomaly.get_time_window()
            anomaly_time = anomaly.exact_timestamp

            # Pad the window by half its length on each side (30 when the
            # window has zero length).
            margin = (window_end - window_start) / 2 or 30
            lower = window_start - margin
            upper = window_end + margin

            # Keep widening until at least two score points are covered.
            # NOTE(review): this looks unbounded if the score series holds
            # fewer than two points overall - confirm crop() semantics.
            while True:
                cropped_scores = all_scores.crop(lower, upper)
                if len(cropped_scores) >= 2:
                    break
                lower = lower - margin
                upper = upper + margin

            # Correlate with other metrics
            for entry in self.related_metrices:
                try:
                    correlator = Correlator(self.metrix, entry,
                                            time_period=(lower, upper),
                                            use_anomaly_score=True)
                    details = correlator.get_correlation_result().__dict__
                    by_timestamp[anomaly_time].append((lower, upper, details, entry))
                    by_entry[entry].append((lower, upper, details))
                except exceptions.NotEnoughDataPoints:
                    # Entries without enough data in the window are skipped.
                    continue

        self.output = by_timestamp
        self.output_by_name = by_entry
def setUp(self):
    """Build two small series (keys 0..8) and a default detector for each."""
    first_values = [0, 0, 0, 0, 0.5, 1, 1, 1, 0]
    second_values = [0, 0.5, 1, 1, 1, 0, 0, 0, 0]
    self.s1 = dict(enumerate(first_values))
    self.s2 = dict(enumerate(second_values))
    self.detector1 = AnomalyDetector(self.s1)
    self.detector2 = AnomalyDetector(self.s2)
from luminol.anomaly_detector import AnomalyDetector
import matplotlib.pyplot as plt
import pandas as pd

# Load the NAB "Twitter_volume_FB" series; the first CSV column is the index.
df = pd.read_csv(
    "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_FB.csv",
    header=0,
    index_col=0)
# df[:].plot(linewidth=2)
# plt.grid(which='both')
# plt.show()

# Key each value by its position: 0, 1, 2, ...
ts = dict(enumerate(df.value))

my_detector = AnomalyDetector(ts)
score = my_detector.get_all_scores()
# Only the scores are charted; the timestamps are dropped.
anomalies_chart = [point_score for _, point_score in score.iteritems()]

# First figure: the raw values.
list_values = list(ts.values())
plt.plot(list_values)
plt.show()

# Second figure: the anomaly scores, in red.
plt.plot(anomalies_chart, color='r')
plt.show()
def setUp(self):
    """Build two small series (keys 0..8) and a default detector for each."""
    first_values = [0, 0, 0, 0, 1, 2, 2, 2, 0]
    second_values = [0, 1, 2, 2, 2, 0, 0, 0, 0]
    self.s1 = dict(enumerate(first_values))
    self.s2 = dict(enumerate(second_values))
    self.detector1 = AnomalyDetector(self.s1)
    self.detector2 = AnomalyDetector(self.s2)
def get_timeseries(
        phenomenon_time_range,
        num_time_slots,
        get_observations,
        detector_method='bitmap_detector',  # LinkedIn bitmap
        detector_params={
            "precision": 8,
            "lag_window_size": 20,
            "future_window_size": 20,
            "chunk_size": 2
        },
        anomaly_breaks=DEFAULT_ANOMALY_BREAKS,
        value_breaks=DEFAULT_VALUE_BREAKS,
        extend_range=True,
        baseline_time_range=None,
        shift=True,
        use_baseline=True):
    """
    Fetch observations and compute their values, anomaly rates and percentiles.

    :param phenomenon_time_range: returned unchanged under the
        'phenomenon_time_range' key (except when there are no observations).
    :param num_time_slots: number of slots kept in the returned lists,
        counted from index ``lower_ext``.
    :param get_observations: callable invoked as
        ``get_observations(lower_ext, upper_ext)``; must return a list whose
        items expose ``phenomenon_time_range.lower.timestamp()`` and
        ``result``.
    :param detector_method: algorithm name passed to AnomalyDetector.
    :param detector_params: algorithm parameters; must contain
        "lag_window_size" and "future_window_size". The dict is copied, so
        neither the caller's dict nor the shared default is modified.
    :param anomaly_breaks: breaks passed to ``percentiles`` for anomaly rates.
    :param value_breaks: breaks passed to ``percentiles`` for values.
    :param extend_range: when true, extra slots are requested before/after
        the range, sized from the detector windows.
    :param baseline_time_range: only compared against None; a baseline series
        is built from the observations themselves when it is None and
        ``use_baseline`` is true, otherwise the detector runs without one.
    :param shift: selects how the extension is split when a baseline is used.
    :param use_baseline: whether a baseline series is passed to the detector.
    :return: dict with keys 'phenomenon_time_range', 'property_values',
        'property_value_percentiles', 'property_anomaly_rates' and
        'property_anomaly_percentiles'.
    :raises Exception: if ``get_observations`` does not return a list.
    """
    # Work on a private copy: the window sizes may be rewritten below, and
    # the default dict object is shared by every call that omits the argument.
    detector_params = dict(detector_params)

    lower_ext = 0
    upper_ext = 0

    # NOTE(review): the two use_baseline adjustments are assumed to apply only
    # when extend_range is set - confirm against the original layout.
    if extend_range:
        lower_ext = detector_params["lag_window_size"]
        upper_ext = detector_params["future_window_size"]

        if use_baseline and shift:
            upper_ext = 0

        if use_baseline and not shift:
            lower_ext = int(upper_ext / 2)
            upper_ext -= lower_ext + 1

    observations = get_observations(lower_ext, upper_ext)

    if not isinstance(observations, list):
        raise Exception('property_values should be array')

    if not observations:
        return {
            'phenomenon_time_range': DateTimeTZRange(),
            'property_values': [],
            'property_value_percentiles': {},
            'property_anomaly_rates': [],
            'property_anomaly_percentiles': {},
        }

    property_values = observations_to_property_values(observations)
    valid_values_length = len(property_values) - property_values.count(None)

    if valid_values_length == 1:
        # NOTE(review): property_values[0] is reported as the median even if
        # the single non-None value sits at another index - confirm intent.
        return {
            'phenomenon_time_range': phenomenon_time_range,
            'property_values': property_values,
            'property_value_percentiles': {
                50: property_values[0]
            },
            'property_anomaly_rates': [0],
            'property_anomaly_percentiles': {
                0: 0
            },
        }

    minimal_points_in_windows = DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS
    if use_baseline:
        minimal_points_in_windows /= 2

    if use_baseline:
        window_length = detector_params["future_window_size"]
    else:
        window_length = (detector_params["future_window_size"]
                         + detector_params["lag_window_size"])

    # Too few valid points to fill the configured windows: shrink both
    # windows to a share of the valid points, with a fixed lower bound.
    if minimal_points_in_windows < valid_values_length <= window_length:
        detector_params["future_window_size"] = int(
            max(DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS / 2,
                valid_values_length * DEFAULT_BITMAP_MOD_LEADING_WINDOW_SIZE_PCT))
        detector_params["lag_window_size"] = int(
            max(DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS / 2,
                valid_values_length * DEFAULT_BITMAP_MOD_LAGGING_WINDOW_SIZE_PCT))

    visible_slots = slice(lower_ext, lower_ext + num_time_slots)
    property_value_percentiles = percentiles(property_values[visible_slots], value_breaks)

    # None means "run the detector without a baseline series".
    baseline_reduced = None
    if use_baseline and baseline_time_range is None:
        baseline_reduced = {
            obs.phenomenon_time_range.lower.timestamp(): obs.result
            for obs in observations
        }

    obs_reduced = {
        obs.phenomenon_time_range.lower.timestamp(): obs.result
        for obs in observations
    }

    if valid_values_length <= 1:
        # No valid value at all (the single-value case returned earlier).
        property_anomaly_rates = [
            0 if value is not None else value
            for value in property_values[visible_slots]
        ]
        return {
            'phenomenon_time_range': phenomenon_time_range,
            'property_values': property_values[visible_slots],
            'property_value_percentiles': property_value_percentiles,
            'property_anomaly_rates': property_anomaly_rates,
            'property_anomaly_percentiles': {
                0: 0
            },
        }

    if baseline_reduced is None:
        detector = AnomalyDetector(obs_reduced,
                                   algorithm_name=detector_method,
                                   algorithm_params=detector_params,
                                   score_only=True)
    else:
        detector = AnomalyDetector(obs_reduced,
                                   baseline_reduced,
                                   algorithm_name=detector_method,
                                   algorithm_params=detector_params,
                                   score_only=True)

    property_anomaly_rates = detector.get_all_scores().values

    # Percentiles are computed before the None placeholders are inserted.
    property_anomaly_percentiles = percentiles(
        property_anomaly_rates[visible_slots], anomaly_breaks)

    # Put a None rate at every index whose value is None.
    for i, value in enumerate(property_values):
        if value is None:
            property_anomaly_rates.insert(i, None)

    return {
        'phenomenon_time_range': phenomenon_time_range,
        'property_values': property_values[visible_slots],
        'property_value_percentiles': property_value_percentiles,
        'property_anomaly_rates': property_anomaly_rates[visible_slots],
        'property_anomaly_percentiles': property_anomaly_percentiles,
    }
# NOTE(review): nesting below is reconstructed from a flattened source line - confirm.
# Optionally add linear interpolation to the transformation filter.
if args.interpolate_period > 0:
    interpolation_period = {'count': args.interpolate_period, 'unit': TimeUnit.MINUTE}
    tf.set_interpolate(Interpolate(period=interpolation_period,
                                   function=InterpolateFunction.LINEAR))

query.set_transformation_filter(tf)

series_list = svc.query(query)
for series in series_list:
    metric_id = '- %s %s' % (series.metric, print_tags(series.tags))
    log('\t' + metric_id)
    # exclude empty series for specific tags
    if len(series.data) > 2:
        # Sample times are divided by 1000 and truncated to int to form the keys.
        ts = {int(sample.t / 1000): sample.v for sample in series.data}

        detector = AnomalyDetector(ts, score_threshold=args.min_score)

        # Keep only anomalies no older than `last_hours` hours before `now`.
        anomalies = [
            anomaly for anomaly in detector.get_anomalies()
            if time.mktime(now.timetuple()) - args.last_hours * 3600 <= anomaly.exact_timestamp
        ]

        if anomalies:
            message.append(metric_id)
            for anomaly in anomalies:
                t_start = format_t(anomaly.start_timestamp)
                t_end = format_t(anomaly.end_timestamp)
                t_exact = format_t(anomaly.exact_timestamp)
                details = (t_start, t_end, anomaly.anomaly_score, t_exact,
                           ts[anomaly.exact_timestamp])
                anomaly_msg = '\tAnomaly from %s to %s with score %s: %s, %s' % details
                message.append(anomaly_msg)