def test_algorithm_DefaultDetector(self):
    """
    Check that naming 'default_detector' explicitly gives the same scores
    (timestamps and values) as ``self.detector1``.
    """
    detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
    # Compare both components of the score series, one attribute at a time.
    for field in ('timestamps', 'values'):
        self.assertEqual(getattr(detector.get_all_scores(), field),
                         getattr(self.detector1.get_all_scores(), field))
def test_algorithm_params(self):
    """
    Check the optional ``algorithm_params`` argument.

    Passing the string '0' must raise ``exceptions.InvalidDataFormat``;
    passing a smoothing factor must produce score values that differ from
    those of ``self.detector1``.
    """
    with self.assertRaises(exceptions.InvalidDataFormat):
        AnomalyDetector(self.s1,
                        algorithm_name='exp_avg_detector',
                        algorithm_params='0')

    tuned = AnomalyDetector(self.s1,
                            algorithm_name="exp_avg_detector",
                            algorithm_params={'smoothing_factor': 0.3})
    reference_values = self.detector1.get_all_scores().values
    self.assertNotEqual(reference_values, tuned.get_all_scores().values)
def clean_anomaly2(df, index_name="15分钟段", var_name="实际功率", anomaly_limit=1.5, base_p=0.3):
    """
    Drop outlying rows of ``var_name`` inside every ``index_name`` group.

    Each group is first trimmed to a percentile band (2nd-99th when the
    group median exceeds ``base_p``, otherwise 1st-98th, bounds inclusive).
    The surviving values are z-score normalised and scored with the
    'exp_avg_detector' algorithm of ``AnomalyDetector``; only rows whose
    score is below ``anomaly_limit`` are kept. A group whose trimmed values
    have no positive standard deviation is kept as trimmed.

    :param df: pandas DataFrame holding both columns.
    :param index_name: column to group by.
    :param var_name: column holding the values to clean.
    :param anomaly_limit: rows scoring at or above this value are dropped.
    :param base_p: median level that selects which percentile band is used.
    :return: a new DataFrame of the kept rows with a fresh 0..n-1 index;
        an empty frame with the input's columns when nothing was grouped.
    """
    cleaned_groups = []
    for _, group in df.groupby(index_name):
        # reset_index already returns a new frame, so no deep copy is needed.
        group = group.reset_index(drop=True)
        values = group[var_name]

        band = [2, 99] if np.median(values) > base_p else [1, 98]
        limit_low, limit_up = np.percentile(values, band)
        in_band = (values <= limit_up) & (values >= limit_low)
        group = group[in_band].reset_index(drop=True)

        ts = group[var_name]
        ts_std = np.std(ts)
        if ts_std > 0:
            # Normalise only once the spread is known to be positive, so a
            # constant (or empty) group never triggers a division by zero.
            ts = (ts - np.mean(ts)) / ts_std
            detector = AnomalyDetector(ts.to_dict(), algorithm_name='exp_avg_detector')
            scores = np.array(detector.get_all_scores().values)
            cleaned_groups.append(group[scores < anomaly_limit])
        else:
            cleaned_groups.append(group)

    if not cleaned_groups:
        # pd.concat raises ValueError on an empty list (e.g. empty input).
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(cleaned_groups, ignore_index=True)
def test_algorithm_DefaultDetector(self):
    """
    Check that an explicit 'default_detector' reproduces the timestamps and
    values scored by ``self.detector1``.
    """
    detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
    # Same comparison for each component of the score series.
    for attribute in ('timestamps', 'values'):
        actual = getattr(detector.get_all_scores(), attribute)
        expected = getattr(self.detector1.get_all_scores(), attribute)
        self.assertEqual(actual, expected)
def get_anomaly_series(series: pd.Series, algorithm: str = "bitmap_detector") -> pd.Series:
    """
    Score ``series`` with ``AnomalyDetector`` and return the scores.

    :param series: input series; it is converted with ``to_dict()`` before
        being handed to the detector.
    :param algorithm: one of 'bitmap_detector', 'derivative_detector' or
        'exp_avg_detector'; anything else fails the assertion.
    :return: a ``pd.Series`` of the scores in the detector's iteration
        order, carrying a default index rather than the input's index.
    """
    supported = ["bitmap_detector", "derivative_detector", "exp_avg_detector"]
    assert algorithm in supported

    detector = AnomalyDetector(series.to_dict(), algorithm_name=algorithm)
    all_scores = detector.get_all_scores()
    return pd.Series([score for _, score in all_scores.iteritems()])
def detect(self, ts):
    """
    Score ``ts`` with a default ``AnomalyDetector`` and key the scores by
    formatted local time.

    :param ts: time series input, passed unchanged to ``AnomalyDetector``.
    :return: dict mapping '%d-%b-%Y %H:%M:%S' local-time strings to scores.
        When several timestamps format to the same string, the one
        iterated last wins.
    """
    all_scores = AnomalyDetector(ts).get_all_scores()
    return {
        time.strftime('%d-%b-%Y %H:%M:%S', time.localtime(stamp)): value
        for stamp, value in all_scores.iteritems()
    }
def test_algorithm_params(self):
    """
    Check the optional ``algorithm_params`` argument.

    Passing the string '0' must raise ``ValueError``; passing a smoothing
    factor must produce score values that differ from those of
    ``self.detector1``.
    """
    with self.assertRaises(ValueError):
        AnomalyDetector(self.s1,
                        algorithm_name='exp_avg_detector',
                        algorithm_params='0')

    tuned = AnomalyDetector(self.s1,
                            algorithm_name="exp_avg_detector",
                            algorithm_params={'smoothing_factor': 0.3})
    reference_values = self.detector1.get_all_scores().values
    self.assertNotEqual(reference_values, tuned.get_all_scores().values)
def cleanData(df, index_name="15分钟段", var_name="实际功率", limit=0.5):
    """
    Drop outlying rows of ``var_name`` inside every ``index_name`` group.

    Each group is first trimmed to the values strictly between its 5th and
    95th percentiles. The surviving values are z-score normalised and scored
    with the 'exp_avg_detector' algorithm of ``AnomalyDetector``; only rows
    whose score is below ``limit`` are kept. A group whose trimmed values
    have no positive standard deviation is kept as trimmed.

    :param df: pandas DataFrame holding both columns.
    :param index_name: column to group by.
    :param var_name: column holding the values to clean.
    :param limit: rows scoring at or above this value are dropped.
    :return: a new DataFrame of the kept rows with a fresh 0..n-1 index;
        an empty frame with the input's columns when nothing was grouped.
    """
    cleaned_groups = []
    for _, group in df.groupby(index_name):
        # reset_index already returns a new frame, so no deep copy is needed.
        group = group.reset_index(drop=True)
        values = group[var_name]

        limit_low, limit_up = np.percentile(values, [5, 95])
        in_band = (values < limit_up) & (values > limit_low)
        group = group[in_band].reset_index(drop=True)

        ts = group[var_name]
        ts_std = np.std(ts)
        if ts_std > 0:
            # Normalise only once the spread is known to be positive, so a
            # constant (or empty) group never triggers a division by zero.
            ts = (ts - np.mean(ts)) / ts_std
            detector = AnomalyDetector(ts.to_dict(), algorithm_name='exp_avg_detector')
            scores = np.array(detector.get_all_scores().values)
            cleaned_groups.append(group[scores < limit])
        else:
            cleaned_groups.append(group)

    if not cleaned_groups:
        # pd.concat raises ValueError on an empty list (e.g. empty input).
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(cleaned_groups, ignore_index=True)
def __init__(self, X_tr_red, X_te_red, shift_detector, level):
    """
    Score the test data against the training data with ``AnomalyDetector``.

    Every row of both inputs is reduced to its negated row maximum. The
    reduced test values (keyed by position) form the time series and the
    reduced training values form the baseline. The per-position scores are
    then handed to the parent initializer.

    :param X_tr_red: training array, reduced along axis 1.
    :param X_te_red: test array, reduced along axis 1.
    :param shift_detector: forwarded unchanged to the parent initializer.
    :param level: forwarded unchanged to the parent initializer.
    """
    logger.info("Run Luminol")

    X_tr_odim = (-np.amax(X_tr_red, axis=1)).tolist()
    X_te_odim = (-np.amax(X_te_red, axis=1)).tolist()

    test_series = dict(enumerate(X_te_odim))
    baseline_series = dict(enumerate(X_tr_odim))
    detector_params = {
        'precision': 10,
        'lag_window_size': 0.1,
        'future_window_size': 0.1,
        'chunk_size': 2
    }
    my_detector = AnomalyDetector(test_series,
                                  baseline_time_series=baseline_series,
                                  algorithm_params=detector_params)

    all_scores = my_detector.get_all_scores()
    # Look scores up by position so they line up with X_te_odim.
    score = [all_scores[position] for position in range(len(X_te_odim))]
    assert len(score) == len(X_te_odim), (len(score), len(X_te_odim))

    super().__init__(X_tr_odim, X_te_odim, score, shift_detector, level)
def f1_metrics(self):
    """
    Score ``self.y`` with a default ``AnomalyDetector`` and evaluate the
    scores against the labelled anomaly positions.

    Reads ``self.y``, ``self.ts`` and ``self.abn_pt``, and stores the binary
    ground-truth vector on ``self.y_true``.

    :return: tuple ``(ap, y_true, y_score)``: the average precision of the
        raw scores, the ground-truth vector, and the binary prediction at
        the score quantile that gave the highest F1 among the candidates.
    """
    y, ts, abn_pt = self.y, self.ts, self.abn_pt

    detector = AnomalyDetector(pd.Series(y, index=ts).to_dict())
    scores = detector.get_all_scores()

    y_true = np.zeros(y.size)
    for position in abn_pt:
        y_true[position] = 1
    self.y_true = y_true

    np_score = [value for _, value in scores.iteritems()]
    score_series = pd.Series(data=np_score)
    ap = average_precision_score(y_true, np_score)

    def predict_at(quantile):
        # Flag every position whose score lies strictly above the quantile.
        threshold = np.quantile(np_score, quantile)
        flagged = score_series[score_series.values > threshold].index.values
        prediction = np.zeros(y.size)
        for position in flagged:
            prediction[position] = 1
        return prediction

    # Candidate quantiles: log10(1.0), log10(1.1), ..., log10(9.9).
    candidates = np.log10(np.arange(0, 9, .1) + 1)
    f1 = [f1_score(y_true, predict_at(q)) for q in candidates]

    y_score = predict_at(candidates[np.argmax(f1)])
    return ap, y_true, y_score
def anomalies(self, metrics):
    """
    Score a mapping of timestamp strings to values with ``AnomalyDetector``.

    :param metrics: mapping whose keys are '%Y-%m-%d %H:%M:%S' strings.
    :return: dict mapping the original timestamp strings to their scores,
        or ``False`` when ``metrics`` is empty or the detector produced a
        falsy score series.
    """
    series = {}
    label_by_epoch = {}
    for label in metrics:
        parsed = datetime.datetime.strptime(label, "%Y-%m-%d %H:%M:%S")
        epoch = int(parsed.timestamp())
        series[epoch] = metrics[label]
        label_by_epoch[epoch] = label

    if not series:
        return False

    scores = AnomalyDetector(series).get_all_scores()
    if not scores:
        return False

    # Translate the integer epochs back to the caller's timestamp strings.
    return {label_by_epoch[epoch]: value for epoch, value in scores.iteritems()}
def main(argv):
    """
    Print the anomalous CAN events found in a CANoe ASCII (.asc) log.

    Options are read from ``sys.argv``; the ``argv`` parameter is accepted
    but not consulted:

    * ``-i FILE`` / ``--input FILE``: the log file to analyse (required).
    * ``-v``: print every anomaly instead of only those scoring above 3.4.
    * ``-h`` / ``--help``: print usage and exit.

    Exits with status 2 on an option error or when no input file is given.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:v", ["help", "input="])
    except getopt.GetoptError as err:
        # print help information and exit
        print(str(err))
        usage()
        sys.exit(2)

    log_path = None
    verbose = False
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-i", "--input"):
            log_path = a
        else:
            assert False, "unhandled option"

    if log_path is None:
        # Without this guard the script would fail inside open(None).
        usage()
        sys.exit(2)

    # Regular expression used to parse CANoe logs in ASCII format (.asc).
    regex = r"(\s+)([+-]?\d*\.\d+)(?![-+0-9\.])(\s+)(\d+)(\s+)(\w+)(\s+)(\w+)(\s+)([a-z])(\s+)(\d+)(?:(?!X).)*(Length = )(\d+)(\s+)(BitCount = )(\d+)(\s+)(ID = )(\d+)"

    with open(log_path) as log_file:
        log_text = log_file.read()

    # event_dict maps each event time to its CAN ID. Group 2 is the time in
    # seconds since engine start (scaled here to microseconds); group 20 is
    # the decimal CAN ID, kept as the matched string.
    # NOTE(review): confirm AnomalyDetector accepts these string values.
    event_dict = {}
    for match in re.finditer(regex, log_text):
        event_dict[float(match.group(2)) * 1000000] = match.group(20)

    my_detector = AnomalyDetector(event_dict, algorithm_name=("exp_avg_detector"))
    anomalies = my_detector.get_anomalies()

    print("")
    for attack in anomalies:
        if attack.exact_timestamp not in event_dict:
            continue
        # "-v" reports every anomaly; otherwise only scores above 3.4.
        if verbose or attack.anomaly_score > 3.4:
            print("{timestamp} - ID: {id} - Score: {value}".format(
                timestamp=attack.exact_timestamp,
                id=event_dict[attack.exact_timestamp],
                value=attack.anomaly_score))
def job():
    """
    Run one anomaly-detection pass over the ADT_A31 message counts.

    Messages are counted per interval (interval length INTERVAL, most
    recent LIMIT intervals), scored with ``AnomalyDetector`` using
    ALGORITH_NAME and THRESHOLD, and all scores and detected anomalies are
    printed. The process exits through ``quit()`` as soon as one anomaly
    scores at least ``int(THRESHOLD)``.

    INTERVAL and LIMIT are spliced into the SQL text, so they must stay
    trusted module constants and never come from user input.
    """
    print("Run start: " + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    # Connect to either the normal, anomaly or combined database. For SQLite,
    # detect_types converts stored (typically text) values to native Python
    # datatypes.
    conn = sqlite3.connect('../hl7-combined.db',
                           detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
    try:
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()

        # Group messages into X second intervals
        cur.execute(
            "select messages.MSGTYPE, datetime((strftime('%s', messages.QueueTime) /" +
            INTERVAL + ") * " + INTERVAL +
            ", 'unixepoch') interval, count(*) count from messages"
            " where msgtype = 'ADT_A31' group by interval order by interval desc limit " +
            LIMIT)
        rows = cur.fetchall()

        if rows:
            data = {}
            for row in rows:
                # The detector needs a mapping of unix timestamp -> count.
                obsTimestamp = time.mktime(
                    datetime.datetime.strptime(row["interval"],
                                               "%Y-%m-%d %H:%M:%S").timetuple())
                data[obsTimestamp] = row["count"]
            print(data)

            # DETECTOR TYPE - see https://github.com/linkedin/luminol/tree/master/src/luminol/algorithms/anomaly_detector_algorithms
            detector = AnomalyDetector(data,
                                       algorithm_name=ALGORITH_NAME,
                                       score_threshold=THRESHOLD)

            all_scores = detector.get_all_scores()
            anom_score = [
                [time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp)), value]
                for (timestamp, value) in all_scores.iteritems()
            ]

            print("----- ALL SCORES ----- ")
            for entry in anom_score:
                print(entry)

            anomalies = detector.get_anomalies()
            for value in anomalies:
                print(" match: " +
                      time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(value.exact_timestamp)) +
                      " " + str(value.anomaly_score))

                # NOTE(review): the original also required
                # `anomalies.count > int(LIMIT)`. That compares a bound
                # method with an int: always true on CPython 2, a TypeError
                # on Python 3. It is dropped so the same anomalies still
                # alert; add an explicit len(anomalies) check if a minimum
                # anomaly count was intended.
                if value.anomaly_score >= int(THRESHOLD):
                    print("ANOMALY DETECTED - NOTIFYING ADMINISTRATOR / CALLING WEBSERVICE ETC")
                    quit()
    finally:
        # Release the database handle on every path, including quit().
        conn.close()

    print("Run End: " + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "\n")
class TestAnomalyDetector(unittest.TestCase):
    """Tests for ``AnomalyDetector`` construction, scoring and anomalies."""

    def setUp(self):
        """Create two nine-point series and a default detector for each."""
        self.s1 = dict(enumerate([0, 0, 0, 0, 1, 2, 2, 2, 0]))
        self.s2 = dict(enumerate([0, 1, 2, 2, 2, 0, 0, 0, 0]))
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_diff_percent_threshold_algorithm(self):
        """
        'diff_percent_threshold' with +/-20 thresholds finds anomalies and
        rejects a call without algorithm_params.
        """
        thresholds = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20
        }
        detector = AnomalyDetector(self.s1,
                                   baseline_time_series=self.s2,
                                   algorithm_name='diff_percent_threshold',
                                   algorithm_params=thresholds)
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1,
                            baseline_time_series=self.s2,
                            algorithm_name='diff_percent_threshold')

    def test_absolute_threshold_algorithm(self):
        """
        'absolute_threshold' with both bounds at 0.2 finds anomalies and
        rejects a call without algorithm_params.
        """
        bounds = {
            'absolute_threshold_value_upper': 0.2,
            'absolute_threshold_value_lower': 0.2
        }
        detector = AnomalyDetector(self.s1,
                                   algorithm_name='absolute_threshold',
                                   algorithm_params=bounds)
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1, algorithm_name='absolute_threshold')

    def test_threshold(self):
        """A score threshold of 0 yields exactly one anomaly."""
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertEqual(len(detector.get_anomalies()), 1)
        self.assertIsNotNone(detector.get_anomalies())

    def test_score_only(self):
        """With score_only=True the detector reports no anomalies."""
        scoring_only = AnomalyDetector(self.s1,
                                       score_only=True,
                                       algorithm_name='derivative_detector')
        regular = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertIsNotNone(regular.get_anomalies())
        self.assertEqual(len(scoring_only.get_anomalies()), 0)

    def test_get_all_scores(self):
        """get_all_scores returns a TimeSeries as long as the input series."""
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        self.assertEqual(len(self.detector1.get_all_scores()),
                         len(self.detector1.time_series))

    def test_get_anomalies(self):
        """get_anomalies returns something other than None."""
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """An explicit 'default_detector' scores the same as the default."""
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        for field in ('timestamps', 'values'):
            self.assertEqual(getattr(detector.get_all_scores(), field),
                             getattr(self.detector1.get_all_scores(), field))

    def test_algorithm(self):
        """An unknown algorithm name raises AlgorithmNotFound."""
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        A string algorithm_params raises ValueError; a smoothing factor
        changes the score values relative to the default detector.
        """
        with self.assertRaises(ValueError):
            AnomalyDetector(self.s1,
                            algorithm_name='exp_avg_detector',
                            algorithm_params='0')
        tuned = AnomalyDetector(self.s1,
                                algorithm_name="exp_avg_detector",
                                algorithm_params={'smoothing_factor': 0.3})
        reference_values = self.detector1.get_all_scores().values
        self.assertNotEqual(reference_values, tuned.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        With score_percent_threshold=0.1 the two algorithms report
        different anomalies.
        """
        exp_avg = AnomalyDetector(self.s1,
                                  score_percent_threshold=0.1,
                                  algorithm_name='exp_avg_detector')
        derivative = AnomalyDetector(self.s1,
                                     score_percent_threshold=0.1,
                                     algorithm_name='derivative_detector')
        self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
class TestAnomalyDetector(unittest.TestCase):
    """Tests for ``AnomalyDetector`` scoring and anomaly extraction."""

    def setUp(self):
        """Create two nine-point series and a default detector for each."""
        self.s1 = dict(enumerate([0, 0, 0, 0, 0.5, 1, 1, 1, 0]))
        self.s2 = dict(enumerate([0, 0.5, 1, 1, 1, 0, 0, 0, 0]))
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_threshold(self):
        """A score threshold of 0 yields exactly one anomaly."""
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertEqual(len(detector.get_anomalies()), 1)
        self.assertIsNotNone(detector.get_anomalies())

    def test_score_only(self):
        """With score_only=True the detector reports an empty anomaly list."""
        scoring_only = AnomalyDetector(self.s1,
                                       score_only=True,
                                       algorithm_name='derivative_detector')
        regular = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertIsNotNone(regular.get_anomalies())
        self.assertEqual(len(scoring_only.get_anomalies()), 0)

    def test_get_all_scores(self):
        """get_all_scores returns a TimeSeries as long as the input series."""
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        self.assertEqual(len(self.detector1.get_all_scores()),
                         len(self.detector1.time_series))

    def test_get_anomalies(self):
        """get_anomalies returns something other than None."""
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """An explicit 'default_detector' scores the same as the default."""
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        for field in ('timestamps', 'values'):
            self.assertEqual(getattr(detector.get_all_scores(), field),
                             getattr(self.detector1.get_all_scores(), field))

    def test_algorithm(self):
        """An unknown algorithm name raises AlgorithmNotFound."""
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        A string algorithm_params raises ValueError; a smoothing factor
        changes the score values relative to the default detector.
        """
        with self.assertRaises(ValueError):
            AnomalyDetector(self.s1,
                            algorithm_name='exp_avg_detector',
                            algorithm_params='0')
        tuned = AnomalyDetector(self.s1,
                                algorithm_name="exp_avg_detector",
                                algorithm_params={'smoothing_factor': 0.3})
        reference_values = self.detector1.get_all_scores().values
        self.assertNotEqual(reference_values, tuned.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        With score_percent_threshold=0.1 the two algorithms report
        different anomalies.
        """
        exp_avg = AnomalyDetector(self.s1,
                                  score_percent_threshold=0.1,
                                  algorithm_name='exp_avg_detector')
        derivative = AnomalyDetector(self.s1,
                                     score_percent_threshold=0.1,
                                     algorithm_name='derivative_detector')
        self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
class TestAnomalyDetector(unittest.TestCase):
    """
    Tests for ``AnomalyDetector``: custom algorithm classes, the threshold
    algorithms, the 'sign_test' algorithm, and the generic scoring API.
    """

    def setUp(self):
        """Create two nine-point series and a default detector for each."""
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
        self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_custom_algorithm(self):
        """
        Test passing a custom algorithm class (CustomAlgo) via algorithm_class
        """
        algorithm_params = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20
        }
        detector = AnomalyDetector(self.s1,
                                   baseline_time_series=self.s2,
                                   algorithm_class=CustomAlgo,
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertTrue(len(anomalies) > 0)

    def test_diff_percent_threshold_algorithm(self):
        """
        Test "diff percent threshold" algorithm with a threshold of 20%
        """
        algorithm_params = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20
        }
        detector = AnomalyDetector(self.s1,
                                   baseline_time_series=self.s2,
                                   algorithm_name='diff_percent_threshold',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertTrue(len(anomalies) > 0)
        # Omitting algorithm_params must be rejected.
        self.assertRaises(
            exceptions.RequiredParametersNotPassed,
            lambda: AnomalyDetector(self.s1,
                                    baseline_time_series=self.s2,
                                    algorithm_name='diff_percent_threshold'))

    def test_sign_test_algorithm(self):
        """
        Test "sign test" algorithm with a threshold of 20%

        The scenarios below run in sequence and keep mutating the same ``ts``
        dict, so their order matters.
        """
        # Baseline: constant 1 for t = 1..99; ts starts as a copy of it.
        bs = dict()
        bs.update((t, 1) for t in range(1, 100))
        ts = dict(bs)

        # test missing parameters
        self.assertRaises(
            exceptions.RequiredParametersNotPassed,
            lambda: AnomalyDetector(self.s1,
                                    baseline_time_series=self.s2,
                                    algorithm_name='sign_test'))
        # test over specified
        # NOTE(review): this algorithm_params dict (both upper and lower
        # thresholds) is never passed to AnomalyDetector below, so the call
        # repeats the missing-parameters case above - confirm the intent.
        algorithm_params = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20,
            'scan_window': 24,
            'confidence': 0.01
        }
        self.assertRaises(
            exceptions.RequiredParametersNotPassed,
            lambda: AnomalyDetector(self.s1,
                                    baseline_time_series=self.s2,
                                    algorithm_name='sign_test'))
        # Simple tests
        algorithm_params = {'percent_threshold_upper': 20, 'scan_window': 24}

        # first no anomalies (ts is still identical to the baseline)
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 0)

        # Next one anomaly exactly equal to scan window
        ts.update((t, 1.200001) for t in range(10, 34))
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        # note the anomaly is larger than scan window
        self.assertEqual(s, 4)
        self.assertEqual(e, 39)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomaly below baseline
        algorithm_params = {
            'percent_threshold_lower': -20,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts.update((t, 0.799999) for t in range(10, 34))
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 4)
        self.assertEqual(e, 39)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomalies separated by big gap (ts is reset to the baseline first)
        ts.update(bs)
        ts.update((t, 0.799999) for t in range(1, 25))
        ts.update((t, 0.799999) for t in range(60, 84))
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertEqual(len(anomalies), 2)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 1)
        self.assertEqual(e, 30)
        # score ~ 99.9
        self.assertGreater(anomaly.anomaly_score, 99)
        anomaly = anomalies[1]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 54)
        self.assertEqual(e, 89)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomalies separated by small gap
        algorithm_params = {'percent_threshold_upper': 20, 'scan_window': 24}
        ts.update(bs)
        ts.update((t, 1.21) for t in range(1, 25))
        ts.update((t, 1.21) for t in range(30, 40))
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        # just one
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 1)
        self.assertEqual(e, 40)
        # score ~ 99.9
        self.assertGreater(anomaly.anomaly_score, 99)

        # try noisy data (every sixth point of the raised run is 1.19)
        ts.update(bs)
        ts.update((t, 1.21) for t in range(1, 25))
        ts.update((t, 1.19) for t in range(1, 25, 6))
        algorithm_params = {
            'percent_threshold_upper': 20,
            'scan_window': 24,
            'confidence': 0.01
        }
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertEqual(len(anomalies), 1)

        # now decrease sensitivity (confidence 0.0001): same data, no anomaly
        algorithm_params = {
            'percent_threshold_upper': 20,
            'scan_window': 24,
            'confidence': 0.0001
        }
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertEqual(len(anomalies), 0)

    def test_sign_test_algorithm_interface(self):
        """
        Test "sign test" algorithm with a threshold of 0% and an offset of 2
        """
        # Baseline: constant 1 for t = 1..29.
        bs = dict()
        bs.update((t, 1) for t in range(1, 30))

        # Simple tests
        algorithm_params = {
            'percent_threshold_upper': 0,
            'offset': 2,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts = dict(bs)
        # raise t = 1..20 to 3.1
        ts.update((t, 3.1) for t in range(1, 21))

        # exactly one anomaly is expected
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 1)

        # try lower bound
        algorithm_params = {
            'percent_threshold_lower': 0,
            'offset': 2,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts = dict(bs)
        # less than baseline plus bias
        ts.update((t, 2.9) for t in range(1, 25))

        # exactly one anomaly is expected
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 1)

    def test_sign_test_algorithm_with_shift(self):
        """
        Test "sign test" algorithm with a non-zero offset (10% upper
        threshold with offset 1, then -20% lower threshold with offset -0.1)
        """
        # Baseline: constant 1 for t = 1..99.
        bs = dict()
        bs.update((t, 1) for t in range(1, 100))

        # Simple tests
        algorithm_params = {
            'percent_threshold_upper': 10,
            'offset': 1,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts = dict(bs)
        # bigger than 10 percent but below bias
        ts.update((t, 1.2) for t in range(10, 34))

        # first no anomalies
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 0)

        # Next one anomaly exactly equal to scan window
        # uses bias
        ts.update((t, 2.100001) for t in range(10, 34))
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        # note the anomaly is larger than scan window
        self.assertEqual(s, 4)
        self.assertEqual(e, 39)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomaly below baseline but not below baseline with shift
        algorithm_params = {
            'percent_threshold_lower': -20,
            'offset': -0.1,
            'scan_window': 24
        }
        ts.update((t, 0.799999) for t in range(10, 34))

        # no anomalies
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 0)

        # lower the time series by 0.1
        ts.update((t, 0.699999) for t in range(10, 34))
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 4)
        self.assertEqual(e, 39)
        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

    def test_absolute_threshold_algorithm(self):
        """
        Test "absolute threshold" algorithm with a upper and lower threshold of 0.2
        """
        algorithm_params = {
            'absolute_threshold_value_upper': 0.2,
            'absolute_threshold_value_lower': 0.2
        }
        detector = AnomalyDetector(self.s1,
                                   algorithm_name='absolute_threshold',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertTrue(anomalies is not None)
        self.assertTrue(len(anomalies) > 0)
        # Omitting algorithm_params must be rejected.
        self.assertRaises(
            exceptions.RequiredParametersNotPassed,
            lambda: AnomalyDetector(self.s1, algorithm_name='absolute_threshold'))

    def test_threshold(self):
        """
        Test score threshold=0
        """
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertTrue(len(detector.get_anomalies()) == 1)
        self.assertTrue(detector.get_anomalies() is not None)

    def test_score_only(self):
        """
        Test that score_only parameter doesn't give anomalies
        """
        detector = AnomalyDetector(self.s1,
                                   score_only=True,
                                   algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertTrue(detector2.get_anomalies() is not None)
        self.assertTrue(len(detector.get_anomalies()) == 0)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        self.assertTrue(isinstance(self.detector1.get_all_scores(), TimeSeries))
        self.assertEqual(len(self.detector1.get_all_scores()),
                         len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        self.assertTrue(self.detector1.get_anomalies() is not None)

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps,
                         self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values,
                         self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        self.assertRaises(
            exceptions.AlgorithmNotFound,
            lambda: AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm'))

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        # A string instead of a dict must be rejected.
        self.assertRaises(
            ValueError,
            lambda: AnomalyDetector(self.s1,
                                    algorithm_name='exp_avg_detector',
                                    algorithm_params='0'))
        detector = AnomalyDetector(self.s1,
                                   algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values,
                            detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percent_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1,
                                   score_percent_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1,
                                    score_percent_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
class TestAnomalyDetector(unittest.TestCase):
    """Tests for ``AnomalyDetector`` scoring and anomaly extraction."""

    def setUp(self):
        """Create two nine-point series and a default detector for each."""
        self.s1 = dict(enumerate([0, 0, 0, 0, 0.5, 1, 1, 1, 0]))
        self.s2 = dict(enumerate([0, 0.5, 1, 1, 1, 0, 0, 0, 0]))
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_threshold(self):
        """A score threshold of 0 yields exactly one anomaly."""
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertEqual(len(detector.get_anomalies()), 1)
        self.assertIsNotNone(detector.get_anomalies())

    def test_score_only(self):
        """With score_only=True get_anomalies returns None."""
        scoring_only = AnomalyDetector(self.s1,
                                       score_only=True,
                                       algorithm_name='derivative_detector')
        regular = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertIsNotNone(regular.get_anomalies())
        self.assertIsNone(scoring_only.get_anomalies())

    def test_get_all_scores(self):
        """get_all_scores returns a TimeSeries as long as the input series."""
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        self.assertEqual(len(self.detector1.get_all_scores()),
                         len(self.detector1.time_series))

    def test_get_anomalies(self):
        """get_anomalies returns something other than None."""
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """An explicit 'default_detector' scores the same as the default."""
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        for field in ('timestamps', 'values'):
            self.assertEqual(getattr(detector.get_all_scores(), field),
                             getattr(self.detector1.get_all_scores(), field))

    def test_algorithm(self):
        """An unknown algorithm name raises AlgorithmNotFound."""
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        A string algorithm_params raises InvalidDataFormat; a smoothing
        factor changes the score values relative to the default detector.
        """
        with self.assertRaises(exceptions.InvalidDataFormat):
            AnomalyDetector(self.s1,
                            algorithm_name='exp_avg_detector',
                            algorithm_params='0')
        tuned = AnomalyDetector(self.s1,
                                algorithm_name="exp_avg_detector",
                                algorithm_params={'smoothing_factor': 0.3})
        reference_values = self.detector1.get_all_scores().values
        self.assertNotEqual(reference_values, tuned.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        With score_percentile_threshold=0.1 the two algorithms report
        different anomalies.
        """
        exp_avg = AnomalyDetector(self.s1,
                                  score_percentile_threshold=0.1,
                                  algorithm_name='exp_avg_detector')
        derivative = AnomalyDetector(self.s1,
                                     score_percentile_threshold=0.1,
                                     algorithm_name='derivative_detector')
        self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
def _read_can_events(path, regex):
    """Return {event time in microseconds: CAN ID string} for each log entry in ``path`` matching ``regex``."""
    with open(path) as log_file:
        text = log_file.read()
    events = {}
    for match in re.finditer(regex, text):
        # Group 2 is the event time in seconds; group 20 is the decimal CAN ID.
        events[float(match.group(2)) * 1000000] = match.group(20)
    return events


def main(argv):
    """
    Report anomaly windows of one CAN log that correlate with a second log.

    Options are read from ``sys.argv``; the ``argv`` parameter is accepted
    but not consulted:

    * ``-i FILE`` / ``--input FILE``: log searched for anomalies (required).
    * ``-c FILE`` / ``--correlate FILE``: log to correlate with (required).
    * ``-v``: accepted; it does not change the output of this script.
    * ``-h`` / ``--help``: print usage and exit.

    Exits with status 2 on an option error or when either file is missing
    from the command line.
    """
    try:
        # The original list read ["help", "input=" "correlate="]: the
        # missing comma fused the last two entries into one bogus option,
        # so neither --input nor --correlate was recognised.
        opts, args = getopt.getopt(sys.argv[1:], "hi:c:v",
                                   ["help", "input=", "correlate="])
    except getopt.GetoptError as err:
        # print help information and exit
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    input_path = None
    correlate_path = None
    verbose = False
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-i", "--input"):
            input_path = a
        elif o in ("-c", "--correlate"):
            correlate_path = a
        else:
            assert False, "unhandled option"

    if input_path is None or correlate_path is None:
        # Without this guard the script would fail inside open(None).
        usage()
        sys.exit(2)

    regex = r"(\s+)([+-]?\d*\.\d+)(?![-+0-9\.])(\s+)(\d+)(\s+)(\w+)(\s+)(\w+)(\s+)([a-z])(\s+)(\d+)(?:(?!X).)*(Length = )(\d+)(\s+)(BitCount = )(\d+)(\s+)(ID = )(\d+)"

    mydict1 = _read_can_events(input_path, regex)
    mydict2 = _read_can_events(correlate_path, regex)

    my_detector1 = AnomalyDetector(mydict1, algorithm_name=("exp_avg_detector"))
    anomalies = my_detector1.get_anomalies()
    for anomaly in anomalies:
        time_period = anomaly.get_time_window()
        my_correlator = Correlator(mydict1, mydict2, time_period)
        # The keyword is spelled "threshold"; the original "treshold" is
        # not a parameter of is_correlated.
        if my_correlator.is_correlated(threshold=0.8):
            print("mydict2 correlate with mydict at time period (%d, %d)" % time_period)
import time

from luminol.anomaly_detector import AnomalyDetector

# Score every point of the CSV series and print the points whose anomaly
# score is positive, one "<local time>, <score>" line per point.
my_detector = AnomalyDetector('Turbidity.csv')
score = my_detector.get_all_scores()

for timestamp, value in score.iteritems():
    local_time = time.localtime(timestamp)
    t_str = time.strftime('%y-%m-%d %H:%M:%S', local_time)
    if not value > 0:
        continue
    print(f'{t_str}, {value}')
class RCA(object):
    """Correlates the anomalies found in one metric with a list of related metrics."""

    def __init__(self, metrix, related_metrices):
        """
        Detect the anomalies of ``metrix`` and run the correlation analysis right away.

        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
        :param list related_metrices: a list of time series.
        """
        self.metrix = self._load(metrix)
        self.anomaly_detector = AnomalyDetector(metrix)
        self.related_metrices = related_metrices
        self.anomalies = self.anomaly_detector.get_anomalies()
        self._analyze()

    def _load(self, metrix):
        """
        Turn the input into a TimeSeries.

        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
        :return TimeSeries: a TimeSeries object.
        """
        if isinstance(metrix, TimeSeries):
            return metrix
        raw = metrix if isinstance(metrix, dict) else utils.read_csv(metrix)
        return TimeSeries(raw)

    def _analyze(self):
        """
        Correlate every detected anomaly with each related metric.

        For each anomaly the time window is widened by half its length on both
        sides (by 30 when the window has zero length) and widened again by the
        same amount until it covers at least two score points. The results are
        stored in ``self.output`` (keyed by the anomaly's exact timestamp) and
        ``self.output_by_name`` (keyed by the related metric).
        """
        by_timestamp = defaultdict(list)
        by_entry = defaultdict(list)
        all_scores = self.anomaly_detector.get_all_scores()

        for anomaly in self.anomalies or ():
            window_start, window_end = anomaly.get_time_window()
            anomaly_ts = anomaly.exact_timestamp

            # Compute the extended window; a zero-length window gets a fixed margin.
            margin = (window_end - window_start) / 2 or 30
            lo = window_start - margin
            hi = window_end + margin

            # Keep widening while fewer than two data points are included.
            while len(all_scores.crop(lo, hi)) < 2:
                lo -= margin
                hi += margin

            # Correlate with other metrics.
            for entry in self.related_metrices:
                try:
                    correlator = Correlator(self.metrix, entry,
                                            time_period=(lo, hi),
                                            use_anomaly_score=True)
                    details = correlator.get_correlation_result().__dict__
                    by_timestamp[anomaly_ts].append((lo, hi, details, entry))
                    by_entry[entry].append((lo, hi, details))
                except exceptions.NotEnoughDataPoints:
                    # Entries without enough data in the window are left out.
                    continue

        self.output = by_timestamp
        self.output_by_name = by_entry
class TestAnomalyDetector(unittest.TestCase):
    """Tests for AnomalyDetector: algorithm selection, parameters, thresholds and scores."""

    def setUp(self):
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
        self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def _sign_test_anomalies(self, ts, bs, algorithm_params):
        """Run the "sign_test" algorithm on ``ts`` against baseline ``bs`` and return the anomalies."""
        detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        return detector.get_anomalies()

    def _assert_anomaly(self, anomaly, start, end, min_score, max_score=None):
        """Check an anomaly's time window and that its score lies in (min_score, max_score)."""
        s, e = anomaly.get_time_window()
        self.assertEqual(s, start)
        self.assertEqual(e, end)
        self.assertGreater(anomaly.anomaly_score, min_score)
        if max_score is not None:
            self.assertLess(anomaly.anomaly_score, max_score)

    def test_custom_algorithm(self):
        """
        Test passing a custom algorithm class
        """
        detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_class=CustomAlgo,
                                   algorithm_params={'percent_threshold_upper': 20,
                                                     'percent_threshold_lower': -20})
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)

    def test_diff_percent_threshold_algorithm(self):
        """
        Test "diff percent threshold" algorithm with a threshold of 20%
        """
        detector = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                   algorithm_name='diff_percent_threshold',
                                   algorithm_params={'percent_threshold_upper': 20,
                                                     'percent_threshold_lower': -20})
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1, baseline_time_series=self.s2,
                            algorithm_name='diff_percent_threshold')

    def test_sign_test_algorithm(self):
        """
        Test "sign test" algorithm with a threshold of 20%
        """
        bs = {t: 1 for t in range(1, 100)}
        ts = dict(bs)

        # test missing parameters
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_name='sign_test')

        # Simple tests
        algorithm_params = {'percent_threshold_upper': 20,
                            'percent_threshold_lower': -20,
                            'scan_window': 24,
                            'confidence': 0.01}

        # first no anomalies
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 0)

        # Next one anomaly exactly equal to scan window
        ts.update({t: 1.200001 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 1)
        # note the anomaly is larger than scan window; score should be roughly 98.5
        self._assert_anomaly(anomalies[0], 4, 39, 98, 99)

        # anomaly below baseline
        ts.update({t: 0.799999 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        # score should be roughly 98.5
        self._assert_anomaly(anomalies[0], 4, 39, 98, 99)

        # anomalies separated by big gap
        ts.update(bs)
        ts.update({t: 0.799999 for t in range(1, 25)})
        ts.update({t: 1.21 for t in range(60, 84)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 2)
        # score ~ 99.9
        self._assert_anomaly(anomalies[0], 1, 30, 99)
        # score should be roughly 98.5
        self._assert_anomaly(anomalies[1], 54, 89, 98, 99)

        # anomalies separated by small gap
        ts.update(bs)
        ts.update({t: 1.21 for t in range(1, 25)})
        ts.update({t: 1.21 for t in range(30, 40)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        # just one
        self.assertEqual(len(anomalies), 1)
        # score ~ 99.9
        self._assert_anomaly(anomalies[0], 1, 40, 99)

    def test_sign_test_algorithm_with_shift(self):
        """
        Test "sign test" algorithm with the "shift" parameter
        """
        bs = {t: 1 for t in range(1, 100)}

        # Simple tests
        algorithm_params = {'percent_threshold_upper': 10,
                            'shift': 1,
                            'scan_window': 24,
                            'confidence': 0.01}
        ts = dict(bs)

        # bigger than 10 percent but below bias
        ts.update({t: 1.2 for t in range(10, 34)})

        # first no anomalies
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 0)

        # Next one anomaly exactly equal to scan window
        # uses bias
        ts.update({t: 2.100001 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 1)
        # note the anomaly is larger than scan window; score should be roughly 98.5
        self._assert_anomaly(anomalies[0], 4, 39, 98, 99)

        # anomaly below baseline but not below baseline with shift
        algorithm_params = {'percent_threshold_lower': -20,
                            'shift': -0.1,
                            'scan_window': 24,
                            'confidence': 0.01}
        ts.update({t: 0.799999 for t in range(10, 34)})

        # no anomalies
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 0)

        # lower the time series by 0.1
        ts.update({t: 0.699999 for t in range(10, 34)})
        anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
        self.assertEqual(len(anomalies), 1)
        # score should be roughly 98.5
        self._assert_anomaly(anomalies[0], 4, 39, 98, 99)

    def test_absolute_threshold_algorithm(self):
        """
        Test "absolute threshold" algorithm with a upper and lower threshold of 0.2
        """
        detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                                   algorithm_params={'absolute_threshold_value_upper': 0.2,
                                                     'absolute_threshold_value_lower': 0.2})
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1, algorithm_name='absolute_threshold')

    def test_threshold(self):
        """
        Test score threshold=0
        """
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertIsNotNone(detector.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 1)

    def test_score_only(self):
        """
        Test that score_only parameter doesn't give anomalies
        """
        detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertIsNotNone(detector2.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        self.assertEqual(len(self.detector1.get_all_scores()), len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps,
                         self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values,
                         self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        with self.assertRaises(ValueError):
            AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')
        detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values,
                            detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percent_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
class RCA(object):
    """Correlates the anomalies found in one metric with a list of related metrics."""

    def __init__(self, metrix, related_metrices):
        """
        Detect the anomalies of ``metrix`` and run the correlation analysis right away.

        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
        :param list related_metrices: a list of time series.
        """
        self.metrix = self._load(metrix)
        self.anomaly_detector = AnomalyDetector(metrix)
        self.related_metrices = related_metrices
        self.anomalies = self.anomaly_detector.get_anomalies()
        self._analyze()

    def _load(self, metrix):
        """
        Turn the input into a TimeSeries.

        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
        :return TimeSeries: a TimeSeries object.
        """
        if isinstance(metrix, TimeSeries):
            return metrix
        raw = metrix if isinstance(metrix, dict) else utils.read_csv(metrix)
        return TimeSeries(raw)

    def _analyze(self):
        """
        Correlate every detected anomaly with each related metric.

        For each anomaly the time window is widened by half its length on both
        sides (by 30 when the window has zero length) and widened again by the
        same amount until it covers at least two score points. The results are
        stored in ``self.output`` (keyed by the anomaly's exact timestamp) and
        ``self.output_by_name`` (keyed by the related metric).
        """
        by_timestamp = defaultdict(list)
        by_entry = defaultdict(list)
        all_scores = self.anomaly_detector.get_all_scores()

        for anomaly in self.anomalies or ():
            window_start, window_end = anomaly.get_time_window()
            anomaly_ts = anomaly.exact_timestamp

            # Compute the extended window; a zero-length window gets a fixed margin.
            margin = (window_end - window_start) / 2 or 30
            lo = window_start - margin
            hi = window_end + margin

            # Keep widening while fewer than two data points are included.
            while len(all_scores.crop(lo, hi)) < 2:
                lo -= margin
                hi += margin

            # Correlate with other metrics.
            for entry in self.related_metrices:
                try:
                    correlator = Correlator(self.metrix, entry,
                                            time_period=(lo, hi),
                                            use_anomaly_score=True)
                    details = correlator.get_correlation_result().__dict__
                    by_timestamp[anomaly_ts].append((lo, hi, details, entry))
                    by_entry[entry].append((lo, hi, details))
                except exceptions.NotEnoughDataPoints:
                    # Entries without enough data in the window are left out.
                    continue

        self.output = by_timestamp
        self.output_by_name = by_entry
class TestAnomalyDetector(unittest.TestCase):
    """Tests for AnomalyDetector: algorithm selection, parameters, thresholds and scores."""

    def setUp(self):
        self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
        self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_custom_algorithm(self):
        """
        Test passing a custom algorithm class
        """
        detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_class=CustomAlgo,
                                   algorithm_params={'percent_threshold_upper': 20,
                                                     'percent_threshold_lower': -20})
        anomalies = detector.get_anomalies()
        # Dedicated assertions report the offending value on failure instead
        # of a bare "False is not true".
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)

    def test_diff_percent_threshold_algorithm(self):
        """
        Test "diff percent threshold" algorithm with a threshold of 20%
        """
        detector = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                   algorithm_name='diff_percent_threshold',
                                   algorithm_params={'percent_threshold_upper': 20,
                                                     'percent_threshold_lower': -20})
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1, baseline_time_series=self.s2,
                            algorithm_name='diff_percent_threshold')

    def test_absolute_threshold_algorithm(self):
        """
        Test "absolute threshold" algorithm with a upper and lower threshold of 0.2
        """
        detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                                   algorithm_params={'absolute_threshold_value_upper': 0.2,
                                                     'absolute_threshold_value_lower': 0.2})
        anomalies = detector.get_anomalies()
        self.assertIsNotNone(anomalies)
        self.assertGreater(len(anomalies), 0)
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1, algorithm_name='absolute_threshold')

    def test_threshold(self):
        """
        Test score threshold=0
        """
        detector = AnomalyDetector(self.s1, score_threshold=0)
        self.assertIsNotNone(detector.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 1)

    def test_score_only(self):
        """
        Test that score_only parameter doesn't give anomalies
        """
        detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
        detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
        self.assertIsNotNone(detector2.get_anomalies())
        self.assertEqual(len(detector.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        Test if function get_all_scores works as expected.
        """
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        self.assertEqual(len(self.detector1.get_all_scores()), len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        Test if anomaly is found as expected.
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Test if optional parameter algorithm works as expected.
        """
        detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
        self.assertEqual(detector.get_all_scores().timestamps,
                         self.detector1.get_all_scores().timestamps)
        self.assertEqual(detector.get_all_scores().values,
                         self.detector1.get_all_scores().values)

    def test_algorithm(self):
        """
        Test if exception AlgorithmNotFound is raised as expected.
        """
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        Test if optional parameter algorithm_params works as expected.
        """
        with self.assertRaises(ValueError):
            AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')
        detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector",
                                   algorithm_params={'smoothing_factor': 0.3})
        self.assertNotEqual(self.detector1.get_all_scores().values,
                            detector.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        Test if score_percent_threshold works as expected.
        """
        detector = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                   algorithm_name='exp_avg_detector')
        detector1 = AnomalyDetector(self.s1, score_percent_threshold=0.1,
                                    algorithm_name='derivative_detector')
        self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
def get_timeseries(
        phenomenon_time_range,
        num_time_slots,
        get_observations,
        detector_method='bitmap_detector',  # LinkedIn bitmap
        detector_params={
            "precision": 8,
            "lag_window_size": 20,
            "future_window_size": 20,
            "chunk_size": 2
        },
        anomaly_breaks=DEFAULT_ANOMALY_BREAKS,
        value_breaks=DEFAULT_VALUE_BREAKS,
        extend_range=True,
        baseline_time_range=None,
        shift=True,
        use_baseline=True):
    """
    Build the property values and anomaly rates for a range of time slots.

    :param phenomenon_time_range: passed through unchanged into the result
        (an empty ``DateTimeTZRange`` is returned instead when there are no
        observations).
    :param num_time_slots: number of slots kept in the returned lists, counted
        from the end of the lower extension.
    :param get_observations: callable ``(lower_ext, upper_ext)`` that must
        return a list of observations.
    :param detector_method: ``algorithm_name`` handed to ``AnomalyDetector``.
    :param detector_params: ``algorithm_params`` handed to ``AnomalyDetector``;
        must contain ``lag_window_size`` and ``future_window_size``. The
        mapping is copied, never modified.
    :param anomaly_breaks: breaks used for the anomaly-rate percentiles.
    :param value_breaks: breaks used for the value percentiles.
    :param extend_range: when true, request extra slots around the range so
        the detector windows are filled.
    :param baseline_time_range: no separate baseline is fetched for it; when
        it is not ``None`` the detector simply runs without a baseline.
    :param shift: with a baseline, ``True`` extends only backwards while
        ``False`` splits the future window size around the range.
    :param use_baseline: use the observations themselves as the baseline
        series (only when ``baseline_time_range`` is ``None``).
    :return: dict with the keys ``phenomenon_time_range``, ``property_values``,
        ``property_value_percentiles``, ``property_anomaly_rates`` and
        ``property_anomaly_percentiles``.
    :raises Exception: when ``get_observations`` does not return a list.
    """
    # Work on a private copy: the window sizes may be adjusted below, and
    # writing into the argument would change the shared default dict (and the
    # caller's dict) for every later call.
    detector_params = dict(detector_params)

    lower_ext = 0
    upper_ext = 0
    if extend_range:
        lower_ext = detector_params["lag_window_size"]
        upper_ext = detector_params["future_window_size"]

        if use_baseline and shift:
            upper_ext = 0

        if use_baseline and not shift:
            lower_ext = int(upper_ext / 2)
            upper_ext -= lower_ext + 1

    observations = get_observations(lower_ext, upper_ext)

    if not isinstance(observations, list):
        raise Exception('property_values should be array')

    if not observations:
        return {
            'phenomenon_time_range': DateTimeTZRange(),
            'property_values': [],
            'property_value_percentiles': {},
            'property_anomaly_rates': [],
            'property_anomaly_percentiles': {},
        }

    property_values = observations_to_property_values(observations)
    valid_values_length = len(property_values) - property_values.count(None)

    if valid_values_length == 1:
        # NOTE(review): this reports property_values[0] as the median, which
        # is None when the single valid value is not the first one, and it
        # returns the values unsliced; confirm both are intended.
        return {
            'phenomenon_time_range': phenomenon_time_range,
            'property_values': property_values,
            'property_value_percentiles': {
                50: property_values[0]
            },
            'property_anomaly_rates': [0],
            'property_anomaly_percentiles': {
                0: 0
            },
        }

    minimal_points_in_windows = DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS
    if use_baseline:
        minimal_points_in_windows /= 2

    window_length = detector_params["future_window_size"]
    if not use_baseline:
        window_length += detector_params["lag_window_size"]

    # Too few valid points to fill the configured windows: shrink both windows
    # proportionally, but never below half of the minimal point count.
    if minimal_points_in_windows < valid_values_length <= window_length:
        detector_params["future_window_size"] = int(
            max(DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS / 2,
                valid_values_length * DEFAULT_BITMAP_MOD_LEADING_WINDOW_SIZE_PCT))
        detector_params["lag_window_size"] = int(
            max(DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS / 2,
                valid_values_length * DEFAULT_BITMAP_MOD_LAGGING_WINDOW_SIZE_PCT))

    # The slots that are actually reported, i.e. without the extensions.
    visible = slice(lower_ext, lower_ext + num_time_slots)

    property_value_percentiles = percentiles(property_values[visible], value_breaks)

    obs_reduced = {
        obs.phenomenon_time_range.lower.timestamp(): obs.result
        for obs in observations
    }

    # The observations double as their own baseline; None means the detector
    # runs without a baseline.
    baseline_reduced = None
    if use_baseline and baseline_time_range is None:
        baseline_reduced = dict(obs_reduced)

    if valid_values_length <= 1:
        property_anomaly_rates = [
            None if value is None else 0
            for value in property_values[visible]
        ]
        return {
            'phenomenon_time_range': phenomenon_time_range,
            'property_values': property_values[visible],
            'property_value_percentiles': property_value_percentiles,
            'property_anomaly_rates': property_anomaly_rates,
            'property_anomaly_percentiles': {
                0: 0
            },
        }

    if baseline_reduced is None:
        detector = AnomalyDetector(obs_reduced,
                                   algorithm_name=detector_method,
                                   algorithm_params=detector_params,
                                   score_only=True)
    else:
        detector = AnomalyDetector(obs_reduced,
                                   baseline_reduced,
                                   algorithm_name=detector_method,
                                   algorithm_params=detector_params,
                                   score_only=True)

    property_anomaly_rates = detector.get_all_scores().values

    # NOTE(review): the percentiles are taken before the None placeholders are
    # inserted below, so this slice may be offset against property_values when
    # values are missing; confirm this is intended.
    property_anomaly_percentiles = percentiles(property_anomaly_rates[visible], anomaly_breaks)

    # Re-insert a None rate at every position where the value is missing.
    for i, value in enumerate(property_values):
        if value is None:
            property_anomaly_rates.insert(i, None)

    return {
        'phenomenon_time_range': phenomenon_time_range,
        'property_values': property_values[visible],
        'property_value_percentiles': property_value_percentiles,
        'property_anomaly_rates': property_anomaly_rates[visible],
        'property_anomaly_percentiles': property_anomaly_percentiles,
    }
ts = ts.loc[ ts['stage_parallel'] == 'Puding Mixing #1'] # 'Puding Mixing #1', 'Sterilization #111', 'Storage tank #1' value_col = 'sensor_value' # TODO: ADDING ZEROS JUST FOR PLAYING. REMOVE LATER ts[value_col] = ts[value_col].transform( lambda x: x.fillna(x.median(), inplace=False)) else: raise Exception('Unknown example.') # run anomaly detection algorithm keys = ts['epoch'] values = ts[value_col] ts_dict = dict(zip(keys, values)) algorithm_name = 'exp_avg_detector' anomaly_detector = AnomalyDetector(ts_dict, algorithm_name=algorithm_name) anomalies = anomaly_detector.get_anomalies() anomaly_scores = anomaly_detector.get_all_scores() # plot results plot_ts_and_anomalies(ts, value_col, anomalies, anomaly_scores, ts_only=False, dir='/Users/yuval/Desktop/', show=True, plotly=True)