def test_score_only(self):
    """
    Test that a detector built with score_only=True reports no anomalies,
    while the same algorithm without the flag still returns an anomaly list.
    """
    score_only_detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
    regular_detector = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
    # Dedicated assertions report the offending value on failure,
    # unlike assertTrue(<expression>).
    self.assertIsNotNone(regular_detector.get_anomalies())
    self.assertEqual(len(score_only_detector.get_anomalies()), 0)
def test_score_only(self):
    """
    Test that score_only parameter doesn't give anomalies
    """
    score_only_detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
    regular_detector = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
    # Without score_only the detector must still hand back an anomaly list.
    self.assertIsNotNone(regular_detector.get_anomalies())
    # With score_only=True the anomaly list is expected to be empty.
    self.assertEqual(len(score_only_detector.get_anomalies()), 0)
def test_anomaly_threshold(self):
    """
    Test if score_percent_threshold works as expected.

    Two detectors are built on the same series with the same percent
    threshold but different algorithms; their anomaly lists must differ.
    """
    shared_kwargs = dict(score_percent_threshold=0.1)
    exp_avg = AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', **shared_kwargs)
    derivative = AnomalyDetector(self.s1, algorithm_name='derivative_detector', **shared_kwargs)
    # NOTE(review): this compares lists of anomaly objects; whether those
    # objects define value equality is not visible here - confirm that the
    # assertion is not trivially true.
    self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
def test_algorithm_params(self):
    """
    Test if optional parameter algorithm_params works as expected.
    """
    # A non-dict algorithm_params value must be rejected with ValueError.
    with self.assertRaises(ValueError):
        AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')

    # A custom smoothing factor must yield scores that differ from the
    # fixture detector's scores.
    tuned = AnomalyDetector(
        self.s1,
        algorithm_name="exp_avg_detector",
        algorithm_params={'smoothing_factor': 0.3})
    baseline_values = self.detector1.get_all_scores().values
    self.assertNotEqual(baseline_values, tuned.get_all_scores().values)
def test_sign_test_algorithm_interface(self):
    """
    Test "sign test" algorithm with a threshold of 0%

    Both the upper-bound and the lower-bound configuration are run against
    a flat baseline, and each run is expected to report exactly one anomaly.
    """
    # Flat baseline: value 1 at every timestamp 1..29.
    baseline = {t: 1 for t in range(1, 30)}

    # Upper bound: timestamps 1..20 are raised to 3.1 (baseline 1, offset 2).
    upper_params = {
        'percent_threshold_upper': 0,
        'offset': 2,
        'scan_window': 24,
        'confidence': 0.01
    }
    ts = dict(baseline)
    ts.update((t, 3.1) for t in range(1, 21))
    detector = AnomalyDetector(ts, baseline_time_series=baseline, algorithm_name='sign_test',
                               algorithm_params=upper_params)
    anomalies = detector.get_anomalies()
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)

    # Lower bound: timestamps 1..24 are set to 2.9 (baseline 1, offset 2).
    lower_params = {
        'percent_threshold_lower': 0,
        'offset': 2,
        'scan_window': 24,
        'confidence': 0.01
    }
    ts = dict(baseline)
    ts.update((t, 2.9) for t in range(1, 25))
    detector = AnomalyDetector(ts, baseline_time_series=baseline, algorithm_name='sign_test',
                               algorithm_params=lower_params)
    anomalies = detector.get_anomalies()
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)
def test_threshold(self):
    """
    Test score threshold=0

    With a score threshold of zero the detector is expected to report
    exactly one anomaly for the fixture series.
    """
    detector = AnomalyDetector(self.s1, score_threshold=0)
    anomalies = detector.get_anomalies()
    # Check for None first so a missing result fails with a clear message
    # instead of a TypeError from len().
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)
def __init__(self, ts=None, param=None):
    """
    Build a 'diff_percent_threshold' detector over the converted series.

    :param ts: raw time series handed to self.cvtTimeSeries(); defaults to
        an empty dict when omitted.
    :param param: algorithm_params forwarded to AnomalyDetector.
    """
    super(DiffPercentD, self).__init__()
    # A literal `{}` default would be shared between every call, so use
    # None as the sentinel and create a fresh dict per call.
    if ts is None:
        ts = {}
    # Convert once and reuse the result for both the print and the detector.
    converted = self.cvtTimeSeries(ts)
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(converted)
    self.detector = AnomalyDetector(
        converted,
        algorithm_name='diff_percent_threshold',
        algorithm_params=param)
def clean_anomaly2(df, index_name="15分钟段", var_name="实际功率", anomaly_limit=1.5, base_p=0.3):
    """
    Drop outlying rows of `var_name` within each `index_name` group.

    For every group the rows outside a percentile band are removed first
    ([2, 99] when the group's median exceeds `base_p`, otherwise [1, 98]).
    The remaining values are z-normalised and scored with the
    'exp_avg_detector'; rows whose score is >= `anomaly_limit` are dropped.
    Groups with zero (or undefined) standard deviation skip the scoring step.

    :param df: input DataFrame containing `index_name` and `var_name`.
    :param index_name: column used to group the rows.
    :param var_name: column holding the values to clean.
    :param anomaly_limit: rows scoring at or above this value are removed.
    :param base_p: median level that selects the percentile band.
    :return: a new DataFrame with a fresh integer index; an empty frame with
        the same columns when `df` has no groups.
    """
    cleaned_groups = []
    for _, group in df.groupby(index_name):
        temp = deepcopy(group).reset_index(drop=True)
        band = [2, 99] if np.median(temp[var_name]) > base_p else [1, 98]
        limit_low, limit_up = np.percentile(temp[var_name], band)
        in_band = (temp[var_name] <= limit_up) & (temp[var_name] >= limit_low)
        temp = temp[in_band].reset_index(drop=True)

        ts = temp[var_name]
        ts_std = np.std(ts)
        if ts_std > 0:
            # Normalise only once the divisor is known to be non-zero.
            normalised = (ts - np.mean(ts)) / ts_std
            detector = AnomalyDetector(normalised.to_dict(), algorithm_name='exp_avg_detector')
            scores = detector.get_all_scores()
            cleaned_groups.append(temp[np.array(scores.values) < anomaly_limit])
        else:
            # Constant (or empty) group: nothing to score, keep as filtered.
            cleaned_groups.append(temp)

    if not cleaned_groups:
        # pd.concat([]) raises ValueError; return an empty frame instead.
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(cleaned_groups, ignore_index=True)
def test_algorithm(self):
    """
    Test if exception AlgorithmNotFound is raised as expected.
    """
    # An unknown algorithm name must be rejected at construction time.
    with self.assertRaises(exceptions.AlgorithmNotFound):
        AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')
def test_absolute_threshold_algorithm(self):
    """
    Test "absolute threshold" algorithm with a upper and lower threshold of 0.2
    """
    thresholds = {
        'absolute_threshold_value_upper': 0.2,
        'absolute_threshold_value_lower': 0.2
    }
    detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                               algorithm_params=thresholds)
    anomalies = detector.get_anomalies()
    # Dedicated assertions show the actual value when they fail.
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)

    # The algorithm must refuse to run without its threshold parameters.
    with self.assertRaises(exceptions.RequiredParametersNotPassed):
        AnomalyDetector(self.s1, algorithm_name='absolute_threshold')
def test_algorithm_DefaultDetector(self):
    """
    Test if optional parameter algorithm works as expected.

    Naming 'default_detector' explicitly must produce the same score
    timestamps and values as the fixture detector self.detector1.
    """
    explicit = AnomalyDetector(self.s1, algorithm_name='default_detector')
    for field in ('timestamps', 'values'):
        self.assertEqual(
            getattr(explicit.get_all_scores(), field),
            getattr(self.detector1.get_all_scores(), field))
def test_diff_percent_threshold_algorithm(self):
    """
    Test "diff percent threshold" algorithm with a threshold of 20%
    """
    thresholds = {
        'percent_threshold_upper': 20,
        'percent_threshold_lower': -20
    }
    detector = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                               algorithm_name='diff_percent_threshold',
                               algorithm_params=thresholds)
    anomalies = detector.get_anomalies()
    # Dedicated assertions show the actual value when they fail.
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)

    # Omitting the percent thresholds must be rejected.
    with self.assertRaises(exceptions.RequiredParametersNotPassed):
        AnomalyDetector(self.s1, baseline_time_series=self.s2,
                        algorithm_name='diff_percent_threshold')
def get_anomaly_series(series: pd.Series, algorithm: str = "bitmap_detector") -> pd.Series:
    """Return the anomaly score of every point of *series* as a new Series.

    The input is converted with ``series.to_dict()`` and scored by
    ``AnomalyDetector`` using *algorithm*. The returned Series carries only
    the scores, in the order the detector yields them, under a default
    integer index (the original index is not preserved).

    Raises:
        AssertionError: if *algorithm* is not one of the supported names.
    """
    supported = ["bitmap_detector", "derivative_detector", "exp_avg_detector"]
    assert algorithm in supported
    all_scores = AnomalyDetector(series.to_dict(), algorithm_name=algorithm).get_all_scores()
    return pd.Series([score for _timestamp, score in all_scores.iteritems()])
def __init__(self, metrix, related_metrices):
    """
    Initializer
    :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
    :param list related_metrices: a list of time series.
    """
    self.metrix = self._load(metrix)
    self.related_metrices = related_metrices
    # NOTE(review): the detector receives the raw `metrix` argument, not the
    # loaded self.metrix - confirm that this is intentional.
    detector = AnomalyDetector(metrix)
    self.anomaly_detector = detector
    self.anomalies = detector.get_anomalies()
    # Analysis runs last because it needs the attributes set above.
    self._analyze()
def isAnomaly(self, data):
    """
    Report whether the last data point of `data` is an anomaly.

    `data` is indexed positionally (0..len-1) and scored with the
    'exp_avg_detector' using self.score_threshold.

    :param data: indexable sequence of values.
    :return: tuple (is_last_point_anomalous, anomalies). On any detector
        failure the result is (False, []).
    """
    length = len(data)
    try:
        detector = AnomalyDetector({i: data[i] for i in range(length)},
                                   algorithm_name='exp_avg_detector',
                                   score_threshold=self.score_threshold)
        anomalies = detector.get_anomalies()
        last_is_anomalous = any(
            anomaly.exact_timestamp == length - 1 for anomaly in anomalies)
    except Exception:
        # Best effort: a failing detector means "no anomaly". Catching
        # Exception rather than using a bare except keeps
        # KeyboardInterrupt/SystemExit propagating.
        return False, []
    return last_is_anomalous, anomalies
def test_custom_algorithm(self):
    """
    Test passing a custom algorithm class
    """
    thresholds = {
        'percent_threshold_upper': 20,
        'percent_threshold_lower': -20
    }
    detector = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                               algorithm_class=CustomAlgo,
                               algorithm_params=thresholds)
    anomalies = detector.get_anomalies()
    # Dedicated assertions show the actual value when they fail.
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)
def detect(self, ts):
    """
    Score `ts` with the default AnomalyDetector and return the scores keyed
    by formatted local time.

    :param ts: time series accepted by AnomalyDetector.
    :return: dict mapping 'DD-Mon-YYYY HH:MM:SS' strings to anomaly scores;
        timestamps that format to the same string keep the last score.
    """
    all_scores = AnomalyDetector(ts).get_all_scores()
    return {
        time.strftime('%d-%b-%Y %H:%M:%S', time.localtime(timestamp)): value
        for timestamp, value in all_scores.iteritems()
    }
def get_anoms(anomalous_ts):
    """
    Return the anomalies found in `anomalous_ts` at score_threshold=1.5.

    :param anomalous_ts: iterable of (timestamp, value) pairs, or anything
        else accepted by dict().
    :return: list of anomalies; an empty list for empty input or when the
        detector fails (the failure is logged).
    """
    if not anomalous_ts:
        return []

    anomalies = []
    try:
        anomaly_ts_dict = dict(anomalous_ts)
        my_detector = AnomalyDetector(anomaly_ts_dict, score_threshold=1.5)
        anomalies = my_detector.get_anomalies()
    except Exception:
        # Best effort: log and fall through with an empty result. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        logger.error(traceback.format_exc())
        logger.error('error :: AnomalyDetector')
    return anomalies
def get_anomalies(series: pd.Series, algorithm: str = "bitmap_detector") -> List[dict]:
    """Detect anomalies in *series* and describe each one as a plain dict.

    The input is converted with ``series.to_dict()`` and passed to
    ``AnomalyDetector`` using *algorithm*. Each returned dict has the keys
    ``start_time``, ``end_time``, ``top_score_time`` and ``score``.

    Raises:
        AssertionError: if *algorithm* is not one of the supported names.
    """
    supported = ["bitmap_detector", "derivative_detector", "exp_avg_detector"]
    assert algorithm in supported
    detector = AnomalyDetector(series.to_dict(), algorithm_name=algorithm)

    report = []
    for anomaly in detector.get_anomalies():
        report.append({
            "start_time": anomaly.start_timestamp,
            "end_time": anomaly.end_timestamp,
            "top_score_time": anomaly.exact_timestamp,
            "score": anomaly.anomaly_score,
        })
    return report
def get_anoms(anomalous_ts):
    """
    Return the anomalies found in `anomalous_ts` at score_threshold=1.5.

    :param anomalous_ts: iterable of (timestamp, value) pairs, or anything
        else accepted by dict().
    :return: list of anomalies; an empty list for empty input or when the
        detector fails. Both cases are logged as errors.
    """
    logger = logging.getLogger(skyline_app_logger)

    if not anomalous_ts:
        logger.error('error :: get_anoms :: no anomalous_ts')
        return []

    anomalies = []
    try:
        anomaly_ts_dict = dict(anomalous_ts)
        my_detector = AnomalyDetector(anomaly_ts_dict, score_threshold=1.5)
        anomalies = my_detector.get_anomalies()
    except Exception:
        # Best effort: log and fall through with an empty result. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        logger.error(traceback.format_exc())
        logger.error('error :: get_anoms :: AnomalyDetector')
    return anomalies
def cleanData(df, index_name="15分钟段", var_name="实际功率", limit=0.5):
    """
    Drop outlying rows of `var_name` within each `index_name` group.

    For every group only the rows strictly between the 5th and 95th
    percentile are kept. The remaining values are z-normalised and scored
    with the 'exp_avg_detector'; rows whose score is >= `limit` are dropped.
    Groups with zero (or undefined) standard deviation skip the scoring step.

    :param df: input DataFrame containing `index_name` and `var_name`.
    :param index_name: column used to group the rows.
    :param var_name: column holding the values to clean.
    :param limit: rows scoring at or above this value are removed.
    :return: a new DataFrame with a fresh integer index; an empty frame with
        the same columns when `df` has no groups.
    """
    cleaned_groups = []
    for _, group in df.groupby(index_name):
        temp = deepcopy(group).reset_index(drop=True)
        limit_low, limit_up = np.percentile(temp[var_name], [5, 95])
        inside = (temp[var_name] < limit_up) & (temp[var_name] > limit_low)
        temp = temp[inside].reset_index(drop=True)

        ts = temp[var_name]
        ts_std = np.std(ts)
        if ts_std > 0:
            # Normalise only once the divisor is known to be non-zero.
            normalised = (ts - np.mean(ts)) / ts_std
            detector = AnomalyDetector(normalised.to_dict(), algorithm_name='exp_avg_detector')
            scores = detector.get_all_scores()
            cleaned_groups.append(temp[np.array(scores.values) < limit])
        else:
            # Constant (or empty) group: nothing to score, keep as filtered.
            cleaned_groups.append(temp)

    if not cleaned_groups:
        # pd.concat([]) raises ValueError; return an empty frame instead.
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(cleaned_groups, ignore_index=True)
def pointsOfCorrelation(ts1, ts2, thresholdVal, correlation_threshold=0.8):
    """
    Find anomalous periods of `ts2` during which `ts1` and `ts2` correlate.

    Anomalies are detected on `ts2` with the 'exp_avg_detector'. For each
    anomaly window a Correlator over (ts1, ts2) is built; windows that pass
    `is_correlated` are reported.

    :param ts1: first time series, passed to Correlator.
    :param ts2: second time series; anomalies are detected on this one.
    :param thresholdVal: score_threshold handed to AnomalyDetector.
    :param correlation_threshold: threshold handed to
        Correlator.is_correlated (defaults to the previously fixed 0.8).
    :return: list of [time_period, coefficient, anomaly_score] entries where
        time_period is a (start, end) tuple of 'YYYY-MM-DD HH:MM:SS' local
        time strings and both numbers are rounded to 2 decimals.
    """
    detector = AnomalyDetector(ts2, score_threshold=thresholdVal, algorithm_name="exp_avg_detector")

    corrPoints = []
    for a in detector.get_anomalies():
        try:
            my_correlator = Correlator(ts1, ts2, a.get_time_window())
            if not my_correlator.is_correlated(threshold=correlation_threshold):
                continue
            correlatorResultObj = my_correlator.get_correlation_result()
            # Change time period to human readable format
            start = strftime('%Y-%m-%d %H:%M:%S', localtime(a.start_timestamp))
            end = strftime('%Y-%m-%d %H:%M:%S', localtime(a.end_timestamp))
            corrPoints.append([
                (start, end),
                round(correlatorResultObj.coefficient, 2),
                round(a.anomaly_score, 2)
            ])
        except Exception:
            # Skip windows the correlator cannot handle. Catching Exception
            # (not a bare except) keeps KeyboardInterrupt/SystemExit
            # propagating.
            continue
    return corrPoints
def __init__(self, X_tr_red, X_te_red, shift_detector, level):
    """
    Score the test set with AnomalyDetector, using the train set as baseline.

    Each row of the inputs is reduced to the negated maximum over axis 1;
    the resulting lists are indexed by position and fed to AnomalyDetector
    (test as series, train as baseline). The per-position scores are then
    passed to the parent initializer.

    :param X_tr_red: train array, reduced with np.amax(..., axis=1).
    :param X_te_red: test array, reduced with np.amax(..., axis=1).
    :param shift_detector: forwarded unchanged to the parent initializer.
    :param level: forwarded unchanged to the parent initializer.
    """
    logger.info("Run Luminol")
    X_tr_odim = (-np.amax(X_tr_red, axis=1)).tolist()
    X_te_odim = (-np.amax(X_te_red, axis=1)).tolist()

    # Position in the list doubles as the timestamp.
    test_series = dict(enumerate(X_te_odim))
    train_series = dict(enumerate(X_tr_odim))

    detector = AnomalyDetector(test_series,
                               baseline_time_series=train_series,
                               algorithm_params={
                                   'precision': 10,
                                   'lag_window_size': 0.1,
                                   'future_window_size': 0.1,
                                   'chunk_size': 2
                               })
    all_scores = detector.get_all_scores()
    score = [all_scores[position] for position in range(len(X_te_odim))]
    assert len(score) == len(X_te_odim), (len(score), len(X_te_odim))
    super().__init__(X_tr_odim, X_te_odim, score, shift_detector, level)
def f1_metrics(self):
    """
    Score self.y with AnomalyDetector and pick the quantile threshold that
    maximises F1 against the labelled anomaly positions.

    Reads self.y (array of values), self.ts (matching index) and
    self.abn_pt (positions labelled anomalous); stores the label vector in
    self.y_true.

    :return: tuple (average_precision, y_true, y_score) where y_score is the
        0/1 prediction vector at the best-F1 quantile threshold.
    """
    y = self.y
    detector = AnomalyDetector(pd.Series(y, index=self.ts).to_dict())
    scores = detector.get_all_scores()

    # Ground truth: 1 at every labelled anomaly position.
    y_true = np.zeros(y.size)
    for position in self.abn_pt:
        y_true[position] = 1
    self.y_true = y_true

    np_score = [entry[1] for entry in scores.iteritems()]
    score_series = pd.Series(data=np_score)
    ap = average_precision_score(y_true, np_score)

    def predict_at(quantile):
        # Flag every position whose score lies strictly above the quantile.
        threshold = np.quantile(np_score, quantile)
        flagged = score_series[score_series.values > threshold].index.values
        prediction = np.zeros(y.size)
        for position in flagged:
            prediction[position] = 1
        return prediction

    # Candidate quantiles: log10 of 1.0, 1.1, ..., 9.9.
    candidates = np.log10(np.arange(0, 9, .1) + 1)
    f1 = [f1_score(y_true, predict_at(q)) for q in candidates]
    y_score = predict_at(candidates[np.argmax(f1)])
    return ap, y_true, y_score
def anomalies(self, metrics):
    """
    Compute an anomaly score for every entry of `metrics`.

    :param metrics: mapping of 'YYYY-MM-DD HH:MM:SS' strings to values.
    :return: dict mapping the original date strings to their anomaly
        scores, or False when `metrics` is empty or the detector yields no
        scores.
    """
    series = {}
    label_by_epoch = {}
    for label in metrics:
        parsed = datetime.datetime.strptime(label, "%Y-%m-%d %H:%M:%S")
        epoch = int(datetime.datetime.timestamp(parsed))
        series[epoch] = metrics[label]
        # Remember the original string so scores can be reported under it.
        label_by_epoch[epoch] = label

    if not series:
        return False

    scores = AnomalyDetector(series).get_all_scores()
    if not scores:
        return False

    return {label_by_epoch[epoch]: value for epoch, value in scores.iteritems()}
def get_timeseries(
        phenomenon_time_range,
        num_time_slots,
        get_observations,
        detector_method='bitmap_detector',  # LinkedIn bitmap
        detector_params=None,
        anomaly_breaks=DEFAULT_ANOMALY_BREAKS,
        value_breaks=DEFAULT_VALUE_BREAKS,
        extend_range=True,
        baseline_time_range=None,
        shift=True,
        use_baseline=True):
    """
    Build property values, anomaly rates and their percentiles for a range.

    :param phenomenon_time_range: range echoed back in the result.
    :param num_time_slots: number of slots kept from the (possibly
        extended) observation list.
    :param get_observations: callable (lower_ext, upper_ext) -> list of
        observations; each observation must expose
        `phenomenon_time_range.lower` and `result`.
    :param detector_method: algorithm_name passed to AnomalyDetector.
    :param detector_params: algorithm_params for the detector. Defaults to
        precision=8, lag_window_size=20, future_window_size=20,
        chunk_size=2. The dict is copied, so the caller's object is never
        modified.
    :param anomaly_breaks: breaks passed to percentiles() for anomaly rates.
    :param value_breaks: breaks passed to percentiles() for the values.
    :param extend_range: when True, request extra observations around the
        range so the detector windows are filled.
    :param baseline_time_range: when None (and use_baseline is set) the
        observations themselves serve as the baseline.
    :param shift: together with use_baseline selects how the extension is
        split between the lower and upper side.
    :param use_baseline: whether a baseline series is handed to the detector.
    :return: dict with the keys 'phenomenon_time_range', 'property_values',
        'property_value_percentiles', 'property_anomaly_rates' and
        'property_anomaly_percentiles'.
    :raises Exception: when get_observations does not return a list.
    """
    # The window sizes below may be rewritten for short series. Work on a
    # private copy so neither a shared default nor the caller's dict is
    # changed from one call to the next.
    if detector_params is None:
        detector_params = {
            "precision": 8,
            "lag_window_size": 20,
            "future_window_size": 20,
            "chunk_size": 2
        }
    else:
        detector_params = dict(detector_params)

    # TODO: loading a dedicated baseline for `baseline_time_range` is not
    # implemented; only the "observations as baseline" case is handled.

    lower_ext = 0
    upper_ext = 0

    if extend_range:
        lower_ext = detector_params["lag_window_size"]
        upper_ext = detector_params["future_window_size"]

        if use_baseline and shift:
            upper_ext = 0

        if use_baseline and not shift:
            lower_ext = int(upper_ext / 2)
            upper_ext -= lower_ext + 1

    observations = get_observations(lower_ext, upper_ext)

    if not isinstance(observations, list):
        raise Exception('property_values should be array')

    if len(observations) == 0:
        return {
            'phenomenon_time_range': DateTimeTZRange(),
            'property_values': [],
            'property_value_percentiles': {},
            'property_anomaly_rates': [],
            'property_anomaly_percentiles': {},
        }

    property_values = observations_to_property_values(observations)

    VALID_VALUES_LENGTH = len(property_values) - property_values.count(None)

    if VALID_VALUES_LENGTH == 1:
        return {
            'phenomenon_time_range': phenomenon_time_range,
            'property_values': property_values,
            'property_value_percentiles': {
                50: property_values[0]
            },
            'property_anomaly_rates': [0],
            'property_anomaly_percentiles': {
                0: 0
            },
        }

    MINIMAL_POINTS_IN_WINDOWS = DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS

    if use_baseline:
        MINIMAL_POINTS_IN_WINDOWS /= 2

    # if VALID_VALUES_LENGTH <= MINIMAL_POINTS_IN_WINDOWS:
    #     # warn the user?

    if use_baseline:
        WINDOW_LENGTH = detector_params["future_window_size"]
    else:
        WINDOW_LENGTH = (detector_params["future_window_size"]
                         + detector_params["lag_window_size"])

    # Too few points to fill the configured windows: resize the windows
    # from the number of valid values.
    if MINIMAL_POINTS_IN_WINDOWS < VALID_VALUES_LENGTH <= WINDOW_LENGTH:
        detector_params["future_window_size"] = int(
            max(
                DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS / 2,
                VALID_VALUES_LENGTH * DEFAULT_BITMAP_MOD_LEADING_WINDOW_SIZE_PCT))
        detector_params["lag_window_size"] = int(
            max(
                DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS / 2,
                VALID_VALUES_LENGTH * DEFAULT_BITMAP_MOD_LAGGING_WINDOW_SIZE_PCT))

    property_value_percentiles = percentiles(
        property_values[lower_ext:lower_ext + num_time_slots], value_breaks)

    # None means "no baseline"; this replaces probing for an unbound local
    # with try/except NameError.
    baseline_reduced = None
    if use_baseline and baseline_time_range is None:
        baseline_reduced = {
            obs.phenomenon_time_range.lower.timestamp(): obs.result
            for obs in observations
        }

    obs_reduced = {
        obs.phenomenon_time_range.lower.timestamp(): obs.result
        for obs in observations
    }

    if VALID_VALUES_LENGTH <= 1:
        property_anomaly_rates = [
            0 if value is not None else value
            for value in property_values[lower_ext:lower_ext + num_time_slots]
        ]

        return {
            'phenomenon_time_range': phenomenon_time_range,
            'property_values': property_values[lower_ext:lower_ext + num_time_slots],
            'property_value_percentiles': property_value_percentiles,
            'property_anomaly_rates': property_anomaly_rates,
            'property_anomaly_percentiles': {
                0: 0
            },
        }

    if baseline_reduced is None:
        detector = AnomalyDetector(obs_reduced,
                                   algorithm_name=detector_method,
                                   algorithm_params=detector_params,
                                   score_only=True)
    else:
        detector = AnomalyDetector(obs_reduced,
                                   baseline_reduced,
                                   algorithm_name=detector_method,
                                   algorithm_params=detector_params,
                                   score_only=True)

    # Copy the scores: None placeholders are inserted into this list below.
    property_anomaly_rates = list(detector.get_all_scores().values)

    property_anomaly_percentiles = percentiles(
        property_anomaly_rates[lower_ext:lower_ext + num_time_slots],
        anomaly_breaks)

    # Put a None back at every position whose value was None so the rates
    # line up with property_values.
    for i in range(len(property_values)):
        if property_values[i] is None:
            property_anomaly_rates.insert(i, None)

    return {
        'phenomenon_time_range': phenomenon_time_range,
        'property_values': property_values[lower_ext:lower_ext + num_time_slots],
        'property_value_percentiles': property_value_percentiles,
        'property_anomaly_rates': property_anomaly_rates[lower_ext:lower_ext + num_time_slots],
        'property_anomaly_percentiles': property_anomaly_percentiles,
    }
from luminol.anomaly_detector import AnomalyDetector
import time

# Score the series stored in 'Turbidity.csv' and print every point whose
# anomaly score is positive, one "<local time>, <score>" line per point.
my_detector = AnomalyDetector('Turbidity.csv')
score = my_detector.get_all_scores()

for timestamp, value in score.iteritems():
    t_str = time.strftime('%y-%m-%d %H:%M:%S', time.localtime(timestamp))
    if value > 0:
        print(f'{t_str}, {value}')
from luminol.anomaly_detector import AnomalyDetector
import matplotlib.pyplot as plt
import pandas as pd

# Load the NAB "Twitter_volume_FB" sample; the first CSV column is the index.
df = pd.read_csv(
    "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_FB.csv",
    header=0,
    index_col=0)

# The detector is fed a dict keyed by row position rather than timestamp.
ts = dict(enumerate(df.value))

my_detector = AnomalyDetector(ts)
score = my_detector.get_all_scores()
anomalies_chart = [value for _timestamp, value in score.iteritems()]

# First figure: the raw values; second figure: the anomaly scores in red.
list_values = list(ts.values())
plt.plot(list_values)
plt.show()
plt.plot(anomalies_chart, color='r')
plt.show()
def main(argv):
    """
    Parse a CANoe ASCII (.asc) log and print the anomalous CAN events.

    Command line options (read from sys.argv):
      -h / --help          print usage and exit
      -i / --input <file>  path of the log file (required)
      -v                   print every anomaly instead of only those with a
                           score above 3.4

    :param argv: unused; the options are always read from sys.argv[1:].
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:v", ["help", "input="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))
        usage()
        sys.exit(2)

    input_path = None
    verbose = False
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-i", "--input"):
            input_path = a
        else:
            assert False, "unhandled option"

    if input_path is None:
        # Without an input file open() would fail with an opaque TypeError.
        usage()
        sys.exit(2)

    # this is the regular expression used to parse CANoe logs in ASCII format (.asc)
    regex = r"(\s+)([+-]?\d*\.\d+)(?![-+0-9\.])(\s+)(\d+)(\s+)(\w+)(\s+)(\w+)(\s+)([a-z])(\s+)(\d+)(?:(?!X).)*(Length = )(\d+)(\s+)(BitCount = )(\d+)(\s+)(ID = )(\d+)"

    # Read the whole log; the context manager closes the file handle.
    with open(input_path) as logfile:
        contents = logfile.read()

    # event_dict stores the values (timestamp + CAN-ID) extracted from the logs
    event_dict = {}
    for match in re.finditer(regex, contents):
        # group(2) is the time since engine start in seconds; scale it to
        # microseconds (the key stays a float)
        timestamp = float(match.group(2)) * 1000000
        # match.group(20) is ID of CAN event in decimal
        event_dict[timestamp] = match.group(20)

    my_detector = AnomalyDetector(event_dict, algorithm_name="exp_avg_detector")
    # filter events in time series for anomalies
    anomalies = my_detector.get_anomalies()

    # Blank separator line before the report.
    print("")
    for attack in anomalies:
        if attack.exact_timestamp not in event_dict:
            continue
        # "-v" outputs all anomalies; otherwise only those with score > 3.4
        if verbose or attack.anomaly_score > 3.4:
            print("{timestamp} - ID: {id} - Score: {value}".format(
                timestamp=attack.exact_timestamp,
                id=event_dict[attack.exact_timestamp],
                value=attack.anomaly_score))
if args.interpolate_period > 0: tf.set_interpolate(Interpolate(period={'count': args.interpolate_period, 'unit': TimeUnit.MINUTE}, function=InterpolateFunction.LINEAR)) query.set_transformation_filter(tf) series_list = svc.query(query) for series in series_list: metric_id = '- %s %s' % (series.metric, print_tags(series.tags)) log('\t' + metric_id) # exclude empty series for specific tags if len(series.data) > 2: ts = {int(sample.t / 1000): sample.v for sample in series.data} detector = AnomalyDetector(ts, score_threshold=args.min_score) anomalies = [] for anomaly in detector.get_anomalies(): if time.mktime(now.timetuple()) - args.last_hours * 3600 <= anomaly.exact_timestamp: anomalies.append(anomaly) if anomalies: message.append(metric_id) for anomaly in anomalies: t_start, t_end = format_t(anomaly.start_timestamp), format_t(anomaly.end_timestamp) t_exact = format_t(anomaly.exact_timestamp) anomaly_msg = '\tAnomaly from %s to %s with score %s: %s, %s' % ( t_start, t_end, anomaly.anomaly_score, t_exact, ts[anomaly.exact_timestamp]) message.append(anomaly_msg)