def test_sr(sr_params):
    """Check SpectralResidual construction and predictions with threshold 2.5.

    ``sr_params`` is unpacked into ``(window_amp, window_local, n_est_points,
    return_instance_score)``. The series ``X``, its perturbed variant
    ``X_pert`` and the time axis ``t`` are not parameters; they are read from
    outside this function.
    """
    window_amp, window_local, n_est_points, return_instance_score = sr_params
    threshold = 2.5

    od = SpectralResidual(
        threshold=threshold,
        window_amp=window_amp,
        window_local=window_local,
        n_est_points=n_est_points,
    )

    # The detector must expose the configured threshold and its metadata.
    assert od.threshold == threshold
    expected_meta = {
        'name': 'SpectralResidual',
        'detector_type': 'online',
        'data_type': 'time-series',
        'version': __version__,
    }
    assert od.meta == expected_meta

    # Unperturbed series: at most two points may be flagged.
    preds_in = od.predict(X, t, return_instance_score=return_instance_score)
    data_in = preds_in['data']
    assert data_in['is_outlier'].sum() <= 2.
    if not return_instance_score:
        assert data_in['instance_score'] is None
    else:
        # The flags must agree with thresholding the returned scores.
        n_flagged_in = data_in['is_outlier'].sum()
        n_above_in = (data_in['instance_score'] > od.threshold).astype(int).sum()
        assert n_flagged_in == n_above_in

    # Perturbed series: at least one point must be flagged.
    preds_out = od.predict(X_pert, t, return_instance_score=return_instance_score)
    data_out = preds_out['data']
    assert data_out['is_outlier'].sum() > 0
    if not return_instance_score:
        assert data_out['instance_score'] is None
    else:
        n_flagged_out = data_out['is_outlier'].sum()
        n_above_out = (data_out['instance_score'] > od.threshold).astype(int).sum()
        assert n_flagged_out == n_above_out

    # Prediction metadata mirrors the detector metadata.
    assert preds_out['meta'] == od.meta
def test_detector(signal, window_amp, window_local, n_est_points,
                  return_instance_score):
    """Check SpectralResidual construction and predictions with threshold 6.

    ``signal`` is a mapping providing the time axis under ``'t'``, the
    unperturbed series under ``'X'`` and the perturbed series under
    ``'X_pert'``. The remaining arguments are forwarded to the detector or
    to ``predict``.
    """
    t = signal["t"]
    X = signal['X']
    X_pert = signal['X_pert']
    threshold = 6

    od = SpectralResidual(
        threshold=threshold,
        window_amp=window_amp,
        window_local=window_local,
        n_est_points=n_est_points,
    )

    # The detector must expose the configured threshold and its metadata.
    assert od.threshold == threshold
    expected_meta = {
        'name': 'SpectralResidual',
        'detector_type': 'online',
        'data_type': 'time-series',
        'version': __version__,
    }
    assert od.meta == expected_meta

    # Unperturbed series: at most two points may be flagged.
    preds_in = od.predict(X, t, return_instance_score=return_instance_score)
    data_in = preds_in['data']
    assert data_in['is_outlier'].sum() <= 2.
    if not return_instance_score:
        assert data_in['instance_score'] is None
    else:
        # The flags must agree with thresholding the returned scores.
        n_flagged_in = data_in['is_outlier'].sum()
        n_above_in = (data_in['instance_score'] > od.threshold).astype(int).sum()
        assert n_flagged_in == n_above_in

    preds_out = od.predict(X_pert, t, return_instance_score=return_instance_score)
    data_out = preds_out['data']
    # check if we detect at least the number of perturbed points
    assert data_out['is_outlier'].sum() >= 10
    if not return_instance_score:
        assert data_out['instance_score'] is None
    else:
        n_flagged_out = data_out['is_outlier'].sum()
        n_above_out = (data_out['instance_score'] > od.threshold).astype(int).sum()
        assert n_flagged_out == n_above_out

    # Prediction metadata mirrors the detector metadata.
    assert preds_out['meta'] == od.meta
def spectral_residual(df):
    """Return the rows of ``df`` that SpectralResidual marks as outliers.

    The ``'value'`` column is scored against a simple ``0..len(df)-1`` time
    axis. The detector starts without a threshold; one is inferred from the
    same data via ``infer_threshold(..., threshold_perc=99)`` before
    predicting.

    Args:
        df: DataFrame with a ``'value'`` column.

    Returns:
        The subset of ``df`` whose ``is_outlier`` prediction equals 1.
    """
    values = np.array(df['value'])
    time_samples = np.arange(0, len(df))

    detector = SpectralResidual(
        threshold=None,   # threshold for outlier score
        window_amp=20,    # window for the average log amplitude
        window_local=20,  # window for the average saliency map
        n_est_points=20,  # nb of estimated points padded to the end of the sequence
    )
    detector.infer_threshold(values, time_samples, threshold_perc=99)

    predictions = detector.predict(
        values, time_samples, return_instance_score=True)
    outlier_mask = predictions['data']['is_outlier'] == 1
    return df.loc[outlier_mask]
def find_anoms(hosts, df):
    """Find KPIs of the given hosts whose recent samples are flagged as outliers.

    For every ``(cmdb_id, name)`` group in ``df`` that belongs to one of
    ``hosts`` the function:

    1. skips the series if its raw values have zero standard deviation or the
       KPI is not listed as a non-flat 1min/5min KPI in
       ``kpi_summary_info.data``;
    2. resamples it to its interval (mean per bucket, gaps interpolated),
       standardises it and smooths it with a 10-sample rolling mean;
    3. looks up the stored threshold for the (host, KPI) pair in
       ``thresh_99_999.data`` and runs ``SpectralResidual`` with it;
    4. reports an anomaly when any of the samples at positions ``[-5:-2]`` of
       the smoothed series is flagged as an outlier.

    A series that has no row in the threshold file at all is reported as an
    anomaly straight away. Series with a NaN threshold, or with fewer than 10
    smoothed samples, are skipped.

    Args:
        hosts: Container of ``cmdb_id`` values to analyse.
        df: DataFrame with ``cmdb_id``, ``name``, ``timestamp`` (epoch
            milliseconds) and ``value`` columns.

    Returns:
        List of ``(name, cmdb_id)`` tuples, one per anomalous series.
    """
    start = time.time()

    df_info = pd.read_csv('kpi_summary_info.data')
    df_thresh = pd.read_csv('thresh_99_999.data')

    # Element-wise comparison on purpose: the dtype of ``is_flat`` is not
    # known here, so ``~`` is not guaranteed to be equivalent.
    not_flat = df_info.is_flat == False  # noqa: E712
    per1_kpis = set(
        df_info[(df_info.interval == '1min') & not_flat]['kpi'].unique())
    per5_kpis = set(
        df_info[(df_info.interval == '5min') & not_flat]['kpi'].unique())

    print('Calculating rolling window')
    res = {}
    for key, group in df.groupby(['cmdb_id', 'name']):
        host, name = key
        if host not in hosts:
            continue
        # 'min' / '5min' replace the deprecated 'T' / '5T' aliases.
        if name in per1_kpis:
            rule = 'min'
        elif name in per5_kpis:
            rule = '5min'
        else:
            continue
        if group['value'].std() == 0:
            continue

        # Convert and index timestamps only for series that are analysed.
        # ``assign`` avoids writing into the groupby sub-frame.
        stamps = group['timestamp'].apply(
            lambda ms: datetime.fromtimestamp(ms / 1000.0))
        values = (group.assign(timestamp=stamps)
                  .set_index('timestamp')
                  .sort_index()['value'])

        d = values.resample(rule).mean().interpolate()
        d = (d - d.mean()) / d.std()
        res[key] = d.rolling(10).mean()

    anoms = []
    for key, rolled in res.items():
        host, name = key
        print('Determining threshold for', key)

        match = df_thresh[(df_thresh['host'] == host)
                          & (df_thresh['name'] == name)]
        if match.empty:
            print('Anomaly, std in train was 0, now its not')
            anoms.append((name, host))
            continue

        thresh = match['thresh'].values[0]
        if np.isnan(thresh):
            print(
                "SR didn't generate threshhold because of low std for window > 10, skipping"
            )
            continue

        d = rolled.dropna()
        if len(d) < 10:
            print('Rolling window data empty! Skipping')
            continue

        # Build the detector only once the series is known to be usable.
        od = SpectralResidual(threshold=thresh,
                              window_amp=10,
                              window_local=10,
                              n_est_points=5,
                              n_grad_points=5)
        outliers = od.predict(d.values)['data']
        if np.sum(outliers['is_outlier'][-5:-2]) > 0:
            print(outliers['is_outlier'])
            print("ST Threshold Anomaly!")
            anoms.append((name, host))

    print("It took", time.time() - start, "seconds to find", len(anoms),
          "anomalies")
    return anoms