def test_sr(sr_params):
    """Check SpectralResidual predictions on the module-level series ``X`` and ``X_pert``.

    ``sr_params`` unpacks to ``(window_amp, window_local, n_est_points,
    return_instance_score)``.
    """
    window_amp, window_local, n_est_points, return_instance_score = sr_params
    threshold = 2.5

    od = SpectralResidual(
        threshold=threshold,
        window_amp=window_amp,
        window_local=window_local,
        n_est_points=n_est_points,
    )
    assert od.threshold == threshold

    expected_meta = {
        'name': 'SpectralResidual',
        'detector_type': 'online',
        'data_type': 'time-series',
        'version': __version__
    }
    assert od.meta == expected_meta

    def _check_scores(data):
        # Scores are only returned on request; when present, the number of
        # flagged points must equal the number of scores above the threshold.
        scores = data['instance_score']
        if not return_instance_score:
            assert scores is None
            return
        n_above = (scores > od.threshold).astype(int).sum()
        assert data['is_outlier'].sum() == n_above

    # Unperturbed series: at most two points may be flagged.
    preds_in = od.predict(X, t, return_instance_score=return_instance_score)
    assert preds_in['data']['is_outlier'].sum() <= 2.
    _check_scores(preds_in['data'])

    # Perturbed series: at least one point must be flagged.
    preds_out = od.predict(X_pert, t, return_instance_score=return_instance_score)
    assert preds_out['data']['is_outlier'].sum() > 0
    _check_scores(preds_out['data'])

    assert preds_out['meta'] == od.meta
def spectral_residual(df):
    """Return the rows of ``df`` that the Spectral Residual detector flags as outliers.

    The detector is created without a threshold; the threshold is then
    inferred from the ``value`` column itself with ``threshold_perc=99``.

    Parameters
    ----------
    df
        Frame with a ``value`` column holding the series to score.

    Returns
    -------
    The subset of ``df`` whose ``is_outlier`` prediction equals 1.
    """
    detector = SpectralResidual(
        threshold=None,    # no fixed outlier-score threshold; inferred below
        window_amp=20,     # window for the average log amplitude
        window_local=20,   # window for the average saliency map
        n_est_points=20    # nb of estimated points padded to the end of the sequence
    )

    values = np.array(df['value'])
    steps = np.arange(0, len(df))  # positional index used as the time axis

    detector.infer_threshold(values, steps, threshold_perc=99)
    result = detector.predict(values, steps, return_instance_score=True)

    flagged = result['data']['is_outlier'] == 1
    return df.loc[flagged]
def __init__(self, X_tr_red, X_te_red, shift_detector, level):
    """
    Score the reduced test data with a Spectral Residual detector and pass
    the result on to the parent initializer.

    Adopt the parameter settings from
    https://docs.seldon.io/projects/alibi-detect/en/latest/examples/od_sr_synth.html
    """
    from alibi_detect.od import SpectralResidual as SR

    logger.info("Run Spectral Residual")

    # One value per row: the negated row-wise maximum, as plain Python lists.
    tr_neg_max = (-np.amax(X_tr_red, axis=1)).tolist()
    te_neg_max = (-np.amax(X_te_red, axis=1)).tolist()

    # NOTE(review): the original comments on the last three settings carried
    # trailing numbers (3, 21, 5) whose meaning is not shown here - confirm.
    sr_settings = dict(
        threshold=0,       # threshold for outlier score
        window_amp=20,     # window for the average log amplitude (orig. note: 3)
        window_local=20,   # window for the average saliency map (orig. note: 21)
        n_est_points=20,   # nb of estimated points padded to the end of the sequence (orig. note: 5)
    )
    detector = SR(**sr_settings)

    # Only the test data is stacked; keep the trailing len(X_te_red) scores.
    stacked = np.vstack([X_te_red])
    score = detector.score(stacked)[-len(X_te_red):]
    assert len(score) == len(te_neg_max), (len(score), len(te_neg_max))

    super().__init__(tr_neg_max, te_neg_max, score, shift_detector, level)
def test_padding(method, side):
    """Check ``SpectralResidual.pad_same`` on 100 seeded random signal/kernel pairs.

    For every pair the padded signal must give a 'valid' convolution of the
    original signal length, and the padded head/tail must hold the values
    expected for ``method`` (zeros for 'constant', the edge value for
    'replicate', mirrored neighbours otherwise) on the requested ``side``.
    """
    np.random.seed(0)

    for _ in range(100):
        n_x = np.random.randint(low=10, high=1000)
        n_w = np.random.randint(low=2, high=n_x - 1)
        X = np.random.randint(low=0, high=10, size=n_x)
        W = np.random.randint(low=0, high=10, size=n_w)

        X_pad = SpectralResidual.pad_same(X=X, W=W, method=method, side=side)
        assert np.convolve(X_pad, W, 'valid').shape[0] == n_x

        # How many padded samples sit before (head) and after (tail) the signal.
        n_total = n_w - 1
        if side == 'left':
            n_head, n_tail = n_total, 0
        elif side == 'right':
            n_head, n_tail = 0, n_total
        else:
            # padding split over both ends; the left end gets the extra sample
            n_tail = n_total // 2
            n_head = n_total - n_tail

        # Reference values the padded regions are compared against.
        if method == 'constant':
            head_ref, tail_ref = 0, 0
        elif method == 'replicate':
            head_ref, tail_ref = X[0], X[-1]
        else:
            # mirror the samples next to each edge, excluding the edge itself
            head_ref = X[1:n_head + 1][::-1]
            tail_ref = X[-n_tail - 1:-1][::-1]

        if n_head > 0:
            assert np.all(X_pad[:n_head] == head_ref)
        if n_tail > 0:
            assert np.all(X_pad[-n_tail:] == tail_ref)
def test_detector(signal, window_amp, window_local, n_est_points, return_instance_score):
    """Check SpectralResidual predictions on a clean and a perturbed series.

    ``signal`` is a mapping providing the time axis ``'t'``, the clean series
    ``'X'`` and the perturbed series ``'X_pert'``.
    """
    t = signal["t"]
    X = signal['X']
    X_pert = signal['X_pert']
    threshold = 6

    od = SpectralResidual(
        threshold=threshold,
        window_amp=window_amp,
        window_local=window_local,
        n_est_points=n_est_points,
    )
    assert od.threshold == threshold
    assert od.meta == {
        'name': 'SpectralResidual',
        'detector_type': 'online',
        'data_type': 'time-series',
        'version': __version__
    }

    def _assert_scores_consistent(data):
        # With scores requested, the flagged count must equal the number of
        # scores above the threshold; otherwise no scores may be returned.
        if return_instance_score:
            over_threshold = data['instance_score'] > od.threshold
            assert data['is_outlier'].sum() == over_threshold.astype(int).sum()
        else:
            assert data['instance_score'] is None

    # Clean series: at most two flagged points are tolerated.
    preds_in = od.predict(X, t, return_instance_score=return_instance_score)
    assert preds_in['data']['is_outlier'].sum() <= 2.
    _assert_scores_consistent(preds_in['data'])

    # Perturbed series: check if we detect at least the number of perturbed points.
    preds_out = od.predict(X_pert, t, return_instance_score=return_instance_score)
    assert preds_out['data']['is_outlier'].sum() >= 10
    _assert_scores_consistent(preds_out['data'])

    assert preds_out['meta'] == od.meta
def init_od_sr(state_dict: Dict) -> SpectralResidual:
    """
    Build a spectral residual detector from a saved state.

    Parameters
    ----------
    state_dict
        Dictionary containing the parameter values. The keys ``threshold``,
        ``window_amp``, ``window_local``, ``n_est_points`` and
        ``n_grad_points`` are read.

    Returns
    -------
    Initialized SpectralResidual instance.
    """
    param_names = (
        'threshold',
        'window_amp',
        'window_local',
        'n_est_points',
        'n_grad_points',
    )
    # Forward exactly these entries; a missing key raises KeyError.
    init_kwargs = {name: state_dict[name] for name in param_names}
    return SpectralResidual(**init_kwargs)
gmm_density_net=gmm_density_net, n_gmm=n_gmm, **kwargs), OutlierVAE(threshold=threshold, latent_dim=latent_dim, samples=samples, **kwargs), OutlierAE(threshold=threshold, **kwargs), OutlierVAEGMM(threshold=threshold, gmm_density_net=gmm_density_net, n_gmm=n_gmm, latent_dim=latent_dim, samples=samples, **kwargs), OutlierProphet(threshold=.7, growth='logistic'), SpectralResidual(threshold=threshold, window_amp=10, window_local=10), OutlierSeq2Seq(input_dim, seq_len, threshold=threshold, threshold_net=threshold_net, latent_dim=latent_dim) ] n_tests = len(detector) @pytest.fixture def select_detector(request): return detector[request.param] @pytest.mark.parametrize('select_detector',
def run_gen(perc, data_path='/Users/baconbaker/Documents/Studium/ANM/anm-project/data/train_data/host'):
    """Infer one Spectral Residual threshold per (host, KPI) series and save them as CSV.

    Every file in ``data_path`` is read as CSV. Each file is grouped by
    ``(cmdb_id, name)``; non-constant series whose KPI is listed as non-flat
    in ``kpi_summary_info.data`` are resampled to their 1 or 5 minute
    interval, standardized and smoothed with a 10-sample rolling mean. A
    threshold is then inferred for each smoothed series.

    Parameters
    ----------
    perc
        Percentile passed to ``infer_threshold`` as ``threshold_perc``. It is
        also used, with ``.`` replaced by ``_``, in the output file name.
    data_path
        Directory holding the training CSV files. Defaults to the path that
        used to be hard-coded.

    Returns
    -------
    None. The thresholds are written to ``thresh_<perc>.data`` with the
    columns ``name``, ``host`` and ``thresh``.
    """
    dfs = {}
    for path in os.listdir(data_path):
        # the key is the file name without its 4-character extension
        dfs[path[:-4]] = pd.read_csv(os.path.join(data_path, path))

    df_info = pd.read_csv('kpi_summary_info.data')
    window_size = 10
    od = SpectralResidual(window_amp=window_size,
                          window_local=window_size,
                          n_est_points=5,
                          n_grad_points=5)

    not_flat = df_info.is_flat == False  # noqa: E712 - element-wise comparison
    per1_kpis = df_info[(df_info.interval == '1min') & not_flat]['kpi'].unique()
    per5_kpis = df_info[(df_info.interval == '5min') & not_flat]['kpi'].unique()

    # Rows are collected in a list: DataFrame.append was removed in pandas 2.0.
    rows = []
    for df_name, df in dfs.items():
        print('*' * 50)
        print('Running generation for', df_name)

        kpis = {}
        for key, frame in df.groupby(['cmdb_id', 'name']):
            # timestamps are divided by 1000 before conversion (ms -> s);
            # assign() avoids writing into the groupby slice in place
            stamped = frame.assign(timestamp=frame['timestamp'].apply(
                lambda x: datetime.fromtimestamp(x / 1000.0)))
            kpis[key] = stamped.set_index('timestamp').sort_index()

        print('Calculating rolling window')
        res = {}
        for key, frame in kpis.items():
            values = frame['value']
            if values.std() == 0:
                continue
            # 'min' replaces the deprecated 'T' offset alias
            if key[1] in per1_kpis:
                rule = '1min'
            elif key[1] in per5_kpis:
                rule = '5min'
            else:
                continue
            d = values.resample(rule).mean().interpolate()
            d = (d - d.mean()) / d.std()
            res[key] = d.rolling(10).mean()

        for key, smoothed in res.items():
            print('Determining threshold for', key)
            d = smoothed.dropna()
            # Test the NaN-stripped series: the rolling mean leaves leading
            # NaNs, so a short series is empty only after dropna().
            if len(d) == 0:
                print("ITS EMPTY", key)
                continue
            # pass the raw array, as find_anoms does for predict()
            od.infer_threshold(d.values, threshold_perc=perc)
            rows.append({
                'name': key[1],
                'host': key[0],
                'thresh': od.threshold
            })

    df_thresh = pd.DataFrame(rows, columns=['name', 'host', 'thresh'])
    df_thresh.to_csv('thresh_' + str(perc).replace('.', '_') + '.data',
                     index=False)
def find_anoms(hosts, df):
    """Return the (KPI name, host) pairs that look anomalous in ``df``.

    ``df`` is grouped by ``(cmdb_id, name)``. For the requested hosts,
    non-constant series whose KPI is listed as non-flat in
    ``kpi_summary_info.data`` are resampled to their 1 or 5 minute interval,
    standardized and smoothed with a 10-sample rolling mean. Each smoothed
    series is scored by a Spectral Residual detector using the threshold
    stored for it in ``thresh_99_999.data``.

    A pair is reported when either
    * no stored threshold exists for it, or
    * any of the samples at positions ``[-5:-2]`` is flagged as an outlier.

    Parameters
    ----------
    hosts
        Container of ``cmdb_id`` values to inspect.
    df
        Frame with ``cmdb_id``, ``name``, ``timestamp`` and ``value`` columns.

    Returns
    -------
    List of ``(name, cmdb_id)`` tuples.
    """
    start = time.time()
    grouped = df.groupby(['cmdb_id', 'name'])
    anoms = []

    df_info = pd.read_csv('kpi_summary_info.data')
    df_thresh = pd.read_csv('thresh_99_999.data')

    not_flat = df_info.is_flat == False  # noqa: E712 - element-wise comparison
    per1_kpis = df_info[(df_info.interval == '1min') & not_flat]['kpi'].unique()
    per5_kpis = df_info[(df_info.interval == '5min') & not_flat]['kpi'].unique()

    print('Calculating rolling window')
    res = {}
    for key, frame in grouped:
        # Skip other hosts before doing any per-row timestamp conversion.
        if key[0] not in hosts:
            continue
        # timestamps are divided by 1000 before conversion (ms -> s);
        # assign() avoids writing into the groupby slice in place
        stamped = frame.assign(timestamp=frame['timestamp'].apply(
            lambda x: datetime.fromtimestamp(x / 1000.0)))
        values = stamped.set_index('timestamp').sort_index()['value']
        if values.std() == 0:
            continue
        # 'min' replaces the deprecated 'T' offset alias
        if key[1] in per1_kpis:
            rule = '1min'
        elif key[1] in per5_kpis:
            rule = '5min'
        else:
            continue
        d = values.resample(rule).mean().interpolate()
        d = (d - d.mean()) / d.std()
        res[key] = d.rolling(10).mean()

    for key, smoothed in res.items():
        print('Determining threshold for', key)
        # Look the stored threshold up once and reuse the result.
        stored = df_thresh[(df_thresh['host'] == key[0])
                           & (df_thresh['name'] == key[1])]
        if len(stored) == 0:
            print('Anomaly, std in train was 0, now its not')
            anoms.append((key[1], key[0]))
            continue

        thresh = stored['thresh'].values[0]
        if np.isnan(thresh):
            print(
                "SR didn't generate threshhold because of low std for window > 10, skipping"
            )
            continue

        d = smoothed.dropna()
        if len(d) < 10:
            print('Rolling window data empty! Skipping')
            continue

        # Build the detector only once the series is known to be usable.
        od = SpectralResidual(threshold=thresh,
                              window_amp=10,
                              window_local=10,
                              n_est_points=5,
                              n_grad_points=5)
        outliers = od.predict(d.values)['data']
        # Only positions [-5:-2] are inspected; the last two samples are ignored.
        if np.sum(outliers['is_outlier'][-5:-2]) > 0:
            print(outliers['is_outlier'])
            print("ST Threshold Anomaly!")
            anoms.append((key[1], key[0]))

    print("It took", time.time() - start, "seconds to find", len(anoms),
          "anomalies")
    return anoms