Example #1
def test_sr(sr_params):
    window_amp, window_local, n_est_points, return_instance_score = sr_params
    # t, X and X_pert below are module-level test signals defined in the
    # surrounding test module (cf. the explicit signal fixture in test_detector)

    threshold = 2.5
    od = SpectralResidual(threshold=threshold,
                          window_amp=window_amp,
                          window_local=window_local,
                          n_est_points=n_est_points)

    assert od.threshold == threshold
    assert od.meta == {
        'name': 'SpectralResidual',
        'detector_type': 'online',
        'data_type': 'time-series',
        'version': __version__
    }
    preds_in = od.predict(X, t, return_instance_score=return_instance_score)
    assert preds_in['data']['is_outlier'].sum() <= 2.
    if return_instance_score:
        assert preds_in['data']['is_outlier'].sum() == (
            preds_in['data']['instance_score'] >
            od.threshold).astype(int).sum()
    else:
        assert preds_in['data']['instance_score'] is None
    preds_out = od.predict(X_pert,
                           t,
                           return_instance_score=return_instance_score)
    assert preds_out['data']['is_outlier'].sum() > 0
    if return_instance_score:
        assert preds_out['data']['is_outlier'].sum() == (
            preds_out['data']['instance_score'] >
            od.threshold).astype(int).sum()
    else:
        assert preds_out['data']['instance_score'] is None
    assert preds_out['meta'] == od.meta

def spectral_residual(df):
    od = SpectralResidual(
        threshold=None,   # threshold for the outlier score, inferred below
        window_amp=20,    # window for the average log amplitude
        window_local=20,  # window for the average saliency map
        n_est_points=20   # number of estimated points padded to the end of the sequence
    )
    X_outlier = np.array(df['value'])
    time_samples = np.arange(0, len(df))
    od.infer_threshold(X_outlier, time_samples, threshold_perc=99)
    od_preds = od.predict(X_outlier, time_samples, return_instance_score=True)
    pred = (od_preds['data']['is_outlier'] == 1)
    a = df.loc[pred]
    return a
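A minimal usage sketch of the helper above on synthetic data; the series and spike positions are illustrative assumptions, not taken from the source:

import numpy as np
import pandas as pd
from alibi_detect.od import SpectralResidual

# build a smooth signal and inject a few obvious spikes
rng = np.random.default_rng(0)
values = np.sin(np.linspace(0, 20 * np.pi, 1000)) + 0.1 * rng.standard_normal(1000)
values[[100, 400, 700]] += 5.0
df = pd.DataFrame({'value': values})

outliers = spectral_residual(df)  # rows of df flagged as outliers
print(outliers.index.tolist())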
Example #3
def __init__(self, X_tr_red, X_te_red, shift_detector, level):
    """
    Adopt the parameter settings from
    https://docs.seldon.io/projects/alibi-detect/en/latest/examples/od_sr_synth.html
    """
    from alibi_detect.od import SpectralResidual as SR
    logger.info("Run Spectral Residual")
    # reduce each sample to one dimension via the negated per-row maximum
    X_tr_odim = (-np.amax(X_tr_red, axis=1)).tolist()
    X_te_odim = (-np.amax(X_te_red, axis=1)).tolist()
    od = SR(
        threshold=0,      # threshold for the outlier score
        window_amp=20,    # window for the average log amplitude
        window_local=20,  # window for the average saliency map
        n_est_points=20   # number of estimated points padded to the end of the sequence
    )
    score = od.score(np.vstack([X_te_red]))[-len(X_te_red):]
    assert len(score) == len(X_te_odim), (len(score), len(X_te_odim))
    super().__init__(X_tr_odim, X_te_odim, score, shift_detector, level)
Example #4
def test_padding(method, side):
    np.random.seed(0)

    for _ in range(100):
        X_size = np.random.randint(low=10, high=1000)
        W_size = np.random.randint(low=2, high=X_size - 1)

        X = np.random.randint(low=0, high=10, size=X_size)
        W = np.random.randint(low=0, high=10, size=W_size)

        X_pad = SpectralResidual.pad_same(X=X, W=W, method=method, side=side)
        X_conv = np.convolve(X_pad, W, 'valid')
        assert X_conv.shape[0] == X_size

        # padding lengths on each side for the bilateral case
        pad_right = (W_size - 1) // 2
        pad_left = W_size - 1 - pad_right

        if method == 'constant':
            if side == 'left':
                assert np.all(X_pad[:W_size - 1] == 0)
            elif side == 'right':
                assert np.all(X_pad[-W_size + 1:] == 0)
            else:
                if pad_left > 0:
                    assert np.all(X_pad[:pad_left] == 0)
                if pad_right > 0:
                    assert np.all(X_pad[-pad_right:] == 0)

        elif method == 'replicate':
            if side == 'left':
                assert np.all(X_pad[:W_size - 1] == X[0])
            elif side == 'right':
                assert np.all(X_pad[-W_size + 1:] == X[-1])
            else:
                if pad_left > 0:
                    assert np.all(X_pad[:pad_left] == X[0])
                if pad_right > 0:
                    assert np.all(X_pad[-pad_right:] == X[-1])
        else:  # method == 'reflect'
            if side == 'left':
                assert np.all(X_pad[:W_size - 1] == X[1:W_size][::-1])
            elif side == 'right':
                assert np.all(X_pad[-W_size + 1:] == X[-2:-W_size - 1:-1])
            else:
                if pad_left > 0:
                    assert np.all(X_pad[:pad_left] == X[1:pad_left + 1][::-1])
                if pad_right > 0:
                    assert np.all(X_pad[-pad_right:] == X[-pad_right - 1:-1][::-1])
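To make the padding semantics tested above concrete, here is a small sketch using the same static method; the input values are illustrative:

import numpy as np
from alibi_detect.od import SpectralResidual

X = np.array([1., 2., 3., 4., 5.])
W = np.ones(3) / 3  # 3-point moving-average kernel

# 'replicate' repeats the edge values, so a 'valid' convolution over the
# padded series yields an output of the same length as X
X_pad = SpectralResidual.pad_same(X=X, W=W, method='replicate', side='bilateral')
print(X_pad)                           # [1. 1. 2. 3. 4. 5. 5.]
print(np.convolve(X_pad, W, 'valid'))  # 5 smoothed values, same length as X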
Example #5
def test_detector(signal, window_amp, window_local, n_est_points,
                  return_instance_score):
    t, X, X_pert = signal['t'], signal['X'], signal['X_pert']

    threshold = 6
    od = SpectralResidual(threshold=threshold,
                          window_amp=window_amp,
                          window_local=window_local,
                          n_est_points=n_est_points)

    assert od.threshold == threshold
    assert od.meta == {
        'name': 'SpectralResidual',
        'detector_type': 'online',
        'data_type': 'time-series',
        'version': __version__
    }
    preds_in = od.predict(X, t, return_instance_score=return_instance_score)
    assert preds_in['data']['is_outlier'].sum() <= 2.
    if return_instance_score:
        assert preds_in['data']['is_outlier'].sum() == (
            preds_in['data']['instance_score'] >
            od.threshold).astype(int).sum()
    else:
        assert preds_in['data']['instance_score'] is None
    preds_out = od.predict(X_pert,
                           t,
                           return_instance_score=return_instance_score)
    # check that we detect at least the number of perturbed points
    assert preds_out['data']['is_outlier'].sum() >= 10
    if return_instance_score:
        assert preds_out['data']['is_outlier'].sum() == (
            preds_out['data']['instance_score'] >
            od.threshold).astype(int).sum()
    else:
        assert preds_out['data']['instance_score'] is None
    assert preds_out['meta'] == od.meta
Example #6
def init_od_sr(state_dict: Dict) -> SpectralResidual:
    """
    Initialize spectral residual detector.

    Parameters
    ----------
    state_dict
        Dictionary containing the parameter values.

    Returns
    -------
    Initialized SpectralResidual instance.
    """
    od = SpectralResidual(threshold=state_dict['threshold'],
                          window_amp=state_dict['window_amp'],
                          window_local=state_dict['window_local'],
                          n_est_points=state_dict['n_est_points'],
                          n_grad_points=state_dict['n_grad_points'])
    return od
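A minimal sketch of calling the loader above; the parameter values in state_dict are illustrative assumptions:

state_dict = {
    'threshold': 1.,
    'window_amp': 20,
    'window_local': 20,
    'n_est_points': 20,
    'n_grad_points': 5
}
od = init_od_sr(state_dict)
assert od.threshold == state_dict['threshold']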
Example #7
detector = [
    OutlierAEGMM(threshold=threshold,
                 gmm_density_net=gmm_density_net,
                 n_gmm=n_gmm,
                 **kwargs),
    OutlierVAE(threshold=threshold,
               latent_dim=latent_dim,
               samples=samples,
               **kwargs),
    OutlierAE(threshold=threshold, **kwargs),
    OutlierVAEGMM(threshold=threshold,
                  gmm_density_net=gmm_density_net,
                  n_gmm=n_gmm,
                  latent_dim=latent_dim,
                  samples=samples,
                  **kwargs),
    OutlierProphet(threshold=.7, growth='logistic'),
    SpectralResidual(threshold=threshold, window_amp=10, window_local=10),
    OutlierSeq2Seq(input_dim,
                   seq_len,
                   threshold=threshold,
                   threshold_net=threshold_net,
                   latent_dim=latent_dim)
]
n_tests = len(detector)


@pytest.fixture
def select_detector(request):
    return detector[request.param]


@pytest.mark.parametrize('select_detector',
                         list(range(n_tests)),
                         indirect=True)
Example #8
import os
import time
from datetime import datetime

import pandas as pd
from alibi_detect.od import SpectralResidual


def run_gen(perc):
    data_path = '/Users/baconbaker/Documents/Studium/ANM/anm-project/data/train_data/host'
    dfs = {}
    for path in os.listdir(data_path):
        dfs[path[:-4]] = pd.read_csv(data_path + '/' + path)

    df_info = pd.read_csv('kpi_summary_info.data')

    window_size = 10
    od = SpectralResidual(window_amp=window_size,
                          window_local=window_size,
                          n_est_points=5,
                          n_grad_points=5)

    per1_kpis = df_info[(df_info.interval == '1min')
                        & (df_info.is_flat == False)]['kpi'].unique()
    per5_kpis = df_info[(df_info.interval == '5min')
                        & (df_info.is_flat == False)]['kpi'].unique()

    df_thresh = pd.DataFrame(columns=['name', 'host', 'thresh'])

    for df_name in dfs:
        print('*' * 50)
        print('Running generation for', df_name)
        interval = 0
        start_key = time.time()

        df = dfs[df_name]
        kpis = dict(tuple(df.groupby(['cmdb_id', 'name'])))
        res = {}

        for key in kpis:
            kpis[key]['timestamp'] = kpis[key]['timestamp'].apply(
                lambda x: datetime.fromtimestamp(x / 1000.0))
            kpis[key] = kpis[key].set_index('timestamp').sort_index()

        print('Calculating rolling window')
        for key in kpis:
            if kpis[key]['value'].std() == 0:
                continue
            elif key[1] in per1_kpis:
                d = kpis[key]['value'].resample('T').mean().interpolate()
            elif key[1] in per5_kpis:
                d = kpis[key]['value'].resample('5T').mean().interpolate()
            else:
                continue
            d = (d - d.mean()) / d.std()
            res[key] = d.rolling(10).mean()

        for key in res:
            print('Determining threshold for', key)
        d = res[key].dropna()
        if len(d) == 0:
            print('Empty after dropna, skipping', key)
            continue
            od.infer_threshold(d, threshold_perc=perc)
            thresh = od.threshold
            df_thresh = df_thresh.append(
                {
                    'name': key[1],
                    'host': key[0],
                    'thresh': thresh
                },
                ignore_index=True)

        df_thresh.to_csv('thresh_' + str(perc).replace('.', '_') + '.data',
                         index=False)
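Note that DataFrame.append, used in the loop above, was removed in pandas 2.0. A sketch of the replacement pattern, with illustrative row values:

import pandas as pd

rows = []  # collect dicts inside the loop instead of appending to the frame
rows.append({'name': 'cpu_util', 'host': 'os_001', 'thresh': 1.3})  # hypothetical row
df_thresh = pd.DataFrame(rows, columns=['name', 'host', 'thresh'])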
Example #9
import time
from datetime import datetime

import numpy as np
import pandas as pd
from alibi_detect.od import SpectralResidual


def find_anoms(hosts, df):
    start = time.time()
    kpis = dict(tuple(df.groupby(['cmdb_id', 'name'])))
    res = {}
    anoms = []

    df_info = pd.read_csv('kpi_summary_info.data')
    df_thresh = pd.read_csv('thresh_99_999.data')

    for key in kpis:
        kpis[key]['timestamp'] = kpis[key]['timestamp'].apply(
            lambda x: datetime.fromtimestamp(x / 1000.0))
        kpis[key] = kpis[key].set_index('timestamp').sort_index()

    per1_kpis = df_info[(df_info.interval == '1min')
                        & (df_info.is_flat == False)]['kpi'].unique()
    per5_kpis = df_info[(df_info.interval == '5min')
                        & (df_info.is_flat == False)]['kpi'].unique()

    print('Calculating rolling window')
    for key in kpis:
        if key[0] in hosts:
            if kpis[key]['value'].std() == 0:
                continue
            elif key[1] in per1_kpis:
                d = kpis[key]['value'].resample('T').mean().interpolate()
            elif key[1] in per5_kpis:
                d = kpis[key]['value'].resample('5T').mean().interpolate()
            else:
                continue
            d = (d - d.mean()) / d.std()
            res[key] = d.rolling(10).mean()

    for key in res:
        print('Determining threshold for', key)
        if len(df_thresh[(df_thresh.host == key[0])
                         & (df_thresh.name == key[1])]) == 0:
            print('Anomaly: std in train was 0, now it is not')
            anoms.append((key[1], key[0]))
            continue
        thresh = df_thresh[(df_thresh.host == key[0])
                           & (df_thresh.name == key[1])]['thresh'].values[0]
        if np.isnan(thresh):
            print("SR didn't generate a threshold because of low std for window > 10, skipping")
            continue

        d = res[key].dropna()
        od = SpectralResidual(threshold=thresh,
                              window_amp=10,
                              window_local=10,
                              n_est_points=5,
                              n_grad_points=5)
        if len(d) < 10:
            print('Rolling window data empty! Skipping')
            continue
        outliers = od.predict(d.values)['data']
        if np.sum(outliers['is_outlier'][-5:-2]) > 0:
            print(outliers['is_outlier'])
            print("ST Threshold Anomaly!")
            anoms.append((key[1], key[0]))
    print("It took",
          time.time() - start, "seconds to find", len(anoms), "anomalies")
    return anoms
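For completeness, a sketch of how find_anoms might be invoked; the file name and host identifiers are assumptions:

import pandas as pd

df = pd.read_csv('host_kpis.data')  # columns: cmdb_id, name, timestamp, value
anoms = find_anoms(hosts=['os_001', 'os_002'], df=df)
for name, host in anoms:
    print(name, 'on', host, 'looks anomalous')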