Example #1
0
def test_sr(sr_params):
    """Check SpectralResidual predictions on a clean and a perturbed series.

    ``sr_params`` unpacks to ``(window_amp, window_local, n_est_points,
    return_instance_score)``. The series ``X``, its perturbed variant
    ``X_pert`` and the time axis ``t`` are not defined in this function; they
    are resolved from the enclosing scope.
    """
    window_amp, window_local, n_est_points, return_instance_score = sr_params

    threshold = 2.5
    detector_kwargs = {
        'threshold': threshold,
        'window_amp': window_amp,
        'window_local': window_local,
        'n_est_points': n_est_points,
    }
    od = SpectralResidual(**detector_kwargs)

    # The detector must keep the threshold it was given and expose its metadata.
    expected_meta = dict(name='SpectralResidual',
                         detector_type='online',
                         data_type='time-series',
                         version=__version__)
    assert od.threshold == threshold
    assert od.meta == expected_meta

    # Unperturbed series: at most two points may be flagged.
    preds_in = od.predict(X, t, return_instance_score=return_instance_score)
    clean = preds_in['data']
    assert clean['is_outlier'].sum() <= 2.
    if not return_instance_score:
        assert clean['instance_score'] is None
    else:
        # Flags must match the scores that exceed the threshold.
        above = clean['instance_score'] > od.threshold
        assert clean['is_outlier'].sum() == above.astype(int).sum()

    # Perturbed series: at least one point must be flagged.
    preds_out = od.predict(X_pert,
                           t,
                           return_instance_score=return_instance_score)
    perturbed = preds_out['data']
    assert perturbed['is_outlier'].sum() > 0
    if not return_instance_score:
        assert perturbed['instance_score'] is None
    else:
        above = perturbed['instance_score'] > od.threshold
        assert perturbed['is_outlier'].sum() == above.astype(int).sum()
    assert preds_out['meta'] == od.meta
Example #2
0
def test_detector(signal, window_amp, window_local, n_est_points,
                  return_instance_score):
    """Check SpectralResidual predictions on the series supplied by ``signal``.

    ``signal`` is indexed with the keys ``"t"``, ``'X'`` and ``'X_pert'``; the
    remaining arguments are forwarded to the detector constructor or to
    ``predict``.
    """
    t = signal["t"]
    X = signal['X']
    X_pert = signal['X_pert']

    def _assert_scores_consistent(data):
        # With scores requested, the flag count must equal the number of
        # scores above the threshold; otherwise no scores may be returned.
        if return_instance_score:
            over_threshold = (data['instance_score'] > od.threshold).astype(int)
            assert data['is_outlier'].sum() == over_threshold.sum()
        else:
            assert data['instance_score'] is None

    threshold = 6
    od = SpectralResidual(
        threshold=threshold,
        window_amp=window_amp,
        window_local=window_local,
        n_est_points=n_est_points,
    )

    assert od.threshold == threshold
    assert od.meta == dict(name='SpectralResidual',
                           detector_type='online',
                           data_type='time-series',
                           version=__version__)

    # Unperturbed series: at most two points may be flagged.
    preds_in = od.predict(X, t, return_instance_score=return_instance_score)
    assert preds_in['data']['is_outlier'].sum() <= 2.
    _assert_scores_consistent(preds_in['data'])

    preds_out = od.predict(X_pert,
                           t,
                           return_instance_score=return_instance_score)
    # check if we detect at least the number of perturbed points
    n_detected = preds_out['data']['is_outlier'].sum()
    assert n_detected >= 10
    _assert_scores_consistent(preds_out['data'])
    assert preds_out['meta'] == od.meta
def spectral_residual(df):
    """Return the rows of ``df`` that a SpectralResidual detector flags.

    The detector is built without a threshold; ``infer_threshold`` is then
    called on the ``'value'`` column with ``threshold_perc=99`` before the
    same data is passed to ``predict``. The sample positions
    ``0 .. len(df) - 1`` are used as the time axis.
    """
    detector_config = {
        'threshold': None,   # threshold for outlier score
        'window_amp': 20,    # window for the average log amplitude
        'window_local': 20,  # window for the average saliency map
        # nb of estimated points padded to the end of the sequence
        'n_est_points': 20,
    }
    od = SpectralResidual(**detector_config)

    values = np.array(df['value'])
    steps = np.arange(0, len(df))

    od.infer_threshold(values, steps, threshold_perc=99)
    result = od.predict(values, steps, return_instance_score=True)

    # Boolean mask selecting the rows marked as outliers.
    flagged = result['data']['is_outlier'] == 1
    return df.loc[flagged]
Example #4
0
def find_anoms(hosts, df):
    """Return ``(kpi name, host)`` pairs whose recent KPI values look anomalous.

    ``df`` is grouped by ``('cmdb_id', 'name')``. For every group whose
    ``cmdb_id`` is in ``hosts`` and whose KPI is listed as non-flat in
    ``kpi_summary_info.data``, the ``value`` column is resampled to that KPI's
    interval (1 or 5 minutes), interpolated, standardised
    (``(d - mean) / std``) and smoothed with a 10-sample rolling mean. Each
    smoothed series is then scored by a ``SpectralResidual`` detector using
    the threshold read from ``thresh_99_999.data``.

    A pair is reported when either

    * no threshold row exists for it in ``thresh_99_999.data``, or
    * the detector flags at least one sample in positions ``[-5:-2]`` of the
      smoothed series.

    Args:
        hosts: Container of ``cmdb_id`` values to inspect.
        df: Frame with ``cmdb_id``, ``name``, ``timestamp`` and ``value``
            columns. ``timestamp`` is divided by 1000 and passed to
            ``datetime.fromtimestamp`` (presumably epoch milliseconds --
            confirm with the data producer).

    Returns:
        List of ``(name, cmdb_id)`` tuples.

    Side effects:
        Reads ``kpi_summary_info.data`` and ``thresh_99_999.data`` from the
        current working directory and prints progress messages.
    """
    start = time.perf_counter()
    kpis = dict(tuple(df.groupby(['cmdb_id', 'name'])))
    res = {}
    anoms = []

    df_info = pd.read_csv('kpi_summary_info.data')
    df_thresh = pd.read_csv('thresh_99_999.data')

    # Map each non-flat KPI to its resampling rule. 'min' / '5min' are the
    # supported spellings of the deprecated 'T' / '5T' aliases. The 1-minute
    # entries are written last so they win if a KPI is listed under both
    # intervals, matching the original if/elif precedence.
    not_flat = df_info['is_flat'].eq(False)
    resample_rule = {}
    for interval, rule in (('5min', '5min'), ('1min', 'min')):
        selected = df_info[(df_info['interval'] == interval) & not_flat]
        for kpi in selected['kpi'].unique():
            resample_rule[kpi] = rule

    print('Calculating rolling window')
    for key, frame in kpis.items():
        host, kpi_name = key
        if host not in hosts:
            continue
        if frame['value'].std() == 0:
            continue
        rule = resample_rule.get(kpi_name)
        if rule is None:
            continue

        # Convert timestamps only for series that are actually analysed, and
        # build a new frame rather than assigning into the groupby sub-frame.
        stamps = frame['timestamp'].apply(
            lambda x: datetime.fromtimestamp(x / 1000.0))
        values = (frame.assign(timestamp=stamps)
                  .set_index('timestamp')
                  .sort_index()['value'])

        d = values.resample(rule).mean().interpolate()
        d = (d - d.mean()) / d.std()
        res[key] = d.rolling(10).mean()

    for key, smoothed in res.items():
        host, kpi_name = key
        print('Determining threshold for', key)
        rows = df_thresh[(df_thresh['host'] == host)
                         & (df_thresh['name'] == kpi_name)]
        if rows.empty:
            print('Anomaly,  std in train was 0, now its not')
            anoms.append((kpi_name, host))
            continue
        thresh = rows['thresh'].values[0]
        if np.isnan(thresh):
            print(
                "SR didn't generate threshhold because of low std for window > 10, skipping"
            )
            continue

        d = smoothed.dropna()
        # Guard before building the detector: too few points to score.
        if len(d) < 10:
            print('Rolling window data empty! Skipping')
            continue
        od = SpectralResidual(threshold=thresh,
                              window_amp=10,
                              window_local=10,
                              n_est_points=5,
                              n_grad_points=5)
        outliers = od.predict(d.values)['data']
        # Only the samples at positions -5, -4 and -3 decide the verdict.
        if np.sum(outliers['is_outlier'][-5:-2]) > 0:
            print(outliers['is_outlier'])
            print("ST Threshold Anomaly!")
            anoms.append((kpi_name, host))
    print("It took",
          time.perf_counter() - start, "seconds to find", len(anoms),
          "anomalies")
    return anoms