Ejemplo n.º 1
0
def test_binary_label_to_list_freq_as_period_merge_consecutive():
    """Check ``to_events`` on daily binary labels with merged consecutive days.

    A four-column frame of 0/1 labels over ten daily timestamps is converted
    with ``freq_as_period=True`` and ``merge_consecutive=True``. Every run of
    consecutive 1s is expected as a single ``(start, end)`` pair whose end is
    the last nanosecond of the run's final day; an all-zero column yields an
    empty list. Each column is then converted on its own, as a Series with
    default arguments, and must give the same events as in the frame result.
    """

    def span(first, last):
        # One expected event, expressed as a pair of timestamps.
        return (Timestamp(first), Timestamp(last))

    # One row per label type; transposed below so each type becomes a column.
    labels_by_type = np.array(
        [
            [0, 1, 0, 0, 1, 1, 0, 1, 1, 0],
            [1, 0, 0, 0, 0, 0, 0, 1, 1, 1],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        ]
    )
    label_frame = pd.DataFrame(
        labels_by_type.T,
        index=pd.date_range(start=0, periods=10, freq="1d"),
        columns=["type1", "type2", "type3", "type4"],
    )

    expected = {
        "type1": [
            span("1970-01-02 00:00:00", "1970-01-02 23:59:59.999999999"),
            span("1970-01-05 00:00:00", "1970-01-06 23:59:59.999999999"),
            span("1970-01-08 00:00:00", "1970-01-09 23:59:59.999999999"),
        ],
        "type2": [
            span("1970-01-01 00:00:00", "1970-01-01 23:59:59.999999999"),
            span("1970-01-08 00:00:00", "1970-01-10 23:59:59.999999999"),
        ],
        "type3": [],
        "type4": [
            span("1970-01-01 00:00:00", "1970-01-10 23:59:59.999999999"),
        ],
    }

    # Whole frame at once: the result is keyed by column name.
    observed = to_events(
        label_frame, freq_as_period=True, merge_consecutive=True
    )
    assert observed == expected

    # Column by column, relying on the default arguments of to_events.
    for column, events in expected.items():
        assert to_events(label_frame[column]) == events
Ejemplo n.º 2
0
def arundo_adtk(data_path):
    """Detect anomalies with an STL + quantile pipeline and plot the result.

    The data loaded via ``get_data(data_path)`` is indexed by a ``date``
    column derived from its ``timestamp`` column. The ``value`` column is run
    through a pipeline of ``STLDecomposition`` followed by ``QuantileAD``.
    Rows whose ``label`` equals 1 are converted to events and drawn as the
    known anomalies next to the detected ones. The figure is saved as
    ``arundo_adtk.png`` under the module-level ``img_path`` prefix and then
    shown.

    Args:
        data_path: Location handed to ``get_data``; the loaded frame must
            provide ``timestamp``, ``value`` and ``label`` columns.

    Returns:
        None. Intermediate results are printed to stdout.
    """
    data = get_data(data_path)

    # Convert the timestamps into datetime objects.
    data['date'] = data['timestamp'].apply(datetime.fromtimestamp)

    # Index the values by date and validate the series.
    s_train = validate_series(data[['date', 'value']].set_index('date'))
    print(s_train)

    # STL decomposition followed by quantile-based outlier detection.
    detector = Pipeline([
        ("deseasonal", STLDecomposition(freq=20)),
        ("quantile_ad", QuantileAD(high=0.9997, low=0.005)),
    ])
    anomalies = detector.fit_detect(s_train)
    print(anomalies)

    # Keep only the rows labelled 1 and turn them into events for plotting.
    labelled_rows = data.loc[data['label'] == 1, ['date', 'label']]
    known_anomalies = to_events(labelled_rows.set_index('date'))
    print(known_anomalies)

    # Draw the detection result together with the known anomalies.
    plot(
        s_train,
        anomaly_true=known_anomalies,
        anomaly_pred=anomalies,
        ap_color='red',
        ap_marker_on_curve=True,
        at_color="orange",
    )

    plt.savefig(img_path + "arundo_adtk.png", dpi=1000)
    plt.show()
Ejemplo n.º 3
0
def test_nan():
    """Check that ``to_events`` does not report NaN labels as events.

    The daily series mixes 1, 0 and NaN. Only the runs of 1s (Jan 1-2,
    Jan 7-8 and Jan 12) are expected, each as a ``(start, end)`` pair ending
    on the last nanosecond of its final day; the NaN positions appear in no
    event.
    """
    labels = pd.Series(
        [1, 1, 0, 0, 0, np.nan, 1, 1, np.nan, np.nan, 0, 1],
        index=pd.date_range(start="2017-1-1", periods=12, freq="D"),
    )

    expected_bounds = [
        ("2017-01-01 00:00:00", "2017-01-02 23:59:59.999999999"),
        ("2017-01-07 00:00:00", "2017-01-08 23:59:59.999999999"),
        ("2017-01-12 00:00:00", "2017-01-12 23:59:59.999999999"),
    ]
    expected = [
        (Timestamp(first), Timestamp(last)) for first, last in expected_bounds
    ]

    assert to_events(labels) == expected
Ejemplo n.º 4
0
from adtk.data import to_events
from adtk.detector import LevelShiftAD, SeasonalAD
from adtk.visualization import plot

# Index the value frame by the parsed 'date' column and drop the raw column.
df['datetime'] = pd.to_datetime(df['date'])
df = df.set_index('datetime').drop(['date'], axis=1)
df.head()  # notebook-style preview; the result is discarded here
s_train = df

# Build the label frame the same way: dates from `dti`, labels from `Y`
# stored under the integer column name 1.
df = pd.DataFrame(dti, columns=['date'])
df[1] = Y
df['datetime'] = pd.to_datetime(df['date'])
df = df.set_index('datetime').drop(['date'], axis=1)
df.head()  # notebook-style preview; the result is discarded here
known_anomalies = to_events(df)

# Series with the known anomalies, followed by a plot of the raw labels.
plot(s_train, anomaly_true=known_anomalies)
plt.plot(Y)

# Seasonal-pattern detector.
seasonal_ad = SeasonalAD()
anomalies = seasonal_ad.fit_detect(s_train)
plot(
    s_train,
    anomaly_pred=anomalies,
    ap_color='red',
    ap_marker_on_curve=True,
)

# Level-shift detector; `anomalies` is rebound to its result.
levelshift_ad = LevelShiftAD()
anomalies = levelshift_ad.fit_detect(s_train)
plot(
    s_train,
    anomaly_pred=anomalies,
    ap_color='red',
    ap_marker_on_curve=True,
)