def test_binary_label_to_list_freq_as_period_merge_consecutive():
    """Check ``to_events`` with ``freq_as_period=True, merge_consecutive=True``.

    Four daily binary label columns (mixed runs, leading/trailing runs,
    all zeros, all ones) are converted to events. Every run of ones is
    expected to become a single ``(start, end)`` tuple whose end is the last
    nanosecond of the run's final day. Each column is then converted on its
    own as a Series, using the default options, and must yield the same list.
    """
    # One row per label type; transposed below so each type becomes a column.
    label_matrix = np.array(
        [
            [0, 1, 0, 0, 1, 1, 0, 1, 1, 0],
            [1, 0, 0, 0, 0, 0, 0, 1, 1, 1],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        ]
    )
    daily_index = pd.date_range(start=0, periods=10, freq="1d")
    binary_series = pd.DataFrame(
        label_matrix.T,
        index=daily_index,
        columns=["type1", "type2", "type3", "type4"],
    )

    expected = {
        "type1": [
            (
                Timestamp("1970-01-02 00:00:00"),
                Timestamp("1970-01-02 23:59:59.999999999"),
            ),
            (
                Timestamp("1970-01-05 00:00:00"),
                Timestamp("1970-01-06 23:59:59.999999999"),
            ),
            (
                Timestamp("1970-01-08 00:00:00"),
                Timestamp("1970-01-09 23:59:59.999999999"),
            ),
        ],
        "type2": [
            (
                Timestamp("1970-01-01 00:00:00"),
                Timestamp("1970-01-01 23:59:59.999999999"),
            ),
            (
                Timestamp("1970-01-08 00:00:00"),
                Timestamp("1970-01-10 23:59:59.999999999"),
            ),
        ],
        "type3": [],
        "type4": [
            (
                Timestamp("1970-01-01 00:00:00"),
                Timestamp("1970-01-10 23:59:59.999999999"),
            )
        ],
    }

    # Whole-frame conversion: the result is compared against a dict keyed by
    # column name.
    events = to_events(
        binary_series, freq_as_period=True, merge_consecutive=True
    )
    assert events == expected

    # Column-by-column conversion (default options) must agree with the
    # corresponding entry of the frame-level expectation.
    for number in range(1, 5):
        column = "type{}".format(number)
        assert to_events(binary_series[column]) == expected[column]
def arundo_adtk(data_path):
    """Detect anomalies with an STL + quantile pipeline and plot the result.

    The data set is loaded through ``get_data(data_path)``. Its ``value``
    column, indexed by a ``date`` column derived from ``timestamp``, is fed to
    a two-step adtk pipeline (STL decomposition, then quantile detection).
    The detections are plotted together with the rows whose ``label`` is 1.

    Args:
        data_path: Forwarded unchanged to ``get_data``. The loaded frame is
            indexed with the ``timestamp``, ``value`` and ``label`` keys.

    Side effects:
        Adds a ``date`` column to the loaded frame, prints the validated
        series, the detections and the known events, saves the figure to
        ``img_path + "arundo_adtk.png"`` at 1000 dpi and calls ``plt.show()``.
    """
    data = get_data(data_path)

    # Time conversion: numeric timestamps -> datetime objects
    # (datetime.fromtimestamp yields naive local-time values).
    data['date'] = data['timestamp'].apply(datetime.fromtimestamp)

    # Use the date as the index of the value series, then validate it.
    s_train = validate_series(data[['date', 'value']].set_index('date'))
    print(s_train)
    # Debug hook kept from the original: plot(s_train)

    # STL decomposition + outlier (quantile) detection.
    pipeline = Pipeline(
        [
            ("deseasonal", STLDecomposition(freq=20)),
            ("quantile_ad", QuantileAD(high=0.9997, low=0.005)),
        ]
    )
    anomalies = pipeline.fit_detect(s_train)
    print(anomalies)
    # Debug hook kept from the original (draws the detections only):
    # plot(s_train, anomaly_pred=anomalies, ap_color='red',
    #      ap_marker_on_curve=True)

    # Ground truth: keep only the rows flagged with label == 1 and turn them
    # into events.
    labelled_rows = data.loc[data['label'] == 1, ['date', 'label']]
    known_anomalies = to_events(labelled_rows.set_index('date'))
    print(known_anomalies)

    # Draw the series with both the known and the predicted anomalies.
    plot(
        s_train,
        anomaly_true=known_anomalies,
        anomaly_pred=anomalies,
        ap_color='red',
        ap_marker_on_curve=True,
        at_color="orange",
    )
    plt.savefig(img_path + "arundo_adtk.png", dpi=1000)
    plt.show()
def test_nan():
    """Check how ``to_events`` handles NaN entries in a daily label series.

    The expected events contain only the runs of ones; the NaN positions
    appear in no event and separate the runs around them, just as the zero
    entries do.
    """
    days = pd.date_range(start="2017-1-1", periods=12, freq="D")
    labels = [1, 1, 0, 0, 0, np.nan, 1, 1, np.nan, np.nan, 0, 1]
    s = pd.Series(labels, index=days)

    expected = [
        (
            Timestamp("2017-01-01 00:00:00"),
            Timestamp("2017-01-02 23:59:59.999999999"),
        ),
        (
            Timestamp("2017-01-07 00:00:00"),
            Timestamp("2017-01-08 23:59:59.999999999"),
        ),
        (
            Timestamp("2017-01-12 00:00:00"),
            Timestamp("2017-01-12 23:59:59.999999999"),
        ),
    ]

    assert to_events(s) == expected
# NOTE(review): this fragment starts mid-script; `df`, `dti`, `Y`, `pd` and
# `plt` are defined before the visible region.

# Parse the 'date' column into datetimes, make that the index, and drop the
# now-redundant 'date' column.
df['datetime'] = pd.to_datetime(df['date'])
df = df.set_index('datetime')
df.drop(['date'], axis=1, inplace=True)
df.head()  # result is discarded here; presumably a leftover notebook preview
s_train = df

# The same for the label: build a frame from `dti` (as the 'date' column),
# store `Y` under column key 1, and index it by the parsed datetime.
# NOTE(review): presumably `Y` holds binary anomaly labels aligned with `dti`
# — confirm against where it is created.
df = pd.DataFrame(dti, columns=['date'])
df[1] = (Y)
df['datetime'] = pd.to_datetime(df['date'])
df = df.set_index('datetime')
df.drop(['date'], axis=1, inplace=True)
df.head()  # result is discarded here as well

# Convert the label frame into events and plot them against the series.
from adtk.data import to_events
known_anomalies = to_events(df)
from adtk.visualization import plot
plot(s_train, anomaly_true=known_anomalies)
plt.plot(Y)  # also draws the raw `Y` values (assuming `plt` is matplotlib.pyplot)

# Fit-and-detect with a SeasonalAD built with its default arguments, then plot
# the detections on the curve in red.
from adtk.detector import SeasonalAD
seasonal_ad = SeasonalAD()
anomalies = seasonal_ad.fit_detect(s_train)
plot(s_train, anomaly_pred=anomalies, ap_color='red', ap_marker_on_curve=True)

# Repeat with a default LevelShiftAD; `anomalies` is rebound, so the seasonal
# detections are no longer available after this point.
from adtk.detector import LevelShiftAD
levelshift_ad = LevelShiftAD()
anomalies = levelshift_ad.fit_detect(s_train)
plot(s_train, anomaly_pred=anomalies, ap_color='red', ap_marker_on_curve=True)