m=args.period, information_criterion="aicc", seasonal=args.seasonal, error_action="ignore", suppress_warnings=True, ) elif args.fit: with warnings.catch_warnings(): warnings.simplefilter("ignore") arima = ARIMA(order=args.order, seasonal_order=args.seasonal_order) arima.fit(train) print(arima.summary()) residuals = arima.resid() print("train lengths: data={} resid={}".format( train_len, residuals.shape[0])) len_delta = train_len - residuals.shape[0] # Diagnostics plot arima.plot_diagnostics(lags=50) box_ljung(residuals, nlags=20).format() plt.gcf().suptitle('Diagnostics Plot') plt.figure() plt.plot(df.value.index[len_delta:train_len], np.abs(residuals), label="abs(residuals)") plt.plot(df.value, label="data", alpha=0.5) # fig, axes = plt.subplots(3, 1, sharex=True) # axes[0].plot(arima.resid(), label="residuals") # axes[1].plot(arima.resid()**2, label="residuals^2")
plt.gcf().suptitle('Diagnostics Plot', fontsize=14) # !! not necessary !! Everything already plotted # Plot Residuals and fitted values # plt.figure() # fitted_values = arima.predict_in_sample() # plt.plot(df.index[:train_len - 1], fitted_values, # color='C0', label="Fitted values") # plt.plot(pd.to_datetime(df.index), data, color='C1', label="Data") # plt.plot(df.index[:train_len - 1], arima.resid(), # color='C2', label="Residuals") # plt.gca().grid(which='both', axis='x', linestyle='--') # plt.title("Residuals and fitted values") # plt.legend() print("SSE: {}".format((arima.resid()**2).sum())) # Plot fitted values and forecasts predictions = arima.predict(n_periods=test.shape[0]) fitted_values = arima.predict_in_sample() plt.figure() plt.plot(df.index[train_len:], test, '--', color='C0', label="test set") plt.plot(df.index[train_len:], predictions, '--', color='C1', label="forecasted values") plt.plot(df.index[:train_len], train, color='C0', label="train set") plt.plot(df.index[:train_len - 1], fitted_values, color='C1',
joblib.dump(arima, fpath, compress=3) else: print("Reading model from disk") arima = joblib.load(fpath) gt_pred, gt_windows = get_gt_arrays( df.index, df.index, labels, labels_windows ) # Compute metrics metrics_columns = ["precision", "recall", "f_score", "nab_score"] Metrics = collections.namedtuple("Metrics", metrics_columns) window_size = 30 alpha = 0.15 pred = get_arima_predictions(arima.resid(), window_size, alpha) print("Anomalies number:", pred[pred == -1].shape[0]) len_delta = len(df.value) - pred.shape[0] nab_score = get_nab_score(gt_windows, pred) simple_metrics = get_simple_metrics(gt_pred[len_delta:], pred) metrics = simple_metrics + (nab_score,) metrics = Metrics(*metrics) anomalies = df.value[len_delta:][pred == -1] ax = make_predictions_plots( whole_df, df, labels, labels_windows, "", anomalies, "" ) line = "& {} & {:.2f} & {:.2f} & {:.2f} & {:.2f} \\\\".format( "\\emph{ARIMA}", metrics.nab_score,