def checkSeries(series):  # numpy arrays or series
    """Display diagnostic plots and print the autocorrelations of a series.

    Args:
        series: A numpy array or a pandas Series. A Series (or any subclass
            of it) is converted to a numpy array before being passed on.

    Side effects:
        Calls ``pmd.tsdisplay``, ``pmd.plot_acf`` and ``pmd.plot_pacf`` on the
        data and prints the result of ``pmd.acf``.
    """
    # isinstance also accepts Series subclasses, which an exact type
    # comparison would silently skip.
    if isinstance(series, pd.Series):
        series = series.to_numpy()
    pmd.tsdisplay(series)
    print(pmd.acf(series))
    pmd.plot_acf(series)
    pmd.plot_pacf(series)
# Scratch check of lag-1 differencing done by hand.
# NOTE(review): this excerpt starts mid-snippet; `x` is bound somewhere that
# is not visible here.
# Pairwise differences written out by hand; evaluates to (-6, -2, 7, 25).
(4-10), (2-4), (9-2), (34-9)
x # 2
x_lag = x[1:] # second lag
x_lag
x[:-1]
# Each element minus its predecessor, i.e. one round of differencing.
x = x_lag - x[:-1] # x = [ 4., 9., 18.]
# NOTE(review): the last term below gives 11, not the 18 listed above; given
# the first tuple ends in 25, it presumably should read (25-7) -- confirm.
(-2 - (-6)), (7 - (-2)), (18-7) #check this
#%%% Stationary
import pmdarima as pm
from pmdarima import datasets
y = datasets.load_lynx()
pm.plot_acf(y)
from pmdarima.arima.stationarity import ADFTest
# Test whether we should difference at the alpha=0.05
# significance level
adf_test = ADFTest(alpha=0.05)
p_val, should_diff = adf_test.should_diff(y) # (0.01, False)
p_val
# The verdict, per the ADF test, is that we should not difference.
# Pmdarima also provides a more handy interface for estimating your d
# parameter more directly. This is the preferred public method for
# accessing tests of stationarity:
from pmdarima.arima.utils import ndiffs
# Estimate the number of differences using an ADF test:
n_adf = ndiffs(y, test='adf') # -> 0
def predict_arima(df):
    """Fit a seasonal ARIMA model to ``df`` and return a forecast.

    The orders p and q are derived from how many ACF / PACF values reach
    ``getThreshold()``; d comes from ``ndiffs`` and the seasonal differencing
    term from ``CHTest`` (season length 12).

    Args:
        df: Indexed pandas object with a ``"memory_used"`` column.

    Returns:
        A DataFrame with a single ``"prediction"`` column indexed by the
        index of the last 30% of ``df``, or ``None`` if fitting, forecasting
        or plotting raised.

    Side effects:
        Reads ``forecast.pickle`` (if present) to report the error of the
        previous forecast, writes ``pacf.png`` / ``acf.png``, rewrites
        ``arima.pickle`` and ``forecast.pickle``, renders a plot named
        ``"prediction"`` and prints progress to stdout.
    """
    time_in = current_milli_time()

    # --- Error of the forecast stored by the previous call -----------------
    try:
        with open("forecast.pickle", "rb") as forecast_in:
            future_forecast = pickle.load(forecast_in)
        # The original called .append() on the file handle here, which always
        # raised and made the whole report unreachable.
        n_common = min(len(df), len(future_forecast))
        # NOTE(review): the forecast is stored below under the column
        # "prediction", so this "memory_used" lookup presumably raises and
        # falls through to the except branch -- confirm the intended column.
        error = (df[:n_common]["memory_used"]
                 - future_forecast[:n_common]["memory_used"])

        overestimation = [x for x in error if x < 0]
        underestimation = [x for x in error if x >= 0]
        # Guard the means: either list may be empty.
        mean_over = (sum(overestimation) / len(overestimation)
                     if overestimation else 0.0)
        mean_under = (sum(underestimation) / len(underestimation)
                      if underestimation else 0.0)
        print("UNDERESTIMATION ERROR: " + str(mean_under))
        print("OVERESTIMATION ERROR: " + str(mean_over))
        print("Mean Absolute Error in Last iteration " + str(error))
    except Exception:
        # Best effort: no usable previous forecast (e.g. first run).
        print("RMSE To be computed")

    # --- Diagnostic plots ----------------------------------------------------
    try:
        pm.plot_pacf(df, show=False).savefig('pacf.png')
        pm.plot_acf(df, show=False).savefig('acf.png')
    except Exception:
        print("Data points insufficient for ACF & PACF")

    # --- Persist the current observations -----------------------------------
    # The original also loaded the previous pickle and called .append() on it
    # without keeping the result; the file always ended up holding ``df``.
    with open("arima.pickle", "wb") as arima_data_out:
        pickle.dump(df, arima_data_out)

    # --- Differencing tests ---------------------------------------------------
    nd = 1
    nsd = 1
    try:
        adf_test = ADFTest(alpha=0.05)
        # Result is unused, but a failure here still selects the fallback.
        p_val, should_diff = adf_test.is_stationary(df["memory_used"])
        nd = ndiffs(df, test='adf')
        logging.info(nd)
        nsd = nsdiffs(df, 12)
        logging.info(nsd)
    except Exception:
        nd = 1
        print("Exception on tests")

    ch_test = CHTest(12)
    try:
        nsd = ch_test.estimate_seasonal_differencing_term(df)
    except Exception as e:
        print(e)
        logging.error(e)

    # --- ARIMA model: derive p and q from the data ---------------------------
    # NOTE(review): p is taken from the ACF and q from the PACF; the usual
    # reading is the other way round -- confirm this is intentional.
    acf_lags = acf(df["memory_used"])
    acf_lags_threshold = [x for x in acf_lags if x >= getThreshold()]
    p = min(len(acf_lags_threshold), 4)

    pacf_lags = pacf(df["memory_used"])
    pacf_lags_threshold = [x for x in pacf_lags if x >= getThreshold()]
    q = min(len(pacf_lags_threshold), 1)
    d = nd

    # Only the hold-out part is used: its length and index shape the forecast.
    _, test = train_test_split(df, shuffle=False, test_size=0.3)

    # If data is seasonal set the values of P,D,Q in seasonal order
    stepwise_model = ARIMA(
        order=(p, d, q),
        seasonal_order=(0, nsd, 0, 12),
        suppress_warnings=True,
        scoring='mse',
    )
    plot_title = str(p) + " " + str(nd) + " " + str(q)
    print("Model with p=" + str(p) + " d=" + str(d) + " q=" + str(q))

    try:
        stepwise_model.fit(df)
        # Vary the periods as per the forecasting window
        #   n_periods = 30 -> 5 mins
        #   n_periods = 60 -> 10 mins
        #   n_periods = 90 -> 15 mins
        future_forecast = stepwise_model.predict(n_periods=len(test))
        future_forecast = pd.DataFrame(future_forecast, index=test.index,
                                       columns=["prediction"])
        res = pd.concat([df, future_forecast], axis=1)

        # Save the forecast for the error report of the next call.
        with open("forecast.pickle", "wb") as forecast_out:
            pickle.dump(future_forecast, forecast_out)

        trace1 = go.Scatter(x=res.index, y=res["prediction"],
                            name="Prediction", mode='lines')
        trace2 = go.Scatter(x=df.index, y=df["memory_used"],
                            name="DF data", mode='lines')
        fig = go.Figure(data=[trace1, trace2],
                        layout=go.Layout(title=plot_title))
        plot(fig, filename="prediction")

        print("Current values")
        print(df)
        print("Predicted Data Points")
        print(future_forecast)
        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        return future_forecast
    except Exception as e:
        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        print(e)
        return None
def test_plot_acf(plot_type, dataset):
    """Call ``pm.plot_acf`` on ``dataset`` with ``show=False`` and return its result.

    ``plot_type`` is accepted but not used by the body.
    """
    acf_result = pm.plot_acf(dataset, show=False)
    return acf_result
def predict_arima(df):
    """Select a seasonal model with ``auto_arima`` and forecast from ``df``.

    Args:
        df: Indexed pandas object with a ``"memory_used"`` column.

    Returns:
        A DataFrame with a single ``"prediction"`` column indexed by the
        index of the last 30% of ``df``, or ``None`` if fitting, forecasting
        or plotting raised.

    Side effects:
        Plots the raw series, reads ``forecast.pickle`` (if present) to print
        the mean absolute error of the previous forecast, writes
        ``pacf.png`` / ``acf.png``, rewrites ``arima.pickle`` and
        ``forecast.pickle`` and renders a plot named ``"prediction"``.
    """
    trace = go.Scatter(x=df.index, y=df["memory_used"], mode='lines+markers')
    data = [trace]

    # --- Error of the forecast stored by the previous call -----------------
    try:
        with open("forecast.pickle", "rb") as forecast_in:
            future_forecast = pickle.load(forecast_in)
        # The original called .append() on the file handle here, which always
        # raised, so the error was never computed.
        n_common = min(len(df), len(future_forecast))
        error = mean_absolute_error(df[:n_common],
                                    abs(future_forecast[:n_common]))
        print("Mean Absolute Error in Last iteration " + str(error))
    except Exception:
        # Best effort: no usable previous forecast (e.g. first run).
        print("RMSE To be computed")

    plot(data, filename="memory-used-overtime")

    # --- Diagnostic plots ----------------------------------------------------
    try:
        pm.plot_pacf(df, show=False).savefig('pacf.png')
        pm.plot_acf(df, show=False).savefig('acf.png')
    except Exception:
        print("Data points insufficient for ACF & PACF")

    # --- Persist the current observations -----------------------------------
    # The original loaded the previous pickle, discarded the result of
    # .append(df) and then rebound ``df`` to that stale object, so the model
    # was trained on old data instead of the caller's input.
    # NOTE(review): if history is meant to accumulate across calls, combine
    # the stored frame with ``df`` here -- confirm with the caller.
    with open("arima.pickle", "wb") as arima_data_out:
        pickle.dump(df, arima_data_out)

    # --- Auto ARIMA model ------------------------------------------------------
    train, test = train_test_split(df, shuffle=False, test_size=0.3)

    stepwise_model = auto_arima(
        train,
        start_p=0, start_q=0,
        max_p=4, max_q=4,
        m=12,
        start_P=0, start_Q=0,
        seasonal=True,
        d=0, max_d=2,
        D=1, max_D=2,
        trace=True,
        error_action='ignore',
        suppress_warnings=True,
        stepwise=True,
    )

    try:
        # Refit the selected model on the full series before forecasting.
        stepwise_model.fit(df)
        future_forecast = stepwise_model.predict(n_periods=len(test))
        future_forecast = pd.DataFrame(future_forecast, index=test.index,
                                       columns=["prediction"])
        res = pd.concat([df, future_forecast], axis=1)

        # Save the forecast for the error report of the next call.
        with open("forecast.pickle", "wb") as forecast_out:
            pickle.dump(future_forecast, forecast_out)

        trace1 = go.Scatter(x=res.index, y=res["prediction"],
                            name="Prediction", mode='lines')
        trace2 = go.Scatter(x=df.index, y=df["memory_used"],
                            name="DF data", mode='lines')
        data = [trace1, trace2]
        # The original passed an unassigned name ``x`` as the title; the
        # resulting NameError was swallowed below and the function returned
        # None even after a successful fit.
        layout = go.Layout(title="Auto-ARIMA forecast")
        fig = go.Figure(data=data, layout=layout)
        plot(fig, filename="prediction")

        print("Current values")
        print(df)
        print("Predicted Data Points")
        print(future_forecast)
        return future_forecast
    except Exception as e:
        print(e)
        return None
# Fetch every row of traffic_date, then release the database connection.
traffic = connection.execute("select * from traffic_date").fetchall()
print("Data extracted from sales_forecast successfully!")
connection.close()

# Build the frame, label its columns from traffic_cols and sanitize the names.
traffic = pd.DataFrame(traffic)
traffic.columns = list(traffic_cols.keys())
traffic.columns = cleaner.sanitize(traffic.columns)

# Keep only the date/traffic pair and index the series by parsed dates.
traffic = traffic.filter(['date', 'traffic']).set_index(['date'])
traffic.index = pd.to_datetime(traffic.index)

# Shift every observation by a small constant.
traffic['traffic'] = traffic['traffic'] + 0.001

# Disabled decomposition, kept for reference:
# result = seasonal_decompose(traffic, model='multiplicative', period=7)
# result.plot()
# plt.show()

pm.plot_acf(traffic)

# Stepwise search with fixed d=1, D=1 and a seasonal period of 7.
stepwise_model = auto_arima(
    traffic,
    start_p=1,
    start_q=1,
    max_p=5,
    max_q=5,
    m=7,
    start_P=0,
    seasonal=True,
    d=1,
    D=1,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True,
)
from src.data.load_data import load_processed_data

# Setting styles
InteractiveShell.ast_node_interactivity = "all"
sns.set(style="whitegrid", color_codes=True)

#%%
data = load_processed_data()

#%%
# Determining the differencing manually to speed up model evaluation
n_lags = 24 * 1
power = data["Global_active_power"]

# Undifferenced series: correlograms plus the ADF p-value.
pm.plot_acf(power, lags=n_lags, zero=False)
pm.plot_pacf(power, lags=n_lags, zero=False)
raw_p_value = adfuller(power)[1]
print("The p-value for the ADF test is ", raw_p_value)

# Same checks after one round of first-order differencing.
power_diff = power.diff(1).dropna()
pm.plot_acf(power_diff, lags=n_lags, zero=False)
pm.plot_pacf(power_diff, lags=n_lags, zero=False)
diff_p_value = adfuller(power_diff)[1]
print("The p-value for the ADF test is ", diff_p_value)