def test_standardized_forecasts_error():
    """
    Check the standardized forecast errors against a manual computation.

    An ARIMA(1,1,0) is fitted to log GDP and the first row of the filter's
    ``standardized_forecasts_error`` is compared with the forecast error
    divided by the square root of its (scalar) covariance.
    """
    # Load the reference dataset into a quarterly-indexed frame
    reference = results_kalman_filter.uc_uni
    quarters = pd.date_range('1947-01-01', '1995-07-01', freq='QS')
    frame = pd.DataFrame(reference['data'], index=quarters, columns=['GDP'])
    frame['lgdp'] = np.log(frame['GDP'])

    # Fit an ARIMA(1,1,0) to log GDP
    fitted = SARIMAX(frame['lgdp'], order=(1, 1, 0)).fit(disp=-1)
    kf = fitted.filter_results

    # Alternative calculation: error scaled by the root of its covariance
    expected = kf.forecasts_error[0] / np.sqrt(kf.forecasts_error_cov[0, 0])

    assert_allclose(kf.standardized_forecasts_error[0], expected)
def sarima_model(feature):
    """Fit a SARIMA model on one column of ``df1`` and print the hold-out RMSE.

    The first 80% of the column is used for fitting; the remainder is
    predicted and compared with the actual values.

    Args:
        feature: Name of the column of the module-level ``df1`` to model.
    """
    values = df1[feature].values.astype('float32')

    # Split on the modelled series itself so no other column is required.
    split = int(len(values) * 0.8)
    train, test = values[:split], values[split:]

    # NOTE(review): a seasonal period of 1 is unusual and may be rejected by
    # the installed statsmodels version -- confirm the intended period.
    model = SARIMAX(train, order=(1, 1, 1), seasonal_order=(1, 1, 1, 1))
    model_fit = model.fit(disp=False)
    yhat = model_fit.predict(len(train), len(train) + len(test) - 1,
                             typ="levels")

    rmse = sqrt(mean_squared_error(test, yhat))
    print("\n\n SARIMA RMSE: %.5f" % rmse)
    print()
def sarimax_fc(train, test, order, seas_order, exog_train=None, exog_test=None):
    """Fit a SARIMAX model on ``train`` and score its forecast over ``test``.

    Args:
        train: Training series passed to ``SARIMAX`` as endog.
        test: Hold-out series; must support ``len()`` and ``.mean()``.
        order: Non-seasonal ``(p, d, q)`` order.
        seas_order: Seasonal ``(P, D, Q, s)`` order.
        exog_train: Optional exogenous regressors for the training period.
        exog_test: Optional exogenous regressors for the forecast period.

    Returns:
        dict with keys ``'prediction'`` (the forecast, renamed to
        ``'sarima_predictions'``), ``'rmse'`` and ``'rmse_pct'`` (RMSE divided
        by the mean of ``test``).
    """
    model = SARIMAX(train, order=order, exog=exog_train,
                    seasonal_order=seas_order)
    fitted = model.fit()

    start = len(train)
    end = start + len(test) - 1
    pred = fitted.predict(start, end, exog=exog_test,
                          typ='levels').rename('sarima_predictions')

    # Compute the RMSE once and derive the relative error from it.
    rmse_pred = rmse(test, pred)
    return {
        'prediction': pred,
        'rmse': rmse_pred,
        'rmse_pct': rmse_pred / test.mean(),
    }
def test_small_sample_serial_correlation_test():
    # Test the Ljung Box serial correlation test for small samples with df
    # adjustment using the Nile dataset. Ljung-Box statistic and p-value
    # are compared to R's Arima() and checkresiduals() functions in forecast
    # package:
    # library(forecast)
    # fit <- Arima(y, order=c(1,0,1), include.constant=FALSE)
    # checkresiduals(fit, lag=10)
    from statsmodels.tsa.statespace.sarimax import SARIMAX

    nile_frame = nile.data.load_pandas().data
    nile_frame.index = pd.date_range('1871-01-01', '1970-01-01', freq='AS')

    model = SARIMAX(endog=nile_frame['volume'], order=(1, 0, 1), trend='n',
                    freq=nile_frame.index.freq)
    fitted = model.fit()

    ljung_box = fitted.test_serial_correlation(method='ljungbox', lags=10,
                                               df_adjust=True)
    # Keep the last entry along the final axis for the first series
    actual = ljung_box[0, :, -1]
    assert_allclose(actual, [14.116, 0.0788], atol=1e-3)
def sarima_optimizer_aic(train, pdq, seasonal_pdq):
    """Grid-search SARIMA orders and return the pair with the lowest AIC.

    Every combination of ``pdq`` x ``seasonal_pdq`` is fitted on ``train``;
    combinations that fail to fit are skipped.

    Args:
        train: Training series passed to ``SARIMAX``.
        pdq: Iterable of non-seasonal ``(p, d, q)`` orders.
        seasonal_pdq: Iterable of seasonal ``(P, D, Q, s)`` orders.

    Returns:
        ``(best_order, best_seasonal_order)``; both are ``None`` when no
        combination could be fitted.
    """
    best_aic, best_order, best_seasonal_order = float("inf"), None, None
    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                sarimax_model = SARIMAX(train, order=param,
                                        seasonal_order=param_seasonal)
                aic = sarimax_model.fit(disp=0).aic
            except Exception:
                # Best-effort search: an order that cannot be fitted is
                # simply not a candidate.
                continue
            if aic < best_aic:
                best_aic = aic
                best_order = param
                best_seasonal_order = param_seasonal
            print('SARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, aic))
    print('SARIMA{}x{}12 - AIC:{}'.format(best_order, best_seasonal_order,
                                          best_aic))
    return best_order, best_seasonal_order
def test_dynamic_against_sarimax():
    """AutoReg predictions must match SARIMAX for each ``dynamic`` setting."""
    nobs = 1001
    rng = np.random.RandomState(12345678)
    shocks = rng.standard_normal(nobs)

    # Simulate an AR(1) process with coefficient 0.9
    y = np.empty(nobs)
    y[0] = shocks[0] * np.sqrt(1.0 / (1 - 0.9**2))
    for t in range(1, nobs):
        y[t] = 0.9 * y[t - 1] + shocks[t]

    sarimax_res = SARIMAX(y, order=(1, 0, 0), trend='c').fit(disp=False)
    autoreg = AutoReg(y, 1, old_names=False)
    params = sarimax_res.params[:2]

    # Default (non-dynamic) prediction
    expected = sarimax_res.predict(900, 1100)
    actual = autoreg.predict(params, 900, 1100)
    assert_allclose(expected, actual)

    # Dynamic prediction from the start, then from an offset of 50
    for dynamic in (True, 50):
        expected = sarimax_res.predict(900, 1100, dynamic=dynamic)
        actual = autoreg.predict(params, 900, 1100, dynamic=dynamic)
        assert_allclose(expected, actual)
def sarimax_forecast(df):
    '''Forecast the next price with a SARIMAX model.

    The frame is split on the availability of ``price``: rows with a price
    are the history, the first row without one is the forecast target. All
    non-price columns are used as exogenous regressors.

    Returns a tuple ``(forecast, past)``:
    - ``forecast``: one-row DataFrame with columns ``price``,
      ``lower_interval`` and ``upper_interval`` (all NaN when the ``temp``
      value of the target row is missing), index named ``date_time``.
    - ``past``: the last row of the first column of the history.
    '''
    # split past and future
    past = df[~df.price.isnull()]
    future = df[df.price.isnull()].drop('price', axis=1)

    # forecast for next time point only
    future = future.iloc[:1, :]

    # Positional access: the index is not an integer range, so ``[0]`` would
    # be ambiguous.
    if future.temp.isnull().iloc[0]:
        # weather forecast data is not available for that hour
        forecast = lower = upper = np.nan
        print('weather data is not available')
    else:
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)

        # Build Model
        model = SARIMAX(past.price,
                        exog=past.drop('price', axis=1),
                        order=(1, 1, 1),
                        seasonal_order=(1, 0, 2, 7))
        fitted = model.fit(maxiter=300)

        # Forecast once; the point forecast and the interval both come from
        # the same result object.
        results = fitted.get_forecast(1, exog=future)
        forecast = results.predicted_mean
        interval = results.conf_int(alpha=0.05)
        lower = interval['lower price'].iloc[0]
        upper = interval['upper price'].iloc[0]

    # create forecast df with datetimeIndex
    forecast = pd.DataFrame(dict(price=forecast,
                                 lower_interval=lower,
                                 upper_interval=upper),
                            index=future.index)
    forecast.index.name = 'date_time'
    past = past.iloc[-1:, 0]
    return forecast, past
def SARIMA_forecast(experiment, plot_fit=False):
    """
    Fit a SARIMA model on the training data and forecast the test horizon.

    Args:
        experiment(dict): Experiment dictionary; reads 'L_train', 'N_train'
            and 'N_test'.
        plot_fit(bool): When true, draw the training fit and the forecast.

    Returns:
        experiment(dict): The same dictionary with 'L_test_prediction' and
            'L_train_prediction' added as column vectors.
    """
    L_train = experiment['L_train']
    N_train = experiment['N_train']
    N_test = experiment['N_test']

    # Fit SARIMA model
    fitted = SARIMAX(L_train,
                     order=(1, 1, 1),
                     seasonal_order=(1, 1, 0, 24),
                     enforce_invertibility=False,
                     enforce_stationarity=False).fit(disp=False)

    in_sample = fitted.fittedvalues
    out_of_sample = fitted.forecast(steps=N_test)

    # Reshape both to (n, 1) column vectors
    L_test_prediction = out_of_sample.reshape((N_test, 1))
    L_train_prediction = in_sample.reshape((N_train, 1))

    # Optional visual check of the fit and the forecast
    if plot_fit:
        plt.figure()
        plt.plot(L_train)
        plt.plot(L_train_prediction, 'red')
        plt.figure()
        plt.plot(L_test_prediction, 'red')

    experiment['L_test_prediction'] = L_test_prediction
    experiment['L_train_prediction'] = L_train_prediction
    return experiment
def SARIMA(self, idx, numpredictions, order=(0, 1, 1),
           seasonal_order=(0, 1, 1, 24), trend=None):
    """Fit a SARIMA model on the window before ``idx`` and forecast.

    The training window covers up to ``7 * 4 * 24`` rows of ``self.dataX``
    ending just before ``idx``, taken from column ``cfg.prediction['pos']``.
    The fitted results are stored on ``self.trained_model``.

    NOTE(review): ``numpredictions`` is not used; the horizon comes from
    ``cfg.prediction['num_predictions']`` -- confirm this is intended.

    Returns:
        The forecast returned by the fitted model.
    """
    window = 7 * 4 * 24
    first_row = max(idx - window, 0)
    trainingData = self.dataX[first_row:idx, cfg.prediction['pos']]

    model = SARIMAX(trainingData,
                    order=order,
                    trend=trend,
                    seasonal_order=seasonal_order,
                    enforce_stationarity=False,
                    enforce_invertibility=False)

    # Silence fitting warnings only for the duration of the fit
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        self.trained_model = model.fit(disp=False)

    return self.trained_model.forecast(cfg.prediction['num_predictions'])
def SeasonaAutoIntegratedMovingAverageExogenousRegressors(self, data, exodata):
    """Fit one SARIMAX(1,1,1) model per axis and predict each axis.

    Columns 0, 1 and 2 of ``data`` are modelled separately, each with the
    matching column of ``exodata`` as exogenous regressor. Predictions cover
    ``self.start`` to ``self.end - 1``.

    Returns:
        DataFrame of shape ``(self.end - self.start, 3)`` with one column of
        predictions per axis.
    """
    horizon = self.end - self.start
    # Frame sized like the prediction, filled in one column at a time
    datahat = pd.DataFrame(np.zeros(shape=(horizon, 3)))

    for axis in range(3):
        endog = data[axis].values.tolist()
        exog = exodata[axis].values.tolist()
        fitted = SARIMAX(endog,
                         exog=exog,
                         order=(1, 1, 1),
                         seasonal_order=(0, 0, 0, 0)).fit(disp=False)
        # NOTE(review): the original author was unsure about this ``exog``
        # value; it passes the whole ``exodata`` wrapped in a list rather
        # than regressors for the predicted period -- verify.
        datahat.iloc[:, axis] = fitted.predict(self.start,
                                               self.end - 1,
                                               exog=[exodata])
    return datahat
def forcast(i):
    """Fit a SARIMA model on the module-level ``train`` and forecast.

    Prints the model summary, shows the diagnostic plots, plots the
    in-sample prediction and the forecast against ``train`` / ``test``, and
    prints actual vs. forecast for the first ``horizon_data_length`` steps.

    Args:
        i: Label used only in the printed messages.

    Returns:
        ``(yfore, horizon_data_length)`` where ``yfore`` is the list of the
        first ``horizon_data_length`` forecast values.
    """
    sarima_model = SARIMAX(train,
                           order=(1, 0, 1),
                           seasonal_order=(0, 1, 1, 2),
                           enforce_stationarity=False,
                           enforce_invertibility=False)
    sfit = sarima_model.fit()
    print(sfit.summary())
    sfit.plot_diagnostics(figsize=(10, 6))
    plt.show()

    # In-sample prediction (not yet a forecast)
    ypred = sfit.predict(start=0, end=len(train))
    plt.plot(train)
    plt.plot(ypred)
    plt.title("trian")

    # Out-of-sample forecast: the model is extended into the future
    forewrap = sfit.get_forecast(steps=523)
    forecast_val = forewrap.predicted_mean
    plt.plot(train)
    plt.plot(forecast_val)
    plt.plot(test)
    plt.show()

    yfore = []
    for j in range(horizon_data_length):
        print("Actual {} {} {:.2f} forcast {:.2f}".format(
            i, j, test[j], forecast_val[j]))
        yfore.append(forecast_val[j])

    metrics = forecast_accuracy(forecast_val, test)
    # The format string previously had one more placeholder than arguments
    # and raised IndexError.
    print("RMSE is {}={:.2f}".format(i, metrics['rmse']))
    return yfore, horizon_data_length
def sarimaxPrdict(train_y, p_order, p_seasonal_order, vtrend, steps=1,
                  disp=False):
    """Fit a SARIMAX model on ``train_y`` and forecast ``steps`` ahead.

    Args:
        train_y: Training series.
        p_order: Non-seasonal ``(p, d, q)`` order.
        p_seasonal_order: Seasonal ``(P, D, Q, s)`` order.
        vtrend: Trend specification passed to ``SARIMAX``.
        steps: Number of steps to forecast.
        disp: When true, plot the observations from '2018' onwards together
            with the forecast and a +/-5% band around it.

    Returns:
        The forecast produced by the fitted model.
    """
    fitted = SARIMAX(train_y,
                     order=p_order,
                     seasonal_order=p_seasonal_order,
                     trend=vtrend,
                     enforce_stationarity=False,
                     enforce_invertibility=False).fit(disp=False)
    pred = fitted.forecast(steps=steps)

    if not disp:
        return pred

    # Band of +/-5% of the forecast value (not a statistical interval)
    band = pd.DataFrame(index=pred.index)
    band['low'] = pred - pred * 0.05
    band['upper'] = pred + pred * 0.05

    ax = train_y['2018':].plot(label='observed')
    pred.plot(ax=ax, label='Forecast', alpha=.7)
    ax.fill_between(pred.index, band.iloc[:, 0], band.iloc[:, 1],
                    color='k', alpha=.1)
    ax.set_xlabel('Date')
    ax.set_ylabel(train_y.name)
    plt.legend()
    plt.show()
    return pred
def sarima_orders(ts):
    """Print the AIC of every SARIMA order combination on a 0/1 grid.

    All ``(p, d, q)`` with each term in ``{0, 1}`` are combined with all
    seasonal ``(P, D, Q, 12)`` on the same grid. Combinations that cannot be
    fitted are reported and skipped.

    Args:
        ts: Series passed to ``SARIMAX``.
    """
    grid = range(0, 2)
    pdq = list(itertools.product(grid, grid, grid))
    seasonal_pdq = [(P, D, Q, 12) for P, D, Q in pdq]

    print('SARIMA parameters...')
    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                mod = SARIMAX(ts,
                              order=param,
                              seasonal_order=param_seasonal,
                              enforce_stationarity=False,
                              enforce_invertibility=False)
                aic = mod.fit().aic
            except Exception as exc:
                # Report which combination failed and why, then move on.
                print('ARIMA{}x{} - fit failed: {}'.format(
                    param, param_seasonal, exc))
                continue
            print('ARIMA{}x{} - AIC:{}'.format(param, param_seasonal, aic))
def modifyFile(reader, writer, count):
    """Rewrite labels at selected positions with SARIMAX predictions.

    Records are read with ``getData(reader, count)`` until it returns a class
    of 0. For each record a SARIMAX model is fitted on the labels (indexed by
    the module-level ``index``); the labels at the positions listed in the
    module-level ``dateToModify`` are replaced by the rounded, non-negative
    model prediction. If fitting fails the labels are written unchanged.

    Args:
        reader: Source passed through to ``getData``.
        writer: Object with a ``writerow`` method receiving
            ``data[i] + [label[i]]`` for each of the ``count`` rows.
        count: Number of rows per record.
    """
    while True:
        clas, data, label = getData(reader, count)
        if clas == 0:
            break

        series = pd.Series(label)
        series.index = pd.Index(index)

        try:
            model = SARIMAX(series, order=(1, 1, 1),
                            seasonal_order=(0, 1, 1, 7))
            result = model.fit()
        except Exception:
            print("%d: failed to train sarimax model, abort" % clas)
        else:
            for i in dateToModify:
                # Positional access: the prediction is labelled by the
                # series index, not by 0.
                label[i] = round(result.predict(i, i).iloc[0])
                if label[i] < 0:
                    label[i] = 0

        # Rows are written once, whether or not the labels were modified.
        for i in range(count):
            writer.writerow(data[i] + [label[i]])
def GridSearch(self, n_days):
    """Grid-search the ARIMA order with the lowest mean hold-out MAE.

    For every ``(p, d, q)`` with ``p, q`` in 1..4 and ``d`` in 0..2 a model
    is fitted on ``self.Values``. Its parameters are then applied to each
    county series in ``self.Data_Dates`` with the last ``self.v``
    observations held out, and the MAE on those observations is averaged
    over counties. The best order is stored in ``self.BestParams``.

    Orders for which fitting or scoring fails are reported and excluded.

    Args:
        n_days: Passed to ``self.Build_Training_Data`` as ``v``.

    Raises:
        ValueError: If no order could be fitted and scored.
    """
    self.Build_Training_Data(v=n_days, training=True)
    warnings.filterwarnings("ignore")
    params = []
    scores = []
    for p in range(1, 5):
        for q in range(1, 5):
            for d in range(3):
                order = (p, d, q)
                try:
                    model = SARIMAX(self.Values,
                                    order=order,
                                    missing='drop',
                                    enforce_invertibility=False)
                    results = model.fit(disp=0)
                    scores_counties = []
                    for county in self.Counties:
                        DataCounty = self.Data_Dates[county].dropna()
                        n_obs = len(DataCounty)
                        ModelCounty = SARIMAX(DataCounty[:-self.v],
                                              order=order,
                                              missing='drop',
                                              enforce_invertibility=False)
                        res = ModelCounty.smooth(results.params)
                        # ``end`` is inclusive, so this yields exactly the
                        # ``self.v`` held-out steps aligned with ``Y``.
                        fc = res.get_prediction(n_obs - self.v, n_obs - 1)
                        frame = fc.summary_frame(alpha=0.05)
                        Y = DataCounty.iloc[-self.v:].values
                        Yhat = frame['mean'].values
                        MAE = np.sum(np.abs(Y - Yhat)) / self.v
                        scores_counties.append(MAE)
                except Exception:
                    # Skip the order entirely so a stale or partial county
                    # list is never scored under the wrong parameters.
                    print('Training failed for parameters :', order)
                    continue
                scores.append(np.nanmean(scores_counties))
                params.append(order)

    if not scores:
        raise ValueError('GridSearch: no parameter combination could be '
                         'fitted')

    # Ignore NaN scores when picking the minimum.
    argbest = np.nanargmin(scores)
    print('Best MAE : ', scores[argbest])
    print('Best params : ', params[argbest])
    self.BestParams = params[argbest]
def arima_grid_search(dataframe, s):
    """Return the fitted SARIMAX result with the lowest AIC on a 0/1 grid.

    Every non-seasonal order ``(p, d, q)`` with terms in ``{0, 1}`` is
    combined with every seasonal order on the same grid and period ``s``.

    Args:
        dataframe: Data passed to ``SARIMAX`` as endog.
        s: Seasonal period.

    Returns:
        The fitted results object of the combination with the lowest AIC.
    """
    param_combinations = list(itertools.product(range(2), repeat=3))

    lowest_aic = None
    best_result = None
    for order in param_combinations:
        for seasonal in param_combinations:
            seasonal_order = seasonal + (s,)
            # ``disp`` is a fit option, not a model option, so it is only
            # passed to ``fit``.
            model = SARIMAX(dataframe,
                            order=order,
                            seasonal_order=seasonal_order,
                            enforce_stationarity=False,
                            enforce_invertibility=False,
                            simple_differencing=False)
            model_result = model.fit(maxiter=500, disp=False)
            # Compare against None explicitly: an AIC of 0 is a valid value.
            if lowest_aic is None or model_result.aic < lowest_aic:
                lowest_aic = model_result.aic
                best_result = model_result
    return best_result
def PrintParameterMatrix(self, dataset, maxNonSeasonal, maxSeasonal,
                         lagsPerDay, noTrainingDays):
    """Fit every SARIMA order up to the given maxima and print AIC/BIC.

    The training set is the slice of ``dataset`` covering the last
    ``noTrainingDays`` periods except the final one, with ``lagsPerDay``
    rows per period (also used as the seasonal period). Each model's summary
    is printed as it is fitted; a table of AIC and BIC per model is printed
    at the end. Fit errors are printed and the model is skipped.

    Args:
        dataset: Sliceable series of observations.
        maxNonSeasonal: Sequence whose first three items are the maximum
            ``p``, ``d``, ``q``.
        maxSeasonal: Sequence whose first three items are the maximum
            ``P``, ``D``, ``Q``.
        lagsPerDay: Number of rows per period; truncated with ``int``.
        noTrainingDays: Number of periods counted back from the end.
    """
    max_p, max_d, max_q = (maxNonSeasonal[0], maxNonSeasonal[1],
                           maxNonSeasonal[2])
    max_P, max_D, max_Q = maxSeasonal[0], maxSeasonal[1], maxSeasonal[2]

    aic = []
    bic = []
    orders = []
    train_set = dataset[-noTrainingDays * int(lagsPerDay):-int(lagsPerDay)]

    seasonal_grid = itertools.product(range(max_P + 1), range(max_D + 1),
                                      range(max_Q + 1))
    for P, D, Q in seasonal_grid:
        plain_grid = itertools.product(range(max_p + 1), range(max_d + 1),
                                       range(max_q + 1))
        for p, d, q in plain_grid:
            # The all-zero model is not fitted
            if not any((P, D, Q, p, d, q)):
                continue
            try:
                model = SARIMAX(train_set,
                                order=(p, d, q),
                                seasonal_order=(P, D, Q, int(lagsPerDay)))
                result = model.fit()
                order = ('Model: Nonseanonal ({!r},{!r},{!r}) Seasonal: '
                         '({!r},{!r},{!r})').format(p, d, q, P, D, Q)
                print(order)
                print(result.summary())
                orders.append(order)
                bic.append(result.bic)
                aic.append(result.aic)
            except Exception as e:
                print(e)

    results = pd.DataFrame(index=orders, data={'aic': aic, 'bic': bic})
    print(results)
def SARIMA_GridSearch(self, idx, numpredictions):
    """Grid-search a SARIMA configuration, refit it and forecast.

    The search runs on roughly the last 7 days of hourly rows before
    ``idx``. The configuration with the smallest error is then fitted on the
    7-day window and stored on ``self.trained_model``.

    NOTE(review): ``numpredictions`` is not used; the horizon comes from
    ``cfg.prediction['num_predictions']`` -- confirm this is intended.
    NOTE(review): the winning index is looked up in ``cfg_list``, which
    assumes ``grid_search`` returns scores in the same order as ``cfg_list``
    -- verify against ``grid_search``.

    Returns:
        The forecast returned by the fitted model.
    """
    nDays = 7
    column = cfg.prediction['pos']

    # Clamp the start at 0: a negative start would wrap around to the end of
    # the array and select the wrong rows.
    search_start = max(idx - (nDays * 24) - 1, 0)
    trainingData = self.dataX[search_start:idx, column]
    cfg_list = sarima_configs(seasonal=[24])

    # do the grid search on the training data
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        scores = grid_search(trainingData, cfg_list, 1, parallel=True)

    err = [entry[1] for entry in scores]
    index = err.index(min(err))

    # train the model with the last 7 days and predict
    trainingData = self.dataX[max(idx - nDays * 24, 0):idx, column]
    order = cfg_list[index][0]
    season_order = cfg_list[index][1]
    trend = cfg_list[index][2]

    model = SARIMAX(trainingData,
                    order=order,
                    seasonal_order=season_order,
                    trend=trend,
                    enforce_invertibility=False,
                    enforce_stationarity=False)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        self.trained_model = model.fit(disp=False)

    pred = self.trained_model.forecast(cfg.prediction['num_predictions'])
    return pred
def show_bestScore(train_set, test_set):
    """
    Find the (p, d, q) order with the lowest hold-out MAE.

    Every combination of the module-level ``pList``, ``dList`` and ``qList``
    is fitted on ``train_set`` and scored with the mean absolute error of
    its forecast over ``test_set``. The user is first asked to confirm that
    the candidate values are defined.

    Returns:
        ``(best_score, best_order)``, or ``None`` when the user answers "no"
        or when there was no candidate order to evaluate.
    """
    start = input("Do you have p, d and q values defined? ")
    if start in ("No", "no", "N", "n"):
        print("Please define p, d, q values and retry.")
        return None

    print("Finding out...")
    target = train_set.astype("float32")
    testVals = test_set.astype("float32")

    # Start from +inf so the first evaluated order always becomes the
    # incumbent, whatever the scale of the errors.
    best_score, best_order = float("inf"), None
    for p in pList:
        for d in dList:
            for q in qList:
                order = (p, d, q)
                fit = SARIMAX(target, order=order).fit(disp=False)
                preds = fit.forecast(len(test_set))
                error = mean_absolute_error(testVals, preds)
                if error < best_score:
                    best_score, best_order = error, order

    if best_order is None:
        print("Invalid or missing order of values. Please retry.")
        return None

    print("Best SARIMAX: MAE = %.f :: Order = %s" % (best_score, best_order))
    return best_score, best_order
def train_SARIMA_model(self, df, game_id, dep_var, indep_var, target, type,
                       model_type):
    """Fit a SARIMAX model on ``df`` and save it to disk.

    ``df[dep_var]`` is the endogenous variable and ``df[indep_var]`` the
    exogenous regressors. When ``self.seasonal_order`` is set, nothing is
    trained unless both have more than 24 rows. The fitted model is saved
    (without its data) to the path built by ``util.build_model_save_path``.
    """
    Y = df[dep_var]
    X = df[indep_var]
    seasonal = self.seasonal_order is not None

    # A seasonal model is not trained on 24 rows or fewer
    if seasonal and len(Y) <= 24:
        print("Length of dependent variable is to short: {}".format(len(Y)))
        return
    if seasonal and len(X) <= 24:
        print("Length of dependent variables is to short: {}".format(len(X)))
        return

    model_kwargs = dict(endog=Y,
                        exog=X,
                        order=self.order,
                        enforce_stationarity=False,
                        enforce_invertibility=False)
    if seasonal:
        model_kwargs['seasonal_order'] = self.seasonal_order

    fitted_model = SARIMAX(**model_kwargs).fit()

    save_path = util.build_model_save_path(game_id, target, type, model_type)
    fitted_model.save(save_path, remove_data=True)
def forecast(data):
    """
    Predict the next 7 values for the input data with a SARIMAX(1, 1, 1).

    Values are predicted one at a time; each prediction is truncated to an
    integer and appended to a private copy of the history before the model
    is refitted. The caller's ``data`` is left untouched.

    Returns:
        A list with the 7 integer predictions.
    """
    # Work on a copy so the caller's sequence is not extended as a side
    # effect.
    history = list(data)
    predictions = []
    for _ in range(7):
        model_fit = SARIMAX(history, order=(1, 1, 1)).fit(disp=False)
        # One-step-ahead prediction; take the single element explicitly
        # rather than converting a 1-element array with int().
        yhat = model_fit.predict(len(history), len(history))
        next_value = int(yhat[0])
        history.append(next_value)
        predictions.append(next_value)
    return predictions
def sarimax_forecast(hour=11):
    '''Forecast the next price for one hour of the day with SARIMAX.

    hour: hour of a day, passed to ``get_data``.

    The frame returned by ``get_data`` is split on the availability of
    ``price``; the first row without a price is the forecast target and all
    non-price columns are exogenous regressors.

    Returns a tuple ``(forecast, past)``:
    - ``forecast``: one-row DataFrame with columns ``price``, ``lower`` and
      ``upper`` (all NaN when the ``temp`` value of the target row is
      missing).
    - ``past``: the last row of the first column of the history.
    '''
    df_all = get_data(hour=hour)

    # split past and future
    past = df_all[~df_all.price.isnull()]
    future = df_all[df_all.price.isnull()].drop('price', axis=1)
    future = future.iloc[:1, :]

    # Positional access: the index is not an integer range, so ``[0]`` would
    # be ambiguous.
    if future.temp.isnull().iloc[0]:
        forecast = np.array([np.nan])
        lower = upper = np.nan
    else:
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)

        # Build Model
        model = SARIMAX(past.price,
                        past.drop('price', axis=1),
                        order=(1, 1, 1),
                        seasonal_order=(1, 0, 2, 7))
        fitted = model.fit(maxiter=300)

        # Forecast once; the point forecast and the interval both come from
        # the same result object.
        results = fitted.get_forecast(1, exog=future)
        forecast = results.predicted_mean
        confidence_int = results.conf_int(alpha=0.05)
        lower = confidence_int['lower price'].iloc[0]
        upper = confidence_int['upper price'].iloc[0]

    # create forecast df with datetimeIndex
    forecast = pd.DataFrame(dict(price=forecast, lower=lower, upper=upper),
                            index=future.index)
    past = past.iloc[-1:, 0]
    return forecast, past
def sarima_forecast(history, config):
    """Fit a SARIMA model and time a one-step forecast.

    Args:
        history: Observations passed to ``SARIMAX``.
        config: ``(order, seasonal_order, trend)`` triple.

    Returns:
        ``(yhat, train_time, predi_time)``: the one-step-ahead prediction and
        the wall-clock seconds spent fitting and predicting.
    """
    order, sorder, trend = config

    # presumably blockPrint/enablePrint suppress and restore printing --
    # verify against their definitions
    blockPrint()
    try:
        # define model
        model = SARIMAX(history,
                        order=order,
                        seasonal_order=sorder,
                        trend=trend,
                        enforce_stationarity=False,
                        enforce_invertibility=False)

        # fit model
        t0 = time.time()
        model_fit = model.fit(disp=False)
        train_time = time.time() - t0

        # make one step forecast
        t0 = time.time()
        yhat = model_fit.predict(len(history), len(history))
        predi_time = time.time() - t0
    finally:
        # Always undo blockPrint(), even when fitting or predicting raises.
        enablePrint()
    return yhat[0], train_time, predi_time
def train_sarima_model(y_train, order, seasonal_order, plot_diagnostics=True,
                       **kwargs):
    """Fit a SARIMAX model on ``y_train`` and return the fitted results.

    ``order`` and ``seasonal_order`` are the SARIMA parameters; any extra
    keyword arguments are forwarded to ``SARIMAX``. When
    ``plot_diagnostics`` is true the diagnostics plot of the fitted model is
    shown before returning.
    """
    results = SARIMAX(y_train,
                      order=order,
                      seasonal_order=seasonal_order,
                      **kwargs).fit()

    if not plot_diagnostics:
        return results

    results.plot_diagnostics(figsize=(15, 8))
    plt.show()
    return results
def SARIMAX_forecast(series, cfg, pred_len):
    """Fit SARIMAX on all but the last ``pred_len`` points and predict them.

    Args:
        series: Series to model; the tail slice must expose ``.index``.
        cfg: Sequence ``(order, seasonal_order, trend)``.
        pred_len: Number of trailing observations held out and predicted.

    Returns:
        The prediction from the first to the last index label of the
        held-out tail.
    """
    split_at = int(len(series) - pred_len)
    train = series[0:split_at]
    test = series[split_at:]

    order, seasonal_order, trend = cfg[0], cfg[1], cfg[2]
    fitted = SARIMAX(train,
                     order=order,
                     seasonal_order=seasonal_order,
                     trend=trend,
                     initialization='approximate_diffuse').fit()

    # Predict over exactly the held-out span
    return fitted.predict(start=test.index[0], end=test.index[-1])
def make_arimax_model_with_params(param_dict, time_series, train_fraction,
                                  exog_var):
    '''
    Build a SARIMAX model from a parameter dictionary and fit it on the
    training part of a time series.

    The series (and the exogenous variables, if given) are split at
    ``ceil(train_fraction * len(time_series))``; the model is fitted on the
    first part.

    It expects a parameter dictionary of the following format:
    param_dict = {'order': (p, d, q),
                  'seasonal': (p, d, q, m),
                  'trend': 'c' or 't' or 'n' or 'ct',
                  'enforce_stationarity': True or False,
                  'enforce_invertibility': True or False
                  }

    It always returns five values: the fitted model (result),
    time_series_train, time_series_test, exog_series_train and
    exog_series_test. The last two are None when ``exog_var`` is None.
    '''
    warnings.filterwarnings("ignore")

    # Compute the split position once and use it for every slice.
    split = math.ceil(train_fraction * len(time_series))
    time_series_train = time_series[0:split]
    time_series_test = time_series[split:]

    if exog_var is None:
        exog_series_train = None
        exog_series_test = None
    else:
        exog_series_train = exog_var[0:split]
        exog_series_test = exog_var[split:]

    model = SARIMAX(time_series_train,
                    exog=exog_series_train,
                    order=param_dict['order'],
                    seasonal_order=param_dict['seasonal'],
                    trend=param_dict['trend'],
                    enforce_stationarity=param_dict['enforce_stationarity'],
                    enforce_invertibility=param_dict['enforce_invertibility'])
    # ``disp`` is the fit option that controls convergence output.
    result = model.fit(disp=0)

    return (result, time_series_train, time_series_test,
            exog_series_train, exog_series_test)
def predictTemperature2(startDate, endDate, temperature, n) -> list:
    """Forecast hourly temperatures from a single day of observations.

    Ten artificial days are created before the observed day by repeating
    the 24 observed values with a random offset in [-2.0, 1.9]; a seasonal
    (period 24) SARIMAX model is fitted on the result and used to predict
    the hours following the data.

    Args:
        startDate: First day of the observed data, ``"%Y-%m-%d"``.
        endDate: Last day of the observed data, ``"%Y-%m-%d"``.
        temperature: Sequence of 24 hourly temperatures (list or array).
        n: Number of days to predict.

    Returns:
        Array with the predicted values, without the final point of the
        requested range.

    Raises:
        ValueError: If ``temperature`` does not have 24 elements.
    """
    if len(temperature) != 24:
        raise ValueError('temperature must be an array with 24 elements')

    start_date = datetime.strptime(startDate, "%Y-%m-%d")
    end_date = datetime.strptime(endDate, "%Y-%m-%d") + timedelta(hours=23)

    artificial_days = 10
    artificial_start = start_date - timedelta(days=artificial_days)
    n_hours = ((end_date - artificial_start).days + 1) * 24
    date_list = [artificial_start + timedelta(hours=x)
                 for x in range(n_hours)]

    # There is too little data (1 day) and we want to predict many days, so
    # several days of data are created before the true data, based on the
    # existing data with a +- randomization.
    observed = np.asarray(temperature, dtype=float)
    rdm_temp = np.random.choice(range(-20, 20, 1), artificial_days * 24) / 10
    # np.tile repeats the day for lists and arrays alike.
    full_temp = np.concatenate(
        [rdm_temp + np.tile(observed, artificial_days), observed])

    # Group in dataframe
    df = pd.DataFrame.from_dict({
        'date': date_list,
        'temp': full_temp
    }).set_index('date')

    sarima_model = SARIMAX(df,
                           order=(1, 1, 1),
                           seasonal_order=(1, 1, 1, 24),
                           enforce_invertibility=False,
                           enforce_stationarity=True,
                           initialization='approximate_diffuse')
    sarima_fit = sarima_model.fit()

    new_data_points = [
        (date_list[-1] + timedelta(days=k)).strftime("%Y-%m-%d")
        for k in [1, n + 1]
    ]
    sarima_pred = sarima_fit.get_prediction(*new_data_points)

    # Read the forecast through the public attribute and drop the last
    # point of the range.
    results = np.asarray(sarima_pred.predicted_mean)[:-1]
    return results
def sarimax_cv(df):
    """Grid-search SARIMAX orders on monthly eviction notices by AIC.

    ``Eviction_Notice`` is summed per ``Month_Year`` and every combination
    of ``(p, d, q)`` and seasonal ``(P, D, Q, s)`` with p, P in 0..3,
    d, D, q, Q in 0..1 and s in {6, 12} is fitted. Combinations that fail to
    fit are skipped.

    Args:
        df: DataFrame with columns ``Month_Year`` and ``Eviction_Notice``.

    Returns:
        ``(best_aic, pdq_best, seasonal_best)``. If nothing could be fitted,
        ``best_aic`` is ``inf`` and both orders are ``None``.
    """
    p = range(0, 4)
    d = range(0, 2)
    q = range(0, 2)
    s = [6, 12]

    # All combinations of the non-seasonal and of the seasonal orders
    pdq = list(itertools.product(p, d, q))
    seasonal_pdq = list(itertools.product(p, d, q, s))

    # One float value per month, indexed by Month_Year
    monthly = df[['Month_Year', 'Eviction_Notice']].groupby('Month_Year').sum()
    y = monthly[['Eviction_Notice']].astype(float)

    # The best AIC lives in its own variable; sharing one name with the fit
    # result made the comparison fail on every iteration.
    best_aic = float("inf")
    pdq_best = None
    seasonal_best = None
    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                mod = SARIMAX(y,
                              order=param,
                              seasonal_order=param_seasonal,
                              enforce_stationarity=False,
                              enforce_invertibility=False)
                aic = mod.fit().aic
            except Exception:
                # Best-effort search: skip orders that cannot be fitted.
                continue
            if aic < best_aic:
                best_aic = aic
                pdq_best = param
                seasonal_best = param_seasonal
    return best_aic, pdq_best, seasonal_best
def sarima(n_input, n_preds, df):
    """Fit a fixed-order SARIMA on the training split and predict the test.

    Returns:
        ``(test, label, predictions)`` where ``label`` is
        ``'SARIMA (n_input, n_preds)'`` and ``predictions`` is a list
        covering the length of the test split.
    """
    # Model order obtained from:
    #   auto_arima(df['Value'], m=12).summary()
    #   -> SARIMAX(1, 1, 2)x(2, 1, 2, 12)
    train, test, _, _, _, _ = train_test_split(n_preds, df)

    fitted = SARIMAX(train['Value'],
                     order=(1, 1, 2),
                     seasonal_order=(2, 1, 2, 12)).fit()

    first = len(train)
    last = first + len(test) - 1
    predicted = fitted.predict(start=first, end=last, dynamic=False,
                               typ='levels')

    label = 'SARIMA ({}, {})'.format(n_input, n_preds)
    return test, label, predicted.tolist()
def modelEvaluated(data, sarimaOrder):
    """Walk-forward evaluation of a SARIMA configuration; returns the RMSE.

    The first 80% of ``data`` is the initial history. For every remaining
    point the model is refitted on the history, a one-step forecast is
    recorded, and the true value is appended to the history.

    Args:
        data: Sequence of observations, indexable by position.
        sarimaOrder: ``(order, seasonal_order, trend)`` triple.

    Returns:
        Square root of the mean squared error over the held-out points.
    """
    cut = floor(len(data) * 0.8)
    x_train, x_test = data[0:cut], data[cut:]
    order, sorder, trend = sarimaOrder

    history = list(x_train)
    predictions = []
    for step in range(len(x_test)):
        fitted = SARIMAX(history,
                         order=order,
                         seasonal_order=sorder,
                         trend=trend,
                         simple_differencing=True,
                         enforce_stationarity=False,
                         enforce_invertibility=False).fit(disp=False)
        predictions.append(fitted.forecast()[0])
        # Reveal the true observation before the next refit
        history.append(x_test[step])

    # out-of-sample error
    return sqrt(mean_squared_error(x_test, predictions))
def sarima_forecast(history, config):
    """Fit a SARIMA model on ``history`` and return a one-step forecast.

    Args:
        history: Observations passed to ``SARIMAX``.
        config: ``(order, seasonal_order, trend)`` triple.

    Returns:
        The first element of the one-step-ahead prediction.
    """
    order, sorder, trend = config

    # Define the model.
    # ``order`` holds the regular parameters, ``seasonal_order`` the seasonal
    # ones and ``trend`` the trend type.
    # The class is called SARIMAX rather than SARIMA because the "X" means
    # exogenous variables are supported as well. Exogenous variables are
    # parallel time series that are not modelled through the AR, I or MA
    # terms but are supplied as weighted inputs to the model. They are
    # optional and given through ``exog``: SARIMAX(data, exog=other_data, ...)
    model = SARIMAX(history,
                    order=order,
                    seasonal_order=sorder,
                    trend=trend,
                    enforce_stationarity=False,
                    enforce_invertibility=False)

    # Fitting prints a lot of debug output; disp=0 or disp=False turns it off.
    fitted = model.fit(disp=False)

    # Two prediction methods exist: forecast(n) predicts out-of-sample only,
    # predict(start, end) covers both in-sample and out-of-sample points.
    # forecast(n) predicts the n values after the training data (one value
    # when n is omitted). predict(start, end) counts from the first training
    # value: with 5 training points, predict(8, 9) gives the 9th-10th points
    # (out of sample) and predict(3, 6) the 4th-7th (partly in sample).
    # Alternative: yhat = fitted.forecast()
    next_index = len(history)
    yhat = fitted.predict(next_index, next_index)

    # Return the first value of the prediction array
    return yhat[0]