def order_selection(train, test, params, loss_func=mean_squared_error, **loss_kwargs):
    """Grid-search SARIMAX keyword arguments and return the best combination.

    Every combination of the value lists in ``params`` is fitted on ``train``,
    used to forecast ``test.shape[0]`` steps, and scored with ``loss_func``.

    Parameters
    ----------
    train : array-like
        Series passed to ``SARIMAX`` as the endogenous data.
    test : array-like
        Hold-out observations; must expose ``shape[0]``.
    params : dict
        Maps a ``SARIMAX`` keyword name to an iterable of candidate values.
    loss_func : callable
        Called as ``loss_func(test, forecast, **loss_kwargs)``; lower is better.
    **loss_kwargs
        Extra keyword arguments forwarded to ``loss_func``.

    Returns
    -------
    tuple
        ``(best_params, best_score)``.

    Raises
    ------
    ValueError
        If no candidate could be fitted and scored below infinity.
    """
    best_score, best_params = float("inf"), None
    keys = list(params)
    # product() of zero iterables yields one empty tuple, so an empty
    # ``params`` dict evaluates the default SARIMAX specification once.
    grid = [dict(zip(keys, combo))
            for combo in itertools.product(*(params[key] for key in keys))]

    # Silence statsmodels warnings for unconverged models only while
    # searching, instead of changing the process-wide warning filters.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for candidate in grid:
            try:
                model_fit = SARIMAX(train, **candidate).fit()
            except Exception:
                # This specification could not be estimated; try the next one.
                continue
            yhat = model_fit.forecast(test.shape[0])
            loss = loss_func(test, yhat, **loss_kwargs)
            if loss < best_score:
                best_score, best_params = loss, candidate
                print(best_score)

    if best_params is None:
        raise ValueError("no SARIMAX configuration in the grid could be fitted and scored")

    print('Best ARIMA%s Loss=%.3f' % (best_params, best_score))
    return best_params, best_score
def sarima_models_top_18():
    """Fit one SARIMAX model per code and return 12-step forecasts.

    Each code is paired with its own (order, seasonal_order) specification.
    The series for a code is the column of that name in the frame returned
    by ``load_data_top_27()``.

    Returns
    -------
    dict
        Maps each code to the array of its 12 forecast values.
    """
    specs = (
        (60804, (1, 1, 1), (1, 1, 1, 12)),
        (60085, (1, 1, 1), (0, 1, 1, 12)),
        (60110, (1, 1, 1), (0, 1, 1, 12)),
        (60104, (1, 1, 1), (1, 1, 1, 12)),
        (60505, (1, 1, 1), (0, 1, 1, 12)),
        (60651, (1, 1, 1), (1, 1, 1, 12)),
        (60073, (1, 1, 1), (0, 1, 1, 12)),
        (60436, (0, 1, 1), (1, 1, 1, 12)),
        (60120, (1, 1, 1), (0, 1, 1, 12)),
        (60165, (1, 1, 1), (1, 1, 1, 12)),
        (60160, (1, 1, 1), (1, 1, 1, 12)),
        (60641, (1, 1, 1), (0, 1, 1, 12)),
        (60432, (0, 1, 1), (0, 1, 1, 12)),
        (46327, (1, 1, 1), (1, 1, 0, 12)),
        (60633, (0, 1, 1), (1, 1, 1, 12)),
        (46324, (1, 1, 1), (1, 1, 1, 12)),
        (60099, (1, 1, 1), (1, 1, 1, 12)),
        (46394, (1, 1, 1), (0, 1, 1, 12)),
    )
    data = load_data_top_27()

    forecasts = {}
    for code, order, seasonal_order in specs:
        fitted = SARIMAX(
            data.loc[:, code],
            order=order,
            seasonal_order=seasonal_order,
            enforce_invertibility=False,
            enforce_stationarity=False,
        ).fit()
        forecasts[code] = fitted.forecast(steps=12).values
    return forecasts
def process_data6():
    """Walk-forward forecast of log Styrene and write the results to CSV.

    Reads a history workbook and an actuals workbook, fits a SARIMAX model on
    log(Styrene) with 'Oil_Lag' and 'Gas_Lag' as exogenous regressors, and
    then, for every row of the actuals, forecasts 6 steps, keeps one of them,
    appends the actual row to the history and refits. The collected forecasts
    are written to '../../Data/Results.csv'.
    """
    series = pd.read_excel('../../Data/Styrene-Net Industry Average 2010-2015.xlsx', header=0, index_col=0, parse_dates=True)
    series.index.freq = 'MS'
    data = series.copy()
    actuals = pd.read_excel('../../Data/Styrene-Net Industry Average 2015-2018 Actuals.xlsx', header=0, index_col=0, parse_dates=True)
    actuals.index.freq = 'MS'
    #Test ranges
    data = data['2010-01-01':]
    # Initial fit on the history only.
    model = SARIMAX(np.log(data['Styrene']), order=(1,1,2), seasonal_order=(0,0,1,12), enforce_invertibility = False, exog = data[['Oil_Lag', 'Gas_Lag']]).fit()
    #auto_arima(data['Styrene'], seasonal=True, m=12, enforce_invertibility = False, exog = data[['Oil_Lag']]).summary()
    preds = []
    for i in actuals.index:
        # One-row frame holding the current actuals row.
        df = actuals.loc[i,:]
        df = pd.DataFrame(df).T
        # fd..fd5 each repeat the current row's Oil_Lag/Gas_Lag values, so the
        # 6-step forecast below is fed the same regressor values 6 times.
        # NOTE(review): `fd.set_index = ...` assigns an attribute named
        # set_index; it does not call DataFrame.set_index, so the index of
        # these frames is not changed here -- confirm the intent.
        fd = pd.DataFrame(data = [df['Oil_Lag'], df['Gas_Lag']])
        fd.set_index = i+1
        fd = pd.DataFrame(fd).T
        fd2 = pd.DataFrame(data = [df['Oil_Lag'], df['Gas_Lag']])
        fd2.set_index = i+2
        fd2 = pd.DataFrame(fd2).T
        fd3 = pd.DataFrame(data = [df['Oil_Lag'], df['Gas_Lag']])
        fd3.set_index = i+3
        fd3 = pd.DataFrame(fd3).T
        fd4 = pd.DataFrame(data = [df['Oil_Lag'], df['Gas_Lag']])
        fd4.set_index = i+4
        fd4 = pd.DataFrame(fd4).T
        fd5 = pd.DataFrame(data = [df['Oil_Lag'], df['Gas_Lag']])
        fd5.set_index = i+5
        fd5 = pd.DataFrame(fd5).T
        df = pd.concat([df, fd, fd2, fd3, fd4, fd5])
        yhat_log = model.forecast(steps = 6, exog = df[['Oil_Lag', 'Gas_Lag']])
        # Presumably keeps the 6th (last) forecast step -- TODO confirm that
        # the list key [5] is treated as a position on this Series.
        yhat_log = yhat_log[[5]]
        # Back-transform from the log scale used for fitting.
        # NOTE(review): this uses the name `numpy` while the rest of the
        # function uses `np`; both names must be imported by the file.
        yhat = numpy.exp(yhat_log)
        preds.append(yhat)
        # Append the observed row to the history and refit for the next step.
        act = pd.Series(actuals.loc[i,:])
        act = pd.DataFrame(act).T
        data = pd.concat([data, act], axis = 0)
        model = SARIMAX(np.log(data['Styrene']), order=(1,1,2), seasonal_order=(0,0,1,12), enforce_invertibility = False, exog = data[['Oil_Lag', 'Gas_Lag']]).fit()
    df = pd.DataFrame({'timestamp': [i.index for i in preds], 'value':[round(i[0],2) for i in preds]})
    # The timestamp is recovered by parsing the text between '[' and ']' in
    # the string form of each forecast's index.
    df['timestamp'] = df.timestamp.apply(lambda x: str(x).split('[')[1].split(']')[0])
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.to_csv('../../Data/Results.csv', index = False)
class SARIMAModel(SMModel):
    """SARIMAX-backed model exposing fit, predict and a grid of (p, d, q) orders."""

    type = [ModelType.CONTINUOUS_PRICE, ModelType.UNIVARIATE]
    name = 'statsmodels.arima'
    default_params = {'order': (1, 1, 1)}

    @with_params
    def fit(self, x, **kwargs):
        """Fit a SARIMAX model of order ``params['order']`` on ``x``.

        Reads ``kwargs['params']`` for the keys 'order' and optional 'disp'.

        Returns the fitted results object, or None when estimation raises
        ValueError / LinAlgError. On failure ``self.model`` is reset to None
        so that ``predict`` cannot silently reuse a model from an earlier fit.
        """
        params = kwargs.get('params')
        try:
            self.model = SARIMAX(x, order=params['order']) \
                .fit(disp=params.get('disp', 0))
        except (ValueError, np.linalg.LinAlgError):
            logger.error('ARIMA convergence error (order {} {} {})'.format(
                params['order'][0], params['order'][1], params['order'][2]))
            self.model = None
            return None
        return self.model

    def predict(self, x, **kwargs):
        """Forecast ``x.shape[0]`` steps and discretise the result.

        Returns the output of ``to_discrete_double(forecast, -0.01, 0.01)``,
        or None when there is no fitted model or forecasting fails.
        """
        if not getattr(self, 'model', None):
            return None
        try:
            forecast = self.model.forecast(steps=x.shape[0])
            return to_discrete_double(forecast, -0.01, 0.01)
        except (ValueError, np.linalg.LinAlgError):
            logger.error('ARIMA convergence error (order {} {} {})'.format(
                self.params['order'][0], self.params['order'][1],
                self.params['order'][2]))
            return None

    @with_x
    def get_grid_search_configs(self, **kwargs):
        """Build the list of grid-search configurations.

        p and q range over 0..5. d ranges over 0..5 unless the ADF test on
        ``x_train`` rejects a unit root (p-value < 0.05), in which case only
        d = 0 is used. Each configuration is a dict with the keys 'params',
        'x_train' and 'x_test'.
        """
        x_train = kwargs.get('x_train')
        x_test = kwargs.get('x_test')
        p_values = range(0, 6)
        d_values = range(0, 6)
        q_values = range(0, 6)

        # adfuller returns (statistic, p-value, ...); index 1 is the p-value.
        adf = adfuller(x_train)
        if adf[1] < 0.05:
            # Null hypothesis rejected: treat the series as stationary.
            logger.info('Series is stationary, no need for differencing')
            d_values = [0]

        return [
            {
                'params': {'order': (p, d, q)},
                'x_train': x_train,
                'x_test': x_test,
            }
            for p in p_values
            for d in d_values
            for q in q_values
        ]
def f_ARIMA(self, O_Train, O_Test, order1, seasonal_order1):
    """Fit SARIMAX on ``O_Train`` and forecast ``len(O_Test)`` steps.

    The forecast values are re-labelled with the 'Date' column obtained from
    ``O_Test.reset_index()`` and returned as the Series named 'pred'.
    """
    fitted = SARIMAX(
        O_Train, order=order1, seasonal_order=seasonal_order1
    ).fit()

    test_dates = O_Test.reset_index()['Date']
    forecast_frame = pd.DataFrame(fitted.forecast(len(O_Test)))
    # Drop the forecast's own index so it lines up row-by-row with the dates.
    forecast_frame.reset_index(drop=True, inplace=True)

    combined = pd.concat([test_dates, forecast_frame], axis=1)
    combined.columns = ['Date', 'pred']
    return combined.set_index('Date')['pred']
def sarima_prediction(data, pollutant, p, q, length=1):
    """Fit a seasonal ARIMA on ``data`` and forecast ``length`` steps.

    The differencing order d is 1 for pollutant 'O3' and 0 otherwise; the same
    d is used in the seasonal part, whose specification is (1, d, 1, 7).
    """
    d = 1 if pollutant == 'O3' else 0
    season_length = 7

    fitted = SARIMAX(
        np.asarray(data),
        order=(p, d, q),
        seasonal_order=(1, d, 1, season_length),
        initialization='approximate_diffuse',
    ).fit()
    return fitted.forecast(length)
class SARIMA_regressor(BaseEstimator, RegressorMixin):
    """Scikit-learn style regressor that delegates to a SARIMAX model."""

    def __init__(
        self,
        endog_col,
        exog_cols,
        order,
        seasonal_order,
        measurement_error=True,
    ):
        """Store the model specification; nothing is fitted here.

        Parameters
        ----------
        endog_col : str
            Name of the column of X holding the endogenous series.
        exog_cols : list
            Names of the columns of X holding the exogenous regressors.
        order : tuple
            (p, d, q) of the ARIMA part.
        seasonal_order : tuple
            (P, D, Q, s) of the seasonal part.
        measurement_error : bool, optional
            Passed to SARIMAX as ``measurement_error``; True by default.
        """
        self.measurement_error = measurement_error
        self.seasonal_order = seasonal_order
        self.order = order
        self.exog_cols = exog_cols
        self.endog_col = endog_col

    def fit(self, X, y=None):
        """Fit SARIMAX on the configured columns of X; ``y`` is not used.

        Returns
        -------
        self
        """
        endog = X[self.endog_col]
        exog = X[self.exog_cols]
        unfitted = SARIMAX(
            endog,
            exog=exog,
            order=self.order,
            seasonal_order=self.seasonal_order,
            measurement_error=self.measurement_error,
        )
        self.model = unfitted.fit()
        return self

    def predict(self, X):
        """Forecast one step per row of X beyond the end of the fitted data.

        Parameters
        ----------
        X : DataFrame-like
            Its row count sets the forecast horizon and its ``exog_cols``
            columns supply the future exogenous values.
        """
        horizon = X.shape[0]
        future_exog = X[self.exog_cols]
        return self.model.forecast(horizon, exog=future_exog)
def model_sarima(df, steps, kwargs):
    """Fit SARIMAX on ``df`` and forecast ``steps`` periods.

    Parameters
    ----------
    df : pandas object
        Endogenous series passed to SARIMAX.
    steps : int
        Forecast horizon.
    kwargs : dict
        Must contain 'order' and 'seasonal_order'. When 'fourier' is truthy,
        exogenous terms from ``_get_fourier_terms(df, steps)`` are used for
        both fitting and forecasting.

    Returns
    -------
    The forecast with its index reset, or None when the model could not be
    built or fitted.
    """
    exog_to_train, exog_to_test = None, None
    if kwargs.get('fourier'):
        exog_to_train, exog_to_test = _get_fourier_terms(df, steps)

    # train
    try:
        fitted = SARIMAX(df,
                         order=kwargs['order'],
                         seasonal_order=kwargs['seasonal_order'],
                         exog=exog_to_train).fit(disp=-1)
    except Exception:
        # Best-effort: an unusable specification yields no forecast. Catching
        # Exception (not a bare except) lets Ctrl-C and SystemExit through.
        return None

    # predict
    return fitted.forecast(steps, exog=exog_to_test).reset_index(drop=True)
def DomesticModelMaking(self):
    """Fit SARIMAX on 'AvgNetFare', forecast 30 steps and save to CSV.

    Re-indexes ``self.Domesticdata`` by 'InvoiceDate' (stored back on the
    instance), fits the model, and writes the observed series followed by the
    30 forecast values to 'PredictedDomesticDataset.csv'.
    """
    self.Domesticdata = self.Domesticdata.set_index('InvoiceDate')
    fitted = SARIMAX(
        self.Domesticdata['AvgNetFare'],
        order=(6, 2, 4),
        seasonal_order=(6, 2, 4, 12),
        trend='n',
    ).fit()

    forecast = fitted.forecast(steps=30)
    # Build the frame from a dict so the forecast values land in the
    # 'AvgNetFare' column whatever name the forecast Series carries.
    pred = pd.DataFrame({'AvgNetFare': forecast})

    history = self.Domesticdata[['AvgNetFare']]
    # concat returns a new frame; the combined result is what gets saved.
    combined = pd.concat([history, pred])
    combined.to_csv('PredictedDomesticDataset.csv', index_label='InvoiceDate')
def test_autoreg_predict_forecast_equiv(reset_randomstate):
    """AutoReg forecast/predict agree, and SARIMAX uses the same forecast index."""
    e = np.random.normal(size=1000)
    nobs = e.shape[0]
    # Turn the noise into an AR(1) path with coefficient 0.95, in place.
    for t in range(1, nobs):
        e[t] = 0.95 * e[t - 1] + e[t]
    dates = pd.date_range("2020-1-1", freq="D", periods=nobs)
    y = pd.Series(e, index=dates)

    autoreg_res = AutoReg(y, trend="c", lags=1, old_names=False).fit()
    by_steps = autoreg_res.forecast(12)
    by_position = autoreg_res.predict(nobs, nobs + 11)
    by_date = autoreg_res.forecast("2022-10-08")
    assert_series_equal(by_steps, by_position)
    assert_series_equal(by_steps, by_date)

    sarimax_res = SARIMAX(y, order=(1, 0, 0), trend="c").fit(disp=False)
    sarimax_fcast = sarimax_res.forecast(12)
    pd.testing.assert_index_equal(by_steps.index, sarimax_fcast.index)
def predict_next_sales(best_params, dataset):
    """Back-test the chosen SARIMA configuration, then plot a 3-step forecast.

    ``best_params`` unpacks to (order, seasonal_order, trend). A model is
    fitted on the whole ``dataset``; separately, a walk-forward evaluation
    over the last ``num_test`` observations reports the RMSE and saves a
    comparison plot. Finally the full-data model forecasts 3 steps, which are
    printed and saved as a bar chart and a line plot.
    """
    order, sorder, trend = best_params
    fitted = SARIMAX(
        dataset,
        order=order,
        seasonal_order=sorder,
        trend=trend,
        enforce_stationarity=False,
        enforce_invertibility=False,
    ).fit(disp=False)

    # Walk-forward validation: forecast one step, then reveal the actual.
    train, test = train_test_split(dataset, num_test)
    history = list(train)
    walk_forward = []
    for step in range(len(test)):
        walk_forward.append(sarima_forecast(history, best_params))
        history.append(test[step])

    # Report and plot the back-test accuracy.
    error_2019 = measure_rmse(test, walk_forward)
    print("Estimated RMSE is ", error_2019)
    plt.close()
    plt.plot(walk_forward)
    plt.plot(history[-len(test):])
    plt.savefig("Final Model 2018 estimated sales.png")
    plt.close()

    # Out-of-sample forecast from the model fitted on all data.
    future = fitted.forecast(3)
    print("Predictions are")
    print(future)

    future.plot.bar()
    plt.savefig("2019 Forecast Bar Chart.png")
    plt.close()

    future.plot()
    plt.savefig("2019 Forecast Line plot.png")
def arima_best(fh, train, val, p_range, d_range, q_range, loss_metric="MSE"):
    '''
    Grid-search the non-seasonal (p, d, q) order of a SARIMAX model.

    The seasonal part is fixed at (4, 1, 2, 8). Every candidate is fitted on
    `train` and scored on the first `fh` values of `val` only, so the score
    matches the horizon the model will actually be used for.

    fh : int. Forecast horizon.
    train : training series.
    val : validation series; may be longer than fh.
    p_range, d_range, q_range : tuples unpacked into range(), e.g. (0, 3).
    loss_metric : name passed to loss_func to obtain the scoring function.

    Returns (best_model, (best_p, best_d, best_q)); the model is None and the
    order is (None, None, None) when no candidate improved on infinity.
    '''
    true = val[:fh]

    lowest_loss = float("inf")
    best_model = None
    best_order = (None, None, None)

    for p in range(*p_range):
        for d in range(*d_range):
            for q in range(*q_range):
                candidate = SARIMAX(train,
                                    order=(p, d, q),
                                    seasonal_order=(4, 1, 2, 8),
                                    enforce_stationarity=False,
                                    enforce_invertibility=False,
                                    trend=None)
                fitted = candidate.fit(maxiter=100, method="powell")

                # Score the fh-step forecast against the validation head.
                predictions = fitted.forecast(fh)
                scorer = loss_func(loss_metric, tensor=False)
                loss = scorer(true, predictions)

                if loss < lowest_loss:
                    lowest_loss = loss
                    best_model = fitted
                    best_order = (p, d, q)

    return best_model, best_order
def arima_evaluate(model, test, fh=8, refit=None, metric=MAPE):
    '''
    Forecast `fh` steps with a fitted SARIMAX model and score them.

    model : fitted SARIMAX results object.
    test : pd time series. Test data set; only its first `fh` values are used.
    fh : int. Forecast horizon.
    refit : pd time series or None. New data to refit the model on before
        forecasting. None or an empty series means "do not refit".
    metric : callable invoked as metric(predicted_array, true_array).

    Returns (pred, true, loss).
    '''
    if refit is not None and not refit.empty:
        start_params = model.params  # warm-start from the previous estimates
        spec = model.model
        p_d_q = (spec.k_ar_params, spec.k_diff, spec.k_ma_params)
        # Keep the seasonal specification too; otherwise the warm-start
        # parameter vector of a seasonal model would not match the new model.
        model = SARIMAX(refit,
                        order=p_d_q,
                        seasonal_order=spec.seasonal_order,
                        enforce_stationarity=False,
                        enforce_invertibility=False,
                        trend=None).fit(start_params, maxiter=1000)

    pred = model.forecast(steps=fh)  # forecast values
    true = test[:fh]  # true values
    loss = metric(pred.array, true.array)
    return pred, true, loss
def sarimax_forecast(df):
    '''Forecast the price for the first time point that has no price yet.

    Rows of `df` with a price form the training set; the first row without a
    price supplies the exogenous values for a one-step SARIMAX forecast. `df`
    must contain at least one row with a missing price.

    Returns (forecast, past): `forecast` is a one-row DataFrame with columns
    'price', 'lower_interval', 'upper_interval' (95% interval), all NaN when
    the `temp` value for that row is missing; `past` is the last observed row
    of the first column of the training data.
    '''
    # split past and future
    past = df[~df.price.isnull()]
    future = df[df.price.isnull()].drop('price', axis=1)
    # forecast for next time point only
    future = future.iloc[:1, :]

    # .iloc makes the "first row" lookup positional regardless of index type.
    if future.temp.isnull().iloc[0]:
        # weather forecast data is not available for that hour
        forecast = np.nan
        lower = np.nan
        upper = np.nan
        print('weather data is not available')
    else:
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)
        # Build Model
        sarima = SARIMAX(past.price,
                         exog=past.drop('price', axis=1),
                         order=(1, 1, 1),
                         seasonal_order=(1, 0, 2, 7))
        sarima = sarima.fit(maxiter=300)
        # One forecast call provides both the point forecast and its interval.
        results = sarima.get_forecast(1, exog=future)
        forecast = results.predicted_mean
        confidence_int = results.conf_int(alpha=0.05)
        lower = confidence_int['lower price'].iloc[0]
        upper = confidence_int['upper price'].iloc[0]

    # create forecast df with datetimeIndex
    forecast = pd.DataFrame(dict(price=forecast,
                                 lower_interval=lower,
                                 upper_interval=upper),
                            index=future.index)
    forecast.index.name = 'date_time'
    past = past.iloc[-1:, 0]
    return forecast, past
def sarimax_forecast(hour=11):
    '''Forecast the price for the first time point that has no price yet.

    hour: hour of a day, passed to get_data(hour=hour).

    Rows with a price form the training set; the first row without a price
    supplies the exogenous values for a one-step SARIMAX forecast. The data
    must contain at least one row with a missing price.

    Returns (forecast, past): `forecast` is a one-row DataFrame with columns
    'price', 'lower', 'upper' (95% interval), all NaN when the `temp` value
    for that row is missing; `past` is the last observed row of the first
    column of the training data.
    '''
    df_all = get_data(hour=hour)
    # split past and future
    past = df_all[~df_all.price.isnull()]
    future = df_all[df_all.price.isnull()].drop('price', axis=1)
    future = future.iloc[:1, :]

    # .iloc makes the "first row" lookup positional regardless of index type.
    if future.temp.isnull().iloc[0]:
        forecast = np.array([np.nan])
        confidence_int = pd.DataFrame(
            {
                'lower price': np.nan,
                'upper price': np.nan
            }, index=['x'])
    else:
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)
        # Build Model
        sarima = SARIMAX(past.price,
                         exog=past.drop('price', axis=1),
                         order=(1, 1, 1),
                         seasonal_order=(1, 0, 2, 7))
        sarima = sarima.fit(maxiter=300)
        # One forecast call provides both the point forecast and its interval.
        results = sarima.get_forecast(1, exog=future)
        forecast = results.predicted_mean
        confidence_int = results.conf_int(alpha=0.05)

    # create forecast df with datetimeIndex
    lower = confidence_int['lower price'].iloc[0]
    upper = confidence_int['upper price'].iloc[0]
    forecast = pd.DataFrame(dict(price=forecast, lower=lower, upper=upper),
                            index=future.index)
    past = past.iloc[-1:, 0]
    return forecast, past
def predictionArima(df):
    """Rolling-window ARIMA evaluation over several window sizes and horizons.

    For every window length ``n`` in the module-level ``PSW`` and every horizon
    index ``v`` in ``range(inter)``, a model of order ``orderArima`` is fitted
    on each window ``df.iloc[x:x + n]`` and used to forecast the following
    ``v + 1`` rows. Each result is accumulated through ``app`` into a frame
    that is returned at the end. The elapsed time is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column 'sugarValue'.

    Returns
    -------
    The accumulated results frame.
    """
    start_time = time.time()
    window = pd.DataFrame(columns=[
        'Current test', 'Current prediction', 'MSE',
        'Glycemia prediction RMSE (mg/dl)', 'PSW',
        'Prediction Horizon (minutes)'
    ])
    for n in PSW:
        windo = n / 12  # depends on the window length only
        for v in range(0, inter):
            horizon = v + 1
            interval = horizon * 15
            for x in range(len(df) - n - v):
                train = df.iloc[x:n + x]
                test = df.iloc[n + x:n + x + horizon]
                model = SARIMAX(train,
                                order=orderArima,
                                enforce_stationarity=False,
                                enforce_invertibility=False).fit()
                # The keyword is `steps`; the horizon must match len(test).
                pred = model.forecast(steps=horizon).values
                window = app(window, train, test['sugarValue'], pred,
                             interval, windo)
    print("--- %s Seconds for computation ---" % (time.time() - start_time))
    return window
def sarimaParaSelect(self, classNo, trainLabel, testLabel, useAic=False):
    """Pick the (p, q) pair whose SARIMA forecast has the lowest RMSE.

    The training values are transformed with log(x + 1) and indexed by the
    first ``len(trainLabel)`` entries of ``self.dtIndex``. For each candidate
    (p, q), a SARIMAX model with order (p, 1, q) and seasonal order
    (0, 1, 1, 7) is fitted; its forecast is mapped back with exp(x) - 1 and
    compared with ``testLabel``.

    Parameters
    ----------
    classNo : hashable
        Key under which the chosen pair is stored in ``self.ParaChoose``.
    trainLabel, testLabel : sequence of numbers
        Training and hold-out values.
    useAic : bool
        When true, a candidate must also have a lower AIC than the current
        best to be accepted.

    Returns
    -------
    tuple
        ``((p, q), forecast_array)`` for the selected candidate.

    Raises
    ------
    ValueError
        If no candidate reached an RMSE below 90000.
    """
    dataLength = len(trainLabel)
    # Build the transformed series as floats up front instead of writing
    # floats element by element into a Series that may be integer-typed.
    data = pd.Series([log(value + 1) for value in trainLabel], dtype=float)
    data.index = pd.Index(self.dtIndex[0:dataLength])

    minBias = 99999.0
    minAic = 99999.0
    (ar, ma) = (0, 0)
    bestOutput = None
    label = array(testLabel)
    horizon = len(testLabel)

    for p, q in [(1, 1), (0, 1), (1, 2), (2, 0), (2, 1), (2, 2)]:
        try:
            model = SARIMAX(data, order=(p, 1, q),
                            seasonal_order=(0, 1, 1, 7)).fit()
            # Undo the log(x + 1) transform on every forecast value.
            output = array([exp(value) - 1
                            for value in model.forecast(horizon)])
            bias = math.sqrt(
                sum((output - label) * (output - label)) / horizon)
        except Exception:
            # This candidate cannot be estimated or scored; try the next one.
            continue
        if bias < minBias and (not useAic or model.aic < minAic):
            (ar, ma) = (p, q)
            minBias = bias
            minAic = model.aic
            bestOutput = output

    if minBias < 90000.0:
        self.ParaChoose[classNo] = (ar, ma)
        return ((ar, ma), bestOutput)
    raise ValueError("no SARIMA candidate produced an acceptable forecast")
enforce_invertibility=False).fit() agile_model.summary() #just do deactive warnings regarding PyCharm and Numpy # noinspection PyTypeChecker agile_model_pred = np.exp( agile_model.predict(start=test_first_date, end=test_last_date, dynamic=True, typ='levels')) print(f'MAPE {np.round(mean_abs_pct_error(test_data,agile_model_pred),2)}%') # print(f'MAE:{np.round(mean_absolute_error(test_data,agile_model_pred),2)}') # noinspection PyTypeChecker agile_model_forecast = np.exp(agile_model.forecast(steps=2)) print(agile_model_forecast) def plot_prediciton(training_data, agile_model, agile_model_pred, original_data): model_data = training_data.values[1:].reshape(-1) - agile_model.resid[1:] model_data = pd.concat((model_data, agile_model_pred)) plt.figure(figsize=(16, 6)) plt.plot(model_data) plt.plot(original_data[1:]) plt.legend('Model Forecast', 'Original Data') plt.show() plot_prediciton(train_data, agile_model, agile_model_pred, df['Last'])
n_jobs=1, station) auto = auto.fit(xTrain) pred = auto.predict(len(xTest)) mean_squared_error(xTest, pred) np.sqrt(mean_squared_error(xTest, pred)) """Use of SARIMAX""" sar = SARIMAX( xTrain, order=(6, 2, 4), seasonal_order=(6, 2, 4, 1), trend='n', ) sar = sar.fit() pred = sar.forecast(steps=len(xTest)) print(mean_squared_error(xTest, pred)) print(np.sqrt(mean_squared_error(xTest, pred))) import pickle # Saving model to disk pickle.dump(sar, open('model.pkl', 'wb')) # Loading model to compare the results model = pickle.load(open('model.pkl', 'rb')) print(model.predict([[2020 - 01 - 01]]))
# bestModel: SARIMAX(0, 1, 1)x(1, 1, 1, 52) #经检测的最优训练模型 best_model = SARIMAX(df_day_train.tmax, order=(0, 1, 1), seasonal_order=(1, 1, 1, 52)).fit(disp=-1) # tsa.plot_acf(best_model.resid[13:].values.squeeze(), lags=48,) # # 下图是对残差进行的检验。可以确认服从正太分布,且不存在滞后效应。 # best_model.plot_diagnostics(lags=30, figsize=(16, 12)) # df_month2 = df_month_test[['tmax']] # best_model.predict() 设定开始结束时间 # invboxcox函数用于还愿boxcox序列 # df_month2['forecast'] = invboxcox(best_model.forecast(steps=5), lmbda) # 预测未来500个单位的数据 df_day2 = best_model.forecast(500) # plt.figure(figsize=(15, 7)) #数据展示 plt.plot(df_day2) df_day_train.tmax.plot(color='r', ls='--', label='Origin') #保存图片 plt.savefig('长春week.png') plt.show() # 获取rmse # 将预测数据切片 df_day2 = df_day2['20-':'2013'] # print(np.sqrt(sum((df_day2-ts)**2)/ts.size)) # save = pd.DataFrame(df_day2, columns = ['data', 'tmax']) #保存预测数据
def train_sarima(data=False,
                 hour=11,
                 split_date='2019-10-22 11:00:00',
                 n=30,
                 exog=False):
    '''Walk-forward one-day-ahead SARIMA(X) forecasts for `n` days.

    data: DataFrame to use; when left as a bool, data is loaded with
        get_daily(hour) (no exog) or get_all(hour) (with exog).
    hour: hour of a day, range(0, 23); only used when loading data.
    split_date: train/test are split on this date; it must be present in the
        index of the data.
    n: number of days that will be forecasted.
    exog: False for plain SARIMA, otherwise a tuple
        (list of exog features, order, seasonal_order).

    returns forecast (DataFrame with column 'price', indexed like test),
    lower, upper (lists of 95% interval bounds), mape, mase, train, test
    '''
    use_exog = not isinstance(exog, bool)
    if isinstance(data, bool):
        df = get_all(hour=hour) if use_exog else get_daily(hour=hour)
    else:
        df = data

    # formatting split_date
    split_ts = pd.DatetimeIndex(np.array([split_date]))[0]
    # get train and test for plotting only
    train = df[df.index <= split_ts]
    test = df[(df.index > split_ts)
              & (df.index <= split_ts + pd.Timedelta(days=n))]

    # will collect following information from forecast
    forecasts = []
    upper = []
    lower = []

    # loop over to get walk forward forecast for n days
    for i in range(1, n + 1):
        # walk one day forward to set train_set
        predict_date = df.index[df.index == split_ts] + pd.Timedelta(days=i)
        train_set = df[df.index < predict_date[0]]
        train_set.index = pd.DatetimeIndex(
            train_set.index.values, freq=train_set.index.inferred_freq)

        if use_exog:
            # Build Model with exogenous features
            sarima = SARIMAX(train_set.price,
                             exog=train_set[exog[0]],
                             order=exog[1],
                             seasonal_order=exog[2])
            sarima = sarima.fit(maxiter=200)
            # get features for forecast
            exog_fore = test[test.index == predict_date[0]][exog[0]]
            results = sarima.get_forecast(1, exog=exog_fore)
        else:
            # Build Model without exogenous features
            sarima = SARIMAX(train_set,
                             order=(1, 1, 1),
                             seasonal_order=(1, 0, 2, 7))
            sarima = sarima.fit(maxiter=200)
            results = sarima.get_forecast(1)

        # One forecast call provides both the point forecast and its interval.
        confidence_int = results.conf_int(alpha=0.05)

        # add forecast result into the lists (positional first row)
        lower.append(confidence_int['lower price'].iloc[0])
        upper.append(confidence_int['upper price'].iloc[0])
        forecasts.append(results.predicted_mean.iloc[0])

    # calculate the error metrics
    mape = get_mape(test.price, forecasts)
    mase = get_mase(test.price, forecasts, train.price)

    # create forecast df with datetimeIndex
    forecast = pd.DataFrame(forecasts, index=test.index, columns=['price'])
    return forecast, lower, upper, mape, mase, train, test
# Open the file
df = pd.read_csv('长春.csv', encoding='utf-8')
# Use the parsed 'date' column as the time index
ds = pd.to_datetime(df.date)
df.index = ds
# Make sure the temperature column is floating point (astype returns a new
# Series, so the result has to be assigned back)
df['平均气温'] = df['平均气温'].astype('double')
# Show the data before training
df.drop(['date'], axis=1, inplace=True)
df.平均气温.plot(color='r', ls='--', label='Origin')
plt.show()
# Resample to daily means
df_day = df.resample('D').mean()
# Select the training period
df_day_train = df_day['2017-5-31':'2020-5-31']
# Model specification chosen by earlier testing
best_model = SARIMAX(df_day_train.平均气温, order=(1, 1, 1), seasonal_order=(1, 1, 1, 90)).fit(disp=-1)
# Forecast the next 90 periods
df_day2 = best_model.forecast(90)
# plt.figure(figsize=(15, 7))
# Plot the forecast together with the training data
plt.plot(df_day2)
df_day_train.平均气温.plot(color='r', ls='--', label='Origin')
# Save the figure
plt.savefig('长春daytave.png')
plt.show()
# Slice the forecast to the reporting range
df_day2 = df_day2['2020-5-31':'2025-5-31']
# Save the forecast data
df_day2.to_csv('长春daytave.csv')
# Exhaustive search over p, d, q, P, D, Q in {0, 1, 2} with a fixed seasonal
# period of 12: 3**6 = 729 fits. Each fit appends one entry to `label`,
# `error`, `AIC` and `BIC`.
# NOTE(review): `error` and `BIC` are appended to but not created in this
# span -- presumably they are initialised earlier in the script; confirm.
AIC = []
label = []
for p in range(0, 3):
    for d in range(0, 3):
        for q in range(0, 3):
            for P in range(0, 3):
                for D in range(0, 3):
                    for Q in range(0, 3):
                        model_fit = SARIMAX(
                            training,
                            order=(p, d, q),
                            seasonal_order=(P, D, Q, 12),
                            enforce_stationarity=False,
                            enforce_invertibility=False).fit(disp=-1)
                        forecast = model_fit.forecast(len(testing))
                        # The label is the digits p d q P D Q followed by 12,
                        # read as an integer (leading zeros are dropped).
                        label.append(
                            int(
                                str(p) + str(d) + str(q) + str(P) + str(D) +
                                str(Q) + str(12)))
                        error.append(mse(testing, forecast))
                        AIC.append(model_fit.aic)
                        BIC.append(model_fit.bic)
                        print('ARIMA:', p, d, q, 'Seasonal:', P, D, Q)
                        # Drop the references to the fitted model and forecast
                        # before the next iteration.
                        del model_fit
                        del forecast
# Convert the results into a dataframe using pandas
import pandas as pd
# 729 rows: one per parameter combination tried above.
BIC = pd.DataFrame(np.asarray(BIC).reshape(729, 1))
def fit_sarimax(self):
    """Fit a SARIMAX model on deaths vs. total cases and chart a 5-step forecast.

    The model (order (0, 0, 3), no seasonal part) is fitted on all but the
    last row of ``self.data_lag``, with "fallecimientos" as the endogenous
    series and "casos_total" as the regressor. Future regressor values come
    from ``self.forecast``.

    Returns
    -------
    tuple
        ``(e, fig, summary)``: a one-row frame with today's prediction and its
        absolute error against the last row of ``self.dt``, a plotly bar
        figure of observed and predicted values, and the model summary.

    A previous auto_arima order search and a pickle-based history of
    prediction errors are no longer part of this method.
    """
    training = self.data_lag.iloc[:-1, ]
    fitted = SARIMAX(endog=training[["fallecimientos"]],
                     exog=training[["casos_total"]],
                     order=(0, 0, 3),
                     seasonal_order=(0, 0, 0, 0)).fit()
    summary = fitted.summary()

    predictions = pd.DataFrame(
        fitted.forecast(steps=5, exog=self.forecast[["casos_total"]]))

    last_row = len(self.dt) - 1
    last_date = self.dt.loc[last_row, "fecha"]
    today_prediction = predictions.iloc[0, 0]
    e = pd.DataFrame({
        "Modelo": "SARIMAX",
        "Predicción de hoy": [today_prediction],
        "Error de hoy": [
            abs(today_prediction - self.dt.loc[last_row, "fallecimientos"])
        ],
    })

    # Date the forecast rows: row i is i days after the last observed date.
    predictions["fecha"] = last_date
    predictions.columns = ["fallecimientos", "fecha"]
    predictions.reset_index(drop=True, inplace=True)
    for offset in range(len(self.forecast)):
        predictions.loc[offset, "fecha"] = (
            predictions.fecha[offset] + timedelta(days=offset))

    # The first forecast row is left out of the chart data.
    new = pd.concat(
        (self.dt[["fallecimientos", "fecha"]], predictions.iloc[1:, :]),
        axis=0)
    new["Predicciones"] = np.where(new.fecha <= last_date, "Real", "Pred")

    fig = px.bar(
        new,
        x="fecha",
        y="fallecimientos",
        color="Predicciones",
    )

    return e, fig, summary
# if aic < best_aic: # best_model = model # best_aic = aic # best_param = parameters # results.append([parameters, model.aic]) # # result_table = pd.DataFrame(results) # result_table.columns = ['parameters', 'aic'] # print(result_table.sort_values(by='aic', ascending=True).head()) # print(best_model.summary()) # bestModel: SARIMAX(0, 1, 1)x(1, 1, 1, 12) best_model=SARIMAX(df_day_train.tmax, order=(0, 1, 1),seasonal_order=(1, 1, 1, 52)).fit(disp=-1) # tsa.plot_acf(best_model.resid[13:].values.squeeze(), lags=48,) # # 下图是对残差进行的检验。可以确认服从正太分布,且不存在滞后效应。 # best_model.plot_diagnostics(lags=30, figsize=(16, 12)) # df_month2 = df_month_test[['tmax']] # best_model.predict() 设定开始结束时间 # invboxcox函数用于还愿boxcox序列 # df_month2['forecast'] = invboxcox(best_model.forecast(steps=5), lmbda) df_day2 = best_model.forecast(1000) # plt.figure(figsize=(15, 7)) plt.plot(df_day2) df_day_train.tmax.plot(color='r', ls='--', label='Origin') plt.show() # 获取mse
#print(predictions) #creating the basis of error in the test error_test = check_error(compare_test_df['AveragePrice'], compare_test_df['Predicted_AveragePrice'], name_col='Value Comp. Pred.vs. Fit', index_name='Testing Base') print(' TEST and PREDICTION') plot_compare_error(compare_test_df, len(compare_test_df) - 1) print(error_test) #dti = pd.date_range(data_index_max, periods=5, freq='W-SUN') print("____________________________") print("Forecast for one period") print(model.forecast()[0]) #print("on") #print( dti[1] ) nstepsfor = int(15) pred_uc = model.forecast(steps=nstepsfor)[0] #print(pred_ci = pred_uc.conf_int()) print("CONFIDENCE INTERVALS") print("____________________________") print("Forecast for") print(nstepsfor) #for t in range(0,nstepsfor): # print(pred_uc[t])
# Encode the categorical columns of the test frame.
# NOTE(review): `le` is re-fitted on each test column here, so the codes are
# derived from the test values alone -- confirm they match the training encoding.
X_Test_CS.Country = le.fit_transform(X_Test_CS.Country)
X_Test_CS['State'] = le.fit_transform(X_Test_CS['State'])
X_Test_CS_Min_Date = X_Test_CS['Date'].min()
X_Train_CS_Max_Date = X_Train_CS['Date'].max()
# SARIMA data: one model per target series
model1 = SARIMAX(y1_Train_CS, order=(1,1,0),
                 #seasonal_order=(1,1,0,12),
                 measurement_error=True).fit(disp=False)
model2 = SARIMAX(y2_Train_CS, order=(1,1,0),
                 #seasonal_order=(1,1,0,12),
                 measurement_error=True).fit(disp=False)
# Forecast one step for every test row dated after the last training date.
y1_xpred = model1.forecast(X_Test_CS[X_Test_CS['Date'] > X_Train_CS_Max_Date].shape[0])
y2_xpred = model2.forecast(X_Test_CS[X_Test_CS['Date'] > X_Train_CS_Max_Date].shape[0])
# For the dates where train and test overlap, use the observed training values
# and put them in front of the forecasts.
train_confirmed_y1 = X_Train_CS[(X_Train_CS['Date'] >= X_Test_CS_Min_Date)]['ConfirmedCases']
train_confirmed_y2 = X_Train_CS[(X_Train_CS['Date'] >= X_Test_CS_Min_Date)]['Fatalities']
y1_xpred = np.concatenate((train_confirmed_y1,y1_xpred), axis = 0)
y2_xpred = np.concatenate((train_confirmed_y2,y2_xpred), axis = 0)
# Simple linear model without enhancing the data
# After we transform them they should roughly follow a linear regression trend
X_Train_CS = X_Train_CS.loc[:, ['State', 'Country', 'Date']]
# y1_Train_CS = y1_Train_CS.apply(lambda x: np.log1p(x))
# y2_Train_CS = y2_Train_CS.apply(lambda x: np.log1p(x))
# train_confirmed_y1 = train_confirmed_y1.apply(lambda x: np.log1p(x))
results = mod.fit() if results.aic < a: a = results.aic s = 'ARIMA{}x{} - AIC:{}'.format(param, param_seasonal, results.aic) except: continue print(s) ''' pdq = (0, 1, 1) PDQ = (1, 1, 1, 4) model_train = SARIMAX(train.REVENUE, order=pdq, seasonal_order=PDQ, enforce_stationarity=False).fit() predict_train = model_train.forecast(test_size + 1) model_run = SARIMAX(df.REVENUE, order=pdq, seasonal_order=PDQ).fit() predict_run = model_run.forecast(1) #residual = predict_train - test ''' print(model_train.summary()) model_train.plot_diagnostics() ''' print(predict_run[0]) plt.plot(df.REVENUE, label='df', marker='o') plt.plot(predict_train, label='SARIMA', marker='o', linestyle='--') plt.plot(predict_run, label='SARIMA_RUN', marker='o') plt.legend(loc='best')
class SARIMAXModel(ModelStrategy):
    '''
    A class for a Seasonal Autoregressive Integrated Moving Average Model and the standard operations on it
    '''

    def __init__(self, hparams, log_dir=None):
        '''
        Reads the model orders from the hyperparameter dict
        :param hparams: Dict with optional keys AUTO_PARAMS, TREND_P, TREND_D, TREND_Q,
                        SEASONAL_P, SEASONAL_D, SEASONAL_Q and M
        :param log_dir: Passed through to the base class
        '''
        univariate = True
        model = None
        name = 'SARIMAX'
        self.auto_params = hparams.get('AUTO_PARAMS', False)
        self.trend_p = int(hparams.get('TREND_P', 10))
        self.trend_d = int(hparams.get('TREND_D', 2))
        self.trend_q = int(hparams.get('TREND_Q', 0))
        self.seasonal_p = int(hparams.get('SEASONAL_P', 5))
        self.seasonal_d = int(hparams.get('SEASONAL_D', 2))
        self.seasonal_q = int(hparams.get('SEASONAL_Q', 0))
        self.m = int(hparams.get('M', 12))
        super(SARIMAXModel, self).__init__(model,
                                           univariate,
                                           name,
                                           log_dir=log_dir)

    def fit(self, dataset):
        '''
        Fits a SARIMAX forecasting model
        :param dataset: A Pandas DataFrame with 2 columns: Date and Consumption.
                        Its columns are renamed in place to ds and y.
        '''
        if dataset.shape[1] != 2:
            raise Exception(
                'Univariate models cannot fit with datasets with more than 1 feature.'
            )
        dataset.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        },
                       inplace=True)
        series = dataset.set_index('ds')
        if self.auto_params:
            # Search up to twice the configured orders; the non-seasonal bounds
            # come from the TREND_* settings and the seasonal bounds from the
            # SEASONAL_* settings.
            best_model = pmdarima.auto_arima(
                series,
                seasonal=True,
                stationary=False,
                m=self.m,
                information_criterion='aic',
                max_order=2 * (self.trend_p + self.trend_q),
                max_p=2 * self.trend_p,
                max_d=2 * self.trend_d,
                max_q=2 * self.trend_q,
                max_P=2 * self.seasonal_p,
                max_D=2 * self.seasonal_d,
                max_Q=2 * self.seasonal_q,
                error_action='ignore'
            )  # Automatically determine model parameters
            order = best_model.order
            seasonal_order = best_model.seasonal_order
            print("Best SARIMAX params: (p, d, q):", best_model.order,
                  " and  (P, D, Q, s):", best_model.seasonal_order)
        else:
            order = (self.trend_p, self.trend_d, self.trend_q)
            seasonal_order = (self.seasonal_p, self.seasonal_d,
                              self.seasonal_q, self.m)
        self.model = SARIMAX(series,
                             order=order,
                             seasonal_order=seasonal_order,
                             enforce_stationarity=True,
                             enforce_invertibility=True).fit()
        print(self.model.summary())
        return

    def evaluate(self, train_set, test_set, save_dir=None, plot=False):
        '''
        Evaluates performance of SARIMAX model on test set
        :param train_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param test_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param save_dir: Directory in which to save forecast metrics
        :param plot: Flag indicating whether to plot the forecast evaluation
        :return: The metrics returned by evaluate_forecast
        '''
        import pandas as pd  # local import: only needed for concat below

        train_set.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        },
                         inplace=True)
        test_set.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        },
                        inplace=True)
        train_set = train_set.set_index('ds')
        test_set = test_set.set_index('ds')
        train_set["model"] = self.model.fittedvalues
        test_set["forecast"] = self.forecast(
            test_set.shape[0])['Consumption'].tolist()

        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        df_forecast = pd.concat([train_set,
                                 test_set]).rename(columns={'y': 'gt'})
        test_metrics = self.evaluate_forecast(df_forecast,
                                              save_dir=save_dir,
                                              plot=plot)
        return test_metrics

    def forecast(self, days, recent_data=None):
        '''
        Create a forecast for the given number of steps after the end of the fitted data.
        :param days: Number of days into the future to produce a forecast for
        :param recent_data: Not used by this model
        :return: A DataFrame with columns Date and Consumption
        '''
        forecast_df = self.model.forecast(steps=days).reset_index(level=0)
        forecast_df.columns = ['Date', 'Consumption']
        return forecast_df

    def save(self, save_dir, scaler_dir=None):
        '''
        Saves the model to disk; does nothing when no model has been fitted
        :param save_dir: Directory in which to save the model
        :param scaler_dir: Not used by this model
        '''
        if self.model:
            model_path = os.path.join(save_dir,
                                      self.name + self.train_date + '.pkl')
            self.model.save(model_path)  # Serialize and save the model object

    def load(self, model_path, scaler_path=None):
        '''
        Loads the model from disk
        :param model_path: Path to saved model; must end in .pkl
        :param scaler_path: Not used by this model
        '''
        if os.path.splitext(model_path)[1] != '.pkl':
            raise Exception('Model file path for ' + self.name +
                            ' must have ".pkl" extension.')
        self.model = SARIMAXResults.load(model_path)
        return
adjusted_y_train_fatalities = y_train_fatalities[ idx:] #.values.reshape(-1, 1) idx = X_pred[X_pred[feature_use] == 0].shape[0] adjusted_X_pred = X_pred[idx:][feature_use].values.reshape(-1, 1) pred_data = test[(test['Country_Region'] == country) & (test['Province_State'] == province)] max_train_date = train[(train['Country_Region'] == country) & ( train['Province_State'] == province)]['Date'].max() min_test_date = pred_data['Date'].min() model = SARIMAX( adjusted_y_train_confirmed, order=(1, 1, 0), #seasonal_order=(1,1,0,12), measurement_error=True).fit(disp=False) y_hat_confirmed = model.forecast( pred_data[pred_data['Date'] > max_train_date].shape[0]) y_train_confirmed = train[(train['Country_Region'] == country) & (train['Province_State'] == province) & (train['Date'] >= min_test_date)]['ConfirmedCases'].values y_hat_confirmed = np.concatenate((y_train_confirmed, y_hat_confirmed), axis=0) model = SARIMAX( adjusted_y_train_fatalities, order=(1, 1, 0), #seasonal_order=(1,1,0,12), measurement_error=True).fit(disp=False) y_hat_fatalities = model.forecast( pred_data[pred_data['Date'] > max_train_date].shape[0]) y_train_fatalities = train[(train['Country_Region'] == country)