def main():
    """Run the full pipeline on the ``LW_monthly.xlsx`` yield data.

    Steps, in order: load the spreadsheet, fit the parameter matrix with
    ``NonLinearLeastSquares``, estimate an ARMA(1, 1) on the selected beta
    with ``ARMA_Model``, print the library ``ARIMA`` fit of the same series
    for comparison, compute AIC/BIC and run a 12-step ``Forecast``.
    """
    # --- data ---------------------------------------------------------
    yields_df = pd.read_excel('LW_monthly.xlsx')
    yields_df.head()
    n_obs, n_cols = yields_df.shape
    maturities = list(yields_df.columns.values)
    # Everything except the first spreadsheet column is passed on as yields.
    yield_matrix = np.array(yields_df.values)[:, 1:]

    theta_opt = NonLinearLeastSquares(maturities, yield_matrix, n_obs, n_cols)

    # Mean of the fourth parameter column (computed but not used further).
    lambda_mean = np.average(theta_opt[:, 3])

    # --- hand-written ARMA(P, Q) on one beta ----------------------------
    ar_order, ma_order = 1, 1
    beta_number = 2  # the beta which is regressed with ARMA(P, Q)
    betas = theta_opt
    (
        _predictions,
        _residuals,
        sigma_std,
        ar_coefs,
        ma_coefs,
        log_lik,
    ) = ARMA_Model(beta_number, betas, ar_order, ma_order)

    # --- compare with the packaged implementation -----------------------
    reference_fit = ARIMA(
        betas[:, beta_number - 1], order=(ar_order, 0, ma_order)
    ).fit()
    print(reference_fit.summary())

    _aic, _bic = Model_AICBIC(sigma_std, ar_order, ma_order, n_obs, log_lik)

    # --- forecast T + 12 -------------------------------------------------
    horizon = 12
    Forecast(
        n_obs,
        sigma_std,
        beta_number,
        betas,
        ar_coefs,
        ma_coefs,
        horizon,
        ma_order,
        ar_order,
    )
def train_model(series, pdq, verbose=False):
    """Fit an ARIMA model of order ``pdq`` on ``series.values``.

    :param series: object exposing ``.values`` (the data handed to ARIMA)
    :param pdq: order passed to ``ARIMA`` as ``order``
    :param verbose: when truthy, print the fitted model's summary
    :return: the fitted model
    """
    fitted = ARIMA(series.values, order=pdq).fit()
    if verbose:
        print(fitted.summary())
    return fitted
def arima_model(serie, order, model_report=True, get_residuals=True):
    """
    Fit an ARIMA model and optionally return its residuals.

    Parameters
    ----------
    serie : pd.Series
        The selected series.
    order : tuple
        Order of the ARIMA model.
    model_report : bool, optional
        True to print the model summary, False otherwise.
    get_residuals : bool, optional
        True to also return the residual series, False otherwise.

    Returns
    -------
    tuple : (model, pd.Series)
        The fitted model and the residual series when ``get_residuals`` is
        True; otherwise only the fitted model is returned.
    """
    fitted = ARIMA(serie, order=order).fit()

    if model_report:
        print(fitted.summary())

    if not get_residuals:
        return fitted

    # Wrap the residuals in a frame so the unnamed column 0 can be relabelled.
    resid_frame = pd.DataFrame(fitted.resid).rename({0: 'res'}, axis=1)
    return fitted, resid_frame['res']
def test_ARIMA():
    """Fit an ARIMA(1, 1, 0) on the 'CS-Aaa-3MO' test column and print it."""
    from statsmodels.tsa.arima.model import ARIMA

    frame = get_ytw_test()
    arima = ARIMA(
        endog=frame['CS-Aaa-3MO'],
        exog=None,
        order=(1, 1, 0),
        trend=None,
    )
    fitted = arima.fit()
    print(fitted.summary())
def q3_b():
    """Fit an ARIMA(0, 1, 1) on log WMT values and plot fit versus data.

    Loads the 'HW5_WMT' sheet, adds first and four-period log differences,
    keeps only the rows located before '2016-03-31', then fits, prints and
    plots the model's predictions next to the log series.
    """
    print("begin")
    df = get_data("data/HW5_WMT.xlsx", "HW5_WMT")
    df.index = pd.to_datetime(df.index, format='%Y%m%d')

    log_wmt = np.log(df['WMT'])
    df['first_difference'] = log_wmt - log_wmt.shift(1)
    df['season_difference'] = log_wmt - log_wmt.shift(4)

    # head(n) with n = position of the cut-off date keeps the rows before it.
    cutoff_position = df.index.get_loc('2016-03-31')
    df = df.head(cutoff_position)
    print(df)

    log_wmt = np.log(df['WMT'])
    fitted = ARIMA(log_wmt, order=(0, 1, 1)).fit()  # p=0, d=1, q=1
    print(fitted.summary())

    fitted.predict().plot()
    log_wmt.plot()
    plt.show()
def arima_model(vEndog, mExog=None, tPDQ=None):
    """
    Fits an ARIMA model. Order can be specified or determined by auto_arima.
    Differently from other models, it does not work on patsy/R formula syntax.

    :param vEndog: DataFrame column/numpy vector containing endogenous data
        (which will be regressed upon itself)
    :param mExog: vector/matrix containing exogenous data. Defaults to None
    :param tPDQ: tuple (p, d, q) containing order of the model;
        p: number of autoregressions (AR)
        d: number of differentiations (I)
        q: number of past prevision errors/moving averages (MA)
        If None (default), performs an auto_arima()
    :return mod: fitted model instance
    """
    ## Creating model
    if tPDQ is not None:
        # Order is specified; exog=None is the same as omitting the argument.
        mod_arima = ARIMA(endog=vEndog, exog=mExog, order=tPDQ).fit(cov_type='robust')
    else:
        # Order isn't specified: let auto_arima() choose it, then refit with
        # robust covariance. The exogenous data must be passed again, otherwise
        # the refit silently drops the regressors.
        mod_arima = auto_arima(y=vEndog, X=mExog)
        mod_arima = mod_arima.fit(y=vEndog, X=mExog, cov_type='robust')

    ## Printing summary and diagnostics
    print(mod_arima.summary())
    print("For heteroskdasticity, check Prob(H), where H0: homoskedasticity, and the standardized residual graph.")
    print("If there is hetero., the model error can't be a white noise (which is the desired thing).")
    print("Estimaed Density and Jarque-Bera have information on normality.")
    print("In the correlogram, all lollipops must be inside of the shaded area.")

    # Plots
    mod_arima.plot_diagnostics(figsize=(10, 10))
    plt.show()

    # Residual means.
    # The two branches return different objects: one exposes `resid` as a
    # method, the other as a plain attribute. Calling it unconditionally
    # raised TypeError on the explicit-order path.
    residuals = mod_arima.resid
    if callable(residuals):
        residuals = residuals()
    tMean0 = stats.ttest_1samp(residuals, 0, nan_policy='omit')
    print(f"P-value for the test that residual mean is equal to 0: {np.around(tMean0[1], 5)}.")
    print("If p < 0.05, H0 is rejected and the residual mean is different from 0 (not ideal).")

    ## Returning
    return mod_arima
def arima(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    ARIMA prediction

    Parses the command options, fits either an explicit-order ARIMA or a
    pmdarima auto-ARIMA on the "5. adjusted close" column, plots the
    forecast and prints it. When an end date is given, the data after that
    date is held back and compared against the forecast (backtesting).
    Any exception raised along the way is printed instead of propagated.

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        ticker
    df_stock: pd.DataFrame
        Dataframe of prices
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an
            autoregressive integrated moving average (ARIMA) model is a generalization of an
            autoregressive moving average (ARMA) model. Both of these models are fitted to time
            series data either to better understand the data or to predict future points in the
            series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative
            integers, p is the order (number of time lags) of the autoregressive model, d is the
            degree of differencing (the number of times the data have had past values subtracted),
            and q is the order of the moving-average model.
        """,
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: p,d,q.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # BACKTESTING
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            if (ns_parser.s_end_date < get_next_stock_market_days(
                    last_stock_day=df_stock.index[0],
                    n_next_days=5 + ns_parser.n_days)[-1]):
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days)

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Hold back the real prices for the forecast window, train on the rest.
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        # Machine Learning model
        if ns_parser.s_order:
            t_order = tuple(int(term) for term in ns_parser.s_order.split(","))
            model = ARIMA(df_stock["5. adjusted close"].values,
                          order=t_order).fit()
            # `start` is zero-indexed, so index n_obs is the first
            # out-of-sample step. Starting at n_obs + 1 skipped it and shifted
            # every forecast one day early relative to its label.
            n_obs = len(df_stock["5. adjusted close"])
            l_predictions = model.predict(
                start=n_obs,
                end=n_obs + ns_parser.n_days - 1,
            )
        else:
            # The pmdarima keyword is `information_criterion`; the misspelt
            # `information_criteria` was not a recognised option, so the
            # user's --ic choice never took effect.
            if ns_parser.b_seasonal:
                model = pmdarima.auto_arima(
                    df_stock["5. adjusted close"].values,
                    error_action="ignore",
                    seasonal=True,
                    m=5,
                    information_criterion=ns_parser.s_ic,
                )
            else:
                model = pmdarima.auto_arima(
                    df_stock["5. adjusted close"].values,
                    error_action="ignore",
                    seasonal=False,
                    information_criterion=ns_parser.s_ic,
                )
            # Negative forecasts are clamped to zero.
            l_predictions = [
                i if i > 0 else 0
                for i in model.predict(n_periods=ns_parser.n_days)
            ]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=df_stock["5. adjusted close"].index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, df_stock["5. adjusted close"], lw=2)
        if ns_parser.s_order:
            # BACKTESTING
            if ns_parser.s_end_date:
                plt.title(
                    f"BACKTESTING: ARIMA {str(t_order)} on {s_ticker} - {ns_parser.n_days} days prediction"
                )
            else:
                plt.title(
                    f"ARIMA {str(t_order)} on {s_ticker} - {ns_parser.n_days} days prediction"
                )
        else:
            # BACKTESTING
            if ns_parser.s_end_date:
                plt.title(
                    f"BACKTESTING: ARIMA {model.order} on {s_ticker} - {ns_parser.n_days} days prediction"
                )
            else:
                plt.title(
                    f"ARIMA {model.order} on {s_ticker} - {ns_parser.n_days} days prediction"
                )
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The visibility flag is passed positionally: its keyword was renamed
        # from `b` to `visible` in Matplotlib, so either keyword breaks on
        # some version.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-",
                 alpha=0.2)
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [df_stock["5. adjusted close"].values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(df_stock.index[-1], df_pred.index[-1],
                    facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1], ymin, ymax, linewidth=1,
                   linestyle="--", color="k")

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    df_stock["5. adjusted close"].values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, df_future["5. adjusted close"],
                        c="tab:blue", lw=3)
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    df_stock["5. adjusted close"].values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock.index[-1], df_pred.index[0]],
                [df_stock["5. adjusted close"].values[-1], df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(df_stock.index[-1],
                     df_pred.index[-1] + datetime.timedelta(days=1))
            plt.xticks(
                [
                    df_stock.index[-1],
                    df_pred.index[-1] + datetime.timedelta(days=1)
                ],
                visible=True,
            )
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(
                df_future.index,
                100 * (df_pred.values - df_future["5. adjusted close"].values)
                / df_future["5. adjusted close"].values,
                lw=2,
                c="red",
            )
            plt.scatter(
                df_future.index,
                100 * (df_pred.values - df_future["5. adjusted close"].values)
                / df_future["5. adjusted close"].values,
                c="red",
                lw=5,
            )
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    0,
                    100 * (df_pred.values[0] -
                           df_future["5. adjusted close"].values[0]) /
                    df_future["5. adjusted close"].values[0],
                ],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(df_stock.index[-1],
                     df_pred.index[-1] + datetime.timedelta(days=1))
            plt.xticks(
                [
                    df_stock.index[-1],
                    df_pred.index[-1] + datetime.timedelta(days=1)
                ],
                visible=True,
            )
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["5. adjusted close"]

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color,
                                  axis=1).to_string())
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values,
                                  df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred,
                                    df_stock["5. adjusted close"].values[-1])
        print("")

    except Exception as e:
        # Command boundary: report the failure instead of crashing the caller.
        print(e, "\n")
# Partial autocorrelation of the first-differenced close, drawn on `ax2`
# (the axis is created earlier in the script, outside this excerpt).
fig = sm.graphics.tsa.plot_pacf(df["Close First Difference"].iloc[2:], lags=40, ax=ax2)
plt.title("Close First Difference")
plt.show()

# splitting the data into training and testing (last 30 for testing, everything prior is training)
print(df.shape)
train = df.iloc[:-30]
test = df.iloc[-30:]
print("train and test shape:", train.shape, test.shape)

# fitting the train model
model = ARIMA(train["close"], order=(2, 1, 2))
model = model.fit()  # `model` now holds the fitted results, not the specification
print("Model summary for training set:", model.summary())

# now we predict via specifying the start and end range
# in this case, we want to compare prediction with the testing dataset
# start/end are positional bounds covering exactly the len(test) rows after the training data
start = len(train)
end = len(train) + len(test) - 1

# The line marked * below overwrites the prediction index with the matching
# slice of df's index (end + 1 because slicing excludes the stop position).
# NOTE(review): presumably only needed when predict() does not return dates
# as its index - confirm and comment the line out otherwise.
prediction = model.predict(start=start, end=end, typ="levels").rename("ARIMA Predictions")
prediction.index = df.index[start:end + 1]  # * re-label with df's index

# plotting comparison of predicted vs test
plt.title("Prediction vs Testing Set")
test["close"].plot(legend=True)
prediction.plot(legend=True)
plt.show()
data = np.log(data_price) - np.log(data_price.shift(1)) scaled_data = pd.DataFrame() for i in data.columns: scaled_data[i] = scaler_timeseries(data[i]) scaled_index = scaled_data["DJI"] scaled_crypto = scaled_data["BTC"] order_index = (1, 0, 0) order_crypto = (0, 0, 0) lag = 6 #def models_charts(scaled_index, scaled_crypto, order_index, order_crypto, lag): modelo_index = ARIMA(scaled_index, order=order_index).fit() print(modelo_index.summary()) modelo_index_garch = arch.arch_model(modelo_index.resid.dropna(), vol="GARCH").fit() print(modelo_index_garch.summary()) cond_var_index = modelo_index_garch.conditional_volatility**2 exog_crypto = pd.concat([scaled_index, modelo_index.resid], axis=1).shift(-1 * lag) # lag 2 debvido a causalidade exog_crypto.columns = ["Index", "Index resid"] dados_crypto = pd.concat([exog_crypto, scaled_crypto], axis=1).dropna(how='any') modelo_crypto = ARIMA(endog=dados_crypto[scaled_crypto.name], exog=dados_crypto[["Index", "Index resid"]], order=order_crypto).fit() print(modelo_crypto.summary()) modelo_crypto_garch = arch.arch_model(modelo_crypto.resid.dropna(),
def arima(l_args, s_ticker, df_stock):
    """Fit an ARIMA model on the adjusted close and plot/print a forecast.

    Parses the command options, fits either an explicit-order ARIMA or a
    pmdarima auto-ARIMA on the "5. adjusted close" column of ``df_stock``,
    plots the history together with the forecast and prints the predicted
    values. Any exception raised along the way is printed, not propagated.

    :param l_args: list of command-line style arguments for this command
    :param s_ticker: ticker name, used in the plot title
    :param df_stock: price DataFrame with a "5. adjusted close" column
    """
    parser = argparse.ArgumentParser(
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an
            autoregressive integrated moving average (ARIMA) model is a generalization of an
            autoregressive moving average (ARMA) model. Both of these models are fitted to time
            series data either to better understand the data or to predict future points in the
            series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative
            integers, p is the order (number of time lags) of the autoregressive model, d is the
            degree of differencing (the number of times the data have had past values subtracted),
            and q is the order of the moving-average model.
        """,
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: pdq.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        # Nothing to do if the helper reported a parsing problem by returning
        # a falsy value; without this guard the attribute access below raised.
        if not ns_parser:
            return

        # Machine Learning model
        if ns_parser.s_order:
            # Accept the documented compact form ("110") and, additionally,
            # the comma-separated form ("1,1,0"), which also allows orders
            # above 9.
            if "," in ns_parser.s_order:
                order_terms = ns_parser.s_order.split(",")
            else:
                order_terms = ns_parser.s_order
            t_order = tuple(int(term) for term in order_terms)
            model = ARIMA(df_stock["5. adjusted close"].values, order=t_order).fit()
            # `start` is zero-indexed, so index n_obs is the first
            # out-of-sample step. Starting at n_obs + 1 skipped it and shifted
            # every forecast one day early relative to its label.
            n_obs = len(df_stock["5. adjusted close"])
            l_predictions = model.predict(
                start=n_obs,
                end=n_obs + ns_parser.n_days - 1,
            )
        else:
            # The pmdarima keyword is `information_criterion`; the misspelt
            # `information_criteria` was not a recognised option, so the
            # user's --ic choice never took effect.
            if ns_parser.b_seasonal:
                model = pmdarima.auto_arima(
                    df_stock["5. adjusted close"].values,
                    error_action="ignore",
                    seasonal=True,
                    m=5,
                    information_criterion=ns_parser.s_ic,
                )
            else:
                model = pmdarima.auto_arima(
                    df_stock["5. adjusted close"].values,
                    error_action="ignore",
                    seasonal=False,
                    information_criterion=ns_parser.s_ic,
                )
            l_predictions = model.predict(n_periods=ns_parser.n_days)

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=df_stock["5. adjusted close"].index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.figure()
        plt.plot(df_stock.index, df_stock["5. adjusted close"], lw=2)
        if ns_parser.s_order:
            plt.title(
                f"ARIMA {str(t_order)} on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"ARIMA {model.order} on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The visibility flag is passed positionally: its keyword was renamed
        # from `b` to `visible` in Matplotlib, so either keyword breaks on
        # some version.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [df_stock["5. adjusted close"].values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k"
        )
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, df_stock["5. adjusted close"].values[-1])
        print("")

    except Exception as e:
        # Command boundary: report the failure instead of crashing the caller.
        print(e)
        print("")
) plt.xlabel('Date') plt.ylabel('Nombre de vélos') plt.title( 'Nombre de vélos par jour entre 00h00 et 09h00 au totem d\'Albert 1er') plt.legend() plt.show() #Using auto_arima algorithm to find the best suitable orders for ARIMA model stepwise_fit = auto_arima(totem['Count'], trace=True, seasonal=True) stepwise_fit.summary #Building the ARIMA model model = ARIMA(totem['Count'], order=(2, 1, 1)) model = model.fit() model.summary() pred = model.predict(end=len(totem) + 1, type="levels").rename( 'Prediction ARIMA' ) #The last index printed corresponds to the day we want the prediction for #Plotting the prediction curve pred.plot(legend=True) plt.xlabel('Date') plt.ylabel('Nombre de vélos') plt.title( 'Nombre de vélos par jour entre 00h00 et 09h00 au totem d\'Albert 1er') plt.legend() plt.show() print(pred.tail)
resid_frame = pd.DataFrame(columns = data.columns) conditional_volatilities_stocks_frame = pd.DataFrame(columns=data.columns) for i in data.columns: resid_frame[i] = pd.Series(modelos[i].arima_res_.resid, index= data[i].dropna().index) conditional_volatilities_stocks_frame[i] = pd.Series(modelos_garch[i].conditional_volatility, index= data[i].dropna().index) #DADOS DO INDICE indice = yf.download("^DJI", start="2002-01-01",interval="1d", group_by='ticker', auto_adjust=True) indice = indice.fillna(method="ffill").fillna(method="bfill") indice_ret = (np.log(indice["Close"]) - np.log(indice["Close"].shift(1))).dropna() indice_modelo = ARIMA(endog=indice_ret, exog=resid_frame, order=(1,0,0)).fit() print(indice_modelo.summary()) indice_garch = arch.arch_model(indice_modelo.resid, vol = "GARCH", rescale=True).fit() print(indice_garch.summary()) cond_var_index = indice_garch.conditional_volatility h_stocks = pd.Series(0,name="total_vol", index=resid_frame.index) for i in resid_frame.columns: h_stocks = h_stocks + indice_modelo.params[i]**2*conditional_volatilities_stocks_frame[i]**2 print(f"{i} h_stocks {h_stocks}") h = h_stocks + cond_var_index**2 spillovers = pd.DataFrame(columns=resid_frame.columns) for i in resid_frame.columns: spillovers[i] = (indice_modelo.params[i]*conditional_volatilities_stocks_frame[i])/(h**0.5) for i in spillovers.columns:
class ARIMAModel(ModelStrategy):
    '''
    A class for an Autoregressive Integrated Moving Average Model and the standard operations on it
    '''

    def __init__(self, hparams, log_dir=None):
        '''
        Reads the ARIMA hyperparameters and initializes the base strategy
        :param hparams: Dict with key 'AUTO_PARAMS' (required) and optional keys 'P', 'D', 'Q'
                        (defaulting to 30, 0 and 0 respectively)
        :param log_dir: Passed through to the base class
        '''
        univariate = True
        model = None
        name = 'ARIMA'
        self.auto_params = hparams['AUTO_PARAMS']
        self.p = int(hparams.get('P', 30))
        self.d = int(hparams.get('D', 0))
        self.q = int(hparams.get('Q', 0))
        super(ARIMAModel, self).__init__(model, univariate, name, log_dir=log_dir)

    def fit(self, dataset):
        '''
        Fits an ARIMA forecasting model
        Note: the columns of the passed DataFrame are renamed in place to 'ds' and 'y'.
        :param dataset: A Pandas DataFrame with 2 columns: Date and Consumption
        '''
        if dataset.shape[1] != 2:
            raise Exception(
                'Univariate models cannot fit with datasets with more than 1 feature.'
            )
        dataset.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        }, inplace=True)
        series = dataset.set_index('ds')
        if self.auto_params:
            # Search bounds are twice the configured orders
            best_model = pmdarima.auto_arima(series,
                                             seasonal=False,
                                             stationary=False,
                                             information_criterion='aic',
                                             max_order=2 * (self.p + self.q),
                                             max_p=2 * self.p,
                                             max_d=2 * self.d,
                                             max_q=2 * self.q,
                                             error_action='ignore')
            order = best_model.order
            print("Best ARIMA params: (p, d, q):", best_model.order)
        else:
            order = (self.p, self.d, self.q)
        self.model = ARIMA(series, order=order).fit()
        print(self.model.summary())
        return

    def evaluate(self, train_set, test_set, save_dir=None, plot=False):
        '''
        Evaluates performance of ARIMA model on test set
        Note: the columns of both passed DataFrames are renamed in place to 'ds' and 'y'.
        :param train_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param test_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param save_dir: Directory in which to save forecast metrics
        :param plot: Flag indicating whether to plot the forecast evaluation
        :return: The result of evaluate_forecast() on the combined train/test frame
        '''
        # Imported here so the method works even if the module does not import pandas itself
        import pandas as pd

        train_set.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        }, inplace=True)
        test_set.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        }, inplace=True)
        train_set = train_set.set_index('ds')
        test_set = test_set.set_index('ds')
        train_set["model"] = self.model.fittedvalues
        test_set["forecast"] = self.forecast(
            test_set.shape[0])['Consumption'].tolist()
        # DataFrame.append() was removed in pandas 2.0; pd.concat() is the equivalent stacking
        df_forecast = pd.concat([train_set, test_set]).rename(columns={'y': 'gt'})
        test_metrics = self.evaluate_forecast(df_forecast,
                                              save_dir=save_dir,
                                              plot=plot)
        return test_metrics

    def forecast(self, days, recent_data=None):
        '''
        Create a forecast for the test set. Note that this is different than obtaining predictions for the test set.
        The model makes a prediction for the provided example, then uses the result for the next prediction.
        Repeat this process for a specified number of days.
        :param days: Number of days into the future to produce a forecast for
        :param recent_data: A factual example for the first prediction (not used by this model)
        :return: A DataFrame with columns 'Date' and 'Consumption' holding the forecast
        '''
        forecast_df = self.model.forecast(steps=days).reset_index(level=0)
        forecast_df.columns = ['Date', 'Consumption']
        return forecast_df

    def save(self, save_dir, scaler_dir=None):
        '''
        Saves the model to disk
        :param save_dir: Directory in which to save the model
        :param scaler_dir: Not used by this model
        '''
        if self.model:
            model_path = os.path.join(save_dir,
                                      self.name + self.train_date + '.pkl')
            self.model.save(model_path)  # Serialize and save the model object

    def load(self, model_path, scaler_path=None):
        '''
        Loads the model from disk
        :param model_path: Path to saved model; must end in ".pkl"
        :param scaler_path: Not used by this model
        '''
        if os.path.splitext(model_path)[1] != '.pkl':
            raise Exception('Model file path for ' + self.name +
                            ' must have ".pkl" extension.')
        self.model = ARIMAResults.load(model_path)
        return
plt.savefig(f"images/{company}/{event}/{company} Autocorrelation, {d} Diffs Applied, before {event}", bbox_inches='tight') plt.clf() else: plt.show() ######################################### # forecast with ARIMA model and perform statistical analysis ######################################### if not run_forecast: continue model = ARIMA(y1, order=ARIMA_orders[j][i]) model = model.fit() print(model.summary()) # plot residuals # normal residual plot residuals = model.resid[1:] # remove first residual because it is huge and messes with scale of plot fig, ax = plt.subplots(1,2) ax[0].plot(x1[1:], residuals) ax[0].set_ylabel('Stock Price ($)', color='purple') ax[0].set_xlabel('Date', color='purple') plt.sca(ax[0]) plt.xticks(x1[1::len(x1)//5], rotation=30) plt.title('Residuals', color='purple') # density plot plt.sca(ax[1]) residuals.plot(kind='density', ax=ax[1]) # pandas series/dataframe function that plots each column separately plt.ylabel('Density', color='purple')
# estimators. # #### `SARIMAX` def print_params(s): from io import StringIO return pd.read_csv(StringIO(s.tables[1].as_csv()), index_col=0) print_params(sarimax_exog_res.summary()) # #### `ARIMA` print_params(arima_exog_res.summary()) # ### `exog` in `AutoReg` # # When using `AutoReg` to estimate a model using OLS, the model differs # from both `SARIMAX` and `ARIMA`. The `AutoReg` specification with # exogenous variables is # # $$ # \begin{align} # Y_t & = \phi + \rho Y_{t-1} + X_{t}\beta + \eta_t \\ # \eta_t & \sim WN(0,\sigma^2) \\ # \end{align} # $$ # # This specification is not equivalent to the specification estimated in
def fit_model(self, train_data, namefile):
    """Fit an ARIMA(2, 1, 3) on ``train_data`` and save it to disk.

    :param train_data: data passed to ``ARIMA`` as the series to fit
    :param namefile: output path without extension; '.pickle' is appended
    :return: the fitted model
    """
    fitted = ARIMA(
        train_data,
        order=(2, 1, 3),
        enforce_stationarity=True,
    ).fit()
    fitted.summary()

    output_path = namefile + '.pickle'
    fitted.save(output_path)
    return fitted
def arima(l_args, s_ticker, s_interval, df_stock):
    """Fit an ARIMA model on the adjusted close and plot/print a forecast.

    Parses the command options, fits either an explicit-order ARIMA or a
    pmdarima auto-ARIMA on the '5. adjusted close' column of ``df_stock``,
    plots the history together with the forecast and prints the predicted
    prices. Any exception raised along the way is printed, not propagated.

    :param l_args: list of command-line style arguments for this command
    :param s_ticker: ticker name, used in the plot title
    :param s_interval: not used by this function
    :param df_stock: price DataFrame with a '5. adjusted close' column
    """
    parser = argparse.ArgumentParser(
        prog='arima',
        description="""In statistics and econometrics, and in particular in time series analysis, an
            autoregressive integrated moving average (ARIMA) model is a generalization of an
            autoregressive moving average (ARMA) model. Both of these models are fitted to time
            series data either to better understand the data or to predict future points in the
            series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative
            integers, p is the order (number of time lags) of the autoregressive model, d is the
            degree of differencing (the number of times the data have had past values subtracted),
            and q is the order of the moving-average model."""
    )
    parser.add_argument('-d', "--days", action="store", dest="n_days",
                        type=check_positive, default=5,
                        help='prediction days.')
    parser.add_argument('-i', "--ic", action="store", dest="s_ic", type=str,
                        default='aic',
                        choices=['aic', 'aicc', 'bic', 'hqic', 'oob'],
                        help='information criteria.')
    parser.add_argument('-s', "--seasonal", action="store_true",
                        default=False, dest="b_seasonal",
                        help='Use weekly seasonal data.')
    parser.add_argument('-o', "--order", action="store", dest="s_order",
                        type=str,
                        help='arima model order (p,d,q) in format: pdq.')
    parser.add_argument('-r', "--results", action="store_true",
                        dest="b_results", default=False,
                        help='results about ARIMA summary flag.')

    try:
        (ns_parser, l_unknown_args) = parser.parse_known_args(l_args)

        if l_unknown_args:
            print(
                f"The following args couldn't be interpreted: {l_unknown_args}\n"
            )
            return

        # Machine Learning model
        if ns_parser.s_order:
            # Accept the documented compact form ("110") and, additionally,
            # the comma-separated form ("1,1,0"), which also allows orders
            # above 9.
            if "," in ns_parser.s_order:
                order_terms = ns_parser.s_order.split(",")
            else:
                order_terms = ns_parser.s_order
            t_order = tuple(int(term) for term in order_terms)
            model = ARIMA(df_stock['5. adjusted close'].values,
                          order=t_order).fit()
            # `start` is zero-indexed, so index n_obs is the first
            # out-of-sample step. Starting at n_obs + 1 skipped it and shifted
            # every forecast one day early relative to its label.
            n_obs = len(df_stock['5. adjusted close'])
            l_predictions = model.predict(
                start=n_obs,
                end=n_obs + ns_parser.n_days - 1)
        else:
            # The pmdarima keyword is `information_criterion`; the misspelt
            # `information_criteria` was not a recognised option, so the
            # user's --ic choice never took effect.
            if ns_parser.b_seasonal:
                model = pmdarima.auto_arima(
                    df_stock['5. adjusted close'].values,
                    error_action='ignore',
                    seasonal=True,
                    m=5,
                    information_criterion=ns_parser.s_ic)
            else:
                model = pmdarima.auto_arima(
                    df_stock['5. adjusted close'].values,
                    error_action='ignore',
                    seasonal=False,
                    information_criterion=ns_parser.s_ic)
            l_predictions = model.predict(n_periods=ns_parser.n_days)

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=df_stock['5. adjusted close'].index[-1],
            n_next_days=ns_parser.n_days)
        df_pred = pd.Series(l_predictions, index=l_pred_days, name='Price')

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.plot(df_stock.index, df_stock['5. adjusted close'], lw=2)
        if ns_parser.s_order:
            plt.title(
                f"ARIMA {str(t_order)} on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"ARIMA {model.order} on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel('Time')
        plt.ylabel('Share Price ($)')
        # The visibility flag is passed positionally: its keyword was renamed
        # from `b` to `visible` in Matplotlib, so either keyword breaks on
        # some version.
        plt.grid(True, which='major', color='#666666', linestyle='-')
        plt.minorticks_on()
        plt.grid(True, which='minor', color='#999999', linestyle='-',
                 alpha=0.2)
        plt.plot([df_stock.index[-1], df_pred.index[0]],
                 [df_stock['5. adjusted close'].values[-1], df_pred.values[0]],
                 lw=1, c='tab:green', linestyle='--')
        plt.plot(df_pred.index, df_pred, lw=2, c='tab:green')
        plt.axvspan(df_stock.index[-1], df_pred.index[-1],
                    facecolor='tab:orange', alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1], ymin, ymax, linewidth=1,
                   linestyle='--', color='k')
        plt.show()

        # Print prediction data
        print("Predicted share price:")
        df_pred = df_pred.apply(lambda x: f"{x:.2f} $")
        print(df_pred.to_string())
        print("")

    except Exception as e:
        # Command boundary: report the failure. The previous bare `except:`
        # hid every error, including bad --order values, behind an empty line.
        print(e)
        print("")
import warnings

import pandas as pd
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA

# Fit an ARMA(1, 1) to selected columns of the dataset CSV, report the
# in-sample RMSE for each, and plot the data against the model's predictions.

warnings.simplefilter('ignore', category=UserWarning)

dataset = "Paleo"
# `squeeze=True` was dropped from this call: the option no longer exists in
# pandas 2.0 and only ever affected single-column files, whereas the column
# slice below requires a multi-column frame.
series = pd.read_csv(f'{dataset}.csv', header=0, index_col=0,
                     parse_dates=True)

# Iterating a DataFrame yields its column labels; only the columns at
# positions 11..16 are modelled.
for header in series.iloc[:, 11:17]:
    index_no = series.columns.get_loc(f'{header}')
    arima_model = ARIMA(series[f'{header}'], order=(1, 0, 1)).fit()
    pred = arima_model.predict(dynamic=False)
    # Take the root explicitly: the `squared=False` shortcut has been removed
    # from recent scikit-learn releases, while this form works on all of them.
    rmse = mean_squared_error(series[f'{header}'], pred) ** 0.5
    print(arima_model.summary())
    print(f"RMSE = {rmse}")

    pyplot.plot(series[f'{header}'])
    pyplot.plot(pred, color='red')
    pyplot.title(f'{series.columns[index_no]} intake over time ({dataset})')
    pyplot.xlabel('Date')
    pyplot.ylabel(f'{series.columns[index_no]}')
    pyplot.show()