def plot_arima(self, df, label):
    """Plot a column, its rolling statistics and ARIMA predictions of each.

    Three ARIMA models are fitted, each with the last 20 points of its input
    held out: one on ``df[label]`` (order (2, 3, 2)) and one each on the
    12-point rolling mean and rolling standard deviation of that column
    (order (2, 3, 3)).  Every model predicts from the 20th-from-last index
    label of ``df`` to the last one.  Observed curves are drawn
    semi-transparent and predictions opaque on a new 17x8 figure.

    :param df: frame holding the series; its index supplies the prediction
        bounds and the ``dates`` argument of the rolling-statistics models
    :param label: name of the column to model
    """
    holdout = 20
    window = 12
    series = df[label]
    start, end = df.index[-holdout], df.index[-1]

    # Model of the raw series.
    raw_fit = ARIMA(series[:-holdout], order=(2, 3, 2)).fit()
    pred = raw_fit.predict(start, end, typ='levels')

    # Models of the rolling statistics (leading NaNs of the window dropped).
    rolling_mean = series.rolling(window=window).mean().dropna()
    rolling_std = series.rolling(window=window).std().dropna()

    mean_fit = ARIMA(rolling_mean[:-holdout], order=(2, 3, 3),
                     dates=df.index).fit()
    pred_mean = mean_fit.predict(start, end, typ='levels')

    std_fit = ARIMA(rolling_std[:-holdout], order=(2, 3, 3),
                    dates=df.index).fit()
    pred_std = std_fit.predict(start, end, typ='levels')

    plt.figure(figsize=(17, 8))

    # Observed
    plt.plot(series, alpha=0.5)
    plt.plot(rolling_mean, color='g', alpha=0.5)
    plt.plot(rolling_std, color='blue', alpha=0.5)

    # Predicted
    plt.plot(pred)
    plt.plot(pred_mean, color='g')
    plt.plot(pred_std, color='b')

    plt.title('Rolling mean')
    plt.legend([label, 'mean', 'std', 'predicted'])
def get_arima_model(
    values: Union[pd.Series, pd.DataFrame],
    arima_order: str,
    n_predict: int,
    seasonal: bool,
    ic: str,
) -> Tuple[List[float], Any]:
    """Get an ARIMA model for data

    Parameters
    ----------
    values : Union[pd.Series, pd.DataFrame]
        Data to fit
    arima_order : str
        String of ARIMA params in form "p,d,q".  When empty, the order is
        searched automatically with ``pmdarima.auto_arima``.
    n_predict : int
        Days to predict
    seasonal : bool
        Flag to use seasonal model (period of 5 observations); only used
        when the order is searched automatically
    ic : str
        Information Criteria for model evaluation; only used when the order
        is searched automatically

    Returns
    -------
    List[float]
        List of ``n_predict`` predicted values, starting with the first step
        after the end of ``values``
    Any
        Fit ARIMA model object.
    """
    if arima_order:
        order = tuple(int(term) for term in arima_order.split(","))
        model = ARIMA(values, order=order).fit()
        # Observations are zero-indexed, so the first out-of-sample step is
        # index ``n_obs`` (not ``n_obs + 1``, which would skip a day).
        n_obs = len(values.values)
        l_predictions = list(
            model.predict(start=n_obs, end=n_obs + n_predict - 1)
        )
    else:
        # m=5 models a weekly pattern on business-day data.
        seasonal_kwargs = (
            {"seasonal": True, "m": 5} if seasonal else {"seasonal": False}
        )
        model = pmdarima.auto_arima(
            values.values,
            error_action="ignore",
            # pmdarima's keyword is ``information_criterion``; a misspelled
            # name is swallowed as a fit argument and the choice is ignored.
            information_criterion=ic,
            **seasonal_kwargs,
        )
        l_predictions = list(model.predict(n_predict))

    return l_predictions, model
def q3_b():
    """Fit an ARIMA(0, 1, 1) model to log WMT values and plot its predictions.

    Loads the ``HW5_WMT`` data through ``get_data``, adds first (lag 1) and
    seasonal (lag 4) log differences, keeps the rows located before
    '2016-03-31', then prints the frame and the model summary and shows the
    in-sample predictions together with the log series.
    """
    print("begin")
    df = get_data("data/HW5_WMT.xlsx", "HW5_WMT")
    df.index = pd.to_datetime(df.index, format='%Y%m%d')

    log_wmt = np.log(df['WMT'])
    df['first_difference'] = log_wmt - log_wmt.shift(1)
    df['season_difference'] = log_wmt - log_wmt.shift(4)

    # get_loc gives the position of the cut-off label; head() keeps the rows
    # strictly before it.
    cutoff = df.index.get_loc('2016-03-31')
    df = df.head(cutoff)
    print(df)

    log_wmt = np.log(df['WMT'])
    fitted = ARIMA(log_wmt, order=(0, 1, 1)).fit()  # p=0, d=1, q=1
    print(fitted.summary())

    fitted.predict().plot()
    log_wmt.plot()
    plt.show()
def arimamodel(self, train, test, ar_param, order, ma_param, iterative=True):
    """Predict ``test`` with an ARIMA(ar_param, order, ma_param) model.

    With ``iterative=True`` a walk-forward scheme is used: the model is
    refitted before every step, a one-step forecast is stored, and the true
    observation ``test[i]`` is appended to the history.  Progress is printed
    at multiples of a tenth of ``len(test)``.  Otherwise the model is fitted
    once on ``train`` and asked for ``len(test)`` consecutive predictions.

    :return: list of forecasts (iterative mode) or the object returned by
        ``predict`` (single-fit mode)
    """
    history = list(train)
    n_test = len(test)
    pdq = (ar_param, order, ma_param)

    if not iterative:
        fitted = ARIMA(history, order=pdq).fit()
        n_hist = len(history)
        return fitted.predict(start=n_hist, end=n_hist + n_test - 1)

    tenth = int(n_test / 10)
    stamps = [tenth * k for k in range(1, 10)]
    preds = []
    for i in range(n_test):
        fitted = ARIMA(history, order=pdq).fit()
        preds.append(fitted.forecast()[0])
        history.append(test[i])
        if i in stamps:
            print(
                str(datetime.now()) +
                ': Arima Prediction {}0% Complete: {} out of {}'.format(
                    stamps.index(i) + 1, i, n_test))
    return preds
def predict(coinDataset, daysSelected):
    """Forecast the ``close`` column ``daysSelected`` days past today.

    ``auto_arima`` picks the (p, d, q) order, an ARIMA model with that order
    is fitted on the whole ``close`` column, and predictions are requested
    for positions ``len(coinDataset)`` to ``len(coinDataset) + daysSelected``.
    The result is re-indexed with calendar days from today to
    today + ``daysSelected`` (both inclusive).

    :return: the prediction object returned by the model, with its index
        replaced by the future dates
    """
    closing = coinDataset["close"]

    # Order search; long-horizon forecasts were noted as poor by the
    # original author, short horizons are expected to work better.
    search = auto_arima(closing, trace=True, suppress_warnings=True,
                        test="adf")
    best_order = search.get_params().get("order")

    # Train on the entire dataset so that the forecast covers future days.
    fitted = ARIMA(closing, order=best_order).fit()

    # Future dates start at the current date, not at the last dataset date.
    today = dt.datetime.now()
    last_day = today + relativedelta(days=+daysSelected)
    future_dates = pd.date_range(start=today.strftime("%Y-%m-%d"),
                                 end=last_day.strftime("%Y-%m-%d"))

    n_obs = len(coinDataset)
    prediction = fitted.predict(start=n_obs, end=n_obs + daysSelected,
                                typ="levels")

    # Index by date so the caller can plot the forecast directly.
    prediction.index = future_dates
    return prediction
def _predict_n_samples(self, instance, n_samples):
    """Fit ARIMA on ``instance`` and predict the next ``n_samples`` values.

    The model uses the order stored in ``self._order_params`` and is
    refitted on every call.  Predictions cover positions
    ``instance.shape[0]`` to ``instance.shape[0] + n_samples - 1``.
    """
    fitted = ARIMA(instance, order=self._order_params).fit()
    first = instance.shape[0]
    return fitted.predict(start=first, end=first + n_samples - 1)
def ARIMA_predict(self):
    '''
    Interactively choose an ARIMA order, fit on the training split and
    predict over the date range of the test split.

    Would you like the model to look for an ARIMA order?:
        if user enters Y:
            model will look for best p,d,q order
            user must input Auto or Manual
            Auto uses auto_arima function (self.auto_pdq)
            Manual uses best_order function (self.best_order)
            CAUTION: MANUAL IS VERY COMPUTATIONALLY EXPENSIVE (~20 minutes);
            it only runs after the user types "confirm", otherwise the
            automatic search is used.  Any answer other than "manual" also
            selects the automatic search.
        any other answer is treated as N:
            user is asked if they would like to enter their own p,d,q
            if user enters Y:
                inputs for p,d, and q will follow
            otherwise:
                model will use ARIMA p,d,q (4,1,1) as order

    Returns (res, atrain, atest, arima_title, a_pred): the fitted results,
    the train and test splits, a plot title holding the order and the test
    MSE, and the predictions over the test range.
    '''
    default_order = (4, 1, 1)

    print('Would you like the model to look for an ARIMA order? (Y/N):')
    find_order = input().strip().lower()

    if find_order == 'y':
        print('Auto or Manual?:')
        pdq_type = input().strip().lower()
        if pdq_type == 'manual':
            print(
                'CAUTION: MANUAL IS VERY COMPUTATIONALLY EXPENSIVE (~20 minutes) \nPlease enter "confirm" to proceed'
            )
            # str.lower must be *called*; comparing the bound method to a
            # string is always unequal and used to leave ``order`` unbound.
            if input().strip().lower() == 'confirm':
                print('Please hold')
                order = self.best_order()
            else:
                order = self.auto_pdq()
        else:
            order = self.auto_pdq()
    else:
        print('Would you like to use a specific order? (Y/N)')
        if input().strip().lower() == 'y':
            ord_p = int(input('p:'))
            ord_d = int(input('d:'))
            ord_q = int(input('q:'))
            order = (ord_p, ord_d, ord_q)
        else:
            order = default_order

    atrain, atest = self.train_test()
    atest_s, atest_e = atest.index.date[0], atest.index.date[-1]

    res = ARIMA(atrain, order=order).fit()
    a_pred = res.predict(atest_s, atest_e)
    arima_title = f'ARIMA {order} MSE={round(mean_squared_error(atest,a_pred),5)}'
    return res, atrain, atest, arima_title, a_pred
def ARIMA_predict(self, df, diff_type):
    '''
    ==Function==
    Decide the ARIMA order according to ``self.order_method``, fit the model
    and predict over the date range of the test split

    ==Parameters==
    |df| : data passed to the order search, the train/test split and the fit
    |diff_type| : order of differencing applied to df
                    'first' or 'second'
                    (only consulted when order_method is 'predetermined')

    ==Order methods== (case-insensitive)
    'predetermined' : (3, 0, 0) for 'first', (3, 0, 1) for 'second'
    'auto'          : self.auto_pdq(df)
    'manual'        : self.best_order(df) after the user types "confirm",
                      otherwise falls back to self.auto_pdq(df)
    'select'        : p, d and q are read from standard input

    ==Returns==
    res = .fit()
    atrain, atest = train and test set returned by self.train_test
    arima_title = title to be used in a plot
    a_pred = predictions from ARIMA model
    order = order used in ARIMA

    ==Raises==
    ValueError for an unrecognised order_method, or an unrecognised
    diff_type when order_method is 'predetermined'
    '''
    method = self.order_method.lower()

    if method == 'predetermined':
        predetermined = {'first': (3, 0, 0), 'second': (3, 0, 1)}
        if diff_type not in predetermined:
            raise ValueError(
                f"diff_type must be 'first' or 'second', got {diff_type!r}")
        order = predetermined[diff_type]
    elif method == 'auto':
        order = self.auto_pdq(df)
    elif method == 'manual':
        print(
            'CAUTION: MANUAL IS VERY COMPUTATIONALLY EXPENSIVE (~20 minutes) \nPlease enter "confirm" to proceed'
        )
        confirmation = input()
        if confirmation.lower() == 'confirm':
            print('Please hold')
            order = self.best_order(df)
        else:
            print('Changing to Auto')
            order = self.auto_pdq(df)
    elif method == 'select':
        print('Please input each parameter')
        ord_p = int(input('p:'))
        ord_d = int(input('d:'))
        ord_q = int(input('q:'))
        order = (ord_p, ord_d, ord_q)
    else:
        # Previously fell through and failed later with an unbound ``order``.
        raise ValueError(f'Unknown order_method: {self.order_method!r}')

    atrain, atest = self.train_test(df)
    atest_s, atest_e = atest.index.date[0], atest.index.date[-1]

    # NOTE(review): the model is fitted on the full ``df`` rather than on
    # ``atrain``, so predictions over the test range are in-sample -- confirm
    # this is intended.
    res = ARIMA(df, order=order).fit()
    a_pred = res.predict(atest_s, atest_e)
    arima_title = f'ARIMA {order} MSE={round(mean_squared_error(atest,a_pred),5)}'
    return res, atrain, atest, arima_title, a_pred, order
def ARIMA_predict(df, order):
    """Fit ARIMA on the training split and plot the forecast of the test split.

    ``train_test`` splits ``df``; the model is fitted on the training part
    and ``plot_predict`` draws its predictions over the test date range on
    top of the test data and the last 20 training points.  The title reports
    both date ranges, the order and the mean squared error on the test split.

    :param df: data handed to ``train_test``
    :param order: (p, d, q) order of the ARIMA model
    """
    train, test = train_test(df)
    test_dates, train_dates = test.index.date, train.index.date
    test_s, test_e = test_dates[0], test_dates[-1]
    train_s, train_e = train_dates[0], train_dates[-1]

    res = ARIMA(train, order=order).fit()

    fig, ax = plt.subplots(1, figsize=(14, 4))
    ax.plot(test.index, test)
    ax.plot(train.index[-20:], train[-20:])
    fig = res.plot_predict(test_s, test_e, ax=ax, plot_insample=True)

    mse = round(mean_squared_error(test, res.predict(test_s, test_e)), 5)
    title = 'Forecasted [{} - {}] Data \n Based On [{} - {}] Data\n ARIMA {} MSE= {}'.format(
        test_s, test_e, train_s, train_e, order, mse)
    plt.title(title)
    plt.show()
def arima(l_args, s_ticker, df_stock):
    """Fit an ARIMA model on adjusted close prices, then forecast and plot.

    Command-line style options are parsed from ``l_args``:
    ``-d/--days`` (forecast horizon, default 5), ``-i/--ic`` (information
    criterion for the automatic order search), ``-s/--seasonal`` (seasonal
    search with a period of 5), ``-o/--order`` (manual order given as the
    three digits "pdq") and ``-r/--results`` (print the model summary).

    Parameters
    ----------
    l_args : list of str
        Arguments to parse
    s_ticker : str
        Ticker, used in the plot title
    df_stock : pd.DataFrame
        Prices; the "5. adjusted close" column is modelled and the index is
        used as the time axis

    Returns
    -------
    None.  Any exception raised while parsing, fitting or plotting is caught
    and printed.
    """
    parser = argparse.ArgumentParser(
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an autoregressive integrated
            moving average (ARIMA) model is a generalization of an autoregressive moving average (ARMA) model. Both
            of these models are fitted to time series data either to better understand the data or to predict future
            points in the series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative integers,
            p is the order (number of time lags) of the autoregressive model, d is the degree of differencing
            (the number of times the data have had past values subtracted), and q is the order of the moving-average
            model.
        """,
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: pdq.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        adj_close = df_stock["5. adjusted close"]
        n_days = ns_parser.n_days

        # Machine Learning model
        if ns_parser.s_order:
            # One digit per term, e.g. "212" -> (2, 1, 2).
            t_order = tuple(int(term) for term in ns_parser.s_order)
            model = ARIMA(adj_close.values, order=t_order).fit()
            # Observations are zero-indexed, so the first out-of-sample step
            # is index ``n_obs`` (``n_obs + 1`` would skip the first day).
            n_obs = len(adj_close)
            l_predictions = model.predict(start=n_obs, end=n_obs + n_days - 1)
        else:
            seasonal_kwargs = (
                {"seasonal": True, "m": 5}
                if ns_parser.b_seasonal
                else {"seasonal": False}
            )
            model = pmdarima.auto_arima(
                adj_close.values,
                error_action="ignore",
                # pmdarima's keyword is ``information_criterion``.
                information_criterion=ns_parser.s_ic,
                **seasonal_kwargs,
            )
            l_predictions = model.predict(n_periods=n_days)

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=adj_close.index[-1],
            n_next_days=n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        last_day = df_stock.index[-1]
        last_price = adj_close.values[-1]
        order_label = str(t_order) if ns_parser.s_order else model.order

        plt.figure()
        plt.plot(df_stock.index, adj_close, lw=2)
        plt.title(f"ARIMA {order_label} on {s_ticker} - {n_days} days prediction")
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: its keyword was renamed
        # from ``b`` to ``visible`` in Matplotlib 3.5.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last real price and the first forecast.
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_price, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day, ymin, ymax, linewidth=1, linestyle="--", color="k")
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, last_price)
        print("")

    except Exception as e:
        print(e)
        print("")
# Residual diagnostics: autocorrelation (top) and partial autocorrelation
# (bottom) of ``resid`` on two subplots of the figure ``fig``.
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)

# Autocorrelations together with the Q statistics and their p-values.
r, q, p = sm.tsa.acf(resid.values.squeeze(), fft=True, qstat=True)
# The first autocorrelation is dropped so the columns line up with lags
# 1..24.  NOTE(review): the hard-coded 24 assumes acf returns 24 lags by
# default for this series -- confirm.
data = np.c_[np.arange(1, 25), r[1:], q, p]

table = pd.DataFrame(data, columns=["lag", "AC", "Q", "Prob(>Q)"])
print(table.set_index("lag"))

# * This indicates a lack of fit.

# * In-sample dynamic prediction. How good does our model do?

predict_sunspots = arma_mod30.predict("1990", "2012", dynamic=True)
print(predict_sunspots)


def mean_forecast_err(y, yhat):
    """Return the mean of ``y - yhat`` (``y`` must provide ``.sub``)."""
    return y.sub(yhat).mean()


# The result is not stored or printed here.
mean_forecast_err(dta.SUNACTIVITY, predict_sunspots)

# ### Exercise: Can you obtain a better fit for the Sunspots model? (Hint:
# sm.tsa.AR has a method select_order)

# ### Simulated ARMA(4,1): Model Identification is Difficult

from statsmodels.tsa.arima_process import ArmaProcess
) plt.xlabel('Date') plt.ylabel('Nombre de vélos') plt.title( 'Nombre de vélos par jour entre 00h00 et 09h00 au totem d\'Albert 1er') plt.legend() plt.show() #Using auto_arima algorithm to find the best suitable orders for ARIMA model stepwise_fit = auto_arima(totem['Count'], trace=True, seasonal=True) stepwise_fit.summary #Building the ARIMA model model = ARIMA(totem['Count'], order=(2, 1, 1)) model = model.fit() model.summary() pred = model.predict(end=len(totem) + 1, type="levels").rename( 'Prediction ARIMA' ) #The last index printed corresponds to the day we want the prediction for #Plotting the prediction curve pred.plot(legend=True) plt.xlabel('Date') plt.ylabel('Nombre de vélos') plt.title( 'Nombre de vélos par jour entre 00h00 et 09h00 au totem d\'Albert 1er') plt.legend() plt.show() print(pred.tail)
class Nesterov(Approximator):
    r"""
    Implementation of Nesterov's method for a fixed delay parameter \Delta,
    with ARIMA forecasts of the coefficients \gamma, k and l.

    Per-day quantities are kept in ``self.dict_of_data`` keyed by
    ``datetime.date``: 'new sick' (C), 'new died' (D), 'new reco' (R),
    'sick' (T, cumulative sick), 'S' (running balance C - D - R), 'delta'
    and the coefficients 'gamma', 'k' and 'l'.
    """
    _name = 'Модель Нестерова'
    _parameters = {
        'model': {
            'description': 'Модель предсказания: ARIMA',
            'type': 'choise',
            'values': ['ARIMA'],
            'default': 'ARIMA',
            'min': None,
            'max': None
        },
        'delta': {
            'description': 'Параметр задержки заболевения.'
                           ' В диапазоне от 1 до 30',
            'type': 'continues',
            'values': [],
            'default': '14',
            'min': '1',
            'max': '30'
        }
    }

    def __init__(self, delta=14, model='ARIMA'):
        """
        :param delta: delay in days; clamped to the [min, max] range declared
            in ``_parameters['delta']``
        :param model: name of the forecasting model for the coefficients
        """
        super(Nesterov, self).__init__()

        lowest = int(self._parameters['delta']['min'])
        highest = int(self._parameters['delta']['max'])
        self.delta = min(max(int(delta), lowest), highest)

        # Fallback coefficients used when no per-day value is available.
        self.gamma = 1 / self.delta
        self.k_param = 0.0007
        self.l_param = 0.03

        self.model = model

    def calculate_S(self, date):
        """Return S(d) = S(d - 1) + C(d) - D(d) - L(d); S before the data is 0."""
        previous = self.dict_of_data.get(date + datetime.timedelta(days=-1),
                                         {'S': 0})
        today = self.dict_of_data[date]
        return (previous['S'] + today['new sick'] - today['new died']
                - today['new reco'])

    def calculate_gamma(self, key):
        r"""
        Return gamma(d) = C(d + \delta) / (T(d + \delta - 1) - T(d - 1)).

        :raises KeyError: if the days d + \delta or d + \delta - 1 are not
            in the data
        :raises ZeroDivisionError: if the denominator is zero
        """
        delta = self.dict_of_data[key]['delta']
        new_sick = self.dict_of_data[
            key + datetime.timedelta(days=delta)]['new sick']
        sick_until = self.dict_of_data[
            key + datetime.timedelta(days=delta - 1)]['sick']
        sick_before = self.dict_of_data.get(
            key + datetime.timedelta(days=-1), {'sick': 0})['sick']
        return new_sick / (sick_until - sick_before)

    def calculate_k_and_l(self, key):
        """
        Store k(d) = D(d) / S(d - 1) and l(d) = R(d) / S(d - 1) for day
        ``key``; both are 0 when S(d - 1) is 0.
        """
        S_prev = self.dict_of_data.get(key + datetime.timedelta(days=-1),
                                       {'S': 0})['S']

        def calc(value):
            return (value / S_prev) if S_prev != 0 else 0

        self.dict_of_data[key]['k'] = calc(self.dict_of_data[key]['new died'])
        self.dict_of_data[key]['l'] = calc(self.dict_of_data[key]['new reco'])

    def fit(self, data):
        r"""
        Fit the model to the supplied sample, i.e. tune all model parameters.
        Every day is assumed to be present in the sample, in chronological
        order.

        :param data: dictionary of the form
            key - object number,
            value - dictionary {'date': string in day.month.year format,
                                'sick': int,
                                'recovered': int,
                                'died': int}
        :type data: dict
        """
        self.dict_of_data = dict()
        for key in data:
            date = datetime.datetime.strptime(data[key]['date'],
                                              '%d.%m.%Y').date()
            day = self.dict_of_data.setdefault(date, dict())
            day['new sick'] = data[key]['sick']
            day['new died'] = data[key]['died']
            day['new reco'] = data[key]['recovered']

        # TODO: missing values (skipped days) should be handled.
        for key in self.dict_of_data:
            self.dict_of_data[key]['sick'] = (
                self.dict_of_data.get(key - datetime.timedelta(days=1),
                                      {'sick': 0})['sick']
                + self.dict_of_data[key]['new sick'])

        if self.model == 'ARIMA':
            for key in self.dict_of_data:
                self.dict_of_data[key]['delta'] = self.delta
                try:
                    self.dict_of_data[key]['gamma'] = \
                        self.calculate_gamma(key)
                except (KeyError, ZeroDivisionError):
                    # gamma needs data delta days ahead and a non-zero
                    # denominator; such days are filled in below from the
                    # fitted gamma model.
                    pass
                self.dict_of_data[key]['S'] = self.calculate_S(key)
                self.calculate_k_and_l(key)

            with_gamma = [key for key in self.dict_of_data
                          if 'gamma' in self.dict_of_data[key]]
            gammas = [self.dict_of_data[key]['gamma'] for key in with_gamma]
            g_dates = [key.strftime('%Y-%m-%d') for key in with_gamma]

            ds = [
                self.dict_of_data[key]['k'] for key in self.dict_of_data
                if 'k' in self.dict_of_data[key]
            ]
            ls = [
                self.dict_of_data[key]['l'] for key in self.dict_of_data
                if 'l' in self.dict_of_data[key]
            ]
            dl_dates = [
                key.strftime('%Y-%m-%d') for key in self.dict_of_data
                if 'k' in self.dict_of_data[key]
            ]

            self.gamma_model = ARIMA(pd.Series(gammas, index=g_dates),
                                     order=(6, 0, 4), trend='n').fit()
            self.d_model = ARIMA(pd.Series(ds, index=dl_dates),
                                 order=(5, 1, 4), trend='n').fit()
            self.l_model = ARIMA(pd.Series(ls, index=dl_dates),
                                 order=(6, 1, 6), trend='n').fit()

            for key in self.dict_of_data:
                self.predict_params(key)

    def predict_params(self, date):
        """
        Fill in whichever of 'gamma', 'k' and 'l' is missing for ``date``
        with a one-day prediction of the corresponding fitted ARIMA model.
        """
        date_str = date.strftime('%Y-%m-%d')
        day = self.dict_of_data[date]
        models = (('gamma', self.gamma_model),
                  ('k', self.d_model),
                  ('l', self.l_model))
        for name, model in models:
            if name not in day:
                day[name] = model.predict(start=date_str,
                                          end=date_str).values[0]

    def predict(self, date):
        r"""
        Return the prediction for the given date: the number of new sick,
        recovered and died.  Days between the last known day and ``date`` are
        computed one by one and cached in ``self.dict_of_data``.

        :param date: string in "day.month.year" format
        :type date: str

        :return: dictionary of the form:
            {
                'date': string in day.month.year format,
                'sick': int,
                'recovered': int,
                'died': int
            }
        :rtype: dict
        """
        date = datetime.datetime.strptime(date, '%d.%m.%Y').date()
        one_day = datetime.timedelta(days=1)

        cur_date = max(self.dict_of_data) + one_day
        while cur_date <= date:
            day = self.dict_of_data[cur_date] = dict()
            self.predict_params(cur_date)
            day['delta'] = self.delta
            delay = datetime.timedelta(days=day['delta'])

            # C(d) = gamma(d - \delta) * (T(d - 1) - T(d - \delta - 1))
            gamma = self.dict_of_data.get(cur_date - delay,
                                          {'gamma': self.gamma})['gamma']
            sick_yesterday = self.dict_of_data.get(cur_date - one_day,
                                                   {'sick': 0})['sick']
            sick_before_window = self.dict_of_data.get(
                cur_date - delay - one_day, {'sick': 0})['sick']
            day['new sick'] = int(
                gamma * (sick_yesterday - sick_before_window))

            S_prev = self.dict_of_data[cur_date - one_day]['S']
            # The fallbacks used to be set literals ({'k', value}), which
            # cannot be indexed by key; use the constants as real defaults.
            # D(d) = k(d) * S(d - 1)
            day['new died'] = int(day.get('k', self.k_param) * S_prev)
            # R(d) = l(d) * S(d - 1)
            day['new reco'] = int(day.get('l', self.l_param) * S_prev)

            day['S'] = self.calculate_S(cur_date)
            day['sick'] = sick_yesterday + day['new sick']

            cur_date = cur_date + one_day

        return {
            'date': date.strftime('%d.%m.%Y'),
            'sick': self.dict_of_data[date]['new sick'],
            'recovered': self.dict_of_data[date]['new reco'],
            'died': self.dict_of_data[date]['new died']
        }

    def predict_between(self, date_from, date_to):
        r"""
        Return predictions for every date between the given ones (both
        inclusive): the number of new sick, recovered and died.

        :param date_from: first date, string in "day.month.year" format
        :type date_from: str
        :param date_to: last date, string in "day.month.year" format
        :type date_to: str

        :return: list of dictionaries of the form:
            {
                'date': string in day.month.year format,
                'sick': int,
                'recovered': int,
                'died': int
            }
        :rtype: list
        """
        date_from = datetime.datetime.strptime(date_from, '%d.%m.%Y')
        date_to = datetime.datetime.strptime(date_to, '%d.%m.%Y')

        cur_date = date_from
        list_of_ret = []
        while cur_date <= date_to:
            list_of_ret.append(self.predict(cur_date.strftime('%d.%m.%Y')))
            cur_date = cur_date + datetime.timedelta(days=1)

        return list_of_ret
ax = plt.gca() ax.plot(train, label='train') ax.plot(test, label='test') ax.set_xlabel(x_label) ax.set_ylabel(y_label) ax.plot(pred.index, pred.values, label='predicted', color='r') plt.legend() p, d, q = 2, 1, 2 n = len(df) train = df[:n*9//10] test = df[n*9//10+1:] mod = ARIMA(train, order=(p, d, q)) mod = mod.fit() pred = mod.predict(start = len(train), end = len(df)-1) plt.figure(figsize=(FIG_WIDTH, FIG_HEIGHT)) plot_forecasting(train, test, pred, x_label=x_label, y_label=y_label) plt.savefig(graphsDir + 'Deaths - Train vs Test vs Predicted') fig, axs = plt.subplots(1, 1, figsize=(FIG_WIDTH, FIG_HEIGHT)) fig.suptitle(f'ARIMA predictions (p={p},d={d},q={q})') # PREDICT 2019 print(df)
# AR(1), the second prediction will follow the model, so that when using # `ARIMA`, the prediction is # # $$ # Y_2 = \hat{\delta} + \hat{\rho} \left(Y_1 - \hat{\delta}\right) # $$ # # since `ARIMA` treats both exogenous and trend terms as regression with # ARMA errors. # # This can be seen in the next set of cells. arima_res = ARIMA(y, order=(1, 0, 0), trend="c").fit() print_params(arima_res.summary()) arima_res.predict(0, 2) delta_hat, rho_hat = arima_res.params[:2] delta_hat + rho_hat * (y[0] - delta_hat) # `SARIMAX` treats trend terms differently, and so the one-step forecast # from a model estimated using `SARIMAX` is # # $$ # Y_2 = \hat\delta + \hat\rho Y_1 # $$ sarima_res = SARIMAX(y, order=(1, 0, 0), trend="c").fit() print_params(sarima_res.summary()) sarima_res.predict(0, 2)
def arima(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    ARIMA prediction

    Fits an ARIMA model on the "5. adjusted close" column, forecasts
    ``--days`` market days and plots the result.  When ``--end`` is given the
    model is trained on data up to that date only and the forecast is
    compared with the real prices that followed (backtesting).

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        ticker
    df_stock: pd.DataFrame
        Dataframe of prices

    Returns
    -------
    None.  Any exception raised while parsing, fitting or plotting is caught
    and printed.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an autoregressive integrated
            moving average (ARIMA) model is a generalization of an autoregressive moving average (ARMA) model. Both
            of these models are fitted to time series data either to better understand the data or to predict future
            points in the series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative integers,
            p is the order (number of time lags) of the autoregressive model, d is the degree of differencing
            (the number of times the data have had past values subtracted), and q is the order of the moving-average
            model.
        """,
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: p,d,q.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        n_days = ns_parser.n_days
        backtesting = bool(ns_parser.s_end_date)

        # BACKTESTING
        if backtesting:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            # At least 5 + n_days market days of history are required.
            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=5 + n_days)[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=n_days)

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Real prices of the forecast window, then cut the training data.
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        adj_close = df_stock["5. adjusted close"]

        # Machine Learning model
        if ns_parser.s_order:
            t_order = tuple(int(term) for term in ns_parser.s_order.split(","))
            model = ARIMA(adj_close.values, order=t_order).fit()
            # Observations are zero-indexed, so the first out-of-sample step
            # is index ``n_obs`` (``n_obs + 1`` would skip the first day).
            n_obs = len(adj_close)
            l_predictions = model.predict(start=n_obs, end=n_obs + n_days - 1)
        else:
            seasonal_kwargs = (
                {"seasonal": True, "m": 5}
                if ns_parser.b_seasonal
                else {"seasonal": False}
            )
            model = pmdarima.auto_arima(
                adj_close.values,
                error_action="ignore",
                # pmdarima's keyword is ``information_criterion``.
                information_criterion=ns_parser.s_ic,
                **seasonal_kwargs,
            )
            # Prices cannot be negative: clip the forecast at zero.
            l_predictions = [
                i if i > 0 else 0
                for i in model.predict(n_periods=n_days)
            ]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=adj_close.index[-1],
            n_next_days=n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        last_day = df_stock.index[-1]
        last_price = adj_close.values[-1]
        day_after_forecast = df_pred.index[-1] + datetime.timedelta(days=1)

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, adj_close, lw=2)

        order_label = str(t_order) if ns_parser.s_order else model.order
        title_prefix = "BACKTESTING: " if backtesting else ""
        plt.title(
            f"{title_prefix}ARIMA {order_label} on {s_ticker} - {n_days} days prediction"
        )

        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: its keyword was renamed
        # from ``b`` to ``visible`` in Matplotlib 3.5.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-",
                 alpha=0.2)
        # Dashed connector between the last real price and the first forecast.
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_price, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange",
                    alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day, ymin, ymax, linewidth=1, linestyle="--",
                   color="k")

        # BACKTESTING
        if backtesting:
            future_close = df_future["5. adjusted close"]
            plt.plot(
                df_future.index,
                future_close,
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_price, future_close.values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if backtesting:
            future_close = df_future["5. adjusted close"]
            window_ticks = [last_day, day_after_forecast]

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

            # Top panel: real prices versus forecast.
            plt.subplot(211)
            plt.plot(
                df_future.index,
                future_close,
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, future_close, c="tab:blue", lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_price, future_close.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_price, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, day_after_forecast)
            plt.xticks(window_ticks, visible=True)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            # Bottom panel: relative forecast error in percent.
            error_pct = (100 * (df_pred.values - future_close.values)
                         / future_close.values)
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(df_future.index, error_pct, lw=2, c="red")
            plt.scatter(df_future.index, error_pct, c="red", lw=5)
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [last_day, df_future.index[0]],
                [0, error_pct[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, day_after_forecast)
            plt.xticks(window_ticks, visible=True)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = future_close

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color,
                                  axis=1).to_string())
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values,
                                  df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_price)
        print("")

    except Exception as e:
        print(e, "\n")
# Residuals of the fitted model ``arma_mod30`` plotted over time.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax = arma_mod30.resid.plot(ax=ax)

resid = arma_mod30.resid

# Normality test of the residuals; the result is not stored or printed here.
stats.normaltest(resid)

# Q-Q plot of the residuals.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
fig = qqplot(resid, line='q', ax=ax, fit=True)

# Autocorrelation (top) and partial autocorrelation (bottom) of the
# residuals, 40 lags each.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)
plt.show()

# Autocorrelations together with the Q statistics and their p-values.
r, q, p = sm.tsa.acf(resid.values.squeeze(), fft=True, qstat=True)
# The first autocorrelation is dropped so the columns line up with lags
# 1..40.  NOTE(review): the hard-coded 40 assumes acf returns 40 lags by
# default -- confirm against the installed statsmodels version.
data = np.c_[range(1, 41), r[1:], q, p]
table = pd.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
print(table.set_index('lag'))

# In-sample dynamic prediction from '1990' to '2012'.
predict_sunspots = arma_mod30.predict('1990', '2012', dynamic=True)
print(predict_sunspots)


def mean_forecast_err(y, yhat):
    """Return the mean of ``y - yhat`` (``y`` must provide ``.sub``)."""
    return y.sub(yhat).mean()


# The result is not stored or printed here.
mean_forecast_err(dta.SUNACTIVITY, predict_sunspots)
class ARIMAImplementation(ModelImplementation):
    """ ARIMA model fitted on a Box-Cox transformed (and, if needed, shifted)
    time series. Predictions are transformed back to the original scale.
    """

    def __init__(self, log: Log = None, **params):
        super().__init__(log)
        self.params = params
        self.arima = None
        # Box-Cox lambda estimated during fit
        self.lambda_value = None
        # Shift added to make the series strictly positive (None - no shift)
        self.scope = None
        self.actual_ts_len = None
        self.sts = None

    def fit(self, input_data):
        """ Class fit arima model on data

        :param input_data: data with features, target and ids to process
        :return: fitted ARIMA results object
        """
        source_ts = np.array(input_data.features)
        # Save actual time series length
        self.actual_ts_len = len(source_ts)
        self.sts = source_ts

        # Forget the shift of a previous fit: otherwise a refit on strictly
        # positive data would still "undo" a shift that was never applied
        self.scope = None

        # Box-Cox is defined for positive values only
        min_value = np.min(source_ts)
        if not min_value > 0:
            # Making a shift to positive values
            self.scope = abs(min_value) + 1
            source_ts = source_ts + self.scope

        _, self.lambda_value = stats.boxcox(source_ts)
        transformed_ts = boxcox(source_ts, self.lambda_value)

        # Set parameters
        p = int(self.params.get('p'))
        d = int(self.params.get('d'))
        q = int(self.params.get('q'))
        params = {'order': (p, d, q)}
        self.arima = ARIMA(transformed_ts, **params).fit()

        return self.arima

    def predict(self, input_data, is_fit_pipeline_stage: bool):
        """ Method for time series prediction on forecast length

        :param input_data: data with features, target and ids to process
        :param is_fit_pipeline_stage: is this fit or predict stage for pipeline
        :return output_data: output data with smoothed time series
        """
        input_data = copy(input_data)
        parameters = input_data.task.task_params
        forecast_length = parameters.forecast_length
        old_idx = input_data.idx
        target = input_data.target

        # For training pipeline get fitted data
        if is_fit_pipeline_stage:
            fitted_values = self.arima.fittedvalues

            fitted_values = self._inverse_boxcox(predicted=fitted_values,
                                                 lambda_param=self.lambda_value)
            # Undo shift operation
            fitted_values = self._inverse_shift(fitted_values)

            diff = int(self.actual_ts_len - len(fitted_values))
            # If first elements skipped
            if diff != 0:
                # Pad the beginning with copies of the first fitted value
                first_element = fitted_values[0]
                first_elements = [first_element] * diff
                first_elements.extend(list(fitted_values))

                fitted_values = np.array(first_elements)

            _, predict = _ts_to_table(idx=old_idx,
                                      time_series=fitted_values,
                                      window_size=forecast_length)

            new_idx, target_columns = _ts_to_table(idx=old_idx,
                                                   time_series=target,
                                                   window_size=forecast_length)

            # Update idx and target
            input_data.idx = new_idx
            input_data.target = target_columns

        # For predict stage we can make prediction
        else:
            start_id = old_idx[-1] - forecast_length + 1
            end_id = old_idx[-1]
            predicted = self.arima.predict(start=start_id,
                                           end=end_id)

            predicted = self._inverse_boxcox(predicted=predicted,
                                             lambda_param=self.lambda_value)

            # Undo shift operation
            predict = self._inverse_shift(predicted)
            # Convert one-dim array as column
            predict = np.array(predict).reshape(1, -1)
            new_idx = np.arange(start_id, end_id + 1)

            # Update idx
            input_data.idx = new_idx

        # Update idx and features
        output_data = self._convert_to_output(input_data,
                                              predict=predict,
                                              data_type=DataTypesEnum.table)
        return output_data

    def get_params(self):
        """ Return the keyword parameters the model was created with """
        return self.params

    def _inverse_boxcox(self, predicted, lambda_param):
        """ Method apply inverse Box-Cox transformation

        :param predicted: values in the Box-Cox transformed scale
        :param lambda_param: Box-Cox lambda used for the forward transform
        :return: values in the (possibly shifted) original scale
        """
        if lambda_param == 0:
            return np.exp(predicted)

        res = inv_boxcox(predicted, lambda_param)
        # The inverse transform may produce NaN values - fill them
        res = self._filling_gaps(res)
        return res

    def _inverse_shift(self, values):
        """ Method apply inverse shift operation """
        if self.scope is not None:
            values = values - self.scope
        return values

    @staticmethod
    def _filling_gaps(res):
        """ Replace NaN values in the array (modified in place)

        Gaps in the first and last positions are filled with the mean of the
        valid values, gaps inside the series with linear interpolation.

        :param res: one-dimensional array which may contain NaN
        :return: array without NaN values
        """
        nan_mask = np.asarray(np.isnan(res))
        if not nan_mask.any():
            return res

        # Placeholder for gaps.
        # NOTE(review): presumably this matches the default gap marker of
        # SimpleGapFiller - confirm against its definition
        gap_value = -100.0

        # Mean over valid values only: calculating it after the placeholders
        # are written would drag the mean towards -100
        if nan_mask.all():
            edge_value = gap_value
        else:
            edge_value = np.mean(res[~nan_mask])

        res[nan_mask] = gap_value
        last_id = len(res) - 1

        # Gaps in first and last elements fills with mean value
        if nan_mask[0]:
            res[0] = edge_value
        if nan_mask[last_id]:
            res[last_id] = edge_value

        # Gaps in center of timeseries fills with linear interpolation.
        # The decision is made on the saved mask: the NaNs themselves have
        # already been replaced, so checking isnan again would never fire
        if nan_mask[1:last_id].any():
            gf = SimpleGapFiller()
            res = gf.linear_interpolation(res)
        return res
import warnings

import pandas as pd
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA

warnings.simplefilter('ignore', category=UserWarning)

dataset = "Paleo"
# `squeeze=True` was dropped: it is not accepted by pandas >= 2.0, and the
# data is used as a multi-column frame below (`iloc[:, 11:17]`).
series = pd.read_csv(f'{dataset}.csv', header=0, index_col=0, parse_dates=True)

# Fit an ARIMA(1, 0, 1) to each of the columns at positions 11..16, report the
# in-sample RMSE and plot the observations against the in-sample predictions.
for header in series.iloc[:, 11:17]:
    observed = series[header]
    arima_model = ARIMA(observed, order=(1, 0, 1)).fit()
    pred = arima_model.predict(dynamic=False)
    # Take the root explicitly; the `squared=False` shortcut is deprecated
    # and no longer available in recent scikit-learn releases.
    rmse = mean_squared_error(observed, pred) ** 0.5

    print(arima_model.summary())
    print(f"RMSE = {rmse}")

    pyplot.plot(observed)
    pyplot.plot(pred, color='red')
    pyplot.title(f'{header} intake over time ({dataset})')
    pyplot.xlabel('Date')
    pyplot.ylabel(f'{header}')
    pyplot.show()
# Density of the residuals on the second axis (pandas plots each column
# separately).
residuals.plot(kind='density', ax=ax[1])
plt.ylabel('Density', color='purple')
plt.xlabel('Stock Price ($)', color='purple')
plt.title('Residuals Density', color='purple')
if save_images:
    plt.savefig(f'''images/{company}/{event}/{company} Stock Price Residuals, {event}, ARIMA Model (p, d, q) = {ARIMA_orders[j][i]}''', bbox_inches='tight')
    plt.clf()
else:
    plt.show()

# Plot actual vs fitted (T_1) and forecasted (T_2) values over the entire time
# frame (T_1 + T_2), including a confidence interval.
# Every T_2 curve starts at the last T_1 point so the lines join up.
x_future = x1[-1:] + x2

plt.plot(x1, y1)
# `Series.append` no longer exists in pandas >= 2.0; `pd.concat` is the
# equivalent that works on old and new versions.
plt.plot(x_future, pd.concat([y1.iloc[-1:], y2]), color='tab:orange')

# Fitted values (in-sample predictions)
plt.plot(x1, model.predict(), color='tab:red')

# Forecasted values
forecast = model.get_forecast(steps=y2.shape[0])
forecasted_values = forecast.predicted_mean
# Prediction from the last in-sample position onwards, used as the joining point
last_predict = model.predict(start=y1.shape[0] - 1)
plt.plot(x_future,
         pd.concat([last_predict, pd.Series(forecasted_values)]),
         color='tab:green')

# (1 - alpha) confidence interval, here 95%
alpha = 0.05
confidence_interval = forecast.conf_int(alpha=alpha)
confidence_interval_lower = confidence_interval['lower Close']
confidence_interval_upper = confidence_interval['upper Close']

# Shade the band between the lower and upper bound in black at 10% opacity
plt.fill_between(x_future,
                 pd.concat([last_predict, confidence_interval_lower]),
                 pd.concat([last_predict, confidence_interval_upper]),
                 color='black', alpha=0.1)

# customize x axis
ax.plot(train, label='train') ax.plot(test, label='test') ax.set_xlabel(x_label) ax.set_ylabel(y_label) ax.plot(pred.index, pred.values, label='predicted', color='r') plt.legend() p, d, q = 2, 1, 2 n = len(df) train = df[:n * 9 // 10] test = df[n * 9 // 10 + 1:] mod = ARIMA(train, order=(p, d, q)) mod = mod.fit() pred = mod.predict(start=len(train), end=len(df) - 1) plt.figure(figsize=(FIG_WIDTH, FIG_HEIGHT)) plot_forecasting(train, test, pred, x_label=x_label, y_label=y_label) plt.savefig(graphsDir + 'Covid19 - Train vs Test vs Predicted') fig, axs = plt.subplots(5, 1, figsize=(FIG_WIDTH, 5 * FIG_HEIGHT)) fig.suptitle(f'ARIMA predictions (p={p},d={d},q={q})') k = 0 for i in range(50, 100, 10): train = df[:n * i // 100] test = df[n * i // 100 + 1:] mod = ARIMA(train, order=(p, d, q)) mod = mod.fit() pred = mod.predict(start=len(train), end=len(df) - 1)
# Draw the ARMA forecast computed above and score it against the test set.
plt.plot(y_pred_out, color='green', label='ARMA Predictions')
plt.legend()

import numpy as np
from sklearn.metrics import mean_squared_error

arma_mse = mean_squared_error(test["BTC-USD"].values,
                              y_pred_df["Predictions"])
arma_rmse = np.sqrt(arma_mse)
print("ARMA RMSE: ", arma_rmse)

# ARIMA(5, 4, 2): forecast as many steps as there are test rows.
ARIMAmodel = ARIMA(y, order=(5, 4, 2)).fit()

y_pred = ARIMAmodel.get_forecast(len(test.index))
y_pred_df = y_pred.conf_int(alpha=0.05)
forecast_start = y_pred_df.index[0]
forecast_end = y_pred_df.index[-1]
y_pred_df["Predictions"] = ARIMAmodel.predict(start=forecast_start,
                                              end=forecast_end)
# Re-label the forecast rows with the test index so they plot on the same axis.
y_pred_df.index = test.index
y_pred_out = y_pred_df["Predictions"]

plt.plot(y_pred_out, color='Yellow', label='ARIMA Predictions')
plt.legend()

# NOTE: the variable keeps the name `arma_rmse` although it now holds the
# ARIMA score.
arma_mse = mean_squared_error(test["BTC-USD"].values,
                              y_pred_df["Predictions"])
arma_rmse = np.sqrt(arma_mse)
print("ARIMA RMSE: ", arma_rmse)

# Seasonal model with the same non-seasonal order and a period of 12.
SARIMAXmodel = SARIMAX(y, order=(5, 4, 2),
                       seasonal_order=(2, 2, 2, 12)).fit()
print(df.shape)

# The last 30 rows are held out for testing; everything before is training.
train, test = df.iloc[:-30], df.iloc[-30:]
print("train and test shape:", train.shape, test.shape)

# Fit ARIMA(2, 1, 2) on the training closes.
model = ARIMA(train["close"], order=(2, 1, 2)).fit()
print("Model summary for training set:", model.summary())

# Predict exactly the positions covered by the test set so the two can be
# compared one-to-one.
start = len(train)
end = start + len(test) - 1
prediction = model.predict(start=start, end=end, typ="levels")
prediction = prediction.rename("ARIMA Predictions")
# Give the predictions the same index labels as the held-out rows.
prediction.index = df.index[start:end + 1]

# Predicted vs. actual closes on one chart.
plt.title("Prediction vs Testing Set")
test["close"].plot(legend=True)
prediction.plot(legend=True)
plt.show()

# Mean of the test closes, then the RMSE of the predictions, for scale.
print(test["close"].mean())
rmse = sqrt(mean_squared_error(prediction, test["close"]))
print(rmse)

# Model on the entire dataset, to be used for future predictions
# (the p, d, q values still need experimenting with).
model2 = ARIMA(df["close"], order=(2, 1, 2))
def _select_rows(frame, use_filter, column, value):
    """Return ``frame`` ('No') or only its rows where ``column == value`` ('Yes')."""
    if use_filter == 'Yes':
        return frame[frame[column] == value]
    if use_filter == 'No':
        return frame
    raise ValueError("query5 must be 'Yes' or 'No', got %r" % (use_filter,))


def _rolling_lstm_forecast(model, history, n_input, n_features, n_steps):
    """Predict ``n_steps`` scaled values, feeding each prediction back as input."""
    current_batch = history[-n_input:].reshape((1, n_input, n_features))
    predictions = []
    for _ in range(n_steps):
        current_pred = model.predict(current_batch)[0]
        predictions.append(current_pred)
        # Drop the oldest step of the window and append the new prediction
        current_batch = np.append(current_batch[:, 1:, :], [[current_pred]],
                                  axis=1)
    return predictions


def _undo_differencing(forecast, history, n_diffs):
    """Turn forecasts of a differenced series back into levels.

    ``forecast`` holds forecasts of the series differenced ``n_diffs`` times
    (0, 1 or 2), ``history`` the undifferenced observations that end right
    before the first forecast step. Both must share the same columns.
    """
    restored = forecast.copy()
    if n_diffs < 1:
        return restored
    if n_diffs > 1:
        # Second differences -> first differences
        restored = restored.cumsum() + (history.iloc[-1] - history.iloc[-2])
    # First differences -> levels
    return restored.cumsum() + history.iloc[-1]


def submit_ts():
    """Fit forecasting models on an uploaded CSV and render the forecast page.

    Form fields read from the Flask ``request``:

    * ``userfile`` - uploaded CSV file
    * ``query1`` - name of the date column
    * ``query2`` - date format passed to ``pd.to_datetime``
    * ``query3`` - number of target columns (1: ARIMA/Prophet/LSTM,
      more than 1: VARMAX)
    * ``query4`` - target column name, or whitespace-separated names
    * ``query5`` - 'Yes' to keep only rows where ``query6 == query7``, 'No'
      to use all rows
    * ``query8`` - number of periods to forecast

    With a single target the model with the lowest validation RMSE on the
    last 5% of the data supplies the forecast. The forecast plot is written
    to ``app.config["IMAGE_UPLOADS"]``.

    :return: rendered ``step1_img.html`` template
    :raises ValueError: for an empty upload name, an unknown ``query5`` value
        or a ``query3`` below 1
    """
    f = request.files['userfile']
    # Keep only the last path component of the client-supplied name so a
    # crafted filename cannot write outside the working directory
    d1 = os.path.basename((f.filename or '').replace('\\', '/'))
    if not d1:
        raise ValueError('uploaded file has no usable file name')
    f.save(d1)
    print(f)

    s1 = request.form['query1']
    s2 = request.form['query2']
    s3 = int(request.form['query3'])
    s4 = request.form['query4']
    s5 = request.form['query5']
    s6 = s7 = None
    if s5 == 'Yes':
        s6 = request.form['query6']
        s7 = request.form['query7']
    t = int(request.form['query8'])
    print(d1)
    d3 = pd.read_csv(d1)

    if s3 < 1:
        raise ValueError('query3 must be at least 1, got %d' % s3)

    if s3 == 1:
        # An explicit format is always supplied, so format inference is
        # never used
        d3[s1] = pd.to_datetime(d3[s1], format=s2)
        datewise = _select_rows(d3, s5, s6, s7).groupby([s1]).agg({s4: 'sum'})

        # ARIMA
        datewise = datewise.astype('float32')
        split_at = int(datewise.shape[0] * 0.95)
        model_train = datewise.iloc[:split_at]
        valid = datewise.iloc[split_at:]
        n11 = pd.infer_freq(datewise.index)
        # Validation RMSE per model, in the order ARIMA, Prophet, LSTM
        list9 = []
        model_arima = auto_arima(model_train[s4],
                                 trace=True,
                                 error_action='ignore',
                                 start_p=1,
                                 start_q=1,
                                 max_p=3,
                                 max_q=3,
                                 suppress_warnings=True,
                                 stepwise=False,
                                 seasonal=False)
        model_arima.fit(model_train[s4])
        prediction_arima = model_arima.predict(len(valid))
        arima_rmse = np.sqrt(
            mean_squared_error(list(valid[s4]), prediction_arima))
        print("Root Mean Square Error for ARIMA Model: ", arima_rmse)
        list9.append(arima_rmse)
        print('\n')
        # Refit the selected order on all data and forecast t periods ahead
        m1 = model_arima.order
        results = ARIMA(datewise[s4], order=m1).fit()
        forecast_arima = results.predict(len(datewise),
                                         len(datewise) + t - 1,
                                         typ='levels').rename(s4)

        # Prophet
        datewise1 = datewise.reset_index().rename(columns={s1: 'ds', s4: 'y'})
        train = datewise1.iloc[:split_at]
        valid = datewise1.iloc[split_at:]
        m = Prophet(weekly_seasonality=True)
        m.fit(train)
        future = m.make_future_dataframe(periods=len(valid), freq=n11)
        forecast = m.predict(future)
        predictions = forecast.tail(len(valid))['yhat']
        prophet_rmse = rmse(valid['y'], predictions)
        print('\n')
        print("Root Mean Squared Error for Prophet Model: ", prophet_rmse)
        print('\n')
        list9.append(prophet_rmse)
        m = Prophet(weekly_seasonality=True)
        m.fit(datewise1)
        future = m.make_future_dataframe(periods=t, freq=n11)
        forecast = m.predict(future)
        forecast_prophet = forecast[['ds', 'yhat']].tail(t).reset_index(
            drop=True)

        # LSTM
        train = datewise.iloc[:split_at]
        # Copy: a column is added below and must not touch `datewise`
        test = datewise.iloc[split_at:].copy()
        scaler = MinMaxScaler()
        scaler.fit(train)
        scaled_train = scaler.transform(train)
        n_input = len(test)
        n_features = 1
        generator = TimeseriesGenerator(scaled_train,
                                        scaled_train,
                                        length=n_input,
                                        batch_size=1)
        model = Sequential()
        model.add(
            LSTM(150, activation='relu', input_shape=(n_input, n_features)))
        model.add(Dense(1))
        model.compile(optimizer='adam', loss='mse')
        model.fit_generator(generator, epochs=30)
        test_predictions = _rolling_lstm_forecast(model, scaled_train, n_input,
                                                  n_features, len(test))
        test['predictions'] = scaler.inverse_transform(test_predictions)
        lstm_rmse = rmse(test[s4], test['predictions'])
        list9.append(lstm_rmse)
        print('\n')
        print("Root Mean Square Error for LSTM Model: ", lstm_rmse)
        print('\n')
        # Continue training on the full series, then forecast t periods ahead
        scaler.fit(datewise)
        scaled_full = scaler.transform(datewise)
        generator = TimeseriesGenerator(scaled_full,
                                        scaled_full,
                                        length=n_input,
                                        batch_size=1)
        model.fit_generator(generator, epochs=30)
        test_predictions = _rolling_lstm_forecast(model, scaled_full, n_input,
                                                  n_features, t)
        d_proj = pd.DataFrame(scaler.inverse_transform(test_predictions),
                              columns=[s4])
        # The forecast dates are taken from the Prophet future frame
        lstm = pd.concat([forecast_prophet[['ds']], d_proj], axis=1)

        # Use the forecast of the model with the lowest validation RMSE
        no = list9.index(min(list9))
        if no == 0:
            arima_frame = pd.DataFrame(forecast_arima).reset_index(drop=True)
            forecast_data = pd.concat([forecast_prophet[['ds']], arima_frame],
                                      axis=1).rename(columns={'ds': s1})
        elif no == 1:
            forecast_data = forecast_prophet.rename(columns={
                'ds': s1,
                'yhat': s4
            })
        else:
            forecast_data = lstm.rename(columns={'ds': s1})
        forecast_data1 = forecast_data.set_index(s1)

        fig, ax = plt.subplots(nrows=1, ncols=1)
        ax.plot(datewise[s4], label="Original Data")
        ax.plot(forecast_data1[s4], label="Forecasted Data")
        ax.legend()
        ax.set_xlabel("Date")
        ax.set_ylabel(s4)
        ax.set_title('forecasted data of ' + s4)
        plt.xticks(rotation=90)
        plt.show()
        n = str(randint(0, 1000000000000))
        full_filename = os.path.join(app.config["IMAGE_UPLOADS"],
                                     n + 'time_series.png')
        fig.savefig(full_filename)

    # VARMAX
    if s3 > 1:
        n5 = s4.split()
        rows = _select_rows(d3, s5, s6, s7)
        datewise = rows.groupby([s1]).agg({col: 'sum' for col in n5})

        # One ARIMA order per column; the largest p, d and q are used for
        # the joint model
        list1 = []
        for col in n5:
            model_arima = auto_arima(datewise[col],
                                     trace=True,
                                     error_action='ignore',
                                     start_p=1,
                                     start_q=1,
                                     max_p=3,
                                     max_q=3,
                                     suppress_warnings=True,
                                     stepwise=False,
                                     seasonal=False)
            list1.append(model_arima.order)
        p = max(order[0] for order in list1)
        d = max(order[1] for order in list1)
        q = max(order[2] for order in list1)

        # The data is differenced at most twice
        n_diffs = min(max(d, 0), 2)
        df_transformed = datewise
        for _ in range(n_diffs):
            df_transformed = df_transformed.diff()
        if n_diffs:
            df_transformed = df_transformed.dropna()

        # Backtest on the last `nobs` observations
        nobs = 12
        train = df_transformed[0:-nobs]
        results = VARMAX(train, order=(p, q), trend='c').fit(maxiter=100,
                                                             disp=False)
        backtest = _undo_differencing(
            results.forecast(nobs)[n5], datewise[n5].iloc[:-nobs], n_diffs)
        for col in n5:
            RMSE = rmse(datewise[col][-nobs:], backtest[col])
            print('Root Mean Square Error for ' + col + ':', RMSE)

        # Refit on all data and forecast t periods past the end of the data
        results = VARMAX(df_transformed, order=(p, q),
                         trend='c').fit(maxiter=100, disp=False)
        # The levels are rebuilt from the LAST observations: the forecast
        # starts right after the end of the data, not t periods before it
        df_forecast = _undo_differencing(
            results.forecast(t)[n5], datewise[n5], n_diffs)
        df_forecast = df_forecast.rename(
            columns=lambda col: col + ' Forecasted').rename_axis(s1)
        list98 = list(df_forecast.columns)
        forecast_data1 = df_forecast[list98]

        # squeeze=False keeps `b` two-dimensional even for a single column
        fig, b = plt.subplots(len(n5), 2, figsize=(15, 5), squeeze=False)
        for i, col in enumerate(n5):
            datewise[col].plot(kind='line', ax=b[i][0], title=col)
            df_forecast[list98[i]].plot(kind='line',
                                        ax=b[i][1],
                                        title='Forecasted data of ' + col,
                                        color='orange')
        fig.tight_layout(pad=1.0)
        plt.show()
        n = str(randint(0, 1000000000000))
        full_filename = os.path.join(app.config["IMAGE_UPLOADS"],
                                     n + 'time_series.png')
        fig.savefig(full_filename)

    return render_template('step1_img.html',
                           user_image=full_filename,
                           tables=[forecast_data1.to_html(classes='page')],
                           titles=['na', 'Job'],
                           query1=request.form['query1'],
                           query2=request.form['query2'],
                           query3=request.form['query3'],
                           query4=request.form['query4'],
                           query5=request.form['query5'],
                           # Only read when the filter is used, so they may
                           # be missing from the form
                           query6=request.form.get('query6'),
                           query7=request.form.get('query7'),
                           query8=request.form['query8'])
# 1. Check distribution of residual errors
arima.resid.plot(kind='kde', grid=False)
plt.title('Residual Errors Distribution', size=17)
plt.xlabel('Residual Errors', size=13)
plt.ylabel('Density', size=13)
plt.show()

# 2. Check statistics
print('[Basic statistics]')
print(arima.resid.describe())

if query == '2':
    # In-sample predictions are computed once and reused for the plot and for
    # every score below.
    predictions = arima.predict()

    # 1. Overlap predictions (+1 step to the last observation) onto the truth
    fig, ax = plt.subplots(figsize=(13, 7))
    plt.plot(daily.Date_x, daily.increased_confirmed,
             color='#33322B', ls=':', lw=3)
    plt.plot(daily.Date_x, predictions)
    title = 'ARIMA (one-step forecasting for every date) in ' + region
    plt.title(title, size=17)
    plt.xlabel('Date', size=13)
    plt.ylabel('Number of daily confirmed cases', size=13)
    # Keep roughly every (n/8)-th tick; the stride must be at least 1 because
    # a slice step of 0 (fewer than 8 rows) raises ValueError.
    tick_step = max(1, int(len(daily.Date_x) / 8))
    ax.set_xticks(ax.get_xticks()[::tick_step])
    plt.legend(['Truth', 'Prediction'], loc='upper left')
    plt.show()

    # 2. Check scores
    meae = metrics.median_absolute_error(
        daily.increased_confirmed, predictions)
    mae = metrics.mean_absolute_error(
        daily.increased_confirmed, predictions)
    # NOTE(review): despite its name this holds the mean SQUARED error, no
    # root is taken here - confirm whether the code using it applies the root.
    rmse = metrics.mean_squared_error(
        daily.increased_confirmed, predictions)
# Cross-validated score of the manual AR model.
cv_manual_ar = cross_val_score(estimator=m_full, X=X_full, y=y_full, cv=splits)
print(f'CV-Score (Manual AR): {round(cv_manual_ar.mean(),3)}')

# AutoRegressive model (statsmodels) on the data with the trend/seasonality
# features supplied as exogenous regressors.
ar_model = AutoReg(y_season, lags=3, exog=X_season).fit()
prediction_ar = ar_model.predict()

# ARIMA model (statsmodels) on the same data is very slow and therefore
# disabled; it would be:
#   ARIMA(y_season, order=(1,0,1), exog=X_season).fit()

# ARIMA model on the remainder only.
arima_model = ARIMA(remainder, order=(2, 0, 2), freq='D').fit()
prediction_arima = arima_model.predict()
prediction_arima.name = 'Arima_lags'

# The ARIMA prediction (2 AR lags, 2 MA terms) becomes an additional feature
# of a linear regression on the seasonal features.
X_arima = X_season.join(prediction_arima)
m_arima = LinearRegression()
m_arima.fit(X_arima, y_season)
outcome_arima = pd.Series(m_arima.predict(X_arima), index=y_season.index)

# Plot the residuals of the manual AR model for the year 2000.
sns.lineplot(
    x=train.loc['2000'].index,
    y=(train['temp'].loc['2000'] - train['full_model'].loc['2000']),
    label='Residuals Manual AR',
)
plt.show()
def _parse_arima_order(s_order):
    """Parse an ARIMA order given as "pdq" (one digit each) or "p,d,q".

    Returns a tuple of three non-negative ints; raises ValueError otherwise.
    """
    parts = s_order.split(',') if ',' in s_order else list(s_order)
    try:
        t_order = tuple(int(part) for part in parts)
    except ValueError as err:
        raise ValueError(
            f"Invalid ARIMA order '{s_order}': expected 'pdq' or 'p,d,q'."
        ) from err
    if len(t_order) != 3 or min(t_order) < 0:
        raise ValueError(
            f"Invalid ARIMA order '{s_order}': expected 'pdq' or 'p,d,q'.")
    return t_order


def arima(l_args, s_ticker, s_interval, df_stock):
    """Fit an ARIMA model on the adjusted close, then plot and print a forecast.

    Parameters
    ----------
    l_args : list of str
        Command line style arguments (-d days, -i information criterion,
        -s seasonal, -o order, -r results).
    s_ticker : str
        Ticker name, used in the plot title.
    s_interval : str
        Not used by this command.
    df_stock : pd.DataFrame
        Stock data; the '5. adjusted close' column is modelled.

    Problems are reported on stdout instead of being raised, so a bad
    argument does not terminate the caller.
    """
    parser = argparse.ArgumentParser(
        prog='arima',
        description="""
            In statistics and econometrics, and in particular in time series analysis,
            an autoregressive integrated moving average (ARIMA) model is a generalization
            of an autoregressive moving average (ARMA) model. Both of these models are
            fitted to time series data either to better understand the data or to predict
            future points in the series (forecasting). ARIMA(p,d,q) where parameters p, d,
            and q are non-negative integers, p is the order (number of time lags) of the
            autoregressive model, d is the degree of differencing (the number of times the
            data have had past values subtracted), and q is the order of the
            moving-average model.
        """)

    parser.add_argument('-d', "--days", action="store", dest="n_days",
                        type=check_positive, default=5,
                        help='prediction days.')
    parser.add_argument('-i', "--ic", action="store", dest="s_ic", type=str,
                        default='aic',
                        choices=['aic', 'aicc', 'bic', 'hqic', 'oob'],
                        help='information criteria.')
    parser.add_argument('-s', "--seasonal", action="store_true",
                        default=False, dest="b_seasonal",
                        help='Use weekly seasonal data.')
    parser.add_argument('-o', "--order", action="store", dest="s_order",
                        type=str,
                        help='arima model order (p,d,q) in format: pdq.')
    parser.add_argument('-r', "--results", action="store_true",
                        dest="b_results", default=False,
                        help='results about ARIMA summary flag.')

    try:
        (ns_parser, l_unknown_args) = parser.parse_known_args(l_args)

        if l_unknown_args:
            print(
                f"The following args couldn't be interpreted: {l_unknown_args}\n"
            )
            return

        prices = df_stock['5. adjusted close']

        # Machine Learning model
        if ns_parser.s_order:
            t_order = _parse_arima_order(ns_parser.s_order)
            model = ARIMA(prices.values, order=t_order).fit()
            # Observations occupy positions 0..n-1, so the first
            # out-of-sample step is position n. Starting at n + 1 would skip
            # the first predicted day and mislabel all the others.
            n_obs = len(prices)
            l_predictions = model.predict(start=n_obs,
                                          end=n_obs + ns_parser.n_days - 1)
        else:
            # m=5: weekly seasonality over 5 trading days
            if ns_parser.b_seasonal:
                seasonal_args = {'seasonal': True, 'm': 5}
            else:
                seasonal_args = {'seasonal': False}
            # The keyword is `information_criterion`; under the previous
            # spelling the -i option never reached the model selection.
            model = pmdarima.auto_arima(prices.values,
                                        error_action='ignore',
                                        information_criterion=ns_parser.s_ic,
                                        **seasonal_args)
            l_predictions = model.predict(n_periods=ns_parser.n_days)

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=prices.index[-1], n_next_days=ns_parser.n_days)
        df_pred = pd.Series(l_predictions, index=l_pred_days, name='Price')

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.plot(df_stock.index, prices, lw=2)
        if ns_parser.s_order:
            plt.title(
                f"ARIMA {str(t_order)} on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"ARIMA {model.order} on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel('Time')
        plt.ylabel('Share Price ($)')
        # The on/off flag is passed positionally: its keyword was renamed
        # between Matplotlib versions.
        plt.grid(True, which='major', color='#666666', linestyle='-')
        plt.minorticks_on()
        plt.grid(True, which='minor', color='#999999', linestyle='-',
                 alpha=0.2)
        # Dashed connector from the last real price to the first prediction
        plt.plot([df_stock.index[-1], df_pred.index[0]],
                 [prices.values[-1], df_pred.values[0]],
                 lw=1, c='tab:green', linestyle='--')
        plt.plot(df_pred.index, df_pred, lw=2, c='tab:green')
        plt.axvspan(df_stock.index[-1], df_pred.index[-1],
                    facecolor='tab:orange', alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1], ymin, ymax, linewidth=1,
                   linestyle='--', color='k')
        plt.show()

        # Print prediction data
        print("Predicted share price:")
        df_pred = df_pred.apply(lambda x: f"{x:.2f} $")
        print(df_pred.to_string())
        print("")

    except SystemExit:
        # argparse exits on -h and on invalid option values after printing
        # its own message; stay in the calling session.
        print("")
    except Exception as e:
        # Report the problem instead of hiding it behind an empty line
        print(e, "\n")