# Fit a seasonal auto-ARIMA model on the occupancy series selected by `mask`
# and cache it on disk; skipped when there is no data or the pickle exists.
curr = df[mask]
pct_occupied = curr.PercentOccupied

if len(pct_occupied) == 0 or path.exists(filename):
    print('no vals or already ran')
else:
    # 70% of the observations are used for fitting; the out-of-bag window
    # used for model scoring is sized as 20% of the full series.
    num_split = int(.7 * len(pct_occupied))
    oob_length = int(0.2 * len(pct_occupied))

    # Seasonal period: the largest number of rows recorded on a single day.
    rows_per_day = curr.groupby(curr.OccupancyDateTime.dt.dayofyear).count()
    time_chunks_per_day = rows_per_day.SourceElementKey.max()

    tr = pct_occupied.iloc[:num_split]
    tt = pct_occupied.iloc[num_split:]

    search_options = dict(
        error_action='ignore',
        trace=True,
        start_p=2, start_q=2, start_P=2, start_Q=2,
        max_p=10, max_q=10, max_P=10, max_Q=10,
        d=0, D=0,
        out_of_sample_size=oob_length,
        max_order=None,
        information_criterion='oob',
        seasonal=True,
        m=time_chunks_per_day,
    )
    mdl = auto_arima(tr, **search_options)

    with open(filename, 'wb') as pkl:
        pickle.dump(mdl, pkl)

# preds, conf_int = mdl.predict(n_periods=tt.shape[0], return_conf_int=True)
# print("Test RMSE: %.3f" % np.sqrt(mean_squared_error(tt, preds)))
#
# #############################################################################
# # Plot the points and the forecasts
def test_inf_max_order():
    """Smoke test: ``auto_arima`` must accept ``max_order=None`` on ``lynx``."""
    search_options = {
        'max_order': None,
        'suppress_warnings': True,
        'error_action': 'trace',
    }
    # Only successful completion matters; the fitted model is discarded.
    _ = pm.auto_arima(lynx, **search_options)  # noqa: F841
def test_valid_max_order_edges(endog, max_order, kwargs):
    """Check that the selected orders respect ``max_order``.

    The sum of the non-seasonal order and the first three entries of the
    seasonal order of the fitted model must not exceed ``max_order``.
    """
    model = pm.auto_arima(endog, max_order=max_order, **kwargs)
    non_seasonal_total = sum(model.order)
    seasonal_total = sum(model.seasonal_order[:3])
    assert (non_seasonal_total + seasonal_total) <= max_order
def ARIMA(self, model_var='adj_close', n_periods=50, timescale='D'):
    """Fit an auto-ARIMA model to one price column and plot fit and forecast.

    Two figures are shown: autocorrelation plots of the series and of its
    first and second differences, then the observed series together with
    the in-sample fit, the forecast and its confidence band.  The in-sample
    residuals and the forecast / interval series are printed as well.

    Args:
        model_var: Column to model.  ``'close'`` reads ``self.minute_prices``
            and re-indexes it 0..n-1; any other value reads the frame
            returned by ``GetTimeSlot(self.tstock_info, days=5 * 365)``.
        n_periods: Number of future steps to forecast.
        timescale: Pandas frequency string used to build the forecast dates
            (not used when ``model_var == 'close'``).

    Returns:
        None.
    """
    use_minute_prices = model_var == 'close'
    if use_minute_prices:
        # reset_index returns a new Series, so the column stored on
        # self.minute_prices keeps its own index.
        timeseries = self.minute_prices[model_var].reset_index(drop=True)
    else:
        timeseries = GetTimeSlot(self.tstock_info, days=5 * 365)[model_var]

    # Autocorrelation of the raw series and of its first two differences.
    fig = plt.figure(figsize=(10, 10))
    ax1 = fig.add_subplot(311)
    fig = plot_acf(timeseries, ax=ax1,
                   title="Autocorrelation on Original Series")
    ax2 = fig.add_subplot(312)
    fig = plot_acf(timeseries.diff().dropna(), ax=ax2,
                   title="1st Order Differencing")
    ax3 = fig.add_subplot(313)
    fig = plot_acf(timeseries.diff().diff().dropna(), ax=ax3,
                   title="2nd Order Differencing")

    autoarima_model = pmd.auto_arima(timeseries,
                                     start_p=1,
                                     start_q=1,
                                     test="adf",
                                     trace=True)

    # predict() always forecasts from the end of the training data; it has
    # no documented ``start`` argument, so none is passed.
    fitted, confint = autoarima_model.predict(n_periods,
                                              return_conf_int=True)
    fittedv = autoarima_model.predict_in_sample()

    # The forecast covers the n_periods steps *after* the last observation,
    # so its index must begin one step past timeseries.index[-1].
    last_obs = timeseries.index[-1]
    if use_minute_prices:
        index_of_fc = np.arange(last_obs + 1, last_obs + 1 + n_periods)
    else:
        candidates = pd.date_range(last_obs,
                                   periods=n_periods + 1,
                                   freq=timescale)
        index_of_fc = candidates[candidates > last_obs][:n_periods]

    plt.show()

    # Wrap the raw values so they are labelled positionally; this also
    # works if predict() hands back a Series carrying its own index.
    fittedv_series = pd.Series(np.asarray(fittedv), index=timeseries.index)
    fitted_series = pd.Series(np.asarray(fitted), index=index_of_fc)
    print(fittedv_series - timeseries)
    lower_series = pd.Series(confint[:, 0], index=index_of_fc)
    upper_series = pd.Series(confint[:, 1], index=index_of_fc)
    print(lower_series)
    print(fitted_series)
    print(upper_series)

    # Plot observed data, forecast, in-sample fit and confidence band.
    plt.plot(timeseries)
    plt.plot(fitted_series, color='darkgreen')
    plt.plot(fittedv_series, color='yellow')
    plt.fill_between(lower_series.index,
                     lower_series,
                     upper_series,
                     color='k',
                     alpha=.15)
    plt.title(
        "SARIMA - Final Forecast of Stock prices - Time Series Dataset")
    plt.show()
plt.title('Wykres cen akcji firmy ' + sStock + '\nPrzedział czasu równy ' + sPeriod) plt.show() FigPlot(vStockDate, vStockClose) modArima = pmd.auto_arima( vStockClose, start_p=0, start_q=0, test='adf', # use adftest to find optimal 'd' max_p=5, max_q=5, # maximum p and q m=1, # frequency of series d=None, # let model determine 'd' seasonal=False, # No Seasonality start_P=0, D=0, trace=True, error_action='ignore', suppress_warnings=True, stepwise=True) #Summary of the Arima model modSummary = modArima.summary() print(modSummary) #Few graphs to check the correctness of model modArima.plot_diagnostics(figsize=(8, 8)) plt.show()
def _write_then_read(path, items):
    """Write *items* to *path* via ``print(*items)`` and return the file text."""
    with open(path, 'w', encoding="utf-8") as f:
        print(*items, file=f)
    with open(path, "r", encoding="utf-8") as f_open:
        return f_open.read()


def _growth_lines(tickers, growth, first_suffix):
    """Build one growth line per ticker; the first line carries the heading."""
    lines = []
    for pos, ticker in enumerate(tickers):
        pct = str(growth[ticker] * 100)
        if pos == 0:
            lines.append("Stock growth:\n" + " " + ticker + ": " + pct +
                         first_suffix)
        else:
            lines.append(ticker + ":" + pct + "%\n")
    return lines


def _forecast_lines(tickers, predictions, horizon):
    """Build one 'Day n: €price' paragraph per ticker."""
    days = ["Day " + str(i + 1) + ": €" for i in range(horizon)]
    lines = []
    for pos, ticker in enumerate(tickers):
        values = ", ".join(
            day + str(np.round(price, 2))
            for day, price in zip(days, predictions[ticker])) + "."
        paragraph = ticker + "\n" + values + "\n"
        lines.append(paragraph if pos == 0 else "\n" + paragraph)
    return lines


def prediction_generator(investor,
                         prediction_file='data/prediction.csv',
                         horizon=5):
    """Forecast closing prices and build the text shown to the client.

    Historical prices for the module-level ``ticker_list`` over the last
    ``duration`` days are downloaded, one auto-ARIMA model is fitted per
    ticker and ``horizon`` steps are forecast.  Tickers are ranked by the
    change between the last close and the first forecast; the top three go
    to "risky" investors, the remainder to "non-risky" ones.

    Side effects: ``data/growth_info.txt`` and ``data/predition_info.txt``
    are (over)written with the two parts of the report.

    Args:
        investor: ``"risky"`` or ``"non-risky"``.
        prediction_file: Not used by this function; kept for compatibility.
        horizon: Number of days to forecast.

    Returns:
        The growth report followed by the per-day forecast report.

    Raises:
        ValueError: If ``investor`` is not one of the two supported values.
    """
    if investor not in ("risky", "non-risky"):
        raise ValueError(
            "investor must be 'risky' or 'non-risky', got %r" % (investor, ))

    start = datetime.now() - timedelta(days=duration)
    end = datetime.now()

    # get data from IEXFINANCE, import data to a panda's dataframe
    # NOTE(review): the API token is hard-coded in source; it should be
    # rotated and loaded from configuration or the environment instead.
    df = get_historical_data(ticker_list,
                             start,
                             end,
                             output_format='pandas',
                             token="sk_261e4411a4ef43fab3fea00a67631841")
    idx = pd.IndexSlice
    df = df.loc[:, idx[:, "close"]]
    df.columns = ticker_list

    # One model per ticker, fitted on the non-NaN closes.
    predictions = pd.DataFrame()
    for ticker in ticker_list:
        closes = df[ticker].values
        model = pm.auto_arima(closes[~np.isnan(closes)])
        predictions[ticker] = model.predict(n_periods=horizon)

    # Growth between the last observed close and the first forecast.
    # NOTE(review): the change is divided by the *forecast*, not by the
    # last close, as in the original code - confirm this is intended.
    last_close = df.iloc[-1]
    first_forecast = predictions.iloc[0]
    growth = ((first_forecast - last_close) /
              first_forecast).sort_values(ascending=False)

    # differentiate ticker lists for risky (a) and non risky investors (b)
    if investor == "risky":
        tickers = growth.index[0:3]
        # The risky report has always carried a trailing space after the
        # first line; kept so the client-facing text does not change.
        first_suffix = "%\n "
    else:
        tickers = growth.index[3:]
        first_suffix = "%\n"

    predictions = predictions.loc[:, tickers]

    growth_info = _write_then_read(
        'data/growth_info.txt', _growth_lines(tickers, growth, first_suffix))
    predicion_info = _write_then_read(
        'data/predition_info.txt',
        _forecast_lines(tickers, predictions, horizon))

    return growth_info + predicion_info
dfCrime_sup2010['mois'] >= '2019-01-01'] timeseries_19_20 = dfCrime_2019_2020.set_index('mois') timeseries_19_20.index = pd.to_datetime(timeseries_19_20.index, format='%Y_%m') crime_decomposed = seasonal_decompose(timeseries_10_19['total'], model='multiplicative') # Fit auto_arima function to dataset stepwise_fit = auto_arima(timeseries_10_19['total'], start_p=1, start_q=1, max_p=3, max_q=3, m=12, start_P=0, seasonal=True, d=None, D=1, trace=True, error_action='ignore', suppress_warnings=True, stepwise=True) ### FIT TO DATASET # Split data into train / test sets train = timeseries_10_19.iloc[:len(timeseries_10_19) - 12] test = timeseries_10_19.iloc[len(timeseries_10_19) - 12:] # set one year(12 months) for testing # Fit a SARIMAX on the training set
def arima(l_args, s_ticker, s_interval, df_stock):
    """Run the ``arima`` command: fit a model, then plot and print a forecast.

    Args:
        l_args: Command-line style argument list (``-d``, ``-i``, ``-s``,
            ``-o``, ``-r``).  If any argument is not recognised, a message
            is printed and the function returns without fitting anything.
        s_ticker: Ticker symbol, used in the plot title.
        s_interval: Not used by this function.
        df_stock: Frame with a ``'5. adjusted close'`` column; its index is
            used as the time axis.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser(
        prog='arima',
        description="""In statistics and econometrics, and in particular in
        time series analysis, an autoregressive integrated moving average
        (ARIMA) model is a generalization of an autoregressive moving average
        (ARMA) model. Both of these models are fitted to time series data
        either to better understand the data or to predict future points in
        the series (forecasting). ARIMA(p,d,q) where parameters p, d, and q
        are non-negative integers, p is the order (number of time lags) of
        the autoregressive model, d is the degree of differencing (the number
        of times the data have had past values subtracted), and q is the
        order of the moving-average model.""")

    parser.add_argument('-d',
                        "--days",
                        action="store",
                        dest="n_days",
                        type=check_positive,
                        default=5,
                        help='prediction days.')
    parser.add_argument('-i',
                        "--ic",
                        action="store",
                        dest="s_ic",
                        type=str,
                        default='aic',
                        choices=['aic', 'aicc', 'bic', 'hqic', 'oob'],
                        help='information criteria.')
    parser.add_argument('-s',
                        "--seasonal",
                        action="store_true",
                        default=False,
                        dest="b_seasonal",
                        help='Use weekly seasonal data.')
    parser.add_argument('-o',
                        "--order",
                        action="store",
                        dest="s_order",
                        type=str,
                        help='arima model order (p,d,q) in format: pdq.')
    parser.add_argument('-r',
                        "--results",
                        action="store_true",
                        dest="b_results",
                        default=False,
                        help='results about ARIMA summary flag.')

    (ns_parser, l_unknown_args) = parser.parse_known_args(l_args)

    if l_unknown_args:
        print(
            f"The following args couldn't be interpreted: {l_unknown_args}\n")
        return

    a_close = df_stock['5. adjusted close']

    # Machine Learning model
    if ns_parser.s_order:
        # "pdq" string -> (p, d, q); one digit per order.
        t_order = tuple(int(s_digit) for s_digit in ns_parser.s_order)
        model = ARIMA(a_close.values, order=t_order).fit()
        # NOTE(review): start is len + 1, i.e. the first out-of-sample step
        # (position len) is skipped - confirm this offset is intended.
        l_predictions = model.predict(start=len(a_close) + 1,
                                      end=len(a_close) + ns_parser.n_days)
    else:
        # The keyword is ``information_criterion``; the previous spelling
        # ``information_criteria`` meant the --ic choice never reached the
        # model search.
        d_search = {
            'error_action': 'ignore',
            'information_criterion': ns_parser.s_ic,
        }
        if ns_parser.b_seasonal:
            d_search.update(seasonal=True, m=5)
        else:
            d_search['seasonal'] = False
        model = pmdarima.auto_arima(a_close.values, **d_search)
        l_predictions = model.predict(n_periods=ns_parser.n_days)

    # Prediction data
    l_pred_days = get_next_stock_market_days(last_stock_day=a_close.index[-1],
                                             n_next_days=ns_parser.n_days)
    df_pred = pd.Series(l_predictions, index=l_pred_days, name='Price')

    if ns_parser.b_results:
        print(model.summary())
        print("")

    # Plotting
    plt.plot(df_stock.index, a_close, lw=2)
    if ns_parser.s_order:
        plt.title(
            f"ARIMA {str(t_order)} on {s_ticker} - {ns_parser.n_days} days prediction"
        )
    else:
        plt.title(
            f"ARIMA {model.order} on {s_ticker} - {ns_parser.n_days} days prediction"
        )
    plt.xlim(df_stock.index[0],
             get_next_stock_market_days(df_pred.index[-1], 1)[-1])
    plt.xlabel('Time')
    plt.ylabel('Share Price ($)')
    # The on/off flag is passed positionally: its keyword was renamed from
    # ``b`` to ``visible`` in newer matplotlib releases.
    plt.grid(True, which='major', color='#666666', linestyle='-')
    plt.minorticks_on()
    plt.grid(True, which='minor', color='#999999', linestyle='-', alpha=0.2)
    # Dashed connector from the last observation to the first prediction.
    plt.plot([df_stock.index[-1], df_pred.index[0]],
             [a_close.values[-1], df_pred.values[0]],
             lw=1,
             c='tab:green',
             linestyle='--')
    plt.plot(df_pred.index, df_pred, lw=2, c='tab:green')
    plt.axvspan(df_stock.index[-1],
                df_pred.index[-1],
                facecolor='tab:orange',
                alpha=0.2)
    _, _, ymin, ymax = plt.axis()
    plt.vlines(df_stock.index[-1],
               ymin,
               ymax,
               linewidth=1,
               linestyle='--',
               color='k')
    plt.show()

    # Print prediction data
    print("Predicted share price:")
    df_pred = df_pred.apply(lambda x: f"{x:.2f} $")
    print(df_pred.to_string())
    print("")
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sb
from scipy import stats
sb.set_style('darkgrid')
from pmdarima import auto_arima
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
import math

# import the csv and store in a dataframe
stock_data = pd.read_csv('C:/Users/Smit/Dataset/yahoo/stockMarket.csv')

# Keep only the fourth column after indexing by date; the code below
# addresses it as 'Close'.
newdata = stock_data.set_index('Date')
newdata = newdata.iloc[:, 3]
newdata = pd.DataFrame(newdata)

# Order search on the whole series (trace=True prints the candidates).
summary = auto_arima(newdata['Close'],
                     start_p=0,
                     start_q=0,
                     max_p=3,
                     max_q=3,
                     seasonal=False,
                     trace=True)
summary.summary()

# First 1200 rows for training, the rest for testing.  The test slice is
# copied because a column is added to it below; assigning into a bare
# iloc slice triggers pandas' SettingWithCopy warning.
trian = newdata.iloc[:1200]
test = newdata.iloc[1200:].copy()

start = len(trian)
end = len(trian) + len(test) - 1

model_arima = ARIMA(trian['Close'], order=(0, 1, 0))
result_arima = model_arima.fit()

# Forecast the positions covered by the test set, on the original scale.
prediction = result_arima.predict(start=start, end=end, typ='levels')
prediction = pd.DataFrame(prediction)

test['prediction'] = prediction.values
test.plot()

# Arguments in scikit-learn's (y_true, y_pred) order.
mae = mean_absolute_error(test['Close'], prediction)
sm.graphics.tsa.plot_pacf(diff_stock_data_train.values.squeeze(),
                          lags=40,
                          ax=ax[1])
# TODO: modify so the graph is not written out

# Parameter search (estimate parameters)
# Auto diagnosis check - ARIMA
# ARIMA model fitting
# The (p,d,q) order of the model: the number of AR parameters,
# differences, and MA parameters to use
# AIC ?
auto_arima_model = auto_arima(stock_data_train,
                              start_p=1,
                              start_q=1,
                              max_p=3,
                              max_q=3,
                              seasonal=False,
                              d=1,
                              trace=True,
                              error_action='ignore',
                              suppress_warnings=True,
                              stepwise=False)
summary = auto_arima_model.summary()

# With return_conf_int=True, predict() returns (forecasts, conf_int);
# conf_int has one row per step with columns [lower, upper].
prediction = auto_arima_model.predict(len(stock_data_test),
                                      return_conf_int=True)
predicted_value = prediction[0]
# Column 0 is the lower bound and column 1 the upper bound (these two
# assignments were previously swapped).
predicted_lb = prediction[1][:, 0]
predicted_ub = prediction[1][:, 1]
predict_index = list(stock_data_test.index)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Created on Wed Jun 24 19:31:50 2020 @author: Max """
import pmdarima as pm
import yfinance as yf
import pandas as pd

# Download daily GOOG prices for 2015-2018.
df = yf.download('GOOG',
                 start='2015-01-01',
                 end='2018-12-31',
                 adjusted=True,
                 progress=False)

# Keep the last observation of every week and select the adjusted close.
weekly_last = df.resample('W').last()
goog = weekly_last.rename(columns={'Adj Close': 'adj_close'}).adj_close

# Non-seasonal search over the full grid (stepwise=False), without
# approximation, using all available cores.
search_options = dict(error_action='ignore',
                      suppress_warnings=True,
                      seasonal=False,
                      stepwise=False,
                      approximation=False,
                      n_jobs=-1)
model = pm.auto_arima(goog, **search_options)

print(model.summary())
def train_models(train,
                 models,
                 forecast_len,
                 full_df=None,
                 seasonality="infer_from_data",
                 in_sample=None,
                 freq=None,
                 GPU=None):
    """Train every requested forecasting model on ``train``.

    Args:
        train: Training series; passed to each model (some wrappers convert
            it via ``prophet_dataframe`` / ``gluonts_dataframe``).
        models: Iterable of model names.  Recognised names are "ARIMA",
            "Prophet", "HWAAS", "HWAMS", "PYAF", "Gluonts", "NBEATS",
            "TATS", "TBAT", "TBATS1", "TBATP1" and "TBATS2"; other names
            only produce the progress message.
        forecast_len: Forecast horizon handed to PYAF, Gluonts and NBEATS.
        full_df: Full data set, used only by NBEATS.
        seasonality: Passed to ``select_seasonality`` to pick the seasonal
            period.
        in_sample: Truthy to train for in-sample evaluation (affects the
            progress message and the NBEATS setup).
        freq: Data frequency; ``"D"`` switches Prophet to daily seasonality.
        GPU: Truthy to train Gluonts / NBEATS on the GPU, otherwise CPU.

    Returns:
        ``(models_dict, seasons)``: the trained objects keyed by model name
        and the seasonal period that was selected.
    """
    seasons = select_seasonality(train, seasonality)
    periods = select_seasonality(train, 'periodocity')

    models_dict = {}
    for m in models:
        if in_sample:
            print(
                "Model {} is being trained for in sample prediction".format(m))
        else:
            print("Model {} is being trained for out of sample prediction".
                  format(m))

        if m == "ARIMA":
            models_dict[m] = pm.auto_arima(train, seasonal=True, m=seasons)

        elif m == "Prophet":
            if freq == "D":
                model = Prophet(daily_seasonality=True)
            else:
                model = Prophet()
            models_dict[m] = model.fit(prophet_dataframe(train))

        elif m == "HWAAS":
            # Additive Holt-Winters; retry without Box-Cox if that fit fails.
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend='add',
                    seasonal='add',
                    damped=True).fit(use_boxcox=True)
            except Exception:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend='add',
                    seasonal='add',
                    damped=True).fit(use_boxcox=False)

        elif m == "HWAMS":
            # Multiplicative seasonality first, then without Box-Cox, then
            # a plain additive-seasonal model as the last resort.
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend='add',
                    seasonal='mul',
                    damped=True).fit(use_boxcox=True)
            except Exception:
                try:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend='add',
                        seasonal='mul',
                        damped=True).fit(use_boxcox=False)
                except Exception:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend=None,
                        seasonal='add').fit(use_boxcox=False)

        # if m=="HOLT":
        #   models_dict["HOLT"] = Holt(train,exponential=True).fit()

        elif m == "PYAF":
            model = autof()
            model.train(iInputDS=train.reset_index(),
                        iTime='Date',
                        iSignal='Target',
                        iHorizon=len(train))  # bad coding to have horison here
            models_dict[m] = model.forecast(iInputDS=train.reset_index(),
                                            iHorizon=forecast_len)

        elif m == "Gluonts":
            # A local name is used so the ``freq`` argument is not
            # overwritten for models trained later in this loop.
            inferred_freq = pd.infer_freq(train.index)
            gluon_freq = "M" if inferred_freq == "MS" else inferred_freq
            # Honour the GPU flag; the trainer used to be pinned to 'gpu'
            # regardless of it.
            estimator = DeepAREstimator(
                freq=gluon_freq,
                prediction_length=forecast_len,
                trainer=Trainer(epochs=6, ctx='gpu' if GPU else 'cpu'))
            models_dict[m] = estimator.train(
                training_data=gluonts_dataframe(train))

        elif m == "NBEATS":
            device = torch.device('cuda') if GPU else torch.device('cpu')

            # Start from scratch: drop any checkpoint of a previous run.
            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            batch_size = 10  # greater than 4 for viz
            if in_sample:
                stepped = 35
                x_train, y_train, x_test, y_test, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=True, device=device)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                for _ in range(stepped):
                    train_100_grad_steps(data, device, net, optimiser)
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["x_test"] = x_test
                models_dict[m]["y_test"] = y_test
                models_dict[m]["constant"] = norm_constant
            else:
                # out of sample: nbeats_dataframe returns no test split
                stepped = 5
                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False, device=device)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                for _ in range(stepped):
                    train_100_grad_steps(data, device, net, optimiser)
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["tuple"] = (x_train, y_train, net,
                                           norm_constant)

        # if m=="TBA":
        #   bat = TBATS(use_arma_errors=False,use_box_cox=True)
        #   models_dict[m] = bat.fit(train)

        elif m == "TATS":
            bat = TBATS(seasonal_periods=list(
                get_unique_N(season_list(train), 1)),
                        use_arma_errors=False,
                        use_trend=True)
            models_dict[m] = bat.fit(train)

        elif m == "TBAT":
            bat = TBATS(use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)

        elif m == "TBATS1":
            bat = TBATS(seasonal_periods=[seasons],
                        use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)

        elif m == "TBATP1":
            bat = TBATS(seasonal_periods=[periods],
                        use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)

        elif m == "TBATS2":
            bat = TBATS(seasonal_periods=list(
                get_unique_N(season_list(train), 2)),
                        use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)

        # if m=="ProphetGluonts":
        #   freqed = pd.infer_freq(train.index)
        #   if freqed=="MS":
        #     freq= "M"
        #   else:
        #     freq= freqed
        #   models_dict["ProphetGluonts"] = ProphetPredictor(freq=freq, prediction_length=forecast_len) #use_feat_dynamic_real=True
        #   models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])

    return models_dict, seasons
for key, value in births1_adf[4].items(): print('\t%s: %.3f' % (key, value)) ''' ADF Statistic: -0.331281 p-value: 0.920956 Critical Values: 1%: -3.474 5%: -2.880 10%: -2.577''' #p-value: 0.920956 ie > 0.05, Null Hypothesis accepted and the data is not stationary #H0: Data is not stationary #Applying auto - arima to forecast from pmdarima import auto_arima births_mod = auto_arima(births) births_mod.summary() ''' SARIMAX Results ============================================================================== Dep. Variable: y No. Observations: 168 Model: SARIMAX(2, 1, 1) Log Likelihood -271.935 Date: Fri, 26 Mar 2021 AIC 551.870 Time: 00:35:55 BIC 564.342 Sample: 0 HQIC 556.932 - 168 Covariance Type: opg ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ ar.L1 0.2509 0.095 2.643 0.008 0.065 0.437
def auto_arima_model(test_data):
    """Fit an auto-ARIMA model on the ``"orders"`` column of ``test_data``.

    The search is limited to ``max_d=1``, warnings are suppressed and any
    fitting error is raised (``error_action="raise"``).
    """
    orders = test_data["orders"]
    search_options = {
        "max_d": 1,
        "suppress_warnings": True,
        "error_action": "raise",
    }
    return pmdarima.auto_arima(orders, **search_options)
from pmdarima import auto_arima

# Ignore harmless warnings
import warnings
warnings.filterwarnings("ignore")

# Fit auto_arima function to AirPassengers dataset.
# seasonal=True is required for m=12, start_P=0 and D=1 to take part in
# the search; with seasonal=False those seasonal settings had no effect,
# which contradicted the SARIMAX(...)x(..., 12) model referred to below.
stepwise_fit = auto_arima(
    airline['#Passengers'],
    start_p=1,
    start_q=1,
    max_p=3,
    max_q=3,
    m=12,
    start_P=0,
    seasonal=True,
    d=None,
    D=1,
    trace=True,
    error_action='ignore',  # we don't want to know if an order does not work
    suppress_warnings=True,  # we don't want convergence warnings
    stepwise=True)  # set to stepwise

# To print the summary
stepwise_fit.summary()

# Split data into train / test sets
train = airline.iloc[:len(airline) - 12]
test = airline.iloc[len(airline) - 12:]  # set one year(12 months) for testing

# Fit a SARIMAX(0, 1, 1)x(2, 1, 1, 12) on the training set
"Turnover_std_lag3", "Trades_mean_lag3", "Trades_std_lag3", "High_mean_lag7", "High_std_lag7", "Low_mean_lag7", "Low_std_lag7", "Volume_mean_lag7", "Volume_std_lag7", "Turnover_mean_lag7", "Turnover_std_lag7", "Trades_mean_lag7", "Trades_std_lag7", "High_mean_lag30", "High_std_lag30", "Low_mean_lag30", "Low_std_lag30", "Volume_mean_lag30", "Volume_std_lag30", "Turnover_mean_lag30", "Turnover_std_lag30", "Trades_mean_lag30", "Trades_std_lag30", "month", "week", "day", "day_of_week" ] #LOADING AND FITTING ARIMA -AUTO REGRESSIVE INTEGRATED MOVING AVERAGE- TO FORECAST PRICES #arima uses its own preceding values lagging figures and lagging forecast errors to predict future values model = auto_arima(df_train.VWAP, exogenous=df_train[exogenous_features], trace=True, error_action="ignore", suppress_warnings=True) model.fit(df_train.VWAP, exogenous=df_train[exogenous_features]) forecast = model.predict(n_periods=len(df_valid), exogenous=df_valid[exogenous_features]) df_valid["Forecast_ARIMAX"] = forecast ##RESULT: PICK THE MODEL WITH THE LOWEST AIC #%% #plot the results df_valid[["VWAP", "Forecast_ARIMAX"]].plot(figsize=(14, 7)) #%% print("RMSE of Auto ARIMAX:", np.sqrt(mean_squared_error(df_valid.VWAP, df_valid.Forecast_ARIMAX))) print("\nMAE of Auto ARIMAX:",
tp, tn, fp, fn = 0, 0, 0, 0 accurracy_matrix_df_for_chunk_iteration = pd.DataFrame( columns=["TP", "FN", "FP", "TN"]) ######################## # ARIMA ######################## current_train_list = dict_of_chunk_series_with_test_and_train_and_forecast[ chunk][chunk_iteration]["TRAIN_LIST_MEDIAN"] current_test_list = dict_of_chunk_series_with_test_and_train_and_forecast[ chunk][chunk_iteration]["TEST_LIST_MEDIAN"] try: arima = pm.auto_arima(current_train_list, seasonal=False, suppress_warnings=True, error_action='ignore') forecast_arima = pd.Series(arima.predict(TEST), index=[*range(TRAIN, TRAIN + TEST, 1)], name="forecast_list_arima") dict_of_chunk_series_with_test_and_train_and_forecast[chunk][ chunk_iteration]["FORECAST_LIST_ARIMA"] = forecast_arima runningtime = round(((time.time() - starttime) / 60), 5) print('Chunk ' + str(j) + ' (ID: ' + str(chunk) + ') iteration ' + str(chunk_iteration) + ': Completed ARIMA. Running time ' + str(runningtime) + ' min.') # extract threshold series threshold_high_for_test_list = dict_of_chunk_series_with_test_and_train_and_forecast[ chunk][chunk_iteration]["THRESHOLD_HIGH_FOR_TEST_LIST"]
#> 70.63707081783771 y_ = my_lm_model.predict(my_df[['x']]) tmp = pd.DataFrame(y_, index=my_df.index) plt.plot(my_train.y, label='train') plt.plot(my_test.y, label='test') plt.plot(tmp, label='model') plt.legend() ### 12.2.3 SARIMAによる時系列予測 #### 12.2.3.1 モデルの構築 import pmdarima as pm my_arima_model = pm.auto_arima(my_train.y, m=12, trace=True) #> (省略) #> Best model: ARIMA(1,1,0)(0,1,0)[12] #> Total fit time: 0.838 seconds #### 12.2.3.2 予測 y_, my_ci = my_arima_model.predict( len(my_test), # 期間はテストデータと同じ. alpha=0.05, # 有意水準(デフォルト) return_conf_int=True) # 信頼区間を求める. tmp = pd.DataFrame({ 'y': y_, 'Lo': my_ci[:, 0], 'Hi': my_ci[:, 1] },
plt.title('S&P/Case-Shiller U.S. National Home Price Index') plt.ylabel('Index Jan 2000=100, Seasonally Adjusted') plt.xlabel('Date') plt.show() # ## Information Criterion Method # In[13]: model = pm.auto_arima(data, d=1, D=1, seasonal=False, start_p=0, start_q=0, max_order=6, test='adf', max_p=6, error_action='ignore', suppress_warnings=True, stepwise=True, trace=True) # In[31]: model_3 = ARIMA(data, order=(2, 1, 1), freq='MS') model_fit3 = model_3.fit() output_3 = model_fit3.forecast(steps=36, freq='MS') forecast_3 = pd.concat([forecast_2, output_3], axis=1) forecast_3.columns = [ forecast_1.columns[0], 'Forecast (2,0,0)', 'Forecast (2,1,0)',
print(__doc__)

# Author: Taylor Smith <*****@*****.**>

import pmdarima as pm
from pmdarima import model_selection
import numpy as np
from matplotlib import pyplot as plt

# #############################################################################
# Load the data and split it into separate pieces
data = pm.datasets.load_wineind()
train, test = model_selection.train_test_split(data, train_size=150)

# Fit a simple auto_arima model (seasonal, period 12)
search_options = dict(error_action='ignore',
                      trace=True,
                      suppress_warnings=True,
                      maxiter=10,
                      seasonal=True,
                      m=12)
arima = pm.auto_arima(train, **search_options)

# #############################################################################
# Plot actual test vs. forecasts:
x = np.arange(test.shape[0])
plt.scatter(x, test, marker='x')
forecasts = arima.predict(n_periods=test.shape[0])
plt.plot(x, forecasts)
plt.title('Actual test samples vs. forecasts')
plt.show()
# 使用ARIMA data = df.sort_index(ascending=True, axis=0) train = data[:987] valid = data[987:] training = train['Close'] validation = valid['Close'] model = auto_arima(training, start_p=1, start_q=1, max_p=3, max_q=3, m=12, start_P=0, seasonal=True, d=1, D=1, trace=True, error_action='ignore', suppress_warnings=True) model.fit(training) forecast = model.predict(n_periods=248) forecast = pandas.DataFrame(forecast, index=valid.index, columns=['Prediction']) rms = numpy.sqrt( numpy.mean(
with similar functionality. auto_arima() uses a stepwise approach to search multiple combinations of p,d,q parameters and chooses the best model that has the least AIC. """ # conda install pyramid-arima import pmdarima as pm model = pm.auto_arima( ss0, start_p=1, start_q=1, max_p=3, max_q=3, m=1, # frequency of series test='adf', # use adf_test to fid optimal `d` d=None, # let model determine `d` seasonal=False, start_P=0, D=0, stepwise=True, trace=True, error_action='ignore', supress_warnings=True) print(model.summary()) #%% 13. How to interpret the residual plots in ARIMA model model.plot_diagnostics() # looks like API of statsmodels.tsa.arima.model is used (new, not .arima_model - old) #%% do some prediction fc, confint = model.predict(n_periods=15, return_conf_int=True)
def autoarima(data, Pre_day):
    """Run a stepwise seasonal (m=12) auto-ARIMA search on ``data``.

    ``Pre_day`` is not referenced in the visible part of this function.
    """
    search_options = {
        'm': 12,
        'seasonal': True,
        'error_action': 'ignore',
        'suppress_warnings': True,
        'stepwise': True,
    }
    stepwise_fit = pm.auto_arima(data, **search_options)
def auto_arima_forecast(series,
                        validation_series,
                        horizon,
                        del_outliers=False,
                        normalize=False,
                        plot=False):
    """
    Fits an auto arima model from the series to find the best parameters.
    Performance of the trained model is assessed on a validation series.

    :param series: training series
    :param validation_series: held-out series; its index labels the
        forecast, so it must contain ``horizon`` entries
    :param horizon: number of periods to forecast
    :param del_outliers: pass the training series through
        ``remove_outliers`` before fitting
    :param normalize: scale the training series with ``normalize_series``
        and map the forecast back with the returned scaler
    :param plot: plot the last 100 training points, the validation series
        and the forecast
    :return: SMAPE for the validation series, the forecast validation
        series, order, seasonal_order
    """
    # whether to remove outliers in the training series
    working_series = remove_outliers(series) if del_outliers else series

    # whether to normalize the training series
    if normalize:
        scaler, working_series = normalize_series(working_series)
    else:
        scaler = None

    # input sequence is our data
    train_series = working_series

    # perform search for best parameters and fit
    model = auto_arima(train_series,
                       seasonal=True,
                       max_D=2,
                       m=7,
                       trace=True,
                       error_action='ignore',
                       suppress_warnings=True,
                       stepwise=True)

    params = model.get_params()
    order = params['order']
    seasonal_order = params['seasonal_order']

    # auto_arima already returns the best model fitted on train_series, so
    # the former second ``model.fit(train_series)`` only repeated that work.

    # Plain array of forecasts: predict() may return a Series with its own
    # index, which would neither support .reshape nor align with
    # validation_series.index on assignment.
    f_autoarima = pd.Series(model.predict(n_periods=horizon)).to_numpy()

    # dataframe which contains the result
    forecast_dataframe = pd.DataFrame(index=validation_series.index)

    if normalize:
        # use scaler to reverse normalizing (expects a column vector)
        denormalized_forecast = scaler.inverse_transform(
            f_autoarima.reshape(-1, 1))
        forecast_dataframe['forecast'] = [
            val[0] for val in denormalized_forecast
        ]
    else:
        forecast_dataframe['forecast'] = f_autoarima

    if plot:
        plt.figure(figsize=(10, 6))

        plt.plot(series[-100:], color="blue", linestyle="-")
        plt.plot(validation_series, color="green", linestyle="-")
        plt.plot(forecast_dataframe, color="red", linestyle="--")

        plt.legend(["Train series", "Validation series", "Predicted series"])

        plt.title("Validation of auto arima model")

        plt.show()

    return smape(validation_series, forecast_dataframe['forecast']
                 ), forecast_dataframe['forecast'], order, seasonal_order
""" for i in data.columns: print(f"Jarque Bera for {i}") print(stats.jarque_bera(data[i].dropna())) print(f"Augmented Dickey-Fuller for {i}") print(adfuller(data[i].dropna())) """ #DETERMINAR MODELOS AIC modelos = dict() for i in data.columns: print(f"Modelo arima para {i}" f"##############################################" f"##############################################") # Fit auto_arima function to AirPassengers dataset modelos[i] = auto_arima(data[i].dropna()) # set to stepwise # To print the summary print(modelos[i].summary()) """ modelos_AR = dict() for i in data.columns: print(f"Modelo arima para {i}" f"##############################################" f"##############################################") # Fit auto_arima function to AirPassengers dataset modelos_AR[i] = ARIMA(data[i].dropna(), order = (1,0,0)).fit() # set to stepwise # To print the summary print(modelos_AR[i].summary())
plot_pacf(train_data) plt.show() (p, q) = (sm.tsa.arma_order_select_ic(train_data, max_ar=3, max_ma=3, ic='aic')['aic_min_order']) print(p, q) data = np.array(train_dataset.values).T[0] fittedmodel = auto_arima( train_data, start_p=1, start_q=1, max_p=3, max_q=3, max_d=3, max_order=None, seasonal=False, m=1, test='adf', trace=False, error_action='ignore', # don't want to know if an order does not work suppress_warnings=True, # don't want convergence warnings stepwise=True, information_criterion='bic', njob=-1) # set to stepwise print(fittedmodel.summary()) def plot_arima(truth, forecasts, title="ARIMA", xaxis_label='Time',
def test_r_equivalency(dataset, m, kwargs, expected_order, expected_seasonal):
    """Compare the selected orders with the expected reference orders.

    ``kwargs`` is accepted but not forwarded to ``auto_arima``.
    """
    model = pm.auto_arima(dataset, m=m, trace=1, suppress_warnings=True)

    selected_order = model.order
    assert selected_order == expected_order

    selected_seasonal = model.seasonal_order[:3]
    assert selected_seasonal == expected_seasonal
# Round-trip the column through a NumPy array so that `q` is a list of
# plain Python numbers.
q = list(train['Value'])
out_seq = array(q)
q = out_seq.tolist()
print(q)
q.append(1)
#val=list(p[['Software Bug']].values())
#print(val)

# First 48 values for training, the 49th as the single test point.
train, test = q[:48], q[48:49]
print(pmdarima.arima.nsdiffs(train, 4, max_D=9))
print(train)
forplot = []
# =============================================================================
# ``p`` and ``q`` are not auto_arima parameters (the order search is
# controlled by start_p/max_p and start_q/max_q); they were forwarded as
# stray fit arguments, so they are no longer passed.
stepwise_model = auto_arima(train,
                            m=12,
                            d=2,
                            start_p=0,
                            start_q=0,
                            max_q=12,
                            max_p=12,
                            error_action='ignore',
                            suppress_warnings=True)
model = stepwise_model.fit(train)
for i in range(len(test)):
    q1 = model.predict(n_periods=1)
    # predict() returns a length-1 container; append its scalar value so
    # `train` stays a flat list of numbers.
    train.append(float(list(q1)[0]))
    print(train)
def test_value_errors(endog, kwargs):
    """``auto_arima`` must raise ``ValueError`` for the given arguments."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        pm.auto_arima(endog, **kwargs)
##########################################################
# Load the data and split it into separate pieces
data = pm.datasets.load_lynx()
data
data.shape

# First 90 observations for fitting, the remainder for evaluation.
train, test = data[:90], data[90:]
train.shape, test.shape

# Fit a simple auto_arima model
# NOTE(review): D=10 / max_D=10 are set but no seasonal period `m` is
# given - confirm these seasonal settings are what was intended.
search_options = dict(start_p=1, start_q=1,
                      start_P=1, start_Q=1,
                      max_p=5, max_q=5,
                      max_P=5, max_Q=5,
                      seasonal=True,
                      stepwise=True,
                      suppress_warnings=True,
                      D=10, max_D=10,
                      error_action='ignore')
tsmodel = pm.auto_arima(train, **search_options)
tsmodel

# Create predictions for the future, evaluate on test
n_test = test.shape[0]
preds, conf_int = tsmodel.predict(n_periods=n_test, return_conf_int=True)
preds
conf_int

# Print the error: test