def autoArima(data):
    """Pick an ARIMA order via pmdarima's stepwise search, then refit it.

    The order search uses the ADF unit-root test and BIC; the chosen
    (p, d, q) is handed to ARIMA and the fitted results object returned.
    """
    search = auto_arima(data,
                        test='adf',
                        trace=True,
                        m=1,
                        error_action='ignore',
                        suppress_warnings=True,
                        stepwise=True,
                        information_criterion='bic')
    return ARIMA(data, order=search.order).fit()
def _find_optimal_model(train, val, test, data_props, examples):
    """Search for the best ARIMA order across per-row candidate orders.

    Runs auto_arima on every row of train['y_data'] to collect candidate
    (p, d, q) orders, then re-evaluates each unique candidate with
    one-step-ahead forecasts on the validation split (fit on train) and
    test split (fit on train+val), averaging MAE / MDA / MSE / sign per
    split. Returns a dict keyed by order, plus '<split>_best_score' and
    '<split>_best_param' summary keys.

    NOTE(review): `data_props` and `examples` are accepted but unused.
    """
    results = []
    # One auto_arima fit per training row; only the selected order is kept.
    for row in train['y_data']:
        model = auto_arima(row, trace=True)
        results.append(model.order)
    # De-duplicate the candidate orders, preserving first-seen order.
    ARIMA_potentials = list(dict.fromkeys(results))
    val_results = {}
    for props in ARIMA_potentials:
        if props not in val_results:
            val_results[props] = {'val': {}, 'test': {}}
        # NOTE(review): `set` shadows the builtin. Val forecasts fit on
        # train only; test forecasts fit on train+val concatenated along
        # axis=1 (assumes rows are series — TODO confirm orientation).
        for set, X, y in zip(['val', 'test'], [
                train['y_data'],
                np.concatenate((train['y_data'], val['y_data']), axis=1)
        ], [val['y_data'], test['y_data']]):
            for i in range(len(X)):
                # Fit this candidate order on series i, forecast one step.
                mod = ARIMA(X[i], order=props).fit()
                y_pred = float(mod.forecast())
                y_true = float(y[i])
                mae = abs(y_pred - y_true)
                # Directional accuracy: signs match raw or rounded to 4 dp.
                mda = int(
                    np.sign(y_pred) == np.sign(y_true) or np.sign(
                        np.round(y_pred, 4)) == np.sign(np.round(y_true, 4)))
                mse = (y_pred - y_true)**2
                # Sign of the true value (-1/0/1), rounded to damp float noise.
                pos = int(np.sign(np.round(y_true, 6)))
                for err, vale in zip(['mae', 'mda', 'mse', 'pos'],
                                     [mae, mda, mse, pos]):
                    # Lazily create the per-metric lists on first use.
                    if 'mae' not in val_results[props][set]:
                        val_results[props][set] = {
                            'mae': [],
                            'mda': [],
                            'mse': [],
                            'pos': []
                        }
                    val_results[props][set][err].append(vale)
    final_results = {}
    for props in ARIMA_potentials:
        # NOTE(review): membership is tested against .items() (key, value)
        # pairs, so this condition is always True for a tuple key; it was
        # almost certainly meant to be `props not in final_results`.
        # Harmless here because ARIMA_potentials is already de-duplicated.
        if props not in final_results.items():
            final_results[props] = {}
        for set in ['val', 'test']:
            for err, vals in val_results[props][set].items():
                final_results[props][f'{set}_{err}'] = statistics.mean(vals)
                # NOTE(review): the running best is compared across ALL four
                # metrics rather than one chosen criterion — verify intended.
                # The (0, 0, 0) order is excluded from "best" consideration.
                if props != (0, 0, 0) and (
                        f'{set}_best_score' not in final_results
                        or final_results[f'{set}_best_score'] >
                        final_results[props][f'{set}_{err}']):
                    final_results[f'{set}_best_score'] = final_results[props][
                        f'{set}_{err}']
                    final_results[f'{set}_best_param'] = props
    return final_results
def beta_pred(self, beta_list_square, pred_date_len=1 + 1):
    """Forecast the next `pred_date_len` values of the squared-beta series.

    The ARIMA order is selected automatically (AIC, no intercept term);
    the fitted model's out-of-sample forecast is returned.
    """
    # check_positive = lambda x:0 if x <=0 else np.sqrt(x)
    selected = auto_arima(beta_list_square,
                          trace=False,
                          information_criterion='aic',
                          with_intercept=False,
                          error_action='ignore',
                          suppress_warnings=True)
    selected.fit(beta_list_square)
    return selected.predict(n_periods=pred_date_len)
def test_warn_for_stepwise_and_parallel():
    """Asking for stepwise search AND n_jobs > 1 should emit a warning."""
    with warnings.catch_warnings(record=True) as caught:
        _ = auto_arima(lynx,
                       suppress_warnings=False,
                       d=1,  # noqa: F841
                       error_action='ignore',
                       stepwise=True,
                       n_jobs=2)
        assert len(caught) > 0
def test_seasonal_xreg_differencing():
    """Exercise differencing of the exogenous array with a small seasonal m.

    m is used as the lag parameter when differencing the xreg array; m == 1
    would force D to 0, so m == 2 is used. (m == 12 also works but takes
    FOREVER.)
    """
    for period in (2,):
        _ = auto_arima(wineind,  # noqa: F841
                       d=1,
                       D=1,
                       m=period,
                       seasonal=True,
                       exogenous=wineind_xreg,
                       error_action='ignore',
                       suppress_warnings=True,
                       # Set to super low iter to make test move quickly
                       maxiter=5)
def do_fit():
    """Run a fully-enumerated (non-stepwise) parallel seasonal search on wineind."""
    return auto_arima(wineind,
                      seasonal=True,
                      m=2,
                      d=1,
                      D=1,
                      start_p=1,
                      max_p=2,
                      start_q=1,
                      max_q=2,
                      start_P=0,
                      stepwise=False,
                      n_jobs=2,
                      n_fits=20,
                      random_state=42,
                      suppress_warnings=True,
                      error_action='ignore',
                      # Set to super low iter to make test move quickly
                      maxiter=2)
def fit(self, time_series):
    """Grid-search a stationary, non-seasonal ARMA model for `time_series`.

    Only p and q are searched, within the configured start/max bounds on
    this instance. Returns the fitted pmdarima model.
    """
    return auto_arima(time_series,
                      stationary=True,
                      seasonal=False,
                      start_p=self.start_p,
                      start_q=self.start_q,
                      max_p=self.max_p,
                      max_q=self.max_q,
                      error_action='ignore',
                      stepwise=False,
                      # BUG FIX: the auto_arima keyword is `n_jobs`, not
                      # `njobs`; the misspelling was not recognized as the
                      # parallelism setting, so the non-stepwise grid ran
                      # without the intended 2 workers.
                      n_jobs=2)
def test_force_polynomial_error():
    """d == 2 with no exogenous array must raise the 'simple polynomial' error."""
    x = np.array([1, 2, 3, 4, 5, 6])
    d = 2
    xreg = None

    with pytest.raises(ValueError) as ve:
        auto_arima(x, d=d, D=0, seasonal=False, exogenous=xreg)
    err_msg = pytest_error_str(ve)
    assert 'simple polynomial' in err_msg, err_msg

    # but it should pass when xreg is not none
    xreg = rs.rand(x.shape[0], 2)
    _ = auto_arima(x,  # noqa: F841
                   d=d,
                   D=0,
                   seasonal=False,
                   exogenous=xreg,
                   error_action='ignore',
                   suppress_warnings=True)
def test_corner_cases():
    """Bad arguments raise; degenerate (constant) input still fits quickly."""
    assert_raises(ValueError, auto_arima, wineind,
                  error_action='some-bad-string')

    # things that produce warnings
    with warnings.catch_warnings(record=False):
        warnings.simplefilter('ignore')

        flat = np.ones(10)

        # show a constant result will result in a quick fit
        auto_arima(flat, suppress_warnings=True)

        # show the same thing with return_all results in the ARIMA in a list
        fits = auto_arima(flat,
                          suppress_warnings=True,
                          return_valid_fits=True)
        assert hasattr(fits, '__iter__')

        # show we fail for n_iter < 0
        assert_raises(ValueError, auto_arima, flat, random=True, n_fits=-1)

        # show if max* < start* it breaks:
        assert_raises(ValueError, auto_arima, flat, start_p=5, max_p=0)
def arima(df, cfg):
    """Rolling-origin SARIMA evaluation over a test split.

    Splits `df`, scales both splits into (1e-10, 1], fits a seasonal
    auto_arima on the training data, then walks forward through the test
    set: at each step it forecasts `cfg['forecast_horizon']` periods with
    80% and 95% prediction intervals, records them, and updates the model
    with the next observed test value. Finally prints and returns per-step
    MSE, PI coverage, and PI width.

    NOTE(review): assumes `df`/`train`/`test` are pandas objects — the
    scaled values are written back as a 'y' column but auto_arima is fed
    the whole `train` object; confirm which column the model actually
    sees. Returns (mse, coverage_80, coverage_95, width_80, width_95).
    """
    train, test = train_test_split(df, cfg['test_size'])
    # Scale into (1e-10, 1] — the tiny lower bound avoids exact zeros.
    scaler = MinMaxScaler(feature_range=(10 ** (-10), 1))
    train['y'] = scaler.fit_transform(train.values.reshape(-1, 1))
    test['y'] = scaler.transform(test.values.reshape(-1, 1))
    # Seasonal stepwise search; d and D are left for estimation.
    auto_model = auto_arima(train, start_p=1, start_q=1, max_p=11, max_q=11,
                            max_d=3, max_P=5, max_Q=5, max_D=3, m=12,
                            start_P=1, start_Q=1, seasonal=True, d=None,
                            D=None, suppress_warnings=True, stepwise=True,
                            information_criterion='aicc')
    print(auto_model.summary())
    # Pre-allocate: one row per rolling origin, one column per horizon step.
    pred_arima = np.zeros([len(test) - cfg['forecast_horizon'],
                           cfg['forecast_horizon']])
    conf_int_arima_80 = np.zeros([len(test) - cfg['forecast_horizon'],
                                  cfg['forecast_horizon'], 2])
    conf_int_arima_95 = np.zeros([len(test) - cfg['forecast_horizon'],
                                  cfg['forecast_horizon'], 2])
    for i in range(len(test)-cfg['forecast_horizon']):
        # Two predict calls: same point forecasts, different interval widths.
        forecast_arima_95 = auto_model.predict(
            n_periods=cfg['forecast_horizon'], return_conf_int=True,
            alpha=1-0.95)
        forecast_arima_80 = auto_model.predict(
            n_periods=cfg['forecast_horizon'], return_conf_int=True,
            alpha=1-0.8)
        pred_arima[i] = forecast_arima_95[0]
        conf_int_arima_80[i] = forecast_arima_80[1]
        conf_int_arima_95[i] = forecast_arima_95[1]
        # Roll the origin forward by feeding in the next observed value.
        auto_model.update(y=[test.values[i]])
    # Store results
    mse_arima, coverage_arima_95, coverage_arima_80, width_arima_95, \
        width_arima_80 = [], [], [], [], []
    for i in range(cfg['forecast_horizon']):
        # ARIMA mean squared error (MSE):
        mse_arima.append(mean_squared_error(
            test[i:len(test)-cfg['forecast_horizon']+i], pred_arima[:, i]))
        # ARIMA 80% PI
        coverage_arima_80.append(compute_coverage(
            upper_limits=conf_int_arima_80[:, i, 1],
            lower_limits=conf_int_arima_80[:, i, 0],
            actual_values=test.values[
                i:len(test) - cfg['forecast_horizon'] + i]))
        width_arima_80.append(np.mean(
            conf_int_arima_80[:, i, 1] - conf_int_arima_80[:, i, 0], axis=0))
        # ARIMA 95% PI
        coverage_arima_95.append(compute_coverage(
            upper_limits=conf_int_arima_95[:, i, 1],
            lower_limits=conf_int_arima_95[:, i, 0],
            actual_values=test.values[
                i:len(test)-cfg['forecast_horizon']+i]))
        width_arima_95.append(np.mean(
            conf_int_arima_95[:, i, 1] - conf_int_arima_95[:, i, 0], axis=0))
    print('================ ARIMA =================')
    print('Mean MSE', np.mean(mse_arima))
    print('MSE sliding window', mse_arima)
    print('Coverage of 80% PI sliding window', coverage_arima_80)
    print('Width of 80% PI sliding window', width_arima_80)
    print('Coverage of 95% PI sliding window', coverage_arima_95)
    print('Width of 95% PI sliding window', width_arima_95)
    return mse_arima, coverage_arima_80, coverage_arima_95, \
        width_arima_80, width_arima_95
def f_AutoARIMA(self, O_Train, O_Test):
    """Fit an auto-selected ARIMA on the training series and forecast the test span.

    Returns a single-column DataFrame ('Prediction') indexed like O_Test.
    """
    arima = auto_arima(O_Train,
                       trace=True,
                       error_action='ignore',
                       suppress_warnings=True)
    arima.fit(O_Train)
    horizon = len(O_Test)
    preds = arima.predict(n_periods=horizon)
    return pd.DataFrame(preds, index=O_Test.index, columns=['Prediction'])
def base_arima(ts):
    """Fit a weekly-seasonal (m=7) stepwise ARIMA and forecast 15 steps ahead."""
    search = auto_arima(ts,
                        m=7,
                        seasonal=True,
                        start_p=1, max_p=9,
                        start_q=1, max_q=9,
                        start_P=1, max_P=9,
                        start_Q=1, max_Q=9,
                        max_d=5, max_D=5,
                        trace=True,
                        stepwise=True,
                        error_action='ignore',
                        suppress_warnings=True)
    forecast = search.predict(n_periods=15)
    return pd.Series(forecast)
def test_with_seasonality3():
    """D can be estimated even when only d is pinned down."""
    auto_arima(wineind,
               m=wineind_m,
               seasonal=True,
               d=1,
               D=None,  # left for estimation
               start_p=1, max_p=2,
               start_q=1, max_q=2,
               start_P=0,
               error_action='ignore',
               suppress_warnings=True,
               trace=True,  # get the coverage on trace
               random_state=42,
               stepwise=True,
               # Set to super low iter to make test move quickly
               maxiter=5)
def __init__(self, signal):
    # Auto-select a non-seasonal ARIMA (d fixed at 1) for the given signal.
    self.model = auto_arima(signal,
                            seasonal=False,
                            m=1,
                            d=1,
                            D=None,
                            start_p=1, max_p=5,
                            start_q=1, max_q=3,
                            start_P=0,
                            suppress_warnings=True)  # trace=True,
def test_issue_30():
    """Regression test for GH issue 30: out_of_sample_size on a tiny series."""
    # From the issue:
    vec = np.array([33., 44., 58., 49., 46., 98., 97.])
    auto_arima(vec,
               out_of_sample_size=1,
               seasonal=False,
               suppress_warnings=True)

    # This is a way to force it:
    ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec)

    # Want to make sure it works with exog arrays as well
    exog = np.random.RandomState(1).rand(vec.shape[0], 2)
    auto_arima(vec,
               exogenous=exog,
               out_of_sample_size=1,
               seasonal=False,
               suppress_warnings=True)

    # This is a way to force it:
    ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec, exogenous=exog)
def fitARIMA(futures):
    """Fit, pickle, and back-cast a seasonal ARIMA for each future ticker.

    For every ticker in `futures`:
      1. load tickerData/<ticker>.txt and keep only the 1990-2020 window,
      2. fit a stepwise seasonal auto_arima (m=5 for 5 working days per
         period) on the log of the close prices and pickle the model,
      3. reload the pickle and write in-sample predictions (with dates)
         to ARIMA/In_sample_predictions/<ticker>.csv.
    """
    futures_List = futures
    for ticker in futures_List:
        df = pd.read_csv("tickerData/{}.txt".format(ticker))
        df['DATE'] = pd.to_datetime(df['DATE'], format='%Y%m%d')
        # filter data to use only training data to fit model
        df = df.loc[(df['DATE'] > '19900101') & (df['DATE'] <= '20201231')]
        # BUG FIX: was a bare `except:`, which also swallows
        # KeyboardInterrupt/SystemExit; only a missing column name should
        # trigger the fallback to the unpadded header spelling.
        try:
            df = df[' CLOSE']
        except KeyError:
            df = df['CLOSE']
        daily_return = np.log(df)
        # set period as 5 to represent 5 working days per period
        # set a range of 0 to 5 for all paramaters
        model = auto_arima(daily_return,
                           start_p=0, max_p=5,
                           start_q=0, max_q=5,
                           d=1, max_d=5,
                           start_P=0, max_P=5,
                           start_Q=0, max_Q=5,
                           D=1, max_D=5,
                           m=5,
                           seasonal=True,
                           trace=True,
                           suppress_warnings=True,
                           error_action='warn',
                           stepwise=True,
                           n_fits=50)
        # save model
        with open('ARIMA/Models/{}.pkl'.format(ticker), 'wb') as pkl:
            pickle.dump(model, pkl)

    # saving in-sample prediction for evaluation of fitted model on
    # training data
    for ticker in futures_List:
        df = pd.read_csv("tickerData/{}.txt".format(ticker))
        df['DATE'] = pd.to_datetime(df['DATE'], format='%Y%m%d')
        # filter data to use only training data to fit model
        df = df.loc[(df['DATE'] > '19900101') & (df['DATE'] <= '20201231')]
        pred = None
        with open('ARIMA/Models/{}.pkl'.format(ticker), 'rb') as pkl:
            model = pickle.load(pkl)
            # NOTE(review): the positional argument to predict_in_sample
            # binds to different parameters across pmdarima versions —
            # verify it selects the intended start/exog behavior.
            pred = model.predict_in_sample(1)
        date = df["DATE"].tolist()
        date = [int(x.strftime("%Y%m%d")) for x in date]
        df_in_sample = pd.DataFrame({"predictions": pred, "date": date})
        df_in_sample.to_csv('ARIMA/In_sample_predictions/{}.csv'.format(ticker),
                            index=False)
def test_with_seasonality4():
    """Random search runs much faster; also return every valid fit.

    A small m keeps the runtime manageable.
    """
    auto_arima(wineind,
               seasonal=True,
               m=12,
               d=1,
               D=None,
               start_p=1, max_p=2,
               start_q=1, max_q=2,
               start_P=0,
               n_jobs=1,
               stepwise=False,
               error_action='ignore',
               suppress_warnings=True,
               random=True,
               random_state=42,
               return_valid_fits=True,
               n_fits=3)  # only a few
def test_warn_for_large_differences():
    """An excessive d or D should raise a ModelFitWarning."""
    # First: d is too large
    with pytest.warns(ModelFitWarning):
        auto_arima(wineind,
                   seasonal=True,
                   m=1,
                   d=3,
                   suppress_warnings=False,
                   error_action='warn',
                   maxiter=5)

    # Second: D is too large. M needs to be > 1 or D will be set to 0...
    # unfortunately, this takes a long time.
    with pytest.warns(ModelFitWarning):
        auto_arima(wineind,
                   seasonal=True,
                   m=2,  # noqa: F841
                   D=3,
                   suppress_warnings=False,
                   error_action='warn',
                   maxiter=5)
def test_with_seasonality7():
    """Out-of-bag scoring can drive the search (information_criterion='oob')."""
    auto_arima(wineind,
               seasonal=True,
               m=12,
               d=1,
               D=1,
               start_p=1, max_p=2,
               start_q=1, max_q=2,
               start_P=0,
               n_jobs=1,
               out_of_sample_size=10,
               information_criterion='oob',
               suppress_warnings=True,
               error_action='raise',  # do raise so it fails fast
               random=True,
               random_state=42,
               n_fits=2,
               stepwise=False)
def do_fit(simple_differencing=False):
    """Non-stepwise parallel seasonal fit, forwarding simple_differencing to SARIMAX."""
    return auto_arima(wineind,
                      seasonal=True,
                      m=2,
                      d=1,
                      D=1,
                      start_p=1, max_p=2,
                      start_q=1, max_q=2,
                      start_P=0,
                      stepwise=False,
                      n_jobs=2,
                      n_fits=20,
                      random_state=42,
                      suppress_warnings=True,
                      error_action='ignore',
                      sarimax_kwargs={
                          'simple_differencing': simple_differencing},
                      max_order=None,
                      # Set to super low iter to make test move quickly
                      maxiter=2)
def test_m_too_large():
    """A seasonal period too large for the series length must raise."""
    train = lynx[:90]

    with pytest.raises(ValueError) as v:
        auto_arima(train,
                   seasonal=True,
                   m=20,
                   D=10,
                   max_D=10,
                   start_p=1, max_p=5,
                   start_q=1, max_q=5,
                   start_P=1, max_P=5,
                   start_Q=1, max_Q=5,
                   stepwise=True,
                   suppress_warnings=True,
                   error_action='ignore')

    msg = str(v)
    assert 'The seasonal differencing order' in msg
def get_best_arima(train_X, train_y):
    """Stepwise auto_arima search on train_y with train_X as exogenous regressors.

    Prints the summary of the selected model and returns the fitted
    search result.
    """
    step_wise = auto_arima(train_y,
                           exogenous=train_X,
                           start_p=1, start_q=1,
                           max_p=7, max_q=7,
                           d=1, max_d=7,
                           trace=True,
                           error_action="ignore",
                           suppress_warnings=True,
                           stepwise=True)
    print(step_wise.summary())
    # BUG FIX: the function is named get_best_arima yet returned None;
    # return the fitted model so callers can actually use the result.
    # (Backward-compatible: callers ignoring the return are unaffected.)
    return step_wise
def test_with_seasonality5():
    """Stationary seasonal fit with an exogenous matrix, returning all valid fits.

    (9/22/18 - made not parallel to reduce mem overhead on pytest.)
    """
    exog = rs.rand(wineind.shape[0], 4)
    all_res = auto_arima(wineind,
                         seasonal=True,
                         m=12,
                         d=1,
                         D=None,
                         start_p=1, max_p=2,
                         start_q=1, max_q=2,
                         start_P=0,
                         error_action='ignore',
                         suppress_warnings=True,
                         stationary=True,
                         random_state=42,
                         return_valid_fits=True,
                         stepwise=True,
                         exogenous=exog)  # only fit 2

    # show it is a list
    assert hasattr(all_res, '__iter__')
def montecarloR(data, model=False, modelR=False, simlen=10, simtimes=5,
                plot=False):
    """Monte-Carlo simulate `simtimes` future paths of length `simlen` via R.

    Fits (or reuses) an R forecast-package model (`modelR`, used for the
    simulations) and a Python pmdarima model (`model`, used only for the
    point-forecast overlay). Returns (montecarlo, modelparam):
    `montecarlo` is a (simlen, simtimes) array of simulated paths and
    `modelparam` is the R model description from toString().
    """
    import rpy2.robjects.packages as rpackages
    rpackages.importr('quantmod')
    rpackages.importr('forecast')
    import rpy2.robjects as robjects
    simulate = robjects.r['simulate']
    toString = robjects.r['toString']
    asnumeric = robjects.r['as.numeric']
    from rpy2.robjects import pandas2ri
    pandas2ri.activate()
    # simulate(modelR, nsim=simlen)

    # IDIOM FIX: `== False`/`== True` replaced; False acts as the
    # "not provided" sentinel for both model arguments here.
    if modelR is False:
        modelR = autoarimaR(data)
    if model is False:
        model = auto_arima(
            data,
            seasonal=False,
            trace=True,
            error_action=
            'ignore',  # don't want to know if an order does not work
            suppress_warnings=True,  # don't want convergence warnings
            stepwise=True)

    # Last 100 observations followed by the Python model's point forecast.
    orgdataandsim = np.append(data.values[-100:],
                              (model.predict(n_periods=simlen)))
    # One simulated path per draw from the R model; transpose so each
    # column is a path.
    montecarlo = [
        np.array(asnumeric(simulate(modelR, nsim=simlen)))
        for i in range(simtimes)
    ]
    montecarlo = np.transpose(montecarlo)
    modelparam = list(toString(modelR))

    if plot:
        plt.plot(range(len(data.values[-100:]),
                       len(montecarlo) + len(data.values[-100:])),
                 montecarlo,
                 linestyle='--',
                 linewidth=1)
        plt.plot((orgdataandsim), linewidth=2, label='Org Data')
        plt.title('Fit model %s, MC simlen: %i, simtim: %i' %
                  (modelparam, simlen, simtimes))
        plt.legend()
        plt.show()
    return montecarlo, modelparam
def auto_arima_pdq(df, trace_list=False):
    """Search for the best (p, d, q) order with auto_arima and return it.

    ==Parameters==
    |trace_list| : bool
        if True, every searched (p, d, q) pair is printed (default False)

    ==Returns==
    The order tuple chosen by auto_arima, also printed to stdout.
    """
    best_order = auto_arima(df,
                            trace=trace_list,
                            stepwise=False,
                            max_p=8,
                            max_P=8,
                            max_order=12).order
    print('P, D, Q parameters to use in ARIMA model =', best_order)
    return best_order
def __init__(self, signal):
    # Earlier experiment kept for reference:
    # size = int(len(signal) * 0.66)
    # train, test = signal[0:size], signal[size:len(signal)]
    # his = [x for x in signal]
    # NOTE(review): seasonal=False, so the seasonal settings (m, D,
    # start_P) presumably have no effect — confirm against pmdarima docs.
    self.model = auto_arima(signal,
                            seasonal=False,
                            m=12,
                            d=1,
                            D=1,
                            start_p=1, max_p=5,
                            start_q=1, max_q=3,
                            start_P=0,
                            suppress_warnings=True)  # trace=True,
def test_warn_for_large_differences():
    """A too-large d or D should produce at least one warning."""
    # First: d is too large
    with warnings.catch_warnings(record=True) as caught:
        _ = auto_arima(wineind,
                       seasonal=True,
                       m=1,
                       d=3,
                       suppress_warnings=False,
                       error_action='warn')
        assert len(caught) > 0

    # Second: D is too large. M needs to be > 1 or D will be set to 0...
    # unfortunately, this takes a long time.
    with warnings.catch_warnings(record=True) as caught:
        _ = auto_arima(wineind,
                       seasonal=True,
                       m=2,  # noqa: F841
                       D=3,
                       suppress_warnings=False,
                       error_action='warn')
        assert len(caught) > 0
def estimate_arma(series):
    """Fit a stationary ARMA (d=0, D=0) on `series` after dropping NaNs."""
    clean = pd.Series(series)
    clean = clean[~pd.isna(clean)]
    return auto_arima(clean,
                      start_p=0,
                      start_q=0,
                      d=0,
                      D=0,
                      stationary=True,
                      suppress_warnings=True,
                      error_action="ignore")
def test_corner_cases():
    """Bad arguments raise; degenerate (constant) input still fits quickly."""
    with pytest.raises(ValueError):
        auto_arima(wineind, error_action='some-bad-string')

    flat = np.ones(10)

    # things that produce warnings
    with pytest.warns(UserWarning):
        # show a constant result will result in a quick fit
        auto_arima(flat, suppress_warnings=True)

        # show the same thing with return_all results in the ARIMA in a list
        fits = auto_arima(flat,
                          suppress_warnings=True,
                          return_valid_fits=True)
        assert hasattr(fits, '__iter__')

    # show we fail for n_iter < 0
    with pytest.raises(ValueError):
        auto_arima(flat, random=True, n_fits=-1)

    # show if max* < start* it breaks:
    with pytest.raises(ValueError):
        auto_arima(flat, start_p=5, max_p=0)
def arima_model(vEndog, mExog=None, tPDQ=None):
    """
    Fits an ARIMA model. Order can be specified or determined by auto_arima.
    Differently from other models, it does not work on patsy/R formula syntax.

    :param vEndog: DataFrame column/numpy vector containing endogenous data
        (which will be regressed upon itself)
    :param mExog: vector/matrix containing exogenous data. Defaults to None
    :param tPDQ: tuple (p, d, q) containing order of the model;
        p: number of autoregressions (AR)
        d: number of differentiations (I)
        q: number of past prevision errors/moving averages (MA)
        If None (default), performs an auto_arima()
    :return mod_arima: fitted model instance
    """
    # DOC FIX: the parameter list previously labelled both the differencing
    # and MA terms "q"; corrected to p/d/q. Typos in the user-facing
    # diagnostic messages ("Estimaed", "heteroskdasticity") also fixed.

    ## Creating model
    # If order is specified
    if tPDQ is not None:
        # Conditional on whether there are exogenous variables
        if mExog is None:
            mod_arima = ARIMA(endog=vEndog, order=tPDQ).fit(cov_type='robust')
        else:
            mod_arima = ARIMA(endog=vEndog, exog=mExog,
                              order=tPDQ).fit(cov_type='robust')
    # If order isn't specified, use auto_arima()
    else:
        mod_arima = auto_arima(y=vEndog, X=mExog)
        mod_arima = mod_arima.fit(y=vEndog, cov_type='robust')

    ## Printing summary and diagnostics
    print(mod_arima.summary())
    print("For heteroskedasticity, check Prob(H), where H0: homoskedasticity, and the standardized residual graph.")
    print("If there is hetero., the model error can't be a white noise (which is the desired thing).")
    print("Estimated Density and Jarque-Bera have information on normality.")
    print("In the correlogram, all lollipops must be inside of the shaded area.")

    # Plots
    mod_arima.plot_diagnostics(figsize=(10, 10))
    plt.show()

    # Residual means: one-sample t-test that the residual mean is zero.
    # NOTE(review): `mod_arima.resid()` is callable on pmdarima models but a
    # property on statsmodels results — verify both branches produce an
    # object where this call is valid.
    tMean0 = stats.ttest_1samp(mod_arima.resid(), 0, nan_policy='omit')
    print(f"P-value for the test that residual mean is equal to 0: {np.around(tMean0[1], 5)}.")
    print("If p < 0.05, H0 is rejected and the residual mean is different from 0 (not ideal).")

    ## Returning
    return mod_arima
# Per-district forecasting script: for each district, fit a weekly-seasonal
# ARIMA on the log-transformed dwell/flow_in/flow_out series and predict the
# next 15 periods, accumulating everything into preds_df and writing it out.
# NOTE(review): `district_code_values`, `flow_df`, `date_dt`,
# `tmp_df_columns`, and the initial `preds_df` are defined elsewhere in the
# file — their shapes/contents are assumed here. The indentation of the two
# trailing statements (sort + to_csv) is reconstructed as "after the loop";
# confirm against the original layout.
for district_code in district_code_values:
    sub_df = flow_df[flow_df['district_code'] == district_code]
    city_code = sub_df['city_code'].iloc[0]
    predict_columns = ['dwell', 'flow_in', 'flow_out']
    # One output frame per district, seeded with the forecast dates.
    tmp_df = pd.DataFrame(data=date_dt, columns=['date_dt'])
    tmp_df['city_code'] = city_code
    tmp_df['district_code'] = district_code
    for column in predict_columns:
        # log1p-style transform guards against zeros before modelling.
        ts_log = np.log(1 + sub_df[column])
        arima_model = auto_arima(ts_log, start_p=1, max_p=9, start_q=1,
                                 max_q=9, max_d=5, start_P=1, max_P=9,
                                 start_Q=1, max_Q=9, max_D=5, m=7,
                                 random_state=2018, trace=True,
                                 seasonal=True, error_action='ignore',
                                 suppress_warnings=True, stepwise=True)
        preds = arima_model.predict(n_periods=15)
        preds = pd.Series(preds)
        # Invert the log transform back to the original scale.
        preds = np.exp(preds) - 1
        tmp_df = pd.concat([tmp_df, preds], axis=1)
    tmp_df.columns = tmp_df_columns
    preds_df = pd.concat([preds_df, tmp_df], axis=0, ignore_index=True)
preds_df = preds_df.sort_values(by=['date_dt'])
preds_df.to_csv('prediction1.csv', index=False, header=False)