def test_innovations_mle():
    """Check ``ARIMA.fit(method='innovations_mle')`` against ``innovations_mle``.

    Every specification is estimated twice -- once by calling the estimator
    function directly and once through the ``ARIMA`` model class -- and the
    two parameter vectors are compared.
    """
    endog = dta['infl'].iloc[:100]

    # No trend in either case (since trend would imply GLS estimation)
    specifications = (
        # ARMA(1, 1)
        {'order': (1, 0, 1)},
        # SARMA(1, 0)x(1, 0)4
        {'order': (1, 0, 0), 'seasonal_order': (1, 0, 0, 4)},
    )

    for spec in specifications:
        expected, _ = innovations_mle(endog, demean=False, **spec)
        fitted = ARIMA(endog, trend='n', **spec).fit(method='innovations_mle')
        # Note: atol is required only due to precision issues on Windows
        assert_allclose(fitted.params, expected.params, atol=1e-5)
def predict_autoregressive_integrated_moving_verage(data, predict_start=None, intervals=15):
    """Fit an ARIMA(1, 1, 1) model on ``data`` and predict a window of values.

    :param data: observations handed to ``ARIMA`` as the endogenous series.
    :param predict_start: first position to predict; when ``None`` it
        defaults to ``len(data) - 2``.
    :param intervals: offset added to the start position to obtain the end
        position passed to ``predict``.
    :return: the result of ``predict(start, start + intervals, typ='levels')``.
    """
    fitted = ARIMA(data, order=(1, 1, 1)).fit()
    first = len(data) - 2 if predict_start is None else predict_start
    last = first + intervals
    return fitted.predict(first, last, typ='levels')
def arima_model(self, time_series, step=5):
    """Extend ``time_series`` with ``step`` successive one-day-ahead forecasts.

    On every iteration an ARIMA model of order ``self.order`` is re-fit on the
    series (including the forecasts appended so far), the next value is
    forecast, and that value is stored one day after the last valid index.

    :param time_series: date-indexed pandas object; it is modified in place.
    :param step: number of forecasts to append.
    :return: the same ``time_series`` object, extended and sorted by index.
    """
    for _ in range(step):
        # The data may arrive with ``object`` dtype (values stored as
        # strings), so it is cast to float before being handed to the model.
        model = ARIMA(time_series.astype(float), order=self.order)
        forecast = model.fit().forecast()

        # Older results objects return a ``(forecast, stderr, conf_int)``
        # tuple; newer ones return the forecast array/Series directly.
        if isinstance(forecast, tuple):
            forecast = forecast[0]
        # Read the number itself rather than slicing the printed
        # representation, which depends on index width and print precision.
        next_value = float(np.ravel(forecast)[0])

        next_index = time_series.last_valid_index() + datetime.timedelta(days=1)
        time_series.loc[next_index] = next_value
        # ``sort_index()`` returns a new object unless told to work in place;
        # the original call discarded its result.
        time_series.sort_index(inplace=True)
    return time_series
def predict_tomorrow_sarimax(stock, stock_data, db, params):
    """
    Forecast the next GAIN_LOSS value for ``stock`` and export it to Firebase.

    An ARIMA model configured with ``params["Params"]["order"]`` and
    ``params["Params"]["seasonal_order"]`` is fit on
    ``stock_data["GAIN_LOSS"]``. The one-step forecast is combined with the
    last row of ``stock_data`` into a summary dictionary, which is exported
    twice: to 'CURRENT_PREDS' (with ``delete=True``) and to 'HISTORY_PREDS'
    (with ``delete=False``). Nothing is returned.
    """
    fitted = ARIMA(
        endog=stock_data["GAIN_LOSS"],
        order=params["Params"]["order"],
        seasonal_order=params["Params"]["seasonal_order"],
    ).fit()
    prediction = float(fitted.forecast(steps=1))

    last_date, next_date = get_dates(stock_data)

    # Formatted copy of the most recent row, keyed as LAST_<column>
    column_names = list(stock_data.columns)
    last_row = stock_data.iloc[-1]
    data_last_stats = {
        'LAST_' + column: format_floats(last_row[column], 4)
        for column in column_names
    }

    # The forecast is applied to the last close as a relative change
    next_price = (1 + prediction) * float(data_last_stats["LAST_Close"])
    pred_gain_loss = format_floats(prediction, 4)

    if float(prediction) > 0:
        trend_gain_loss = 'pos'
    else:
        trend_gain_loss = 'neg'
    if float(data_last_stats["LAST_GAIN_LOSS"]) > 0:
        trend_last_gain_loss = 'pos'
    else:
        trend_last_gain_loss = 'neg'

    history = {
        f"{next_date}_PRED_Price": format_floats(next_price, 2),
        f"{next_date}_PRED_Price_Diff": pred_gain_loss,
        f"{next_date}_PRED_Price_Trend": trend_gain_loss,
        f"{last_date}_REAL_Price": data_last_stats["LAST_Close"],
        f"{last_date}_REAL_Price_Diff": data_last_stats["LAST_GAIN_LOSS"],
        f"{last_date}_REAL_Price_Trend": trend_last_gain_loss
    }

    # First the current day folder, then the history folder
    for folder, delete in (('CURRENT_PREDS', True), ('HISTORY_PREDS', False)):
        export_firebase(data=history, stock=stock, db=db, folder=folder, delete=delete)
def random_walk(data, test):
    """Fit a seasonal-difference-only ARIMA model and predict over ``test``.

    The model is ``ARIMA(data, seasonal_order=(0, 1, 0, 12))``: no AR or MA
    terms are requested, only one seasonal difference with period 12.

    :param data: training series passed to ``ARIMA``.
    :param test: object whose ``index.values`` supply the first and last
        prediction points.
    :return: tuple ``(results, predictions, bic)``.
    """
    # Imported locally, as in the original. The previously unused
    # ``import pmdarima as pm`` was dropped so that the function no longer
    # fails when that optional package is not installed.
    from statsmodels.tsa.arima.model import ARIMA

    res = ARIMA(data, seasonal_order=(0, 1, 0, 12)).fit()
    oos_predictions = res.predict(start=test.index.values[0],
                                  end=test.index.values[-1])
    return res, oos_predictions, res.bic
def test_low_memory():
    """Basic test that the ``low_memory`` fit option is working."""
    endog = dta['infl'].iloc[:50]
    mod = ARIMA(endog, order=(1, 0, 0), concentrate_scale=True)
    full_res = mod.fit()
    lean_res = mod.fit(low_memory=True)

    # Both fits must produce the same results
    for name in ('params', 'llf'):
        assert_allclose(getattr(lean_res, name), getattr(full_res, name))

    # The model's own memory conservation setting must be left untouched
    assert_equal(mod.ssm.memory_conserve, 0)

    # Low memory was actually used (just check a couple of outputs)
    dropped_outputs = (
        'llf_obs',
        'forecasts',
        'predicted_state',
        'filtered_state',
        'smoothed_state',
    )
    for name in dropped_outputs:
        assert_(getattr(lean_res, name) is None)
def fit_forecast_next(dataset):
    """Pick an ARIMA order by grid search, fit it and forecast the next value.

    ``evaluate_models`` is called with the candidate p, d and q values and is
    expected to return ``(best_order, best_score)``. All warnings are
    silenced process-wide before the search starts.

    :param dataset: series used both for the search and for the final fit.
    :return: tuple ``(fitted_results, forecast)`` where the forecast comes
        from ``forecast()`` with its default arguments.
    """
    warnings.filterwarnings("ignore")
    best_cfg, best_score = evaluate_models(
        dataset,
        [0, 1, 2, 4, 6, 8, 10],  # candidate p values
        range(0, 3),             # candidate d values
        range(0, 3),             # candidate q values
    )
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))

    model = ARIMA(dataset, order=best_cfg)
    model.k_lags = None
    fitted = model.fit()
    return fitted, fitted.forecast()
def evaluate_arima_model(X, arima_order):
    """Score an ARIMA order by the median absolute error of its predictions.

    The model is fit on all of ``X`` and predictions are requested for
    positions ``0`` to ``len(X) - 1``, so the error is measured on the same
    observations that were used for fitting.

    :param X: sequence of observations.
    :param arima_order: order tuple forwarded to ``ARIMA``.
    :return: ``metrics.median_absolute_error(X, predictions)``.
    """
    observations = list(X)
    fitted = ARIMA(observations, order=arima_order).fit()
    predictions = fitted.predict(0, len(X) - 1, typ='levels')
    return metrics.median_absolute_error(X, predictions)
def get_predictions_rmse_mape_final(self, min_algo):
    """Fit the selected algorithm on ``self.total`` and forecast ahead.

    :param min_algo: one of the ``Constants`` algorithm identifiers handled
        below (ARIMA, MOVING_AVERAGE, AR, ARMA, SARIMA, SES).
    :return: the forecast for the ``Constants.NUMBER_OF_PREDICTIONS``
        positions following the data, rounded and with negatives set to 0.
    :raises ValueError: if ``min_algo`` is not one of the handled identifiers
        (previously this surfaced as an ``UnboundLocalError`` on
        ``model_fit``).
    """
    if min_algo == Constants.ARIMA:
        model = ARIMA(self.total, order=(7, 0, 1))
        model_fit = model.fit()
    elif min_algo == Constants.MOVING_AVERAGE:
        model = ARMA(self.total, order=[0, 1])
        model_fit = model.fit(disp=0)
    elif min_algo == Constants.AR:
        model = AR(self.total)
        model_fit = model.fit(disp=0)
    elif min_algo == Constants.ARMA:
        model = ARMA(self.total, order=[2, 1])
        model_fit = model.fit(disp=0)
    elif min_algo == Constants.SARIMA:
        model = SARIMAX(self.total, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
        model_fit = model.fit(disp=0)
    elif min_algo == Constants.SES:
        model = SimpleExpSmoothing(self.total)
        model_fit = model.fit()
    else:
        raise ValueError(f"Unsupported forecasting algorithm: {min_algo!r}")

    # Predict the positions immediately after the observed data
    start_index = len(self.total)
    end_index = start_index + Constants.NUMBER_OF_PREDICTIONS - 1
    forecast = model_fit.predict(start=start_index, end=end_index)

    # Round every value in place and clamp negatives to zero
    for i in range(0, len(forecast)):
        forecast[i] = round(forecast[i])
        if forecast[i] < 0:
            forecast[i] = 0
    return forecast
def generate_predict_data():
    """Forecast the next close for every ticker and write the summary to CSV.

    Reads "tickerlist.csv" (column ``name``) and "order.csv" (indexed by
    ``ticker``, columns ``first``/``second``/``third`` giving the ARIMA
    order). For each ticker the file "coin/<name>" is loaded, an ARIMA model
    is fit on its ``close`` column and a one-step forecast is taken.

    Tickers that fail at any step are skipped (best effort) with a message.
    All values for a ticker are computed before anything is recorded, so the
    result lists always have equal length.

    :return: dict with keys ``ticker``, ``predict``, ``bef_close`` and
        ``close``; the same data is written to "dic.csv".
    """
    tickerlist = pd.read_csv("tickerlist.csv")
    order = pd.read_csv("order.csv")
    order = order.set_index('ticker')

    ticker = []
    predict = []
    close = []
    bef_close = []
    for i in range(len(tickerlist)):
        try:
            name = tickerlist['name'].iloc[i]
            all_data = pd.read_csv("coin/" + name)
            # 'time' is expected to start with YYYY?MM?DD (fixed positions)
            all_data['time'] = all_data['time'].apply(
                lambda x: datetime(int(x[:4]), int(x[5:7]), int(x[8:10])))
            temp_order = (
                int(order['first'].loc[name]),
                int(order['second'].loc[name]),
                int(order['third'].loc[name]),
            )
            model_fit = ARIMA(all_data['close'], order=temp_order).fit()
            forecast_data = model_fit.forecast(steps=1)
            # The forecast is looked up by the label following the last row
            predicted = round(forecast_data[len(all_data['close'])], 2)
            last_time = all_data['time'].iloc[-1]
            last_close = all_data['close'].iloc[-1]
            previous_close = all_data['close'].iloc[-2]
        except Exception as exc:
            # Skipping is deliberate, but KeyboardInterrupt/SystemExit are no
            # longer swallowed and the reason is reported.
            print(f"skipping ticker #{i}: {exc!r}")
            continue

        print(last_time)
        print("name :", name)
        print("close :", last_close)
        print("predict :", predicted)
        print()

        ticker.append(name)
        predict.append(predicted)
        close.append(last_close)
        bef_close.append(previous_close)

    dic = {'ticker': ticker, 'predict': predict, 'bef_close': bef_close, 'close': close}
    df = pd.DataFrame(dic)
    df.to_csv("dic.csv")
    return dic
def predict_moving_average(data, predict_start=None, intervals=15):
    """Predict with an ARIMA(2, 1, 5) model fit on rescaled data.

    ``data`` is passed through the module-level ``scaler`` (which is re-fit
    here via ``fit_transform``), the model is fit on the scaled values, and
    the predictions are mapped back with ``scaler.inverse_transform``.

    :param data: array supporting ``reshape``.
    :param predict_start: first position to predict; defaults to
        ``len(data) - 2`` when ``None``.
    :param intervals: offset added to the start position to get the end.
    :return: flattened array of predictions in the original scale.
    """
    scaled = scaler.fit_transform(data.reshape(-1, 1))
    data_norm = np.reshape(scaled, (-1))
    fitted = ARIMA(data_norm, order=(2, 1, 5)).fit()

    first = predict_start if predict_start is not None else len(data) - 2
    prediction_norm = fitted.predict(first, first + intervals, typ='levels')

    restored = scaler.inverse_transform(prediction_norm.reshape(-1, 1))
    return np.reshape(restored, (-1))
def arma_model(p_range, q_range, data):
    """Fit ARMA(p, q) models over a grid and record the BIC of each.

    For every ``p`` in ``range(p_range)`` and ``q`` in ``range(q_range)`` an
    ``ARIMA(data, order=(p, 0, q))`` model is fit. A fit that raises is
    recorded with a BIC of NaN.

    :param p_range: exclusive upper bound for the AR order.
    :param q_range: exclusive upper bound for the MA order.
    :param data: series passed to ``ARIMA``.
    """
    index = 0
    result = np.array([[]])
    for p in range(p_range):
        pi = []
        for q in range(q_range):
            try:
                bic = ARIMA(data, order=(p, 0, q)).fit().bic
            except Exception:
                # Keep the failure local: the original assigned NaN onto
                # ``model_fit``, which was either undefined (first fit) or the
                # previous iteration's results object.
                bic = np.nan
            # NOTE(review): ``dict`` must be a module-level mapping shadowing
            # the builtin for this assignment to work -- confirm and rename.
            dict[index] = (p, q, bic)
            pi.append(bic)
            index = index + 1
        result = np.append(result, np.array(pi))
    # NOTE(review): ``result`` is built but never returned; left unchanged so
    # that callers keep receiving ``None``.
def evaluate_arima_model(X, arima_order):
    """Walk-forward mean squared error of an ARIMA order on the tail of ``X``.

    The first 67% of ``X`` seeds the history; for each remaining observation
    a model is re-fit on the history, a one-step forecast is stored, and the
    true value is appended to the history.

    Warnings are suppressed only for the duration of this call: the previous
    warning filters are restored on exit, including when a fit raises,
    instead of being overwritten with ``"default"``.

    :param X: indexable sequence of observations (``test[t]`` is read by
        position).
    :param arima_order: order tuple forwarded to ``ARIMA``.
    :return: ``mean_squared_error(test, predictions)``.
    """
    train_size = int(len(X) * 0.67)
    train, test = X[0:train_size], X[train_size:]
    history = list(train)
    predictions = []
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        for t in range(len(test)):
            model_fit = ARIMA(history, order=arima_order).fit()
            # The whole one-element forecast is stored, as before
            predictions.append(model_fit.forecast(steps=1))
            history.append(test[t])
        error = mean_squared_error(test, predictions)
    return error
def test_nonstationary_gls_error():
    """Hannan-Rissanen fitting must reject non-stationary AR roots (GH-6540).

    An ARIMA(18, 0, 39) model on the series below is expected to raise a
    ``ValueError`` mentioning the roots of the autoregressive parameters.
    """
    # The CSV text is written with a leading line continuation and one group
    # of values per physical line. Collapsed onto a single line the literal
    # began with an invalid ``\ `` escape and ended in a whitespace-only
    # record. The numeric values are unchanged.
    endog = pd.read_csv(
        io.StringIO(
            """\
data\n
9.112\n9.102\n9.103\n9.099\n9.094\n9.090\n9.108\n9.088\n9.091\n9.083\n9.095\n
9.090\n9.098\n9.093\n9.087\n9.088\n9.083\n9.095\n9.077\n9.082\n9.082\n9.081\n
9.081\n9.079\n9.088\n9.096\n9.081\n9.098\n9.081\n9.094\n9.091\n9.095\n9.097\n
9.108\n9.104\n9.098\n9.085\n9.093\n9.094\n9.092\n9.093\n9.106\n9.097\n9.108\n
9.100\n9.106\n9.114\n9.111\n9.097\n9.099\n9.108\n9.108\n9.110\n9.101\n9.111\n
9.114\n9.111\n9.126\n9.124\n9.112\n9.120\n9.142\n9.136\n9.131\n9.106\n9.112\n
9.119\n9.125\n9.123\n9.138\n9.133\n9.133\n9.137\n9.133\n9.138\n9.136\n9.128\n
9.127\n9.143\n9.128\n9.135\n9.133\n9.131\n9.136\n9.120\n9.127\n9.130\n9.116\n
9.132\n9.128\n9.119\n9.119\n9.110\n9.132\n9.130\n9.124\n9.130\n9.135\n9.135\n
9.119\n9.119\n9.136\n9.126\n9.122\n9.119\n9.123\n9.121\n9.130\n9.121\n9.119\n
9.106\n9.118\n9.124\n9.121\n9.127\n9.113\n9.118\n9.103\n9.112\n9.110\n9.111\n
9.108\n9.113\n9.117\n9.111\n9.100\n9.106\n9.109\n9.113\n9.110\n9.101\n9.113\n
9.111\n9.101\n9.097\n9.102\n9.100\n9.110\n9.110\n9.096\n9.095\n9.090\n9.104\n
9.097\n9.099\n9.095\n9.096\n9.085\n9.097\n9.098\n9.090\n9.080\n9.093\n9.085\n
9.075\n9.067\n9.072\n9.062\n9.068\n9.053\n9.051\n9.049\n9.052\n9.059\n9.070\n
9.058\n9.074\n9.063\n9.057\n9.062\n9.058\n9.049\n9.047\n9.062\n9.052\n9.052\n
9.044\n9.060\n9.062\n9.055\n9.058\n9.054\n9.044\n9.047\n9.050\n9.048\n9.041\n
9.055\n9.051\n9.028\n9.030\n9.029\n9.027\n9.016\n9.023\n9.031\n9.042\n9.035\n
"""
        ),
        index_col=None,
    )
    mod = ARIMA(
        endog,
        order=(18, 0, 39),
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    with pytest.raises(ValueError, match="Roots of the autoregressive"):
        mod.fit(method="hannan_rissanen", low_memory=True, cov_type="none")
def arima(df,time_id,lookback, p,d,q):
    """Relative one-step forecast errors of ARIMA(p, d, q) on log open prices.

    For each ``tid`` in ``time_id`` a model is fit on the log of
    ``df[OPEN_KEY][tid-lookback:tid]`` and its one-step forecast is compared
    with the log of ``df[OPEN_KEY][tid]``.

    :return: column vector (shape ``(-1, 1)``) of
        ``(forecast - actual) / actual`` values, one per entry of ``time_id``.
    """
    Log(LOG_INFO) << "Computing arima(%d,%d,%d) with lookback: %d " % (p,d,q,lookback)
    relative_errors = []
    for tid in time_id:
        window = np.log(df[OPEN_KEY][tid-lookback:tid].values)
        fitted = ARIMA(window, order=(p, d, q)).fit(
            method_kwargs={"warn_convergence": False})
        predicted = fitted.forecast()
        actual = np.log(df[OPEN_KEY][tid])
        relative_errors.append((predicted[0] - actual) / actual)
    return np.array(relative_errors).reshape(-1, 1)
def forecast(self, data: np.ndarray):
    """Return a one-step forecast for ``data`` with simple fallbacks.

    * ``data.size <= 1``: return ``self.max_loss``.
    * ``data.size <= self.min_size``: return the mean of ``data``.
    * otherwise: fit ``ARIMA(data, order=self.order)`` and return its
      one-step forecast; if anything in that path raises, fall back to the
      mean of ``data``.
    """
    if data.size <= 1:
        return self.max_loss
    if data.size <= self.min_size:
        # Use average until we can use ARIMA model?
        return np.mean(data)
    try:
        return ARIMA(data, order=self.order).fit().forecast(steps=1)[0]
    except Exception:
        return np.mean(data)
def arma_model():
    """Build a fitted ARMA(2, 2) model on a seeded synthetic sample.

    A 250-observation sample is generated with ``arma_generate_sample`` after
    seeding NumPy's global RNG with 12345, indexed from 1980-1-1 with
    ``freq="M"``, and fit with ``ARIMA(order=(2, 0, 2), trend="n")``.

    :return: ``ModelWithResults`` holding the fitted results (``model``), the
        unfitted model (``alg``) and a one-row start/end inference frame.
    """
    # Autoregressive Moving Average (ARMA)
    np.random.seed(12345)
    ar_poly = np.array([1, -0.75, 0.25])
    ma_poly = np.array([1, 0.65, 0.35])
    n_obs = 250
    sample = arma_generate_sample(ar_poly, ma_poly, n_obs)
    index = pd.date_range("1980-1-1", freq="M", periods=n_obs)
    series = pd.Series(sample, index=index)

    arima = ARIMA(series, order=(2, 0, 2), trend="n")
    fitted = arima.fit()
    inference_dataframe = pd.DataFrame(
        [["1999-06-30", "2001-05-31"]], columns=["start", "end"])
    return ModelWithResults(
        model=fitted, alg=arima, inference_dataframe=inference_dataframe)
def arima_model(vEndog, mExog=None, tPDQ=None):
    """
    Fits an ARIMA model. Order can be specified or determined by auto_arima.
    Differently from other models, it does not work on patsy/R formula syntax.

    :param vEndog: DataFrame column/numpy vector containing endogenous data
        (which will be regressed upon itself)
    :param mExog: vector/matrix containing exogenous data. Defaults to None
    :param tPDQ: tuple (p, d, q) containing order of the model;
        p: number of autoregressions (AR)
        d: number of differentiations (I)
        q: number of past prediction errors/moving averages (MA)
        If None (default), performs an auto_arima()
    :return mod: fitted model instance
    """
    ## Creating model
    if tPDQ is not None:
        # Order is specified; ``exog=None`` is the model's default, so one
        # call covers both the with- and without-exogenous cases.
        mod_arima = ARIMA(endog=vEndog, exog=mExog, order=tPDQ).fit(cov_type='robust')
    else:
        # Order isn't specified: let auto_arima() choose it, then refit with
        # robust covariance. The exogenous data is passed to the refit too;
        # previously it was dropped at this step.
        mod_arima = auto_arima(y=vEndog, X=mExog)
        mod_arima = mod_arima.fit(y=vEndog, X=mExog, cov_type='robust')

    ## Printing summary and diagnostics
    print(mod_arima.summary())
    print("For heteroskdasticity, check Prob(H), where H0: homoskedasticity, and the standardized residual graph.")
    print("If there is hetero., the model error can't be a white noise (which is the desired thing).")
    print("Estimaed Density and Jarque-Bera have information on normality.")
    print("In the correlogram, all lollipops must be inside of the shaded area.")

    # Plots
    mod_arima.plot_diagnostics(figsize=(10, 10))
    plt.show()

    # Residual means. The auto_arima model exposes ``resid`` as a method,
    # while the statsmodels results object exposes it as an attribute, so
    # calling it unconditionally failed on the explicit-order path.
    residuals = mod_arima.resid
    if callable(residuals):
        residuals = residuals()
    tMean0 = stats.ttest_1samp(residuals, 0, nan_policy='omit')
    print(f"P-value for the test that residual mean is equal to 0: {np.around(tMean0[1], 5)}.")
    print("If p < 0.05, H0 is rejected and the residual mean is different from 0 (not ideal).")

    ## Returning
    return mod_arima
def estimate_time(tickets):
    """
    Estimate the closing time based on a list of tickets

    ### Parameters
    - `tickets`: the list of past tickets, each a dictionary with at least
      the keys `"inizio"` and `"fine"`, which are timestamps

    ### Return value
    A prediction of the time taken to close the next ticket
    """
    durations = [diff_in_seconds(ticket) for ticket in tickets]
    prediction = ARIMA(durations).fit().forecast(1, alpha=0.05)
    return timedelta(seconds=prediction[0])
def evaluate_arima_model(X, arima_order):
    """Walk-forward RMSE of an ARIMA order on the last part of ``X``.

    The first 66% of ``X`` seeds the history. For every remaining
    observation the model is re-fit on the history, its one-step forecast is
    recorded, and the true observation is then added to the history.

    :param X: indexable sequence of observations.
    :param arima_order: order tuple forwarded to ``ARIMA``.
    :return: square root of ``mean_squared_error(test, predictions)``.
    """
    split = int(len(X) * 0.66)
    train, test = X[0:split], X[split:]
    history = list(train)

    predictions = []
    for step in range(len(test)):
        fitted = ARIMA(history, order=arima_order).fit()
        predictions.append(fitted.forecast()[0])
        history.append(test[step])

    return sqrt(mean_squared_error(test, predictions))
def test_from_estimation(d, seasonal):
    """``ArmaProcess.from_estimation`` yields coefficient arrays of the expected size.

    A sample is generated from known AR/MA coefficients (cumulatively summed
    when ``d == 1``), an ARIMA model is fit on it, and the shapes of the
    ``arcoefs``/``macoefs`` recovered from the results are checked.
    """
    if seasonal:
        ar = [0.8, 0, 0, 0.2, -0.16]
        ma = [0.4, 0, 0, 0.2, -0.08]
        seasonal_order = (1, 0, 1, 4)
        expected_shape = (5,)
    else:
        ar = [0.8]
        ma = [0.4]
        seasonal_order = None
        expected_shape = (1,)

    ap = ArmaProcess.from_coeffs(ar, ma, 500)
    idx = pd.date_range(dt.datetime(1900, 1, 1), periods=500, freq="Q")
    data = ap.generate_sample(500)
    if d == 1:
        data = np.cumsum(data)
    series = pd.Series(data, index=idx)

    res = ARIMA(series, order=(1, d, 1), seasonal_order=seasonal_order).fit()
    ap_from = ArmaProcess.from_estimation(res)
    assert ap_from.arcoefs.shape == expected_shape
    assert ap_from.macoefs.shape == expected_shape
def generate_ARIMA_model():
    """Produce walk-forward one-step predictions for every listed ticker.

    For each name in "tickerlist.csv" the files "./coin/<name>",
    "./training_data/<name>" and "./test_data/<name>" are read. An order is
    chosen with ``auto_arima`` on the training closes, then for each test day
    an ARIMA model of that order is fit on all closes before that day and a
    one-step forecast is stored. Results are written to
    "predict_data/<name>".
    """
    tickerlist = pd.read_csv("tickerlist.csv")
    try:
        if not os.path.exists("predict_data"):
            os.makedirs("predict_data")
    except OSError:
        print("Error: Creating directory")

    for i in range(len(tickerlist)):
        file_name = tickerlist['name'].iloc[i]
        all_data = pd.read_csv("./coin/" + file_name)
        training_data = pd.read_csv("./training_data/" + file_name)
        test_data = pd.read_csv("./test_data/" + file_name)

        # Create a model for each ticker
        model = auto_arima(training_data['close'], trace=True, error_action='ignore',
                           start_p=1, start_q=1, max_p=3, max_q=3,
                           suppress_warnings=True, stepwise=False,
                           seasonal=False, with_intercept=False)
        order = (model.order[0], model.order[1], model.order[2])

        time_data = []
        predict_data = []
        # Generate predict_data using the order found above and test_data
        first_day = len(training_data)
        last_day = len(training_data) + len(test_data)
        for day in range(first_day, last_day):
            fitted = ARIMA(all_data['close'].iloc[0:day], order=order).fit()
            forecast_data = fitted.forecast(steps=1)
            predict_data.append(round(forecast_data[day],2))
            time_data.append(all_data['time'].iloc[day])
            print(all_data['time'].iloc[day])
        print(predict_data)
        print(len(predict_data))

        dic = {
            'time': time_data,
            'predict': predict_data,
            'close': test_data['close'],
            'high': test_data['high'],
        }
        df = pd.DataFrame(dic)
        df.to_csv("predict_data/" + file_name)
def fit(self,model_state: str, endog_data: pd.DataFrame, exp_name: str, exog_data=None):
    """Walk-forward ARIMA(7, 0, 1) evaluation on the last 10% of ``endog_data``.

    The first 90% of the rows form the training history. Every column of the
    history is overwritten with the training values of ``model_state``. For
    each test row a model is fit per column on the current history, a
    one-step forecast is stored, and the observed test row is then appended
    to the history. R2 scores per column are passed to ``store_exp_results``.

    :param model_state: column whose training values seed every history column.
    :param endog_data: frame of endogenous series; its index must be
        convertible to a ``DatetimeIndex``.
    :param exp_name: experiment name forwarded to ``store_exp_results``.
    :param exog_data: optional exogenous frame; only its training rows are
        passed to the model.
    :return: tuple ``(predictions, test_data)``.
    """
    from statsmodels.tsa.arima.model import ARIMA

    # Hardcoded values to have consistency in models.
    train_pct = 0.9
    steps = 1

    all_states = endog_data.columns
    total_samples = len(endog_data.index)
    training_sample_size = int(train_pct*total_samples)
    test_sample_size = total_samples - training_sample_size

    endog_train_data = endog_data.iloc[:training_sample_size]
    exog_train_data = None
    if exog_data is not None:
        exog_train_data = exog_data.iloc[:training_sample_size]
    test_data = endog_data.iloc[training_sample_size:]

    history = endog_train_data.copy()
    for _col in history.columns:
        # NOTE(review): relies on ``.values`` being a writable view of the
        # column -- confirm for the pandas version in use.
        history[_col].values[:training_sample_size]=history[model_state].values[:training_sample_size]
    history.index = pd.DatetimeIndex(history.index) # Converts datatype str to Datetime
    history.index = history.index.to_period('D') # Converts Datetime to daily periods

    predictions = pd.DataFrame(columns=endog_train_data.columns)
    for t in range(len(test_data)):
        each_day_predictions = []
        for _state in all_states:
            print(f'{t} day for {_state}')
            model = ARIMA(history[_state],order=(7,0,1),exog=exog_train_data)
            model_fit = model.fit()
            output = model_fit.forecast(steps=steps)
            each_day_predictions.append(output.iloc[0])
        # Compute the forecast day once, before the history grows, so the
        # observation and its prediction get the same label (the prediction
        # used to be labelled one day later).
        next_day = history.index[-1]+pd.offsets.Day(1)
        history.loc[next_day]=test_data.iloc[t]
        predictions.loc[next_day] = each_day_predictions

    fit_score = {'states':[],'r2':[]}
    state_names = list(endog_data.columns)
    for _state in state_names:
        y_obs = test_data[:test_sample_size][_state].to_numpy()
        # Positional column selection needs ``.iloc``; ``predictions[:, i]``
        # is not valid DataFrame indexing.
        y_pred = predictions.iloc[:, state_names.index(_state)].astype(float).to_numpy()
        fit_score['states'].append(_state)
        fit_score['r2'].append(r2_score(y_obs,y_pred))
    store_exp_results(fit_score, exog_data is not None, exp_name)
    return predictions,test_data
def sample(self, lagged_values, lagged_times=None, **ignored):
    """ Draw ``self.num_predictions`` values, treating the data as quantized or continuous

    The share of unique values decides the strategy. With fewer than 20%
    unique values the outcomes are treated as quantized and drawn at random
    from past values; otherwise an ARIMA point estimate is used as the mean
    of a normal distribution whose spread is the standard deviation of
    ``lagged_values``.
    """
    uniques = np.unique(lagged_values)
    # our data are in reverse order, the ARIMA needs the opposite
    rev_values = lagged_values[::-1]

    # arbitrary cutoff of 20% to determine whether outcomes are continuous or quantized
    is_quantized = len(uniques) < 0.2 * len(rev_values)

    if is_quantized:
        # when did the latest value occur before? Keep the indices that follow it
        latest = rev_values[-1]
        following = {
            i + 1 for i, x in enumerate(rev_values[:-1]) if x == latest
        }
        if len(following) > 8:  # arbitrary decision on minimum number of occurrences
            # submit based on what happened before
            value_list = [x for i, x in enumerate(rev_values) if i in following]
        else:
            # not enough data, use the whole set instead
            value_list = rev_values[:]
        # randomly select from the value list and return as answer
        v = list(np.random.choice(value_list, self.num_predictions))
    else:
        # Simple ARIMA: evaluate parameters, fit, predict the next position
        print('ARIMA')
        p_values = [0, 1, 2, 4, 6, 8, 10, 25]
        d_values = range(0, 3)
        q_values = range(0, 3)
        best_order = self.evaluate_models(rev_values, p_values, d_values, q_values)
        model_fit = ARIMA(rev_values, order=best_order, trend='n').fit()
        next_position = len(lagged_values)
        point_est = model_fit.predict(next_position, next_position, dynamic=True)
        st_dev = np.std(lagged_values)
        v = list(np.random.normal(point_est, st_dev, self.num_predictions))

    print(*v, sep=", ")
    return v
def tsa_factory(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None, p: int = TSA_P_DEFAULT, d: int = TSA_D_DEFAULT, q: int = TSA_D_DEFAULT) -> ([float], Any, Any):
    """ Extremely simple univariate, fixed p,d,q ARIMA model that is re-fit each time

    State ``s`` carries the observation history (``'y'``), known-in-advance
    values (``'a'``), the horizon (``'k'``) and the ``parade`` state
    (``'p'``). Until more than ``max(2 * k + 5, TSA_META['n_warm'])``
    observations are stored -- or whenever fitting/forecasting raises -- the
    prediction is the latest ``y[0]`` repeated ``k`` times.

    :return: ``(x, x_std, s)``: the k forecasts, the std estimates from
        ``parade`` with ``None`` filled by 1.0, and the updated state; or
        ``(None, s, None)`` when ``y`` is None.
    """
    # TODO: FIX THIS TO USE EMPIRICAL STD, OTHERWISE ENSEMBLES ARE DREADFUL
    # NOTE(review): the default for ``q`` is TSA_D_DEFAULT -- presumably a
    # TSA_Q_DEFAULT was intended; confirm before changing the signature.
    y = wrap(y)
    a = wrap(a)

    if not s.get('y'):
        s = {'y': list(), 'a': list(), 'k': k, 'p': {}}
    else:
        # Assert immutability of k, dimensions
        if s['y']:
            assert len(y) == len(s['y'][0])
            assert k == s['k']
        if s['a']:
            assert len(a) == len(s['a'][0])

    if y is None:
        return None, s, None
    else:
        s['y'].append(y)
        if a is not None:
            s['a'].append(a)

        if len(s['y']) > max(2 * k + 5, TSA_META['n_warm']):
            y0s = [y_[0] for y_ in s['y']]
            model = ARIMA(y0s, order=(p, d, q))
            try:
                x = list(model.fit().forecast(steps=k))
            except Exception:
                # Best-effort fallback on any fitting/forecasting error, but
                # no longer swallowing KeyboardInterrupt/SystemExit.
                x = [wrap(y)[0]] * k
        else:
            x = [y[0]] * k

        y0 = wrap(y)[0]
        _we_ignore_bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
        x_std_fallback = nonecast(x_std, fill_value=1.0)
        return x, x_std_fallback, s
def calculate_arima(df):
    """Forecast ten weeks of prices from an ARIMA(1, 0, 1) on weekly changes.

    ``df`` is resampled to weekly frequency with forward fill, differenced,
    and the model is fit on the differences. The predicted differences from
    the last date of ``df`` to ten weeks later are accumulated on top of the
    last weekly price to obtain price levels.

    :param df: date-indexed frame with a ``price`` column.
    :return: tuple ``(df_fill, dfp)``: the weekly history and the predicted
        prices, both with missing values set to 0 and, for the history,
        near-zero values replaced by a neighbouring value.
    """
    df_fill = df.asfreq('W', method='ffill')
    df_returns = df_fill.diff()
    df_returns.iloc[0] = 0

    # Build the model and fit it to the data
    fitted_model = ARIMA(df_returns, order=[1, 0, 1]).fit()

    start = df.iloc[[-1]].index
    end = start + timedelta(weeks=10)
    start = pd.to_datetime(str(start.values[0]))
    end = pd.to_datetime(str(end.values[0]))
    start_date = start.strftime('%Y.%m.%d')
    end_date = end.strftime('%Y.%m.%d')
    preds = fitted_model.predict(start=start_date, end=end_date)

    dfp = pd.DataFrame(preds)
    dfp = dfp.rename(columns={"predicted_mean": "price"})

    # Turn predicted changes into levels: the first level is the last known
    # price plus the first change, every later level adds the next change.
    # Whole-column assignment replaces the element-wise ``dfp.price[n] = ...``
    # writes, which depended on chained assignment and on integer keys being
    # treated as positions on a date index.
    last_v = df_fill['price'].iloc[-1]
    changes = dfp['price'].copy()
    if len(changes):
        changes.iloc[0] = last_v + changes.iloc[0]
    dfp['price'] = changes.cumsum(skipna=False)

    df_fill['price'] = df_fill['price'].fillna(0)
    dfp['price'] = dfp['price'].fillna(0)

    # Replace near-zero history values: the first one takes the next value,
    # all others take the previous (already cleaned) value.
    cleaned = df_fill['price'].tolist()
    for n in range(len(cleaned)):
        if cleaned[n] < 0.0001:
            cleaned[n] = cleaned[n + 1] if n < 1 else cleaned[n - 1]
    df_fill['price'] = cleaned

    return df_fill, dfp
def get_arima_pred(train, test, p=5, q=1, d=0):
    """Walk-forward one-step ARIMA forecasts of ``close`` over ``test``.

    The history starts as ``train['close']``; for every row of ``test`` a
    model is fit on the history, its one-step forecast is stored, and the
    row's true ``close`` is appended to the history.

    NOTE(review): the arguments are forwarded as ``order=(p, q, d)``. ARIMA
    documents the order tuple as (p, d, q), so the parameter named ``q`` ends
    up in the differencing slot and ``d`` in the MA slot; the defaults give
    order (5, 1, 0). Left as is because callers depend on it.

    :param train: frame with a ``close`` column.
    :param test: frame with ``close`` and ``date`` columns; it is modified in
        place (a ``forecast`` column is added and the index is set to
        ``date``).
    :return: the modified ``test`` frame.
    """
    print()
    history = list(train['close'])
    model_predictions = []
    for time_point in range(len(test)):
        fitted = ARIMA(history, order=(p, q, d)).fit()
        model_predictions.append(fitted.forecast()[0])
        history.append(test.iloc[time_point]['close'])
    test['forecast'] = model_predictions
    test.index = test['date']
    return test
def arima_insample(ts, g, order, name):
    """Plot in-sample ARIMA predictions of ``ts`` against the actual values.

    With ``g == 1`` the default in-sample prediction is plotted. Otherwise
    dynamic predictions are made in consecutive windows of ``g`` positions,
    starting at 10% of the series length and continuing while the window
    start is below ``ts.index[-1]``.

    :param ts: series to model; for ``g != 1`` its index is compared with
        integer positions.
    :param g: window length ("gap") of each dynamic prediction.
    :param order: order forwarded to ``ARIMA``.
    :param name: text used at the start of the plot title.
    :raises ValueError: if ``g`` is smaller than 1 (the window loop would
        never terminate).
    """
    if g < 1:
        raise ValueError("g must be at least 1")

    plt.figure()
    fit = ARIMA(ts, order=order).fit()
    print(type(fit))

    if g == 1:
        p = fit.predict()
    else:
        # Collect the windows and join them once: ``Series.append`` no longer
        # exists in pandas 2.x, and repeated appends copy on every step.
        windows = []
        i = int(0.1 * len(ts))
        while i < ts.index[-1]:
            windows.append(fit.predict(start=i, end=i + g - 1, dynamic=True))
            i += g
        p = pd.concat(windows) if windows else pd.Series(dtype=float)

    plt.plot(ts)
    plt.plot(p)
    plt.title(name + " | [p,d,q] : " + str(order) + " | gap=" + str(g))
    plt.legend(["actual", "predicted"])
    plt.show()
def evaluate_arima_model(self, X, arima_order):
    """Walk-forward mean squared error of an ARIMA order on the tail of ``X``.

    The first 66% of ``X`` seeds the history. For each remaining observation
    the model is re-fit on the history, the one-step forecast is stored, and
    the true observation is added to the history.

    :param X: indexable sequence of observations.
    :param arima_order: order tuple forwarded to ``ARIMA``.
    :return: ``mean_squared_error(test, predictions)``.
    """
    split = int(len(X) * 0.66)
    train, test = X[0:split], X[split:]
    history = list(train)

    predictions = []
    for step in range(len(test)):
        # If disp = 1 or True, convergence information is printed.
        fitted = ARIMA(history, order=arima_order).fit(disp=0)
        predictions.append(fitted.forecast()[0])
        history.append(test[step])

    return mean_squared_error(test, predictions)
def create_rmse_scores_per_term(all_global_params_dict, cc_dict):
    """Score three simple predictors on each stem's differenced time series.

    For every stem except 'NumWords' the per-stem values are summed along
    axis 0, offset by ``cc_dict[stem]``, scaled to unit Euclidean norm and
    first-differenced. Three methods are then scored on that series:

    * 'MA': mean of the two preceding values,
    * 'LR': linear regression of each value on its predecessor,
    * 'ARMA': ARIMA(1, 0, 1) residuals (skipping the first).

    Each score is ``sqrt(sum of squared errors / (len(series) - 2))``.

    :return: ``{stem: {method: score}}``.
    """
    stem_time_series_wieghts_dict = {}
    for stem in all_global_params_dict:
        if stem == 'NumWords':
            continue
        print(stem)

        series = np.array(all_global_params_dict[stem].sum(axis=0))
        series = series + cc_dict[stem]
        print(series)

        # normalize
        series = series / np.sqrt(np.sum(np.square(series)))
        print(series)

        # diff series
        series = np.array(
            [later - earlier for earlier, later in zip(series[:-1], series[1:])]
        )
        print(series)

        for method in ['MA', 'LR', 'ARMA']:
            curr_score = 0.0
            if method == 'MA':
                for two_back, one_back, actual in zip(series[:-2], series[1:-1], series[2:]):
                    curr_score += ((0.5 * two_back + 0.5 * one_back) - actual)**2
            elif method == 'LR':
                regr = linear_model.LinearRegression()
                x_series = series[:-1]
                y_series = series[1:]
                regr.fit(x_series.reshape(-1, 1), y_series.reshape(-1, 1))
                y_pred = regr.predict(x_series.reshape(-1, 1)).reshape(1, -1)
                for predicted, actual in zip(y_pred[0], y_series):
                    curr_score += (predicted - actual)**2
            elif method == 'ARMA':
                fitted = ARIMA(series, order=(1, 0, 1)).fit()
                curr_score += np.sum(np.square(fitted.resid[1:]))

            curr_score = np.sqrt(curr_score / float(len(series) - 2))
            stem_time_series_wieghts_dict.setdefault(stem, {})[method] = curr_score

    print(stem_time_series_wieghts_dict)
    return stem_time_series_wieghts_dict