def auto_arima_pyflux(ts_df):
    """Grid-search small ARIMA orders with pyflux and forecast one step ahead.

    Every (p, d, q) with each term in 0..2, except (0, 0, 0), is fitted by
    maximum likelihood ("MLE"); the order with the lowest AIC is refitted.

    Returns:
        ``-1`` (after printing a message) when no candidate produced an AIC
        below infinity; otherwise the tuple
        ``(min_aic, (p, d, q), model, model.predict(h=1, intervals=True))``.
    """
    import pyflux as pf

    candidate_orders = [
        (p, d, q)
        for p in range(3)
        for d in range(3)
        for q in range(3)
        if (p, d, q) != (0, 0, 0)
    ]

    best_aic = np.inf
    best_order = None
    for ar_order, diff_order, ma_order in candidate_orders:
        candidate = pf.ARIMA(data=ts_df, ar=ar_order, integ=diff_order,
                             ma=ma_order)
        # "M-H" was noted by the original author as an alternative method.
        aic = candidate.fit("MLE").aic
        if aic < best_aic:
            best_aic = aic
            best_order = (ar_order, diff_order, ma_order)

    if best_order is None:
        print("Not successful in fitting ARIMA")
        return -1

    # Refit the winning order so the returned model object is the fitted one.
    best_p, best_d, best_q = best_order
    model = pf.ARIMA(data=ts_df, ar=best_p, integ=best_d, ma=best_q)
    model.fit("MLE")
    forecast = model.predict(h=1, intervals=True)
    return best_aic, best_order, model, forecast
def plot(self, series, latent_variables=None):
    """Fit an ARIMA model on ``series[180:360]`` and plot a 30-step forecast.

    The model is built from ``self.ar``, ``self.ma`` and ``self.integ``; the
    plot shows the last 100 observed values before the forecast.

    NOTE(review): ``latent_variables`` is forwarded as the fifth positional
    argument of ``pf.ARIMA``; in pyflux's documented signature that slot
    appears to be ``target`` -- confirm this is what is intended.
    """
    window = pd.DataFrame(series[180:360])
    arima_args = [window, self.ar, self.ma, self.integ]
    if latent_variables is not None:
        arima_args.append(latent_variables)
    model = pf.ARIMA(*arima_args)
    model.fit()
    model.plot_predict(30, past_values=100)
def _latent_variable_distribution(self, set_a, set_b):
    """Average the latent-variable samples of ARIMA models fitted to two sets.

    An ARIMA(``self.ar``, ``self.ma``, ``self.integ``) model is fitted to
    each data set. For every latent variable, each sample from one model is
    mapped to the equal-quantile value of the other model's normal
    approximation (mean/std of its samples) and the two are averaged.

    Original author's note: datasets with the 'b' indicator are scaled
    differently, so z-scores are used to average equivalent values in the
    a and b distributions.

    Args:
        set_a: first data set (converted with ``np.asarray``).
        set_b: second data set (converted with ``np.asarray``).

    Returns:
        ``np.ndarray`` of shape ``(len(a_vals), len(a_vals[0]))`` holding the
        averaged samples.

    NOTE(review): ``latent_variables.dahlia()`` is a project-specific
    accessor; this code assumes element 0 holds per-variable sample arrays
    and element 1 holds per-variable indicator values -- confirm.
    """
    a_model = pf.ARIMA(np.asarray(set_a), self.ar, self.ma, self.integ)
    b_model = pf.ARIMA(np.asarray(set_b), self.ar, self.ma, self.integ)
    a_model.fit()
    b_model.fit()

    a_latent = a_model.latent_variables.dahlia()
    b_latent = b_model.latent_variables.dahlia()
    a_vals, a_indicators = a_latent[0], a_latent[1]
    b_vals, b_indicators = b_latent[0], b_latent[1]

    n_vars = len(a_vals)
    n_samples = len(a_vals[0])
    values = np.empty((n_vars, n_samples))

    for y in range(n_vars):
        # Normal approximations are built once per variable, not per sample.
        a_dist = stat.norm(a_vals[y].mean(), a_vals[y].std())
        b_dist = stat.norm(b_vals[y].mean(), b_vals[y].std())
        a_ind = a_indicators[y]
        b_ind = b_indicators[y]

        # The original compared the whole ``b_indicators`` container to 'b'
        # (missing ``[y]``), which is always False for a list and raises for
        # a multi-element ndarray. Its first and last branches performed the
        # same a -> b mapping, so only the remaining case (both indicators
        # differ from 'b' and from each other) maps b -> a.
        if a_ind != 'b' and b_ind != 'b' and a_ind != b_ind:
            source, source_dist, other_dist = b_vals[y], b_dist, a_dist
        else:
            source, source_dist, other_dist = a_vals[y], a_dist, b_dist

        samples = np.asarray(source)[:n_samples]
        # Equal-quantile counterpart of each sample in the other distribution.
        counterpart = other_dist.ppf(source_dist.cdf(samples))
        values[y] = (samples + counterpart) / 2

    return values
def a_test_bbvi_elbo():
    """
    Check that BBVI ends with a higher recorded ELBO than it started with
    for an AR(1) model with an Exponential family.
    """
    # NOTE(review): the ``a_`` prefix keeps pytest's default ``test_*``
    # discovery from collecting this -- presumably disabled on purpose.
    arima = pf.ARIMA(data=data, ar=1, ma=0, family=pf.Exponential())
    result = arima.fit('BBVI', iterations=200, record_elbo=True,
                       map_start=False)
    elbo_trace = result.elbo_records
    assert elbo_trace[-1] > elbo_trace[0]
def run_aram(df, maxar, maxma, test_size=14):
    """Fit an ARMA model to the log of the first column and print test RMSE.

    The last ``test_size`` rows are held out. The log series is differenced
    when the stationarity test's result is not below 0.01, an ARMA order is
    chosen with ``choose_order`` and the model is fitted by MLE on the
    ``'diff'`` column. Forecasts are passed through ``predict_recover`` and
    compared with the held-out ``'payment_times'`` column.

    Args:
        df: input frame; NaN rows are dropped. Assumes the first column is
            the series to model and that a ``'payment_times'`` column exists.
        maxar: maximum AR order passed to ``choose_order``.
        maxma: maximum MA order passed to ``choose_order``.
        test_size: number of trailing rows held out for evaluation.

    Returns:
        None; progress messages and the RMSE are printed.
    """
    data = df.dropna()
    data['log'] = np.log(data[data.columns[0]])
    # An alternative split considered by the author: 33% of the data as test.
    train_size = len(data) - int(test_size)
    train, test = data[:train_size], data[train_size:]

    if test_stationarity(train[train.columns[1]]) < 0.01:
        # Message: "stationary, no differencing needed".
        print('平稳,不需要差分')
        # Previously this branch left ``diffn`` unbound and never created the
        # 'diff' column used below, so it always crashed. Model the
        # undifferenced log series instead.
        # NOTE(review): assumes predict_recover accepts diffn=0 -- confirm.
        diffn = 0
        train = train.copy()
        train['diff'] = train[train.columns[1]]
    else:
        diffn = best_diff(train, maxdiff=8)
        # presumably adds the 'diff' column used as the model target below
        train = produce_diffed_timeseries(train, diffn)
        # Message: "differencing order is <n>, differencing done".
        print('差分阶数为' + str(diffn) + ',已完成差分')

    # Message: "starting ARMA fit".
    print('开始进行ARMA拟合')
    order = choose_order(train[train.columns[2]], maxar, maxma)
    # Message: "model order is: <order>".
    print('模型的阶数为:' + str(order))
    _ar = order[0]
    _ma = order[1]
    model = pf.ARIMA(data=train, ar=_ar, ma=_ma, target='diff',
                     family=pf.Normal())
    model.fit("MLE")

    test = test['payment_times']
    test_predict = model.predict(int(test_size))
    test_predict = predict_recover(test_predict, train, diffn)
    # NOTE(review): if predict_recover returns a one-column DataFrame, this
    # subtraction broadcasts (h, 1) against (h,) -- confirm the shapes.
    RMSE = np.sqrt(
        ((np.array(test_predict) - np.array(test)) ** 2).sum() / test.size)
    # Message: "RMSE on the test set is: <value>".
    print("测试集的RMSE为:" + str(RMSE))
def test_bbvi_mini_batch_elbo():
    """
    Check that mini-batch BBVI (batch size 32) ends with a higher recorded
    ELBO than it started with for an ARMA(1, 1) Cauchy model.
    """
    arima = pf.ARIMA(data=data, ar=1, ma=1, family=pf.Cauchy())
    result = arima.fit('BBVI', iterations=100, mini_batch=32,
                       record_elbo=True)
    elbo_trace = result.elbo_records
    assert elbo_trace[-1] > elbo_trace[0]
def a_test_predict_length():
    """
    Check that forecasting h=5 steps yields a frame with exactly 5 rows
    (ARMA(2, 2) with an Exponential family).
    """
    # NOTE(review): the ``a_`` prefix keeps pytest's default ``test_*``
    # discovery from collecting this -- presumably disabled on purpose.
    horizon = 5
    arima = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Exponential())
    arima.fit()
    forecast = arima.predict(h=horizon)
    assert forecast.shape[0] == horizon
def test_bbvi_elbo():
    """
    Check that BBVI ends with a higher recorded ELBO than it started with
    for an ARMA(1, 1) model with a Skewt family.
    """
    arima = pf.ARIMA(data=data, ar=1, ma=1, family=pf.Skewt())
    result = arima.fit('BBVI', iterations=400, record_elbo=True)
    elbo_trace = result.elbo_records
    assert elbo_trace[-1] > elbo_trace[0]
def test_predict_is_nans():
    """
    Tests that the in-sample predictions are not nans
    (ARMA(2, 2) with a Skewt family, h=5).
    """
    model = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Skewt())
    model.fit()
    # Compute the in-sample predictions once; the original evaluated
    # predict_is(h=5) twice inside the assertion.
    predicted = model.predict_is(h=5).values
    nan_mask = np.isnan(predicted)
    assert len(predicted[nan_mask]) == 0
def univariate_arima():
    '''
    Reads the data, fits an ARIMA model and prints the accuracy score.

    The first 52 * 3 rows returned by ``preprocessing.main()`` are used for
    training and the remainder for testing. An ARIMA(ar=9, integ=1, ma=0)
    model is fitted by MLE on the 'milk' column; its summary is printed and
    a 38-step forecast is plotted. The sign of each forecast value is then
    scored against the sign of the first difference of the first test column.

    Inputs: None
    Outputs: None
    '''
    data = preprocessing.main()
    n_train_hours = 52 * 3
    train = data.iloc[:n_train_hours, :]
    test = data.iloc[n_train_hours:, :]

    model = pf.ARIMA(data=train, ar=9, ma=0, integ=1, target='milk')
    x = model.fit("MLE")
    x.summary()
    model.plot_predict(h=38, past_values=20, figsize=(15, 5))

    yhat = model.predict(h=38)
    pred_chg = yhat > 0
    actual_chg = test.iloc[:-1, 0].diff() > 0
    # Parenthesised call form is valid on both Python 2 and Python 3; the
    # original bare ``print`` statement is a SyntaxError on Python 3.
    print(accuracy_score(actual_chg, pred_chg))
def flux_auto(y, s, k, a, t, e, r):
    """ Skater-style wrapper around a pyflux ARIMA model

          - A call with y=None (re)fits the model on the buffered history
            and returns (None, s, None)
          - A call with an observation updates the buffers, triggers a fit,
            and returns the result of flux_or_last_value
          - Original author's note: this only works for k=1

        :returns: x, s', w
    """
    if s is None:
        s = flux_hyperparams(s=dict(), r=r)
        s = initialize_buffers(s=s, y=y)

    if y is None:
        # Fit request: no model until the buffer holds at least n_burn points
        history = s.get('buffer')
        if len(history) < s['n_burn']:
            s['model'] = None
        else:
            frame = pd.DataFrame(columns=['y'], data=history)
            s['model'] = pf.ARIMA(data=frame, ar=s['ar'], ma=s['ma'],
                                  target='y', family=s['family'])
            s['model'].fit("MLE")
        # Acknowledge that a fit was requested by returning x=None, w=None
        return None, s, None

    # Process observation and return prediction
    assert isinstance(y, float) or len(y) == s['dim'], ' Cannot change dimension of input in flight '
    y0, exog = split_exogenous(y=y, dim=s['dim'])
    s = update_buffers(s=s, a=a, exog=exog, y0=y0)

    # Always fit prior to prediction
    fit_ack, s, _ = flux_auto(y=None, s=s, k=k, a=a, t=t, e=e, r=r)
    assert fit_ack is None
    return flux_or_last_value(s=s, k=k, exog=exog, y0=y0)
def test_predict_is_length():
    """
    Check that in-sample prediction over h=5 steps yields a frame with
    exactly 5 rows (ARMA(2, 2) with a Cauchy family).
    """
    horizon = 5
    arima = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Cauchy())
    arima.fit()
    in_sample = arima.predict_is(h=horizon)
    assert in_sample.shape[0] == horizon
def sliding_prediction_fixed_arma(ts, winsize=28, show_convg_info=False):
    """Produce rolling one-step-ahead forecasts with a fixed-order ARMA model.

    For every window of ``winsize`` consecutive observations, a pyflux
    ARIMA(ar=7, integ=0, ma=1) model is fitted by MLE and used to predict
    the next value. Each prediction is printed as it is produced.

    Args:
        ts: series to forecast; cast to float. Its index entries must expose
            ``.date()``. The prediction is read from a ``'count'`` column,
            so presumably the series is named ``'count'`` -- confirm.
        winsize: number of observations in each fitting window.
        show_convg_info: unused; kept for backward compatibility (it was
            only consumed by a statsmodels variant that is no longer here).

    Returns:
        ``pd.DataFrame`` built from ``(date, predicted_value)`` tuples, one
        row per window.
    """
    # The unused ``statsmodels.tsa.arima_model`` import was dropped: that
    # module no longer exists in newer statsmodels releases, so importing it
    # made this pyflux-only function fail.
    import pyflux as pf

    ts = ts.astype(float)
    predictions = []
    for start_indx in range(0, ts.size - winsize):
        end_indx = start_indx + winsize
        # Date of the observation immediately after the window.
        date = ts.index[end_indx].date()

        ts_sliced = pd.DataFrame(ts[start_indx:end_indx])
        model = pf.ARIMA(data=ts_sliced, ar=7, integ=0, ma=1)
        # "M-H" was noted by the original author as an alternative method.
        model.fit("MLE")
        nextday_pred = model.predict(h=1, intervals=True)
        pred_count = nextday_pred['count'][0]

        print(nextday_pred['count'], date)
        predictions.append((date, pred_count))
    return pd.DataFrame(predictions)
def a_test_ppc():
    """
    Check that the posterior predictive check value (100 simulations) lies
    in [0, 1] after a BBVI fit (ARMA(2, 2), Exponential family).
    """
    # NOTE(review): the ``a_`` prefix keeps pytest's default ``test_*``
    # discovery from collecting this -- presumably disabled on purpose.
    arima = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Exponential())
    arima.fit('BBVI', iterations=100)
    ppc_value = arima.ppc(nsims=100)
    assert 0.0 <= ppc_value <= 1.0
def test_predict_length():
    """
    Check that forecasting h=5 steps yields a frame with exactly 5 rows
    (ARMA(2, 2), default family); the fit summary is printed first.
    """
    horizon = 5
    arima = pf.ARIMA(data=data, ar=2, ma=2)
    result = arima.fit()
    result.summary()
    forecast = arima.predict(h=horizon)
    assert forecast.shape[0] == horizon
def test_ppc():
    """
    Check that the posterior predictive check value lies in [0, 1] after a
    BBVI fit (ARMA(2, 2), Cauchy family).
    """
    arima = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Cauchy())
    arima.fit('BBVI', iterations=100)
    ppc_value = arima.ppc()
    assert 0.0 <= ppc_value <= 1.0
def a_test_predict_nans():
    """
    Tests that the predictions are not nans
    (ARMA(2, 2) with an Exponential family, h=5).
    """
    # NOTE(review): the ``a_`` prefix keeps pytest's default ``test_*``
    # discovery from collecting this -- presumably disabled on purpose.
    model = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Exponential())
    model.fit()
    # Forecast once; the original evaluated predict(h=5) twice inside the
    # assertion.
    predicted = model.predict(h=5).values
    nan_mask = np.isnan(predicted)
    assert len(predicted[nan_mask]) == 0
def predict_request_body_len(train_set, test_set, method):
    """Fit an ARMA(2, 1) model to ``request_body_len`` and plot a forecast.

    Runs the ADF test and the series/PACF/ACF plots from ``arima_utils`` on
    the training column, fits the model with the given pyflux ``method``
    (printing the elapsed time and the fit scores), shows the fitted plot,
    then plots the test column against a 100-step forecast.

    Args:
        train_set: frame containing a ``'request_body_len'`` column.
        test_set: frame whose index and ``'request_body_len'`` column are
            plotted as the real values.
        method: fit method name forwarded to ``model.fit``.
    """
    target = 'request_body_len'

    arima_utils.adfuller_test(train_set[target].dropna())
    arima_utils.plot_series(train_set[target], 'Original Series')
    # A first-difference variant of the ADF test/plot was tried by the
    # original author and left disabled.
    arima_utils.plot_pacf(train_set[target])
    arima_utils.plot_acf(train_set[target])

    # The original attempt used p = 12 and q = 34.
    ar_order = 2
    ma_order = 1

    fit_started = time.time()
    print("STARTING TIMER REQUEST ", method)
    model = pf.ARIMA(data=train_set, ar=ar_order, ma=ma_order, integ=0,
                     target=target)
    fit_result = model.fit(method=method)
    print("TIME: ", time.time() - fit_started)

    print(fit_result.scores)
    model.plot_fit()
    plt.show()

    predict_started = time.time()
    print("STARTING TIMER, PREDICT REQUEST ", method)
    plt.plot(test_set.index, test_set[target], label='REAL', color='pink')
    plt.plot(model.predict(h=100), label='PREDICTION', color='cyan')
    plt.legend(['REAL', 'PREDICTION'])
    print("TIME: ", time.time() - predict_started)
    plt.show()
def test_sample_model():
    """
    Check the shape of draws from the fitted model: 100 simulations, each
    of length ``len(data) - 2`` (ARMA(2, 2), Cauchy family, BBVI fit).
    """
    n_sims = 100
    arima = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Cauchy())
    arima.fit('BBVI', iterations=100)
    draws = arima.sample(nsims=n_sims)
    assert draws.shape[0] == n_sims
    assert draws.shape[1] == len(data) - 2
def test_predict_is_nonconstant():
    """
    In-sample predictions should not all equal the first one; a constant
    output would indicate the predict function is not iterating forward.
    (ARMA(2, 2), Cauchy family, h=10, no intervals.)
    """
    arima = pf.ARIMA(data=data, ar=2, ma=2, family=pf.Cauchy())
    arima.fit()
    predicted = arima.predict_is(h=10, intervals=False).values
    all_same = np.all(predicted == predicted[0])
    assert not all_same
def ARIMAX_model(df, target, ar, integ, ma):
    """Fit a Normal-family pyflux ARIMA model with Metropolis-Hastings.

    The model is built on column ``target`` of ``df`` with the given
    ``ar``/``integ``/``ma`` orders, fitted with method "M-H", and its
    summary is printed. Nothing is returned.
    """
    spec = dict(data=df, ar=ar, ma=ma, integ=integ, target=target,
                family=pf.Normal())
    fitted = pf.ARIMA(**spec).fit("M-H")
    fitted.summary()
def test_predict_nans():
    """
    Tests that the predictions are not nans
    (ARMA(2, 2), default family, h=5); the fit summary is printed first.
    """
    model = pf.ARIMA(data=data, ar=2, ma=2)
    x = model.fit()
    x.summary()
    # Forecast once; the original evaluated predict(h=5) twice inside the
    # assertion.
    predicted = model.predict(h=5).values
    nan_mask = np.isnan(predicted)
    assert len(predicted[nan_mask]) == 0
def predict_response_body_len(train_set, test_set, method=None):
    """Fit an ARMA(2, 8) model to ``response_body_len`` and plot a forecast.

    Runs the ADF test and the series/PACF/ACF plots from ``arima_utils`` on
    the training column, fits the model (printing the elapsed time, the
    summary and the fit scores), shows the fitted plot, then plots the test
    column against a 100-step forecast.

    Args:
        train_set: frame containing a ``'response_body_len'`` column.
        test_set: frame whose index and ``'response_body_len'`` column are
            plotted as the real values.
        method: fit method name forwarded to ``model.fit``. The original
            body read an undefined name ``method`` (NameError unless a
            module-level global happened to exist); it is now an optional
            parameter, matching ``predict_request_body_len``. ``None`` is
            passed through to pyflux, which presumably selects its default
            method -- confirm. Other methods tried by the author: 'BBVI',
            'Laplace', 'M-H'.
    """
    target = 'response_body_len'

    arima_utils.adfuller_test(train_set[target])
    arima_utils.plot_series(train_set[target], 'Original Series')
    arima_utils.plot_pacf(train_set[target])
    arima_utils.plot_acf(train_set[target])

    # The original attempt used p = 8 and q = 7.
    p = 2
    q = 8

    start_time = time.time()
    print("STARTING TIMER, RESPONSE")
    model = pf.ARIMA(data=train_set, ar=p, ma=q, integ=0, target=target)
    x = model.fit(method=method)
    print("TIME: ", time.time() - start_time)

    print(x.summary())
    print(x.scores)
    model.plot_fit()
    plt.show()

    start_time = time.time()
    print("STARTING TIMER PREDICT RESPONSE")
    plt.plot(test_set.index, test_set[target], label='REAL', color='pink')
    plt.plot(model.predict(h=100), label='PREDICTION', color='cyan')
    plt.legend(['REAL', 'PREDICTION'])
    print("TIME: ", time.time() - start_time)
    plt.show()
def doARMA(data, ar, ma):
    """Fit a Normal-family ARMA model on 'sunspot.year' and predict in-sample.

    The model is fitted by MLE, the rolling in-sample prediction over the
    last 500 points is plotted, and the same 500-step in-sample prediction
    is returned.
    """
    horizon = 500
    model = pf.ARIMA(data=data, ar=ar, ma=ma, target='sunspot.year',
                     family=pf.Normal())
    model.fit("MLE")
    model.plot_predict_is(h=horizon, figsize=(15, 5))
    return model.predict_is(h=horizon)
def predict_ARIMA(trainData, testX, lookAhead, p, q):
    """Fit an ARMA(p, q) model on train + test data and forecast ahead.

    ``testX`` is flattened to one dimension and appended to ``trainData``;
    a Normal-family pyflux ARIMA model is fitted by MLE on the combined
    series and its ``lookAhead``-step forecast (no intervals) is returned.
    """
    flat_test = np.array(testX).reshape(-1)
    combined = np.concatenate([trainData, flat_test], axis=0)
    arima = pf.ARIMA(data=combined, ar=p, ma=q, family=pf.Normal())
    arima.fit(method="MLE")
    return arima.predict(lookAhead, intervals=False)
def a_test_bbvi():
    """
    Fit an AR(1) Exponential model with BBVI and check that it exposes 3
    latent variables, none of whose estimated values is nan.
    """
    # NOTE(review): the ``a_`` prefix keeps pytest's default ``test_*``
    # discovery from collecting this -- presumably disabled on purpose.
    arima = pf.ARIMA(data=data, ar=1, ma=0, family=pf.Exponential())
    arima.fit('BBVI', iterations=200)
    latent = arima.latent_variables.z_list
    assert len(latent) == 3
    estimates = np.array([lv.value for lv in latent])
    nan_mask = np.isnan(estimates)
    assert len(estimates[nan_mask]) == 0
def a_test_laplace():
    """
    Fit an ARMA(1, 1) Exponential model with the Laplace approximation and
    check that it exposes 3 latent variables, none of whose estimated
    values is nan.
    """
    # NOTE(review): the ``a_`` prefix keeps pytest's default ``test_*``
    # discovery from collecting this -- presumably disabled on purpose.
    arima = pf.ARIMA(data=data, ar=1, ma=1, family=pf.Exponential())
    arima.fit('Laplace')
    latent = arima.latent_variables.z_list
    assert len(latent) == 3
    estimates = np.array([lv.value for lv in latent])
    nan_mask = np.isnan(estimates)
    assert len(estimates[nan_mask]) == 0
def test_pml():
    """
    Fit an ARMA(1, 1) Cauchy model with the 'PML' method and check that it
    exposes 4 latent variables, none of whose estimated values is nan.
    """
    arima = pf.ARIMA(data=data, ar=1, ma=1, family=pf.Cauchy())
    arima.fit('PML')
    latent = arima.latent_variables.z_list
    assert len(latent) == 4
    estimates = np.array([lv.value for lv in latent])
    nan_mask = np.isnan(estimates)
    assert len(estimates[nan_mask]) == 0
def test_mh():
    """
    Fit an ARMA(1, 1) Cauchy model with Metropolis-Hastings (300
    simulations) and check that it exposes 4 latent variables, none of
    whose estimated values is nan.
    """
    arima = pf.ARIMA(data=data, ar=1, ma=1, family=pf.Cauchy())
    arima.fit('M-H', nsims=300)
    latent = arima.latent_variables.z_list
    assert len(latent) == 4
    estimates = np.array([lv.value for lv in latent])
    nan_mask = np.isnan(estimates)
    assert len(estimates[nan_mask]) == 0
def test_bbvi_mini_batch():
    """
    Fit an ARMA(1, 1) Cauchy model with mini-batch BBVI (batch size 32) and
    check that it exposes 4 latent variables, none of whose estimated
    values is nan.
    """
    arima = pf.ARIMA(data=data, ar=1, ma=1, family=pf.Cauchy())
    arima.fit('BBVI', iterations=100, mini_batch=32)
    latent = arima.latent_variables.z_list
    assert len(latent) == 4
    estimates = np.array([lv.value for lv in latent])
    nan_mask = np.isnan(estimates)
    assert len(estimates[nan_mask]) == 0