def ar_coefficient(x, param):
    """
    Return a single coefficient of an autoregressive AR(k) process fitted to ``x``.

    The k parameter is the maximum lag of the process

    .. math::

        X_{t}=\\varphi_0 +\\sum _{{i=1}}^{k}\\varphi_{i}X_{{t-i}}+\\varepsilon_{t}

    The model is fitted with maxlag ``param["k"]`` and the coefficient
    :math:`\\varphi_{i}` with index ``i = param["coeff"]`` is returned
    (index 0 is the first fitted parameter).

    :param x: the time series to calculate the feature of
    :type x: numpy.ndarray
    :param param: a dictionary {"coeff": i, "k": k} with i, k int
    :type param: dict
    :return: the requested coefficient; NaN when ``coeff > k`` or when the
        fit fails; 0 when the fitted parameter vector is shorter than expected
    :return type: float
    """
    k = param["k"]
    p = param["coeff"]

    # A coefficient whose index exceeds the model order does not exist, so
    # there is no point in building or fitting the model.
    if p > k:
        return np.nan

    model = AR(list(x))
    try:
        # NOTE(review): ``solver="mle"`` is forwarded unchanged from the
        # original code; it may have been meant as ``method="mle"`` -- confirm.
        fitted_params = model.fit(maxlag=k, solver="mle").params
    except (np.linalg.LinAlgError, ValueError):
        # A failed fit yields no coefficient at all, for every index up to
        # and including k.
        return np.nan

    try:
        return fitted_params[p]
    except IndexError:
        return 0
def spectrum0_ar(x):
    """
    Estimate the spectral density at frequency zero of ``x`` with an AR model.

    A straight line (intercept plus slope over the index 1..len(x)) is fitted
    to ``x`` by least squares. When the residuals of that fit have zero
    standard deviation the result is ``(0, 0)``. Otherwise an AR model with a
    constant is fitted, its order chosen by AIC, and the estimate is the
    residual variance divided by ``(1 - sum of AR coefficients) ** 2``.

    :param x: the series to analyse
    :return: tuple ``(spec, order)``
    """
    time_index = np.arange(1, len(x) + 1)
    # Columns are time_index**0 and time_index**1, i.e. intercept and slope.
    design = time_index[:, np.newaxis] ** [0, 1]
    trend_coefs, _residues, _rank, _singular = lstsq(design, x)
    detrended = x - np.matmul(design, trend_coefs)

    if detrended.std() == 0:
        return 0, 0

    fitted = AR(x).fit(ic='aic', trend='c')
    lag_order = fitted.k_ar
    # params[0] is the constant, so only the lag coefficients are summed.
    lag_sum = np.sum(fitted.params[1:])
    spectrum = np.var(fitted.resid) / (1 - lag_sum) ** 2
    return spectrum, lag_order
def _feature_2_3(ts=_ts):
    """
    The relationship between the order of the AR model and its goodness of fit.

    For the models AR(1), AR(2), ..., AR(6) the mean residual is computed and
    regressed linearly on the orders [1, 2, ..., 6]. The slope of that
    regression is returned.

    :param ts: the time series to analyse
    :return: slope of mean residual versus AR order
    """
    orders = np.arange(1, 7)
    # One mean residual per order; the model order is the loop value itself
    # so the fitted orders match the regressor values.
    mean_residuals = np.array(
        [AR(ts).fit(maxlag=int(order)).resid.mean() for order in orders]
    )
    # linregress returns more than two fields, so read the slope by name
    # instead of unpacking.
    return stats.linregress(orders, mean_residuals).slope
def test_mle(self):
    """Check MLE fit and predict without a constant against ARMA (#3945)."""
    reference = self.res1
    series = reference.model.endog

    ar_fit = AR(series).fit(maxlag=9, method='mle', trend='nc', disp=0)
    ar_tail = ar_fit.fittedvalues[-10:]
    # NOTE(review): this assertion compares the AR fit's fitted values with
    # themselves and therefore always passes; it may have been meant to
    # compare against ``self.res1`` -- confirm before changing.
    assert_allclose(ar_tail, ar_fit.fittedvalues[-10:], rtol=0.015)

    arma_fit = ARMA(series, (9, 0)).fit(method='mle', trend='nc', disp=0)
    assert_allclose(ar_fit.params, arma_fit.params, atol=5e-6)
    assert_allclose(ar_tail, arma_fit.fittedvalues[-10:], rtol=1e-4)
def predict(self, data, start_idx, end_idx):
    """
    Fit a model of order ``self.opt_p`` on ``data`` and predict a range.

    A VAR model is used when ``data`` has more than one column, an AR model
    otherwise. ``self.model`` is overwritten: it ends up holding the VAR
    model in the multi-column case and the fitted AR result otherwise.

    :param data: frame holding the series, one per column
    :param start_idx: first index to predict
    :param end_idx: last index to predict
    :return: pandas.DataFrame of predictions with the columns of ``data``
    """
    if len(data.columns) <= 1:
        self.model = AR(data)
        self.model = self.model.fit(self.opt_p)
        y_pred = self.model.predict(start=start_idx, end=end_idx)
    else:
        self.model = VAR(data)
        var_fit = self.model.fit(self.opt_p)
        y_pred = self.model.predict(
            var_fit.params, start=start_idx, end=end_idx, lags=self.opt_p
        )
    return pd.DataFrame(data=y_pred, columns=data.columns.values)
def process_mag_signal(self, data):
    """
    Build the feature vector of a magnitude signal.

    The signal is split into windows, the 1-d features are computed with
    ``process_1d_signal``, the signal magnitude area term is inserted at
    position 5 and the AR(3) parameters of every window are appended.

    :param data: the raw signal, passed to ``split_data_to_windows``
    :return: the feature array
    """
    windows = self.split_data_to_windows(data)
    features = self.process_1d_signal(windows)

    x_sma = np.sum(windows) / len(windows) * self._freq
    # np.insert and np.append return new arrays; the results must be kept or
    # the extra features are silently lost.
    features = np.insert(features, 5, x_sma)

    # Four parameters per window: the constant plus three lag coefficients.
    x_ar = [AR(windows[i]).fit(3).params for i in range(windows.shape[0])]
    features = np.append(features, x_ar)
    return features
def predict_finals_week(data):
    """
    Forecast eight days (2017-06-12 .. 2017-06-19) for every column of ``data``.

    An AR model is fitted per column and rolled forward step by step, feeding
    each forecast back in as history. Columns whose fit or forecast fails are
    skipped (best effort).

    :param data: pandas.DataFrame with one series per column
    :return: pandas.DataFrame with a "date" column followed by one forecast
        column per successfully modelled input column
    """
    forecast_dates = [
        "2017-06-12", "2017-06-13", "2017-06-14", "2017-06-15",
        "2017-06-16", "2017-06-17", "2017-06-18", "2017-06-19",
    ]
    frames = [pd.DataFrame({"date": forecast_dates})]

    for ndx in data.columns:
        X = data[ndx].values
        try:
            model_fit = AR(X).fit()
            window = model_fit.k_ar
            coef = model_fit.params

            # Seed the history with the last `window` observations.
            history = list(X[len(X) - window:])
            predictions = []
            for _ in range(len(forecast_dates)):
                lag = history[len(history) - window:]
                # coef[0] is the constant; coef[d + 1] multiplies the value
                # d + 1 steps back.
                yhat = coef[0]
                for d in range(window):
                    yhat += coef[d + 1] * lag[window - d - 1]
                predictions.append(yhat)
                history.append(yhat)
            frames.append(pd.DataFrame({ndx: predictions}))
        except Exception:
            # Deliberate best effort: a column that cannot be modelled is
            # left out. Unlike a bare ``except`` this lets KeyboardInterrupt
            # and SystemExit propagate.
            continue
    return pd.concat(frames, axis=1)
def choose(drop_type, startdate, enddate, pvalue, dvalue, qvalue, points):
    """
    Build the figure dictionary for the model selected in the dropdown.

    The module-level ``df`` is sliced to ``startdate:enddate`` for fitting
    and to ``enddate:`` for comparison. For 'AR' and 'ARIMA' a model is
    fitted on the 'TotalVolume' column and predictions are made up to
    ``points`` hourly steps past the end of the selected slice. 'SARIMAX'
    and any other value produce constant placeholder traces.

    :param drop_type: model name, e.g. 'AR', 'ARIMA' or 'SARIMAX'
    :param startdate: start label of the selected slice
    :param enddate: end label of the selected slice
    :param pvalue: ARIMA p order
    :param dvalue: ARIMA d order
    :param qvalue: ARIMA q order
    :param points: number of hourly periods used to compute the end time
    :return: dict with keys 'data', 'type', 'name' and 'layout'
    """
    slicedf = df.loc[startdate:enddate]
    test_slice = df.loc[enddate:]
    endtime = list(pd.date_range(slicedf.index[-1], periods=points, freq='H'))[-1]
    x = [0, 1, 2, 3, 4]
    volume = slicedf['TotalVolume']

    if drop_type == 'AR':
        fitted = AR(volume).fit(method='mle', ic='aic')
        preds = fitted.predict(
            start=slicedf.index[-11], end=endtime, dynamic=False
        ).rename('AR PREDICTIONS')
        trace1 = makeTrace(preds.index, preds.values, 'Predictions')
        trace2 = makeTrace(test_slice.index, test_slice.TotalVolume, 'Testing Data')
        # Skip the first 1% of the selected rows in the plot.
        skip = slicedf.shape[0] // 100
        trace3 = makeTrace(slicedf.index[skip:], volume.values[skip:], 'Selected Data')
    elif drop_type == 'ARIMA':
        fitted = ARIMA(volume, order=(pvalue, dvalue, qvalue)).fit(method='mle')
        preds = fitted.predict(start=test_slice.index[0], end=endtime, dynamic=False)
        trace1 = makeTrace(preds.index, preds.values, 'Predictions')
        trace2 = makeTrace(test_slice.index, test_slice.TotalVolume, 'Testing Data')
        skip = points // 2
        trace3 = makeTrace(slicedf.index[skip:], volume.values[skip:], 'Selected Data')
    elif drop_type == 'SARIMAX':
        # Placeholder traces; no SARIMAX model is fitted here.
        trace1 = makeTrace(x, [5, 5, 5, 5, 5], '5', 'Predictions')
        trace2 = makeTrace(x, [6, 6, 6, 6, 6], '6', 'Testing Data')
        skip = slicedf.shape[0] // 100
        trace3 = makeTrace(slicedf.index[skip:], volume.values[skip:], 'Selected Data')
    else:
        # Placeholder traces for any unrecognised model name.
        trace1 = makeTrace(x, [7, 7, 7, 7, 7], '7', 'Predictions')
        trace2 = makeTrace(x, [8, 8, 8, 8, 8], '8', 'Testing Data')
        skip = slicedf.shape[0] // 100
        trace3 = makeTrace(slicedf.index[skip:], volume.values[skip:], 'Selected Data')

    return {
        'data': [trace1, trace2, trace3],
        'type': 'scatter',
        'name': drop_type,
        'layout': go.Layout(title=drop_type, barmode='stack'),
    }
def do_forecast_ar_model(self, today, train, test):
    """
    Fit an AR model on ``train`` and score its forecast against ``test``.

    Missing values in ``train`` are replaced with 0 before fitting. The
    forecast covers the ``len(test)`` steps that follow the training data.

    :param today: forwarded to ``compute_mase``
    :param train: training series
    :param test: hold-out series the forecast is compared with
    :return: tuple ``(forecast, mse, mae, mase)``
    """
    fitted = AR(train.fillna(0)).fit()
    logging.info("Fitted AR...")

    first_step = len(train)
    last_step = first_step + len(test) - 1
    forecast = fitted.predict(start=first_step, end=last_step)
    logging.info("Predicted AR")

    mse = self.utils_cl.compute_mse(test, forecast)
    mae = self.utils_cl.compute_mae(test, forecast)
    mase = self.utils_cl.compute_mase(today, test, forecast)

    logging.info("Exit do_forecast_ar_model")
    return forecast, mse, mae, mase
def EffectiveSize(df):
    """
    Compute the effective sample size of every column of ``df``.

    For each column an AR model (maxlag 100, order chosen by AIC) is fitted.
    The spectral variance estimate is ``sigma2 / (1 - sum(phi)) ** 2`` where
    ``phi`` are the lag coefficients, and the effective size is
    ``variance / spectral variance * number of rows``.

    :param df: pandas.DataFrame with one chain per column; it is not modified
    :return: list with one effective sample size per column
    """
    n_rows, n_cols = df.shape
    ess = []
    for jj in range(n_cols):
        # Work on a renamed copy so the caller's column labels stay intact
        # while the model still sees the series name "0".
        column = df.iloc[:, jj].rename("0")
        fitted = AR(column).fit(maxlag=100, ic='aic')
        # params[0] is the constant of the default trend='c' fit and must
        # not be counted among the lag coefficients.
        lag_sum = sum(fitted.params[1:])
        spectral_var = fitted.sigma2 / (1.0 - lag_sum) ** 2
        ess.append(column.std() ** 2 / spectral_var * n_rows)
    return ess
def calc_prediction(train, test):
    """
    Fit an AR model on ``train``, forecast over ``test`` and print a report.

    The lag order, coefficients, every predicted/expected pair and the test
    MSE are printed.

    :param train: training series
    :param test: hold-out series, indexable from 0
    :return: tuple ``(predictions[0], test[0])``
    """
    fitted = AR(train).fit()
    print('Lag: %s' % fitted.k_ar)
    print('Coefficients: %s' % fitted.params)

    first_step = len(train)
    last_step = first_step + len(test) - 1
    predictions = fitted.predict(start=first_step, end=last_step, dynamic=False)

    step = 0
    while step < len(predictions):
        print('predicted=%f, expected=%f' % (predictions[step], test[step]))
        step += 1

    print('Test MSE: %.3f' % mean_squared_error(test, predictions))
    return predictions[0], test[0]
def ar(self, thpt_list):
    """
    Predict the latest throughput sample with an AR(1) model.

    Lists shorter than 4 yield 0. Lists longer than 15 are delegated to
    ``find_average_thpt``. Otherwise an AR(1) model is fitted on the list
    shifted by one (a 0 is prepended, the last sample dropped) and the
    one-step prediction is returned only when it lies within 10% of the
    actual last sample.

    :param thpt_list: list of throughput samples
    :return: the prediction, the average from ``find_average_thpt``, or 0.
    """
    if len(thpt_list) < 4:
        return 0.
    if len(thpt_list) > 15:
        return self.find_average_thpt(thpt_list)

    shifted = [0] + thpt_list[:-1]
    model_fit = AR(shifted).fit(maxlag=1, start_params=[0, 0, 1], disp=-1)
    predicted_last = model_fit.predict(len(shifted), len(shifted))[0]

    last_pt = thpt_list[-1]
    # Check for zero before dividing: the relative error is undefined for a
    # zero sample and the division would raise ZeroDivisionError.
    if last_pt == 0.:
        return 0.
    relative_error = abs(predicted_last - last_pt) / last_pt
    if relative_error < 0.1:
        return predicted_last
    return 0.
def generate_AR_para(self, rawwave, filtered=False, wavt=False, AR_order=10):
    """
    Return the AR parameters of a wave, optionally filtering it first.

    :param rawwave: the raw signal
    :param filtered: when True the signal is filtered before fitting
    :param wavt: with ``filtered``, selects ``wavelet_transform`` instead of
        ``selective_freq_range`` (high_freq=30, low_freq=1.5)
    :param AR_order: maxlag passed to the AR fit
    :return: the fitted AR parameters
    """
    if not (filtered == True):
        source = rawwave
    elif wavt == False:
        source, _, _ = self.selective_freq_range(rawwave, high_freq=30, low_freq=1.5)
    else:
        source, _ = self.wavelet_transform(rawwave)

    fitted = AR(source).fit(maxlag=AR_order)
    return fitted.params
def AR_Forecast() -> None:
    """
    Fit an AR model on the US population data and plot forecast vs. test.

    Reads ``./uspopulation.csv`` (indexed by 'DATE'), uses the first 84 rows
    of 'PopEst' for training and the rest for testing, prints the fitted
    parameters and plots the test data together with the predictions.
    """
    df = pd.read_csv("./uspopulation.csv", parse_dates=True, index_col='DATE')
    # asfreq returns a new frame; keep it so the month-start frequency is
    # actually set on the data used below.
    df = df.asfreq('MS')

    train = df.iloc[:84]
    test = df.iloc[84:]

    model = AR(train['PopEst'])
    # The lag order is selected by the 't-stat' information criterion.
    ARFit = model.fit(ic='t-stat')
    print(ARFit.params)

    start = len(train)
    end = len(train) + len(test) - 1
    predictions = ARFit.predict(start=start, end=end)

    test.plot(legend=True, label='Test')
    predictions.plot(legend=True, label='Predictions', figsize=(12, 8))
def test_mle(self):
    """Check MLE fit and predict without a constant against ARIMA (#3945)."""
    reference = self.res1
    series = reference.model.endog

    with pytest.warns(FutureWarning):
        ar_fit = AR(series).fit(maxlag=9, method="mle", trend="nc", disp=0)
    ar_tail = ar_fit.fittedvalues[-10:]
    # NOTE(review): this assertion compares the AR fit's fitted values with
    # themselves and therefore always passes; it may have been meant to
    # compare against ``self.res1`` -- confirm before changing.
    assert_allclose(ar_tail, ar_fit.fittedvalues[-10:], rtol=0.015)

    arima_fit = ARIMA(series, order=(9, 0, 0), trend="n").fit()
    # The last ARIMA parameter is dropped before comparing.
    assert_allclose(ar_fit.params, arima_fit.params[:-1], rtol=1e-2)
    assert_allclose(ar_tail, arima_fit.fittedvalues[-10:], rtol=1e-4)
def OU_fitting(series):
    """
    Fit Ornstein-Uhlenbeck parameters to ``series`` through an AR(1) model.

    With AR(1) parameters ``[b, a]`` (constant, lag coefficient):
    ``lam = -log(a)``, ``mu = b / (1 - a)`` and
    ``sigma = std(resid) * sqrt(-2 * log(a) / (1 - a**2))``.

    :param series: pd.Series, indexed by date
    :return: dict with keys 'ar_model', 'lam', 'mu' and 'sigma'
    """
    fitted = AR(endog=series).fit(maxlag=1)
    intercept, slope = fitted.params.tolist()
    residual_std = np.std(fitted.resid)

    decay_rate = -np.log(slope)
    long_run_mean = intercept / (1 - slope)
    volatility = residual_std * np.sqrt(2 * decay_rate / (1 - slope * slope))

    return {
        'ar_model': fitted,
        'lam': decay_rate,
        'mu': long_run_mean,
        'sigma': volatility,
    }
def autoRegression():
    """
    Forecast the per-movie daily gross for "Dec. 28" with an AR model.

    Reads every record dated "Dec. 28" from the 'daily' collection, computes
    gross divided by movies tracked, drops the last value, fits an AR model
    on the rest and prints both the series and the one-step forecast.
    """
    daily = db['daily']
    gross_per_movie = []
    for record in daily.find({"Date": "Dec. 28"}):
        # The year is not used; the lookup is kept so records lacking the
        # key fail exactly as before.
        _year = record['Year']
        tracked = record['MoviesTracked']
        gross_text = record['Gross($)'].replace(",", "")
        gross_per_movie.append(int(gross_text) / int(tracked))

    # Drop the most recently appended value.
    del gross_per_movie[-1]
    print(gross_per_movie)

    fitted = AR(gross_per_movie).fit()
    next_step = len(gross_per_movie)
    print(fitted.predict(next_step, next_step))
def transform(self, X):
    """
    Compute AR standard errors for every series in ``X``.

    Each inner series is subsampled with step ``self.subsample`` and fitted
    with an AR model of order ``self.order``. The ``bse`` of the fit is used
    unless its length differs from ``self.order + 1``, in which case a
    NaN vector of that length is used instead.

    :param X: nested iterable of series (outer items contain the series)
    :return: numpy array of the collected standard errors
    """

    def _standard_errors(series):
        fitted = AR(series[::self.subsample]).fit(self.order)
        errors = fitted.bse
        if len(errors) == (self.order + 1):
            return errors
        return np.array([np.nan] * (self.order + 1))

    return np.array([[_standard_errors(a) for a in x] for x in X])
def fitOU(residual, training_size):
    """
    Fit Ornstein-Uhlenbeck parameters to the cumulative sum of ``residual``.

    An AR(1) model is fitted to the cumulated residuals. When the lag
    coefficient ``b`` lies strictly between 0 and ``exp(-2 / training_size)``
    the OU parameters are derived from it; otherwise a sentinel is returned.

    :param residual: residual series
    :param training_size: number of training samples, sets the upper bound on b
    :return: tuple ``(kappa, m, sigma, sigmaeq)`` or ``(-1.0, 0, 0, 0)``
    """
    dt = 1
    cumulated = np.cumsum(residual)
    fitted = AR(cumulated).fit(maxlag=1, disp=-1)
    a, b = fitted.params[0], fitted.params[1]
    var = fitted.sigma2

    if not (0.0 < b < np.exp(-2.0 / training_size)):
        return -1.0, 0, 0, 0

    kappa = -np.log(b) / dt
    m = a / (1.0 - np.exp(-kappa * dt))
    variance_scale = 1.0 - np.exp(-2.0 * kappa * dt)
    sigma = np.sqrt(var * 2.0 * kappa / variance_scale)
    sigmaeq = np.sqrt(var / variance_scale)
    return kappa, m, sigma, sigmaeq
def ar(data, gap=0, predtill=1):
    """
    Forecast the last ``predtill`` steps of every series with an AR model.

    For each index ``i`` of the last axis an AR model is fitted on
    ``data[0, :-gap - 1, i]`` and used to predict the remaining ``gap + 1``
    steps; the last ``predtill`` of those are compared with the truth.

    NOTE(review): only ``data[0]`` is modelled while the truth is taken from
    every entry of the first axis -- confirm this is intended.

    :param data: 3-d array indexed as ``[batch, time, series]``
    :param gap: number of extra steps held out before the final one
    :param predtill: number of trailing steps to evaluate, at most gap + 1
    :return: tuple ``(mae, mape, pred)``
    """
    assert predtill - 1 <= gap
    n_steps = data.shape[1]
    true = data[:, -predtill:, :]

    pred = []
    for i in range(data.shape[2]):
        fitted = AR(data[0, :-gap - 1, i]).fit()
        forecast = fitted.predict(start=n_steps - gap - 1, end=n_steps - 1)
        # The forecast has gap + 1 entries; keep its last `predtill` so it
        # lines up with `true`. The former slice [gap - predtill:gap] was
        # shifted by one and empty for the default arguments.
        pred.append(forecast[gap + 1 - predtill:gap + 1])

    pred = np.expand_dims(np.array(pred).T, axis=0)
    mae = np.mean(np.abs(pred - true))
    mape = np.mean(np.abs(pred - true) / true) * 100
    return mae, mape, pred
def ARmodel(shareFeature_data):
    """
    Fit an AR model on the series and predict its trailing ~30%.

    The prediction starts at index ``int(size * 70 / 100 + 1)`` and ends at
    index ``size``.

    NOTE(review): the model is fitted on the complete series, including the
    range that is predicted afterwards; fit on the leading part only if an
    out-of-sample forecast is wanted.

    :param shareFeature_data: the series (must expose ``.size``)
    :return: the predictions from the split index to ``size``
    """
    dataSize = shareFeature_data.size
    # Index at which the predicted range starts (roughly the 70% mark).
    trainSize = int(dataSize * 70 / 100 + 1)

    model_ar_fit = AR(shareFeature_data).fit()
    return model_ar_fit.predict(start=trainSize, end=dataSize)
def ar_coefficient(x, c, param):
    """
    This feature calculator fits an autoregressive AR(k) process to ``x``.

    The k parameter is the maximum lag of the process

    .. math::

        X_{t}=\\varphi_0 +\\sum _{{i=1}}^{k}\\varphi_{i}X_{{t-i}}+\\varepsilon_{t}

    For every configuration in param, which contains the maxlag "k" and the
    index "coeff", the AR process is fitted once per distinct k and the
    coefficients :math:`\\varphi_{i}` whose index :math:`i` is given by
    "coeff" are returned.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :param c: the time series name
    :type c: str
    :param param: contains dictionaries {"coeff": x, "k": y} with x,y int
    :type param: list
    :return x: the different feature values, indexed by
        "<c>__ar_coefficient__k_<k>__coeff_<coeff>"; NaN when coeff > k or
        when the fit fails
    :return type: pandas.Series
    """
    df_cfg = pd.DataFrame(param)
    df_cfg["k"] = df_cfg["k"].apply(int)

    parts = []
    for k in df_cfg["k"].unique():
        coeff = df_cfg[df_cfg["k"] == k]["coeff"]
        names = ["{}__ar_coefficient__k_{}__coeff_{}".format(c, k, p) for p in coeff]
        try:
            mod = AR(list(x)).fit(maxlag=k, solver="mle").params
        except (LinAlgError, ValueError):
            values = [np.nan] * len(coeff)
        else:
            values = []
            for p in coeff:
                if p > k:
                    values.append(np.nan)
                    continue
                try:
                    values.append(mod[p])
                except IndexError:
                    values.append(0)
        parts.append(pd.Series(values, index=names, dtype=float))

    # Series.append no longer exists in current pandas; concatenate once.
    if not parts:
        return pd.Series(dtype=float)
    return pd.concat(parts)
def predict_AR(array, p=1):
    """
    Method one: forecast the next value of ``array`` with an AR(p) model.

    Input:
        array: array of raw volatility data (must expose ``.size``)
        p: int, lag of AR model
    Output:
        the dynamic one-step prediction for index ``array.size``
    """
    next_index = array.size
    fitted = AR(array).fit(maxlag=p, disp=0)
    vols_pred = fitted.predict(start=next_index, end=next_index, dynamic=True)
    return vols_pred
def update(self):
    """
    Refresh ``self.model`` and ``self.prediction`` from the recent arrivals.

    An AR model is fitted on the last ``self.window`` arrivals before
    ``self.index`` and used for a one-step forecast. When that forecast falls
    outside the min/max of the last ``self.windowArrival`` entries of
    ``self.predictedArrivals`` it is replaced by a simple exponential
    smoothing forecast (``self.model`` keeps the AR fit).
    """
    begin = max(0, self.index - self.window)
    data = self.arrivals[begin:self.index]
    next_step = len(data)

    ar_fit = AR(data).fit()
    self.model = ar_fit
    self.prediction = ar_fit.predict(next_step, next_step)[0]

    recent = self.predictedArrivals[-self.windowArrival:]
    lowest = min(recent)
    highest = max(recent)
    out_of_range = self.prediction < lowest or self.prediction > highest
    if out_of_range:
        smoothing_fit = SimpleExpSmoothing(data).fit()
        self.prediction = smoothing_fit.predict(next_step, next_step)[0]
def predict_AR(df, window=ROLLING_WINDOW, p=1):
    """
    Method one: forecast with an AR(p) model over a given rolling period.

    Input:
        df: DataFrame, raw volatility data
        window: int, rolling period
        p: int, lag of AR model
    Output:
        vols_pred: time series, predicted volatility
    """

    def _one_step_forecast(x):
        # Fit on the rolling window and predict the step right after it.
        return AR(x).fit(maxlag=p, disp=0).predict(start=x.size, end=x.size)

    vols_pred = df[VOL_NAME].rolling(window).apply(_one_step_forecast)
    vols_pred.name = '_'.join(['AR', repr(window), repr(p)])
    print(vols_pred.name + " prediction finished.")
    return vols_pred
def autocorr():
    """
    Explore autocorrelation of the "ES" settle price and test an AR forecast.

    Loads the 2017 data through ``Quandl``, draws lag, autocorrelation and ACF
    plots, fits an AR model on everything but the last 7 observations (the
    very first observation is skipped as well), forecasts those 7, prints
    predicted vs. expected values and the test MSE, and plots both.
    """
    import pandas.plotting as ptp
    from statsmodels.graphics.tsaplots import plot_acf
    from statsmodels.tsa.ar_model import AR

    qdl = Quandl()
    start, end = "2017-01-01", "2018-01-01"
    es = qdl.get_data("ES", start=start, end=end)
    print(es.head())

    xs = es['Settle']
    print(type(xs.index))
    ptp.lag_plot(xs)
    ptp.autocorrelation_plot(xs)
    plot_acf(xs, lags=7)

    train, test = xs[1:len(xs) - 7], xs[len(xs) - 7:]
    # The training series carries its own index; passing the index of the
    # full series as `dates` would not match its length.
    model = AR(train)
    ar_fit = model.fit()
    print('Lag: %s' % ar_fit.k_ar)
    print('Coefficients: %s' % ar_fit.params)

    # start/end are positions, not data values: forecast the len(test) steps
    # that follow the training data.
    ar_predicts = ar_fit.predict(
        start=len(train), end=len(train) + len(test) - 1, dynamic=False
    )
    for predicted, expected in zip(ar_predicts, test):
        print('predicted: %f vs. expected: %f' % (predicted, expected))
    print(len(test), len(ar_predicts))

    error = mean_squared_error(test, ar_predicts)
    print('Test MSE: %.3f' % error)

    plt.plot(test)
    # Draw the forecast against the test index so both curves share an axis.
    plt.plot(test.index, list(ar_predicts), color='red')
    plt.show()
def ar2(week):
    """
    Forecast the per-movie weekly gross of 2018 with an AR model.

    Records of year "2018" are read from the 'weekly' collection until one
    with week number >= ``week`` is met. Overall gross divided by the number
    of movies forms the series, which is printed, fitted with an AR model and
    forecast one step ahead; the forecast is printed too.

    :param week: first week number that is excluded from the series
    """
    weekly = db['weekly']
    gross_per_movie = []
    for record in weekly.find({"Year": "2018"}):
        if int(record['Week#']) >= week:
            break
        gross_text = record['OverallGross($)'].replace(",", "")
        movie_count = record['TotalMovies']
        gross_per_movie.append(int(gross_text) / int(movie_count))
    print(gross_per_movie)

    fitted = AR(gross_per_movie).fit()
    next_step = len(gross_per_movie)
    print(fitted.predict(next_step, next_step))
def AutoRegression(train, test):
    """
    Fit an AR model on ``train``, forecast over ``test`` and plot the result.

    Prints the lag order, the coefficients and the RMSE, and saves the plot
    of original versus predicted values to "AutoRegressionGraph.jpg" before
    showing it.

    :param train: training series
    :param test: hold-out series
    """
    fitted = AR(train).fit()
    print('Lag:', fitted.k_ar)
    print('Coefficients: %s' % fitted.params)

    first_step = len(train)
    last_step = first_step + len(test) - 1
    predictions = fitted.predict(start=first_step, end=last_step, dynamic=False)
    error = RMSE(test, predictions)

    plt.plot(test, 'lightblue')
    plt.ylabel("InBandwidth")
    plt.plot(predictions, 'r')
    plt.legend(["Original", "Predicted"])
    plt.savefig("AutoRegressionGraph.jpg")
    plt.show()

    print("RMSE : ", error)
def ar_model(time_series_raw, time_lag=10, max_lag=1, y_label='', name=None, title="AR Model"):
    """
    Rolling one-step AR forecast over a monthly series, with MSE and plot.

    For every window of ``time_lag`` consecutive rows an AR model with
    ``max_lag`` lags is fitted and the row that follows the window is
    predicted. The mean squared error over the predicted rows is printed and
    real versus predicted values are plotted.

    :param time_series_raw: DataFrame with 'Date' and 'Value' columns
    :param time_lag: number of rows in each training window
    :param max_lag: maxlag of the AR fit
    :param y_label: label of the y axis
    :param name: file the plot is saved to; nothing is saved when None
    :param title: plot title
    """
    values = time_series_raw['Value']
    dates = time_series_raw['Date']
    train_length = len(values) - time_lag

    forecasts = []
    for train_index in range(train_length):
        window_end = train_index + time_lag
        train = values.iloc[train_index:window_end]
        start_date_train = dates.iloc[train_index]
        end_date_train = dates.iloc[window_end - 1]
        predict_date = dates.iloc[window_end]

        model = AR(train, dates=pd.date_range(start=start_date_train, end=end_date_train, freq='M'))
        model_fit = model.fit(maxlag=max_lag)
        predictions = model_fit.predict(start=predict_date, end=predict_date, dynamic=True)
        forecasts.append(pd.DataFrame(
            predictions.iloc[0],
            columns=['Value'],
            index=pd.DatetimeIndex(data=predictions.index.date)))

    # DataFrame.append no longer exists in current pandas; concatenate once.
    if forecasts:
        y_hat = pd.concat(forecasts)
    else:
        y_hat = pd.DataFrame([], columns=['Value'])

    # Drop the first time_lag rows, which have no prediction.
    time_series_raw = time_series_raw[time_lag:]

    # MSE: mean (not sum) of the squared differences on the aligned rows.
    diff_score = time_series_raw['Value'].subtract(y_hat['Value'], axis=0)
    diff_score = diff_score.dropna() ** 2
    mse = diff_score.mean()
    print("MSE: {}".format(mse))

    plt.plot(time_series_raw.index, time_series_raw['Value'], label='Real Values')
    plt.plot(y_hat.index, y_hat['Value'], label='Predicted Values')
    plt.legend(loc='upper left')
    plt.title(title)
    plt.xlabel("Date")
    plt.ylabel(y_label)
    # The default name=None gives savefig nothing to write to.
    if name is not None:
        plt.savefig(name)
    plt.close()
def autoregression_analysis(country, data, output):
    """
    Country based GDP auto-regression analysis

    Parameters
    ----------
    country: str
        the name of a country
    data: str
        path to the csv file containing the GDP data.
    output: str
        The path to the output directory

    Returns
    -------
    tuple, The path of csv result file, and the path of png plot file.
    """
    # Read csv
    df = pd.read_csv(data, index_col="year")
    df = df.dropna()

    # Train model
    train = df["gdp"].values
    model_fit = AR(train).fit()

    # Validate model: in-sample predictions exist from index `lag` to the
    # last observation, so the first `lag` rows have no prediction. Padding
    # with exactly `lag` NaNs keeps each prediction on the row it predicts.
    lag = model_fit.k_ar
    pred = model_fit.predict(start=lag, end=len(train) - 1, dynamic=False)

    # Save result
    df["pred_gdp"] = [np.nan] * lag + list(pred)
    result_file = os.path.join(output, "result.csv")
    df.to_csv(result_file)

    # Save plot; the Agg backend is selected so no display is needed.
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    df.plot()
    plt.grid(axis="y", linestyle="--")
    plt.title(country + "(current $)")
    plot_file = os.path.join(output, "result.png")
    plt.savefig(plot_file)

    return result_file, plot_file