def predict(n_periods):
    """Predict humidity and temperature with two AR(1) models and return JSON.

    The data is loaded with ``get_data_database('mongodb')`` and turned into a
    frame with ``define_dataframe``.  One ``AutoReg`` model with a single lag
    is fitted on the ``HUM`` column and another one on the ``TEMP`` column.

    Parameters
    ----------
    n_periods : int
        Last observation number to predict.  Predictions are computed for
        observations ``0 .. n_periods``; entries ``1 .. n_periods - 1`` are
        reported, one per hour slot (``x % 24``).

    Returns
    -------
    str
        ``dumps`` of ``{"predicciones": [{"hour", "temp", "hum"}, ...]}``.
    """
    datos = get_data_database('mongodb')
    df = define_dataframe(datos)

    model = AutoReg(df.HUM, lags=1)
    model2 = AutoReg(df.TEMP, lags=1)
    model_fit = model.fit()
    model2_fit = model2.fit()

    # Predict with the *estimated* coefficients.  The previous code passed an
    # uninitialised ``np.ndarray`` as the parameter vector, so the forecasts
    # were computed from whatever happened to be in that memory.
    fc = model.predict(model_fit.params, start=0, end=n_periods)
    fc2 = model2.predict(model2_fit.params, start=0, end=n_periods)
    print(fc)

    # Positional access works the same whether statsmodels returned an
    # ndarray or a pandas Series.
    hum = np.asarray(fc)
    temp = np.asarray(fc2)

    # Build a real structure and serialise it once; concatenating fragments
    # by hand produced a string that was not valid JSON.
    predicciones = [
        {
            'hour': str(datetime.time(x % 24, 0)),
            'temp': float(temp[x + 1]),
            'hum': float(hum[x + 1]),
        }
        for x in range(n_periods - 1)
    ]
    return dumps({'predicciones': predicciones})
def extrapolate_moments(mus, fac):
    """Extrapolate moments with an autoregressive model.

    An ``AutoReg`` model with constant and time trend (``trend="ct"``) is
    fitted on the real part of all given moments and used to predict the
    additional ones.

    Parameters
    ----------
    mus : numpy.ndarray
        Known moments.  Only the real part is used for fitting.
    fac : float
        Extension factor; the result has ``int(fac * len(mus))`` entries.
        Expected to be >= 1 (a shorter target cannot hold the input moments).

    Returns
    -------
    numpy.ndarray
        Complex array whose first ``len(mus)`` entries are the input moments
        and whose remaining entries are the AR predictions.
    """
    T = len(mus)
    L = T  # the whole series is used as training data
    P = int(fac * T)  # total number of moments after extrapolation
    train = mus[0:L].real  # train data
    # Lag order grows with the fourth root of the sample size
    lags = round(12 * (len(train) / 100.)**(1 / 4.))
    model = AutoReg(train, lags=lags, trend="ct").fit(cov_type="HC1")
    pred = model.predict(start=L, end=P - 1)  # out-of-sample prediction
    # ``np.complex`` was only an alias of the builtin and was removed in
    # NumPy 1.24; the builtin gives the same complex128 dtype.
    mus2 = np.zeros(P, dtype=complex)
    mus2[0:L] = mus[0:L]  # initial data
    mus2[L:P] = pred[:]  # predicted data
    return mus2
def model(columna, n_periods):
    """Forecast ``n_periods`` values that follow ``columna``.

    The lags are chosen with ``ar_select_order`` (at most 15 lags, run on the
    flattened data) and an ``AutoReg`` model with a constant is fitted with
    them.

    :param columna: observed series (must support ``ravel`` and ``len``)
    :param n_periods: number of out-of-sample steps to predict
    :return: predictions for observations ``len(columna)`` to
        ``len(columna) + n_periods - 1``
    """
    order_search = ar_select_order(columna.ravel(), maxlag=15, old_names=True)
    fitted = AutoReg(
        columna,
        trend='c',
        lags=order_search.ar_lags,
        old_names=True,
    ).fit()

    first_step = len(columna)
    last_step = len(columna) + n_periods - 1
    return fitted.predict(start=first_step, end=last_step, dynamic=False)
def get_forward():
    """Extrapolate the series past its largest key.

    Fits ``AutoReg`` (order ``lag``) on the values of ``data`` and predicts
    from the first out-of-sample observation up to the step that corresponds
    to ``max(desired)``.

    :return: list with the predicted values
    """
    values = list(data.values())
    first_step = len(data)
    last_step = len(data) + max(desired) - max(data.keys()) - 1

    ar = AutoReg(values, lag)
    coefficients = ar.fit().params
    predicted = ar.predict(coefficients, start=first_step, end=last_step)
    return list(predicted)
def get_backward():
    """Extrapolate the series before its smallest key.

    The values of ``data`` are reversed, an ``AutoReg`` model of order
    ``lag`` is fitted on them and predictions are made beyond the end of the
    reversed series, i.e. towards ``min(desired)``.  The predictions are
    reversed again before being returned.

    :return: list with the predicted values in original (forward) order
    """
    first_step = len(data)
    last_step = len(data) + min(data.keys()) - min(desired) - 1
    print(first_step, last_step)

    reversed_values = list(data.values())[::-1]
    ar = AutoReg(reversed_values, lag)
    coefficients = ar.fit().params
    predicted = ar.predict(coefficients, start=first_step, end=last_step)
    return list(predicted)[::-1]
def test_dynamic_against_sarimax():
    """AutoReg predictions must match SARIMAX for static and dynamic modes.

    An AR(1) series with coefficient 0.9 is simulated, SARIMAX(1, 0, 0) with
    a constant is fitted on it, and its first two parameters are fed to an
    unfitted ``AutoReg`` model.  Predictions for observations 900..1100 are
    compared for ``dynamic`` equal to ``False``, ``True`` and ``50``.
    """
    n_obs = 1001
    rho = 0.9
    rng = np.random.RandomState(12345678)
    shocks = rng.standard_normal(n_obs)

    series = np.empty(n_obs)
    # Start from the stationary distribution of the process
    series[0] = shocks[0] * np.sqrt(1.0 / (1 - rho**2))
    for t in range(1, n_obs):
        series[t] = rho * series[t - 1] + shocks[t]

    sarimax_res = SARIMAX(series, order=(1, 0, 0), trend='c').fit(disp=False)
    autoreg = AutoReg(series, 1, old_names=False)
    ar_params = sarimax_res.params[:2]

    # ``False`` is the default of both predict methods
    for dynamic in (False, True, 50):
        expected = sarimax_res.predict(900, 1100, dynamic=dynamic)
        actual = autoreg.predict(ar_params, 900, 1100, dynamic=dynamic)
        assert_allclose(expected, actual)
def test_autoreg_predict_smoke(ar_data):
    """Smoke test: ``AutoReg.predict`` runs for every ``ar_data`` configuration.

    Nothing is asserted about the values; the test only checks that the
    calls below do not raise.
    """
    mod = AutoReg(ar_data.endog, ar_data.lags, trend=ar_data.trend,
                  seasonal=ar_data.seasonal, exog=ar_data.exog,
                  hold_back=ar_data.hold_back, period=ar_data.period,
                  missing=ar_data.missing, old_names=False)
    res = mod.fit()
    exog_oos = None
    if ar_data.exog is not None:
        # One row of (uninitialised) out-of-sample exog with matching columns
        exog_oos = np.empty((1, ar_data.exog.shape[1]))
    mod.predict(res.params, 0, 250, exog_oos=exog_oos)
    if ar_data.lags == 0 and ar_data.exog is None:
        # Without lags and exog a longer horizon is predicted as well
        mod.predict(res.params, 0, 350, exog_oos=exog_oos)
    if isinstance(ar_data.endog, pd.Series) and \
            (not ar_data.seasonal or ar_data.period is not None):
        # Repeat with a plain integer index on endog (and exog, if present).
        # The model is rebuilt without ``hold_back`` and the parameters of the
        # first fit are reused.
        ar_data.endog.index = list(range(ar_data.endog.shape[0]))
        if ar_data.exog is not None:
            ar_data.exog.index = list(range(ar_data.endog.shape[0]))
        mod = AutoReg(ar_data.endog, ar_data.lags, trend=ar_data.trend,
                      seasonal=ar_data.seasonal, exog=ar_data.exog,
                      period=ar_data.period, missing=ar_data.missing,
                      old_names=False)
        mod.predict(res.params, 0, 250, exog_oos=exog_oos)
def impute(self, value: float, sample: pd.Series, conf: float = 0.95) -> float:
    """
    Imputes outlier values using Auto Regressive method with two lags

    The first occurrence of ``value`` in ``sample`` is replaced by the median
    and an AR(2) model without trend is fitted on the result.  A band around
    the prediction at that position is built from the residual standard
    deviation and a Student-t quantile.

    **Parameters**

    * **:param value:** (float) value to check
    * **:param sample:** (pd.Series) data containing ``value``
    * **:param conf:** (float) two-sided confidence level of the band

    **returns**

    * **value:** an empty array when ``value`` lies inside the band, the
      prediction when the prediction lies inside the band, otherwise the
      median of the sample
    """
    upper_quantile = 1 - (1 - conf) / 2

    # Work on a copy with a fresh 0..n-1 index so the caller's data is untouched
    work = sample.copy()
    work.reset_index(drop=True, inplace=True)

    # NOTE(review): ``.iloc[loc, :]`` is a two-axis indexer although ``sample``
    # is annotated as a Series - confirm what callers actually pass.
    matches = work[work == value]
    loc = np.where(np.asanyarray(~np.isnan(matches)))[0][0]
    work.iloc[loc, :] = np.nan
    work.fillna(work.median(), inplace=True)

    n_obs = len(work)
    fitted = AutoReg(work.values, lags=2, trend='n', old_names=False).fit()
    resid_std = np.std(fitted.resid)
    predictions = fitted.predict(start=0, end=n_obs + 1)

    t_quantile = stats.t.ppf(q=upper_quantile, df=n_obs - 1)
    half_width = t_quantile * resid_std * np.sqrt(1 + 1 / n_obs)
    predicted = predictions[loc]
    upper = predicted + half_width
    lower = predicted - half_width

    if Utils.is_between(lower, value, upper):
        return np.array([])
    if Utils.is_between(lower, predicted, upper):
        return predicted
    return work.median()
def extrapolate_moments(mus0, fac, extrapolation_mode="1/n"):
    """Extrapolate moments with an autoregressive model.

    The real moments are rescaled with the transform selected by
    ``extrapolation_mode``, an ``AutoReg`` model with constant and time trend
    is fitted on all of them, the missing moments are predicted and the
    result is transformed back.

    Parameters
    ----------
    mus0 : numpy.ndarray
        Known moments.  Must be (numerically) real.
    fac : float
        Extension factor; the result has ``int(fac * len(mus0))`` entries.
        Expected to be >= 1.
    extrapolation_mode : {"plain", "1/n", "power"}
        Transform applied before fitting: ``no_transform()``,
        ``power_transform(mus)`` or ``fit_power_transform(mus)``.

    Returns
    -------
    numpy.ndarray
        Complex array with the input moments followed by the predictions.

    Raises
    ------
    NotImplementedError
        If the largest imaginary part of ``mus0`` exceeds ``1e-4``.
    ValueError
        If ``extrapolation_mode`` is not one of the supported names.
    """
    # NOTE(review): only positive imaginary parts trigger this check;
    # confirm whether large negative ones should be rejected as well.
    if np.max(mus0.imag) > 1e-4:
        # A bare ``raise`` without an active exception only produced a
        # generic RuntimeError.  NotImplementedError is a RuntimeError
        # subclass, so existing handlers keep working.
        raise NotImplementedError(
            "extrapolation of complex moments is not implemented")
    if extrapolation_mode not in ("plain", "1/n", "power"):
        # Previously an unknown mode surfaced later as an unbound variable
        raise ValueError(
            "Unknown extrapolation_mode %r; choose 'plain', '1/n' or 'power'"
            % (extrapolation_mode,))

    mus = mus0.real
    if extrapolation_mode == "plain":
        ftrans, ftransinv = no_transform()
    elif extrapolation_mode == "1/n":
        ftrans, ftransinv = power_transform(mus)
    else:  # "power"
        ftrans, ftransinv = fit_power_transform(mus)
    mus = ftrans(mus)  # scale the moments

    T = len(mus)
    L = T  # the whole series is used as training data
    P = int(fac * T)  # total number of moments after extrapolation
    train = mus[0:L].real  # train data
    # Lag order grows with the fourth root of the sample size
    lags = round(12 * (len(train) / 100.)**(1 / 4.))
    model = AutoReg(train, lags=lags, trend="ct").fit(cov_type="HC1")
    pred = model.predict(start=L, end=P - 1)  # out-of-sample prediction
    # ``np.complex`` was removed in NumPy 1.24; the builtin is equivalent
    mus2 = np.zeros(P, dtype=complex)
    mus2[0:L] = mus[0:L]  # initial data
    mus2[L:P] = pred[:]  # predicted data
    mus2 = ftransinv(mus2)  # transform back
    return mus2
# Manual AR model: linear regression fitted and scored on the full feature set
m_full = LinearRegression()
m_full.fit(X_full, y_full)
# Store the in-sample predictions next to the training data
train['full_model'] = m_full.predict(X_full)
print(f'Training-Score (Manual AR): {round(m_full.score(X_full, y_full),3)}')
'''Cross-Validation'''
# Five splits generated by TimeSeriesSplit; the mean score is reported
time_series_split = TimeSeriesSplit(n_splits=5)
splits = time_series_split.split(X_full, y_full)
cv_manual_ar = cross_val_score(estimator=m_full, X=X_full, y=y_full, cv=splits)
print(f'CV-Score (Manual AR): {round(cv_manual_ar.mean(),3)}')
'''AutoRegressive Model - Statsmodels (on data taking into account trend and seasonality)'''
# Three lags plus the columns of X_season as exogenous regressors
ar_model = AutoReg(y_season, lags=3, exog=X_season).fit()
#print(ar_model.summary())
# predict() without arguments returns the in-sample predictions
prediction_ar = ar_model.predict()
'''ARIMA Model - Statsmodels (on data taking into account trend and seasonality) - very slow!!'''
# Disabled variant: ARIMA(1, 0, 1) with the same exogenous regressors
#arima_model = ARIMA(y_season, order=(1,0,1), exog=X_season).fit()
#print(arima_model.summary())
#prediction_arima = arima_model.predict()
'''ARIMA Model - only on remainder '''
# ARIMA(2, 0, 2) on `remainder`, declared with daily frequency
arima_model = ARIMA(remainder, order=(2, 0, 2), freq='D').fit()
prediction_arima = arima_model.predict()
# Name the series so that join() creates a column called 'Arima_lags'
prediction_arima.name = 'Arima_lags'
# Use the ARIMA prediction (2 AR lags, 2 MA terms) as an additional feature
# for a LinearRegression
X_arima = X_season.join(prediction_arima)
m_arima = LinearRegression()
m_arima.fit(X_arima, y_season)
def test_predict_errors():
    """Invalid ``AutoReg`` construction / prediction inputs raise ValueError.

    Three models are exercised: one without exog, one with exog and three
    lags, and one with exog and no lags.  Every case checks the start of the
    error message.
    """
    sim = gen_data(250, 2, True)

    # --- model without exogenous regressors -------------------------------
    plain = AutoReg(sim.endog, 3, old_names=False)
    plain_params = plain.fit().params
    for keyword in ('exog', 'exog_oos'):
        with pytest.raises(ValueError,
                           match='exog and exog_oos cannot be used'):
            plain.predict(plain_params, **{keyword: sim.exog})

    # --- invalid constructor arguments ------------------------------------
    with pytest.raises(ValueError, match='hold_back must be >= lags'):
        AutoReg(sim.endog, 3, hold_back=1, old_names=False)
    with pytest.raises(ValueError, match='freq cannot be inferred'):
        AutoReg(sim.endog.values, 3, seasonal=True, old_names=False)

    # --- model with exogenous regressors ----------------------------------
    with_exog = AutoReg(sim.endog, 3, exog=sim.exog, old_names=False)
    exog_params = with_exog.fit().params
    with pytest.raises(ValueError, match=r'The shape of exog \(200, 2\)'):
        with_exog.predict(exog_params, exog=sim.exog.iloc[:200])
    with pytest.raises(ValueError, match='The number of columns in exog_oos'):
        with_exog.predict(exog_params, exog_oos=sim.exog.iloc[:, :1])
    with pytest.raises(ValueError, match='Prediction must have `end` after'):
        with_exog.predict(exog_params, start=200, end=199)
    with pytest.raises(ValueError, match='exog_oos must be provided'):
        with_exog.predict(exog_params, end=250, exog_oos=None)

    # --- exog only, no lags: too few out-of-sample exog rows --------------
    exog_only = AutoReg(sim.endog, 0, exog=sim.exog, old_names=False)
    exog_only_params = exog_only.fit().params
    with pytest.raises(ValueError, match='start and end indicate that 10'):
        exog_only.predict(exog_only_params, end=259,
                          exog_oos=sim.exog.iloc[:5])
# `plot_predict` can be used to produce forecast plots along with # confidence intervals. Here we produce forecasts starting at the last # observation and continuing for 18 months. ind_prod.shape fig = res_glob.plot_predict(start=714, end=732) # The forecasts from the full model and the restricted model are very # similar. I also include an AR(5) which has very different dynamics res_ar5 = AutoReg(ind_prod, 5, old_names=False).fit() predictions = pd.DataFrame({ "AR(5)": res_ar5.predict(start=714, end=726), "AR(13)": res.predict(start=714, end=726), "Restr. AR(13)": res_glob.predict(start=714, end=726), }) _, ax = plt.subplots() ax = predictions.plot(ax=ax) # The diagnostics indicate the model captures most of the the dynamics in # the data. The ACF shows a patters at the seasonal frequency and so a more # complete seasonal model (`SARIMAX`) may be needed. fig = plt.figure(figsize=(16, 9)) fig = res_glob.plot_diagnostics(fig=fig, lags=30)
model_fit = model.fit() # save model to file model_fit.save('petrol_model.pkl') # save the differenced dataset numpy.save('petrol_data.npy', X) # save the last ob numpy.save('petrol_obs.npy', [series.values[-1]]) # load AR model from file and make a one-step prediction # load model model = AutoRegResults.load('petrol_model.pkl') data = numpy.load('petrol_data.npy') last_ob = numpy.load('petrol_obs.npy') # make prediction predictions = model.predict(start=len(data), end=len(data)) # transform prediction yhat = predictions[0] + last_ob[0] print('Prediction for next week: %f' % yhat) # # update the data for the manual model with a new observation once available # import numpy # # get real observation # observation = 48 # # update and save differenced observation # lag = numpy.load('man_data.npy') # last_ob = numpy.load('man_obs.npy') # diffed = observation - last_ob[0] # lag = numpy.append(lag[1:], [diffed], axis=0) # numpy.save('man_data.npy', lag) # # update and save real observation
# Rename the columns to the capitalised OHLC names and draw a candlestick
# chart of the rates
rates_frame.columns = [
    'time', 'Open', 'High', 'Low', 'Close', 'tick_volume', 'spread',
    'real_volume'
]
mpf.plot(rates_frame, type='candle')
#%%
# AR example
# from statsmodels.tsa.ar_model import AR #op1
from statsmodels.tsa.ar_model import AutoReg
from random import random
import matplotlib.pyplot as plt
# contrived dataset
# fit model
# model = AR(rates_frame.Close) #op1
# Replace the index with a daily date range that starts at as2[0] and has
# one entry per element of as2
rates_frame.index = pd.date_range(as2[0], periods=len(as2), freq='D')
# AR model on the Close column with 400 lags and seasonal dummies
model = AutoReg(rates_frame.Close, lags=400, seasonal=True).fit()
# model_fit = model.fit(maxlag=400)#op1
# make prediction
# yhat = model.predict('23:55:00','23:59:00')
# Predict from 10 observations before the end of the data to 500
# observations past it
yhat = model.predict(len(rates_frame.Close) - 10, len(rates_frame.Close) + 500)
plt.plot(rates_frame.Close)
plt.plot(yhat)
# plt.show()
# %%
# %%
# Fill missing values with the next valid observation
y = y.fillna(y.bfill())
# show data
y.plot(figsize=(15, 6))
plt.show()

#### Autoregression (AR) ####
# AR models the next step in the sequence as a linear function of the
# observations at prior time steps.  AR(1) is a first-order AR model.
# The AR method is best for univariate time series WITHOUT TREND and
# SEASONAL COMPONENTS.
from statsmodels.tsa.ar_model import AutoReg
ar_model = AutoReg(y, lags=1)
ar_model = ar_model.fit()
# start == end == len(y): one-step forecast right after the last observation
ar_yhat = ar_model.predict(len(y), len(y))
print(ar_yhat)

#### Moving Average (MA) ####
# MA models the next step as a linear function of the residual errors from a
# mean process at prior time steps.  The model below is a first-order MA
# model, MA(1).  The MA method is best for univariate time series WITHOUT
# TREND AND SEASONAL COMPONENTS.
#
# ``statsmodels.tsa.arima_model.ARMA`` was removed in statsmodels 0.13.  The
# replacement is ``statsmodels.tsa.arima.model.ARIMA``; ARMA(p, q) becomes
# ARIMA(p, 0, q), and its ``fit`` has no ``disp`` argument.  Estimates can
# differ slightly from the old class because the estimator is different.
from statsmodels.tsa.arima.model import ARIMA
ma_model = ARIMA(y, order=(0, 0, 1))
ma_model = ma_model.fit()
ma_yhat = ma_model.predict(len(y), len(y))
print(ma_yhat)
def test_ar_model_predict(ar2):
    """Results-level and model-level ``predict`` agree.

    An AR(2) model is fitted on the first ten observations of ``ar2``; the
    default prediction of the results object must equal the prediction of
    the model evaluated at the fitted parameters.
    """
    ar_model = AutoReg(ar2[:10], 2)
    fitted = ar_model.fit()

    from_results = fitted.predict()
    from_model = ar_model.predict(fitted.params)

    assert_allclose(from_results, from_model)
#print fit optimal_type = 'c' optimal_lag = 1 startDate = kalmanData.index[startTrain] + dt.timedelta(days=optimal_lag) endDate = kalmanData.index[-1] pred_dates = pd.date_range(start=startDate, end=endDate) end_train = math.floor(len(remain_data) * 0.2) test = remain_data[-end_train:] train = remain_data[:-end_train] model = AutoReg(train, lags=optimal_lag, trend=optimal_type) model_fit = model.fit() coeff = model_fit.params predictions = np.concatenate( (np.array(model.predict(coeff, optimal_lag, len(train) - 1)), np.array(pred(train, test, optimal_lag, coeff)))) print( "basic rmse", math.sqrt(mean_squared_error(predictions, remain_data[optimal_lag:]))) print("test set rmse", math.sqrt(mean_squared_error(predictions[-23:], remain_data[-23:]))) print( "rmse with persistence model", math.sqrt( mean_squared_error(predictions[1:], remain_data[optimal_lag:-1]))) plt.plot(dates_train[optimal_lag:-1], predictions[1:]) plt.plot(dates_train[optimal_lag:-1], remain_data[optimal_lag:-1]) plt.show()
import matplotlib.pyplot as plt
# Row of `data` to model, AR order, and number of leading samples used for
# fitting
series = 4
ARorder = 2
trainEnd = 70
data = tnsrFile2numpy('data.npz')
# LaTeX labels for the plot legend, indexed by row of `data`
snames = [
    '$\\cos(x)$', '$e^{-ax}$', '$e^{ax}$',
    '$a_1x^5 + a_2x^4 + a_3x^3 + a_4x^2 + a_5 x $',
    '$\\frac{ 1 - e^{-(p+q)t} }{ 1 + (p/q)e^{-(p+q)t} }$', '$\\sqrt{x}$',
    '$ax$', '$0$', '$x^2$'
]
datT = data[:, 0:trainEnd]  # training data (AR fit)
print('=================== AR(', ARorder, ') ===================')
mod = AutoReg(datT[series], ARorder, old_names=False)
res = mod.fit()
print(res.summary())
# Predict up to observation 100, i.e. beyond the trainEnd training samples
p = mod.predict(res.params, end=100)
plt.title('AR prediction (right of red line=predicted, left=training)')
plt.plot(p, label='AR(%d) predicted' % (ARorder))
plt.plot(data[series], label='True (%s)' % (snames[series]))
# Red vertical line marks the end of the training range
plt.axvline(trainEnd, c='r')
plt.legend()
plt.savefig('images/AR_%d_%d_%d.png' % (ARorder, series, trainEnd),
            dpi=200, bbox_inches='tight')
plt.show()
def calculate_psd(rr_intervals, method='welch', hr_sampling_frequency=4,
                  power_type='density', max_lag=3):
    """
    Returns the frequency and spectral power from the rr intervals.
    This method is used to compute HRV frequency domain features

    Parameters
    ---------
    rr_intervals : array-like
        list of RR interval (in ms)
    method : str
        Method used to calculate the psd. Available methods are:
        'welch': apply Welch's method to the interpolated, mean-removed
        series
        'lomb': apply the Lomb-Scargle periodogram to the raw series
        'ar': compute a periodogram and return the predictions of an
        autoregressive model fitted on it
    hr_sampling_frequency : int
        Sampling frequency used for the interpolation ('welch'), as the upper
        bound of the frequency grid ('lomb') and as the sampling frequency of
        the periodogram ('ar'). Common values range from 1 Hz to 10 Hz,
        by default set to 4 Hz.
    power_type : str
        Only used by 'ar'; passed as ``scaling`` to
        ``scipy.signal.periodogram`` ('density' or 'spectrum').
    max_lag : int
        Only used by 'ar'; number of lags of the autoregressive model.

    Returns
    ---------
    freq : array
        Frequency of the corresponding psd points.
    psd : array
        Power Spectral Density of the signal.

    Raises
    ---------
    ValueError
        If ``method`` is not one of 'welch', 'lomb' or 'ar'.
    """
    # Fail fast on an unknown method, before any computation is done
    if method not in ('welch', 'lomb', 'ar'):
        raise ValueError("Not a valid method. Choose between 'ar', 'lomb' "
                         "and 'welch'")

    ts_rr, bpm_list = get_time_and_bpm(rr_intervals)

    if method == 'welch':
        nni_interpolation = get_interpolated_nn(ts_rr, bpm_list,
                                                hr_sampling_frequency)
        # ---------- Remove DC Component ---------- #
        nni_normalized = nni_interpolation - np.mean(nni_interpolation)
        # --------- Compute Power Spectral Density --------- #
        freq, psd = signal.welch(x=nni_normalized, fs=hr_sampling_frequency,
                                 window='hann', nfft=4096)
    elif method == 'lomb':
        freq = np.linspace(0, hr_sampling_frequency, 2**8)
        # lombscargle expects *angular* frequencies: omega = 2 * pi * f.
        # The previous ``2 * pi / freq`` divided by zero at freq[0] and fed
        # period-like values to the periodogram.
        a_frequencies = np.asarray(2 * np.pi * freq)
        psd = signal.lombscargle(ts_rr, rr_intervals, a_frequencies,
                                 normalize=True)
    else:  # method == 'ar'
        freq, psd_ = signal.periodogram(rr_intervals, hr_sampling_frequency,
                                        window='boxcar', nfft=None,
                                        detrend='constant',
                                        return_onesided=True,
                                        scaling=power_type, axis=-1)
        model = AutoReg(psd_, max_lag)
        res = model.fit()
        # In-sample predictions of the AR model fitted on the periodogram.
        # NOTE(review): confirm the prediction has the same length as `freq`.
        psd = model.predict(res.params)

    return freq, psd
class AutoRegImplementation(ModelImplementation):
    """Time series forecasting node backed by ``AutoReg`` with two lags.

    The lags are read from the ``lag_1`` and ``lag_2`` entries of the
    parameters given to the constructor.
    """

    def __init__(self, log: Log = None, **params):
        super().__init__(log)
        self.params = params
        self.actual_ts_len = None
        self.autoreg = None

    def fit(self, input_data):
        """ Fit the AR model on the data

        :param input_data: data with features, target and ids to process
        :return: fitted ``AutoReg`` results
        """
        series = np.array(input_data.features)
        self.actual_ts_len = len(series)

        lags = [int(self.params.get(name)) for name in ('lag_1', 'lag_2')]
        self.autoreg = AutoReg(series, lags=lags).fit()
        return self.autoreg

    def predict(self, input_data, is_fit_pipeline_stage: bool):
        """ Time series prediction on the forecast length

        :param input_data: data with features, target and ids to process
        :param is_fit_pipeline_stage: is this fit or predict stage for pipeline
        :return output_data: output data with the predicted values as a table
        """
        input_data = copy(input_data)
        horizon = input_data.task.task_params.forecast_length
        source_idx = input_data.idx
        source_target = input_data.target

        if not is_fit_pipeline_stage:
            # Predict stage: a single row covering the last `horizon` indices
            last_id = source_idx[-1]
            first_id = source_idx[-1] - horizon + 1
            forecast = self.autoreg.predict(start=first_id, end=last_id)
            predict = np.array(forecast).reshape(1, -1)
            input_data.idx = np.arange(first_id, last_id + 1)
        else:
            in_sample = self.autoreg.predict(start=source_idx[0],
                                             end=source_idx[-1])
            # The prediction can be shorter than the fitted series; pad the
            # front with copies of its first value
            n_missing = self.actual_ts_len - len(in_sample)
            padded = np.array([in_sample[0]] * n_missing + list(in_sample))

            # Lagged tables for the fitted values and for the target
            _, predict = _ts_to_table(idx=source_idx,
                                      time_series=padded,
                                      window_size=horizon)
            table_idx, table_target = _ts_to_table(idx=source_idx,
                                                   time_series=source_target,
                                                   window_size=horizon)
            input_data.idx = table_idx
            input_data.target = table_target

        return self._convert_to_output(input_data,
                                       predict=predict,
                                       data_type=DataTypesEnum.table)

    def get_params(self):
        return self.params
arima = SARIMAX(adf, order=(2, 1, 0), trend='c').fit() fig = arima.plot_diagnostics(figsize=(10, 6)) plt.tight_layout(pad=2) plt.savefig(os.path.join(imgdir, 'ar.jpg')) plt.show() arima.summary() # Forecasting ## One-step ahead predictions model = AutoReg(df_train, lags=lags, old_names=False).fit() print(model.summary()) # Observations to predict are from the test split from sklearn.metrics import mean_squared_error all_dates = AutoReg(df, lags=lags, old_names=False) df_pred = all_dates.predict(model.params, start=df_train.index[-1]).shift(1).iloc[1:] mse = mean_squared_error(df_test, df_pred) var = np.mean(np.square(df_test - df_train.mean())) print(f"Short-term Forecasts: rmse={np.sqrt(mse):.6f} r2={1-mse/var:.4f}") fig, ax = plt.subplots(clear=True, num=1, figsize=(4, 6)) df_pred.plot(ax=ax, c='C0') df_test.plot(ax=ax, c='C1') ax.legend(['Predicted', 'Actual']) ax.set_title(s + " (one-step forecasts)") plt.tight_layout(pad=2) plt.savefig(os.path.join(imgdir, 'short.jpg')) plt.show() # Multi-step ahead predictions df_pred = all_dates.predict(model.params, start=df_train.index[-1],