예제 #1
0
def predict(n_periods):
    """Forecast humidity and temperature and serialise them with ``dumps``.

    An AR(1) model is fitted to each of the ``HUM`` and ``TEMP`` columns of
    the data frame built from the ``'mongodb'`` data source.  Predictions
    are requested for observation numbers ``0 .. n_periods`` and the entries
    ``1 .. n_periods - 1`` are reported, each labelled with an hour of the
    day (``x % 24``).

    :param n_periods: last observation number requested from the models;
        ``n_periods - 1`` entries are reported.
    :return: ``dumps({"predicciones": [{"hour": ..., "temp": ...,
        "hum": ...}, ...]})``.
    """
    datos = get_data_database('mongodb')
    df = define_dataframe(datos)

    hum_fit = AutoReg(df.HUM, lags=1).fit()
    temp_fit = AutoReg(df.TEMP, lags=1).fit()

    # Predict from the fitted results so the estimated coefficients are
    # used; the previous code passed an uninitialised array as parameters.
    # np.asarray makes the positional indexing below independent of the
    # index type of the returned predictions.
    fc = np.asarray(hum_fit.predict(start=0, end=n_periods))
    fc2 = np.asarray(temp_fit.predict(start=0, end=n_periods))

    print(fc)
    # Build real Python objects and let ``dumps`` produce valid JSON instead
    # of concatenating a malformed string by hand.
    predicciones = [
        {
            'hour': str(datetime.time(x % 24, 0)),
            'temp': float(fc2[x + 1]),
            'hum': float(fc[x + 1]),
        }
        for x in range(n_periods - 1)
    ]

    return dumps({'predicciones': predicciones})
예제 #2
0
def extrapolate_moments(mus, fac):
    """Extend a sequence of moments with an autoregressive forecast.

    An ``AutoReg`` model with a constant and a linear trend (``"ct"``) is
    fitted to the real part of all ``len(mus)`` moments; the moments with
    index ``len(mus) .. P - 1`` are then taken from the model forecast.

    :param mus: array of moments (must support ``.real`` and slicing).
    :param fac: extension factor; the result has ``P = int(fac * len(mus))``
        entries.  NOTE(review): the code assumes ``P > len(mus)``.
    :return: complex array of length ``P`` holding the original moments
        followed by the forecast ones.
    """
    T = len(mus)
    L = T  # the whole sequence is used as training data
    P = int(fac * T)  # total number of moments after extrapolation
    train = mus[0:L].real  # train data
    # Lag order grows with the fourth root of the training length
    lags = round(12 * (len(train) / 100.)**(1 / 4.))
    model = AutoReg(train, lags=lags,
                    trend="ct").fit(cov_type="HC1")  # get the model

    pred = model.predict(start=L, end=P - 1)  # out-of-sample forecast
    # ``np.complex`` was removed from NumPy; the builtin is equivalent
    mus2 = np.zeros(P, dtype=complex)
    mus2[0:L] = mus[0:L]  # initial data
    mus2[L:P] = pred[:]  # predicted data
    return mus2
예제 #3
0
def model(columna, n_periods):
    """Forecast ``n_periods`` values of ``columna`` with an autoregression.

    The lags are chosen by ``ar_select_order`` (at most 15) on the flattened
    data, a model with a constant is fitted with those lags, and the
    predictions for the ``n_periods`` observations following the sample are
    returned.
    """
    order = ar_select_order(columna.ravel(), maxlag=15, old_names=True)
    fitted = AutoReg(columna,
                     lags=order.ar_lags,
                     trend='c',
                     old_names=True).fit()
    first_step = len(columna)
    last_step = len(columna) + n_periods - 1
    return fitted.predict(start=first_step, end=last_step, dynamic=False)
예제 #4
0
    def get_forward():
        """Return forward predictions of an AutoReg model fitted to ``data``.

        Reads ``data``, ``desired`` and ``lag`` from the enclosing scope.
        Predictions start right after the last observation and span
        ``max(desired) - max(data.keys())`` steps.
        """
        first = len(data)
        last = len(data) + max(desired) - max(data.keys()) - 1

        ar = AutoReg(list(data.values()), lag)
        fitted = ar.fit()
        return list(ar.predict(fitted.params, start=first, end=last))
예제 #5
0
    def get_backward():
        """Return backward predictions of an AutoReg model fitted to ``data``.

        Reads ``data``, ``desired`` and ``lag`` from the enclosing scope.
        The model is fitted to the values in reverse order, predicted
        ``min(data.keys()) - min(desired)`` steps past its end, and the
        predictions are reversed again before being returned.
        """
        first = len(data)
        last = len(data) + min(data.keys()) - min(desired) - 1

        print(first, last)

        reversed_values = list(data.values())[::-1]
        ar = AutoReg(reversed_values, lag)
        fitted = ar.fit()
        predicted = list(ar.predict(fitted.params, start=first, end=last))
        return predicted[::-1]
예제 #6
0
def test_dynamic_against_sarimax():
    """AutoReg predictions must match SARIMAX ones for the same parameters.

    A simulated AR(1) series (coefficient 0.9, 1001 points) is fitted with
    SARIMAX; its first two parameters are fed to ``AutoReg.predict`` and the
    predictions for observations 900..1100 are compared without ``dynamic``,
    with ``dynamic=True`` and with ``dynamic=50``.
    """
    nobs = 1001
    rng = np.random.RandomState(12345678)
    shocks = rng.standard_normal(nobs)
    y = np.empty(nobs)
    # Scale the first shock by sqrt(1 / (1 - 0.9**2))
    y[0] = shocks[0] * np.sqrt(1.0 / (1 - 0.9**2))
    for t in range(1, nobs):
        y[t] = 0.9 * y[t - 1] + shocks[t]

    sarimax_res = SARIMAX(y, order=(1, 0, 0), trend='c').fit(disp=False)
    ar_mod = AutoReg(y, 1, old_names=False)

    for extra in ({}, {'dynamic': True}, {'dynamic': 50}):
        expected = sarimax_res.predict(900, 1100, **extra)
        actual = ar_mod.predict(sarimax_res.params[:2], 900, 1100, **extra)
        assert_allclose(expected, actual)
예제 #7
0
def test_autoreg_predict_smoke(ar_data):
    """Smoke test: ``AutoReg.predict`` runs for the given configuration.

    Nothing is asserted about the values. When the endogenous data is a
    pandas Series (and the model is either non-seasonal or has an explicit
    period), the model is rebuilt on integer-indexed data, without
    ``hold_back``, and predicted again with the originally fitted parameters.
    """
    common = dict(trend=ar_data.trend,
                  seasonal=ar_data.seasonal,
                  exog=ar_data.exog,
                  period=ar_data.period,
                  missing=ar_data.missing,
                  old_names=False)
    mod = AutoReg(ar_data.endog,
                  ar_data.lags,
                  hold_back=ar_data.hold_back,
                  **common)
    res = mod.fit()

    has_exog = ar_data.exog is not None
    exog_oos = np.empty((1, ar_data.exog.shape[1])) if has_exog else None

    mod.predict(res.params, 0, 250, exog_oos=exog_oos)
    if ar_data.lags == 0 and not has_exog:
        mod.predict(res.params, 0, 350, exog_oos=exog_oos)

    if not isinstance(ar_data.endog, pd.Series):
        return
    if ar_data.seasonal and ar_data.period is None:
        return

    # Replace the indexes in place with plain integer positions
    ar_data.endog.index = list(range(ar_data.endog.shape[0]))
    if has_exog:
        ar_data.exog.index = list(range(ar_data.endog.shape[0]))
    mod = AutoReg(ar_data.endog, ar_data.lags, **common)
    mod.predict(res.params, 0, 250, exog_oos=exog_oos)
예제 #8
0
    def impute(self,
               value: float,
               sample: pd.Series,
               conf: float = 0.95) -> float:
        """
        Imputes outlier values using Auto Regressive method with two lags

        The first occurrence of ``value`` in ``sample`` is replaced by the
        sample median, an AR(2) model without trend is fitted, and a
        two-sided t-interval is built around the model prediction at that
        position.

        **Parameters**

        * **:param value:** (float) candidate outlier; must occur in ``sample``
        * **:param sample:** (pd.Series) observations; a single-column
          DataFrame is accepted as well. The caller's object is not modified.
        * **:param conf:** (float) confidence level of the interval

        **returns**

        * **value:** (float) the model prediction when it lies inside the
          interval, otherwise the sample median. An empty ``np.ndarray`` is
          returned when ``value`` itself lies inside the interval.

        **raises**

        * **IndexError:** if ``value`` does not occur in ``sample``
        * **ValueError:** if ``sample`` has more than one column

        """
        qq = 1 - (1 - conf) / 2

        # Work on a flat, positionally indexed float copy. The previous code
        # used ``sample.iloc[loc, :]``, which fails for a Series, and located
        # ``value`` inside the filtered sample, which is always position 0
        # for a Series.
        data = np.array(sample, dtype=float)
        if data.ndim > 1 and data.shape[1] != 1:
            raise ValueError('sample must be one-dimensional')
        series = pd.Series(data.ravel())

        matches = np.flatnonzero(series.to_numpy() == value)
        if matches.size == 0:
            raise IndexError('value does not occur in sample')
        loc = int(matches[0])

        # Mask the candidate and fill every gap with the median
        series.iloc[loc] = np.nan
        series = series.fillna(series.median())
        n_obs = len(series)

        model = AutoReg(series.to_numpy(), lags=2, trend='n',
                        old_names=False).fit()
        ss = np.std(model.resid)

        predictions = model.predict(start=0, end=n_obs + 1)

        percent = stats.t.ppf(q=qq, df=n_obs - 1)
        margin = percent * ss * np.sqrt(1 + 1 / n_obs)
        max_lim = predictions[loc] + margin
        min_lim = predictions[loc] - margin

        if Utils.is_between(min_lim, value, max_lim):
            # Not an outlier: signalled with an empty array
            return np.array([])

        if Utils.is_between(min_lim, predictions[loc], max_lim):
            return predictions[loc]

        return series.median()
예제 #9
0
def extrapolate_moments(mus0,fac,extrapolation_mode="1/n"):
    """Extend a sequence of real moments with an autoregressive forecast.

    The moments are rescaled with the transform selected by
    ``extrapolation_mode``, an ``AutoReg`` model with a constant and a
    linear trend (``"ct"``) is fitted to all of them, the moments with index
    ``len(mus0) .. P - 1`` are taken from the model forecast, and the
    inverse transform is applied to the result.

    :param mus0: array of moments; the imaginary part must be negligible.
    :param fac: extension factor; the result has ``P = int(fac * len(mus0))``
        entries.  NOTE(review): the code assumes ``P > len(mus0)``.
    :param extrapolation_mode: ``"plain"`` (``no_transform``), ``"1/n"``
        (``power_transform``) or ``"power"`` (``fit_power_transform``).
    :return: array of length ``P`` returned by the inverse transform.
    :raises NotImplementedError: if ``mus0`` has an imaginary part larger
        than ``1e-4`` in magnitude.
    :raises ValueError: if ``extrapolation_mode`` is not recognised.
    """
    # NotImplementedError is a RuntimeError subclass, so callers that caught
    # the RuntimeError produced by the former bare ``raise`` still work.
    if np.max(np.abs(mus0.imag)) > 1e-4:
        raise NotImplementedError(
            "extrapolation of complex moments is not implemented")
    mus = mus0.real
    if extrapolation_mode == "plain":
        ftrans, ftransinv = no_transform()
    elif extrapolation_mode == "1/n":
        ftrans, ftransinv = power_transform(mus)
    elif extrapolation_mode == "power":
        ftrans, ftransinv = fit_power_transform(mus)
    else:
        raise ValueError(
            "Unknown extrapolation_mode: " + repr(extrapolation_mode))
    mus = ftrans(mus)  # scale the moments
    T = len(mus)
    L = T  # the whole sequence is used as training data
    P = int(fac * T)  # total number of moments after extrapolation
    train = mus[0:L].real  # train data
    # Lag order grows with the fourth root of the training length
    lags = round(12 * (len(train) / 100.)**(1 / 4.))
    model = AutoReg(train, lags=lags, trend="ct").fit(cov_type="HC1")

    pred = model.predict(start=L, end=P - 1)  # out-of-sample forecast
    # ``np.complex`` was removed from NumPy; the builtin is equivalent
    mus2 = np.zeros(P, dtype=complex)
    mus2[0:L] = mus[0:L]  # initial data
    mus2[L:P] = pred[:]  # predicted data
    mus2 = ftransinv(mus2)  # transform back
    return mus2
예제 #10
0
# Fit a linear regression on the full feature matrix and store its in-sample
# predictions in the 'full_model' column of `train`.
m_full = LinearRegression()
m_full.fit(X_full, y_full)

train['full_model'] = m_full.predict(X_full)
print(f'Training-Score (Manual AR): {round(m_full.score(X_full, y_full),3)}')
'''Cross-Validation'''

# Score the same estimator on five time-ordered splits and report the mean.
time_series_split = TimeSeriesSplit(n_splits=5)
splits = time_series_split.split(X_full, y_full)
cv_manual_ar = cross_val_score(estimator=m_full, X=X_full, y=y_full, cv=splits)
print(f'CV-Score (Manual AR): {round(cv_manual_ar.mean(),3)}')
'''AutoRegressive Model - Statsmodels (on data taking into account trend and seasonality)'''

# AR model with three lags and X_season as exogenous regressors; predict()
# without arguments returns the in-sample predictions.
ar_model = AutoReg(y_season, lags=3, exog=X_season).fit()
#print(ar_model.summary())
prediction_ar = ar_model.predict()
'''ARIMA Model - Statsmodels (on data taking into account trend and seasonality) - very slow!!'''

# Disabled: ARIMA(1, 0, 1) on y_season with exogenous regressors.
#arima_model = ARIMA(y_season, order=(1,0,1), exog=X_season).fit()
#print(arima_model.summary())
#prediction_arima = arima_model.predict()
'''ARIMA Model - only on remainder '''

# ARIMA(2, 0, 2) on `remainder` with daily frequency; the prediction series
# is named so it becomes the 'Arima_lags' column after the join below.
arima_model = ARIMA(remainder, order=(2, 0, 2), freq='D').fit()
prediction_arima = arima_model.predict()
prediction_arima.name = 'Arima_lags'

# Use the ARIMA prediction (two AR lags, two MA terms) as an additional
# feature for a LinearRegression on y_season.
X_arima = X_season.join(prediction_arima)
m_arima = LinearRegression()
m_arima.fit(X_arima, y_season)
예제 #11
0
def test_predict_errors():
    """Invalid ``AutoReg`` construction and ``predict`` calls raise ValueError.

    Three models are exercised: one without exogenous data, one with lags
    and exogenous data, and one with exogenous data but zero lags.
    """
    data = gen_data(250, 2, True)

    # Model without exog: any exog argument to predict is rejected
    plain_mod = AutoReg(data.endog, 3, old_names=False)
    plain_res = plain_mod.fit()
    with pytest.raises(ValueError, match='exog and exog_oos cannot be used'):
        plain_mod.predict(plain_res.params, exog=data.exog)
    with pytest.raises(ValueError, match='exog and exog_oos cannot be used'):
        plain_mod.predict(plain_res.params, exog_oos=data.exog)

    # Constructor validation
    with pytest.raises(ValueError, match='hold_back must be >= lags'):
        AutoReg(data.endog, 3, hold_back=1, old_names=False)
    with pytest.raises(ValueError, match='freq cannot be inferred'):
        AutoReg(data.endog.values, 3, seasonal=True, old_names=False)

    # Model with exog: shape, column count, range and missing exog_oos
    exog_mod = AutoReg(data.endog, 3, exog=data.exog, old_names=False)
    exog_res = exog_mod.fit()
    with pytest.raises(ValueError, match=r'The shape of exog \(200, 2\)'):
        exog_mod.predict(exog_res.params, exog=data.exog.iloc[:200])
    with pytest.raises(ValueError, match='The number of columns in exog_oos'):
        exog_mod.predict(exog_res.params, exog_oos=data.exog.iloc[:, :1])
    with pytest.raises(ValueError, match='Prediction must have `end` after'):
        exog_mod.predict(exog_res.params, start=200, end=199)
    with pytest.raises(ValueError, match='exog_oos must be provided'):
        exog_mod.predict(exog_res.params, end=250, exog_oos=None)

    # Zero-lag model: too few out-of-sample exog rows
    nolag_mod = AutoReg(data.endog, 0, exog=data.exog, old_names=False)
    nolag_res = nolag_mod.fit()
    with pytest.raises(ValueError, match='start and end indicate that 10'):
        nolag_mod.predict(nolag_res.params,
                          end=259,
                          exog_oos=data.exog.iloc[:5])
예제 #12
0
# `plot_predict` can be used to produce forecast plots along with
# confidence intervals. Here we produce forecasts starting at the last
# observation and continuing for 18 months.

ind_prod.shape

fig = res_glob.plot_predict(start=714, end=732)

# The forecasts from the full model and the restricted model are very
# similar. An AR(5), which has very different dynamics, is also included.

res_ar5 = AutoReg(ind_prod, 5, old_names=False).fit()
# One column per model, each predicting observations 714 through 726.
predictions = pd.DataFrame({
    "AR(5)":
    res_ar5.predict(start=714, end=726),
    "AR(13)":
    res.predict(start=714, end=726),
    "Restr. AR(13)":
    res_glob.predict(start=714, end=726),
})
_, ax = plt.subplots()
ax = predictions.plot(ax=ax)

# The diagnostics indicate the model captures most of the dynamics in
# the data. The ACF shows a pattern at the seasonal frequency, so a more
# complete seasonal model (`SARIMAX`) may be needed.

fig = plt.figure(figsize=(16, 9))
fig = res_glob.plot_diagnostics(fig=fig, lags=30)
예제 #13
0
# Fit the model, then persist everything needed to forecast later: the
# fitted results, the array X, and the last raw observation.
model_fit = model.fit()
# save model to file
model_fit.save('petrol_model.pkl')
# save the differenced dataset
numpy.save('petrol_data.npy', X)
# save the last observation (wrapped in a list so it is stored as an array)
numpy.save('petrol_obs.npy', [series.values[-1]])

# load AR model from file and make a one-step prediction

# load model
model = AutoRegResults.load('petrol_model.pkl')
data = numpy.load('petrol_data.npy')
last_ob = numpy.load('petrol_obs.npy')
# make prediction for the single step that follows the stored data
predictions = model.predict(start=len(data), end=len(data))
# transform prediction: add the last observation back to the predicted value
yhat = predictions[0] + last_ob[0]
print('Prediction for next week: %f' % yhat)

# # update the data for the manual model with a new observation once available
# import numpy
# # get real observation
# observation = 48
# # update and save differenced observation
# lag = numpy.load('man_data.npy')
# last_ob = numpy.load('man_obs.npy')
# diffed = observation - last_ob[0]
# lag = numpy.append(lag[1:], [diffed], axis=0)
# numpy.save('man_data.npy', lag)
# # update and save real observation
예제 #14
0
# Rename the columns of the rates table and draw it as a candlestick chart.
rates_frame.columns = [
    'time', 'Open', 'High', 'Low', 'Close', 'tick_volume', 'spread',
    'real_volume'
]
mpf.plot(rates_frame, type='candle')

#%%

# AR example
# from statsmodels.tsa.ar_model import AR #op1
from statsmodels.tsa.ar_model import AutoReg
from random import random
import matplotlib.pyplot as plt
# contrived dataset
# fit model
# model = AR(rates_frame.Close) #op1
# Give the frame a daily DatetimeIndex starting at as2[0].
# NOTE(review): presumably needed so the seasonal model below can infer a
# period from the index frequency - confirm.
rates_frame.index = pd.date_range(as2[0], periods=len(as2), freq='D')
model = AutoReg(rates_frame.Close, lags=400, seasonal=True).fit()
# model_fit = model.fit(maxlag=400)#op1
# make prediction
# yhat = model.predict('23:55:00','23:59:00')
# Predict from 10 observations before the end of the sample to 500
# observations past it.
yhat = model.predict(len(rates_frame.Close) - 10, len(rates_frame.Close) + 500)

plt.plot(rates_frame.Close)
plt.plot(yhat)
# plt.show()

# %%

# %%
예제 #15
0
# Fill each missing observation with the next valid one
y = y.fillna(y.bfill())

# show data
y.plot(figsize=(15,6))
plt.show()

#### Autoregression (AR) ####
# AR models the next step in the sequence as a linear function of the
# observations at prior time steps. AR(1) is a first-order AR model.
# The AR method is best for univariate time series WITHOUT TREND and
# SEASONAL COMPONENTS.
from statsmodels.tsa.ar_model import AutoReg
ar_model = AutoReg(y, lags=1)
ar_model = ar_model.fit()
# One-step-ahead forecast: the first observation after the sample
ar_yhat = ar_model.predict(len(y), len(y))
print(ar_yhat)


#### Moving Average (MA) ####
# MA models the next step as a linear function of the residual errors from
# a mean process at prior time steps. The model fitted here is MA(1), i.e.
# a first-order MA model. The MA method is best for univariate time series
# WITHOUT TREND AND SEASONAL COMPONENTS.
#
# `statsmodels.tsa.arima_model.ARMA` has been removed from statsmodels; an
# ARIMA model of order (0, 0, 1) is the same MA(1) specification.
from statsmodels.tsa.arima.model import ARIMA
ma_model = ARIMA(y, order=(0, 0, 1))
ma_model = ma_model.fit()
ma_yhat = ma_model.predict(len(y), len(y))
print(ma_yhat)
예제 #16
0
def test_ar_model_predict(ar2):
    """Results-level and model-level default predictions must agree."""
    ar_mod = AutoReg(ar2[:10], 2)
    fitted = ar_mod.fit()
    from_results = fitted.predict()
    from_model = ar_mod.predict(fitted.params)
    assert_allclose(from_results, from_model)
예제 #17
0
    #print fit
    # Model configuration: constant trend and a single lag.
    optimal_type = 'c'
    optimal_lag = 1

    # Date range from `optimal_lag` days after the start of training to the
    # last date in kalmanData.
    startDate = kalmanData.index[startTrain] + dt.timedelta(days=optimal_lag)
    endDate = kalmanData.index[-1]
    pred_dates = pd.date_range(start=startDate, end=endDate)
    # Despite its name, end_train is the size of the test split (20% of the
    # data); the remaining leading part is used for training.
    end_train = math.floor(len(remain_data) * 0.2)

    test = remain_data[-end_train:]
    train = remain_data[:-end_train]
    model = AutoReg(train, lags=optimal_lag, trend=optimal_type)
    model_fit = model.fit()
    coeff = model_fit.params
    # In-sample predictions on the training part followed by the output of
    # `pred` for the test part.
    # NOTE(review): `pred` is defined outside this view - presumably a
    # walk-forward forecast over `test`; confirm.
    predictions = np.concatenate(
        (np.array(model.predict(coeff, optimal_lag,
                                len(train) - 1)),
         np.array(pred(train, test, optimal_lag, coeff))))

    print(
        "basic rmse",
        math.sqrt(mean_squared_error(predictions, remain_data[optimal_lag:])))
    # NOTE(review): the window of 23 is hard-coded rather than derived from
    # end_train - confirm it matches the test size.
    print("test set rmse",
          math.sqrt(mean_squared_error(predictions[-23:], remain_data[-23:])))
    # Compares each prediction with the observation one step earlier.
    print(
        "rmse with persistence model",
        math.sqrt(
            mean_squared_error(predictions[1:], remain_data[optimal_lag:-1])))

    plt.plot(dates_train[optimal_lag:-1], predictions[1:])
    plt.plot(dates_train[optimal_lag:-1], remain_data[optimal_lag:-1])
    plt.show()
예제 #18
0
파일: AR.py 프로젝트: aruymgaart/AMATH
import matplotlib.pyplot as plt

# Configuration: which row of the data to model, the AR order, and the
# number of leading samples used for fitting.
series = 4
ARorder = 2
trainEnd = 70

data = tnsrFile2numpy('data.npz')
# Plot labels, indexed by `series` below.
snames = [
    '$\\cos(x)$', '$e^{-ax}$', '$e^{ax}$',
    '$a_1x^5 + a_2x^4 + a_3x^3 + a_4x^2 + a_5 x $',
    '$\\frac{ 1 - e^{-(p+q)t}  }{  1 + (p/q)e^{-(p+q)t}  }$', '$\\sqrt{x}$',
    '$ax$', '$0$', '$x^2$'
]
datT = data[:, 0:trainEnd]  # training data (AR fit)

print('===================  AR(', ARorder, ') ===================')
mod = AutoReg(datT[series], ARorder, old_names=False)
res = mod.fit()
print(res.summary())

# Predict up to observation 100, i.e. past the trainEnd training samples.
p = mod.predict(res.params, end=100)
plt.title('AR prediction (right of red line=predicted, left=training)')
plt.plot(p, label='AR(%d) predicted' % (ARorder))
plt.plot(data[series], label='True (%s)' % (snames[series]))
# Red vertical line at the end of the training range
plt.axvline(trainEnd, c='r')
plt.legend()
plt.savefig('images/AR_%d_%d_%d.png' % (ARorder, series, trainEnd),
            dpi=200,
            bbox_inches='tight')
plt.show()
예제 #19
0
def calculate_psd(rr_intervals,
                  method='welch',
                  hr_sampling_frequency=4,
                  power_type='density',
                  max_lag=3):
    """
    Returns the frequency and spectral power from the rr intervals.
    This method is used to compute HRV frequency domain features

    Parameters
    ---------
    rr_intervals : array-like
        list of RR interval (in ms)
    method : str
        Method used to calculate the psd or powerband or spectrogram.
        available methods are:
        'welch': apply welch method to the interpolated, mean-removed signal
        'lomb': apply lomb-scargle method on 256 frequencies in
        (0, hr_sampling_frequency]
        'ar': compute the periodogram (scaled according to power_type),
        fit an autoregressive model of order max_lag to it and return the
        model's in-sample predictions

    hr_sampling_frequency : int
        Frequency of the spectrum need to be observed. Common value range
        from 1 Hz to 10 Hz,
        by default set to 4 Hz. Detail can be found from the ECG book

    power_type: str
        Passed as ``scaling`` to the periodogram of the 'ar' method;
        ignored by the other methods.

    max_lag : int
        Number of lags of the autoregressive model of the 'ar' method;
        ignored by the other methods.

    Returns
    ---------
    freq : array
        Frequency of the corresponding psd points.
    psd : array
        Power Spectral Density of the signal.

    Raises
    ---------
    ValueError
        If method is not one of 'welch', 'lomb' or 'ar'.
    """
    ts_rr, bpm_list = get_time_and_bpm(rr_intervals)

    if method == 'welch':
        nni_interpolation = get_interpolated_nn(ts_rr, bpm_list,
                                                hr_sampling_frequency)
        # ---------- Remove DC Component ---------- #
        nni_normalized = nni_interpolation - np.mean(nni_interpolation)

        #  --------- Compute Power Spectral Density  --------- #
        freq, psd = signal.welch(x=nni_normalized,
                                 fs=hr_sampling_frequency,
                                 window='hann',
                                 nfft=4096)

    elif method == 'lomb':
        # 256 strictly positive frequencies: the 0 Hz point is dropped
        # because the periodogram is not defined there.
        freq = np.linspace(0, hr_sampling_frequency, 2**8 + 1)[1:]
        # lombscargle expects angular frequencies, i.e. 2*pi*f (the previous
        # code computed 2*pi/f, which also divided by zero at f = 0).
        a_frequencies = 2 * np.pi * freq
        psd = signal.lombscargle(ts_rr,
                                 rr_intervals,
                                 a_frequencies,
                                 normalize=True)

    elif method == 'ar':
        freq, psd_ = signal.periodogram(rr_intervals,
                                        hr_sampling_frequency,
                                        window='boxcar',
                                        nfft=None,
                                        detrend='constant',
                                        return_onesided=True,
                                        scaling=power_type,
                                        axis=-1)
        model = AutoReg(psd_, max_lag)
        res = model.fit()
        psd = model.predict(res.params)
        # The in-sample prediction may be shorter than the periodogram
        # (NOTE(review): some statsmodels versions appear to omit the
        # leading observations consumed by the lags - confirm). Drop the
        # same number of leading frequencies so freq and psd stay aligned.
        n_dropped = len(freq) - len(psd)
        if n_dropped > 0:
            freq = freq[n_dropped:]
    else:
        raise ValueError("Not a valid method. Choose between 'ar', 'lomb' "
                         "and 'welch'")

    return freq, psd
예제 #20
0
class AutoRegImplementation(ModelImplementation):
    """ Autoregressive time series model wrapped as a model implementation.

    The two lags of the model are read from the ``lag_1`` and ``lag_2``
    entries of the keyword parameters passed to the constructor.
    """

    def __init__(self, log: Log = None, **params):
        super().__init__(log)
        self.params = params
        # Both attributes are filled in by ``fit``
        self.actual_ts_len = None
        self.autoreg = None

    def fit(self, input_data):
        """ Fit the autoregressive model on the features of the input data

        :param input_data: data with features, target and ids to process
        :return: fitted statsmodels results object
        """
        series = np.array(input_data.features)
        self.actual_ts_len = len(series)

        lags = [int(self.params.get(name)) for name in ('lag_1', 'lag_2')]
        self.autoreg = AutoReg(series, lags=lags).fit()

        return self.autoreg

    def predict(self, input_data, is_fit_pipeline_stage: bool):
        """ Predict the time series on the forecast length

        :param input_data: data with features, target and ids to process
        :param is_fit_pipeline_stage: is this fit or predict stage for pipeline
        :return output_data: output data with the predicted values as a table
        """
        input_data = copy(input_data)
        forecast_length = input_data.task.task_params.forecast_length
        old_idx = input_data.idx
        target = input_data.target

        if not is_fit_pipeline_stage:
            # Predict stage: predict the last ``forecast_length`` ids
            last_id = old_idx[-1]
            first_id = last_id - forecast_length + 1
            forecast = self.autoreg.predict(start=first_id, end=last_id)

            # One row holding the whole forecast
            predict = np.array(forecast).reshape(1, -1)
            input_data.idx = np.arange(first_id, last_id + 1)
        else:
            # Fit stage: in-sample predictions over all ids
            in_sample = self.autoreg.predict(start=old_idx[0],
                                             end=old_idx[-1])

            # The prediction can be shorter than the fitted series; pad the
            # front with copies of its first value
            n_missing = self.actual_ts_len - len(in_sample)
            padded = np.array([in_sample[0]] * n_missing + list(in_sample))

            _, predict = _ts_to_table(idx=old_idx,
                                      time_series=padded,
                                      window_size=forecast_length)
            new_idx, target_columns = _ts_to_table(idx=old_idx,
                                                   time_series=target,
                                                   window_size=forecast_length)

            input_data.idx = new_idx
            input_data.target = target_columns

        return self._convert_to_output(input_data,
                                       predict=predict,
                                       data_type=DataTypesEnum.table)

    def get_params(self):
        """ Return the keyword parameters given to the constructor """
        return self.params
예제 #21
0
# Fit a SARIMAX model of order (2, 1, 0) with a constant, then save and show
# its diagnostic plots.
arima = SARIMAX(adf, order=(2, 1, 0), trend='c').fit()
fig = arima.plot_diagnostics(figsize=(10, 6))
plt.tight_layout(pad=2)
plt.savefig(os.path.join(imgdir, 'ar.jpg'))
plt.show()
arima.summary()

# Forecasting
## One-step ahead predictions
# Estimate the autoregression on the training split only.
model = AutoReg(df_train, lags=lags, old_names=False).fit()
print(model.summary())

# Observations to predict are from the test split
from sklearn.metrics import mean_squared_error
# A second, unfitted model over the full data set is used only to apply the
# parameters estimated on the training split.
all_dates = AutoReg(df, lags=lags, old_names=False)
# Predict from the last training date onwards, shift the result by one
# period and drop the first row.
df_pred = all_dates.predict(model.params,
                            start=df_train.index[-1]).shift(1).iloc[1:]
mse = mean_squared_error(df_test, df_pred)
# Baseline: mean squared deviation of the test data from the training mean
var = np.mean(np.square(df_test - df_train.mean()))
print(f"Short-term Forecasts:  rmse={np.sqrt(mse):.6f} r2={1-mse/var:.4f}")
fig, ax = plt.subplots(clear=True, num=1, figsize=(4, 6))
df_pred.plot(ax=ax, c='C0')
df_test.plot(ax=ax, c='C1')
ax.legend(['Predicted', 'Actual'])
ax.set_title(s + " (one-step forecasts)")
plt.tight_layout(pad=2)
plt.savefig(os.path.join(imgdir, 'short.jpg'))
plt.show()

# Multi-step ahead predictions
df_pred = all_dates.predict(model.params,
                            start=df_train.index[-1],