def grid_search_best_model_timeseries_arma(df, grid, cv):
    """Grid-search ARMA orders using time-series cross-validation.

    Every combination of the value lists in ``grid`` is used as the ``order``
    argument of ``arima_model.ARMA``.  Each candidate is fitted on every
    training fold produced by ``TimeSeriesSplit(n_splits=cv)`` and scored by
    the RMSE of its predictions over the matching test fold.  The candidate
    with the lowest mean RMSE is then refitted on all of ``df``.

    Args:
        df: Series to model; it is accessed through ``.iloc``, ``.index``,
            ``.values`` and ``.shape`` (presumably a pandas object -- confirm
            against callers).
        grid: Mapping whose values are iterables of candidate settings.  The
            Cartesian product of the values, in mapping order, forms the
            candidate orders.  An empty mapping yields no candidates.
        cv: Number of splits handed to ``TimeSeriesSplit``.

    Returns:
        ``(estimator, res)`` -- the model built on the full data and its fit
        result -- or ``(None, None)`` when no candidate could be scored on
        any fold.
    """
    # product() already yields tuples; guard the empty grid so it produces no
    # candidates instead of failing while unpacking.
    params = list(product(*grid.values())) if grid else []

    print(params)
    best_param = None
    best_score = np.inf  # np.infty was removed in NumPy 2.0
    tsp = TimeSeriesSplit(n_splits=cv)

    for param in params:
        scores = []
        for train_ind, test_ind in tsp.split(df):
            train_data = df.iloc[train_ind]
            test_data = df.iloc[test_ind]
            try:
                estimator = arima_model.ARMA(train_data, order=param)
                res = estimator.fit()
                # Out-of-sample predictions spanning the test fold.
                y_pred = estimator.predict(res.params, test_data.index[0],
                                           test_data.index[-1])
                y_test = test_data.values.reshape(-1)
                score = math.sqrt(metrics.mean_squared_error(y_test, y_pred))
            except Exception:
                # Best effort: a candidate that cannot be fitted or scored on
                # this fold is skipped.  Catching Exception (not a bare
                # except) lets KeyboardInterrupt/SystemExit propagate.
                continue
            scores.append(score)

        if not scores:
            continue
        mean_score = np.mean(scores)
        if mean_score < best_score:
            best_score = mean_score
            best_param = param

    if best_param is None:
        return None, None

    # Refit the winning order on the complete data set.
    estimator = arima_model.ARMA(df, order=best_param)
    res = estimator.fit()
    print("best parameters:" + str(best_param))
    print("validation rmse:" + str(best_score))
    # In-sample predictions over the whole series.
    y_pred = estimator.predict(res.params, start=0, end=df.shape[0] - 1)
    y_train = df.values.reshape(-1)
    train_rmse = math.sqrt(metrics.mean_squared_error(y_train, y_pred))
    print("train rmse:" + str(train_rmse))
    return estimator, res
Example #2
0
 def _fit(self, ord, ix, if_plot=False):
     """Fit an ARMA model to ``self.y[ix]`` and return the fitted parameters.

     ``ord`` is handed to ``arima_model.ARMA`` as the model order; its first
     two entries are also used for the data-length check.  The model is
     fitted with ``trend='nc'``.  When ``if_plot`` is true,
     ``plot_predict()`` is called on the fit result before returning.

     Raises:
         AssertionError: if ``self.y[ix]`` does not hold more than
             ``2 * (ord[0] + ord[1])`` entries.
     """
     n_obs = len(self.y[ix])
     n_terms = ord[0] + ord[1]
     assert n_obs > 2 * n_terms, 'not enough data to fit'
     # Imported only after the length check has passed.
     from statsmodels.tsa import arima_model
     fitted = arima_model.ARMA(self.y[ix], ord).fit(trend='nc')
     if if_plot:
         fitted.plot_predict()
     return fitted.params
Example #3
0
    def fit_ARIMA(self, col, order=(1, 1)):
        """Fit an ARMA model to ``self.ss[col]`` and store its outputs.

        On success the fitted values and residuals are written back into
        ``self.ss`` under ``'<col>_ARIMA_fitted'`` and ``'<col>_ARIMA_resid'``.
        A ``KeyError`` raised while doing so is reported with a printed
        warning and otherwise ignored.

        Args:
            col: Key used to look up the series in ``self.ss``.
            order: Forwarded to ``model.fit`` as ``order``.
        """
        try:
            model = ARIMA.ARMA(self.ss[col])

            # NOTE(review): the order is given to fit() rather than to the
            # ARMA constructor -- confirm this matches the statsmodels
            # version in use.
            result = model.fit(order=order)
            self.ss['%s_ARIMA_fitted' % col] = result.fittedvalues
            self.ss['%s_ARIMA_resid' % col] = result.resid
        except KeyError:
            # Parenthesised single-argument form: identical output on
            # Python 2, and valid syntax on Python 3.
            print("Warning: %s is a bad key, ignoring" % col)
def garch_group(Y, q0=1, p=1, q=1, do_plots=False):
    """Fit an ARMA(p, q) model to the averaged squared AR residuals of a group.

    For every column of ``Y`` an ``ar_model.AR`` model is fitted with ``q0``
    passed to ``fit``; its squared residuals are centred on their mean and
    divided by their standard deviation.  These standardised series are
    averaged across columns and an ``arima_model.ARMA`` model of order
    ``(p, q)`` is fitted to the average.

    Args:
        Y: 2-D array; ``np.transpose(Y)`` is iterated, so each column is
            treated as one series.
        q0: Argument passed to ``AR.fit``; also sets the residual length
            ``Y.shape[0] - q0``.
        p, q: Order of the ARMA model fitted to the averaged residuals.
        do_plots: When true, print the p-values, plot the fitted values and
            return ``None``.

    Returns:
        ``(residuals, r2)`` -- the averaged residual series and the ARMA fit
        result -- when ``do_plots`` is false; ``None`` otherwise.
    """
    n_series = Y.shape[1]
    residuals = np.zeros(Y.shape[0] - q0)
    for y in np.transpose(Y):
        results = ar_model.AR(y).fit(q0)
        et = results.resid**2
        residuals += (et - sum(et) / len(et)) / np.std(et)
    # Average once, after every series has been accumulated.  Dividing inside
    # the loop would shrink earlier contributions again on every iteration.
    if n_series:
        residuals /= n_series
    model = arima_model.ARMA(residuals, (p, q))
    r2 = model.fit()
    if do_plots:
        print(r2.pvalues)
        pylab.plot(r2.fittedvalues)
        pylab.show()
    else:
        return residuals, r2
Example #5
0
def arma_construct(p, q, x_train, y_train, x_valid, y_valid, num_data):
    """Estimate an ARMA(p, q) model on ``y_train`` and print a short report.

    Args:
        p: The order of the AR model.
        q: The order of the MA model.
        x_train: Partition of the noise set used for training.
        y_train: Training data evaluated using the original model.
        x_valid: Partition of the noise set used for validation.
        y_valid: Validation data evaluated using the original model.
        num_data: Amount of data being generated; the errors are scaled by
            ``1 / (num_data / 2)``.

    Returns:
        None.  The coefficients and both error figures are printed.
    """
    fitted = arima_model.ARMA(y_train, order=(p, q)).fit(disp=0)

    # The fit result omits the leading 1 of each coefficient list, so it is
    # put back here; the AR coefficients are also negated.
    ma_co = [1] + fitted.maparams.tolist()
    ar_co = [1] + list(-fitted.arparams)

    print('**** ARMA({0},{1}) ****'.format(p, q))
    print('AR Coefficients: ', ["%.3f" % item for item in ar_co])
    print('MA Coefficients: ', ["%.3f" % item for item in ma_co])

    # Training error first, then validation error: filter the noise through
    # the estimated model and compare with the reference output.
    reports = (
        ('Training Error: %.3f', x_train, y_train),
        ('Validation Error: %.3f', x_valid, y_valid),
    )
    for template, noise, reference in reports:
        filtered = signal.lfilter(ma_co, ar_co, noise)
        scale = 1 / (num_data / 2)
        ave_error = scale * np.sum(np.abs((reference - filtered) / filtered))
        print(template % ave_error)
Example #6
0
 def fit_row(self, row):
     """Fit an ARMA model of order ``(self.p, self.q)`` to ``row``.

     Returns a ``(pred, params)`` tuple: the fitted model's predictions for
     positions ``0`` through ``len(row) - 1`` and its fitted parameters.
     """
     estimator = arima_model.ARMA(row, (self.p, self.q))
     fitted = estimator.fit()
     last_index = len(row) - 1
     in_sample = fitted.predict(start=0, end=last_index)
     return (in_sample, fitted.params)
def garch(y, q0=1, p=1, q=1):
    """Fit an ARMA(p, q) model to the standardised squared AR residuals of ``y``.

    An ``ar_model.AR`` model is fitted to ``y`` with ``q0`` passed to ``fit``.
    Its squared residuals are centred on their mean and divided by their
    standard deviation, and an ``arima_model.ARMA`` model of order ``(p, q)``
    is fitted to the result.

    Returns:
        The fit result of the ARMA model.
    """
    ar_fit = ar_model.AR(y).fit(q0)
    squared = ar_fit.resid**2
    centred = squared - sum(squared) / len(squared)
    standardised = centred / np.std(squared)
    return arima_model.ARMA(standardised, (p, q)).fit()
Example #8
0
    # Report the MA coefficients and sigma^2 obtained in part (1).
    print('    Coefficents of MA: {0}\n'.format(formatIter(paraMA)))
    print('    Sigma^2: %.4f\n' % paraSigma2)
    print('    Method:Moment Estimation / Inverse Correlation Function Method')

    '''
    ## (2)
    use arima_model.ARMA.fit() to compute MLE
    we may encounter warnings, i.e. non-positive definite Hessian
    ,but ignore it
    we select start_param of optimization algorithm by arima_model.ARMA.fit()
    instead of results of (1)
    '''
    filterwarnings('ignore')   # ignore warnings
    # Draw a fresh 100-point sample and fit an ARMA(4, 2) model by MLE without
    # a trend term; if fit() raises ValueError, discard the sample and retry.
    while True:
        series = arima_process.arma_generate_sample(ar, ma, 100, sigma=2)
        armaModel = arima_model.ARMA(series, (4, 2))
        try:
            armaResult = armaModel.fit(method='mle', trend='nc',
                                       disp=0, maxiter=10000)
        except ValueError as e:
            # The exception object itself is not used; just try a new sample.
            continue
        else:
            # First successful fit: keep its estimates and leave the loop.
            paraAR_mle, paraMA_mle = armaResult.arparams, armaResult.maparams
            paraSigma2_mle = armaResult.sigma2
            break

    # Report the maximum-likelihood estimates.
    print('\n----3.1 (2)----\n')
    print('    Coefficents of AR: {0}\n'.format(formatIter(paraAR_mle)))
    print('    Coefficents of MA: {0}\n'.format(formatIter(paraMA_mle)))
    print('    Sigma^2: %.4f\n' % paraSigma2_mle)
    print('    Method:Maximum Likelihood Estimation')
Example #9
0
            self.reactor_size = (24 * 1.0, 'in^3')
        else:
            self.reactor_size = (None, None)

    def _load_timeseries_data(self):
        """Create ``self.gts`` and fill it from ``gas_proc_data_tbl``.

        The load goes through ``self.interface_proc`` and is restricted to
        rows whose ``timestamp`` is at or after ``run_info.info['ss_start']``
        and before ``run_info.info['ss_stop']``.
        """
        self.gts = df.Dataframe()
        loader = self.gts.SQL_load_data
        source = self.interface_proc
        bounds = self.run_info.info
        time_filters = [
            "timestamp >= '%s'" % bounds['ss_start'],
            "timestamp < '%s'" % bounds['ss_stop'],
        ]
        # TODO: this load needs to automatically load the units
        loader(source, 'gas_proc_data_tbl', conditions=time_filters)


# Script entry point: fit an ARMA model of order (1, 1) to the
# 'mass_flow_brush_feeder' series of ``test.gts`` and print the results.
if __name__ == "__main__":

    # NOTE(review): the next line is not valid Python as it stands -- the text
    # between the prompt string and ['timestamp'] appears to have been
    # replaced by asterisks, and the code that builds ``test`` is missing.
    # Recover it from the original source before running.
    user = raw_input('User: '******'timestamp']
    model = ARIMA.ARMA(test.gts['mass_flow_brush_feeder'], order=(1, 1))
    result = model.fit()
    # Python 2 print statements: summary, input series, fitted values and
    # residuals, in that order.
    print result.summary()
    print test.gts['mass_flow_brush_feeder']
    print result.fittedvalues
    print result.resid
Example #10
0
# Load the pipe-separated input file at ``src``.
data = p.read_csv(src, sep="|")

#print(data) # MUST GIVE COLUMN HEADERS in CSV FILE BEFORE WE DO THIS
# Double brackets select the "5Y" column as a one-column frame, not a 1-D
# series.
five_year_data = data[["5Y"]]
print(five_year_data)  # how do we get this to work as a 1D array?
#five_year_data_1d = p.Series.ravel(five_year_data)
#print(five_year_data_1d)
acf = calc_acf(five_year_data)
print(acf[0])
print("ACF is", acf)
# plt.xlim(1, 10)
# NOTE(review): plot_acf / plot_pacf are given the outputs of calc_acf /
# calc_pacf rather than the raw series -- confirm this is intended.
acf_plt = s.plot_acf(acf)
pacf = calc_pacf(five_year_data, 10)
print(pacf)
pacf_plt = s.plot_pacf(pacf)
plt.show()

# Param estimation
#model = a(five_year_data, order=(10,1,0))
# model_fit = model.fit(disp=0)
# print(model_fit.summary())

# AR fit, with 10 passed to fit().
ar_model = ar.AR(five_year_data)
ar_model_fit = ar_model.fit(10)
print("Params")
print(ar_model_fit.params)

# Pure MA fit: ARMA with order (0, 10) on the underlying values array.
ma_model = ma.ARMA(five_year_data.values, (0, 10))
ma_model_fit = ma_model.fit()
print("MA Params")
print(ma_model_fit.params)
Example #11
0
 def fit_row(self, row: np.ndarray) -> base.ApproxAndParams:
     """Fit an ARMA model of order ``(self._p, self._q)`` to ``row``.

     Returns a ``base.ApproxAndParams`` built from the fitted model's
     predictions for positions ``0`` through ``len(row) - 1`` and its
     fitted parameters.
     """
     estimator = arima_model.ARMA(row, (self._p, self._q))
     fitted = estimator.fit()
     last_index = len(row) - 1
     approximation = fitted.predict(start=0, end=last_index)
     return base.ApproxAndParams(approximation, fitted.params)
                        method='SLSQP',
                        bounds=bnds,
                        options={'maxiter': MAX_MLE_ITER})

# Wall-clock time taken by the optimisation started at time_mymle_start.
time_mymle_end = time.time()
time_mymle = time_mymle_end - time_mymle_start

# Stop here if the optimiser did not report success.
assert res.success

# Unpack the solution vector in the order (mu, phi, theta, sigma2).
mu_hat = res.x[0]
phi_hat = res.x[1]
theta_hat = res.x[2]
sigma2_hat = res.x[3]

### Now, for comparison purposes, estimate using the StatsModels package ###
sm = arima_model.ARMA(y_vec, order=(1, 1))

# Only the fit() call is timed, not the model construction above.
time_sm_start = time.time()

fittedsm = sm.fit(disp=0)

time_sm_end = time.time()
time_sm = time_sm_end - time_sm_start

### Display results ###
# Unsuffixed values, then the "_guess" starting values, then the estimates
# unpacked from res.x above.
print('(mu, phi, theta, sigma2) = (%f, %f, %f, %f)' % (mu, phi, theta, sigma2))
print('(mu, phi, theta, sigma2)_0 = (%f, %f, %f, %f)' %
      (mu_guess, phi_guess, theta_guess, sigma2_guess))
print('(mu, phi, theta, sigma2)^hat = (%f, %f, %f, %f)' %
      (mu_hat, phi_hat, theta_hat, sigma2_hat))
print('(mu, phi, theta, sigma2)^hatSM = (%f, %f, %f, %f)' %
Example #13
0
plt.show()
autocorrelation_plot(dfLeadsTrain['Count_Tickets'])
plt.show()

# In[10]:

# Single-argument print calls produce the same output on Python 2 and are
# valid syntax on Python 3.
print(dfLeadsTrain.columns)
print(dfLeadsTest.tail())

# In[11]:

# Exogenous regressors passed to the ARMA model alongside the ticket counts.
exog_cols = [
    u'Day_of_Week_1', u'Day_of_Week_2', u'Day_of_Week_3', u'Day_of_Week_4',
    u'Day_of_Week_5', u'Day_of_Week_6', u'MONTH_END', u'HOLIDAY'
]
modelARMA = am.ARMA(dfLeadsTrain.Count_Tickets, (2, 1),
                    exog=dfLeadsTrain[exog_cols])
resultsARMA = modelARMA.fit()
print(resultsARMA.summary())

# Durbin-Watson on results shows no/low positive autocorrelation

# In[12]:

# Explicit '%s %s %s' formatting reproduces the space-separated output of the
# former multi-value print statement on both Python 2 and Python 3.
print('%s %s %s' % (resultsARMA.aic, resultsARMA.bic, resultsARMA.hqic))
print(sm.stats.durbin_watson(resultsARMA.resid.values))

# In[13]:

# Plot the model residuals on a 12x8 figure.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax = resultsARMA.resid.plot(ax=ax)
Example #14
0
# Simulated input series used for the ARMA fit below.
y = util.simulate(poly=100, sinusoids=(10, 100, -20)).values

# 365 * 96 samples spaced 0.25 apart in ``hr``; ``t`` is the same axis
# multiplied by 3600.
hr = np.arange(365 * 96) * .25
t = hr * 3600
# Three noisy sinusoidal test signals.
# NOTE(review): ``sinusoids`` is not used anywhere in the visible code.
sinusoids = [
    np.random.normal(0.0, 0.1, 365 * 96) + 10 +
    3 * np.sin(hr * 2 * np.pi / 96 / .25),
    np.random.normal(0.0, 0.1, 365 * 96) + 15 +
    3 * np.sin(hr * 2 * np.pi / 96 / .25) +
    3 * np.cos(t * 2 * np.pi / 96. / .25 / 365.),
    np.random.normal(0.0, 1.0, 365 * 96) + 15 +
    3 * np.sin(hr * 2 * np.pi / 96 / .25) +
    3 * np.cos(t * 2 * np.pi / 96. / .25 / 365.) +
    np.random.normal(0.0, 1e-5, 365 * 96).cumsum()
]
arma20 = arima_model.ARMA(y, (2, 0)).fit()
# NOTE(review): ``arma`` is not bound in the visible code -- the fit result
# above is named ``arma20``; confirm which object was meant.
y2 = arma.predict(start=10 * 96, end=12 * 96)
y1 = y[10 * 96 - 1:12 * 96]
# NOTE(review): on Python 3, zip() returns an iterator rather than a list --
# confirm plt.plot accepts it, or which Python version this targets.
plt.plot(t[10 * 96 - 1:12 * 96], zip(*[y1, y2]))
plt.show()
# NOTE(review): ``arma30`` is never assigned in the visible code; presumably
# it was fitted elsewhere -- verify before running.
y2 = arma30.predict(start=10 * 96, end=12 * 96)
plt.plot(t[10 * 96 - 1:12 * 96], zip(*[y1, y2]))
plt.show()
# Residual diagnostics: raw residuals, then residuals relative to the
# predictions (y2) and to the input series (y).
arma30.resid.plot()
plt.plot(arma30.resid)
plt.show()
plt.plot(arma30.resid / y2)
plt.plot(arma30.resid / y)
plt.show()
plt.plot(arma30.resid / y)
plt.show()
Example #15
0
# stats: -0.582869706432, pvalue: 0.920441499758
# h0: rho == 1 is rejected. There is no unit root.

# for AAPL
# MA(15) fits model the best.
# stats: 3.04749309416, pvalue: 1.0
# stats: 5.14080783224, pvalue: 0.999999999468
# h0: rho == 1 is not rejected. There is unit root.

test_series = goog_df
from arch.unitroot import PhillipsPerron

# fit MA on resid.
import statsmodels.tsa.arima_model as arma
# Fit a pure MA model for each candidate lag and report its AIC.
for ma_lag in [7, 8, 9, 10, 11, 12, 13, 15, 20]:
    model = arma.ARMA(test_series, (0, ma_lag)).fit()
    # Single-argument print calls: same output on Python 2, valid on Python 3.
    print('lag: {}, aic: {}'.format(ma_lag, model.aic))
# Phillips-Perron test with a constant trend and 10 lags, run once for each
# test type ('tau', then 'rho').
pp = PhillipsPerron(test_series, trend='c', lags=10, test_type='tau')
print('stats: {}, pvalue: {}'.format(pp.stat, pp.pvalue))
pp = PhillipsPerron(test_series, trend='c', lags=10, test_type='rho')
print('stats: {}, pvalue: {}'.format(pp.stat, pp.pvalue))

### using adf test
### goog and aapl t-value
# 0.876758903592
# 0.999070159449

test_series = goog_df
from arch.unitroot import ADF
# NOTE(review): ADF is given goog_df directly rather than test_series; the two
# are the same object here, but confirm which name is meant to be switched
# when testing another series.
adf = ADF(goog_df, lags=24)
print(adf.pvalue)
def get_arma_coefficients(series, order=(2, 3)):
    """Fit an ARMA model of the given ``order`` to ``series``.

    The fit is run with ``disp=False``.

    Returns:
        The ``params`` attribute of the fit result.
    """
    estimator = arima_model.ARMA(series, order)
    fit_result = estimator.fit(disp=False)
    return fit_result.params