Exemple #1
0
    def forecast_out_model(data, order=(3, 0)):
        """Build rolling one-step-ahead forecasts of the parameters of one model.

        The first half of the rows is passed through unchanged. Each later
        row is replaced by a one-step forecast from a model fitted on the
        ``window`` rows that precede it.

        Parameters
        ----------
        data : DataFrame
            Parameters for one model only
        order : tuple
            Order handed to ARMA when ``data`` has a single column. For
            several columns only its first element is used, as ``maxlags``
            of the VAR fit.

        Returns
        -------
        ndarray
            The untouched first half stacked on top of the forecasts
            (result of ``np.vstack``).

        """
        n_rows = data.shape[0]
        half = n_rows // 2
        var_lags = order[0]

        pieces = [data[:half]]
        for start in range(n_rows - half):
            sample = data[start:half + start]
            if data.shape[1] == 1:
                # Single series: conditional-sum-of-squares ARMA fit.
                fitted = ARMA(sample, order=order).fit(method='css', disp=False)
                step = fitted.forecast(1)[0]
            else:
                # Several series: vector autoregression on the same window.
                fitted = VAR(sample).fit(maxlags=var_lags)
                step = fitted.forecast(np.atleast_2d(sample), 1)
            pieces.append(step)

        return np.vstack(pieces)
Exemple #2
0
def test_compare_arma():
    """Compare ARMA(1, 1) estimates from ARMA.fit, Arma.fit and Arma.fit_mle.

    Preliminary comparison of arma_kf, arma_cond_ls and arma_cond_mle. The
    results returned by the fit methods are still incomplete, and the sample
    is generated without a fixed random seed.
    """
    # np.random.seed(9876565)
    generator = fa.ArmaFft([1, -0.5], [1., 0.4], 40)
    sample = generator.generate_sample(size=200, burnin=1000)

    # Reference estimate without a constant; an earlier version obtained it
    # from the Kalman filter through the "descriptive" module.
    reference = ARMA(sample).fit((1, 1), trend='nc', disp=-1)

    conditional = Arma(sample)
    ls_estimate = conditional.fit(order=(1, 1))
    mle_estimate = conditional.fit_mle(order=(1, 1),
                                       start_params=[0.4, 0.4, 1.], disp=0)

    # decimal=1 corresponds to a threshold of 5% difference. The ratios are
    # compared with their sign (the earlier sign difference was corrected).
    assert_almost_equal(ls_estimate[0] / reference.params,
                        np.ones(reference.params.shape), decimal=1)

    # The MLE params also carry the variance estimate as their last element,
    # which is dropped before comparing.
    assert_almost_equal(mle_estimate.params[:-1] / reference.params,
                        np.ones(reference.params.shape), decimal=1)
Exemple #3
0
 def certain_model(self, p, q):
     """Fit an ARMA(p, q) model to ``self.data_ts`` and store the outcome.

     On success the fitted result is stored in ``self.properModel`` and
     ``self.p``, ``self.q``, ``self.bic``, ``self.predict_ts`` and
     ``self.resid_ts`` are updated from it. If fitting (or reading the
     results) fails, a hint is printed and the method returns normally.

     Parameters
     ----------
     p : int
         Autoregressive order.
     q : int
         Moving-average order.
     """
     model = ARMA(self.data_ts, order=(p, q))
     try:
         self.properModel = model.fit(disp=-1, method='css')
         self.p = p
         self.q = q
         self.bic = self.properModel.bic
         self.predict_ts = self.properModel.predict()
         self.resid_ts = deepcopy(self.properModel.resid)
     except Exception:
         # Catch Exception rather than everything so KeyboardInterrupt and
         # SystemExit still propagate. print(...) with one argument behaves
         # the same on Python 2 and Python 3.
         print('You can not fit the model with this parameter p,q, '
               'please use the get_proper_model method to get the best model')
Exemple #4
0
def proper_model(timeseries, maxLag):
    """Grid-search ARMA(p, q) orders and return the fit with the lowest BIC.

    Every combination with ``0 <= p < maxLag`` and ``0 <= q < maxLag`` is
    fitted with the CSS method; combinations whose fit raises are skipped.

    Parameters
    ----------
    timeseries : array-like
        Series passed to ``ARMA``.
    maxLag : int
        Exclusive upper bound for both orders.

    Returns
    -------
    The fitted results object with the smallest BIC, or ``None`` when no
    combination could be fitted (including ``maxLag == 0``).
    """
    best_bic = np.inf
    # Initialised up front so the function returns None instead of raising
    # UnboundLocalError when every fit fails.
    model_return = None
    for p in np.arange(maxLag):
        for q in np.arange(maxLag):
            model = ARMA(timeseries, order=(p, q))
            try:
                results_ARMA = model.fit(disp=0, method='css')
            except Exception:
                # This order cannot be fitted; try the next one.
                continue
            bic = results_ARMA.bic
            if bic < best_bic:
                model_return = results_ARMA
                best_bic = bic
    return model_return
Exemple #5
0
def get_arma_forecast(ts, forecast_start, forecast_periods, pq_order, pickle_path=None):
    """Forecast ``forecast_periods`` steps after ``forecast_start`` with ARMA.

    Parameters
    ----------
    ts : Series
        Series whose index contains ``forecast_start``.
    forecast_start : str
        Date at which prediction starts; it must be present in ``ts.index``.
    forecast_periods : int
        Offset added to the start position to get the end position passed
        to ``predict``.
    pq_order : tuple
        ``(p, q)`` order used when a new model is fitted.
    pickle_path : str, optional
        Path of a pickled fitted model. When given, the model is loaded from
        this file instead of being fitted.

    Returns
    -------
    Series
        The predicted values without the first one (the value at
        ``forecast_start`` itself), indexed by consecutive dates.
    """
    dates = ts.index
    start = dates.get_loc(pandas.to_datetime(forecast_start))
    end = start + forecast_periods
    # The original tested the ``pickle`` module itself, which is never None,
    # so a model was never fitted and ``open(None)`` failed by default.
    if pickle_path is None:
        arma = ARMA(ts.values, order=pq_order)
        arma_fitted = arma.fit()
    else:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(pickle_path, "rb") as fh:
            arma_fitted = pickle.load(fh)
    forecast_values = arma_fitted.predict(start, end)

    # Size the index from the prediction instead of a hard-coded 19 so values
    # and dates always line up.
    forecast_index = date_range(forecast_start, periods=len(forecast_values))

    return Series(forecast_values[1:], index=forecast_index[1:])
Exemple #6
0
 def _proper_model(self):
     for p in np.arange(self.maxLag):
         for q in np.arange(self.maxLag):
             # print p,q,self.bic
             model = ARMA(self.data_ts, order=(p, q))
             try:
                 results_ARMA = model.fit(disp=-1, method='css')
             except:
                 continue
             bic = results_ARMA.bic
             # print 'bic:',bic,'self.bic:',self.bic
             if bic < self.bic:
                 self.p = p
                 self.q = q
                 self.properModel = results_ARMA
                 self.bic = bic
                 self.resid_ts = deepcopy(self.properModel.resid)
                 self.predict_ts = self.properModel.predict()
Exemple #7
0
def main():
    """Explore, model and forecast the Canadian average-temperature series.

    Reads ``FILE_NAME``, keeps the Canadian rows from 1900-01-01 onward,
    plots the series and its 12-observation rolling mean, runs
    ``test_stationarity``, prints the AIC/BIC order selection, fits an
    ARMA(3, 3) model without constant and plots/predicts the range
    '01/01/2003' to '12/01/2023'.
    """
    df = pd.read_csv(FILE_NAME, sep=',', skipinitialspace=True, encoding='utf-8')
    df = df.drop('AverageTemperatureUncertainty', axis=1)
    df = df[df.Country == 'Canada']
    df = df.drop('Country', axis=1)
    df.index = pd.to_datetime(df.dt)
    df = df.drop('dt', axis=1)
    # Sort first so the label slice works on a monotonic index; ``.loc``
    # replaces the removed ``.ix`` indexer.
    df = df.sort_index()
    df = df.loc['1900-01-01':]

    # Display AT
    # Assign the forward-filled column back instead of filling in place on
    # an attribute access, which may act on a temporary copy.
    df['AverageTemperature'] = df['AverageTemperature'].ffill()
    mp.plot(df.AverageTemperature)
    mp.show()

    # Rolling Mean
    df.AverageTemperature.plot.line(style='b', legend=True, grid=True, label='Avg. Temperature (AT)')
    ax = df.AverageTemperature.rolling(window=12).mean().plot.line(style='r', legend=True, label='Mean AT')
    ax.set_xlabel('Date')
    mp.legend(loc='best')
    mp.title('Weather timeseries visualization')
    mp.show()

    test_stationarity(df.AverageTemperature)

    res = arma_order_select_ic(df.AverageTemperature, ic=['aic', 'bic'], trend='nc',
              max_ar=4, max_ma=4, fit_kw={'method': 'css-mle'})
    # Function-call form, consistent with the other print below and valid on
    # both Python 2 and Python 3.
    print(res)

    # Fit the model
    ts = pd.Series(df.AverageTemperature, index=df.index)
    model = ARMA(ts, order=(3, 3))
    results = model.fit(trend='nc', method='css-mle')
    print(results.summary2())

    # Plot the model
    fig, ax = mp.subplots(figsize=(10, 8))
    fig = results.plot_predict('01/01/2003', '12/01/2023', ax=ax)
    ax.legend(loc='lower left')
    mp.title('Weather Time Series prediction')
    mp.show()

    predictions = results.predict('01/01/2003', '12/01/2023')
    def test_glsar_arima(self):
        """Check iterative GLSAR(3) estimates against a CSS-fitted ARMA(3, 0)."""
        from statsmodels.tsa.arima_model import ARMA

        base_model = self.res.model
        endog = base_model.endog
        exog = base_model.exog

        glsar = GLSAR(endog, exog, 3)
        glsar_fit = glsar.iterative_fit(10)

        # The ARMA model gets exog without its last column
        # (presumably the constant -- TODO confirm).
        arma = ARMA(endog, order=(3,0), exog=exog[:, :-1])
        arma_fit = arma.fit(method='css', iprint=0, disp=0)

        # ARMA parameters are reordered with [1, 2, 0] to line up with GLSAR.
        reorder = [1, 2, 0]
        assert_allclose(glsar_fit.params, arma_fit.params[reorder],
                        atol=0.01, rtol=1e-3)
        assert_allclose(glsar_fit.model.rho, arma_fit.params[3:],
                        atol=0.05, rtol=1e-3)
        assert_allclose(glsar_fit.bse, arma_fit.bse[reorder],
                        atol=0.015, rtol=1e-3)

        assert_equal(len(glsar_fit.history['params']), 5)
        # this should be identical, history has last fit
        assert_equal(glsar_fit.history['params'][-1], glsar_fit.params)

        # With rtol=0 the history holds one entry per requested iteration.
        refit = glsar.iterative_fit(4, rtol=0)
        assert_equal(len(refit.history['params']), 4)
        assert_equal(len(refit.history['rho']), 4)
    def predict_arma_next_days(self, item):
        """Fit an ARMA model on ``df_train[item]`` and forecast ``self.n_days`` steps.

        Returns a tuple ``(ts, ts_last_day, forecast)``:
        ts          -- history up to (excluding) position ``self.fc``
        ts_last_day -- the observed value at ``self.fc``
        forecast    -- out-of-sample forecast starting right after ``ts``
        """
        series = df_train[item].sort_index()  # order by the Date index
        ts_last_day = series[self.fc]  # real last data
        series = series[0:self.fc]  # index 0 until last data - 1

        fitted = ARMA(series, order=(self.p, self.q), freq='D').fit(disp=False)

        # n_days forecasting
        forecast = _arma_predict_out_of_sample(
            fitted.params, self.n_days, fitted.resid,
            fitted.k_ar, fitted.k_ma, fitted.k_trend, fitted.k_exog,
            endog=series, exog=None, start=len(series))
        return series, ts_last_day, forecast
Exemple #10
0
# Append the fit summaries of a grid of candidate models to the results file:
# pure AR (p = 1..4), pure MA (q = 1..3), then every mixed ARMA(p, q).
with open('{}-results.txt'.format(segment_id), 'a') as f:
    for p in range(1, 5, 1):
        f.write(str(fit_arma(train_df['Speed'], p, 0)))
        f.write('\n')
    for q in range(1, 4, 1):
        f.write(str(fit_arma(train_df['Speed'], 0, q)))
        f.write('\n')
    for p in range(1, 5, 1):
        for q in range(1, 4, 1):
            f.write(str(fit_arma(train_df['Speed'], p, q)))
            f.write('\n')

from matplotlib import pyplot
# Fit the selected model. The order actually used below is ARMA(4, 3)
# (an earlier note here said ARMA(1, 6), which did not match the code).
model = ARMA(train_df['Speed'], order=(4, 3))
model_fit = model.fit(disp=0)
print(model_fit.summary())

# Residual diagnostics: line plot, kernel density estimate, summary statistics.
residuals = pd.DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
residuals.plot(kind='kde')
pyplot.show()
print(residuals.describe())

# out-of-sample prediction, from the last training label to the last test label
start_index = train_df.index[-1]
end_index = test_df.index[-1]
forecast = model_fit.predict(start=start_index, end=end_index)
forecast = forecast[test_df.index]  # some days are missing.
pred_error = test_df['Speed'] - forecast
Exemple #11
0
# Simulate a series x driven by the noise e (= z) with random coefficients.
c = 0
e = z
N = len(z)
a = np.random.normal(0, 0.1, size=p)
x = np.zeros(N)
# Prepend 1 and reverse: a now has p + 1 entries, with the 1 at the end.
a = np.r_[1, a][::-1]
for i in range(p, N):
    for j in range(p):
        # NOTE(review): this assigns instead of accumulating, so only the
        # last term (j = p - 1) survives the inner loop -- confirm whether
        # a sum over j was intended.
        x[i] = a[j] * x[i - j]
    x[i] = x[i] + c + e[i]

plt.plot(x[p:])
plt.show()

# Fit ARMA(2, 3) to close_data and plot the in-sample prediction.
model = ARMA(close_data, order=(2, 3))
model_fit = model.fit(disp=False)
# make prediction
yhat = model_fit.predict(0, len(close_data))
# The first three predicted values are left out of the plot.
plt.plot(yhat[3:], 'b')
plt.show()
""""
ar plot
e = mv_out
N=len(mv_out)
a = [0.2, 0.2]
# AR model
#a = [0.5,0.5]
p = len(a)
c=-100
x = np.zeros(N)
for i in range(p, N):
Exemple #12
0
## ACF and PACF for Returns

# Lag 0 is excluded from both plots (zero=False).
sgt.plot_acf(df.returns, lags=40, zero = False)
plt.title("ACF FTSE Returns", size=24)
plt.show()

sgt.plot_pacf(df.returns, lags = 40, zero = False, method = ('ols'))
plt.title("PACF FTSE Returns", size=24)
plt.show()

## AR(1) for Returns

model_ret_ar_1 = ARMA(df.returns, order = (1,0))

results_ret_ar_1 = model_ret_ar_1.fit()

results_ret_ar_1.summary()

## Higher-Lag AR Models for Returns

model_ret_ar_2 = ARMA(df.returns, order = (2,0))
results_ret_ar_2 = model_ret_ar_2.fit()
results_ret_ar_2.summary()

# NOTE(review): the unfitted model objects (not the fit results) are passed
# here -- confirm that matches what LLR_test expects.
LLR_test(model_ret_ar_1, model_ret_ar_2)

model_ret_ar_3 = ARMA(df.returns, order = (3,0))
results_ret_ar_3 = model_ret_ar_3.fit()
results_ret_ar_3.summary()
Exemple #13
0
# Correlograms of the wind-speed series.
plot_acf(dataset.windSpeed, lags=50)
plot_pacf(dataset.windSpeed, lags=50)
plt.xlabel('lags')
plt.show()

#%%

from statsmodels.tsa.arima_model import ARMA

import itertools

# Grid search over ARMA(p, q) with p, q in 0..3, reporting the AIC of each fit.
p = q = range(0, 4)
pq = itertools.product(p, q)
for param in pq:
    try:
        mod = ARMA(dataset.windSpeed, order=param)
        results = mod.fit()
        print('ARMA{} - AIC:{}'.format(param, results.aic))
    except Exception as err:
        # Some orders cannot be fitted. Report them instead of hiding the
        # failure, and let KeyboardInterrupt / SystemExit propagate.
        print('ARMA{} - fit failed: {}'.format(param, err))
        continue

# Final model: ARMA(3, 3) fitted with the css-mle method.
model = ARMA(dataset.windSpeed, order=(3, 3))
results_MA = model.fit(method="css-mle")

#%%

# Observed series against fitted values; the title shows the in-sample MSE.
plt.plot(dataset.windSpeed)
plt.plot(results_MA.fittedvalues, color='red')
plt.title('Fitting data _ MSE: %.2f' %
          (((results_MA.fittedvalues - dataset.windSpeed)**2).mean()))
plt.show()
Exemple #14
0
# Figure 5A: the raw series.
ax.plot(X.ravel(), 'k')
plt.savefig('figure5A.png')
plt.savefig('figure5A.pdf')

# Figure 5B: the first difference of the series.
fig, ax = myfigure(fig_scale=1.8)
ax.plot(np.diff(X.ravel()), 'k')
plt.savefig('figure5B.png')
plt.savefig('figure5B.pdf')

# Differenced training data drawn over the (faded) differenced full series.
diff_train = np.diff(train)
fig, ax = myfigure(fig_scale=1.8)
ax.plot(np.diff(X.ravel()), 'k', alpha=0.4, label='truth')
ax.plot(diff_train, 'k')

# ARMA(1, 0) forecast of the differenced series for len(X) - size steps.
# The last training value is prepended so the forecast line joins the
# training curve.
model1 = ARMA(diff_train, order=(1, 0))
fitted_model1 = model1.fit(disp=0)
prediction1 = fitted_model1.forecast(len(X) - size)
ax.plot(np.arange(size - 2, size + len(test) - 1),
        [diff_train[-1]] + prediction1[0].tolist(),
        color=colors[0],
        linestyle='-',
        label='ARMA(1,0)')

# Same forecast with an ARMA(1, 1) model.
model2 = ARMA(diff_train, order=(1, 1))
fitted_model2 = model2.fit(disp=0)
prediction2 = fitted_model2.forecast(len(X) - size)
ax.plot(np.arange(size - 2, size + len(test) - 1),
        [diff_train[-1]] + prediction2[0].tolist(),
        color=colors[1],
        linestyle='--',
        label='ARMA(1,1)')
Exemple #15
0
              zero=False,
              method="ols")
plt.title("PACF for Prices", size=20)
plt.show()

# SUMMARY - LOWER PART
# coef - values of the coefficients
# std err - standard errors
# z - value of the test statistic
# P>|z| - p-value; if it is small the coefficient is significant (common threshold 0.05)
# [0.025 - 0.975] - confidence interval; if it contains zero the coefficient is not significant

# AR model of order 1 with no residual (MA) terms
# the constant and the lag coefficient are significant
modelAR = ARMA(dfTrain.marketValue, order=(1, 0))
resultsAR = modelAR.fit()
resultsAR.summary()

# AR model of order 2 with no residual (MA) terms
# only the constant and the first-lag coefficient are significant,
# so the price from 2 days ago does not greatly affect today's price
modelAR2 = ARMA(dfTrain.marketValue, order=(2, 0))
resultsAR2 = modelAR2.fit()
resultsAR2.summary()

# AR model of order 3 with no residual (MA) terms
# only the second-lag coefficient is not significant
modelAR3 = ARMA(dfTrain.marketValue, order=(3, 0))
resultsAR3 = modelAR3.fit()
resultsAR3.summary()
Exemple #16
0
# y seria un modelo de menor orden


# Degrees of freedom: DF = parameters of model1 - parameters of model2
def LLR_test(model1, model2, DF=1):
    """Return the p-value of a log-likelihood-ratio test, rounded to 3 decimals.

    Both arguments must expose a ``llf`` attribute (log-likelihood). The
    statistic ``2 * (model2.llf - model1.llf)`` is evaluated with the
    chi-squared survival function using ``DF`` degrees of freedom.
    """
    llf_first = model1.llf
    llf_second = model2.llf
    statistic = 2 * (llf_second - llf_first)
    p_value = chi2.sf(statistic, DF)
    return p_value.round(3)


# AR model of order 1 with no residual (MA) terms
# the constant and the lag coefficient are significant
modelAR1 = ARMA(dfTrain.returns, order=(1, 0))
resultsAR1 = modelAR1.fit()
print(resultsAR1.summary())

# AR model of order 2 with no residual (MA) terms
# the lag coefficients are significant, but the constant is not
# the LLR p-value is significant, so the order-2 model is selected
modelAR2 = ARMA(dfTrain.returns, order=(2, 0))
resultsAR2 = modelAR2.fit()
print(resultsAR2.summary())
print("LLR test: " + str(LLR_test(resultsAR1, resultsAR2)))

# AR model of order 3 with no residual (MA) terms
# the lag coefficients are significant, but the constant is not
# the LLR p-value is significant, so the order-3 model is selected
modelAR3 = ARMA(dfTrain.returns, order=(3, 0))
resultsAR3 = modelAR3.fit()
Exemple #17
0
def queryandinsert():
    """Query the recent online numbers, fit an ARMA model for JP and plot the result.

    This is the main function called by main; it integrates several other
    functions. Do not call it from other packages, otherwise it may cause
    unexpected results.

    Steps visible in this function:
    1. Look up connection settings and open three MySQL connections.
    2. Run ``queryStatementRemote`` and accumulate its rows into the global
       ``testingDict``.
    3. Build a Series of the 'JP' numbers, take its log, compute ACF/PACF
       and fit an ARMA(5, 2) model.
    4. Produce a 300-step out-of-sample prediction and forecast, print them,
       and write a plotly HTML file with the ACF and the prediction.

    NOTE(review): the connections and cursors opened here are never closed
    in this function, and several query strings are defined but not executed.
    """
    global gtbuDict             # gtbuDict, being used to store query data from gtbu database.....
    global omsDict              # being used to store query data from OMS database.....
    global presisDict
    global counter
    global testingDict

    # NOTE(review): starttime is not used again in this function.
    starttime = datetime.datetime.now()

    print len(presisDict)
    print "connect to databae!"

    # connect to the database use my own toolkits
    querydbinfoOMS = getdbinfo('OMS')
    querydbnameOMS = "wifi_data"

    querydbinfoGTBU = getdbinfo("GTBU")
    querydbnameGTBU = "ucloudplatform"

    insertdbinfo = getdbinfo('REMOTE')
    insertdbname = 'login_history'

    # print the database information for verification
    for key, value in querydbinfoOMS.iteritems():
        print key + " : " + str(value)

    # Online numbers of the last two days for JP, DE and TR; this is the only
    # query executed below.
    queryStatementRemote = """
    SELECT epochTime,visitcountry,onlinenum
    FROM t_fordemo
    WHERE butype =2 AND visitcountry IN ('JP','DE','TR') AND epochTime BETWEEN DATE_SUB(NOW(),INTERVAL 2 DAY) AND NOW()
    ORDER BY epochTime ASC
    """
    # get the online data which will be used to calculate the daily user number ( Daily user number is bigger than the max number...
    # and the max number is actually what being used in this scenario
    # (defined but not executed in this function)
    queryStatementTraining = """
    SELECT t1,t2,DATEDIFF(t2,t1) AS dif,imei,visitcountry FROM
    (
    SELECT DATE(logindatetime) AS t1,DATE(logoutdatetime) AS t2, imei,visitcountry
    FROM t_usmguserloginlog
    WHERE visitcountry IN ('JP','DE','TR')
    ) AS z
    GROUP BY t1,t2,imei
    """

    # (output data) get the max online number for each of these countries every day ( this record is incomplete due to the constant network partition
    # therefore a lot of corresponding operation is necessary for aligning the input and output date by day!...
    # (defined but not executed in this function)
    queryStatementOnline ="""
    SELECT epochTime,visitcountry,MAX(onlinenum)
    FROM
    (
    SELECT DATE(epochTime) AS epochTime,visitcountry,onlinenum
    FROM t_fordemo
    WHERE butype =2 and visitcountry IN ('JP','DE','TR')
    ) AS z
    GROUP BY epochTime,visitcountry
    """

    # (input data) get the order number information which will be used to calculate the daily maximum number for each country...
    # this number could be ridiculously large with respect to the real number for some specific countries.
    # (defined but not executed in this function)
    querystatementOMS = """
    SELECT DATE(date_goabroad),DATE(date_repatriate),DATEDIFF(date_repatriate,date_goabroad),imei,package_id FROM tbl_order_basic
    WHERE imei IS NOT NULL AND (DATE(date_repatriate)) > '2016-01-01' AND DATE(date_goabroad) < DATE(NOW())
    ORDER BY date_repatriate ASC
    """

    # (defined but not executed in this function)
    querystatementOMSCount = """
    SELECT  date_goabroad,date_repatriate,DATEDIFF(date_repatriate,date_goabroad),t1.package_id,t3.iso2 FROM tbl_order_basic AS t1
    LEFT JOIN tbl_package_countries AS t2
    ON t1.package_id = t2.package_id
    LEFT JOIN tbl_country AS t3
    ON t2.country_id = t3.pk_global_id
    WHERE t1.data_status = 0 AND DATE(date_goabroad) BETWEEN DATE(NOW()) AND DATE_ADD(NOW(),INTERVAL 3 MONTH) OR
    (
    DATE(date_repatriate) >= DATE(NOW())
    )
    """

    # establish connection to the mysql databases................
    querydbGTBU = MySQLdb.connect(user = querydbinfoGTBU['usr'],
                                  passwd = querydbinfoGTBU['pwd'],
                                  host = querydbinfoGTBU['host'],
                                  port = querydbinfoGTBU['port'],
                                  db = querydbnameGTBU)
    querydbOMS = MySQLdb.connect(user = querydbinfoOMS['usr'],
                                 passwd = querydbinfoOMS['pwd'],
                                 host = querydbinfoOMS['host'],
                                 port = querydbinfoOMS['port'],
                                 db = querydbnameOMS)
    insertdb = MySQLdb.connect(user = insertdbinfo['usr'],
                               passwd = insertdbinfo['pwd'],
                               host = insertdbinfo['host'],
                               port = insertdbinfo['port'],
                               db = insertdbname)

    # Only insertCur is used below; the GTBU and OMS cursors stay idle here.
    queryCurGTBU = querydbGTBU.cursor()
    queryCurOMS = querydbOMS.cursor()
    insertCur = insertdb.cursor()


    print "executing query!!! By using generator!!!"
    insertCur.execute(queryStatementRemote)
    remoteGenerator = fetchsome(insertCur,100) #fetchsome is a generator which will fetch a certain number of query each time.

    for row in remoteGenerator:
        accumulatOnlineNumber(row,testingDict)

    onlineList = getTestingList(testingDict)

    # onlineList is indexed as [0] times, [1] countries, [2] per-country data.
    countryList = onlineList[1]
    jpIndex = countryList.index('JP')
    datalist = onlineList[2][jpIndex]
    timelist = onlineList[0]

    tsJP = Series(datalist,index = timelist)
    df = DataFrame()
    df['JP'] = tsJP

    print df.index
    print df.columns

    print df

    # Work on the log of the series; results are mapped back with np.exp below.
    tsJP_log = np.log(tsJP)
    lag_acf = acf(tsJP_log,nlags=200)
    # NOTE(review): lag_pacf is computed but not used later in this function.
    lag_pacf = pacf(tsJP_log,nlags=200,method='ols')

    # model = ARIMA(tsJP_log,order=(2,1,2))
    model = ARMA(tsJP_log,(5,2))
    res = model.fit(disp=-1)


    print "Here is the fit result"
    print res

    # Pieces of the fit needed by the out-of-sample prediction helper.
    params = res.params
    residuals = res.resid
    p = res.k_ar
    q = res.k_ma
    k_exog = res.k_exog
    k_trend = res.k_trend
    steps = 300

    # Two predictions for the same horizon: the low-level helper and
    # res.forecast.
    newP = _arma_predict_out_of_sample(params, steps, residuals, p, q, k_trend, k_exog, endog=tsJP_log, exog=None, start=len(tsJP_log))
    newF,stdF,confiF = res.forecast(steps)

    print newP
    newP = np.exp(newP)
    print newP

    print " Forecast below!!"
    print newF
    newF = np.exp(newF)
    print newF
    print stdF
    # NOTE(review): this exponentiates the value unpacked as stdF from
    # res.forecast -- confirm that transforming it this way is intended.
    stdF = np.exp(stdF)
    print stdF

    # NOTE(review): x_axis has one entry per ACF value while newP has `steps`
    # entries, so the second trace may not match the x axis -- confirm.
    x_axis = range(len(lag_acf))
    y_axis = lag_acf

    onlineEWMA=go.Scatter(
        x = x_axis,
        y = y_axis,
        mode = 'lines+markers',
        name = "lag_acf"
    )

    onlinePre=go.Scatter(
        x = x_axis,
        y = newP,
        mode = 'lines+markers',
        name = "predictJP"
    )

    layout = dict(title = 'predicewma',
              xaxis = dict(title = 'Date'),
              yaxis = dict(title = 'online Number'),
              )

    data = [onlineEWMA,onlinePre]
    fig = dict(data=data, layout=layout)

    # Write the figure to an HTML file without opening a browser.
    plot(fig,filename ="/ukl/apache-tomcat-7.0.67/webapps/demoplotly/EWMAprediction.html",auto_open=False)
# ACF plot of the differenced data
plot_acf(diff_data, lags = 20)
#pyplot.show()
#plt.savefig(os.path.join('plots', f'{ticker}_ACF.png'), dpi = 400)
#plt.close()

# PACF plot of the differenced data
plot_pacf(diff_data, lags = 20)
#pyplot.show()
#plt.savefig(os.path.join('plots', f'{ticker}_PACF.png'), dpi = 400)
#plt.close()


# ARMA(2, 2) on the differenced data
model = ARMA(diff_data, order = (2,2))
model_fit = model.fit()


#summary of the model
print(model_fit.summary())

print(len(diff_data))

# 40-step forecast; element 0 of the result is what gets printed and reused.
predictions = model_fit.forecast(40)
print(predictions[0])


#print(df['adjclose'].iloc[-1])


# Last observed adjusted close together with the forecast differences.
x, x_diff = df['adjclose'].iloc[-1], predictions[0]
Exemple #19
0
# diff(T): T is the period, i.e. the differencing step (lag), not the order;
# diff() itself performs a first-order difference.
period = origin.diff(24)
# period.plot()


# After removing the periodicity, difference once more to get a stationary series
diff1 = period.diff(1)
# Drop the null values produced by the differencing
inputDiff = diff1.dropna()
inputDiff.plot()
plt.show()

# NOTE(review): the model is built on `count`, not on the differenced series
# `inputDiff` prepared above -- confirm this is intended.
model = ARMA(count, order=(3, 2), dates='20160401000000', freq='B')

resultARMA = model.fit(disp=1, method='css', maxiter=60, trend='nc')    # iteration count set to 60 to avoid non-convergence of the likelihood optimisation

# print(resultARMA.)
# Model forecast
nextValue = resultARMA.forecast()[0]
print(nextValue)

# predictARMA = resultARMA.predict(start='20160406', end='20160430')
# print(predictARMA)

# Undo the first-order differencing
diffShift = periodTest.shift(1)


print(periodTest.tail(1))
# print(diff1)
Exemple #20
0
def AR(a, p):
    """Fit an ARMA model of order (p, 0) to ``a`` and return its parameters."""
    fitted = ARMA(a, order=(p, 0)).fit()
    return fitted.params
Exemple #21
0
def fit_AR(x, p):
    """Fit an ARMA model of order (p, 0) without constant to ``x``.

    Returns a tuple ``(rho, sigma)``: the first estimated parameter and the
    standard deviation of the residuals.
    """
    fitted = ARMA(x, order=(p, 0)).fit(trend='nc')
    return fitted.params[0], np.std(fitted.resid)
Exemple #22
0
plt.plot(modc.error_estimate)
#plt.show()


# ARMA(1, 1) through TArma.fit_mle.
modct = TArma(x)
# NOTE(review): this calls fit on `modc`, not on the `modct` just created --
# confirm which model was meant.
reslst = modc.fit(order=(1,1))
print(reslst[0])
rescmt = modct.fit_mle(order=(1,1), start_params=[-0.4,0.4, 10, 1.],maxiter=500,
                       maxfun=500)
print(rescmt.params)


# Same order through ARMA.fit, without constant.
mkf = ARMA(x)
##rkf = mkf.fit((1,1))
##rkf.params
rkf = mkf.fit((1,1), trend='nc')
print(rkf.params)

# Generate a seeded sample of 1000 observations and fit order (2, 2) with
# Arma.fit and Arma.fit_mle.
np.random.seed(12345)
y_arma22 = arma_generate_sample([1.,-.85,.35, -0.1],[1,.25,-.7], nsample=1000)
##arma22 = ARMA(y_arma22)
##res22 = arma22.fit(trend = 'n', order=(2,2))
##print 'kf ',res22.params
##res22css = arma22.fit(method='css',trend = 'n', order=(2,2))
##print 'css', res22css.params
mod22 = Arma(y_arma22)
resls22 = mod22.fit(order=(2,2))
print('ls ', resls22[0])
resmle22 = mod22.fit_mle(order=(2,2), maxfun=2000)
print('mle', resmle22.params)
Exemple #23
0
# Rebuild a process from the estimated coefficients in res[0].
# NOTE(review): both slices start at index 0, so the second argument overlaps
# the first -- confirm the layout of res[0].
proc = ArmaProcess.from_coeffs(res[0][: order[0]], res[0][: order[1]])

print ar, ma
proc.nobs = nobs
# TODO: bug nobs is None, not needed ?, used in ArmaProcess.__repr__
print proc.ar, proc.ma

print proc.ar_roots(), proc.ma_roots()

from statsmodels.tsa.arma_mle import Arma

# Fit the same order with Arma.fit_mle ...
modn = Arma(x)
resn = modn.fit_mle(order=order)

# ... and with ARMA.fit, without constant.
moda = ARMA(x, order=order)
resa = moda.fit(trend="nc")

# Parameter estimates of the different fits, side by side.
print "\nparameter estimates"
print "ls  ", res[0]
print "norm", resn.params
print "t   ", res2.params
print "A   ", resa.params

# Standard errors of the same fits, plus the relative difference between the
# ARMA.fit ("A") and "t" results on the first three entries.
print "\nstandard deviation of parameter estimates"
# print 'ls  ', res[0]  #TODO: not available yet
print "norm", resn.bse
print "t   ", res2.bse
print "A   ", resa.bse
print "A/t-1", resa.bse / res2.bse[:3] - 1

print "other bse"
Exemple #24
0
    AR3 = ArmaProcess(ar3, ma3)
    sim3 = AR3.generate_sample(nsample=1000)
    plt.title("AR(2) model : AR parameter = +0.9")
    plt.plot(sim3)
    # AR(2) MA(1)模型: AR参数 = -0.9
    plt.subplot(4, 1, 4)
    ar4 = np.array([2, 0.9])
    ma4 = np.array([1])
    AR4 = ArmaProcess(ar4, ma4)
    sim4 = AR4.generate_sample(nsample=1000)
    plt.title("AR(2) model : AR parameter = -0.9")
    plt.plot(sim4)
    fig.savefig("AR.png")
    # 预测模型
    model = ARMA(sim1, order=(1, 0))
    result = model.fit()
    print(result.summary())
    print("μ = {}, φ = {}".format(result.params[0], result.params[1]))
    # 用模型预测
    fig = plt.figure()
    fig = result.plot_predict(start=900, end=1010)
    fig.savefig("AR_predict.png")

    rmse = math.sqrt(
        mean_squared_error(sim1[900:1011], result.predict(start=900, end=999)))
    print("The root mean squared error is {}.".format(rmse))

    # 预测蒙特利尔的湿度
    humid = ARMA(humidity["Montreal"].diff().iloc[1:].values, order=(1, 0))
    res = humid.fit()
    fig = plt.figure()
Exemple #25
0
# Autocorrelation and partial-autocorrelation plots
def draw_acf_pacf(ts, lags=31):
    """Show the ACF and PACF of ``ts`` in two stacked subplots.

    Parameters
    ----------
    ts : array-like
        Series passed to ``plot_acf`` and ``plot_pacf``.
    lags : int, optional
        Number of lags drawn in both plots. The default of 31 is the value
        that used to be hard-coded; previously this argument was accepted
        but ignored.
    """
    f = plt.figure(facecolor='white')
    ax1 = f.add_subplot(211)
    plot_acf(ts, lags=lags, ax=ax1)
    ax2 = f.add_subplot(212)
    plot_pacf(ts, lags=lags, ax=ax2)
    plt.show()

# Log-transform the series, then take a first difference and drop the
# resulting NaN.
ts_log = np.log(ts)

# NOTE(review): the name says 12 but the difference is taken at lag 1.
diff_12 = ts_log.diff(1)
diff_12.dropna(inplace=True)

# ARMA(1, 1) fitted with the CSS method; predict() gives in-sample values.
model = ARMA(diff_12, order=(1, 1))
result_arma = model.fit( disp=-1, method='css')

predict_ts = result_arma.predict()

# Undo the first-order differencing
diff_shift_ts = diff_12.shift(1)
diff_recover_1 = predict_ts.add(diff_shift_ts)
# NOTE(review): the steps below combine an 11-observation rolling sum with a
# factor of 12 -- confirm they match the transformation applied above.
rol_sum = ts_log.rolling(window=11).sum()
rol_recover = diff_recover_1*12 - rol_sum.shift(1)


# Undo the log transform
log_recover = np.exp(rol_recover)
log_recover.dropna(inplace=True)

ts = ts[log_recover.index]  # keep only the records that have a prediction
Exemple #26
0
    decomposed_300.trend.plot()
    plt.subplot(212)
    decomposed_300.trend.diff().plot()
    fig.savefig("stand300.png")
    fig = plt.figure()
    plt.subplot(211)
    decomposed_nas.trend.plot()
    plt.subplot(212)
    decomposed_nas.trend.diff().plot()
    fig.savefig("standnas.png")

    # 建立模型预测
    # AR模型
    from statsmodels.tsa.arima_model import ARMA
    df300_model = ARMA(df_300["close"].diff().iloc[1:].values, order=(1, 0))
    df300_res = df300_model.fit()
    fig = plt.figure()
    fig = df300_res.plot_predict(start=1000, end=1100)
    fig.savefig("ar_300.png")
    print(df300_res.summary())
    # print("模型误差:%f" % pre_error(df_300["close"].diff().iloc[1:].values[1000:1100], df300_res.predict(start = 1000, end = 1100)))
    dfnas_model = ARMA(df_nas["close"].diff().iloc[1:].values, order=(1, 0))
    dfnas_res = dfnas_model.fit()
    fig = plt.figure()
    fig = dfnas_res.plot_predict(start=1000, end=1100)
    fig.savefig("ar_nas.png")
    print(dfnas_res.summary())

    # MA模型
    df300_ma = ARMA(df_300["close"].diff().iloc[1:].values, order=(0, 1))
    df300_res = df300_ma.fit()
Exemple #27
0
from statsmodels.miscmodels.tmodel import TArma

# ARMA(1, 1) through TArma.fit_mle.
modct = TArma(x)
# NOTE(review): this calls fit on `modc`, not on the `modct` just created --
# confirm which model was meant.
reslst = modc.fit(order=(1,1))
print(reslst[0])
rescmt = modct.fit_mle(order=(1,1), start_params=[-0.4,0.4, 10, 1.],maxiter=500,
                       maxfun=500)
print(rescmt.params)


from statsmodels.tsa.arima_model import ARMA
# Same order through ARMA.fit, without constant.
mkf = ARMA(x)
##rkf = mkf.fit((1,1))
##rkf.params
rkf = mkf.fit((1,1), trend='nc')
print(rkf.params)

from statsmodels.tsa.arima_process import arma_generate_sample
# Generate a seeded sample of 1000 observations and fit order (2, 2) with
# Arma.fit and Arma.fit_mle.
np.random.seed(12345)
y_arma22 = arma_generate_sample([1.,-.85,.35, -0.1],[1,.25,-.7], nsample=1000)
##arma22 = ARMA(y_arma22)
##res22 = arma22.fit(trend = 'nc', order=(2,2))
##print 'kf ',res22.params
##res22css = arma22.fit(method='css',trend = 'nc', order=(2,2))
##print 'css', res22css.params
mod22 = Arma(y_arma22)
resls22 = mod22.fit(order=(2,2))
print('ls ', resls22[0])
resmle22 = mod22.fit_mle(order=(2,2), maxfun=2000)
print('mle', resmle22.params)
Exemple #28
0
def fit_arma(ts, p, q):
    """Fit an ARMA(p, q) model to ``ts`` quietly and return its summary."""
    fitted = ARMA(ts, order=(p, q)).fit(disp=0)
    return fitted.summary()
Exemple #29
0
from statsmodels.tsa.statespace.mlemodel import MLEModel
from statsmodels.tsa.arima_process import ArmaProcess

from statsmodels.tsa.arima_model import ARMA

import numpy as np

# Two MA(1) processes: MA coefficient 0.5 and MA coefficient 2.5.
np.random.seed(20190624)
ma_invertible = ArmaProcess(ar=np.array([1]), ma=np.array([1, .5]))
ma_noninvertible = ArmaProcess(ar=np.array([1]), ma=np.array([1, 2.5]))

y_invertible = ma_invertible.generate_sample(nsample=1000, scale=1.2)
y_noninvertible = ma_noninvertible.generate_sample(nsample=1000, scale=1.2)

# Fit an order (0, 1) model without constant to the first sample. The bare
# expressions below only display a value in an interactive session.
model = ARMA(y_invertible, (0, 1))
model_fit = model.fit(trend='nc')
model_fit.summary()
model_fit.predict()[0:10]
# Prediction from the model with explicitly supplied parameters.
model.predict(params=np.array([.5322, 1.2]))[0:10]

# Copy the second sample and overwrite observation 995 with an outlier.
y_with_outlier = y_noninvertible.copy()
y_with_outlier[995] = 1200.0

model2 = ARMA(y_with_outlier, (0, 1))
model2_fit = model2.fit(trend='nc')
model2_fit.summary()
model2.predict(params=np.array([.5]))[990:]

y_invertible[0:10]

Exemple #30
0
print('This is my ARMA simulation:', Simulated_data)

# Simulate 5000 observations for each combination of p = 1..9 and phi.
# NOTE(review): p is used as the leading coefficient of both the AR and MA
# polynomials here -- confirm this is intended.
for p in range(1, 10):
    for phi in np.arange(0.1, 0.99, 0.1):
        ar = np.array([p, phi])
        ma = np.array([p, phi - 0.1])
        AR_object = ArmaProcess(ar, ma)
        simulated_data = AR_object.generate_sample(nsample=5000)
        print(simulated_data)

# Step 2 (Training):

from statsmodels.tsa.arima_model import ARMA

# Fit an order (1, 0) model to the 'Bitcoins_in_circulation' column.
arma_model = ARMA(Blockchain_df['Bitcoins_in_circulation'], order=(1, 0))
arma_results = arma_model.fit()
print('This is my trained ARMA model:', arma_results.summary())

# Step 3 (Predicting):

# Predict and plot positions 100 through 1900.
predict_data = arma_results.predict(start=100, end=1900)
print('This is my predicted ARMA:', predict_data)
arma_results.plot_predict(start=100, end=1900)
#plt.show()
'''COINTEGRATION'''

from statsmodels.tsa.stattools import coint

# Cointegration test between the two exchange-rate columns; element 1 of
# the result is printed.
Cointegration_serie = coint(Blockchain_df['USD/EUR'], Blockchain_df['USD/CHF'])
print(Cointegration_serie[1])
Exemple #31
0
def test_reset_trend():
    """Refitting one ARMA model with another trend must change the parameter count.

    The fit with trend="c" has exactly one parameter more than the fit with
    trend="nc" on the same model instance.
    """
    model = ARMA(y_arma[:,0])
    with_const = model.fit(order=(1,1), trend="c", disp=-1)
    without_const = model.fit(order=(1,1), trend="nc", disp=-1)
    assert_equal(len(with_const.params), len(without_const.params)+1)
Exemple #32
0
import numpy as np
from statsmodels.tsa.arima_process import ArmaProcess

# Simulate 1000 observations from a process with AR polynomial [1, -0.9]
# and MA polynomial [1, 0.2, -0.1], using a fixed seed.
np.random.seed(20190529)
ar1ma2 = ArmaProcess(ar=np.array([1, -.9]), ma=np.array([1, .2, -.1]))
y = ar1ma2.generate_sample(nsample=1000, scale=1.2)
print(y[0:5])

#---
from statsmodels.tsa.arima_model import ARMA

# Fit an order (1, 2) model without constant. The bare expressions below
# only display a value in an interactive session.
model = ARMA(y, (1, 2))
model_fit = model.fit(trend='nc')
model_fit.summary()
model_fit.llf



#import statsmodels as sm
from statsmodels.tsa.statespace.mlemodel import MLEModel
from statsmodels.tsa.statespace.tools import (constrain_stationary_univariate,
                                              unconstrain_stationary_univariate)

class AR1MA2_verbose(MLEModel):
    start_params = [.8, 0.24, -.11, 1.3]
    param_names = ['ar1', 'ma1', 'ma2', 'sigma2']

    def __init__(self, endog):
        super().__init__(endog, k_states = 3)
print(res[0])
# Rebuild a process from the estimated coefficients in res[0].
# NOTE(review): both slices start at index 0, so the second argument overlaps
# the first -- confirm the layout of res[0].
proc = ArmaProcess.from_coeffs(res[0][:order[0]], res[0][:order[1]])

print(ar, ma)
proc.nobs = nobs
# TODO: bug nobs is None, not needed ?, used in ArmaProcess.__repr__
print(proc.ar, proc.ma)

print(proc.ar_roots(), proc.ma_roots())

from statsmodels.tsa.arma_mle import Arma
# Fit the same order with Arma.fit_mle ...
modn = Arma(x)
resn = modn.fit_mle(order=order)

# ... and with ARMA.fit, without constant.
moda = ARMA(x, order=order)
resa = moda.fit( trend='nc')

# Parameter estimates of the different fits, side by side.
print('\nparameter estimates')
print('ls  ', res[0])
print('norm', resn.params)
print('t   ', res2.params)
print('A   ', resa.params)

# Standard errors of the same fits, plus the relative difference between the
# ARMA.fit ("A") and "t" results on the first three entries.
print('\nstandard deviation of parameter estimates')
#print 'ls  ', res[0]  #TODO: not available yet
print('norm', resn.bse)
print('t   ', res2.bse)
print('A   ', resa.bse)
print('A/t-1', resa.bse / res2.bse[:3] - 1)

print('other bse')
# Normalise the traffic series and split it: first 1000 observations for
# training, the rest for testing, each re-indexed with the matching part of rng.
traffic = normalization(traffic)
traffic_train = traffic[:1000]
traffic_test = traffic[
    1000:]  #type(traffic) = <class 'pandas.core.series.Series'>
traffic_train = pd.Series(traffic_train, index=rng[:1000])
traffic_test = pd.Series(traffic_test, index=rng[1000:])
traffic = pd.Series(traffic)
# NOTE(review): traffic_diff1 is not used in the lines that follow.
traffic_diff1 = traffic_train.diff(1)

#print proper_model(traffic_train,13)

#print testStationarity(traffic)
#draw_acf_pacf(traffic)

# ARMA(10, 3) fitted with the CSS method on the training part.
model = ARMA(traffic_train, order=(10, 3))
result_arma = model.fit(disp=-1, method='css')
# In-sample prediction, then a dynamic prediction over the given date range.
predict_ts_train = result_arma.predict()
predict_ts_test = result_arma.predict('6/21/2014 20:00:00',
                                      '6/30/2014 17:00:00',
                                      dynamic=True)

# Figure 1: training data vs. in-sample prediction.
fig1 = plt.figure(1)
plt.plot(traffic_train, 'r')
plt.plot(predict_ts_train, 'b--')
# Figure 2: test data vs. dynamic prediction.
fig2 = plt.figure(2)
plt.plot(traffic_test, 'r')
plt.plot(predict_ts_test, 'b--')
plt.show()

# NOTE(review): the file written here contains the observed test series,
# not the prediction -- confirm that is what the file name means.
traffic_test.to_csv(
    '/home/johnson/tensorflow/pic/30min/result_30min/ARIMA_test_result')
Exemple #35
0
# Import the ARMA module from statsmodels
from statsmodels.tsa.arima_model import ARMA

# Fit an AR(1) model (order=(1, 0)) to the first simulated series
mod = ARMA(simulated_data_1, order=(1, 0))
res = mod.fit()
# Plot the model's predictions from start=990 to end=1010 and show the figure
res.plot_predict(start=990, end=1010)
plt.show()
Exemple #36
0
def run_arma(series, p, q):
    """Fit an ARMA(p, q) model to ``series`` and return the fit summary.

    Parameters
    ----------
    series : array-like
        Data handed to ``ARMA`` unchanged.
    p, q : int
        The two entries of the ``order`` tuple passed to ``ARMA``.

    Returns
    -------
    The object returned by calling ``summary()`` on the fitted results.
    """
    fitted = ARMA(series, order=(p, q)).fit()
    return fitted.summary()
, of one of the simulated series that you generated in the earlier exercise. Since the parameters are known for a simulated series, it is a good way to understand the estimation routines before applying it to real data.

For simulated_data_1 with a true ϕ of 0.9, you will print out the estimate of ϕ. In addition, you will also print out the entire output that is produced when you fit a time series, so you can get an idea of what other tests and summary statistics are available in statsmodels.

INSTRUCTIONS
100XP
Import the class ARMA in the module statsmodels.tsa.arima_model.
Create an instance of the ARMA class called mod using the simulated data simulated_data_1 and the order (p,q) of the model (in this case, for an AR(1), order=(1,0)).
Fit the model mod using the method .fit() and save it in a results object called res.
Print out the entire summary of results using the .summary() method.
Just print out an estimate of the constant and ϕ using the .params attribute (no parentheses).
 '''
 # Import the ARMA module from statsmodels
from statsmodels.tsa.arima_model import ARMA

# Fit an AR(1) model (order=(1, 0)) to the first simulated data set
mod = ARMA(simulated_data_1, order=(1,0))
res = mod.fit()

# Print the full summary of the fitted results
print(res.summary())

# Print the fitted parameters; the message below states that the true phi
# is 0.9, for comparison
print("When the true phi=0.9, the estimate of phi (and the constant) are:")
print(res.params)
# Report which of the integer labels 0..390 are absent from df's index.
all_set = set(range(391))
existed_set = set(df.index)
print("缺失行:",all_set-existed_set)

df = df.reindex(range(391),method='ffill')      # fill in the missing rows (forward fill)
# 391 one-minute timestamps from 9:30 to 16:00 inclusive replace the integer index.
df.index = pd.date_range(start='2017-09-01 9:30', end='2017-09-01 16:00',freq='1min')
df.plot(grid=True)

pct = df.pct_change()       # rate of change
pct = pct.dropna()

plot_acf(pct,lags=30)       # autocorrelation plot
plt.show()

# Fit an order-(0, 1) ARMA model, i.e. MA(1), to the percentage changes.
ma1 = ARMA(pct,order=(0,1))
res = ma1.fit()
print(res.params)

# Print 80 blank lines to separate the two parts of the output.
print(('\n'*80))
#########################################################################
#2.temperature.
# Load Sheet1 without a header row, name the two columns and index by year.
df = pd.ExcelFile('./data/temperature_ARMA_simple_demo.xlsx').parse('Sheet1',header=None)
df.columns = ['years','tavg']
df = df.set_index('years')

# The ACF and PACF plots neither cut off nor tail off; p > 0.05.
df.index = pd.to_datetime(df.index,format='%Y')     # parse the year labels as datetimes
df.plot()
plt.show()

#0 hypo: random walk with drift.
    print'Critical values: ', d_order0[4]

    if d_order0[0] > d_order0[4]['5%']:
        print 'Time Series is  nonstationary'
    else:
        print 'Time Series is stationary'

    # # selecting parameter
    order = sm.tsa.arma_order_select_ic(ts_diff_1, max_ar=6, max_ma=3, ic=['aic'])
    # print order

    try:

        # ARMA model
        model = ARMA(ts_diff_1,(order['aic_min_order'][0],order['aic_min_order'][1]))
        predict_diff_1 = model.fit(disp=False).forecast(14)[0]

        # restore
        predict = np.cumsum(predict_diff_1)
        predict = predict + np.mean(ts[-7:])

        # use continuity is better
        stander = sklearn.preprocessing.StandardScaler()
        predict = stander.fit_transform(predict)
        predict = stander.fit(ts[-7:]).inverse_transform(predict)
        predict = np.round(predict)

        print predict
        predict_result = np.vstack((predict_result,predict))

        # visualizing
Exemple #40
0
pvalue2 = acorr_val(ts_date)  # white-noise test on the original series
print(pvalue2)
# rule1: the ADF statistic is below all three critical values and pvalue1 is
# below 0.01 (adf, pvalue1 and critical_values come from earlier in the script).
rule1 = (adf < critical_values['1%'] and adf < critical_values['5%']
         and adf < critical_values['10%'] and pvalue1 < 0.01)
# rule2: the first entry of pvalue2 is below 0.05.
rule2 = (pvalue2[0, ] < 0.05)

# Make the time series stationary
log_n, ts_date = get_best_log(ts_date, max_log=5, rule1=rule1, rule2=rule2)

# Re-run both tests on the transformed series.
adf, pvalue1, critical_values = adf_val(ts_date, 'final time series',
                                        'final acf', 'final pacf')
pvalue2 = acorr_val(ts_date)

# Train the model
#model_arma=arma_fit(ts_date)
model_arma1 = ARMA(ts_date, order=(2, 4))
model_arma = model_arma1.fit(disp=-1, method='css')

# Model training and performance evaluation
# NOTE(review): the keyword is spelled `lon_n` while the value is `log_n` --
# confirm this matches train_test's parameter name.
ts_date = train_test(model_arma,
                     ts=ts_date,
                     lon_n=log_n,
                     rule1=rule1,
                     rule2=rule2)  # restored time series; the rules hold the results for the original series, which usually is not stationary

# Apply the model
start = '1991-07-28'
end = '1991-08-02'

predict_data(model_arma, ts_date, log_n, start, end, rule1=rule1, rule2=rule2)