コード例 #1
0
def div_predict(sp500_div, order=(3, 1, 2)):
    """Fit an ARIMA model to ``sp500_div`` and return the first forecast element.

    The function first runs a diagnostic grid search over ARIMA(p, 1, q) for
    p, q in ``0 .. len(sp500_div) // 100`` and prints the (q, p) pair with the
    lowest BIC. The model that is actually fitted, summarised, plotted and
    used for forecasting is the one given by ``order``; the grid search result
    is informational only.

    Args:
        sp500_div: Series passed straight to ``ARIMA`` as the endogenous data.
        order: (p, d, q) order of the final model. Defaults to the previously
            hard-coded ``(3, 1, 2)``.

    Returns:
        ``clf.forecast(1)[0]``, i.e. the first element of whatever
        ``forecast(1)`` returns.
        NOTE(review): with the legacy ``statsmodels.tsa.arima_model`` API this
        is presumably the array of point forecasts; confirm against the
        statsmodels version in use.
    """
    import matplotlib.pyplot as plt

    max_lag = len(sp500_div) // 100

    # bic_grid[p][q] holds the BIC of ARIMA(p, 1, q), or None if fitting failed.
    bic_grid = []
    for p in range(max_lag + 1):
        row = []
        for q in range(max_lag + 1):
            try:
                row.append(ARIMA(sp500_div, order=(p, 1, q)).fit().bic)
            except Exception:
                # Some orders cannot be fitted; record a gap instead of
                # aborting the search (KeyboardInterrupt still propagates).
                row.append(None)
        bic_grid.append(row)

    find = pd.DataFrame(bic_grid)
    # Rows are indexed by p and columns by q (see how bic_grid is built).
    find.index.name = 'p'
    find.columns.name = 'q'
    # unstack() puts the column level first, so each key is a (q, p) tuple.
    bic_by_order = find.unstack().astype('float32')
    print('q,p: ', bic_by_order.idxmin())

    clf = ARIMA(sp500_div, order=order).fit()
    print(clf.summary())
    plt.figure()
    clf.plot_predict()
    plt.show()
    return clf.forecast(1)[0]
コード例 #2
0
def predict_stock(stock_close_rtn, n_steps=5, plot=False):
    """Fit an ARIMA model on a stock's close returns and return predictions.

    Args:
        stock_close_rtn: Close returns; must support ``.shift()``, ``.values``
            and ``len()`` (the original docstring says a dataframe).
        n_steps: Passed as ``end`` to ``model.predict`` and used to extend the
            plotted range past the end of the data.
        plot: When true, show optimiser output during fitting and draw the
            prediction plot with a dashed zero line.

    Returns:
        The result of ``model.predict(end=n_steps)``.

    NOTE(review): the order tuple is built as ``(p, q, 0)``, which places the
    second value returned by ``find_params_arima`` in the differencing slot;
    confirm whether ``(p, d, q)`` with that value as ``q`` was intended.
    NOTE(review): ``predict(end=n_steps)`` is indexed from the start of the
    sample, so it may not be "the next n_steps values"; confirm.
    """
    diff_series = (stock_close_rtn - stock_close_rtn.shift()).dropna()
    p, q = find_params_arima(diff_series)

    model = ARIMA(stock_close_rtn.values, (p, q, 0)).fit(disp=plot)
    predicted = model.predict(end=n_steps)

    if plot:
        model.plot_predict(
            len(stock_close_rtn) - 10,
            len(stock_close_rtn) + n_steps)
        # The zero reference line belongs to the prediction plot; drawing it
        # unconditionally would create/modify a figure even when plot=False.
        plt.axhline(y=0, linestyle='--', color='gray')

    return predicted
コード例 #3
0
# Notebook-style script split into `#%%` cells. `model`, `dftaxi_day`,
# `ARIMA` and `plot_acf` are defined outside this excerpt.
# Residual plot and residual autocorrelation of `model`; print() shows the
# object returned by each plotting call.
print(model.resid.plot())
print(plot_acf(model.resid, lags = 50))
#%% ARIMA
# Fit ARIMA with order (28, 1, 1) and repeat the residual diagnostics.
arima_model = ARIMA(dftaxi_day, (28,1, 1)).fit()
arima_model.summary()
print(arima_model.resid.plot())
print(plot_acf(arima_model.resid, lags = 50))
#%% Predict
#arima_model.predict(1,100).plot()

import matplotlib.pyplot as plt

# NOTE(review): `train` is only assigned in the "Train/Test Split" cell
# further down, so in file order this cell runs before `train` exists;
# presumably the cells are meant to be executed out of order.
fig, ax = plt.subplots()
ax = train.plot(ax=ax)

# Overlay the prediction for 2016-10-01 .. 2016-12-31 on the same axes,
# without the in-sample part.
fig = arima_model.plot_predict('2016-10-01',
    '2016-12-31', ax=ax, plot_insample=False)
###############################################################################################################
###############################################################################################################
#%%
print (train)
#train.plot(y='response_variable',kind='line')
#%% Train/Test Split
n = len(dftaxi_day.response_variable)

# First 75% of the observations for training, the remainder for testing.
train = dftaxi_day.response_variable[:int(.75*n)]
test = dftaxi_day.response_variable[int(.75*n):]
#%%
print(test)
#%% Train autocorrelation
# Trailing numbers are the values the original author recorded.
print (train.autocorr(lag=1)) # 0.61
print (train.autocorr(lag=7)) # 0.78
コード例 #4
0
ファイル: plots.py プロジェクト: kusmier/NASA
# fc_series = pd.Series(fc, index=lst_day_validation.index)
# lower_series = pd.Series(conf[:, 0], index=lst_day_validation.index)
# upper_series = pd.Series(conf[:, 1], index=lst_day_validation.index)
#
# plt.figure(figsize=(12, 5), dpi=300)
# plt.plot(lst_day_train, label='training')
# plt.plot(lst_day_validation, label='validation')
# plt.plot(fc_series, label='forecast')
# plt.title('Forecast vs Actuals')
# plt.legend(loc='upper left', fontsize=12)
# plt.show()

# Build Model
# `lst_day_train`, `base_temperatures`, `ARIMA`, `plt`, `pd` and `np` are
# defined outside this excerpt.
print(lst_day_train)
# The model is fitted on the full 'lst_day' column, not on lst_day_train.
model = ARIMA(base_temperatures['lst_day'], order=(1, 1, 0)).fit()
model.plot_predict(dynamic=False)
# Compare the first difference of the series with the fitted values.
plt.figure(figsize=(12, 5), dpi=300)
plt.plot(base_temperatures['lst_day'].diff())
plt.plot(model.fittedvalues, color='red')
plt.show()

# Rebuild a level series from the fitted values: start from the first
# observation, append the fitted values from the second one on, and take the
# cumulative sum.
predictions_ARIMA_diff = pd.Series(model.fittedvalues, copy=True)
x, x_diff = base_temperatures['lst_day'].iloc[0], predictions_ARIMA_diff.iloc[
    1:]
predictions_ARIMA = np.r_[x, x_diff].cumsum().astype(float)
# Length sanity checks before attaching an index.
print(len(predictions_ARIMA))
print(len(base_temperatures))
# Index the rebuilt series by every timestamp except the first.
predictions = pd.Series(predictions_ARIMA,
                        index=base_temperatures['lst_day'][1:].index)
print(model.fittedvalues.tail())
print(predictions.tail())
コード例 #5
0
    plt.show()

    # Train the ARIMA model
    # First 90 observations of `y` are used for fitting, the rest for testing.
    y_train = y[0:90]
    y_test = y[90:]

    arima = ARIMA(y_train, order=(6, 1, 2), missing="drop").\
        fit(transparams=True, maxiter=500, trend="c")

    # Predictions for positions 90..99; this variable is not used again in the
    # visible part of the function.
    y_pred_arima = arima.predict(start=90, end=99)

    # Show the results
    sns.set()

    fig, ax = plt.subplots(figsize=(18, 8))

    # Test data in red, with the dynamic prediction for positions 90..99 drawn
    # on the same axes.
    ax.plot(y_test, linewidth=1.0, color="r", label="Data")
    arima.plot_predict(start=90,
                       end=99,
                       plot_insample=False,
                       dynamic=True,
                       ax=ax)

    ax.set_xlabel("Time", fontsize=16)
    ax.set_ylabel("Measure", fontsize=16)
    # NOTE(review): the title says "ARMA" although the model fitted above is
    # ARIMA with order (6, 1, 2).
    ax.set_title("ARMA(6, 1, 2) prediction", fontsize=16)
    ax.legend(fontsize=16)

    plt.show()
コード例 #6
0
# predict_dta = arma.predict(start='2016-10-14-00', end='2016-10-14-23', dynamic=True,)
# print(predict_dta)
#
# # arma.plot_predict(start='2016-10-14-00', end='2016-10-14-23', dynamic=True,)
# # plt.show()
#
# pred = pd.Series(np.array(list(predict_dta), dtype=float), index=pd.date_range(start='2016-10-14', periods=24, freq='H'))
#
# time.plot()
# pred.plot()
# plt.show()

# Fit ARIMA(5, 1, 1) on `time` with freq='H'. `time` is a data object defined
# outside this excerpt (it has the same name as the stdlib `time` module).
model = ARIMA(time, order=(5, 1, 1), freq='H').fit()

# Dynamic prediction between the two '2016-10-14-..' labels.
predict_dta = model.predict(
    start='2016-10-14-00',
    end='2016-10-14-23',
    dynamic=True,
)
print(predict_dta)

# Same range and settings, drawn as a plot.
model.plot_predict(
    start='2016-10-14-00',
    end='2016-10-14-23',
    dynamic=True,
)
plt.show()

# pred = model.forecast(10)
# print(pred)
コード例 #7
0
# Order selection on the 'ts1' column (missing values dropped); `data` and the
# statsmodels helpers are defined outside this excerpt.
order_trend = arma_order_select_ic(data['ts1'].dropna())
print(order_trend['bic_min_order'])  # the selected order matches the one in the book

# 4. Fit the model
result_trend = ARIMA(data['index'], (0, 1, 1)).fit()

print(result_trend.params)

# The remaining steps are the same as for ARMA
# 5. Goodness-of-fit checks
# (1) White-noise test on the residuals
output3 = acorr_ljungbox(result_trend.resid,
                         boxpierce=True,
                         lags=[6, 12],
                         return_df=True)
print(output3)
# After fitting, the white-noise test looks good: well above 0.05

# (2) Significance test of the model parameters
print(result_trend.pvalues)  # this result seems not quite consistent with R's

# Plot the data from 1952 onwards and overlay the dynamic prediction for
# 1989..1992 on the same axes.
fig, ax = plt.subplots()
ax = data['index'].loc['1952':].plot(ax=ax)
result_trend.plot_predict('1989',
                          '1992',
                          dynamic=True,
                          ax=ax,
                          plot_insample=False)

plt.show()
コード例 #8
0
import matplotlib.pyplot as plt
from sklearn import metrics as me

# Load the 'production' column as a Series, using the first CSV column as the
# index.
data1 = pd.read_csv('e:/work/milkproduction.csv', header=0, index_col=0)
data1 = pd.Series(data1['production'])

# First difference, printed next to the original series.
data1_diff = data1.diff(1).dropna()
x = pd.concat([data1_diff, data1], axis=1)
print(x)

# Augmented Dickey-Fuller test on the differenced series.
print(adfuller(data1_diff, autolag='AIC'))

# Fit on the already-differenced series (d=0) and print the residual sum of
# squares.
model = ARIMA(data1_diff, order=(1, 0, 2)).fit(disp=-1)
print(sum((data1_diff - model.fittedvalues)**2))
# NOTE(review): exit() ends the script here, so everything below this line is
# unreachable as written.
exit()
# The bare string literal below holds disabled code.
'''
model.plot_predict()
model.forecast()
plot_acf(data1)
plot_pacf(data1)
sm.qqplot(model.resid,line='s')
plt.show()
'''

# Undo the differencing: prepend the first level and the first difference to
# the fitted values, then cumulative-sum and plot against the original data.
# NOTE(review): Series.append was removed in pandas 2.0 -- confirm the pandas
# version before re-enabling this code.
model_prediction_diff=pd.Series([data1[0],data1[1]-data1[0]],index=[data1.index[0],data1.index[1]])\
    .append(model.fittedvalues)
model_prediction = pd.Series.cumsum(model_prediction_diff)
model_prediction.plot()
data1.plot()
plt.show()
# `df` is defined outside this excerpt. Drop rows with missing values in place.
df.dropna(inplace=True)
##df['Date'] = pd.to_datetime(df['Date'])
# Observed values as int32; used later by mean_forecast_error.
LocalTransmission = df['LocalTransmission'].astype('int32')
#print (df.head())
print(df.index)

# NOTE(review): the whole `df` is passed to ARIMA rather than the
# LocalTransmission series -- presumably df has a single column; confirm.
result = ARIMA(df, order=(1, 1, 1)).fit(disp=False)
print(result.summary())
#print(result.params)
# Prediction for 2020-03-01 .. 2020-05-01.
predictions = result.predict(start="2020-03-01", end="2020-05-01")
#accuracy = result.score()
print(predictions)
##accuracy = result.score()
#print (accuracy)

# Plot the prediction for the same date range.
result.plot_predict(start="2020-03-01", end="2020-05-01")
plt.suptitle('Prediction for postive cases in Egypt \n Algorithm used: ARIMA',
             fontsize=12)
plt.show()

##def mean_forecast_error(y, yhat):
##    return y.sub(yhat).mean()


def mean_forecast_error(LocalTransmission, predictions):
    """Return the mean forecast error (bias) of ``predictions``.

    The error is ``actual - predicted`` averaged over all index labels; a
    positive value means the model under-forecasts on average.

    Args:
        LocalTransmission: pandas Series of observed values.
        predictions: pandas Series of predicted values. It is aligned with
            ``LocalTransmission`` by index label; labels present in only one
            of the two produce NaN and are skipped by ``mean()``.

    Returns:
        The mean of the element-wise differences.
    """
    # sum(actual, predictions) would use `predictions` as the *start* value of
    # the built-in sum, which is not a forecast error; subtract element-wise.
    return LocalTransmission.sub(predictions).mean()


# Compute the metric once and print its value; the original discarded the
# result and then printed the function object itself.
forecast_error = mean_forecast_error(LocalTransmission, predictions)

print(forecast_error)
コード例 #10
0
ファイル: 金融中的统计.py プロジェクト: jqyangDaz/Daz
plt.show()

# Order selection: fit every (p, d, q) combination with each value in 0..3 and
# print the AIC and BIC of the models that can be fitted.
p = d = q = range(0, 4)
pdq = list(itertools.product(p, d, q))
for param in pdq:
    try:
        model = ARIMA(data1_diff, order=param).fit(disp=-1)
    except Exception:
        # Some orders cannot be fitted; skip them. Catching Exception rather
        # than using a bare except lets Ctrl-C interrupt the search.
        continue
    else:
        print('ARIMA{} AIC:{} BIC:{}'.format(param, model.aic, model.bic))



'''
####模型拟合####
model = ARIMA(data1_diff, order=(3, 0, 3)).fit(disp=-1)
print(model.summary())

model.plot_predict()
model.forecast()
sm.qqplot(model.resid, line='s')
plt.show()

####差分还原画图####
model_prediction_diff=pd.Series([data1[0],data1[1]-data1[0]],index=[data1.index[0],data1.index[1]])\
    .append(model.fittedvalues)
model_prediction = pd.Series.cumsum(model_prediction_diff)
model_prediction.plot(label='forcest')
data1.plot()
plt.legend(loc='upper right')
plt.show()
コード例 #11
0
# Residual diagnostics and forecast plot for the fitted model `arima_mod100`.
# `arima_mod100`, `trainWTI`, `plt`, `st` and `qqplot` are defined outside
# this excerpt.

# Residual series over time.
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
ax = arima_mod100.resid.plot(ax=ax)
ax.set_title("Residual series")
plt.show()

resid = arima_mod100.resid

# Single-argument print(...) calls are valid on both Python 2 and Python 3,
# unlike the original print statements.
print("============== Residuals normality test ================")
print(st.normaltest(resid))
print("========================================================")

# Q-Q plot of the residuals.
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
ax.set_title("Residuals test for normality")
fig = qqplot(resid, line='q', ax=ax, fit=True)
plt.show()

# Training data from 2012 onwards with the dynamic prediction for
# 2014m1..2015m12 drawn on the same axes.
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
# .loc replaces the removed .ix indexer; the slice is label-based either way.
ax = trainWTI.loc['2012':].plot(ax=ax)
fig = arima_mod100.plot_predict('2014m1',
                                '2015m12',
                                dynamic=True,
                                ax=ax,
                                plot_insample=False)
ax.set_title("Prediction of spot prices")
ax.set_xlabel("Dates")
ax.set_ylabel("Price [USD]")
plt.show()
コード例 #12
0

# Decompose `y` once per decomposition model; `ts_decompose`, `y`, `train`
# and `test` are defined outside this excerpt.
for model in ["additive", "multiplicative"]:
    ts_decompose(y, model, True)

##################################################
# MODEL
##################################################
arima_model = ARIMA(train, order=(1, 1, 1)).fit(disp=0)  # order(p, d, q)
arima_model.summary()

# 48-step forecast; [0] keeps the first element of the returned value.
y_pred = arima_model.forecast(48)[0]
# The result is not stored; the number below is the value the original author
# recorded.
mean_absolute_error(test, y_pred)
# 2.7193

arima_model.plot_predict(dynamic=False)
plt.show()

# Train (from 1985 on), test and predicted values on one chart; predictions
# are indexed like `test`.
train["1985":].plot(legend=True, label="TRAIN")
test.plot(legend=True, label="TEST", figsize=(6, 4))
pd.Series(y_pred, index=test.index).plot(legend=True, label="PREDICTION")
plt.title("Train, Test and Predicted Test")
plt.show()

##################################################
# MODEL TUNING
##################################################

##################################################
# Statistical Consideration of Model Degree Selection
##################################################
コード例 #13
0
ファイル: arima.py プロジェクト: nzahasan/tank-model
class autoARIMA(object):
    '''
    A wrapper of statsmodels ARIMA for easier model fitting and forecasting.

    The order is chosen by brute force: every (p, d, q) combination in the
    configured ranges is fitted and the one with the lowest BIC (Bayesian
    Information Criterion) wins.
      - Not the best way but it is the easiest

    Possible alternative to look at - pyramid-arima
    '''
    def __init__(self, endog, max_p=5, max_d=5, max_q=5, helpText=True):
        '''
        endog    -- data passed to ARIMA as the endogenous series
        max_p/max_d/max_q -- exclusive upper bounds of the search ranges
                    (the search uses range(max_x), i.e. 0 .. max_x - 1)
        helpText -- when truthy, print the selected order
        '''
        self.endog = endog
        self.max_p = max_p
        self.max_d = max_d
        self.max_q = max_q
        self.helpText = helpText
        self.fitted_model = None

    def getOrder(self):
        '''
        Return the (p, d, q) tuple with the lowest BIC.

        Raises ValueError when no combination could be fitted.
        '''
        bestOrder = None
        bestBic = None

        # iterate through (p,d,q) values
        for p in range(self.max_p):
            for d in range(self.max_d):
                for q in range(self.max_q):
                    try:
                        bic = ARIMA(self.endog,
                                    order=(p, d, q)).fit(disp=0).bic
                    except Exception:
                        # Many orders cannot be fitted; skip them. A bare
                        # except would also swallow KeyboardInterrupt.
                        continue
                    # Strict '<' keeps the first order on ties, matching
                    # min() + list.index() semantics.
                    if bestBic is None or bic < bestBic:
                        bestOrder, bestBic = (p, d, q), bic

        if bestOrder is None:
            raise ValueError(
                'No ARIMA model could be fitted for any (p, d, q) order')

        if self.helpText:
            print('Lowest BIC value with order ', bestOrder)

        return bestOrder

    def fit(self):
        '''Fit ARIMA with the lowest-BIC order, store it and return self.'''
        self.fitted_model = ARIMA(self.endog,
                                  order=self.getOrder()).fit(disp=0)

        return self

    def forecast(self, num_step):
        '''
        Return (forecast, confLimit) for num_step steps ahead, or None (after
        printing an error) when the model has not been fitted yet.
        '''
        if self.fitted_model is None:
            print('ERROR: Fit the model first')
            return None

        # The middle element of the returned triple is not used.
        forecast, _, confLimit = self.fitted_model.forecast(steps=num_step)
        return (forecast, confLimit)

    def inSamplePlot(self):
        '''Draw the fitted model's prediction plot and return self.'''
        self.fitted_model.plot_predict()
        return self

    def saveModel(self, fileName):
        '''Pickle this wrapper (including any fitted model) to fileName.'''
        with open(fileName, 'wb') as outModelFile:
            pickle.dump(self, outModelFile, pickle.HIGHEST_PROTOCOL)

    def loadModel(self, fileName):
        '''
        Restore this wrapper's state from a file written by saveModel and
        return self.

        SECURITY: pickle.load can execute arbitrary code; only load files
        from a trusted source.
        '''
        with open(fileName, 'rb') as inModelFile:
            loaded = pickle.load(inModelFile)

        if not isinstance(loaded, autoARIMA):
            raise TypeError('%s does not contain an autoARIMA object'
                            % fileName)

        # Rebinding the local name `self` has no effect on the caller's
        # object, so copy the loaded state onto this instance instead.
        self.__dict__.update(loaded.__dict__)
        return self
コード例 #14
0
# Durbin-Watson statistic of the residuals of `arima_mod100` (defined outside
# this excerpt, like `sm`, `st`, `qqplot`, `plt` and `trainWTI`).
# A value close to 0 indicates strong positive correlation, while a value of 4
# indicates strong negative correlation; a value near 2 indicates no
# first-order autocorrelation.
# Single-argument print(...) calls are valid on both Python 2 and Python 3,
# unlike the original print statements.
print("==================== Durbin-Watson =====================")
print(sm.stats.durbin_watson(arima_mod100.resid.values))
print("========================================================")

# Residual series over time.
fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(111)
ax = arima_mod100.resid.plot(ax=ax)
ax.set_title("Residual series")
plt.show()

resid = arima_mod100.resid

print("============== Residuals normality test ================")
print(st.normaltest(resid))
print("========================================================")

# Q-Q plot of the residuals.
fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(111)
ax.set_title("Residuals test for normality")
fig = qqplot(resid, line='q', ax=ax, fit=True)
plt.show()

# Training data from 2012 onwards with the dynamic prediction for
# 2014m1..2015m12 drawn on the same axes.
fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(111)
# .loc replaces the removed .ix indexer; the slice is label-based either way.
ax = trainWTI.loc['2012':].plot(ax=ax)
fig = arima_mod100.plot_predict('2014m1', '2015m12', dynamic=True, ax=ax, plot_insample=False)
ax.set_title("Prediction of spot prices")
ax.set_xlabel("Dates")
ax.set_ylabel("Price [USD]")
plt.show()
コード例 #15
0
# First model: ARIMA on `dts` with orders taken from p, d, q (all defined
# outside this excerpt); fitted with the default method and plotted.
model = pf.ARIMA(data=dts, ar=p, integ=d, ma=q)
x = model.fit()
model.plot_fit(figsize=(15, 4))
'''
mu, Y = model._model(model.latent_variables.get_z_values())
fitted_values = pd.Series(model.link(mu),index=dts.ix[-len(mu):].index)
dts.subtract(fitted_values).plot()
'''

# MLE: Maximum Likelihood Estimation
# Second model: ARIMA with ar=4, ma=4, integ=0 on the 'Current_value' column.
model = pf.ARIMA(data=dts, ar=4, ma=4, integ=0,
                 target='Current_value')  # family is pf.Normal() by default (per the original author)
x = model.fit("MLE")
x.summary()
model.plot_z(figsize=(15, 7))  # latent variable plot
model.plot_z(indices=range(1, 9))  # latent variable plot for indices 1..8
model.plot_fit(figsize=(15, 5))  # ARIMA model fit

model.plot_predict(h=30, figsize=(
    15, 5))  # plots predictions for the next 30 time steps (h=30)
model.plot_predict_is(
    h=30
)  # plots rolling in-sample prediction for the past 30 time steps (h=30), to get an idea of performance
# Disabled alternatives kept from the original:
#   model.plot_predict(h=20,past_values=20,figsize=(15,5))
#   predictions = model.predict(h=5, intervals=True) # outputs dataframe of predictions
'''
samples = model.sample(nsims=10) # returns 10 samples from the data
ppc_pvalue = model.ppc(T=np.mean) # p-value for mean posterior predictive test
model.plot_sample(nsims=10) # draws samples from the model
model.plot_ppc(T=np.mean) # plots histogram of posterior predictive check for mean
'''
コード例 #16
0
# `predict_dta`, `arima`, `d`, `forecast_start_date`, `forecast_end_date`,
# `forecast_typ`, `fig_size` and `data_analysis` are defined outside this
# excerpt. The printed banner strings are runtime output and are left as-is.
print(predict_dta)
print("##########使用forecast预测数据###################") 
################################### Predict with forecast (end) #########################################################################

################################### Predict with plot_predict #####################################################################
print("##########使用plot_predict预测数据###################") 

# `typ` is only passed when the model has a differencing order (d != 0).
if d == 0:
    predict_dta2 = arima.predict(start = forecast_start_date, end = forecast_end_date,dynamic = False)
else:
    predict_dta2 = arima.predict(start = forecast_start_date, end = forecast_end_date,dynamic = False,typ = forecast_typ)
print(predict_dta2)

# Plot the data (skipping the first entry) and overlay the prediction on the
# same axes.
# NOTE(review): .ix was removed in pandas 1.0; whether .iloc[1:] or .loc[1:]
# is the right replacement depends on the index type of data_analysis.
xdata_pred2,ax = plt.subplots(figsize = fig_size )
ax = data_analysis.ix[1:].plot(ax=ax)
xdata_pred2 = arima.plot_predict(start = forecast_start_date,end = forecast_end_date,dynamic = False, ax = ax, plot_insample = False)
plt.show()
#print(xdata_pred2)
print("##########使用plot_predict预测数据###################") 
################################### Predict with plot_predict (end) ######################################################################


################################### Predict with predict ######################################################################
# dynamic=False ensures one-step-ahead forecasts, meaning each point's forecast is generated using the full history up to that point
# Syntax reference: http://www.statsmodels.org/stable/generated/statsmodels.tsa.arima_model.ARIMAResults.predict.html#statsmodels.tsa.arima_model.ARIMAResults.predict
print("##########使用predict预测数据###################")
if d == 0:
    xdata_pred = arima.predict(end = forecast_end_date, dynamic = False) # predict
else:
    xdata_pred = arima.predict(end = forecast_end_date, dynamic = False,typ = forecast_typ) # predict
print(xdata_pred)
コード例 #17
0
#len(xTest)
# Bare expression: only displays a value in an interactive/notebook session.
xTest

# Fit ARIMA(10, 2, 1) on the training data; `arima` is rebound from the model
# to its fitted results.
arima = ARIMA(xTrain, order=(10, 2, 1))
arima = arima.fit()
arima.summary()

# Forecast as many steps as there are test observations; pred[0] is the
# element compared against xTest below.
pred = arima.forecast(steps=len(xTest))

# Mean squared error and its square root (RMSE).
print(mean_squared_error(xTest, pred[0]))
print(np.sqrt(mean_squared_error(xTest, pred[0])))

#pred

# NOTE(review): the name `ax` holds whatever plot_predict returns; the
# set_figheight/set_figwidth calls suggest it is a Figure -- confirm.
ax = arima.plot_predict(start='2019-05-12', end='2019-06-10')
ax.set_figheight(9)
ax.set_figwidth(19)

import itertools
"""Auto Arima"""

auto = auto_arima(xTrain,
                  start_p=0,
                  start_q=0,
                  d=0,
                  max_d=9,
                  max_p=30,
                  end_q=30,
                  start_P=0,
                  start_Q=0,
コード例 #18
0
# First difference of the 'AnnualMeansBA' column, plotted. `data`, `plt`,
# `numpy`, `pandas` and `res` are defined outside this excerpt.
diff = data['AnnualMeansBA'].diff(1)
diff = diff.dropna()
diff.plot()
plt.show()

import statsmodels
from statsmodels.tsa.arima_model import ARIMA

# NOTE(review): DataFrame.as_matrix() was removed in pandas 1.0 (to_numpy()
# is the documented replacement) -- confirm the pandas version.
Diff = diff.to_frame().dropna().as_matrix()

# The result of this call is not used.
numpy.size(Diff)

# NOTE(review): the input is already differenced and the order uses d=1
# again -- confirm that double differencing is intended.
r = ARIMA(Diff, order=(2, 1, 1))
r = r.fit(disp=-1)
r.plot_predict(1, 45)
pred = r.predict(1, 45)


#dates = pd.date_range('1961-01','1970-01',freq='M')

# Cumulative sum of the predicted values.
predictions_ARIMA_diff = pandas.Series(pred, copy=True)
predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()

# NOTE(review): `Diff` comes from as_matrix(), presumably a NumPy array,
# which has no `.ix` attribute -- this line likely raises AttributeError.
predictions_ARIMA_log = pandas.Series(Diff.ix[0])

predictions_ARIMA_log = predictions_ARIMA_log.add(predictions_ARIMA_diff_cumsum,fill_value=0)
# NOTE(review): exp() is applied although no log transform is visible in this
# excerpt -- confirm the data was log-transformed upstream.
predictions_ARIMA = numpy.exp(predictions_ARIMA_log)
plt.plot(res)
plt.plot(predictions_ARIMA)
コード例 #19
0
# Global figure defaults, then a plot of the sunspot column of `data`
# (`data`, `plt`, `pd`, `sma` and the helpers are defined outside this
# excerpt).
plt.rcParams["figure.figsize"] = [15, 10]
plt.rcParams["font.size"] = 14
data.plot(y='Monthly Mean Total Sunspot Number')
plt.show()

values = data.values
rec_num = len(values)

# Additive seasonal decomposition after interpolating `data`.
result = seasonal_decompose(data.interpolate(), model='additive')
result.plot()
plt.show()

# autocorrelation
autocorrelation_plot(data.values)
plt.show()

# split data
# First 70% for training, the rest for testing; these arrays are not used
# again in this excerpt.
split = int(rec_num * 0.7)
train_vals = values[:split]
test_vals = values[split:len(values)]

# ARIMA
# `data` is rebound here to the sunspots dataset loaded through `sma`, indexed
# by dates from 1700 to 2008, with the YEAR column removed.
data = sma.datasets.sunspots.load_pandas().data
data.index = pd.Index(sma.tsa.datetools.dates_from_range('1700', '2008'))
del data["YEAR"]
model = ARIMA(data, order=(5, 1, 2)).fit(disp=False)
model.summary()
# Data from 1945 on, with the dynamic prediction for 2000..2020 on the same
# axes.
fig, ax = plt.subplots(figsize=(15, 10))
ax = data.loc['1945':].plot(ax=ax)
model.plot_predict('2000', '2020', dynamic=True, ax=ax, plot_insample=False)
plt.show()
コード例 #20
0
    price_24 = df.set_index(
        pd.date_range(start='12-31-2011', end='12-26-2020', freq='D'))
    return price_24


# `get_price_24`, `price`, `df`, `st`, `px`, `plt` and `ARIMA` are defined
# outside this excerpt; `st` is presumably Streamlit -- confirm.
price_24 = get_price_24(price)

price_model = ARIMA(price_24, order=(1, 1, 0)).fit()

# Free-text end date for the prediction, coerced to str.
date_in = st.text_input(label='Input Date to Predict to(format YYYY-MM-DD')
date_in = str(date_in)

# Runs only when the button is pressed: plot prices from 2017 on and overlay
# the dynamic prediction from '2020' to the entered date.
if st.button('Arima Prediction'):
    fig, ax = plt.subplots(1, figsize=(14, 4))
    ax.plot(price_24['2017':].index, price_24['2017':])
    fig = price_model.plot_predict('2020',
                                   f'{date_in}',
                                   dynamic=True,
                                   ax=ax,
                                   plot_insample=False)

    # Relabel the second legend entry, then move the legend.
    ax.legend().get_texts()[1].set_text("95% Prediction Interval")
    ax.legend(loc="lower left")

    ax.set_title("Price Forecasts from ARIMA Model")
    st.pyplot(fig)

# Line chart of df.bpi against the index of df.
# NOTE(review): the figure is both shown via fig3.show() and passed to
# st.plotly_chart -- confirm that both are wanted.
date = df.index
fig3 = px.line(df.bpi.values, x=date, y=df.bpi)
fig3.show()
st.plotly_chart(fig3)
コード例 #21
0
    simulated_data = auto_regressive_process(
        len(metal_diff), np.array(list(metal_model.params)[1:]))
    simulated_data.index = metal_diff.index
    ax[i].plot(simulated_data.index, simulated_data, marker='.')
    ax[i].set_title("Simulated Data from " + name + " Model Fit")

# Finish and show the figure built by the code above this excerpt.
plt.tight_layout()
plt.show()

#make projection and compare to real data
#not working year 0
# `end_date`, `start_date`, `metal`, `metal_model`, `name` and `td` are
# defined outside this excerpt; `td` is presumably datetime.timedelta.
next_day = pd.to_datetime(end_date) + td(days=1)
next_year = pd.to_datetime(end_date, ) + td(days=365)
next_day = next_day.date()

# NOTE(review): the result of reindex() is not assigned, so this line
# presumably has no effect on `metal`. Also, `end` is an integer year and
# newer pandas versions no longer accept start/end/freq in the DatetimeIndex
# constructor -- confirm intent and pandas version.
metal.reindex(pd.DatetimeIndex(start=start_date, end=next_year.year, freq='D'))

# Plot the series and overlay the dynamic prediction from end_date to
# next_year.year on the same axes.
fig, ax = plt.subplots(1, figsize=(14, 4))
ax.plot(metal.index, metal, marker='.')
fig = metal_model.plot_predict(end_date,
                               next_year.year,
                               dynamic=True,
                               ax=ax,
                               plot_insample=False)

# Relabel the second legend entry, then move the legend; `_ =` discards the
# return values.
_ = ax.legend().get_texts()[1].set_text("95% Prediction Interval")
_ = ax.legend(loc="lower left")

_ = ax.set_title(name + " Series Forcasts from ARIMA Model")

plt.show()
コード例 #22
0
# NOTE(review): in this snippet `ARIMA` is used as a fitted results object
# (it has fittedvalues, plot_predict and forecast), not as the model class;
# it is defined outside this excerpt, like `indexedDataset`.
# Data in blue, fitted values in black; the title shows the sum of squared
# differences between the fitted values and the 'case' column.
plt.plot(indexedDataset,color='blue')
plt.plot(ARIMA.fittedvalues,color='black')
plt.title('Rss:%4f'% sum((ARIMA.fittedvalues-indexedDataset['case'])**2))
#convert fitted values into series 
predict_ARIMA_diff=pd.Series(ARIMA.fittedvalues,copy=True)
print(predict_ARIMA_diff.head())
# Cumulative sum of the fitted values.
cumsum_predictions=predict_ARIMA_diff.cumsum()
print(cumsum_predictions.head())
# Start from the first 'case' value repeated over the whole index, then add
# the cumulative sums (missing positions count as 0).
predictions_ARIMA=pd.Series(indexedDataset['case'].iloc[0],index=indexedDataset.index)   
predictions_ARIMA=pd.Series(predictions_ARIMA.add(cumsum_predictions,fill_value=0))   
predictions_ARIMA.head()
#predictions_ARIMA=np.exp(predictions_ARIMA_log)
# Data in blue against the rebuilt series in red.
plt.plot(indexedDataset,color='blue')
plt.plot(predictions_ARIMA,color='red')
# Bare expression: only displays a value in an interactive/notebook session.
indexedDataset
# Prediction plot for positions 1..312, then a 120-step forecast.
ARIMA.plot_predict(1,312)
x=ARIMA.forecast(steps=120)
## change in xlabel