Example #1
2
def draw_acf_pacf(ts, lags=31):
    # type: (object, object) -> object
    """Show the ACF (top) and PACF (bottom) of ``ts`` on one white figure.

    :param ts: data handed unchanged to ``plot_acf`` and ``plot_pacf``.
    :param lags: forwarded as the ``lags`` argument of both plots
        (default 31).
    """
    f = plt.figure(facecolor='white')
    ax1 = f.add_subplot(211)
    # Honour the caller's ``lags``; it used to be hard-coded to 31, which
    # silently ignored the parameter.
    plot_acf(ts, lags=lags, ax=ax1)
    ax2 = f.add_subplot(212)
    plot_pacf(ts, lags=lags, ax=ax2)
    plt.show()
Example #2
0
def draw_acf_pacf(ts, lags=1):
    """Display stacked ACF / PACF plots of ``ts`` on a white figure.

    NOTE(review): ``lags`` is accepted but not used; both plots are drawn
    with a fixed 31 lags.
    """
    figure = plt.figure(facecolor='white')
    # Top panel is the ACF, bottom panel the PACF.
    for position, draw in ((211, plot_acf), (212, plot_pacf)):
        panel = figure.add_subplot(position)
        draw(ts, lags=31, ax=panel)
    plt.show()
Example #3
0
def test_plot_acf_kwargs():
    """Check that ``vlines_kwargs`` changes the rendered ACF image.

    The same simulated sample is drawn twice, once with default options and
    once with dash-dot vertical lines, and the raw RGBA dumps must differ.
    """
    def render(**plot_options):
        # Draw the sample on a fresh figure and return the RGBA bytes.
        figure = plt.figure()
        axes = figure.add_subplot(111)
        sink = BytesIO()
        plot_acf(sample, ax=axes, **plot_options)
        figure.savefig(sink, format='rgba')
        plt.close(figure)
        return sink.getvalue()

    process = tsp.ArmaProcess(np.r_[1., -0.9], np.r_[1., 0.9])
    rng = np.random.RandomState(1234)
    sample = process.generate_sample(100, distrvs=rng.standard_normal)

    plain = render()
    with_vlines = render(vlines_kwargs={'linestyles': 'dashdot'})

    assert_(with_vlines != plain)
Example #4
0
def faz_plot(valores):
    """Show a histogram, a line plot and the ACF of ``valores`` stacked vertically.

    Titles and x-labels of all three panels are blanked before display.
    Returns ``None``.
    """
    _, (hist_axes, line_axes, acf_axes) = plt.subplots(nrows=3, figsize=(8, 12))
    hist_axes.hist(valores)
    line_axes.plot(valores)
    tsaplots.plot_acf(valores, acf_axes)
    for panel in (hist_axes, line_axes, acf_axes):
        panel.set(title='', xlabel='')
    plt.show()
Example #5
0
    def d_param(self, diff):
        '''Report whether most ACF and PACF values lie inside the threshold band.

        For the series selected by ``diff`` this computes ``tss.acf`` and
        ``tss.pacf``, saves a side-by-side ACF/PACF plot (40 lags) as a JPEG
        in the working directory, and checks what share of the ACF and PACF
        values have an absolute value of at most 0.08.

        :param diff: differencing step; 0 uses ``self.val``, 1 uses
            ``self.diff1_val.dropna()``, 2 uses ``self.diff2_val.dropna()``.
        :returns: True when at least 65% of the ACF values and at least 65%
            of the PACF values are within the threshold, otherwise False.
        :raises InvalidParamError: when ``diff`` is not 0, 1 or 2.
        '''
        THRESHOLD = 0.08
        MIN_SHARE = 0.65

        # Pick the series and output file once instead of repeating the
        # whole compute/plot/evaluate sequence in every branch.
        if diff == 0:
            series, plot_file = self.val, 'ACF_vs_PACF.jpg'
        elif diff == 1:
            series, plot_file = self.diff1_val.dropna(), 'ACF_vs_PACF_diff1.jpg'
        elif diff == 2:
            series, plot_file = self.diff2_val.dropna(), 'ACF_vs_PACF_diff2.jpg'
        else:
            raise InvalidParamError

        acf = tss.acf(series)
        pacf = tss.pacf(series)

        # ACF and PACF plots share the y axis so they can be compared.
        fig = plt.figure(figsize=(12, 8))
        ax1 = fig.add_subplot(121)
        plot_acf(series, lags=40, ax=ax1)
        ax2 = fig.add_subplot(122, sharey=ax1)
        plot_pacf(series, lags=40, ax=ax2)
        # Save and close this figure explicitly rather than relying on it
        # still being pyplot's current figure.
        fig.savefig(plot_file)
        plt.close(fig)

        # Share of coefficients inside the accepted region.
        acf_percent = len(acf[np.abs(acf) <= THRESHOLD]) / float(len(acf))
        pacf_percent = len(pacf[np.abs(pacf) <= THRESHOLD]) / float(len(pacf))
        return (acf_percent >= MIN_SHARE) and (pacf_percent >= MIN_SHARE)
Example #6
0
def test_plot_acf():
    """Smoke test: ``plot_acf`` runs on the first 20 ACF values of an ``ArmaProcess``."""
    figure = plt.figure()
    axes = figure.add_subplot(111)

    process = tsp.ArmaProcess(np.r_[1., -0.9], np.r_[1., 0.9])
    acf_values = process.acf(20)[:20]
    plot_acf(acf_values, ax=axes)

    plt.close(figure)
Example #7
0
def plotds(xt, nlag=30, fig_size=(12, 10)):
    """Draw a series on the top row with its ACF and PACF side by side below.

    Input that is not already a ``pd.Series`` is wrapped in one. Returns
    ``None``.

    NOTE(review): ``nlag`` is not used; both correlation plots use a fixed
    50 lags.
    """
    series = xt if isinstance(xt, pd.Series) else pd.Series(xt)
    plt.figure(figsize=fig_size)
    grid = (2, 2)

    # Top row spans both columns; the bottom row holds ACF and PACF.
    series_axes = plt.subplot2grid(grid, (0, 0), colspan=2)
    acf_axes = plt.subplot2grid(grid, (1, 0))
    pacf_axes = plt.subplot2grid(grid, (1, 1))

    series.plot(ax=series_axes)
    series_axes.set_title('Time Series')
    for draw, target in ((plot_acf, acf_axes), (plot_pacf, pacf_axes)):
        draw(series, lags=50, ax=target)
    plt.tight_layout()
Example #8
0
File: tsa.py Project: zhilim/TSA
def preliminaries(data):
    """Save a line plot of ``data`` and its ACF plot under ``results/``."""
    plt.plot(data)
    plt.title("Daily Time Series for S&P 500")
    plt.ylabel("Price")
    plt.xlabel("Day")
    plt.savefig("results/snp.png")

    # plot_acf returns a figure object, which is saved on its own.
    acf_figure = tp.plot_acf(data)
    acf_figure.savefig("results/snpacf.png")
Example #9
0
    def draw_ACFs(df):
        """Show five stacked panels: a line plot followed by four correlation plots.

        :param df: pandas.DataFrame indexed by ``TimeSeriesDataFrameMap`` columns
        """
        def annotate_corner(target, text):
            # Write ``text`` just inside the top-right corner of ``target``.
            target.annotate(text, (1, 1), xytext=(-8, -8), ha='right', va='top',
                            size=14, xycoords='axes fraction', textcoords='offset points')

        figure, panels = plt.subplots(nrows=5, figsize=(8, 12))
        figure.tight_layout()

        panels[0].plot(df[TimeSeriesDataFrameMap.Square_residuals])
        annotate_corner(panels[0], 'Returns')

        # (plot function, column, caption) for panels 1..4, drawn with 10 lags.
        correlation_panels = (
            (plot_acf, TimeSeriesDataFrameMap.Residuals,
             'Residuals autocorrelation'),
            (plot_acf, TimeSeriesDataFrameMap.Abs_residuals,
             'Absolute residuals autocorrelation'),
            (plot_acf, TimeSeriesDataFrameMap.Square_residuals,
             'Square residuals autocorrelation'),
            (plot_pacf, TimeSeriesDataFrameMap.Square_residuals,
             'Square residuals partial autocorrelation'),
        )
        for position, (draw, column, caption) in enumerate(correlation_panels, start=1):
            draw(df[column], panels[position], lags=10)
            annotate_corner(panels[position], caption)
        plt.show()
Example #10
0
    def autocorrelation(self):
        """Plot the autocorrelation of the first column of the training classes.

        Source -
            https://machinelearningmastery.com/feature-selection-time-series-forecasting-python/

        Args:
            None

        Returns:
            None

        """
        # Only the classes half of the (vectors, classes) pair is needed.
        _vectors, class_frame = self._training_vectors_classes()

        # Column zero of the classes as a 1d np.array, then plot and show.
        first_column = class_frame.values[:, 0]
        plot_acf(first_column)
        plt.show()
Example #11
0
 def plot_acf_pacf(self, channel, lags=20):
     """Show ACF (top) and PACF (bottom) of the "AVG CCV's" column for one channel.

     Input: channel and number of lags to include.
     Output: none returned; the figure is displayed.
     """
     channel_frame = chan_filter(self.df, channel)
     channel_frame.sort_index(inplace=True)
     ccv_series = channel_frame["AVG CCV's"]
     figure = plt.figure(figsize=(12,8))
     for position, draw in ((211, plot_acf), (212, plot_pacf)):
         draw(ccv_series, lags=lags, ax=figure.add_subplot(position))
     plt.show()
Example #12
0
def test_plot_acf_irregular(close_figures):
    """Smoke test: ``plot_acf`` runs with an explicit lag array and with ``zero=False``."""
    figure = plt.figure()
    axes = figure.add_subplot(111)

    process = tsp.ArmaProcess(np.r_[1., -0.9], np.r_[1., 0.9])
    rng = np.random.RandomState(1234)
    sample = process.generate_sample(100, distrvs=rng.standard_normal)

    # Same axes, three option sets, drawn in this order.
    option_sets = (
        {'lags': np.arange(1, 11)},
        {'lags': 10, 'zero': False},
        {'alpha': None, 'zero': False},
    )
    for options in option_sets:
        plot_acf(sample, ax=axes, **options)
Example #13
0
def test_plot_acf():
    """Smoke test: ``plot_acf`` runs with ``lags=10``, with defaults, and with ``alpha=None``."""
    figure = plt.figure()
    axes = figure.add_subplot(111)

    process = tsp.ArmaProcess(np.r_[1., -0.9], np.r_[1., 0.9])
    rng = np.random.RandomState(1234)
    sample = process.generate_sample(100, distrvs=rng.standard_normal)

    for options in ({'lags': 10}, {}, {'alpha': None}):
        plot_acf(sample, ax=axes, **options)

    plt.close(figure)
def process_lock(args):
    """Plot the ACF of the ``STT`` column for one dataset and print an OLS fit on six lags.

    ``args`` is used as a name: with "/" replaced by "_" it selects
    ``pickle/<name>/data.csv``, and it is echoed in the score header.
    Nothing is returned.
    """
    
    # NOTE(review): ``from_csv`` and ``resample('B')`` without an aggregation
    # look like legacy pandas usage - confirm the pandas version targeted.
    data = df().from_csv("pickle/" + args.replace("/","_") +  "/" + "data.csv")
    data = data.resample('B').dropna()
    #data = df({"STT":[1,2,3,4,5,6,7,5,3,2,2,2],"S":[1,2,3,4,5,6,7,5,3,2,2,2]})
    #plt.plot(data["STT"].values)
    # STT next to the same column shifted by 1..6 rows (its lagged copies).
    d = df({"STT":data["STT"].values,
            "STT1":data.shift(1)["STT"],
            "STT2":data.shift(2)["STT"],
            "STT3":data.shift(3)["STT"],
            "STT4":data.shift(4)["STT"],
            "STT5":data.shift(5)["STT"],
            "STT6":data.shift(6)["STT"]})
    # Rows left incomplete by the shifts are dropped before casting to int.
    d = d.dropna().astype(int)
    # x is a copy of d without STT; it is only referenced by commented-out code below.
    x = d.copy()
    x.__delitem__("STT")
    #print(x)
    #d.plot(figsize=(12,8));
    #plt.plot(RangeData.range(d))  
    fig = plt.figure(figsize=(12,8))
    ax1 = fig.add_subplot(211)
    fig = plot_acf(d["STT"].values.squeeze(), lags=40, ax=ax1)
    #fig = plot_pacf(d, lags=40, ax=ax1)
    #d.plot()
    plt.show()
    #ax2 = fig.add_subplot(212)
    #fig = plot_pacf(d.index.values, lags=40, ax=ax2)
    #print("Get training data")
    #X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    #            x, d["STT"].values, test_size=0.4, random_state=0)
    
    #print(X_train.shape, y_train.shape)
    #print(X_test.shape, y_test.shape)

    print("-----------SCORE-----------------------------")
    print(args,">>>")
    # Regress STT on its six lagged columns and print the fit summary.
    result = sm.ols(formula="STT ~ STT1 + STT2 + STT3 + STT4 + STT5 + STT6", data=d).fit()
    print(result.summary())
# COMMAND ----------

# First difference of ts_log; the first row (no predecessor to subtract) is
# dropped in place.
ts_log_diff = ts_log - ts_log.shift()
ts_log_diff.drop(ts_log_diff.index[0], inplace=True)

fig_log=ts_log_diff.plot(figsize=(18, 6))
display(fig_log.figure)

# COMMAND ----------

test_stationarity(ts_log_diff)

# COMMAND ----------

# ACF of the differenced series, 10 lags.
lag_acf = plot_acf(ts_log_diff, lags=10)
display(lag_acf)

# COMMAND ----------

# PACF of the differenced series, 10 lags.
lag_pacf = plot_pacf(ts_log_diff, lags=10)
display(lag_pacf)

# COMMAND ----------

# NOTE(review): math is not referenced in the visible cells.
import math

# NOTE(review): the input is already differenced above and order=(7, 1, 2)
# asks for one more difference - confirm this is intended.
model = ARIMA(ts_log_diff.astype(float), order=(7, 1, 2))   # uber_trained
results_ARIMA = model.fit(maxiter=500)  
print(results_ARIMA.summary())
Example #16
0
# Line plot of salescount per month from group_monthwise_sum.
plt.figure()
month_figure_1 = sns.lineplot(x= 'month', y= 'salescount', data = group_monthwise_sum, sort= True)
plt.xticks(rotation=45)
#month_figure_1.set_xticklabels(month_figure_1.get_xticklabels(), rotation=65, horizontalalignment='right')


# Same plot from group_monthwise_mean, on a separate figure.
plt.figure()
month_figure_2= sns.lineplot(x= 'month', y= 'salescount', data = group_monthwise_mean, sort= True)
plt.xticks(rotation=45)
#month_figure_2.set_xticklabels(month_figure_2.get_xticklabels(), rotation=65, horizontalalignment='right')

"""
Model Building
"""

# Autocorrelation of the salescount column.
plot_acf(salad_sales.salescount)

#plt.acorr('salescount',data= salad_sales)
#autocorrelation_plot(salad_sales)


# NOTE(review): the to_frame() results are not assigned, so x and y are left
# as selected from salad_sales.
x = salad_sales.date
x.to_frame()
y = salad_sales.salescount
y.to_frame()

# =============================================================================
# model = ARIMA(y, order=(7,0,3))
# model_fit = model.fit(disp=0)
# print(model_fit.summary())
# 
Example #17
0
# In[53]:


# Exogenous regressors: two weather columns plus a subset of month columns.
#exog = merged_df_varmax_train[['humidity', 'temperatureLow', 'day_0', 'day_1', 'day_2', 'day_3', 'day_4', 'day_5', 'day_6', 'holiday_0.0', 'holiday_1.0']]
exog = merged_df_varmax_train[['humidity', 'temperatureLow', 'month_1', 'month_2', 'month_3', 'month_4', 'month_7']]
# exog = merged_df_varmax_train[['humidity', 'temperatureLow', 'weekend_True', 'holiday_True', 'month_1', 'month_2', 'month_3', 'month_4','month_5',
#                                 'month_6', 'month_7', 'month_8', 'month_9', 'month_10','month_11', 'month_12']]
#exog = merged_df_varmax_train[['humidity', 'temperatureLow', 'weekend_False', 'weekend_True']]
# Drop the first row - presumably to line up with the differenced endog; TODO confirm.
exog = exog[1:]
exog.head()


# In[50]:


plot_acf(endog_diff['energy_sum'], lags=20)


# In[51]:


plot_pacf(endog_diff['energy_sum'], lags=20)


# In[54]:


# Fit VARMAX with order=(15, 0) on endog_diff using exog as regressors.
from statsmodels.tsa.statespace.varmax import VARMAX
model_varmax = VARMAX(endog=endog_diff, exog=exog, order=(15, 0))
results_varmax = model_varmax.fit(maxiter=5000, disp=False)
results_varmax.summary()
Example #18
0
                    [0., 0.],
                    [0., 0.]
                 ])

qMatrix = [q1]

# Two-component starting value as a column matrix.
y0 = np.asmatrix([0., 0]).T

dcoeff = [0,2]

# Number of simulated time steps.
t = 500

X = sim.varimapdqGaussian(t = t, 
                            pMatrix = pMatrix, 
                            qMatrix = qMatrix, 
                            dcoeff = dcoeff, 
                            y0 = y0)


# Rows 0 and 1 of X flattened to 1-D arrays of length t.
x1 = np.asarray(X[0,:]).reshape(t)
x2 = np.asarray(X[1,:]).reshape(t)

# Correlation plots for x1, for x2, and for x2 differenced twice.
plot_acf(x1, lags = 10)
plot_pacf(x1, lags = 10)
plot_acf(x2, lags = 10)
plot_acf(np.diff(x2, 2), lags = 10)
plot_pacf(np.diff(x2,2), lags = 10)

plt.plot(x1)
plt.plot(x2)
plt.plot(np.diff(x2, 2))
Example #19
0
# Convert the issue_d strings to datetimes and use them as the index.
df['issue_d_format'] = pd.to_datetime(df['issue_d']) 
dfts = df.set_index('issue_d_format') 
# Group rows by a YYYYMM integer built from the index and count rows per group.
year_month_summary = dfts.groupby(lambda x : x.year * 100 + x.month).count()
loan_count_summary = year_month_summary['issue_d']

print(loan_count_summary)
# What remains is a table of year+month versus number of loans issued.
plt.xlabel('2015 Issue Date (Month)')
plt.ylabel('Loans Issued')
loan_count_summary.plot()
plt.show()

#ACF
sag.plot_acf(loan_count_summary)
plt.show()

#PACF
sag.plot_pacf(loan_count_summary)
plt.show()

print ("There are autocorrelated structures in the data, specfically there seems to be Seasonality and a need to add an Auto Regressive term.")

#output
#201501    2616
#201502    2588
#201503    3002
#201504    3067
#201505    3167
#201506    3494
# Figure 1: yahoo_close; figure 2: returns.
plt.figure(1)
plt.plot(yahoo_close)

plt.figure(2)
plt.plot(returns)


# In[245]:

yahoo_close


# In[246]:

# ACF of returns, 20 lags.
import statsmodels.graphics.tsaplots as tsplots
tsplots.plot_acf(returns, lags= 20)
plt.show()


# In[247]:

returns[1:10]


# In[248]:

# PACF of returns, 20 lags.
tsplots.plot_pacf(returns, lags= 20)
plt.show()


# In[249]:
def acf_pacf(series):
    """Return the pair ``(plot_acf(series), plot_pacf(series))``."""
    return plot_acf(series), plot_pacf(series)
Example #22
0
    connectivity=['dPLI']
    #connectivity=['wPLI','dPLI']

    for c in connectivity:
        data = pd.read_pickle('../data/WholeBrain_{}_10_1_alpha.pickle'.format(c))
        areas=data.columns[4:]

        data_p=data.query("ID == '{}'".format(p))
        data_p_Base=data_p.query("Phase == 'Base'")
        data_p_Anes=data_p.query("Phase == 'Anes'")
        #data_p_Reco=data_p.query("Phase == 'Reco'")

        for i in range(len(areas)):

            fig = plt.figure(figsize=(17, 8))
            fig.suptitle('Part: {} Left autocorrelation'.format(p), size=16)
            plt.subplot(121)
            ax1=plt.subplot(1,2,1)
            tsaplots.plot_acf(data_p_Base[areas[i]], lags=100,ax=ax1)
            ax1.set_title('Baseline  ' + areas[i])
            ax2=plt.subplot(1,2,2)
            tsaplots.plot_acf(data_p_Anes[areas[i]], lags=100,ax=ax2)
            ax2.set_title('Anesthesia' + areas[i])

            pdf.savefig(fig)
            plt.close()

    pdf.close()


Example #23
0
        all_data = pdr.get_data_yahoo(self.tk, start=self.start, end=self.end)
        self.stock_data = pd.DataFrame(all_data['Adj Close'],
                                       columns=["Adj Close"])
        self.stock_data["log"] = np.log(self.stock_data) - np.log(
            self.stock_data.shift(1))

    def mean_sigma(self):
        """Return the latest exponentially weighted (span 252) standard deviation of the "log" column."""
        log_column = self.stock_data["log"].dropna()
        ewm_std = log_column.ewm(span=252).std()
        return ewm_std.iloc[-1]

    def garch_sigma(self):
        """Return the square root of the last one-step variance forecast.

        The forecast comes from an ``arch`` model (mean='Zero', vol='GARCH',
        p=1, q=1) fitted to the non-NaN values of the "log" column.
        """
        log_column = self.stock_data["log"].dropna()
        garch = arch.arch_model(log_column, mean='Zero', vol='GARCH', p=1, q=1)
        fitted = garch.fit()
        one_step = fitted.forecast(horizon=1)
        last_variance = one_step.variance.iloc[-1]
        return float(np.sqrt(last_variance))


# Script entry: print the non-NaN "log" column for AAPL over the given dates
# and show its ACF.
if __name__ == "__main__":
    vol = stock_vol("AAPL", start="2016-01-01", end="2016-03-01")
    test = vol.stock_data["log"].dropna()
    print(test)
    fig = plot_acf(test)
    plt.show()
Example #24
0
# Differencing the log values
log_diff = bc_log.diff().dropna()
# Plotting the daily log difference
plt.figure(figsize=(16, 8))
plt.plot(log_diff)
plt.title('Differencing Log')
plt.savefig('logdiff.png')
plt.show()
# Testing for stationarity: adfuller on the Close column; element 1 is printed as the p-value.
results = adfuller(log_diff.Close)
print(f"P-value: {results[1]}")

#PACF AND ACF
# ACF and PACF for the differenced series (bc_diff), 40 lags each.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(16, 8))
plot_acf(bc_diff, ax=ax1, lags=40)
plot_pacf(bc_diff, ax=ax2, lags=40)
plt.show()

# Appears to be some correlation at day 5 and 10 mostly.
# ACF and PACF for the log difference, saved before being shown.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(16, 8))
plot_acf(log_diff, ax=ax1, lags=40)
plot_pacf(log_diff, ax=ax2, lags=40)
plt.savefig('acfpacf.png')
plt.show()


def best_param(model, data, pdq, pdqs):
    ans = []
    for comb in tqdm(pdq):
        for combs in tqdm(pdqs):
Example #25
0
def run_main():
    """
        Main function: download prices, plot ACF/PACF, fit an ARIMA model
        and plot its predictions.
    """
    # 1. Prepare the data
    # Start date of the analysis window
    start_date = datetime.datetime(2007, 1, 1)
    # End date of the analysis window
    end_date = datetime.datetime(2017, 3, 1)
    # Stock ticker
    stock_code = '600519.SS'  # Kweichow Moutai, Shanghai exchange (per the original note)

    # Fetch the data from Yahoo Finance
    stock_df = pandas_datareader.data.DataReader(stock_code, 'yahoo',
                                                 start_date, end_date)
    # Preview the data
    print(stock_df.head())

    # 2. Visualise the data
    plt.plot(stock_df['Close'])
    plt.title('股票每日收盘价')
    plt.show()

    # Resample to weekly means (weeks anchored on Monday)
    stock_s = stock_df['Close'].resample('W-MON').mean()
    stock_train = stock_s['2014':'2016']
    plt.plot(stock_train)
    plt.title('股票周收盘价均值')
    plt.show()

    # ACF analysis
    acf = plot_acf(stock_train, lags=20)
    plt.title("股票指数的 ACF")
    acf.show()

    # PACF analysis
    pacf = plot_pacf(stock_train, lags=20)
    plt.title("股票指数的 PACF")
    pacf.show()

    # 3. Process the data to make it stationary
    # Only a simple first-order difference is applied here; other
    # stationarising methods exist and could improve on this.
    stock_diff = stock_train.diff()
    diff = stock_diff.dropna()
    print(diff.head())
    print(diff.dtypes)

    plt.figure()
    plt.plot(diff)
    plt.title('一阶差分')
    plt.show()

    acf_diff = plot_acf(diff, lags=20)
    plt.title("一阶差分的 ACF")
    acf_diff.show()

    pacf_diff = plot_pacf(diff, lags=20)
    plt.title("一阶差分的 PACF")
    pacf_diff.show()

    # 4. Choose the order from the ACF and PACF and build the model
    model = ARIMA(stock_train, order=(1, 1, 1), freq='W-MON')
    # Fit the model
    arima_result = model.fit()
    print(arima_result.summary())

    # 5. Predict
    pred_vals = arima_result.predict('20170102',
                                     '20170301',
                                     dynamic=True,
                                     typ='levels')
    print(pred_vals)

    # 6. Visualise the prediction: weekly series and predictions side by side
    stock_forcast = pd.concat([stock_s, pred_vals],
                              axis=1,
                              keys=['original', 'predicted'])

    plt.figure()
    plt.plot(stock_forcast)
    plt.title('真实值vs预测值')
    plt.savefig('./stock_pred.png', format='png')
    plt.show()
Example #26
0
plt.rcParams['axes.unicode_minus'] = False  # so that minus signs display correctly
import warnings

warnings.filterwarnings("ignore")

data = pd.read_excel('brand_dazong.xlsx', index_col=u'日期', header=0)
print(data.head())

# Plot the time series
data.plot()
plt.show()

# Plot the autocorrelation
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

plot_acf(data)
plt.show()

# Stationarity test
from statsmodels.tsa.stattools import adfuller
# Return values, in order: adf, pvalue, usedlag, nobs, critical values,
# icbest, regresults, resstore
# (original note) the adf statistic is above the critical values at all three
# levels and the p-value is well above 0.05, so the series is judged
# non-stationary
print('原始序列的检验结果为:', adfuller(data[u'销量']))

# Difference the data before drawing the ACF and PACF plots
D_data = data.diff().dropna()
D_data.columns = [u'销量差分']

D_data.plot()  # time-series plot of the differenced data
Example #27
0
# Plot the time series data
series.plot(figsize=(15, 8),
            marker='x',
            title='Indonesia Resident Patient Weekly Revenue Number')
plt.xlabel('DATE')
plt.ylabel('REVENUE')
plt.show()

# Autocorrelation
#pd.plotting.autocorrelation_plot(series)
#plt.figure(figsize=(15, 8))
#plt.show()
from statsmodels.graphics.tsaplots import plot_acf
fig, ax = plt.subplots(figsize=(15, 8))
plot_acf(series['REVENUE'], ax=ax)
ax.set_ylabel('Autocorrelation')
ax.set_xlabel('Lag (weeks)')
plt.show()
# From the ACF plot above, the first 6 lags are chosen.

# ## Start of ARIMA model

# +
# ARIMA Model with order (6, 0, 0), matching the 6 lags chosen above.
# NOTE(review): statsmodels.tsa.arima_model may be unavailable in newer
# statsmodels releases - confirm the installed version.
from statsmodels.tsa.arima_model import ARIMA

model = ARIMA(series, order=(6, 0, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())
# -
                               end='2017-08-28 16:00',
                               freq='1min')
'''
INSTRUCTIONS

*   Import plot_acf and ARMA modules from statsmodels
*   Compute minute-to-minute returns from prices:
    *   Compute returns with the .pct_change() method
    *   Use the pandas method .dropna() to drop the first row of returns, which is NaN
*   Plot the ACF function with lags up to 60 minutes
*   Fit the returns data to an MA(1) model and print out the MA(1) parameter
'''

# Import plot_acf and ARMA from statsmodels
# NOTE(review): statsmodels.tsa.arima_model may be unavailable in newer
# statsmodels releases - confirm the installed version.
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.arima_model import ARMA

# Compute returns from prices and drop the NaN rows
returns = intraday.pct_change()
returns = returns.dropna()

# Plot ACF of returns with lags up to 60 (minutes, per the instructions above)
plot_acf(returns, lags=60)
plt.show()

# Fit the data to an MA(1) model: order=(0, 1)
mod = ARMA(returns, order=(0, 1))
res = mod.fit()

print(res.params)
from pandas import read_csv
from statsmodels.graphics.tsaplots import plot_acf

from matplotlib import pyplot

# Load the first 2000 rows with column 0 parsed as dates and used as index.
# The ``squeeze=True`` keyword of read_csv was removed in pandas 2.0;
# squeezing the columns afterwards gives the same Series for a
# single-column file on both old and new pandas.
series = read_csv('../../static/data_set.csv',
                  nrows=2000,
                  header=0,
                  parse_dates=[0],
                  index_col=0).squeeze("columns")
plot_acf(series)

pyplot.show()
Example #30
0
# adfuller on the non-NaN Shift_12 values; element 1 of the result is printed.
result = adfuller(df['Shift_12'].dropna(), autolag='AIC')
print(result[1])
#result2=kpss(df['Shift_12'].dropna())
#print(result2[1])

fig = px.line(y=df['Shift_12'],
              x=df.index,
              template='plotly_dark',
              title='Sales over months',
              labels=dict(x="Date", y="Sale"))
fig.show()

# p,d,q (pacf,diff,acf)

# q: ACF of Shift_12 from row 13 onwards, 30 lags
plot_acf(df['Shift_12'].iloc[13:], ax=plt.gca(), lags=30)
plt.show()

# p: PACF of the same slice, 30 lags
plot_pacf(df['Shift_12'].iloc[13:], ax=plt.gca(), lags=30)
plt.show()

order = (1, 1, 1)

# The model is fitted on the Sale column, not on Shift_12.
model = ARIMA(df['Sale'], order=order)
model = model.fit()
print(model.summary())

# Dynamic prediction for positions 90..104, stored next to the observed values.
df['forecast'] = model.predict(start=90, end=104, dynamic=True)

df[['Sale', 'forecast']].plot()
, $0.8$, $0.8^{2}$, $0.8^{3}$, …
Simulate 5000 observations of the MA(30) model
Plot the ACF of the simulated series
'''



# import the modules for simulating data and plotting the ACF
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.graphics.tsaplots import plot_acf

# Build the list of MA parameters: 0.8**0, 0.8**1, ..., 0.8**29 (30 values,
# the first being 1).
ma = [0.8**i for i in range(30)]

# Simulate the MA model: the AR side is just [1]
# NOTE(review): the variable is named AR_object although only MA terms are set.
ar = np.array([1])
AR_object = ArmaProcess(ar, ma)
simulated_data = AR_object.generate_sample(nsample=5000)

# Plot the ACF with 30 lags
plot_acf(simulated_data, lags=30)
plt.show()
Example #32
0
def plot_acf_pacf(ts):
    """Draw the ACF and the PACF of ``ts`` and show the open figures."""
    plot_acf(ts)
    # NOTE(review): figure 0 is selected here, but plot_pacf is called
    # without ``ax`` - confirm whether this figure is actually used.
    plt.figure(0)
    plot_pacf(ts)
    plt.show()
Example #33
0
# Subtract the regression line from the loan counts to remove the trend.
# Each value has est.params[0] + est.params[1] * position subtracted, where
# position counts from 0.
new_loan= []
item_count = 0
for item in loan_count_summary:
	 new_loan.append(item-est.params[0]-est.params[1]*item_count)
	 item_count +=1
loan_count_rev = pd.Series(new_loan, index = loan_count_summary.index)

# Create variables to plot the regression line.
# NOTE(review): x here is the index itself, while the loop above used the
# 0-based position - confirm the two are meant to agree.
x = loan_count_summary.index
y = est.params[0]+ est.params[1]*x


# Plot the regression line with the raw and the detrended loan data.
plt.plot(x, y, 'r-')
loan_count_rev.plot()
lcr = loan_count_summary.plot()
lcr.set_xlabel('Time (1 unit is a Month)')
lcr.set_ylabel('Loan Count')
lcr.legend(['OLS Regression Line', 'Loan Count Detrended (Y-Regression Line)', 
	'Loan Count Raw Data'])
plt.show()

# Plot auto-correlation and partial auto-correlation of the detrended series.
plot_acf(loan_count_rev)
plt.show()
plot_pacf(loan_count_rev)
plt.show()

Example #34
0
forecastnum = 5

# Read the data with the first column as the index (the original note says
# pandas recognises the date column as datetime automatically - TODO confirm).
data = pd.read_csv(discfile, index_col = 0)
data = pd.DataFrame(data,dtype=np.float64)
data

# Time-series plot
plt.rcParams['font.sans-serif'] = ['SimHei'] # so that Chinese labels display correctly
plt.rcParams['axes.unicode_minus'] = False # so that minus signs display correctly
data.plot()
plt.show()

# Autocorrelation plot
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(data).show()

# Stationarity test
from statsmodels.tsa.stattools import adfuller as ADF
print( ADF(data[u'销量']))
# Return values, in order: adf, pvalue, usedlag, nobs, critical values, icbest, regresults, resstore

# Results after differencing
D_data = data.diff().dropna()
D_data.columns = [u'销量差分']
D_data.plot() # time-series plot
plt.show()
plot_acf(D_data).show() # autocorrelation plot
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_data).show() # partial autocorrelation plot
ADF(D_data[u'销量差分'])# stationarity test; the result is not printed here
You will generate a white noise series and plot the autocorrelation function to show that it is zero for all lags. You can use np.random.normal() to generate random returns. For a Gaussian white noise process, the mean and standard deviation describe the entire process.

Plot this white noise series to see what it looks like, and then plot the autocorrelation function.

INSTRUCTIONS
100XP
Generate 1000 random normal returns using np.random.normal() with mean 2% (0.02) and standard deviation 5% (0.05), where the argument for the mean is loc and the argument for the standard deviation is scale.
Plot the time series.
Verify the mean and standard deviation of returns using np.mean() and np.std().
Plot the autocorrelation function using plot_acf with lags=20.
'''
# Import plot_acf from statsmodels
from statsmodels.graphics.tsaplots import plot_acf

# Simulate white noise returns: 1000 draws with mean 0.02 and standard deviation 0.05
returns = np.random.normal(loc=0.02, scale=0.05, size=1000)

# Print out the mean and standard deviation of returns
mean = np.mean(returns)
std = np.std(returns)
print("The mean is %5.3f and the standard deviation is %5.3f" %(mean,std))

# Plot returns series
plt.plot(returns)
plt.show()

# Plot autocorrelation function of white noise returns (20 lags)
plot_acf(returns, lags=20)
plt.show()
Example #36
0
df = pd.read_csv('named.csv', names=['value'],
                 header=0)  #Named deaths per episode

import statistics
print('Mean number of deaths per episode')
# NOTE(review): this averages ov.value, not the df loaded just above; ov is
# not defined in this snippet - confirm which frame is intended.
print(statistics.mean(ov.value))

# In[8]:

############################################# Named Deaths #######################################################

# 3x2 grid: left column shows the series, right column its ACF.
# Original Series
fig, axes = plt.subplots(3, 2, sharex=True)
axes[0, 0].plot(df.value, linewidth=2.0)
axes[0, 0].set_title('Original Series')
plot_acf(df.value, ax=axes[0, 1], linewidth=3.0)

# 1st Differencing
axes[1, 0].plot(df.value.diff(), linewidth=2.0)
axes[1, 0].set_title('1st Order Differencing')
plot_acf(df.value.diff().dropna(), ax=axes[1, 1], linewidth=3.0)

# 2nd Differencing
axes[2, 0].plot(df.value.diff().diff(), linewidth=2.0)
axes[2, 0].set_title('2nd Order Differencing')
plot_acf(df.value.diff().diff().dropna(), ax=axes[2, 1], linewidth=3.0)

plt.show()

# In[9]:
Example #37
0
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
import quandl

# Straight line 0..99 plotted against itself.
x = y = np.arange(100)
plt.plot(x, y)
plt.show()

# ACF of that line.
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(x)
plt.show()

# x is rebound here to the result of the quandl request.
x = quandl.get("WIKI/AMZN", start_date="2017-1-5")
print(x)

for i in range(3):
    print(i)
Example #38
0
def run_main():
    """Resample closing prices from ``df1``, plot ACF/PACF, fit ARIMA and plot a 30-step forecast."""
    #k = ts.get_hist_data('002867', start='2017-04-10', end='2018-06-20')
    # 002867 (Moutai stock per the original note); the date range could be set here

    #lit = ['open', 'high', 'close', 'low']  # only these four columns would be kept
    #data = df1[lit]
    data=df1
    d_one = data.index  # the next 9 lines convert the object-typed index to datetime
    d_two = []
    d_three = []
    date2 = []
    for i in d_one:
        d_two.append(i)
    for i in range(len(d_two)):
        d_three.append(parse(d_two[i]))
    # NOTE(review): passing existing data together with a new index= may
    # re-align rather than relabel the rows - verify data2 is not all NaN.
    data2 = pd.DataFrame(data, index=d_three,
                         dtype=np.float64)  # build a new DataFrame indexed by the converted d_three; date_range() could also generate the index

    data2 = data2.drop_duplicates(keep='first')
    data2 = data2.sort_index(axis=0)
    plt.plot(data2['close'])
    # The data is clearly non-stationary, so differencing is needed
    plt.title('股市每日收盘价')
    #plt.show()

    data2_w = data2['close'].resample(FREQ).mean()
    # The raw data is large, so it is resampled at FREQ and averaged per period
    data2_train = data2_w['2008-01':'2009-11']  # only 2008-01 .. 2009-11 is used for training
    plt.plot(data2_train)
    plt.title('周重采样数据')
    #plt.show()
    data2_train = data2_train.dropna(axis=0, how='any')
    # The training data is relabelled with a synthetic index starting 2018-01-01.
    new_index = pd.date_range('20180101', periods=len(data2_train),freq = FREQ)
    data2_train = pd.DataFrame(data2_train)
    data_train = copy.copy(data2_train)
    data2_train.set_index(new_index, inplace=True)
    # ACF of the training data (no differencing has been applied yet)
    acf = plot_acf(data2_train, lags=20)  # inspect the training data with plot_acf to help choose q
    plt.title("股票指数的 ACF")
    # acf.show()

    # PACF of the training data (no differencing has been applied yet)
    pacf = plot_pacf(data2_train, lags=20)  # inspect the training data with plot_pacf to help choose p
    plt.title("股票指数的 PACF")
    # pacf.show()
    data2_diff = data2_train.diff(1)  # first-order difference via pandas diff()
    diff = data2_diff.dropna()
    for i in range(1):  # runs once, so the series ends up differenced twice overall
        diff = diff.diff(1)
        diff = diff.dropna()
    plt.figure()
    plt.plot(diff)
    plt.title('2阶差分')
    #plt.show()

    # ACF of the twice-differenced series
    acf_diff = plot_acf(diff, lags=40)
    plt.title("2阶差分的ACF")  # read q off the ACF plot
    # acf_diff.show()

    # PACF of the twice-differenced series
    pacf_diff = plot_pacf(diff, lags=40)  # read p off the PACF plot
    plt.title("2阶差分的PACF")
    pacf_diff.show()

    # # Choose the order from ACF, PACF and differencing, then build the model
    # model = ARIMA(data2_train, order=(3, 2, 4), freq='1D')  # pdq
    #
    # # Fit the model
    # arima_result = model.fit()
    #
    # # Predict
    # pred_vals = arima_result.predict('2018-08-20','2019-01-30', dynamic=True, typ = "levels")
    # forcast_vals = arima_result.forecast(30)[0]
    # fore_new_index = pd.date_range('20190101', periods=len(forcast_vals))
    # forcast_vals = pd.DataFrame(forcast_vals)
    # forcast_vals.set_index(fore_new_index, inplace=True)
    # # Visualise the prediction
    # stock_forcast = pd.concat([data2_train, pred_vals], axis=1, keys=['original', 'predicted'])
    # fore_stock_forcast = pd.concat([data2_train, forcast_vals], axis=1, keys=['original', 'predicted'])
    # The last 30 rows are held out of the fit.
    data2_train_fit = data2_train[0:(len(data2_train) - 30)]
    # Choose the order from ACF, PACF and differencing, then build the model
    model = ARIMA(data2_train_fit, order=(5, 2, 2), freq=FREQ)

    # Last timestamp of the fitted portion; the forecast index starts here.
    pred_begin = pd.date_range('20180101', periods=len(data2_train) - 30, freq = FREQ)[-1]    # the model is fitted on the next line
    arima_result = model.fit()

    # Forecast
    # pred_vals = arima_result.predict('2018-08-20', '2019-01-30', dynamic=True,
    #                                  typ="levels")
    forcast_vals = arima_result.forecast(30)[0]
    # NOTE(review): this date_range has no freq=FREQ, unlike the training
    # index - confirm the forecast index lines up with the training index.
    fore_new_index = pd.date_range(pred_begin, periods=len(forcast_vals))
    forcast_vals = pd.DataFrame(forcast_vals)
    forcast_vals.set_index(fore_new_index, inplace=True)
    # Visualise the forecast
    # stock_forcast = pd.concat([data2_train, pred_vals], axis=1,
    #                           keys=['original', 'predicted'])
    fore_stock_forcast = pd.concat([data2_train, forcast_vals], axis=1,
                                   keys=['original', 'predicted'])  # combine original and forecast data, using keys to label the two levels
    # Build the figure
    # plt.figure()
    # plt.plot(stock_forcast)
    # plt.title('真实值vs预测值')
    # plt.show()
    plt.figure()
    plt.plot(fore_stock_forcast)
    plt.title("Forcast Results")
    plt.show()
    a  = 1
# Notebook export: decompose the USAtemp1 temperature series and inspect its
# autocorrelation structure before fitting an ARIMA model.
from statsmodels.tsa.seasonal import seasonal_decompose

# In[113]:

# Additive decomposition with freq=12.
# NOTE(review): newer statsmodels releases name this argument `period`
# instead of `freq` - confirm against the installed version.
result = seasonal_decompose(USAtemp1.AverageTemperature,
                            model='additive',
                            freq=12)
result.plot()

# In[116]:

from statsmodels.graphics.tsaplots import plot_acf

# In[117]:

# Autocorrelation of the raw temperature series for lags 0..40.
plot_acf(USAtemp1.AverageTemperature, lags=40)

# In[118]:

from statsmodels.graphics.tsaplots import plot_pacf

# In[119]:

# Partial autocorrelation of the same series for lags 0..40.
plot_pacf(USAtemp1.AverageTemperature, lags=40)

# In[120]:

# Imports for the cells that follow; nothing in the cells above uses them.
import itertools
from statsmodels.tsa.arima_model import ARIMA

# In[124]:
Example #40
0
# Drop the first 8 observations and index the rest by quarter start
# ('QS-JAN') from 2001-01-01 through 2019-12-01.
data = data[8:]
data = pd.Series(data)
data.index = pd.Index(
    pd.date_range(start='2001-1-1', end='2019-12-1', freq='QS-JAN'))

# Log transform, then first difference. dropna() is called without
# inplace=True so that it returns the cleaned series; the in-place form
# returns None, which left `data_log_diff1_new` bound to None.
data_log = np.log(data)
data_log_diff1 = data_log.diff(1).dropna()
data_log_diff1_new = data_log_diff1
# Lag-4 difference of the first-differenced series (series minus itself
# shifted by four periods), with the resulting leading NaNs removed.
data_log_diff1_4 = data_log_diff1 - data_log_diff1.shift(4)
data_log_diff1_4_new = data_log_diff1_4.dropna()

# Plot the transformed series on its own figure.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
data_log_diff1_4_new.plot(ax=ax)

# ACF and PACF of the transformed series, 30 lags each.
plot_acf(data_log_diff1_4_new, lags=30)
plot_pacf(data_log_diff1_4_new, lags=30)
plt.show()

# Candidate (p, q) orders: p in 2..6, q in 0..6.
p = range(2, 7)
q = range(0, 7)
parameters = product(p, q)
# `parameters` is a one-shot iterator; it is exhausted once listed here.
parameters_list = list(parameters)


def optimizeARMA(parameters_list):
    results = []
    best_aic = float("inf")
    for param in parameters_list:
        try:
            model = ARMA(data_log_diff1_4_new, (param[0], param[1])).fit()
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 14 18:52:52 2020

@author: NK
"""

import pandas as pd

# Load the vimana dataset and take a first look at it.
vimana = pd.read_csv('vimana.csv')
vimana.head()  # return value is discarded; only visible in an interactive session
vimana.info()

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Show autocorrelations of the 'demand' column up to lag 20
acf_plot = plot_acf(vimana['demand'], lags=20)

# Show partial autocorrelations of the 'demand' column up to lag 20
pacf_plot = plot_pacf(vimana['demand'], lags=20)

Example #42
0
df['Milk First Difference'].plot()

# Second difference: difference of the first-differenced series.
df['Milk Second Difference'] = df['Milk First Difference'] - df['Milk First Difference'].shift(1)
adf_check(df['Milk Second Difference'].dropna())
df['Milk Second Difference'].plot()

# Seasonal first difference: the first-differenced series minus itself
# shifted by 12 periods.
df['Seasonal First Difference'] = df['Milk First Difference'] - df['Milk First Difference'].shift(12)
df['Seasonal First Difference'].plot()
adf_check(df['Seasonal First Difference'].dropna())

# Autocorrelation (NaNs introduced by differencing are dropped first).
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
fig_first = plot_acf(df["Milk First Difference"].dropna())

fig_seasonal_first = plot_acf(df["Seasonal First Difference"].dropna())

from pandas.plotting import autocorrelation_plot
autocorrelation_plot(df['Seasonal First Difference'].dropna())

# Using the Seasonal ARIMA model.
# For non-seasonal data the plain ARIMA class would be used instead.
# (This note used to be a bare, uncommented line, which is a SyntaxError.)
from statsmodels.tsa.arima_model import ARIMA

# We have seasonal data, so fit SARIMAX with a period-12 seasonal component.
model = sm.tsa.statespace.SARIMAX(df['Milk in pounds per cow'],order=(0,1,0), seasonal_order=(1,1,1,12))
results = model.fit()
print(results.summary())
      
Example #43
0
# Load the single-column series. `.squeeze('columns')` replaces the
# `squeeze=True` keyword, which pandas deprecated in 1.4 and removed in 2.0;
# both reduce the one-column frame to a Series.
series = read_csv('dataset.csv',
                  header=None,
                  index_col=0,
                  parse_dates=True).squeeze('columns')
# prepare data: float32 values, first half for training, second half for testing
X = series.values
X = X.astype('float32')
train_size = int(len(X) * 0.50)
train, test = X[0:train_size], X[train_size:]
# walk-forward validation: refit on all data seen so far, forecast one step,
# then reveal the true observation before moving on
history = list(train)
predictions = []
for obs in test:
    # predict
    model = ARIMA(history, order=(0, 1, 2))
    model_fit = model.fit()
    # NOTE(review): with the legacy arima_model API, forecast() returns a
    # tuple whose first element is the forecast array - confirm for the
    # statsmodels version in use.
    yhat = model_fit.forecast()[0]
    predictions.append(yhat)
    # observation
    history.append(obs)
# errors: observed minus predicted, one entry per test point
residuals = [actual - predicted for actual, predicted in zip(test, predictions)]
residuals = DataFrame(residuals)
# ACF on the top panel, PACF on the bottom panel, 25 lags each
pyplot.figure()
pyplot.subplot(211)
plot_acf(residuals, lags=25, ax=pyplot.gca())
pyplot.subplot(212)
plot_pacf(residuals, lags=25, ax=pyplot.gca())
pyplot.show()
Example #44
0
	train, test = data[1:-328], data[-328:-6]
	# restructure into windows of weekly data
	train = array(split(train, len(train)/7))
	test = array(split(test, len(test)/7))
	return train, test

# convert windows of weekly multivariate data into a series of total power
def to_series(data):
	"""Flatten windowed data into a single 1-D series of its first column.

	Each element of ``data`` is indexed as ``window[:, 0]``, i.e. column 0 of a
	2-D window (the original comment identifies that column as total power).
	The per-window columns are stacked and flattened in window order.
	"""
	first_columns = array([window[:, 0] for window in data])
	return first_columns.flatten()

# load the new file, parsing the 'datetime' column and using it as the index
dataset = read_csv('household_power_consumption_days.csv', header=0, infer_datetime_format=True, parse_dates=['datetime'], index_col=['datetime'])
# split into train and test (both returned as arrays of weekly windows)
train, test = split_dataset(dataset.values)
# convert training data into a single flat series (column 0 of every window)
series = to_series(train)
# plots: one figure, ACF on top and PACF below
pyplot.figure()
# presumably one year of daily observations, going by the file name - verify
lags = 365
# acf
axis = pyplot.subplot(2, 1, 1)
plot_acf(series, ax=axis, lags=lags)
# pacf (`axis` is rebound to the bottom panel)
axis = pyplot.subplot(2, 1, 2)
plot_pacf(series, ax=axis, lags=lags)
# show plot
pyplot.show()
# artifacts in the plot.  this works better
# x positions 1..17, one per observation in `lcs`
x = list(range(1, 18))
# make the plot
plt.plot(x,np.asarray(lcs),linewidth=1.0)
plt.show()
plt.clf()
# there's a definite linear, increasing trend...
# let's first try diffs
lcs_d1 = lcs.diff()
# the first element of lcs_d1 is NaN; it is kept here so the series still
# lines up with the 17 x positions
plt.plot(x,np.asarray(lcs_d1),linewidth=1.0)
plt.show()
# trend is gone
# we could have used the signal package like this (with same result):
# import scipy.signal as sig
# lcs_dt = sig.detrend(lcs)
# plt.plot(x,lcs_dt,linewidth=2.0)
# plt.show()

# now plot the ACF of the transformed series.
# The leading NaN from .diff() is dropped first: the ACF/PACF estimators do
# not skip missing values by default, so a single NaN would turn every
# estimated coefficient into NaN.
plt.figure()
st.plot_acf(lcs_d1.dropna())
plt.show()
plt.clf()

# and PACF
plt.figure()
st.plot_pacf(lcs_d1.dropna())
plt.show()
plt.clf()

# print as a function call so the line parses on Python 3 as well as Python 2
print("no autocorrelated structures")
'''
Getting "Warmed" Up: Look at Autocorrelations

Since the temperature series, temp_NY, is a random walk with drift, take first differences to make it stationary. Then compute the sample ACF and PACF. This will provide some guidance on the order of the model.

INSTRUCTIONS
100XP
Import the modules for plotting the sample ACF and PACF
Take first differences of the DataFrame temp_NY using the pandas method .diff()
Create two subplots for plotting the ACF and PACF
Plot the sample ACF of the differenced series
Plot the sample PACF of the differenced series
'''
# Plotting helpers for the sample ACF and PACF
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# First difference of the temperature series, with the leading NaN removed
chg_temp = temp_NY.diff().dropna()

# Two stacked panels on one page: ACF on top, PACF below
fig, axes = plt.subplots(nrows=2, ncols=1)

# Sample ACF of the differenced series, 20 lags, on the top panel
plot_acf(chg_temp, lags=20, ax=axes[0])
'''
Compare AR Model with Random Walk

Sometimes it is difficult to distinguish between a time series that is slightly mean reverting and a time series that does not mean revert at all, like a random walk. You will compare the ACF for the slightly mean-reverting interest rate series of the last exercise with a simulated random walk with the same number of observations.

You should notice when plotting the autocorrelation of these two series side-by-side that they look very similar.

INSTRUCTIONS
100XP
Import plot_acf function from the statsmodels module
Create two axes for the two subplots
Plot the autocorrelation function for 12 lags of the interest rate series interest_rate_data in the top plot
Plot the autocorrelation function for 12 lags of the simulated random walk series simulated_data in the bottom plot
'''
# Import the plot_acf function from statsmodels
from statsmodels.graphics.tsaplots import plot_acf

# Create two stacked panels (2 rows, 1 column): interest rate series on top,
# simulated random walk below
fig, axes = plt.subplots(2,1)

# Plot the autocorrelation of the interest rate series in the top panel.
# alpha=1 is passed to suppress the confidence band; `fig` is rebound to the
# figure returned by plot_acf.
fig = plot_acf(interest_rate_data, alpha=1, lags=12, ax=axes[0])

# Plot the autocorrelation of the simulated random walk series in the bottom panel
fig = plot_acf(simulated_data, alpha=1, lags=12, ax=axes[1])

# Title each panel
axes[0].set_title("Interest Rate Data")
axes[1].set_title("Simulated Random Walk Data")
plt.show()
Example #48
0
# Exploratory ARIMA workflow on the sales spreadsheet: plot the series,
# inspect its ACF, test stationarity, then repeat on the first difference.
discfile = 'F:\\spyder\\datamining\\chapter5\\demo\\data\\arima_data.xls'
forecastnum = 5

# Read the data with the date column as the index; pandas automatically
# recognises the date column as datetime
data = pd.read_excel(discfile, index_col=u'日期')

# Time-series plot
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # so that Chinese labels render correctly
plt.rcParams['axes.unicode_minus'] = False  # so that the minus sign renders correctly
data.plot()
plt.show()

# Autocorrelation plot
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(data).show()

# Stationarity test
from statsmodels.tsa.stattools import adfuller as ADF
print(u'原始序列的ADF检验结果为:', ADF(data[u'销量']))
# Return values, in order: adf, pvalue, usedlag, nobs, critical values, icbest, regresults, resstore

# Result after first differencing
D_data = data.diff().dropna()
D_data.columns = [u'销量差分']
D_data.plot()  # time-series plot
plt.show()
plot_acf(D_data).show()  # autocorrelation plot

from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_data).show()  # partial autocorrelation plot
Example #49
0
    plt.show()
    
    #Perform Dickey-Fuller test:
    print 'Results of Dickey-Fuller Test:'
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print dfoutput 

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Lag-4 difference of the stacked frame (each value minus the value four
# positions earlier); the first 4 entries are NaN and are sliced off
diff0 = df.stack().diff(periods=4)[4:]
diff0.plot(title='European Retail Trade Differenced')
plot_acf(diff0, lags=30)
plot_pacf(diff0, lags=30)

test_stationarity(diff0)

# additional lag-1 difference on top of the lag-4 difference; the single
# leading NaN is sliced off
diff1 = diff0.diff()[1:]
diff1.plot(title='European Retail Trade Differenced Twice')
plot_acf(diff1, lags=30)
plot_pacf(diff1, lags=30)

test_stationarity(diff1)

import statsmodels.api as sm
# Build the model on the undifferenced values: order=(0,1,1) and
# seasonal_order=(0,1,1,4) request one regular and one lag-4 seasonal
# difference inside the model, matching the manual differencing above
data = df.stack().values
model = sm.tsa.statespace.SARIMAX(data, order=(0,1,1), seasonal_order=(0,1,1,4))
Example #50
0
                    \rho_2 & \rho_1 & 1 & \cdots & \rho_{n-3} \\
                    \vdots   & \vdots   &  \vdots  & \ddots &  \vdots \\
                    \rho_{n-1} & \rho_{n-2} & \rho_{n-3} & \cdots & 1
                \end{pmatrix}
$

The _autocovariance function_, $\Gamma(\tau)$, can simply be read off from the diagonals of the autocovariance matrix; likewise with the _autocorrelation function_.

__Q.__ What is the autocorrelation function for the Gaussian white noise process with zero mean and unit variance?

__A.__ Since the values are iid, the values are uncorrelated, thus: 

$P(\tau) = \delta_{\tau,0}$

#Plotting:
tg.plot_acf(wn_series)

#Plotting:
tg.plot_acf(wn)

Autocorrelations for other time series above:

tg.plot_acf(bm)

tg.plot_acf(ar2_series)

What is the autocorrelation of the $AR(2)$ process above?

### Moving Average Process

From above, $\{X_t\}$ is a _moving average_ process of order $q$ if $X_t$ follows:
import matplotlib.pyplot             as plt
import numpy                         as np
import pandas                        as pd
import statsmodels.graphics.tsaplots as tsaplots

# Load the loan statistics file; header=1 takes the column names from the
# second line of the file.
df = pd.read_csv('LoanStats3b.csv', header=1, low_memory=False)

# converts the 'issue_d' strings to datetime objects and indexes the frame by them
df['issue_d_format'] = pd.to_datetime(df['issue_d'])
dfts = df.set_index('issue_d_format')
# group rows by a year-month key computed from the index as year * 100 + month,
# and count the non-null entries of every column within each group
year_month_summary = dfts.groupby(lambda x : x.year * 100 + x.month).count()
# number of non-null 'issue_d' values in each year-month group
loan_count_summary = year_month_summary['issue_d']

# ACF and PACF of the monthly counts
tsaplots.plot_acf(loan_count_summary)
tsaplots.plot_pacf(loan_count_summary)
plt.show()
    (0, 0), (20, -30),
    fontsize=10,
    xycoords='axes fraction',
    textcoords='offset points',
    va='top')
plt.savefig('08 Seasonal First Difference Sales.png')
'''
    ACF
'''

# Autocorrelation of first difference
# max_lags = (non-null count) - (null count) of the column.
# NOTE(review): max_lags is not passed to plot_acf below, which uses the
# literal lags=160 - presumably the value this expression yields for the
# dataset at hand; confirm.
max_lags = dataset['First Difference'].count(
) - dataset['First Difference'].isnull().sum()  # candidate number of lags

# Visualising: ACF of the first difference with NaNs dropped, saved to disk
fig_first = plot_acf(dataset['First Difference'].dropna(), lags=160)
plt.title('First Difference ACF')
plt.savefig('09 First Difference ACF.png')

# Alternatively, the pandas autocorrelation plot.
# NOTE(review): no new figure is created first, so this likely draws on the
# current axes from the plot above - confirm that is intended.
autocorrelation_plot(dataset['First Difference'].dropna())
plt.title('First Difference ACF (pandas plot)')
plt.savefig('09.1 First Difference ACF.png')

# Autocorrelation of Seasonal first difference
# Same computation as above for the seasonal column; again unused below,
# where the literal lags=136 is passed.
max_lags = dataset['Seasonal First Difference'].count(
) - dataset['Seasonal First Difference'].isnull().sum()  # candidate number of lags

# Visualising: ACF of the seasonal first difference with NaNs dropped
fig_seasonal_first = plot_acf(dataset["Seasonal First Difference"].dropna(),
                              lags=136)
def programmer_6():
    """
    Select and fit an ARIMA(p, 1, q) model for the sales series by minimum BIC.

    Loads ``data/arima_data.xls`` (indexed by the date column), plots the ACF
    of the raw series, runs the ADF test, first-differences the series, plots
    the differenced series with its ACF/PACF, runs the ADF and Ljung-Box tests
    on it, grid-searches p and q by BIC, and finally refits the best model and
    asks it for a ``forecastnum``-step forecast.

    Takes no arguments and returns None: test results are printed, figures are
    shown, and the summary/forecast of the final model are computed but not
    returned.

    Notes carried over from the original author:
      * "UserWarning: matplotlib is currently using a non-GUI backend, so
        cannot show the figure" was attributed to calling plt.show() several
        times; the suggested remedy was to use plt.subplot().
      * "RuntimeWarning: overflow encountered in exp" was attributed to
        insufficient numeric precision.
      * forecastnum is the number of days to forecast; plot_acf().show()
        draws the autocorrelation plot and plot_pacf().show() the partial
        autocorrelation plot.
    """
    discfile = 'data/arima_data.xls'
    forecastnum = 5
    data = pd.read_excel(discfile, index_col=u'日期')

    fig = plt.figure(figsize=(8, 6))
    # First autocorrelation plot (raw series)
    ax1 = plt.subplot(411)
    fig = plot_acf(data, ax=ax1)

    # Stationarity test on the raw series
    print(u'原始序列的ADF检验结果为:', ADF(data[u'销量']))
    # Return values, in order: adf, pvalue, usedlag, nobs, critical values,
    # icbest, regresults, resstore

    # First difference of the series
    D_data = data.diff().dropna()
    D_data.columns = [u'销量差分']
    # Time-series plot of the differenced data
    D_data.plot()
    plt.show()
    # Second autocorrelation plot (differenced series)
    fig = plt.figure(figsize=(8, 6))
    ax2 = plt.subplot(412)
    fig = plot_acf(D_data, ax=ax2)
    # Partial autocorrelation plot (differenced series)
    ax3 = plt.subplot(414)
    fig = plot_pacf(D_data, ax=ax3)
    plt.show()
    fig.clf()

    print(u'差分序列的ADF检验结果为:', ADF(D_data[u'销量差分']))  # stationarity test

    # White-noise (Ljung-Box) test
    print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))  # statistic and p-value
    data[u'销量'] = data[u'销量'].astype(float)
    # Order selection
    pmax = int(len(D_data) / 10)  # rule of thumb: order no larger than length / 10
    qmax = int(len(D_data) / 10)  # rule of thumb: order no larger than length / 10
    bic_matrix = []  # BIC values, row index = p, column index = q
    data.dropna(inplace=True)

    # Some (p, q) combinations fail to fit and others only emit warnings.
    # Warnings are promoted to exceptions so that those candidates are skipped
    # as well. The filter change is scoped with catch_warnings() so it does
    # not leak into the rest of the process, and only Exception subclasses
    # are caught so that KeyboardInterrupt/SystemExit still propagate.
    import warnings
    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        for p in range(pmax + 1):
            tmp = []
            for q in range(qmax + 1):
                try:
                    tmp.append(ARIMA(data, (p, 1, q)).fit().bic)
                except Exception:
                    # Keep the matrix rectangular; None marks a failed candidate.
                    tmp.append(None)
            bic_matrix.append(tmp)
    # The minimum can now be located in the matrix
    bic_matrix = pd.DataFrame(bic_matrix)
    # stack() flattens the matrix (dropping failed candidates) and idxmin()
    # returns the (p, q) position of the smallest BIC.
    p, q = bic_matrix.stack().idxmin()
    print(u'BIC最小的p值和q值为:%s、%s' % (p, q))
    model = ARIMA(data, (p, 1, q)).fit()  # the original note expected ARIMA(0, 1, 1) here
    model.summary2()  # model report (result is not used)
    model.forecast(forecastnum)  # 5-day forecast: prediction, standard error, confidence interval (result is not used)
Example #54
0
# plot_acf comes from the statsmodels plotting helpers
from statsmodels.graphics.tsaplots import plot_acf

# Draw 1000 white-noise returns: mean 0.02, standard deviation 0.05
returns = np.random.normal(loc=0.02, scale=0.05, size=1000)

# Report the sample mean and standard deviation of the simulated returns
mean, std = np.mean(returns), np.std(returns)
summary_line = "The mean is %5.3f and the standard deviation is %5.3f" % (mean, std)
print(summary_line)

# Line plot of the simulated returns
plt.plot(returns)
plt.show()

# Autocorrelation function of the white-noise returns, 20 lags
plot_acf(returns, lags=20)
plt.show()
Example #55
0
# 4 x 2 grid: one row per sample size, left column for the uniform series
# and right column for its square.
fig, axs = plt.subplots(ncols=2, nrows=4, figsize=(15, 15))
Ns = []
for idx, i in enumerate(range(3,7)):
#for i in range(5,23):
    # Reseed on every iteration so each sample size starts from the same state
    np.random.seed(rseed)
    N = np.power(10,i)
    #N = np.power(2, i)

    u = np.random.uniform(size=N)

    u2 = np.power(u, 2)
    Ns.append(N)
    # Numeric ACF with confidence intervals and Ljung-Box statistics; these
    # values are not used by the plotting calls below
    acfs, confint, qstat, pvalues = acf(u, nlags=20, qstat=True, alpha=0.05)

    tsaplots.plot_acf(u, ax=axs[idx][0], lags=20)
    tsaplots.plot_acf(u2, ax=axs[idx][1], lags=20)
    # N is 10**i, which is written 1e<i>; the previous label "10e<i>" denotes
    # 10 * 10**i and overstated the sample size by a factor of ten
    axs[idx][0].set_title('Autocorrelation Series. Length 1e{0}'.format(i))
    axs[idx][1].set_title('Autocorrelation Square Series. Length 1e{0}'.format(i))


Ns = []
fig, axs = plt.subplots(ncols=3, nrows=4, figsize=(15, 15))

## part e
for idx, i in enumerate(range(3,7)):
#for i in range(5,23):
    np.random.seed(rseed)
    N = np.power(10,i)
    #N = np.power(2, i)
    u = np.random.uniform(size=N)
Example #56
0
def visualizeACFPlot(autoRegressiveIntgratedMovingAverageForecastingDataset):
    """Plot the ACF of the 'Inventories' column and save it as 'acf_plot.png'.

    The plot covers 40 lags and is titled
    'Autocorrelation: Real Manufacturing and Trade Inventories'. The current
    figure is saved through pylab; nothing is returned.
    """
    inventories = autoRegressiveIntgratedMovingAverageForecastingDataset['Inventories']
    plot_acf(
        inventories,
        title='Autocorrelation: Real Manufacturing and Trade Inventories',
        lags=40,
    )
    pylab.savefig('acf_plot.png')
'''
Taxing Exercise: Compute the ACF

In the last chapter, you computed autocorrelations with one lag. Often we are interested in seeing the autocorrelation over many lags. The quarterly earnings for H&R Block (ticker symbol HRB) is plotted on the right, and you can see the extreme cyclicality of its earnings. A vast majority of its earnings occurs in the quarter that taxes are due.

You will compute the array of autocorrelations for the H&R Block quarterly earnings that are pre-loaded in the DataFrame HRB. Then, plot the autocorrelation function using the plot_acf module. This plot shows what the autocorrelation function looks like for cyclical earnings data. The ACF at lag=0 is always one, of course. In the next exercise, you will learn about the confidence interval for the ACF, but for now, suppress the confidence interval by setting alpha=1.

INSTRUCTIONS
100XP
INSTRUCTIONS
100XP
Import the acf module and plot_acf module from statsmodels.
Compute the array of autocorrelations of the quarterly earnings data in DataFrame HRB.
Plot the autocorrelation function of the quarterly earnings data in HRB, and pass the argument alpha=1 to suppress the confidence interval.
'''
# Import the acf function (numeric values) and the plot_acf function (plot)
# from statsmodels
from statsmodels.tsa.stattools import acf
from statsmodels.graphics.tsaplots import plot_acf

# Compute the array of autocorrelations of HRB and print it
acf_array = acf(HRB)
print(acf_array)

# Plot the autocorrelation function; alpha=1 suppresses the confidence interval
plot_acf(HRB, alpha=1)
plt.show()
# Materialise the investment and withdrawal records as two-column frames.
cx_inv = pd.DataFrame(list(cx_inv), columns=['date', 'invest'])
cx_wd = pd.DataFrame(list(cx_wd), columns=['date', 'withdraw'])

# Combine both frames for 2017-12-01 .. 2018-03-22 and add the net volume
# column: vol = invest - withdraw.
cx = to_timeseries('2017-12-01', '2018-03-22', cx_inv, cx_wd)
cx = cx.assign(vol=lambda x: x.invest - x.withdraw)

# Cast the amount columns to float and plot each frame on its own.
cx_inv.invest = cx_inv.invest.astype('float')
cx_wd.withdraw = cx_wd.withdraw.astype('float')
cx_inv.plot()
cx_wd.plot()

#####################################
# introducing ARIMA model:
# index by date and keep only the net-volume column as a Series
cx.set_index('date', inplace=True, drop=True)
cx = cx.loc[:, 'vol']
plot_acf(cx).show()
# Return values, in order: adf, pvalue, usedlag, nobs, critical values,
# icbest, regresults, resstore (this list appears to describe the ADF(cx)
# call below)

# Ljung-Box test with 3 lags; the result is discarded
acorr_ljungbox(cx, lags=3)

# ADF stationarity test; the result is discarded
ADF(cx)
# Upper bounds for the AR and MA orders: one tenth of the series length
pmax = int(len(cx) / 10)
qmax = int(len(cx) / 10)

#adjust parameters:
bic_matrix = []
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        try:
            tmp.append(ARIMA(cx, (p, 0, q)).fit().bic)
Example #59
0
# IBM EXAMPLE for ARMA modelling
#############

# Load Dataset
ibm_df = pd.read_csv('datasets/ibm-common-stock-closing-prices.csv')
ibm_df.head()  # return value is discarded; only visible interactively

# Rename the closing-price column to a shorter, attribute-friendly name
ibm_df.rename(columns={'IBM common stock closing prices': 'Close_Price'},
              inplace=True)
ibm_df.head()
ibm_df.Close_Price.plot()

# Plot ACF and PACF (50 lags) after dropping rows with missing values
ibm_df = ibm_df.dropna()
plot_acf(ibm_df.Close_Price, lags=50)
plot_pacf(ibm_df.Close_Price, lags=50)

# QQ plot of the closing prices against a standardized reference line
sm.qqplot(ibm_df['Close_Price'], line='s')

# Optimize ARMA parameters: fit every (AR, MA) order with AR in 1..2 and
# MA in 0..2, and record [ar_order, ma_order, aic] for each fit
aicVal = []
for ari in range(1, 3):
    for maj in range(0, 3):
        # NOTE(review): trend='nc' presumably means "no constant" in the
        # legacy ARMA API - confirm against the statsmodels version in use
        arma_obj = smtsa.ARMA(ibm_df.Close_Price.tolist(),
                              order=(ari, maj)).fit(maxlag=30,
                                                    method='mle',
                                                    trend='nc')
        aicVal.append([ari, maj, arma_obj.aic])
'''
Seasonal Adjustment During Tax Season

Many time series exhibit strong seasonal behavior. The procedure for removing the seasonal component of a time series is called seasonal adjustment. For example, most economic data published by the government is seasonally adjusted.

You saw earlier that by taking first differences of a random walk, you get a stationary white noise process. For seasonal adjustments, instead of taking first differences, you will take differences with a lag corresponding to the periodicity.

Look again at the ACF of H&R Block's quarterly earnings, pre-loaded in the DataFrame HRB, and there is a clear seasonal component. The autocorrelation is high for lags 4,8,12,16,..., because of the spike in earnings every four quarters during tax season. Apply a seasonal adjustment by taking the fourth difference (four represents the periodicity of the series). Then compute the autocorrelation of the transformed series.

INSTRUCTIONS
100XP
Create a new DataFrame of seasonally adjusted earnings by taking the lag-4 difference of quarterly earnings using the .diff() method.
Examine the first 10 rows of the seasonally adjusted DataFrame and notice that the first four rows are NaN.
Drop the NaN rows using the .dropna() method.
Plot the autocorrelation function of the seasonally adjusted DataFrame.
'''
# Import the plot_acf function from statsmodels
from statsmodels.graphics.tsaplots import plot_acf

# Seasonally adjust quarterly earnings by taking the lag-4 difference
HRBsa = HRB.diff(4)

# Print the first 10 rows of the seasonally adjusted series
print(HRBsa.head(10))

# Drop the NaN data in the first four rows (produced by the lag-4 difference)
HRBsa = HRBsa.dropna()

# Plot the autocorrelation function of the seasonally adjusted series
plot_acf(HRBsa)
plt.show()