Ejemplo n.º 1
0
    def d_param(self, diff):
        '''function takes different values for difference step, and returns true or false flag if acf and pacf values
        lie into the threshold area'''
        THRESHOLD = 0.08
        if diff == 0:
            acf = tss.acf(self.val)
            pacf = tss.pacf(self.val)
            # acf and pacf plots
            fig = plt.figure(figsize = (12,8))
            ax1 = fig.add_subplot(121)
            fig = plot_acf(self.val,lags =40 ,ax=ax1)
            ax2 = fig.add_subplot(122, sharey=ax1)
            fig= plot_pacf(self.val, lags = 40, ax =ax2)
            plt.savefig('ACF_vs_PACF.jpg')
            plt.close()
            # check if most acf and pacf are lie in the accepted region for diff0
            acf_percent = len(acf[np.abs(acf) <= THRESHOLD])/float(len(acf))
            pacf_percent = len(pacf[np.abs(pacf) <= THRESHOLD])/float(len(pacf))
            return (acf_percent >= .65) and (pacf_percent >= 0.65)

        elif diff == 1:
            diff1_acf = tss.acf(self.diff1_val.dropna())
            diff1_pacf = tss.pacf(self.diff1_val.dropna())
            # for acf and pacf plots
            fig = plt.figure(figsize = (12,8))
            ax1 = fig.add_subplot(121)
            fig = plot_acf(self.diff1_val.dropna(),lags =40 ,ax=ax1)
            ax2 = fig.add_subplot(122, sharey=ax1)
            fig= plot_pacf(self.diff1_val.dropna(), lags = 40, ax =ax2)
            plt.savefig('ACF_vs_PACF_diff1.jpg')
            plt.close()
            # check if most acf and pacf are lie in the accepted region for diff1
            acf_percent = len(diff1_acf[np.abs(diff1_acf) <= THRESHOLD])/float(len(diff1_acf))
            pacf_percent = len(diff1_pacf[np.abs(diff1_pacf) <= THRESHOLD])/float(len(diff1_pacf))
            return (acf_percent >= .65) and (pacf_percent >= 0.65)

        elif diff == 2:
            diff2_acf = tss.acf(self.diff2_val.dropna())
            diff2_pacf = tss.pacf(self.diff2_val.dropna())
            # check save fig for acf and pacf plots
            fig = plt.figure(figsize = (12,8))
            ax1 = fig.add_subplot(121)
            fig = plot_acf(self.diff2_val.dropna(),lags =40 ,ax=ax1)
            ax2 = fig.add_subplot(122, sharey=ax1)
            fig = plot_pacf(self.diff2_val.dropna(), lags = 40, ax =ax2)
            plt.savefig('ACF_vs_PACF_diff2.jpg')
            plt.close()
            # check if most acf and pacf are lie in the accepted region for diff2
            acf_percent = len(diff2_acf[np.abs(diff2_acf) <= THRESHOLD])/float(len(diff2_acf))
            pacf_percent = len(diff2_pacf[np.abs(diff2_pacf) <= THRESHOLD])/float(len(diff2_pacf))
            return (acf_percent >= .65) and (pacf_percent >= 0.65)

        else:
            raise InvalidParamError
Ejemplo n.º 2
0
def plot_acf_multiple(ys, lags=20):
    """

    """
    from scikits.statsmodels.tsa.stattools import acf
    # hack
    old_size = mpl.rcParams['font.size']
    mpl.rcParams['font.size'] = 8

    plt.figure(figsize=(10, 10))
    xs = np.arange(lags + 1)

    acorr = np.apply_along_axis(lambda x: acf(x, nlags=lags), 0, ys)

    k = acorr.shape[1]
    for i in range(k):
        ax = plt.subplot(k, 1, i + 1)
        ax.vlines(xs, [0], acorr[:, i])

        ax.axhline(0, color='k')
        ax.set_ylim([-1, 1])

        # hack?
        ax.set_xlim([-1, xs[-1] + 1])

    mpl.rcParams['font.size'] = old_size
Ejemplo n.º 3
0
def plot_acf_multiple(ys, lags=20):
    """

    """
    from scikits.statsmodels.tsa.stattools import acf
    # hack
    old_size = mpl.rcParams['font.size']
    mpl.rcParams['font.size'] = 8

    plt.figure(figsize=(10, 10))
    xs = np.arange(lags + 1)

    acorr = np.apply_along_axis(lambda x: acf(x, nlags=lags), 0, ys)

    k = acorr.shape[1]
    for i in range(k):
        ax = plt.subplot(k, 1, i + 1)
        ax.vlines(xs, [0], acorr[:, i])

        ax.axhline(0, color='k')
        ax.set_ylim([-1, 1])

        # hack?
        ax.set_xlim([-1, xs[-1] + 1])

    mpl.rcParams['font.size'] = old_size
Ejemplo n.º 4
0
def plot_acf(y, lags=100, partial=False, ax=None):
    from scikits.statsmodels.tsa.stattools import acf, pacf

    if partial:
        the_acf = pacf(y, nlags=lags)
    else:
        the_acf = acf(y, nlags=lags)

    if ax is None:
        fig = plt.figure(figsize=(10, 5))
        ax = fig.add_subplot(111)

    ax.vlines(np.arange(lags + 1), [0], the_acf)
    ax.axhline(0, color='k')
Ejemplo n.º 5
0
def plot_acf(y, lags=100, partial=False, ax=None):
    from scikits.statsmodels.tsa.stattools import acf, pacf

    if partial:
        the_acf = pacf(y, nlags=lags)
    else:
        the_acf = acf(y, nlags=lags)

    if ax is None:
        fig = plt.figure(figsize=(10, 5))
        ax = fig.add_subplot(111)

    ax.vlines(np.arange(lags+1), [0], the_acf)
    ax.axhline(0, color='k')
Ejemplo n.º 6
0
 def __init__(self):
     self.acf = self.results['acvarfft']
     self.qstat = self.results['Q1']
     self.res1 = acf(self.x, nlags=40, qstat=True, fft=True)
Ejemplo n.º 7
0
 def __init__(self):
     self.acf = self.results['acvar']
     #self.acf = np.concatenate(([1.], self.acf))
     self.qstat = self.results['Q1']
     self.res1 = acf(self.x, nlags=40, qstat=True)
 def __init__(self):
     self.acf = self.results['acvarfft']
     self.qstat = self.results['Q1']
     self.res1 = acf(self.x, nlags=40, qstat=True, fft=True)
 def __init__(self):
     self.acf = self.results['acvar']
     #self.acf = np.concatenate(([1.], self.acf))
     self.qstat = self.results['Q1']
     self.res1 = acf(self.x, nlags=40, qstat=True)
Ejemplo n.º 10
0
def acorr_ljungbox(x, lags=None, boxpierce=False):
    '''Ljung-Box test for no autocorrelation


    Parameters
    ----------
    x : array_like, 1d
        data series, regression residuals when used as diagnostic test
    lags : None, int or array_like
        If lags is an integer then this is taken to be the largest lag
        that is included, the test result is reported for all smaller lag length.
        If lags is a list or array, then all lags are included up to the largest
        lag in the list, however only the tests for the lags in the list are
        reported.
        If lags is None, then the default maxlag is 12*(nobs/100)^{1/4}
    boxpierce : {False, True}
        If true, then additional to the results of the Ljung-Box test also the
        Box-Pierce test results are returned

    Returns
    -------
    lbvalue : float or array
        test statistic
    pvalue : float or array
        p-value based on chi-square distribution
    bpvalue : (optionsal), float or array
        test statistic for Box-Pierce test
    bppvalue : (optional), float or array
        p-value based for Box-Pierce test on chi-square distribution

    Notes
    -----
    Ljung-Box and Box-Pierce statistic differ in their scaling of the
    autocorrelation function. Ljung-Box test is reported to have better
    small sample properties.

    could be extended to work with more than one series
    1d or nd ? axis ? ravel ?
    needs more testing

    ''Verification''

    Looks correctly sized in Monte Carlo studies.
    not yet compared to verified values

    Examples
    --------
    see example script

    References
    ----------
    Greene
    Wikipedia

    '''
    x = np.asarray(x)
    nobs = x.shape[0]
    if lags is None:
        lags = range(1, 41)  #TODO: check default; SS: changed to 40
    elif isinstance(lags, int):
        lags = range(1, lags + 1)
    maxlag = max(lags)
    lags = np.asarray(lags)

    acfx = acf(x, nlags=maxlag)  # normalize by nobs not (nobs-nlags)
    # SS: unbiased=False is default now
    #    acf2norm = acfx[1:maxlag+1]**2 / (nobs - np.arange(1,maxlag+1))
    acf2norm = acfx[1:maxlag + 1]**2 / (nobs - np.arange(1, maxlag + 1))

    qljungbox = nobs * (nobs + 2) * np.cumsum(acf2norm)[lags - 1]
    pval = stats.chi2.sf(qljungbox, lags)
    if not boxpierce:
        return qljungbox, pval
    else:
        qboxpierce = nobs * np.cumsum(acfx[1:maxlag + 1]**2)[lags]
        pvalbp = stats.chi2.sf(qboxpierce, lags)
        return qljungbox, pval, qboxpierce, pvalbp
Ejemplo n.º 11
0
    # plt.show()
    # print adfuller(x_diff_1, regression="c")   #The Augmented Dickey-Fuller test

    x_fit, res, lag = create_ar_modex(x)

    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    ax.plot(x[lag:])
    ax.plot(x_fit[lag:])
    plt.legend(labels=['X', 'AR model'])
    plt.show()
    plt.bar(range(len(res)), res)
    plt.show()

    # check auto correlation
    acf = stattools.acf(res, nlags=100)
    plt.bar(range(len(acf)), acf)
    plt.show()

    # lbs = stattools.q_stat(acf, len(res[lag:]))
    # plt.bar(range(len(lbs[1])), lbs[1])
    # plt.show()
    # print adfuller(res, regression="c")   # The Augmented Dickey-Fuller test


    # y_fit, res = create_ar_modex(y)
    # fig = plt.figure()
    # ax = fig.add_subplot(1,1,1)
    # ax.plot(y)
    # ax.plot(y_fit)
    # plt.legend(labels=['Y', 'AR model'])
Ejemplo n.º 12
0
from scikits.statsmodels.tsa.arima_process import arma_generate_sample, arma_impulse_response
from scikits.statsmodels.tsa.arima_process import arma_acovf, arma_acf, ARIMA

# from movstat import acf, acovf
# from scikits.statsmodels.sandbox.tsa import acf, acovf, pacf
from scikits.statsmodels.tsa.stattools import acf, acovf, pacf

ar = [1.0, -0.6]
# ar = [1., 0.]
ma = [1.0, 0.4]
# ma = [1., 0.4, 0.6]
# ma = [1., 0.]
mod = ""  #'ma2'
x = arma_generate_sample(ar, ma, 5000)
x_acf = acf(x)[:10]
x_ir = arma_impulse_response(ar, ma)

# print x_acf[:10]
# print x_ir[:10]
# irc2 = np.correlate(x_ir,x_ir,'full')[len(x_ir)-1:]
# print irc2[:10]
# print irc2[:10]/irc2[0]
# print irc2[:10-1] / irc2[1:10]
# print x_acf[:10-1] / x_acf[1:10]

# detrend helper from matplotlib.mlab
def detrend(x, key=None):
    if key is None or key == "constant":
        return detrend_mean(x)
    elif key == "linear":
Ejemplo n.º 13
0
def acorr_ljungbox(x, lags=None, boxpierce=False):
    '''Ljung-Box test for no autocorrelation


    Parameters
    ----------
    x : array_like, 1d
        data series, regression residuals when used as diagnostic test
    lags : None, int or array_like
        If lags is an integer then this is taken to be the largest lag
        that is included, the test result is reported for all smaller lag length.
        If lags is a list or array, then all lags are included up to the largest
        lag in the list, however only the tests for the lags in the list are
        reported.
        If lags is None, then the default maxlag is 12*(nobs/100)^{1/4}
    boxpierce : {False, True}
        If true, then additional to the results of the Ljung-Box test also the
        Box-Pierce test results are returned

    Returns
    -------
    lbvalue : float or array
        test statistic
    pvalue : float or array
        p-value based on chi-square distribution
    bpvalue : (optionsal), float or array
        test statistic for Box-Pierce test
    bppvalue : (optional), float or array
        p-value based for Box-Pierce test on chi-square distribution

    Notes
    -----
    Ljung-Box and Box-Pierce statistic differ in their scaling of the
    autocorrelation function. Ljung-Box test is reported to have better
    small sample properties.

    could be extended to work with more than one series
    1d or nd ? axis ? ravel ?
    needs more testing

    ''Verification''

    Looks correctly sized in Monte Carlo studies.
    not yet compared to verified values

    Examples
    --------
    see example script

    References
    ----------
    Greene
    Wikipedia

    '''
    x = np.asarray(x)
    nobs = x.shape[0]
    if lags is None:
        lags = range(1,41)  #TODO: check default; SS: changed to 40
    elif isinstance(lags, int):
        lags = range(1,lags+1)
    maxlag = max(lags)
    lags = np.asarray(lags)

    acfx = acf(x, nlags=maxlag) # normalize by nobs not (nobs-nlags)
                             # SS: unbiased=False is default now
#    acf2norm = acfx[1:maxlag+1]**2 / (nobs - np.arange(1,maxlag+1))
    acf2norm = acfx[1:maxlag+1]**2 / (nobs - np.arange(1,maxlag+1))

    qljungbox = nobs * (nobs+2) * np.cumsum(acf2norm)[lags-1]
    pval = stats.chi2.sf(qljungbox, lags)
    if not boxpierce:
        return qljungbox, pval
    else:
        qboxpierce = nobs * np.cumsum(acfx[1:maxlag+1]**2)[lags]
        pvalbp = stats.chi2.sf(qboxpierce, lags)
        return qljungbox, pval, qboxpierce, pvalbp
Ejemplo n.º 14
0
import matplotlib.mlab as mlab

from scikits.statsmodels.tsa.arima_process import arma_generate_sample, arma_impulse_response
from scikits.statsmodels.tsa.arima_process import arma_acovf, arma_acf, ARIMA
#from movstat import acf, acovf
#from scikits.statsmodels.sandbox.tsa import acf, acovf, pacf
from scikits.statsmodels.tsa.stattools import acf, acovf, pacf

ar = [1., -0.6]
#ar = [1., 0.]
ma = [1., 0.4]
#ma = [1., 0.4, 0.6]
#ma = [1., 0.]
mod = ''  #'ma2'
x = arma_generate_sample(ar, ma, 5000)
x_acf = acf(x)[:10]
x_ir = arma_impulse_response(ar, ma)

#print x_acf[:10]
#print x_ir[:10]
#irc2 = np.correlate(x_ir,x_ir,'full')[len(x_ir)-1:]
#print irc2[:10]
#print irc2[:10]/irc2[0]
#print irc2[:10-1] / irc2[1:10]
#print x_acf[:10-1] / x_acf[1:10]


# detrend helper from matplotlib.mlab
def detrend(x, key=None):
    if key is None or key == 'constant':
        return detrend_mean(x)
Ejemplo n.º 15
0
    xhat5, err5 = VARMA(x,B,C)
    #print err5

    #in differences
    #VARMA(np.diff(x,axis=0),B,C)


    #Note:
    # * signal correlate applies same filter to all columns if kernel.shape[1]<K
    #   e.g. signal.correlate(x0,np.ones((3,1)),'valid')
    # * if kernel.shape[1]==K, then `valid` produces a single column
    #   -> possible to run signal.correlate K times with different filters,
    #      see the following example, which replicates VAR filter
    x0 = np.column_stack([np.arange(T), 2*np.arange(T)])
    B[:,:,0] = np.ones((P,K))
    B[:,:,1] = np.ones((P,K))
    B[1,1,1] = 0
    xhat0 = VAR(x0,B)
    xcorr00 = signal.correlate(x0,B[:,:,0])#[:,0]
    xcorr01 = signal.correlate(x0,B[:,:,1])
    print np.all(signal.correlate(x0,B[:,:,0],'valid')[:-1,0]==xhat0[P:,0])
    print np.all(signal.correlate(x0,B[:,:,1],'valid')[:-1,0]==xhat0[P:,1])

    #import error
    #from movstat import acovf, acf
    from scikits.statsmodels.tsa.stattools import acovf, acf
    aav = acovf(x[:,0])
    print aav[0] == np.var(x[:,0])
    aac = acf(x[:,0])

Ejemplo n.º 16
0
def test_acf():
    acf_x = tsa.acf(x100, unbiased=False)[:21]
    assert_array_almost_equal(mlacf.acf100.ravel(), acf_x, 8) #why only dec=8
    acf_x = tsa.acf(x1000, unbiased=False)[:21]
    assert_array_almost_equal(mlacf.acf1000.ravel(), acf_x, 8) #why only dec=9