def __init__(self):
     """Build ACF fixtures for a series that starts with a NaN.

     Prepends one NaN to ``self.x`` and evaluates ``acf`` once for each
     ``missing`` policy ("drop", "conservative", "none"), storing every
     result next to the reference columns read from ``self.results``.
     """
     shared = dict(nlags=40, qstat=True, alpha=0.05)
     self.x = np.concatenate((np.array([np.nan]), self.x))
     # Reference columns compared against the drop and conservative results.
     self.acf = self.results["acvar"]
     self.qstat = self.results["Q1"]
     self.res_drop = acf(self.x, missing="drop", **shared)
     self.res_conservative = acf(self.x, missing="conservative", **shared)
     # All-NaN vectors, one entry per lag from 1 to 40 inclusive.
     self.acf_none = np.full(40, np.nan)
     self.qstat_none = np.full(40, np.nan)
     self.res_none = acf(self.x, missing="none", **shared)
Example #2
0
 def __init__(self):
     """Prepare ACF results for a NaN-prefixed copy of ``self.x``.

     ``acf`` is run with ``missing`` set to 'drop', 'conservative' and
     'none'; the reference values come from ``self.results``.
     """
     nan_prefix = np.array([np.nan])
     self.x = np.concatenate((nan_prefix, self.x))

     # Reference columns used for the drop and conservative policies.
     self.acf = self.results['acvar']
     self.qstat = self.results['Q1']

     acf_options = {'nlags': 40, 'qstat': True, 'alpha': .05}
     self.res_drop = acf(self.x, missing='drop', **acf_options)
     self.res_conservative = acf(self.x, missing='conservative',
                                 **acf_options)

     # NaN-filled vectors covering lags 1 to 40 inclusive.
     self.acf_none = np.full(40, np.nan)
     self.qstat_none = np.full(40, np.nan)
     self.res_none = acf(self.x, missing='none', **acf_options)
Example #3
0
 def setup_class(cls):
     """Class-level fixture: ACF of ``cls.x`` with a NaN prepended.

     Stores one ``acf`` result per ``missing`` policy as ``cls.res_drop``,
     ``cls.res_conservative`` and ``cls.res_none``, plus the reference
     columns from ``cls.results`` and all-NaN vectors of length 40.
     """
     cls.x = np.concatenate((np.array([np.nan]), cls.x))
     # Reference columns used for the drop and conservative policies.
     cls.acf = cls.results['acvar']
     cls.qstat = cls.results['Q1']
     for policy in ('drop', 'conservative', 'none'):
         outcome = acf(cls.x, nlags=40, qstat=True, alpha=.05,
                       missing=policy)
         setattr(cls, 'res_' + policy, outcome)
     # NaN-filled vectors covering lags 1 to 40 inclusive.
     cls.acf_none = np.full(40, np.nan)
     cls.qstat_none = np.full(40, np.nan)
 def SlowDecay():
     """Plot the ACF of absolute and raw 'AAPL' returns with 95% bands.

     Reads the module-level ``df``, treats +/-inf as missing, drops the
     missing rows, and computes the autocorrelation (20 lags, alpha=0.05)
     of the absolute series (truncated to the first 1,000,000 rows) and of
     the raw series. Both curves and their confidence bands are drawn on
     the current figure, which is saved to 'Graphs/PACFAbsReturns.pdf'.
     """
     # Series.dropna only accepts keyword arguments on pandas >= 2.0, so
     # the former positional ``dropna(0)`` is spelled ``dropna()`` here.
     returns = df['AAPL'].replace([np.inf, -np.inf], np.nan).dropna()
     abs_returns = abs(returns)[:1000000]
     # NOTE(review): the earlier cross-column-mean versions of these two
     # series were overwritten before use and have been removed; restore
     # them if the all-columns average was the intended input.
     acf1, conf = acf(pd.DataFrame(abs_returns), alpha=0.05, nlags=20)
     acf2, conf2 = acf(pd.DataFrame(returns), alpha=0.05, nlags=20)
     conf = np.asarray(conf)
     conf2 = np.asarray(conf2)
     lags = range(len(acf1))
     plt.plot(acf1, label='ACF Absolute Returns')
     plt.plot(acf2, label='ACF Returns')
     plt.fill_between(lags, conf[:, 0], conf[:, 1], alpha=0.3)
     plt.fill_between(lags, conf2[:, 0], conf2[:, 1], alpha=0.3)
     plt.legend(loc='best')
     plt.savefig('Graphs/PACFAbsReturns.pdf', bbox_inches='tight')
Example #5
0
    def fit(self, data):
        """Return the first lag whose autocorrelation is below exp(-1).

        The autocorrelation of ``data[0]`` is computed with ``self.nlags``
        lags. While no value falls below the threshold, ``self.nlags`` is
        increased by 100 (the attribute is updated in place) and the
        search is repeated.
        """
        magnitude = data[0]
        threshold = np.exp(-1)

        while True:
            AC = stattools.acf(magnitude, nlags=self.nlags)
            k = next((lag for lag, value in enumerate(AC)
                      if value < threshold), None)
            if k is not None:
                return k
            # Nothing below the threshold yet: widen the lag window.
            self.nlags = self.nlags + 100
Example #6
0
def plot_acf(data):
    """Plot 90-lag autocorrelations of VIX**2, RV**2 and logR.

    ``data`` is indexed by the column names 'VIX', 'RV' and 'logR'. The
    figure is written to '../plots/autocorr_logr_vix_rv.eps' and shown.
    """
    max_lag = 90
    line_width = 2
    lag_axis = range(max_lag + 1)

    def draw(series, label):
        # One ACF curve on the current axes.
        plt.plot(lag_axis, acf(series, nlags=max_lag),
                 lw=line_width, label=label)

    plt.figure(figsize=(6, 4))
    draw(data['VIX']**2, 'VIX')
    draw(data['RV']**2, 'RV')
    draw(data['logR'], 'logR')
    plt.legend()
    plt.xlabel('Lags, days')
    plt.grid()
    plt.savefig('../plots/autocorr_logr_vix_rv.eps',
                bbox_inches='tight', pad_inches=.05)
    plt.show()
Example #7
0
def PrintSerialCorrelations(dailies):
    """Print serial correlations of prices and residuals at several lags.

    dailies: map from category name to DataFrame of daily prices

    Prints the lag-1 serial correlation of ``ppg`` for each category, a
    LaTeX table of residual serial correlations at lags 1, 7, 30 and 365,
    and selected autocorrelation values for the 'high' category.
    """
    filled_dailies = {name: FillMissing(daily, span=30)
                      for name, daily in dailies.items()}

    # Serial correlations for the raw price data.
    for name, filled in filled_dailies.items():
        print(name, thinkstats2.SerialCorr(filled.ppg, lag=1))

    # One table row per lag, one cell per category (dict order).
    rows = [
        [str(lag)] + ['%.2g' % thinkstats2.SerialCorr(filled.resid, lag)
                      for filled in filled_dailies.values()]
        for lag in [1, 7, 30, 365]
    ]

    header = (r'\begin{tabular}{|c|c|c|c|}',
              r'\hline',
              r'lag & high & medium & low \\ \hline')
    for line in header:
        print(line)
    for cells in rows:
        print(' & '.join(cells) + r' \\')
    for line in (r'\hline', r'\end{tabular}'):
        print(line)

    high_acf = smtsa.acf(filled_dailies['high'].resid, nlags=365,
                         unbiased=True)
    picked = tuple(high_acf[lag] for lag in (0, 1, 7, 30, 365))
    print('%0.3f, %0.3f, %0.3f, %0.3f, %0.3f' % picked)
Example #8
0
 def __init__(self):
     """Store reference ACF columns and the ``acf`` result for ``self.x``.

     ``self.confint_res`` holds the 'acvar_lb' / 'acvar_ub' columns of
     ``self.results`` reinterpreted as rows of two floats.
     """
     bound_columns = ['acvar_lb', 'acvar_ub']
     self.acf = self.results['acvar']
     self.qstat = self.results['Q1']
     self.res1 = acf(self.x, nlags=40, qstat=True, alpha=.05)
     bounds = self.results[bound_columns]
     self.confint_res = bounds.view((float, 2))
Example #9
0
 def setup_class(cls):
     """Class-level fixture with reference ACF columns and ``acf`` output.

     ``cls.confint_res`` holds the 'acvar_lb' / 'acvar_ub' columns of
     ``cls.results`` reinterpreted as rows of two floats.
     """
     reference = cls.results
     cls.acf = reference['acvar']
     cls.qstat = reference['Q1']
     cls.res1 = acf(cls.x, nlags=40, qstat=True, alpha=.05)
     bound_pair = reference[['acvar_lb', 'acvar_ub']]
     cls.confint_res = bound_pair.view((float, 2))
Example #10
0
def plot_acf_multiple(ys, lags=20):
    """Draw one autocorrelation stem plot per column of ``ys``.

    Parameters
    ----------
    ys : array_like
        Observations along axis 0, one series per column. A 1-D input is
        treated as a single series.
    lags : int
        Largest lag to display.

    A new 10x10 figure is created with one subplot per series stacked
    vertically. The global matplotlib font size is lowered to 8 while
    plotting and restored afterwards, even if plotting raises.
    """
    from statsmodels.tsa.stattools import acf

    ys = np.asanyarray(ys)
    if ys.ndim == 1:
        # A single series: make it a one-column matrix so the column
        # loop below works unchanged.
        ys = ys[:, np.newaxis]

    old_size = mpl.rcParams['font.size']
    mpl.rcParams['font.size'] = 8
    try:
        plt.figure(figsize=(10, 10))
        xs = np.arange(lags + 1)

        acorr = np.apply_along_axis(lambda x: acf(x, nlags=lags), 0, ys)

        k = acorr.shape[1]
        for i in range(k):
            ax = plt.subplot(k, 1, i + 1)
            ax.vlines(xs, [0], acorr[:, i])

            ax.axhline(0, color='k')
            ax.set_ylim([-1, 1])

            # Leave one lag of padding on each side of the stems.
            ax.set_xlim([-1, xs[-1] + 1])
    finally:
        mpl.rcParams['font.size'] = old_size
Example #11
0
 def ACF_PACF_plot(self):
     """Plot the ACF and PACF of ``self.ts_log_diff`` side by side.

     The plots are used to choose the ARIMA(p, d, q) orders: the lag at
     which the ACF cuts off suggests the MA order q, and the lag at which
     the PACF cuts off suggests the AR order p. Dashed lines mark zero and
     +/-1.96/sqrt(n), with n the length of the series.
     """
     series = self.ts_log_diff
     lag_acf = acf(series, nlags=20)
     lag_pacf = pacf(series, nlags=20, method='ols')
     # The bound is taken from the instance attribute; the previous code
     # referenced a bare ``ts_log_diff`` name that is not defined here.
     bound = 1.96/np.sqrt(len(series))

     #Plot ACF:
     ax = plt.subplot(121)
     plt.plot(lag_acf)
     ax.set_xlim([0,5])
     plt.axhline(y=0,linestyle='--',color='gray')
     plt.axhline(y=-bound,linestyle='--',color='gray')
     plt.axhline(y=bound,linestyle='--',color='gray')
     plt.title('Autocorrelation Function')

     #Plot PACF:
     plt.subplot(122)
     plt.plot(lag_pacf)
     plt.axhline(y=0,linestyle='--',color='gray')
     plt.axhline(y=-bound,linestyle='--',color='gray')
     plt.axhline(y=bound,linestyle='--',color='gray')
     plt.title('Partial Autocorrelation Function')
     plt.tight_layout()
def acf_fcn(data,lags=2,alpha=.05):
    """Return ``[acf_values, p_values]`` for ``data``, or ``[np.nan]``.

    data: array of values
    lags: number of lags passed to ``acf`` as ``nlags``
    alpha: confidence level passed to ``acf``

    Any ordinary error raised by ``acf`` is turned into ``[np.nan]``.
    KeyboardInterrupt and SystemExit are no longer swallowed.
    """
    try:
        acfvalues, _confint, _qstat, pvalues = acf(
            data, nlags=lags, qstat=True, alpha=alpha)
    except Exception:
        return [np.nan]
    return [acfvalues, pvalues]
Example #13
0
def autocorrelation(x, *args, unbiased=True, nlags=None, fft=True, **kwargs):
    """
    Compute the autocorrelation function of signal `x`.

    Parameters
    ----------
    x: array_like
        A 1D signal.
    unbiased: bool
        Forwarded to `statsmodels.tsa.stattools.acf`.
    nlags: int
        Number of lags to evaluate; when None, int(.9 * len(x)) is used.
    fft:  bool
        Forwarded to `acf`; compute the ACF via FFT.
    args, kwargs
        Forwarded unchanged to `statsmodels.tsa.stattools.acf`.

    Returns
    -------
    The value of `_significant_acf(result, alpha)`, where `result` is the
    `acf` output and `alpha` is the `alpha` keyword argument (None when
    it was not supplied).
    """
    from statsmodels.tsa.stattools import acf
    lag_count = int(.9 * len(x)) if nlags is None else nlags
    result = acf(x, *args, unbiased=unbiased, nlags=lag_count, fft=fft,
                 **kwargs)
    alpha = kwargs.get('alpha')
    return _significant_acf(result, alpha)
Example #14
0
def SimulateAutocorrelation(daily, iters=1001, nlags=40):
    """Resample residuals, compute autocorrelation, and plot percentiles.

    daily: input passed to FillMissing on every iteration
    iters: number of resampling iterations
    nlags: largest lag passed to the autocorrelation function
    """
    # Run the simulations; lag 0 is dropped from each result.
    simulated = []
    for _ in range(iters):
        filled = FillMissing(daily, span=30)
        resampled = thinkstats2.Resample(filled.resid)
        lagged = smtsa.acf(resampled, nlags=nlags, unbiased=True)[1:]
        simulated.append(np.abs(lagged))

    # One row per simulation, each column sorted independently.
    array = np.sort(np.vstack(simulated), axis=0)

    # Shade the symmetric band given by the 97.5th percentile row.
    high = PercentileRow(array, 97.5)
    low = -high
    lags = range(1, nlags+1)
    thinkplot.FillBetween(lags, low, high, alpha=0.2, color='gray')
Example #15
0
	def calc_autocorr(self):
		'''
		Compute the autocorrelation of the windowed ``self.s2`` signal.

		Stores the result in ``self.acorr`` and ``self.fs`` divided by
		each lag index in ``self.acorr_freq``.
		'''
		max_lag = int(self.fs / self._minfreq)
		windowed = self._windowed(self.s2)
		self.acorr = acf(windowed, nlags=max_lag)
		# Index 0 divides by zero, exactly as the lag indices dictate.
		lag_index = np.arange(self.acorr.size)
		self.acorr_freq = self.fs / lag_index
Example #16
0
 def __init__(self):
     """Store reference ACF columns and the ``acf`` result for ``self.x``.

     ``self.confint_res`` holds the 'acvar_lb' / 'acvar_ub' columns,
     selected from ``self.results`` with ``recarray_select`` and then
     reinterpreted as rows of two floats.
     """
     self.acf = self.results['acvar']
     self.qstat = self.results['Q1']
     self.res1 = acf(self.x, nlags=40, qstat=True, alpha=.05)
     # The unused DataFrame copy of self.results was removed; it was
     # built and never read.
     bounds = recarray_select(self.results, ['acvar_lb','acvar_ub'])
     self.confint_res = bounds.view((float, 2))
Example #17
0
def plotACF(timeSeries):
    """Plot the 40-lag ACF of ``timeSeries`` in the left of two subplots.

    Dashed gray lines mark zero and +/-1.96/sqrt(len(timeSeries)).
    """
    lag_acf = acf(timeSeries, nlags=40)
    plt.subplot(121)
    plt.plot(lag_acf)
    bound = 1.96/np.sqrt(len(timeSeries))
    for level in (0, -bound, bound):
        plt.axhline(y=level, linestyle='--', color='gray')
    plt.title('Autocorrelation Function')
Example #18
0
def lpc(frame, order):
    """
    frame: windowed signal
    order: lpc order
    Returns the first element of ``levinson_durbin`` applied to the
    autocorrelation of ``frame`` up to lag ``order``.
    """
    # NOTE(review): which element holds the coefficients depends on the
    # levinson_durbin implementation in scope; confirm that [0] is it.
    autocorr = acf(frame, unbiased=False, nlags=order)
    solution = levinson_durbin(autocorr, order)
    return solution[0]
Example #19
0
def correlation_plot(d, dt=6e-3, **kwargs):
    """Plot the full-length ACF of ``d`` against lag time on a log x-axis.

    d: sequence of samples
    dt: spacing between samples; lag k is drawn at ``dt * k``
    kwargs: forwarded to ``ax.plot``

    The alpha=0.05 confidence band is shaded on the current axes.
    """
    n_samples = len(d)
    corr, conf = acf(d, nlags=n_samples-1, alpha=0.05)
    taus = dt*np.arange(0, n_samples)
    lower, upper = conf[:,0], conf[:,1]

    ax = pl.gca()
    ax.plot(taus, corr, **kwargs)
    ax.fill_between(taus, y1=lower, y2=upper, color='k', alpha=0.2, lw=0)
    ax.set_xscale('log')
    ax.set_xlabel(r'$\tau$ (seconds)')
    ax.set_ylabel(r'$G(\tau)$')
    ax.grid()
Example #20
0
def ljungBox2(x, maxlag):
	"""Return the Ljung-Box Q statistics and p-values for lags 1..maxlag.

	x: object with a ``tolist()`` method holding the series
	maxlag: largest lag included

	Returns ``(qljungbox, pval)``, one entry per lag.
	"""
	lags = np.asarray(range(1, maxlag+1))
	values = x.tolist()
	n = len(values)
	rho = acf(values, nlags=maxlag) # normalize by nobs not (nobs-nlags)
	squared = rho[1:maxlag+1]**2
	weighted = squared / (n - np.arange(1,maxlag+1))

	running = np.cumsum(weighted)
	qljungbox = n * (n+2) * running[lags-1]
	pval = scipy.stats.chi2.sf(qljungbox, lags)
	return qljungbox, pval
 def get_acf_pacf(self, inputDataSeries, lag = 15):
     """Compute and plot the ACF and PACF of a series.

     Parameters
     ----------
     inputDataSeries : indexed series of observations
     lag : int
         Number of lags. When the index order cannot be detected, the
         sign of ``lag`` selects the shift direction.

     Returns
     -------
     dict
         'acf': list of lagged Pearson correlations computed by shifting
         the first column; 'pacf': output of ``pacf``.

     A new figure is created: the top panel shows the ``acf`` curve with
     the manually computed correlations as red dots, the bottom panel
     shows the PACF.
     """
     outputData = pandas.DataFrame(inputDataSeries)
     index = inputDataSeries.index

     if min(index) == index[0]:
         # Ascending
         multiplier = 1
     elif max(index) == index[0]:
         # Descending
         multiplier = -1
     else:
         print('Cannot determine the order put the lag value manually')
         print('Syntax: calc_returns(inputData, columnName, lag = lag_value)')
         # Fall back to the sign the caller supplied; previously
         # ``multiplier`` was left undefined on this path.
         multiplier = 1 if lag >= 0 else -1

     # acf/pacf need a non-negative lag count; the direction is carried
     # by ``multiplier`` only.
     n_lags = abs(lag)
     columnName = outputData.columns[0]
     base = outputData[columnName]

     # Manual ACF: correlation of the series with shifted copies of itself.
     acf_values = [base.corr(base)]
     for i in range(1, n_lags + 1):
         col_name = 'lag_' + str(i)
         outputData[col_name] = base.shift(multiplier*i)
         acf_values.append(base.corr(outputData[col_name]))

     pacf_values = pacf(inputDataSeries, nlags = n_lags)

     fig = plt.figure()
     ax1 = fig.add_subplot(211) # top panel: ACF
     ax2 = fig.add_subplot(212) # bottom panel: PACF

     lag_axis = range(len(acf_values))
     ax1.plot(lag_axis, acf(inputDataSeries, nlags = n_lags),
              lag_axis, acf_values, 'ro')
     ax2.plot(lag_axis, pacf_values, 'g*-')

     # Plot horizontal lines
     ax1.axhline(y = 0.0, color = 'black')
     ax2.axhline(y = 0.0, color = 'black')

     # Axis labels. Assigning to plt.xlabel / plt.ylabel replaced the
     # pyplot functions instead of labelling the axes.
     ax2.set_xlabel('Lags')
     ax1.set_ylabel('Correlation Coefficient')
     ax2.set_ylabel('Correlation Coefficient')
     return {'acf' : list(acf_values),
             'pacf': pacf_values}
Example #22
0
def ARIMA_fun( data ):
    """Fit an ARIMA model to the global ``orig_data`` and plot the fit.

    The MA order is ``int()`` of the first element of the second value
    returned by ``acf(data, nlags=20, qstat=True, unbiased=True)``.
    ``data`` and the fitted values are drawn in the left of two subplots.

    Returns the fitted values of the model.
    """
    # NOTE(review): with qstat=True and no alpha, the second acf output is
    # presumably the Q statistic rather than a confidence interval; confirm
    # this is the intended source of the MA order.
    lag_pacf = pacf(data, nlags=20, method='ols')  # not used below
    lag_acf, second_output, third_output = acf(
        data, nlags=20, qstat=True, unbiased=True)

    ma_order = int(second_output[0])
    model = ARIMA(orig_data, order=(1, 1, ma_order))
    results_ARIMA = model.fit(disp=-1)
    fitted = results_ARIMA.fittedvalues

    plt.subplot(121)
    plt.plot(data)
    plt.plot(fitted)
    return results_ARIMA.fittedvalues
Example #23
0
def plotACF(lcTime,lcInt,**kwargs):
    '''
    Compute and plot the autocorrelation curve of ``lcInt`` against
    ``lcTime``; extra keyword arguments go to ``plt.plot``.
    Returns the three values produced by ``acf`` with ``qstat=True``:
    correlation, Ljung-Box statistics, and p-values.
    '''
    # One lag per entry of lcTime.
    outcome = acf(lcInt, unbiased=False, qstat=True, nlags=len(lcTime))
    corr, ljb, pvalue = outcome

    # Correlation as a function of lag time.
    plt.plot(lcTime, corr, **kwargs)
    labelled = ((plt.xlabel, r"$\tau(s)$"),
                (plt.ylabel, r"$R(\tau)$"),
                (plt.title, r"Autocorrelation $R(\tau)$"))
    for setter, text in labelled:
        setter(text, fontsize=14)
    plt.show()

    return corr, ljb, pvalue
Example #24
0
    def FE(self, serie_atual):
        '''
        Extract features from the first difference of a series.
        :param serie_atual: the observed series
        :return: list with, in order, the ACF values (5 lags), the PACF
            values (5 lags), the standard deviation, the skewness, the
            kurtosis and ``self.turningpoints`` of the differenced series
        '''
        original = pd.Series(serie_atual)
        differenced = original - original.shift()
        # The first element has no predecessor, so it is dropped.
        serie_diff = differenced[1:]

        features = []

        # feature 1: autocorrelation
        features.extend(acf(serie_diff, nlags=5))

        # feature 2: partial autocorrelation
        features.extend(pacf(serie_diff, nlags=5))

        # features 3-5: spread (standard deviation), skewness, kurtosis
        features.append(serie_diff.std())
        features.append(serie_diff.skew())
        features.append(serie_diff.kurtosis())

        # feature 6: turning points
        features.append(self.turningpoints(serie_diff))

        return features
Example #25
0
def integrated_autocorr1(x, acf_cutoff=0.0):
    r"""Estimate the integrated autocorrelation time, :math:`\tau_{int}`, of
    a time series.

    This method performs a summation of the empirical autocorrelation
    function, using a window length ``M`` equal to the smallest value such
    that ``ACF(m) <= acf_cutoff``. This procedure is used in (Chodera 2007)
    with ``acf_cutoff = 0``. In (Hoffman 2011, Hub 2010), this estimator is
    used with ``acf_cutoff = 0.05``.

    Parameters
    ----------
    x : ndarray, shape=(n_samples, n_dims)
        The time series, with time along axis 0. A 1-D array is treated as
        a single dimension.
    acf_cutoff : float
        Threshold at which the summation window ends.

    References
    ----------
    .. [1] J. D. Chodera, W. C. Swope, J. W. Pitera, C. Seok, and K. A. Dill.
       JCTC 3(1):26-41, 2007.
    .. [2] Hoffman, M. D., and A. Gelman. "The No-U-Turn sampler: Adaptively
       setting path lengths in Hamiltonian Monte Carlo." arXiv preprint
       arXiv:1111.4246 (2011).
    .. [3] Hub, J. S., B. L. De Groot, and D. V. Der Spoel. "g_wham: A Free
       Weighted Histogram Analysis Implementation Including Robust Error and
       Autocorrelation Estimates." J. Chem. Theory Comput. 6.12 (2010):
       3713-3720.

    Returns
    -------
    tau_int : ndarray, shape=(n_dims,)
        The estimated integrated autocorrelation time of each dimension in
        ``x``, considered independently.
    """
    if x.ndim == 1:
        x = x.reshape(-1, 1)
    n_samples = len(x)
    n_dims = x.shape[1]

    tau = np.zeros(n_dims)
    for dim in range(n_dims):
        rho = acf(x[:,dim], nlags=n_samples, unbiased=False, fft=True)
        at_or_below = (rho <= acf_cutoff).astype(np.uint8)
        window = find_first(at_or_below)
        # Sum lags 1 .. window-1; lag 0 is covered by the leading 1.
        tau[dim] = 1 + 2*rho[1:window].sum()

    return tau
def global_analysis(csv_fname, trajectory_df):
    """Compute the ACF and PACF of every column of a trajectory.

    Parameters
    ----------
    csv_fname : unused by this function
    trajectory_df : DataFrame with one variable per column

    Returns
    -------
    None when the trajectory has fewer than MIN_TRAJECTORY_LEN rows,
    otherwise ``(acf_data, pacf_data)``: two arrays of shape
    ``(len(INTERESTED_VALS), 1, LAGS + 1)`` indexed by column position.
    """
    # Skip trajectories that are too short to analyse.
    if len(trajectory_df.index) < MIN_TRAJECTORY_LEN:
        return None

    acf_data = np.zeros((len(INTERESTED_VALS), 1, LAGS+1))
    pacf_data = np.zeros((len(INTERESTED_VALS), 1, LAGS+1))

    # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
    # Columns come back in positional order, so the enumerate index is
    # the column position.
    for var_index, (_var_name, var_values) in enumerate(trajectory_df.items()):
        col_acf, _acf_confint = acf(var_values, nlags=LAGS, alpha=.05)
        acf_data[var_index, 0, :] = col_acf

        # TODO: check for PACF values above or below +-1
        col_pacf, _pacf_confint = pacf(var_values, nlags=LAGS,
                                       method='ywmle', alpha=.05)
        pacf_data[var_index, 0, :] = col_pacf

    return acf_data, pacf_data
Example #27
0
def find_grid(hspace_angle, max_separation):
    """Returns the separation between graduations of the ruler.

    Args:
        hspace_angle: Bins outputted from :py:meth:`hough_transform`, but for only a single angle.
        max_separation: Maximum size of the *largest* graduation.

    Returns:
        Mean spacing between consecutive entries of ``[0, p1, ..., p4]``,
        where ``p1..p4`` are the first (up to four) peaks of the smoothed
        autocorrelation.

    """
    autocorrelation = acf(hspace_angle, nlags=max_separation, unbiased=False)
    smoothed = gaussian_filter1d(autocorrelation, 1)
    peak_positions = peakutils.indexes(smoothed, thres=0.25)

    # Prepend lag 0 so the first spacing is measured from the origin.
    anchored = np.concatenate(([0], peak_positions[:4]))
    spacings = np.diff(anchored)
    return np.mean(spacings)
Example #28
0
def integrated_autocorr6(x, c=6):
    r"""Estimate the integrated autocorrelation time, :math:`\tau_{int}`, of
    a time series.

    This method performs a summation of the empirical autocorrelation
    function, using Sokal's "automatic windowing" procedure. The window
    length ``M`` is chosen self-consistently to be the smallest value such
    that ``M`` is at least ``c`` times the estimated autocorrelation time,
    where ``c`` should be a constant in the range of 4, 6, or 10. See
    Appendix C of Sokal 1988.

    Parameters
    ----------
    x : ndarray, shape=(n_samples, n_dims)
        The time series, with time along axis 0. A 1-D array is treated as
        a single dimension.
    c : number
        Multiple of the estimated autocorrelation time that the window
        length must exceed.

    References
    ----------
    .. [1] Madras, Neal, and Alan D. Sokal. "The pivot algorithm: a highly
       efficient Monte Carlo method for the self-avoiding walk." J.
       Stat. Phys. 50.1-2 (1988): 109-186.

    Returns
    -------
    tau_int : ndarray, shape=(n_dims,)
        The estimated integrated autocorrelation time of each dimension in
        ``x``, considered independently.
    """
    if x.ndim == 1:
        x = x.reshape(-1, 1)

    n_dims = x.shape[1]
    tau = np.zeros(n_dims)
    for dim in range(n_dims):
        rho = acf(x[:,dim], nlags=len(x), unbiased=False, fft=True)
        # One candidate tau per choice of window length.
        candidate_taus = 1 + 2*np.cumsum(rho)[1:]
        window_lengths = np.arange(len(rho)-1)
        long_enough = (window_lengths > c*candidate_taus).astype(np.uint8)
        chosen = find_first(long_enough)
        tau[dim] = candidate_taus[chosen]
    return tau
    def plot_chain_acf(self, paramName, numBurnIn=0, dims=None, nlags=30,
                                                               derived=False):
        """
        Plot the Markov chain autocorrelation function of one parameter.

        paramName: key looked up in every element of self.chain_model
        numBurnIn: number of leading chain entries left out of the ACF
        dims: list (1-D parameters) or pair of lists (2-D parameters)
            selecting which rows and columns are plotted; all when None
        nlags: largest lag, capped at sqrt(chain length - numBurnIn)
        derived: not supported; raises NotImplementedError when true

        Returns the figure and the axes array.
        """
        usable = len(self.chain_model) - numBurnIn
        nlags = int(np.minimum(nlags, np.sqrt(usable)))

        if derived:
            raise NotImplementedError("Doesn't work because of the change"
                                      "in the way the chain is stored.")
        paramList = [md[paramName] for md in self.chain_model]

        # The rank of the first sample decides the axes layout.
        first = paramList[0]
        rank = len(first.shape)
        if rank == 1:
            fig, axs, coords = self._create_1d_plot_axes(first, dims)
        elif rank == 2:
            if dims is None:
                dims = (None, None)
            row_dims, col_dims = dims[0], dims[1]
            fig, axs, coords = self._create_2d_plot_axes(first,
                                                         row_dims, col_dims)
        else:
            raise ValueError("Cannot draw plots for this parameter")

        post_burn = paramList[numBurnIn:]
        for idx in np.ndindex(coords.shape):
            where = coords[idx]
            samples = [pp[where] for pp in post_burn]
            chain_acf = stattools.acf(samples, unbiased=False, nlags=nlags)
            panel = axs[idx]
            panel.plot(chain_acf, 'k')
            panel.plot([0,nlags], [0,0], 'k:')
            panel.set_xlim([0,nlags])

        return fig, axs
Example #30
0
def SimulateAutocorrelation(daily, iters=1001, nlags=40):
    """Resample residuals, compute autocorrelation, and plot percentiles.

    daily: DataFrame
    iters: number of simulations to run
    nlags: maximum lags to compute autocorrelation
    """
    def simulate_once():
        # Absolute autocorrelations of one resampled residual series,
        # with lag 0 dropped.
        filled = FillMissing(daily, span=30)
        resampled = thinkstats2.Resample(filled.resid)
        lagged = smtsa.acf(resampled, nlags=nlags, unbiased=True)[1:]
        return np.abs(lagged)

    t = [simulate_once() for _ in range(iters)]

    upper = thinkstats2.PercentileRows(t, [97.5])[0]
    lower = -upper
    lag_axis = range(1, nlags+1)
    thinkplot.FillBetween(lag_axis, lower, upper, alpha=0.2, color='gray')
	# Delete unnecessary variables to free up memory
	del pW_0, pW_1, pW_2, pb_0, pb_1, pb_2

	# Trace plot
	#
	# Autocorrelations are taken over the post-burn-in samples only;
	# acf_vals has 27 rows: 9 sampled entries from each of qW_0, qW_1, qW_2.
	n_lags_used = n_samp - nburn
	acf_vals = np.zeros([27, n_lags_used])
	# Nine distinct random indices in range(n_hidden) per draw.
	# NOTE(review): rnd2 is sampled but not used in the lines below.
	rnd0 = random.sample(range(n_hidden), 9)
	rnd1 = random.sample(range(n_hidden), 9)
	rnd2 = random.sample(range(n_hidden), 9)
	# Scratch buffer reused (and overwritten) for each of the three saves.
	trace_plotw = np.zeros([9, n_samp])

	# qW_0: entries [0, rnd0[i]] across all samples -> rows 0..8 of acf_vals.
	for i in range(9):
		w_samp = qW_0.params.eval()[:, 0, rnd0[i]]
		acf_vals[i,:] = acf(w_samp[nburn:], nlags=n_lags_used)
		trace_plotw[i, :] = w_samp
	np.save(path + '/traceplot_w0.npy', np.reshape(trace_plotw, [-1, 9, n_samp]))

	# qW_1: entries [rnd1[i], rnd0[i]] -> rows 9..17 of acf_vals.
	for i in range(9):
		w_samp = qW_1.params.eval()[:, rnd1[i], rnd0[i]]
		acf_vals[9+i, :] = acf(w_samp[nburn:], nlags=n_lags_used)
		trace_plotw[i, :] = w_samp
	np.save(path + '/traceplot_w1.npy', np.reshape(trace_plotw, [-1, 9, n_samp]))

	# qW_2: entries [rnd1[i], 0] -> rows 18..26 of acf_vals.
	for i in range(9):
		w_samp = qW_2.params.eval()[:, rnd1[i], 0]
		acf_vals[18+i, :] = acf(w_samp[nburn:], nlags=n_lags_used)
		trace_plotw[i, :] = w_samp
	np.save(path + '/traceplot_w2.npy', np.reshape(trace_plotw, [-1, 9, n_samp]))
Example #32
0
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import acf, pacf

import plotter

print("here ")
print(np.where(np.isnan(plotter.ts_log_diff)))
# Report the NaN count per column. The former ``.nan`` attribute access
# does not exist on DataFrame and raised AttributeError.
print(pd.DataFrame(plotter.ts_log_diff).isna().sum())

lag_acf = acf(plotter.ts_log_diff, nlags=20)
lag_pacf = pacf(plotter.ts_log_diff, nlags=20, method='ols')

# Plot ACF with dashed lines at zero and +/-1.96/sqrt(n):
plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(plotter.ts_log_diff)),
            linestyle='--',
            color='gray')
plt.axhline(y=1.96 / np.sqrt(len(plotter.ts_log_diff)),
            linestyle='--',
            color='gray')
plt.title('Autocorrelation Function')
# NOTE(review): showing the figure here means the PACF panel below is
# drawn on a new figure; move this call after the PACF plot if both
# panels are meant to appear side by side.
plt.show()

# Plot PACF:
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0, linestyle='--', color='gray')
Example #33
0
# Remove missing values from the training slice, then run the ADF test.
data_moving_avg_diff_train.dropna(inplace=True)
adfuller(data_moving_avg_diff_train)

data_moving_avg_diff_test = data_moving_avg_diff[584:]

# Plot each slice and run the ADF test on it (train first, then test).
for _subset in (data_moving_avg_diff_train, data_moving_avg_diff_test):
    plt.plot(_subset)
    adfuller(_subset)

# Since the p value is now smaller than 0.05 the data is stationary

# ACF and PACF of the training slice:
from statsmodels.tsa.stattools import acf, pacf
lag_acf3 = acf(data_moving_avg_diff_train, nlags=20)
lag_pacf3 = pacf(data_moving_avg_diff_train, nlags=20, method='ols')

# ACF panel with dashed lines at zero and +/-1.96/sqrt(n).
_bound = 1.96 / np.sqrt(len(data_moving_avg_diff_train))
plt.subplot(121)
plt.plot(lag_acf3, '^k:')
for _level in (0, -_bound, _bound):
    plt.axhline(y=_level, linestyle='--', color='gray')
plt.grid()
plt.title('Autocorrelation Function')
# Choose p=2
Example #34
0
Values of p and q come through ACF and PACF plots. So let us understand both ACF and PACF!
"""
"""
#Below code plots, both ACF and PACF plots for us
from pandas.plotting import autocorrelation_plot
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
autocorrelation_plot(df_log)
plot_pacf(df_log, lags=10)
plt.show()
"""

# Plot the ACF and PACF graphs (autocorrelation function and partial
# autocorrelation function).
# To find 'p' of (p, d, q) use the PACF graph; for 'q' use the ACF graph.
from statsmodels.tsa.stattools import acf, pacf
# The differenced log series (df_log_diff) is the input here.
# NOTE(review): the two assignments below rebind the imported function names
# ``acf`` and ``pacf`` to their results, so the functions cannot be called
# again after this point.
acf = acf(df_log_diff, nlags=15)
pacf = pacf(df_log_diff, nlags=15, method='ols')

# 'ols' stands for ordinary least squares, used to minimise the errors.

# Subplot codes 121 and 122 place the two panels side by side.

# Plot ACF with lines at zero and +/-1.96/sqrt(n)
plt.subplot(121)
plt.plot(acf)
plt.axhline(y=0, linestyle='-', color='blue')
plt.axhline(y=-1.96 / np.sqrt(len(df_log_diff)), linestyle='--', color='black')
plt.axhline(y=1.96 / np.sqrt(len(df_log_diff)), linestyle='--', color='black')
plt.title('Auto corellation function')
plt.tight_layout()
Example #35
0
def _axis_features(values):
    """Return the 19 summary statistics of one axis window, in file order."""
    stats = [
        np.mean(values), np.std(values), np.var(values), median(values),
        np.percentile(values, 25), np.percentile(values, 75), mode(values),
        np.min(values), np.argmin(values), np.max(values), np.argmax(values),
        robust.mad(values),
    ]
    autocorr = stattools.acf(values)
    autocov = stattools.acovf(values)
    stats += [
        autocorr.mean(), autocorr.std(), autocov.mean(), autocov.std(),
        skew(values), kurtosis(values), iqr(values),
    ]
    return stats


def extract_features():
    """Build a feature table from '1.csv' .. '22.csv' and save it.

    Each file is read with ``np.loadtxt``; column 0 is compared against a
    threshold that starts at 2 and grows by 2 per window, and columns 1-3
    are collected as the X, Y and Z values of the current window. A window
    is closed by the first sample whose column 0 exceeds the threshold, or
    by the last sample of the file; that closing sample is not added to
    any window. Every closed window yields one row: the file number minus
    one, 19 statistics per axis, the norm of the three means, three
    cross-correlations and 100 FFT sample frequencies. Windows for which a
    statistic raises ValueError (e.g. empty windows) are skipped.

    The rows are written to 'features.csv' and returned as a numpy array.
    """
    dados = []
    for x in range(1, 23):
        fich = np.loadtxt("%d.csv" % x, delimiter=",")
        # Identical for every window of this file, so computed once.
        freq_features = np.resize(np.fft.fftfreq(len(fich)), (100,))
        last = len(fich) - 1
        auxAcX, auxAcY, auxAcZ = [], [], []
        valor = 2
        for y in range(len(fich)):
            if fich[y][0] <= valor and y != last:
                auxAcX.append(fich[y, 1])
                auxAcY.append(fich[y, 2])
                auxAcZ.append(fich[y, 3])
            elif fich[y, 0] > valor or y == last:
                try:
                    # Each axis now uses its own autocorrelation spread;
                    # the Y and Z rows previously reused the X-axis value.
                    row = [[
                        x - 1,
                        _axis_features(auxAcX),
                        _axis_features(auxAcY),
                        _axis_features(auxAcZ),
                        mt.sqrt(np.mean(auxAcX)**2 + np.mean(auxAcY)**2
                                + np.mean(auxAcZ)**2),
                        np.correlate(auxAcX, auxAcY),
                        np.correlate(auxAcX, auxAcZ),
                        np.correlate(auxAcZ, auxAcY),
                        freq_features,
                    ]]
                    dados.append(list(deepflatten(row)))
                except ValueError:  # raised if the window is empty
                    pass
                # NOTE(review): the former ``y = y-1`` had no effect inside
                # a ``for`` loop, so the closing sample is still dropped;
                # confirm whether it should open the next window.
                auxAcX, auxAcY, auxAcZ = [], [], []
                valor = valor + 2

    # newline="" is what the csv module expects for files it writes to.
    with open("features.csv", "w", newline="") as my_csv:
        csvWriter = csv.writer(my_csv, delimiter=',')
        csvWriter.writerows(dados)
    return np.array(dados)
def autocorrelation_all(series):
    """Compute the autocorrelation of ``series`` for every available lag.

    The work is delegated to ``stattools.acf`` with ``nlags`` set to the
    length of ``series``; its result is returned unchanged.
    """
    max_lag = len(series)
    correlations = stattools.acf(series, nlags=max_lag)
    return correlations
Example #37
0
    def arima_model(self):
        """Fit an ARIMA(3, 1, 3) model on ``self.df`` and assemble a forecast report.

        The first column of ``self.df`` is used as the timestamp column and the
        second as the value column.  The method

        * splits the data with ``TimeSeriesSplit`` (only the last split is kept),
        * log-transforms the training part and differences it,
        * plots the seasonal decomposition and the ACF/PACF of the differenced
          series (rendered through ``st.pyplot``),
        * fits ``ARIMA(order=(3, 1, 3))`` and plots the fitted values,
        * computes MAE, MSE, RMSE, MAPE and R2 on the test part, and
        * writes the assembled report to a hard-coded CSV path.

        Returns
        -------
        list
            ``[trdata, dataset_des, trdata_des, variance, variance_seasonal,
            variance_trend, variance_residual, s, MAE, MSE, RMSE, mape, r2,
            df]`` in that order.
        """
        # To Identify No.of Rows In a DataSet
        no_rows = len(self.df)

        # Column 0 is treated as the timestamp column, column 1 as the target.
        Total_cloumns = self.df.columns
        inp_column = Total_cloumns[0]
        out_column = Total_cloumns[1]

        # Index the data set by the timestamp column.
        inp_col = pd.to_datetime(self.df[inp_column])
        Dataset = self.df.set_index([inp_column])

        # Split into train and test.  The loop rebinds the four variables on
        # every fold, so only the LAST split is used afterwards.
        x = self.df[inp_column]
        y = self.df[out_column]
        tscv = TimeSeriesSplit()
        for train_index, test_index in tscv.split(Dataset):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]

        # Build the training data set indexed by timestamp.
        # NOTE(review): rename(..., inplace=True) returns None, so the *_1
        # names are always None, and the second rename targets
        # train_timestamp rather than train_usedspace -- confirm intent.
        train_timestamp = pd.DataFrame(x_train)
        train_timestamp_1 = train_timestamp.rename(columns={0: inp_column},
                                                   inplace=True)
        train_usedspace = pd.DataFrame(y_train)
        train_usedspace_1 = train_timestamp.rename(columns={0: out_column},
                                                   inplace=True)
        trdata = pd.concat([train_timestamp, train_usedspace], axis=1)
        trdata = trdata.set_index([inp_column])

        # Sizes of the train and test targets.
        rows_ytrain = len(y_train)
        rows_ytest = len(y_test)

        # Summary statistics of the full and the training data sets.
        dataset_des = Dataset.describe()
        trdata_des = trdata.describe()

        # Log-transform the training data, then take first differences and
        # drop the NaN produced by the shift.
        Dataset_logScale = np.log(trdata)

        datasetLogDiffshifting = Dataset_logScale - Dataset_logScale.shift()
        datasetLogDiffshifting.dropna(inplace=True)

        # Components of the time series (trend, seasonal and residuals).
        decomposition = seasonal_decompose(Dataset_logScale, period=3)
        trend = decomposition.trend
        seasonal = decomposition.seasonal
        residual = decomposition.resid
        plt.show()
        plt.plot(Dataset_logScale, label='Original')
        plt.legend(loc='best')
        plt.subplot(412)
        plt.plot(trend, label='Trend')
        plt.legend(loc='best')
        plt.subplot(413)
        plt.plot(seasonal, label='Seasonality')
        plt.legend(loc='best')
        plt.subplot(414)
        plt.plot(residual, label='Residuals')
        plt.legend(loc='best')
        plt.tight_layout()
        decomposedLogData = residual
        decomposedLogData.dropna(inplace=True)

        # Seasonal range expressed as a percentage of the trend range.
        seasonal_max = seasonal.max()
        seasonal_min = seasonal.min()
        trend_max = trend.max()
        trend_min = trend.min()
        variance = (seasonal_max - seasonal_min) / (trend_max -
                                                    trend_min) * 100

        # Variances of the individual components.
        variance_seasonal = np.var(seasonal)
        variance_trend = np.var(trend)
        variance_residual = np.var(residual)

        # Autocorrelation and partial autocorrelation of the differenced data.
        lag_acf = acf(datasetLogDiffshifting, nlags=20)
        lag_pacf = pacf(datasetLogDiffshifting, nlags=20,
                        method='ols')  # ols = ordinary least squares

        # +/- 1.96 / sqrt(n) band drawn around zero on both plots.  The
        # previous code drew the upper line twice and never the lower one.
        conf_bound = 1.96 / np.sqrt(len(datasetLogDiffshifting))

        # Plot ACF:
        plt.subplot(121)
        plt.plot(lag_acf)
        plt.axhline(y=0, linestyle='--', color='gray')
        plt.axhline(y=-conf_bound, linestyle='--', color='gray')
        plt.axhline(y=conf_bound, linestyle='--', color='gray')
        plt.title('AutoCorrelation Function')
        st.pyplot()
        # Plot PACF:
        plt.subplot(122)
        plt.plot(lag_pacf)
        plt.axhline(y=0, linestyle='--', color='gray')
        plt.axhline(y=-conf_bound, linestyle='--', color='gray')
        plt.axhline(y=conf_bound, linestyle='--', color='gray')
        plt.title('Partial AutoCorrelation Function')
        plt.tight_layout()
        st.pyplot()

        # "AR" fit.  NOTE(review): all three fits below use the same
        # order=(3, 1, 3), so they are the same model -- confirm intent.
        print(Dataset_logScale)
        model = ARIMA(Dataset_logScale, order=(3, 1, 3))
        results_AR = model.fit(disp=-1)
        plt.plot(datasetLogDiffshifting)
        plt.plot(results_AR.fittedvalues, color='red')
        # Title shows the residual sum of squares against the differenced data.
        plt.title('RSS: %.4f' % sum(
            (results_AR.fittedvalues - datasetLogDiffshifting[out_column])**2))

        # "MA" fit.
        model = ARIMA(Dataset_logScale, order=(3, 1, 3))
        results_MA = model.fit(disp=-1)
        plt.plot(datasetLogDiffshifting)
        plt.plot(results_MA.fittedvalues, color='red')
        plt.title('RSS: %.4f' % sum(
            (results_MA.fittedvalues - datasetLogDiffshifting[out_column])**2))
        st.pyplot()
        # "ARIMA" fit; this is the result object used for everything below.
        model = ARIMA(Dataset_logScale, order=(3, 1, 3))
        results_ARIMA = model.fit(disp=-1)
        plt.plot(datasetLogDiffshifting)
        plt.plot(results_ARIMA.fittedvalues, color='red')
        plt.title('RSS: %.4f' % sum(
            (results_ARIMA.fittedvalues - datasetLogDiffshifting[out_column])**
            2))
        st.pyplot()
        # Cumulative sum of the fitted differences added onto the first log
        # value.  These three names are overwritten by the next section.
        predictions_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues,
                                           copy=True)
        predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
        predictions_ARIMA_log = pd.Series(Dataset_logScale[out_column].iloc[0],
                                          index=Dataset_logScale.index)
        predictions_ARIMA_log = predictions_ARIMA_log.add(
            predictions_ARIMA_diff_cumsum, fill_value=0)

        # In-sample predictions of the ARIMA model, mapped back from log scale.
        pred = results_ARIMA.predict(start=1, end=rows_ytrain)
        predictions_ARIMA_diff = pd.Series(pred, copy=True)
        predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
        predictions_ARIMA_log = pd.Series(Dataset_logScale.iloc[0])
        predictions_ARIMA_log = predictions_ARIMA_log.add(
            predictions_ARIMA_diff_cumsum, fill_value=0)
        predictions_ARIMA = np.exp(predictions_ARIMA_log)
        s = pd.DataFrame(predictions_ARIMA)
        s = s.reset_index()

        # Forecast plot for the ARIMA model.
        st.write("Forecasted Plot For ARIMA Model")
        p = results_ARIMA.plot_predict(1, 550)
        plt.xlabel('Timestamp', fontsize=14, color='b')
        plt.ylabel('Sales', fontsize=14, color='b')
        plt.title('Forecast ', fontsize=20, color='black')
        plt.legend(['Forecasted Data', 'Input Data'], loc='upper left')
        plt.axhline(y=1000, color='r', linestyle='-')
        plt.ylim(0, 12)
        plt.show()
        st.pyplot()

        # Out-of-sample forecast covering the test period.
        # NOTE(review): the (forecast * 100) / 2 rescaling is not explained
        # anywhere in this method -- confirm what it is meant to undo.
        forecast = results_ARIMA.forecast(steps=rows_ytest)[0]
        y_pred = (forecast * 100) / 2

        from sklearn import metrics
        MAE = (metrics.mean_absolute_error(y_test, y_pred))  # MAE

        MSE = (metrics.mean_squared_error(y_test, y_pred))  # MSE

        RMSE = (np.sqrt(metrics.mean_squared_error(y_test, y_pred))
                )  # RMSE

        def mean_absolute_percentage_error(y_true,
                                           y_pred):
            """Return the mean absolute percentage error, in percent."""
            y_true, y_pred = np.array(y_true), np.array(y_pred)
            return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

        mape = mean_absolute_percentage_error(y_test, y_pred)

        from sklearn.metrics import r2_score  # R2
        r2 = r2_score(y_test, y_pred)

        # Drop the helper index column and name the prediction column.
        s = s.drop(columns=["index"])
        s.rename(columns={0: out_column}, inplace=True)

        # Relative difference (percent) between log training values and s.
        train_y = y_train.to_frame()
        train_y = np.log(train_y)
        fitted_values = ((train_y - s) / train_y) * 100
        fitted_values_1 = fitted_values.rename(
            columns={out_column: "Fitted_values"}, inplace=True)

        # Predicted values for the test period as a one-column frame.
        predicted_values = pd.DataFrame(y_pred)
        predicted_values1 = predicted_values.rename(
            columns={0: "Predicted_values"}, inplace=True)

        # Forecast the test period plus one additional quarter (90 steps).
        quarter_period = 90
        forecasted_days = rows_ytest + quarter_period
        forecast = results_ARIMA.forecast(steps=forecasted_days)[0]
        for_val = (forecast * 100) / 2

        # Name the forecast column; the first 50 rows are discarded.
        forecast = pd.DataFrame(for_val)
        forecast.rename(columns={0: "Forecasted_values"}, inplace=True)
        forecast = forecast.iloc[50:]

        # Convert all frames into numpy arrays.
        # NOTE(review): `data` is not defined inside this method and the
        # column names `Date` / `Monthly_sales_total` are hard-coded --
        # presumably `data` is a module-level frame; verify.
        fitted_values = fitted_values.Fitted_values.to_numpy()
        predicted_values = predicted_values.Predicted_values.to_numpy()
        forecasted_values = forecast.Forecasted_values.to_numpy()
        timestamp = self.df.Date.to_numpy()
        actual_values = data.Monthly_sales_total.to_numpy()

        # Left-pad predictions and forecasts with zeros so that they line up
        # after the fitted / actual values in the final report.
        length = len(fitted_values)
        an_array = np.empty(length)
        an_array[:] = 0
        final_predicted = np.concatenate((an_array, predicted_values))
        length1 = len(actual_values)
        an_array1 = np.empty(length1)
        an_array1[:] = 0
        final_forecasted = np.concatenate((an_array1, forecasted_values))

        # Collect all results into one frame (one row per series).
        report_columns = {
            'Date': timestamp,
            'Actual_values': actual_values,
            'Fitted_values': fitted_values,
            'Predicted_values': final_predicted,
            'Forecasted_values': final_forecasted
        }
        df = pd.DataFrame.from_dict(report_columns, orient='index')
        # NOTE(review): the transposed frame is discarded, so the CSV and the
        # returned `df` keep one ROW per series -- confirm whether
        # `df = df.transpose()` was intended.
        df.transpose()

        # Write the CSV report.
        df.to_csv(
            r'C:\Users\nkatakamsetty\Desktop\Demand_Forecast\final_report.csv',
            index=True)
        return [
            trdata, dataset_des, trdata_des, variance, variance_seasonal,
            variance_trend, variance_residual, s, MAE, MSE, RMSE, mape, r2, df
        ]
Example #38
0
# Display whatever figure was built before this fragment.
plt.show()

# Figure 6: the differenced series, followed by the project helper `plotstats`.
plt.figure(6)
plt.plot(differencing)
plotstats(differencing)
plt.show()

print('ACF and PACF with series stationarized')

# ACF and PACF plots of the differenced series, each on its own new figure.
pyplot.figure()
plot_acf(differencing, ax=pyplot.gca(), lags=20)
pyplot.figure()
plot_pacf(differencing, ax=pyplot.gca(), lags=20)
pyplot.show()

# Raw ACF / PACF values for lags up to 20.
lag_acf = acf(differencing, nlags=20)
lag_pacf = pacf(differencing, nlags=20, method='ols')

#Temporary test ACF and PACF

# Manual ACF plot with a +/- 1.96 / sqrt(n) band around zero.
# NOTE(review): the band uses len(series) while the ACF was computed on
# `differencing` -- confirm that the two have the intended length.
plt.figure(13)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(series)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(series)), linestyle='--', color='gray')
plt.title('Autocorrelation function for PETR4 - ARIMA (0,1,1)')
plt.show()

#Plot PACF:

plt.figure(14)
Example #39
0
def plot_acf(x,
             ax=None,
             lags=None,
             alpha=.05,
             use_vlines=True,
             unbiased=False,
             fft=False,
             **kwargs):
    """Plot the autocorrelation function

    Plots lags on the horizontal and the correlations on vertical axis.

    Parameters
    ----------
    x : array_like
        Array of time-series values
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    lags : int or array_like, optional
        If an int, the largest lag to plot; lags ``0, 1, ..., lags`` are
        shown.  If array_like, the specific (integer) lag values to plot.
        If not given, ``lags=np.arange(len(x))`` is used.
    alpha : scalar, optional
        If a number is given, the confidence intervals for the given level are
        returned. For instance if alpha=.05, 95 % confidence intervals are
        returned where the standard deviation is computed according to
        Bartlett's formula. If None, no confidence intervals are plotted.
    use_vlines : bool, optional
        If True, vertical lines and markers are plotted.
        If False, only markers are plotted.  The default marker is 'o'; it can
        be overridden with a ``marker`` kwarg.
    unbiased : bool
        If True, then denominators for autocovariance are n-k, otherwise n
    fft : bool, optional
        If True, computes the ACF via FFT.
    **kwargs : kwargs, optional
        Optional keyword arguments that are directly passed on to the
        Matplotlib ``vlines``, ``axhline`` and ``plot`` functions.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    matplotlib.pyplot.xcorr
    matplotlib.pyplot.acorr
    mpl_examples/pylab_examples/xcorr_demo.py

    Notes
    -----
    Adapted from matplotlib's `xcorr`.

    Data are plotted as ``plot(lags, corr, **kwargs)``

    """
    fig, ax = utils.create_mpl_ax(ax)

    # `subset` is True only when the caller passed explicit lag values; the
    # ACF is then computed up to the largest one and indexed afterwards.
    subset = False
    if lags is None:
        lags = np.arange(len(x))
        nlags = len(lags) - 1
    elif np.ndim(lags) == 0:
        nlags = lags
        lags = np.arange(lags + 1)  # +1 for zero lag
    else:
        lags = np.asarray(lags).astype(int)
        nlags = int(lags.max())
        subset = True

    confint = None
    # acf has different return type based on alpha
    if alpha is None:
        acf_x = acf(x, nlags=nlags, alpha=alpha, fft=fft, unbiased=unbiased)
    else:
        acf_x, confint = acf(x,
                             nlags=nlags,
                             alpha=alpha,
                             fft=fft,
                             unbiased=unbiased)

    if subset:
        # Keep only the requested lags so x and y values stay aligned.
        acf_x = acf_x[lags]
        if confint is not None:
            confint = confint[lags]

    if use_vlines:
        ax.vlines(lags, [0], acf_x, **kwargs)
        ax.axhline(**kwargs)

    # Marker defaults are applied only after the vlines/axhline calls above,
    # so those two calls never receive them.
    kwargs.setdefault('marker', 'o')
    kwargs.setdefault('markersize', 5)
    kwargs.setdefault('linestyle', 'None')
    ax.margins(.05)
    ax.plot(lags, acf_x, **kwargs)
    ax.set_title("Autocorrelation")

    if confint is not None:
        # center the confidence interval TODO: do in acf?
        ax.fill_between(lags,
                        confint[:, 0] - acf_x,
                        confint[:, 1] - acf_x,
                        alpha=.25)

    return fig
Example #40
0
def test_acf_fft_dataframe():
    """Check that the FFT-based ``acf`` of a one-column DataFrame is 1-D."""
    # regression test #322
    sunactivity = sunspots.load_pandas().data[['SUNACTIVITY']]
    acf_values = acf(sunactivity, fft=True)
    assert_equal(acf_values.ndim, 1)
Example #41
0
 def setup_class(cls):
     """Store the reference FFT values and the ``acf`` result on the class."""
     expected = cls.results
     cls.acf, cls.qstat = expected['acvarfft'], expected['Q1']
     # FFT-based computation with Q statistics for lags up to 40.
     computed = acf(cls.x, nlags=40, qstat=True, fft=True)
     cls.res1 = computed
Example #42
0
 def setup_class(cls):
     """Store the reference values, the ``acf`` result and the reference bounds."""
     expected = cls.results
     cls.acf, cls.qstat = expected['acvar'], expected['Q1']
     # Non-FFT computation with Q statistics and alpha=.05 intervals.
     computed = acf(cls.x, nlags=40, qstat=True, alpha=.05, fft=False)
     cls.res1 = computed
     # Reference lower / upper bounds as a two-column array.
     bound_columns = ['acvar_lb', 'acvar_ub']
     cls.confint_res = cls.results[bound_columns].values
Example #43
0
    # One row per beta value, three columns.
    # presumably one column per observable -- TODO confirm
    tau = np.zeros((nbeta, 3))

    for b in range(nbeta):
        # Evenly spaced beta grid from beta_low to beta_high (inclusive).
        beta[b] = beta_low + b * (beta_high - beta_low) / (nbeta - 1)

        for name in range(len(Observables)):
            # NOTE(review): re-created for every observable and not used in
            # the visible part of this loop.
            Obs_mean = np.zeros((nbeta))
            Obs_var = np.zeros((nbeta))

            #            fileO=("%s/beta_%d/%s.npy" %(BASEDIR, b, Observables[name]))
            #            Obs=np.load(fileO)
            # Read the measurement series for this observable from the HDF5
            # output of the current beta.
            # NOTE(review): the file handle is not closed in the visible code.
            file = h5py.File('%s/beta_%d/Output.h5' % (BASEDIR, b), 'r')
            Obs = np.asarray(file['Measurements']['%s' % (Observables[name])])

            # Autocorrelation up to a tenth of the series length, via FFT.
            A_Obs = acf(Obs, nlags=int(len(Obs) / 10), fft=True)
            #            A_Obs=acf(Obs, fft=True)
            #            fig, ax1 = plt.subplots(1, 1)
            #            ax1.set_title(r"$L=%s; beta=%s$" %(L[l], beta[b]) )
            #            ax1.set_xlabel(r"$t$")
            #            ax1.set_ylabel(r"$Autocorr$")
            #            ax1.plot(A_Obs[:], "-")
            #            ax1.grid()
            #            plt.show()

            # Indices of all lags whose autocorrelation is below 0.1.
            temp = np.where(A_Obs[:] < 0.1)
            print(Observables[name], b, temp)
            # Ten times the first such lag; temp[0][0] raises IndexError when
            # the autocorrelation never drops below 0.1.
            tmax_int = 10 * temp[0][0]
            temp_tau = []
            time_int = 1000
            # Use at least `time_int`.
            tmax_int = max(time_int, tmax_int)
# Keep the decomposition residual and drop its NaN entries in place.
# (`ts_data_decompose` is the same object as `residual`, so both are affected.)
ts_data_decompose = residual
ts_data_decompose.dropna(inplace=True)


# In[50]:


#  Forecasting a Time Series

# ACF and PACF
#ACF and PACF plots:


from statsmodels.tsa.stattools import acf, pacf
# ACF / PACF values of `ts_data` for lags up to 20.
lag_acf = acf(ts_data, nlags=20)
lag_pacf = pacf(ts_data, nlags=20, method='ols')

#Plot ACF: 
# Left panel: ACF with a +/- 1.96 / sqrt(n) band around zero.
plt.subplot(121) 
plt.plot(lag_acf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(ts_data)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(ts_data)),linestyle='--',color='gray')
plt.title('Autocorrelation Function')

#Plot PACF:
# Right panel: PACF; the fragment ends after the lower band line.
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(ts_data)),linestyle='--',color='gray')
Example #45
0
# The commented-out lines below are disabled decomposition / stationarity code.
#plt.tight_layout()
#
##there can be cases where an observation simply consisted of trend & seasonality. In that case, there won't be
##any residual component & that would be a null or NaN. Hence, we also remove such cases.
#decomposedLogData = residual
#decomposedLogData.dropna(inplace=True)
#test_stationarity(decomposedLogData)
#
#
#decomposedLogData = residual
#decomposedLogData.dropna(inplace=True)
#test_stationarity(decomposedLogData)

#-------------for main_meter only------------------

# ACF / PACF of the 'main_meter' column for lags up to 20.
lag_acf = acf(datasetLogDiffShifting['main_meter'], nlags=20)
lag_pacf = pacf(datasetLogDiffShifting['main_meter'], nlags=20, method='ols')

#Plot ACF:
# Left panel: ACF with a +/- 1.96 / sqrt(n) band around zero.
plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(datasetLogDiffShifting)),
            linestyle='--',
            color='gray')
# NOTE(review): the upper band is drawn in red while the lower one is gray --
# confirm whether the colour difference is intentional.
plt.axhline(y=1.96 / np.sqrt(len(datasetLogDiffShifting)),
            linestyle='--',
            color='red')
plt.title('Autocorrelation Function')

#Plot PACF
def acf_coefs(x, maxlag=100):
    """Return the flattened autocorrelation of ``x`` for at most ``maxlag`` lags.

    ``x`` is flattened to one dimension first, and the number of lags is
    capped at ``len(x) - 1``.
    """
    flat = np.ravel(np.asarray(x))
    lag_count = np.minimum(len(flat) - 1, maxlag)
    coefs = acf(flat, nlags=lag_count)
    return coefs.ravel()
Example #47
0
            tmp = dill.load(f)
            tmp['x'] = tmp['x'][:TMAX]
            tmp['y'] = tmp['y'][:TMAX]
            x.append(tmp['x'])
            y.append(tmp['y'])
            covs, means = evaluation.mean_hit_rate(tmp['x'],
                                                   tmp['y'],
                                                   n_covs=501)
            xm.append(covs)
            ym.append(means)
    return x, y, xm, ym


# Script entry point: plot the autocorrelation of the difference between two
# interpolated hit-rate series together with a confidence band.
if __name__ == '__main__':
    x, y, xm, ym = load_compare_data()
    # Hit rates of entries 0 and 2 of the loaded data, both evaluated at 0.05.
    m0 = evaluation.interpolated_hit_rate(x[0], y[0], 0.05)
    m1 = evaluation.interpolated_hit_rate(x[2], y[2], 0.05)
    d = m0 - m1

    # Autocorrelation of the difference for lags up to 21.
    acorr = acf(d, nlags=21)
    # Confidence interval: z value / sqrt(sample size)
    # sqrt(2) * erfinv(0.95) is the two-sided 95 % normal quantile.
    ci = np.sqrt(2) * erfinv(0.95) / np.sqrt(d.size)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(acorr, 'k-')
    # Dashed horizontal lines at +ci and -ci across the plotted lag range.
    ax.plot([0, len(acorr)], [ci, ci], 'k--')
    ax.plot([0, len(acorr)], [-ci, -ci], 'k--')
    plt.xlabel('Lag (days)', size=FONTSIZE)
    plt.ylabel('Autocorrelation', size=FONTSIZE)
    plt.tight_layout()
Example #48
0
                                 end)

# Simple returns of the close price; the first (NaN) entry is dropped.
returns = RJ['close_price'].pct_change().dropna()
returns.name = 'return'
# returns.plot()
# returns.column = ['return']
# print(returns)

# Compute the autocorrelation coefficients
# acfs = stattools.acf(returns)
# # print(acfs)
# # Partial autocorrelation coefficients
# pacfs = stattools.pacf(returns)
# # print(pacfs)
# # Autocorrelation plot
# plot_acf(returns,use_vlines=True,lags=30)
# # Partial autocorrelation plot
# plot_pacf(returns,use_vlines=True,lags=30)
# plot_acf(RJ['close_price'],use_vlines=True,lags=30)
#
# plt.show()

# Unit root test
# adfRJ= ADF(returns)
# print(adfRJ.summary().as_text())
# adfClose = ADF(RJ['close_price'])
# print(adfClose.summary().as_text())

# White noise test (Ljung-Box Q statistic).
# q_stat treats the first coefficient it receives as lag 1, so the lag-0
# value (always 1.0) returned by acf must be dropped before the call.
LB = stattools.q_stat(stattools.acf(returns)[1:], len(returns))
print(LB)
Example #49
0
def modelverification(ts,forecast1,actual1):
    """Print forecast-accuracy metrics and plot residual diagnostics.

    Parameters
    ----------
    ts : fitted model result
        Must expose a ``resid`` attribute holding the model residuals.
    forecast1 : array_like
        Forecast values; converted to a DataFrame, whose first data column
        is used.
    actual1 : pandas object
        Observed values; after ``reset_index()`` the column at position 1
        is used.

    Returns
    -------
    None
        Everything is reported through ``print`` and matplotlib figures.
    """
    forecast1=pd.DataFrame(forecast1)
    actual1=actual1.reset_index()
    forecast1.reset_index(inplace=True)

    # After reset_index, column 0 is the old index and column 1 the data.
    forecast=forecast1[forecast1.columns[1]]
    actual=actual1[actual1.columns[1]]

    mape = np.mean(np.abs(forecast - actual)/np.abs(actual))  # MAPE
    me = np.mean(forecast - actual)             # ME
    mae = np.mean(np.abs(forecast - actual))    # MAE
    mpe = np.mean((forecast - actual)/actual)   # MPE
    rmse = np.mean((forecast - actual)**2)**.5  # RMSE
    corr = np.corrcoef(forecast, actual)[0,1]   # corr

    print("mape:",mape)
    print("me:",me)
    print("mae:",mae)
    print("mpe:",mpe)
    print("RMSE:",rmse)
    print("Correlation:",corr)

    #Validation through residuals
    resd_ar=pd.DataFrame(ts.resid)
    plt.figure(figsize=(10,5))
    plt.plot(resd_ar,color='red')
    plt.title("Residual plot of Model")
    plt.show()
    print("Mean of Residual is:\n")
    print(resd_ar.mean())

    # DataFrame.hist() always opens its own figure when no `ax` is given, so
    # the size is passed to it directly; a preceding plt.figure() call would
    # only leave an extra empty window behind.
    resd_ar.hist(figsize=(10,5))
    plt.title("Histogram of Residual plot of Model")
    plt.show()

    #ACF Graph
    lag_acf=acf(ts.resid,nlags=20)
    lag_pacf=pacf(ts.resid,nlags=20,method='ols')

    # +/- 1.96 / sqrt(n) band drawn around zero on both panels.
    conf_bound=1.96/np.sqrt(len(ts.resid))

    #plot ACF
    plt.figure(figsize=(15,5))
    plt.subplot(121)
    plt.plot(lag_acf)
    plt.axhline(y=0,linestyle='--',color='grey')
    plt.axhline(y=-conf_bound,linestyle='--',color='red')
    plt.axhline(y=conf_bound,linestyle='--',color='red')
    plt.title('Auto Correlation Function')

    # plot PACF
    plt.subplot(122)
    plt.plot(lag_pacf)
    plt.axhline(y=0,linestyle='--',color='grey')
    plt.axhline(y=-conf_bound,linestyle='--',color='red')
    plt.axhline(y=conf_bound,linestyle='--',color='red')
    plt.title('Partial Auto Correlation Function')
    plt.tight_layout()
    plt.show()
    # ------------- Ljung-Box test -----------
    ltest=sm.stats.acorr_ljungbox(ts.resid, lags=[10])
    print(ltest)
Example #50
0
    plt.plot(Blockchain_df[['Close']].pct_change())
    plt.show()
''''CORRELATION AND AUTOCORRELATION (ACF & PLOTTING)'''

# Correlation between the 'USD/EUR' and 'USD/CHF' columns.
Correlation = Blockchain_df['USD/EUR'].corr(Blockchain_df['USD/CHF'])
print('\n This is the correlation between both:', Correlation)

# Imported libraries for Autocorrelation:

from statsmodels.tsa.stattools import acf
from statsmodels.graphics.tsaplots import plot_acf

# Single autocorrelation value from pandas' Series.autocorr().
Autocorrelation = Blockchain_df['Breakeven Inflation Rate'].autocorr()
print('\n This is the Autocorrelation:', Autocorrelation)

# Autocorrelation function values with statsmodels' default number of lags.
Acf = acf(Blockchain_df['Breakeven Inflation Rate'])
print('\n This is the ACF: ', Acf)
print('\n This is the lenght:', len(Acf))

# Manual switch: set to 1 to draw the ACF plot for lags up to 20.
Blockchain_df_acf_plot = 0
if Blockchain_df_acf_plot == 1:
    plot_acf(Blockchain_df['Breakeven Inflation Rate'], lags=20, alpha=0.5)
    plt.show()
'''WHITE NOISE & GAUSSIAN WHITE NOISE'''

Blockchain_df_WN = 0
if Blockchain_df_WN == 1:
    fig, axs = plt.subplots(nrows=3, ncols=1)
    Blockchain_df[['Breakeven Inflation Rate']].plot(ax=axs[0])
    Blockchain_df[['Breakeven Inflation Rate']].plot(kind='hist',
                                                     alpha=0.8,
Example #51
0
 def __init__(self):
     """Store the reference FFT values and the ``acf`` result on the instance."""
     expected = self.results
     self.acf, self.qstat = expected['acvarfft'], expected['Q1']
     # FFT-based computation with Q statistics for lags up to 40.
     computed = acf(self.x, nlags=40, qstat=True, fft=True)
     self.res1 = computed
Example #52
0
from statsmodels import regression
from statsmodels.tsa.arima_process import arma_generate_sample, arma_impulse_response
from statsmodels.tsa.arima_process import arma_acovf, arma_acf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import acf, acovf
from statsmodels.graphics.tsaplots import plot_acf

# Lag-polynomial coefficients passed to the ARMA helpers below; the
# commented-out lines are alternative settings.
ar = [1., -0.6]
#ar = [1., 0.]
ma = [1., 0.4]
#ma = [1., 0.4, 0.6]
#ma = [1., 0.]
mod = ''  #'ma2'
# Generate a sample (third argument 5000) from the process, keep the first
# 10 values of its ACF, and compute the impulse response.
x = arma_generate_sample(ar, ma, 5000)
x_acf = acf(x)[:10]
x_ir = arma_impulse_response(ar, ma)

# Disabled (Python 2 style) debugging output.
#print x_acf[:10]
#print x_ir[:10]
#irc2 = np.correlate(x_ir,x_ir,'full')[len(x_ir)-1:]
#print irc2[:10]
#print irc2[:10]/irc2[0]
#print irc2[:10-1] / irc2[1:10]
#print x_acf[:10-1] / x_acf[1:10]


# detrend helper from matplotlib.mlab
def detrend(x, key=None):
    """Detrend ``x`` according to ``key``.

    When ``key`` is ``None`` or ``'constant'`` the result of
    ``detrend_mean(x)`` is returned.  No other ``key`` value is handled in
    the visible part of this function.
    """
    # NOTE(review): the snippet appears to be cut off here; as written, any
    # other key falls through and returns None.
    if key is None or key == 'constant':
        return detrend_mean(x)
Example #53
0
from statsmodels.graphics.tsaplots import *

import matplotlib.pyplot as plt
from arch.unitroot import ADF

from statsmodels.tsa import arima_model

# Load the HS300 data and index it by the parsed 'date' column.
HS300_data=pd.read_csv("./data/HS300.csv")
HS300_data.index=pd.to_datetime(HS300_data['date'])
SH_ret=HS300_data['ret_cur']
SH_close=HS300_data['close']
# The next two expressions discard their results; they have no effect when
# run as a script.
type(SH_ret)
SH_ret.head()

## Autocorrelation coefficients
acfs=stattools.acf(SH_ret)

## Partial autocorrelation coefficients
pacfs=stattools.pacf(SH_ret)

# ACF plot of the 'ret_cur' series for lags up to 30.
plot_acf(SH_ret,use_vlines=True,lags=30)

SH_ret.plot()
plt.title('return')

SH_close.plot()
plt.title('close price')


# ADF test (arch.unitroot.ADF) on the 'ret_cur' series.
adfSH_ret=ADF(SH_ret)
print(adfSH_ret)
Example #54
0
# Plot the raw values (blue) and their natural logarithm (red).
npa = df.to_numpy()
logdata = np.log(npa)
plt.plot(npa, color = 'blue', marker = "o")
plt.plot(logdata, color = 'red', marker = "o")
plt.title("numpy.log()")
plt.xlabel("x")
plt.ylabel("logdata")
#plt.show()


# Autocorrelation

from statsmodels.tsa.stattools import acf

# First differences; the leading NaN produced by diff() is replaced with the
# first original value.
diffdata = df.value.diff()
diffdata[0] = df.value[0] # reset 1st elem
acfdata = acf(diffdata,unbiased=True,nlags=50)
plt.bar(np.arange(len(acfdata)),acfdata)
# The original wrote `plt.show` without parentheses, which only references
# the function and never displays the figure.
plt.show()


# or, alternatively, with the statsmodels plotting helper
import statsmodels.api as sm
diffdata = df.value.diff()
diffdata[0] = df.value[0] # reset 1st elem
sm.graphics.tsa.plot_acf(diffdata, lags=100)
plt.title("aoooooto")
plt.show()


#division , train and test set
#cutpoint = int(0.7*len(diffdata))
# Copy entries 2..4 of the test result `t` into the `output` report.
# presumably `t` is an adfuller() result tuple -- verify against the code above
output['value']['滞后数'] = t[2]
output['value']['Number of Observations Used'] = t[3]
output['value']['Critical Value(1%)'] = t[4]['1%']
output['value']['Critical Value(5%)'] = t[4]['5%']
output['value']['Critical Value(10%)'] = t[4]['10%']
print(output)  # the p-value is very small, so the series can be regarded as stationary
# Run a white-noise test
output2 = acorr_ljungbox(data['ts1'].dropna(),
                         boxpierce=True,
                         lags=[6, 12],
                         return_df=True)
print(output2)
# The white-noise test shows the differenced series still has some correlation, so an ARMA model can be used

# 3. Compute the autocorrelation and partial autocorrelation coefficients
lag_acf = acf(data['ts1'].dropna(), nlags=10, fft=False)
lag_pacf = pacf(data['ts1'].dropna(), nlags=10, method='ols')
# fig, axes = plt.subplots(1,2, figsize=(20,5))
# plot_acf(data['ts1'].dropna(), lags=10, ax=axes[0])
# plot_pacf(data['ts1'].dropna(), lags=10, ax=axes[1], method='ols')
# plt.show(block=True)

# Select the ARMA order by information criteria.
order_trend = arma_order_select_ic(data['ts1'].dropna())
print(order_trend['bic_min_order'])  # the order selected here matches the one in the book

# 4. Fit the model
result_trend = ARIMA(data['index'], (0, 1, 1)).fit()

print(result_trend.params)

# The remaining steps are essentially the same as for ARMA
Example #56
0
def test_acf():
    """Compare the first 21 values of ``tsa.acf`` with the stored references.

    Both samples are compared to 8 decimals (the original comments asked
    "why only dec=8" and "why only dec=9").
    """
    cases = ((x100, mlacf.acf100), (x1000, mlacf.acf1000))
    for sample, reference in cases:
        computed = tsa.acf(sample, unbiased=False)[:21]
        assert_array_almost_equal(reference.ravel(), computed, 8)
Example #57
0
def plot_acf(x,
             ax=None,
             lags=None,
             alpha=.05,
             use_vlines=True,
             unbiased=False,
             fft=False,
             title='Autocorrelation',
             zero=True,
             vlines_kwargs=None,
             **kwargs):
    """Draw the autocorrelation function of a series.

    Lags run along the horizontal axis and the correlations along the
    vertical axis.

    Parameters
    ----------
    x : array_like
        The time-series values.
    ax : Matplotlib AxesSubplot instance, optional
        Axes to draw into.  A new figure is created when omitted.
    lags : int or array_like, optional
        Either an int or an array of lag values for the horizontal axis.
        ``np.arange(lags)`` is used for an int, and
        ``lags=np.arange(len(corr))`` when nothing is supplied.
    alpha : scalar, optional
        Level of the confidence intervals; e.g. alpha=.05 gives 95 %
        intervals, with the standard deviation computed from Bartlett's
        formula.  With None, no intervals are drawn.
    use_vlines : bool, optional
        True draws vertical lines plus markers, False draws markers only.
        The marker defaults to 'o' and may be changed through a ``marker``
        kwarg.
    unbiased : bool
        True uses n-k as autocovariance denominator, False uses n.
    fft : bool, optional
        True computes the ACF through FFT.
    title : str, optional
        Plot title; 'Autocorrelation' by default.
    zero : bool, optional
        Whether the 0-lag autocorrelation is included.  True by default.
    vlines_kwargs : dict, optional
        Keyword arguments forwarded to vlines.
    **kwargs : kwargs, optional
        Keyword arguments forwarded to the Matplotlib ``plot`` and
        ``axhline`` functions.

    Returns
    -------
    fig : Matplotlib figure instance
        The newly created figure when `ax` is None, otherwise the figure
        that `ax` belongs to.

    See Also
    --------
    matplotlib.pyplot.xcorr
    matplotlib.pyplot.acorr
    mpl_examples/pylab_examples/xcorr_demo.py

    Notes
    -----
    Adapted from matplotlib's `xcorr`.

    Data are plotted as ``plot(lags, corr, **kwargs)``

    kwargs carries optional matplotlib arguments for both the line tracing
    the autocorrelations and the horizontal line at 0, so they must be valid
    for a Line2D object.

    vlines_kwargs carries optional arguments for the vertical lines joining
    each autocorrelation to the axis, so they must be valid for a
    LineCollection object.
    """
    fig, ax = utils.create_mpl_ax(ax)

    lags, nlags, irregular = _prepare_data_corr_plot(x, lags, zero)
    if vlines_kwargs is None:
        vlines_kwargs = {}

    # acf hands back a bare array when alpha is None, and an
    # (acf, confint) pair otherwise.
    acf_result = acf(x, nlags=nlags, alpha=alpha, fft=fft, unbiased=unbiased)
    if alpha is None:
        acf_x, confint = acf_result, None
    else:
        acf_x, confint = acf_result

    _plot_corr(ax, title, acf_x, confint, lags, irregular, use_vlines,
               vlines_kwargs, **kwargs)

    return fig
Example #58
0
def naive_plus(series, horizon):
    """Forecast by blending the last observation with the series mean.

    Each forecast value is ``rho[h] * last + (1 - rho[h]) * mean`` for
    ``h = 0 .. horizon - 1``, where ``rho`` is the array returned by
    ``acf(series, nlags=horizon + 1)``, ``last`` is ``series[-1]`` and
    ``mean`` is ``np.mean(series)``.

    Parameters
    ----------
    series : array_like
        Observed values.  Must support ``series[-1]`` to obtain the most
        recent observation.
    horizon : int
        Number of forecast values to produce.

    Returns
    -------
    numpy.ndarray
        One blended forecast per step, ``horizon`` values in total.

    Notes
    -----
    NOTE(review): ``rho`` is indexed starting at 0.  If ``acf`` returns the
    lag-0 autocorrelation first (which is 1 by definition), the first
    forecast is simply the last observation and the weights for later steps
    are shifted by one lag -- confirm this is the intended alignment.
    """
    rho = acf(series, nlags=horizon + 1)
    # Both anchors are independent of the forecast step, so evaluate them
    # once instead of recomputing the mean for every element.
    last_value = series[-1]
    mean_value = np.mean(series)
    return np.array([
        rho[h] * last_value + (1 - rho[h]) * mean_value
        for h in range(horizon)
    ])
# Build a series with mean 2 as a weighted sum of xma and its two
# circularly shifted copies (weights 0.8 and 0.6), i.e. a moving-average
# construction of order 2.
y5 = 2 + xma + 0.8 * np.roll(xma, -1) + 0.6 * np.roll(xma, -2)

# Show the input series (left panel) next to the constructed series
# (right panel).
plt.figure(figsize=(16, 7))
plt.subplot(1, 2, 1)
plt.plot(xma)
plt.subplot(1, 2, 2)
plt.plot(y5)
plt.show()

# Sample autocorrelation of the constructed series via stattools' acf.
lag_acf = acf(y5, nlags=50)

# Plot the ACF with a zero line and the +/- 1.96 / sqrt(n) guide lines.
guide_style = dict(linestyle='--', color='gray')
y5_conf_bound = 1.96 / np.sqrt(len(y5))
plt.figure(figsize=(16, 7))
plt.plot(lag_acf, marker="o")
for guide_level in (0, -y5_conf_bound, y5_conf_bound):
    plt.axhline(y=guide_level, **guide_style)
plt.title('Autocorrelation Function')
plt.xlabel('number of lags')
plt.ylabel('correlation')
plt.tight_layout()
plt.show()

# Partial autocorrelation of the same series via stattools' pacf,
# using the 'ols' method.
lag_pacf = pacf(y5, nlags=50, method='ols')
# Plot the log-transformed series on its own.
plt.plot(ts_log)
plt.show()

# Overlay the 12-period rolling mean (red) on the series.
moving_avg = ts_log.rolling(12).mean()
plt.plot(ts_log)
plt.plot(moving_avg, color='red')
plt.show()

# Subtract the rolling mean.  The printed head shows the leading entries,
# which are presumably NaN until the 12-period window is full; those rows
# are dropped before the stationarity test.
ts_log_moving_avg_diff = ts_log - moving_avg
print(ts_log_moving_avg_diff.head(12))
ts_log_moving_avg_diff.dropna(inplace=True)
test_stationarity(ts_log_moving_avg_diff)

# First difference of the series; the shift leaves a missing first value,
# which is dropped before testing.
ts_log_diff = ts_log - ts_log.shift()
plt.plot(ts_log_diff)
plt.show()
ts_log_diff.dropna(inplace=True)
test_stationarity(ts_log_diff)

# Autocorrelation and partial autocorrelation of the differenced series.
lag_acf = acf(ts_log_diff, nlags=20)
lag_pacf = pacf(ts_log_diff, nlags=20, method='ols')

# Guide lines at +/- 1.96 / sqrt(n), shared by both panels.
conf_bound = 1.96 / np.sqrt(len(ts_log_diff))

# Left panel: ACF.  No plt.show() here -- showing the figure before the
# second panel is drawn would split the two panels into separate,
# half-empty figures.
plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-conf_bound, linestyle='--', color='gray')
plt.axhline(y=conf_bound, linestyle='--', color='gray')
plt.title('Autocorrelation Function')

# Right panel: PACF.
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-conf_bound, linestyle='--', color='gray')
plt.axhline(y=conf_bound, linestyle='--', color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()
plt.show()