def __init__(self):
    """Prepend a NaN to ``x`` and run ``acf`` under every ``missing`` policy."""
    self.x = np.concatenate(([np.nan], self.x))

    # Reference columns compared against the "drop" and "conservative" runs.
    self.acf = self.results["acvar"]
    self.qstat = self.results["Q1"]

    shared = dict(nlags=40, qstat=True, alpha=0.05)
    self.res_drop = acf(self.x, missing="drop", **shared)
    self.res_conservative = acf(self.x, missing="conservative", **shared)

    # Expected values for missing="none" are all NaN (lags 1 to 40 inclusive).
    self.acf_none = np.full(40, np.nan)
    self.qstat_none = np.full(40, np.nan)
    self.res_none = acf(self.x, missing="none", **shared)
def __init__(self):
    """Set up a series with a leading NaN plus expected and actual ACF output."""
    self.x = np.concatenate(([np.nan], self.x))

    # Reference columns used for the 'drop' and 'conservative' policies.
    self.acf = self.results['acvar']
    self.qstat = self.results['Q1']
    for policy in ('drop', 'conservative'):
        setattr(self, 'res_' + policy,
                acf(self.x, nlags=40, qstat=True, alpha=.05, missing=policy))

    # Expected output for missing='none': all NaN, lags 1 to 40 inclusive.
    self.acf_none = np.full(40, np.nan)
    self.qstat_none = np.full(40, np.nan)
    self.res_none = acf(self.x, nlags=40, qstat=True, alpha=.05,
                        missing='none')
def setup_class(cls):
    """Class-level fixture: NaN-padded series and ACF results per policy."""
    cls.x = np.concatenate(([np.nan], cls.x))

    # Reference columns for the 'drop' and 'conservative' policies.
    cls.acf = cls.results['acvar']
    cls.qstat = cls.results['Q1']

    acf_options = {'nlags': 40, 'qstat': True, 'alpha': .05}
    cls.res_drop = acf(cls.x, missing='drop', **acf_options)
    cls.res_conservative = acf(cls.x, missing='conservative', **acf_options)

    # For missing='none' every expected value is NaN (lags 1 to 40 inclusive).
    cls.acf_none = np.full(40, np.nan)
    cls.qstat_none = np.full(40, np.nan)
    cls.res_none = acf(cls.x, missing='none', **acf_options)
def SlowDecay():
    """Plot the ACF of AAPL returns and of their absolute values.

    Reads the module-level ``df``, takes its ``'AAPL'`` column, treats
    infinite values as missing and drops them.  The 20-lag ACF of the
    absolute series (truncated to its first 1,000,000 entries) and of the
    signed series are drawn together with their ``alpha=0.05`` confidence
    bands, and the figure is saved to ``Graphs/PACFAbsReturns.pdf``.
    """
    # keyword-free dropna(): the axis argument is keyword-only in pandas 2
    returns = df['AAPL'].replace([np.inf, -np.inf], np.nan).dropna()
    abs_returns = abs(returns)[:1000000]

    acf1, conf = acf(pd.DataFrame(abs_returns), alpha=0.05, nlags=20)
    acf2, conf2 = acf(pd.DataFrame(returns), alpha=0.05, nlags=20)

    lags = range(len(acf1))
    plt.plot(acf1, label='ACF Absolute Returns')
    plt.plot(acf2, label='ACF Returns')
    plt.fill_between(lags, [i[0] for i in conf], [i[1] for i in conf],
                     alpha=0.3)
    plt.fill_between(lags, [i[0] for i in conf2], [i[1] for i in conf2],
                     alpha=0.3)
    plt.legend(loc='best')
    plt.savefig('Graphs/PACFAbsReturns.pdf', bbox_inches='tight')
def fit(self, data):
    """Return the autocorrelation length of the magnitude series.

    The autocorrelation length is the first lag whose autocorrelation is
    below ``exp(-1)``.  ``data[0]`` is the magnitude series.

    When no such lag exists among the first ``self.nlags`` lags,
    ``self.nlags`` is increased in steps of 100 (the increase is kept on the
    instance) and the search is repeated.  If every lag the series supports
    has been examined without success, ``len(magnitude)`` is returned.
    """
    magnitude = data[0]
    threshold = np.exp(-1)
    n_obs = len(magnitude)

    while True:
        AC = stattools.acf(magnitude, nlags=self.nlags)
        k = next((index for index, value in enumerate(AC)
                  if value < threshold), None)
        if k is not None:
            return k
        if self.nlags >= n_obs - 1:
            # All available lags were examined (e.g. a constant series gives
            # NaNs); asking for more lags cannot change the outcome, so stop
            # instead of looping forever.
            return n_obs
        self.nlags = self.nlags + 100
def plot_acf(data):
    """Plot, save and show the 90-lag ACF of squared VIX, squared RV and logR.

    ``data`` is indexed with the labels ``'VIX'``, ``'RV'`` and ``'logR'``.
    The figure is written to ``../plots/autocorr_logr_vix_rv.eps``.
    """
    nlags = 90
    lags = range(nlags + 1)
    # (label, whether the series is squared before taking the ACF)
    curves = (('VIX', True), ('RV', True), ('logR', False))

    plt.figure(figsize=(6, 4))
    for label, squared in curves:
        series = data[label]**2 if squared else data[label]
        plt.plot(lags, acf(series, nlags=nlags), lw=2, label=label)
    plt.legend()
    plt.xlabel('Lags, days')
    plt.grid()
    plt.savefig('../plots/autocorr_logr_vix_rv.eps',
                bbox_inches='tight', pad_inches=.05)
    plt.show()
def PrintSerialCorrelations(dailies):
    """Prints serial correlations of prices and residuals as a LaTeX table.

    dailies: map from category name to DataFrame of daily prices
    """
    filled_dailies = {name: FillMissing(daily, span=30)
                      for name, daily in dailies.items()}

    # lag-1 serial correlation of the raw price data
    for name, filled in filled_dailies.items():
        print(name, thinkstats2.SerialCorr(filled.ppg, lag=1))

    # one table row per lag, one cell per category (in dictionary order)
    rows = [
        [str(lag)] + ['%.2g' % thinkstats2.SerialCorr(filled.resid, lag)
                      for filled in filled_dailies.values()]
        for lag in [1, 7, 30, 365]
    ]

    print(r'\begin{tabular}{|c|c|c|c|}')
    print(r'\hline')
    print(r'lag & high & medium & low \\ \hline')
    for row in rows:
        print(' & '.join(row) + r' \\')
    print(r'\hline')
    print(r'\end{tabular}')

    # autocorrelation of the 'high' residuals at the same lags (plus lag 0)
    resid_acf = smtsa.acf(filled_dailies['high'].resid, nlags=365,
                          unbiased=True)
    print('%0.3f, %0.3f, %0.3f, %0.3f, %0.3f' %
          tuple(resid_acf[lag] for lag in (0, 1, 7, 30, 365)))
def __init__(self):
    """Load expected ACF values and compute the actual 40-lag ACF of ``x``."""
    self.acf = self.results['acvar']
    self.qstat = self.results['Q1']
    self.res1 = acf(self.x, nlags=40, qstat=True, alpha=.05)
    # Stack the two bound columns explicitly.  Viewing a multi-field
    # selection as (float, 2) fails on NumPy >= 1.16, where the selection
    # keeps the item size (padding) of the full record.
    self.confint_res = np.column_stack(
        (self.results['acvar_lb'], self.results['acvar_ub']))
def setup_class(cls):
    """Class fixture: expected ACF values and the actual 40-lag ACF of ``x``."""
    cls.acf = cls.results['acvar']
    cls.qstat = cls.results['Q1']
    cls.res1 = acf(cls.x, nlags=40, qstat=True, alpha=.05)
    # Stack the two bound columns explicitly.  Viewing a multi-field
    # selection as (float, 2) fails on NumPy >= 1.16, where the selection
    # keeps the item size (padding) of the full record.
    cls.confint_res = np.column_stack(
        (cls.results['acvar_lb'], cls.results['acvar_ub']))
def plot_acf_multiple(ys, lags=20):
    """Plot the sample ACF of each column of ``ys`` in stacked subplots.

    Parameters
    ----------
    ys : 2-D array_like
        The ACF is computed along axis 0, i.e. one series per column.
    lags : int
        Highest lag to compute and plot.
    """
    from statsmodels.tsa.stattools import acf

    # Temporarily shrink the font; the previous size is restored even when
    # plotting raises.
    old_size = mpl.rcParams['font.size']
    mpl.rcParams['font.size'] = 8
    try:
        plt.figure(figsize=(10, 10))
        xs = np.arange(lags + 1)

        acorr = np.apply_along_axis(lambda x: acf(x, nlags=lags), 0, ys)

        k = acorr.shape[1]
        for i in range(k):
            ax = plt.subplot(k, 1, i + 1)
            ax.vlines(xs, [0], acorr[:, i])

            ax.axhline(0, color='k')
            ax.set_ylim([-1, 1])

            # leave one lag of padding on both sides of the stems
            ax.set_xlim([-1, xs[-1] + 1])
    finally:
        mpl.rcParams['font.size'] = old_size
def ACF_PACF_plot(self):
    """Plot the ACF and PACF of ``self.ts_log_diff`` side by side.

    The plots help choose the ARIMA(p, d, q) orders: the ACF cut-off
    suggests the MA order q, the PACF cut-off the AR order p.  Dashed lines
    mark zero and +/- 1.96 / sqrt(n).
    """
    series = self.ts_log_diff
    lag_acf = acf(series, nlags=20)
    lag_pacf = pacf(series, nlags=20, method='ols')
    # The band is based on the length of the instance's series; a bare
    # ``ts_log_diff`` name is not defined inside this method.
    bound = 1.96 / np.sqrt(len(series))

    # Plot ACF:
    ax = plt.subplot(121)
    plt.plot(lag_acf)
    ax.set_xlim([0, 5])
    plt.axhline(y=0, linestyle='--', color='gray')
    plt.axhline(y=-bound, linestyle='--', color='gray')
    plt.axhline(y=bound, linestyle='--', color='gray')
    plt.title('Autocorrelation Function')

    # Plot PACF:
    plt.subplot(122)
    plt.plot(lag_pacf)
    plt.axhline(y=0, linestyle='--', color='gray')
    plt.axhline(y=-bound, linestyle='--', color='gray')
    plt.axhline(y=bound, linestyle='--', color='gray')
    plt.title('Partial Autocorrelation Function')
    plt.tight_layout()
def acf_fcn(data, lags=2, alpha=.05):
    """Return ``[acf_values, pvalues]`` for ``data``, or ``[nan]`` on failure.

    data: values passed to ``acf`` (the original note says a NumPy array)
    lags: number of lags requested from ``acf``
    alpha: confidence level passed to ``acf``

    Any ordinary error raised while computing or unpacking the result is
    treated as "no result" and yields ``[np.nan]``.  KeyboardInterrupt and
    SystemExit are deliberately not swallowed.
    """
    #@FORMAT: data = np(values)
    try:
        acfvalues, confint, qstat, pvalues = acf(
            data, nlags=lags, qstat=True, alpha=alpha)
    except Exception:
        return [np.nan]
    return [acfvalues, pvalues]
def autocorrelation(x, *args, unbiased=True, nlags=None, fft=True, **kwargs):
    """
    Compute the autocorrelation function of signal `x`.

    Parameters
    ----------
    x: array_like
        A 1D signal.
    unbiased: bool
        Forwarded unchanged to `statsmodels.tsa.stattools.acf`.
    nlags: int
        Number of lags to compute; when omitted, ``int(.9 * len(x))``.
    fft: bool
        Compute the ACF via FFT.
    args, kwargs
        As accepted by `statsmodels.tsa.stattools.acf`.

    Returns
    -------
    acf: array
        Autocorrelation function.
    confint: array, optional
        Confidence intervals if alpha kwarg provided.
    """
    from statsmodels.tsa.stattools import acf

    n_lags = int(.9 * len(x)) if nlags is None else nlags
    alpha = kwargs.get('alpha')
    corr = acf(x, *args, unbiased=unbiased, nlags=n_lags, fft=fft, **kwargs)
    return _significant_acf(corr, alpha)
def SimulateAutocorrelation(daily, iters=1001, nlags=40):
    """Resample residuals, compute autocorrelation, and plot percentiles.

    daily: data handed to FillMissing on every iteration
    iters: number of simulations to run
    nlags: maximum lag of the autocorrelation
    """
    # run simulations, keeping |acf| at lags 1..nlags for each one
    runs = []
    for _ in range(iters):
        filled = FillMissing(daily, span=30)
        resid = thinkstats2.Resample(filled.resid)
        runs.append(np.abs(smtsa.acf(resid, nlags=nlags, unbiased=True)[1:]))

    # one row per simulation; every column is sorted independently
    array = np.sort(np.array(runs, dtype=float), axis=0)

    # find the bounds that cover 95% of the distribution
    high = PercentileRow(array, 97.5)
    low = -high
    thinkplot.FillBetween(range(1, nlags+1), low, high,
                          alpha=0.2, color='gray')
def calc_autocorr(self):
    '''
    Compute the autocorrelation of the windowed ``s2`` signal.

    Stores the result in ``self.acorr`` and the matching frequency axis
    (``fs / lag``) in ``self.acorr_freq``.
    '''
    max_lag = int(self.fs / self._minfreq)
    windowed = self._windowed(self.s2)
    self.acorr = acf(windowed, nlags=max_lag)
    # lag k maps to frequency fs / k; lag 0 is a division by zero
    lag_index = np.arange(self.acorr.size)
    self.acorr_freq = self.fs / lag_index
def __init__(self):
    """Load expected ACF values and compute the actual 40-lag ACF of ``x``."""
    self.acf = self.results['acvar']
    self.qstat = self.results['Q1']
    self.res1 = acf(self.x, nlags=40, qstat=True, alpha=.05)
    # Select the two bound columns, then reinterpret them as an (n, 2)
    # float array.
    bounds = recarray_select(self.results, ['acvar_lb', 'acvar_ub'])
    self.confint_res = bounds.view((float, 2))
def plotACF(timeSeries):
    """Plot the 40-lag ACF of ``timeSeries`` in the left half of the figure.

    Dashed gray lines mark zero and +/- 1.96 / sqrt(len(timeSeries)).
    """
    lag_acf = acf(timeSeries, nlags=40)
    bound = 1.96 / np.sqrt(len(timeSeries))

    plt.subplot(121)
    plt.plot(lag_acf)
    for level in (0, -bound, bound):
        plt.axhline(y=level, linestyle='--', color='gray')
    plt.title('Autocorrelation Function')
def lpc(frame, order):
    """
    frame: windowed signal
    order: lpc order
    return from 0th to `order`th linear predictive coefficients
    """
    autocorr = acf(frame, unbiased=False, nlags=order)
    # NOTE(review): which element of the levinson_durbin result holds the
    # coefficients depends on the implementation in scope - confirm index 0.
    solution = levinson_durbin(autocorr, order)
    return solution[0]
def correlation_plot(d, dt=6e-3, **kwargs):
    """Plot the ACF of ``d`` over all lags against lag time on a log x-axis.

    d: sequence of samples
    dt: spacing between samples (the x-axis is labelled in seconds)
    kwargs: forwarded to ``ax.plot``

    The ``alpha=0.05`` confidence band is shaded on the current axes.
    """
    n = len(d)
    corr, conf = acf(d, nlags=n - 1, alpha=0.05)
    taus = dt * np.arange(n)
    lower, upper = conf[:, 0], conf[:, 1]

    ax = pl.gca()
    ax.plot(taus, corr, **kwargs)
    ax.fill_between(taus, y1=lower, y2=upper, color='k', alpha=0.2, lw=0)
    ax.set_xscale('log')
    ax.set_xlabel(r'$\tau$ (seconds)')
    ax.set_ylabel(r'$G(\tau)$')
    ax.grid()
def ljungBox2(x, maxlag):
    """Ljung-Box Q statistics and p-values for lags 1..maxlag.

    x: object providing ``tolist()`` (e.g. a NumPy array)
    maxlag: highest lag to test

    Returns ``(qljungbox, pval)``, one entry per lag.
    """
    lags = np.asarray(range(1, maxlag + 1))
    values = x.tolist()
    nobs = len(values)
    sample_acf = acf(values, nlags=maxlag)
    # each squared autocorrelation is weighted by 1 / (nobs - lag)
    weighted = sample_acf[1:maxlag + 1] ** 2 / (nobs - np.arange(1, maxlag + 1))
    running_sum = np.cumsum(weighted)
    qljungbox = nobs * (nobs + 2) * running_sum[lags - 1]
    pval = scipy.stats.chi2.sf(qljungbox, lags)
    return qljungbox, pval
def get_acf_pacf(self, inputDataSeries, lag = 15):
    """Compute and plot the ACF and PACF of a series.

    The ACF is computed twice: as Pearson correlations between the series
    and shifted copies of itself (returned, drawn as red dots) and with
    ``acf`` (drawn as a line).  The PACF comes from ``pacf``.

    inputDataSeries: pandas Series; whether its index starts at its minimum
        (ascending) or maximum (descending) decides the shift direction.
    lag: number of lags to compute.

    Returns a dict with ``'acf'`` (list of lagged correlations, lag 0
    first) and ``'pacf'`` (the ``pacf`` result).

    Raises ValueError when the index starts at neither its minimum nor its
    maximum, so the ordering cannot be determined.
    """
    # Work on a DataFrame built from the input series
    outputData = pandas.DataFrame(inputDataSeries)

    first_label = inputDataSeries.index[0]
    if min(inputDataSeries.index) == first_label:
        multiplier = 1      # ascending
    elif max(inputDataSeries.index) == first_label:
        multiplier = -1     # descending
    else:
        print('Cannot determine the order put the lag value manually')
        print('Syntax: calc_returns(inputData, columnName, lag = lag_value)')
        raise ValueError('Cannot determine the ordering of the series index')

    # The number of lags is always non-negative; only the shift direction
    # depends on the ordering.
    n_lags = abs(lag)
    columnName = outputData.columns[0]
    base = outputData[columnName]

    # Calculate ACF as correlations with shifted copies
    acf_values = [base.corr(base)]
    for i in range(1, n_lags + 1):
        col_name = 'lag_' + str(i)
        outputData[col_name] = base.shift(multiplier*i)
        acf_values.append(base.corr(outputData[col_name]))

    pacf_values = pacf(inputDataSeries, nlags = n_lags)
    lag_axis = range(len(acf_values))

    fig = plt.figure()
    ax1 = fig.add_subplot(211)  # upper plot: ACF
    ax2 = fig.add_subplot(212)  # lower plot: PACF
    ax1.plot(lag_axis, acf(inputDataSeries, nlags = n_lags),
             lag_axis, acf_values, 'ro')
    ax2.plot(lag_axis, pacf_values, 'g*-')

    # Plot horizontal lines
    ax1.axhline(y = 0.0, color = 'black')
    ax2.axhline(y = 0.0, color = 'black')

    # Axis labels: call the pyplot functions (assigning to them would
    # replace them for the rest of the program)
    plt.xlabel('Lags')
    plt.ylabel('Correlation Coefficient')

    return {'acf' : list(acf_values),
            'pacf': pacf_values}
def ARIMA_fun( data ):
    """Fit an ARIMA(1, 1, q) model to the module-level ``orig_data``.

    ``q`` is the integer part of the first element of the second value
    returned by ``acf(data, nlags=20, qstat=True, unbiased=True)``.
    ``data`` and the fitted values are plotted in subplot 121 and the fitted
    values are returned.
    """
    # The PACF result is not used; the call itself is retained.
    pacf( data, nlags=20, method='ols' )

    lag_acf, second, third = acf( data, nlags=20, qstat=True, unbiased=True )
    # NOTE(review): with qstat=True and no alpha, statsmodels documents the
    # second return value as the Ljung-Box Q statistics, not a confidence
    # interval - confirm that this is the intended MA order.
    ma_order = int(second[0])

    model = ARIMA(orig_data, order=(1, 1, ma_order))
    results_ARIMA = model.fit(disp=-1)
    fitted = results_ARIMA.fittedvalues

    plt.subplot(121)
    plt.plot( data )
    plt.plot(fitted)
    return fitted
def plotACF(lcTime,lcInt,**kwargs):
    '''
    Plot the autocorrelation curve of lc against lcTime.

    kwargs are forwarded to plt.plot.
    Returns the correlation, the Ljung-Box statistics and the p-values.
    '''
    # auto-correlation function: correlation of lc with itself shifted by
    # various lags (tau), together with the Ljung-Box statistics
    acf_out = acf(lcInt, unbiased=False, qstat=True, nlags=len(lcTime))
    corr, ljb, pvalue = acf_out

    # plot correlation as function of lag time
    label_size = 14
    plt.plot(lcTime, corr, **kwargs)
    plt.xlabel(r"$\tau(s)$", fontsize=label_size)
    plt.ylabel(r"$R(\tau)$", fontsize=label_size)
    plt.title(r"Autocorrelation $R(\tau)$", fontsize=label_size)
    plt.show()

    return corr, ljb, pvalue
def FE(self, serie_atual):
    '''
    Extract features from the first difference of a series.

    :param serie_atual: the actual series
    :return: list with the acf(nlags=5) values, the pacf(nlags=5) values,
        the standard deviation, skewness, kurtosis and the result of
        self.turningpoints, all computed on the differenced series
    '''
    serie = pd.Series(serie_atual)
    # first difference; the leading element has no predecessor and is dropped
    serie_diff = (serie - serie.shift())[1:]

    features = []
    features.extend(acf(serie_diff, nlags=5))        # feature 1
    features.extend(pacf(serie_diff, nlags=5))       # feature 2
    features.append(serie_diff.std())                # feature 3
    features.append(serie_diff.skew())               # feature 4
    features.append(serie_diff.kurtosis())           # feature 5
    features.append(self.turningpoints(serie_diff))  # feature 6
    return features
def integrated_autocorr1(x, acf_cutoff=0.0):
    r"""Estimate the integrated autocorrelation time, :math:`\tau_{int}` of a
    time series.

    The empirical autocorrelation function is summed over a window whose
    length ``M`` is the smallest value such that ``ACF(m) <= acf_cutoff``.
    (Chodera 2007) uses ``acf_cutoff = 0``; (Hoffman 2011, Hub 2010) use
    ``acf_cutoff = 0.05``.

    Parameters
    ----------
    x : ndarray, shape=(n_samples, n_dims)
        The time series, with time along axis 0.  A 1-D input is treated as
        a single dimension.
    acf_cutoff : float
        Autocorrelation level at which the summation window ends.

    Returns
    -------
    tau_int : ndarray, shape=(n_dims,)
        The estimated integrated autocorrelation time of each dimension in
        ``x``, considered independently.

    References
    ----------
    .. [1] J. D. Chodera, W. C. Swope, J. W. Pitera, C. Seok, and K. A. Dill.
       JCTC 3(1):26-41, 2007.
    .. [2] Hoffman, M. D., and A. Gelman. "The No-U-Turn sampler: Adaptively
       setting path lengths in Hamiltonian Monte Carlo." arXiv preprint
       arXiv:1111.4246 (2011).
    .. [3] Hub, J. S., B. L. De Groot, and D. V. Der Spoel. "g_wham: A Free
       Weighted Histogram Analysis Implementation Including Robust Error and
       Autocorrelation Estimates." J. Chem. Theory Comput. 6.12 (2010):
       3713-3720.
    """
    if x.ndim == 1:
        x = x.reshape(-1, 1)

    n_samples = len(x)
    n_dims = x.shape[1]
    tau = np.zeros(n_dims)
    for j in range(n_dims):
        rho = acf(x[:, j], nlags=n_samples, unbiased=False, fft=True)
        below_cutoff = (rho <= acf_cutoff).astype(np.uint8)
        window = find_first(below_cutoff)
        # lags 1 .. window-1 contribute; lag 0 is the leading 1
        tau[j] = 1 + 2 * rho[1:window].sum()
    return tau
def global_analysis(csv_fname, trajectory_df):
    """Compute the ACF and PACF of every column of a trajectory.

    csv_fname: not used by this function
    trajectory_df: DataFrame with one column per variable

    Returns None when the trajectory has fewer than MIN_TRAJECTORY_LEN rows.
    Otherwise returns ``(acf_data, pacf_data)``, two arrays of shape
    ``(len(INTERESTED_VALS), 1, LAGS + 1)`` whose first axis is indexed by
    the column's position in ``trajectory_df``.
    """
    # catch small trajectory_dfs
    if len(trajectory_df.index) < MIN_TRAJECTORY_LEN:
        return None

    shape = (len(INTERESTED_VALS), 1, LAGS + 1)
    acf_data = np.zeros(shape)
    pacf_data = np.zeros(shape)

    # do analysis variable by variable; DataFrame.items() is used because
    # iteritems() no longer exists in pandas 2.0
    for var_name, var_values in trajectory_df.items():
        var_index = trajectory_df.columns.get_loc(var_name)

        # run ACF and PACF for the column; the confidence intervals are
        # computed but not stored
        col_acf, _acf_confint = acf(var_values, nlags=LAGS, alpha=.05)
        acf_data[var_index, 0, :] = col_acf

        col_pacf, _pacf_confint = pacf(var_values, nlags=LAGS,
                                       method='ywmle', alpha=.05)
        # TODO: check for PACF values above or below +-1
        pacf_data[var_index, 0, :] = col_pacf

    return acf_data, pacf_data
def find_grid(hspace_angle, max_separation):
    """Returns the separation between graduations of the ruler.

    Args:
        hspace_angle: Bins outputted from :py:meth:`hough_transform`, but for
            only a single angle.
        max_separation: Maximum size of the *largest* graduation.

    Returns:
        Mean separation between graduations in pixels.
    """
    acorr = acf(hspace_angle, nlags=max_separation, unbiased=False)
    smoothed = gaussian_filter1d(acorr, 1)
    peak_idx = peakutils.indexes(smoothed, thres=0.25)
    # Spacing between consecutive peaks, with lag 0 as the first reference
    # point; only the first four peaks are used.
    positions = np.insert(peak_idx[:4], 0, 0)
    return np.mean(np.diff(positions))
def integrated_autocorr6(x, c=6):
    r"""Estimate the integrated autocorrelation time, :math:`\tau_{int}` of a
    time series.

    The empirical autocorrelation function is summed using Sokal's
    "automatic windowing" procedure.  The window length, ``M`` is chosen
    self-consistently to be the smallest value such that ``M`` is at least
    ``c`` times the estimated autocorrelation time, where ``c`` should be a
    constant in the range of 4, 6, or 10. See Appendix C of Sokal 1988.

    Parameters
    ----------
    x : ndarray, shape=(n_samples, n_dims)
        The time series, with time along axis 0.  A 1-D input is treated as
        a single dimension.
    c : number
        Window-length multiplier described above.

    Returns
    -------
    tau_int : ndarray, shape=(n_dims,)
        The estimated integrated autocorrelation time of each dimension in
        ``x``, considered independently.

    References
    ----------
    .. [1] Madras, Neal, and Alan D. Sokal. "The pivot algorithm: a highly
       efficient Monte Carlo method for the self-avoiding walk." J.
       Stat. Phys. 50.1-2 (1988): 109-186.
    """
    if x.ndim == 1:
        x = x.reshape(-1, 1)

    n_dims = x.shape[1]
    tau = np.zeros(n_dims)
    for j in range(n_dims):
        rho = acf(x[:, j], nlags=len(x), unbiased=False, fft=True)
        # vector of the taus, all with different choices of the window
        # length
        # NOTE(review): the cumulative sum includes the lag-0 term rho[0] -
        # confirm this matches the intended estimator.
        running = np.cumsum(rho)
        taus = 1 + 2 * running[1:]
        ms = np.arange(len(rho) - 1)
        window_reached = (ms > c * taus).astype(np.uint8)
        ind = find_first(window_reached)
        tau[j] = taus[ind]
    return tau
def plot_chain_acf(self, paramName, numBurnIn=0, dims=None, nlags=30,
                   derived=False):
    """
    Make Markov chain autocorrelation function plots for a chosen parameter.

    dims is a list or tuple of two lists which specify which rows and
    columns should be plotted. If empty then all are plotted.

    The number of lags is capped at the square root of the number of
    post-burn-in samples.  Returns the figure and its axes.
    """
    chain_length = len(self.chain_model) - numBurnIn
    nlags = int(np.minimum(nlags, np.sqrt(chain_length)))

    # TODO: derived parameters are unsupported since the chain storage
    # format changed.
    if derived:
        raise NotImplementedError("Doesn't work because of the change"
                                  "in the way the chain is stored.")

    # One entry per chain state
    paramList = [md[paramName] for md in self.chain_model]

    # Choose the axes layout from the dimensionality of the parameter
    first = paramList[0]
    n_axes = len(first.shape)
    if n_axes == 1:
        fig, axs, coords = self._create_1d_plot_axes(first, dims)
    elif n_axes == 2:
        row_dims, col_dims = (None, None) if dims is None else (dims[0], dims[1])
        fig, axs, coords = self._create_2d_plot_axes(first, row_dims, col_dims)
    else:
        raise ValueError("Cannot draw plots for this parameter")

    post_burn = paramList[numBurnIn:]
    for idx in np.ndindex(coords.shape):
        samples = [pp[coords[idx]] for pp in post_burn]
        chain_acf = stattools.acf(samples, unbiased=False, nlags=nlags)
        ax = axs[idx]
        ax.plot(chain_acf, 'k')
        ax.plot([0, nlags], [0, 0], 'k:')
        ax.set_xlim([0, nlags])

    return fig, axs
def SimulateAutocorrelation(daily, iters=1001, nlags=40):
    """Resample residuals, compute autocorrelation, and plot percentiles.

    daily: DataFrame
    iters: number of simulations to run
    nlags: maximum lags to compute autocorrelation
    """
    def one_run():
        # |acf| at lags 1..nlags of freshly resampled residuals
        filled = FillMissing(daily, span=30)
        resid = thinkstats2.Resample(filled.resid)
        return np.abs(smtsa.acf(resid, nlags=nlags, unbiased=True)[1:])

    # run simulations
    t = [one_run() for _ in range(iters)]

    # band from the 97.5th percentile of |acf| and its mirror image
    high = thinkstats2.PercentileRows(t, [97.5])[0]
    low = -high
    thinkplot.FillBetween(range(1, nlags+1), low, high,
                          alpha=0.2, color='gray')
# Delete unnecessary variables to free up memory del pW_0, pW_1, pW_2, pb_0, pb_1, pb_2 # Trace plot # n_lags_used = n_samp - nburn acf_vals = np.zeros([27, n_lags_used]) rnd0 = random.sample(range(n_hidden), 9) rnd1 = random.sample(range(n_hidden), 9) rnd2 = random.sample(range(n_hidden), 9) trace_plotw = np.zeros([9, n_samp]) for i in range(9): w_samp = qW_0.params.eval()[:, 0, rnd0[i]] acf_vals[i,:] = acf(w_samp[nburn:], nlags=n_lags_used) trace_plotw[i, :] = w_samp np.save(path + '/traceplot_w0.npy', np.reshape(trace_plotw, [-1, 9, n_samp])) for i in range(9): w_samp = qW_1.params.eval()[:, rnd1[i], rnd0[i]] acf_vals[9+i, :] = acf(w_samp[nburn:], nlags=n_lags_used) trace_plotw[i, :] = w_samp np.save(path + '/traceplot_w1.npy', np.reshape(trace_plotw, [-1, 9, n_samp])) for i in range(9): w_samp = qW_2.params.eval()[:, rnd1[i], 0] acf_vals[18+i, :] = acf(w_samp[nburn:], nlags=n_lags_used) trace_plotw[i, :] = w_samp np.save(path + '/traceplot_w2.npy', np.reshape(trace_plotw, [-1, 9, n_samp]))
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import acf, pacf

import plotter

print("here ")
# Report where the series contains NaNs before computing the correlations.
# (A former `pd.DataFrame(...).nan` attribute access was dropped here: it
# computed nothing and DataFrame has no such attribute.)
print(np.where(np.isnan(plotter.ts_log_diff)))

lag_acf = acf(plotter.ts_log_diff, nlags=20)
lag_pacf = pacf(plotter.ts_log_diff, nlags=20, method='ols')

# Plot ACF with zero line and +/- 1.96 / sqrt(n) bounds:
plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(plotter.ts_log_diff)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(plotter.ts_log_diff)), linestyle='--', color='gray')
plt.title('Autocorrelation Function')
# NOTE(review): showing the figure here means the PACF subplot below is
# drawn on a new figure - confirm that this is intended.
plt.show()

# Plot PACF:
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0, linestyle='--', color='gray')
# Remove missing values from the training part of the differenced series.
data_moving_avg_diff_train.dropna(inplace=True)
# The adfuller results below are not stored.
# NOTE(review): presumably they are inspected interactively - confirm.
adfuller(data_moving_avg_diff_train)
# Everything from position 584 onwards is the test part.
data_moving_avg_diff_test = data_moving_avg_diff[584:]
plt.plot(data_moving_avg_diff_train)
adfuller(data_moving_avg_diff_train)
plt.plot(data_moving_avg_diff_test)
adfuller(data_moving_avg_diff_test)
#Since the p value is now smaller than 0.05 the data is stationary

#ACF and PACF plots:
from statsmodels.tsa.stattools import acf, pacf
lag_acf3 = acf(data_moving_avg_diff_train, nlags=20)
lag_pacf3 = pacf(data_moving_avg_diff_train, nlags=20, method='ols')

#Plot ACF with a zero line and +/- 1.96 / sqrt(n) bounds:
plt.subplot(121)
plt.plot(lag_acf3, '^k:')
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(data_moving_avg_diff_train)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(data_moving_avg_diff_train)), linestyle='--', color='gray')
plt.grid()
plt.title('Autocorrelation Function')
#Choose p=2
Values of p and q come through ACF and PACF plots. So let us understand both ACF and PACF!
"""
"""
#Below code plots, both ACF and PACF plots for us
from pandas.plotting import autocorrelation_plot
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
autocorrelation_plot(df_log)
plot_pacf(df_log, lags=10)
plt.show()
"""
# plot acf and pacf graphs (autocorrelation function and partial autocorrelation function)
# to find 'p' from p,d,q we need to use the PACF graph, and for 'q' the ACF graph
from statsmodels.tsa.stattools import acf, pacf

# we use d value here (data_log_shift)
# NOTE(review): the two assignments below rebind the names `acf` and `pacf`
# from the imported functions to their results, so the functions cannot be
# called again without re-running the import above.
acf = acf(df_log_diff, nlags=15)
pacf = pacf(df_log_diff, nlags=15, method='ols')
# ols stands for ordinary least squares, used to minimise the errors
# 121 and 122 place the two plots side by side

# plot ACF with a zero line and +/- 1.96 / sqrt(n) bounds
plt.subplot(121)
plt.plot(acf)
plt.axhline(y=0, linestyle='-', color='blue')
plt.axhline(y=-1.96 / np.sqrt(len(df_log_diff)), linestyle='--', color='black')
plt.axhline(y=1.96 / np.sqrt(len(df_log_diff)), linestyle='--', color='black')
plt.title('Auto corellation function')
plt.tight_layout()
def _axis_features(values):
    """Return the 19 summary statistics computed for one axis of a window."""
    stats = [
        np.mean(values), np.std(values), np.var(values), median(values),
        np.percentile(values, 25), np.percentile(values, 75), mode(values),
        np.min(values), np.argmin(values), np.max(values), np.argmax(values),
        robust.mad(values),
    ]
    # The correlation statistics are computed after the basic ones so that
    # an unusable window fails in the same place as before.
    autocorr = stattools.acf(values)
    stats += [autocorr.mean(), autocorr.std()]
    autocov = stattools.acovf(values)
    stats += [autocov.mean(), autocov.std()]
    stats += [skew(values), kurtosis(values), iqr(values)]
    return stats


def extract_features():
    """Build one feature row per time window of the files 1.csv .. 22.csv.

    Each file is read as comma-separated numbers; column 0 is compared
    against the window limit and columns 1-3 are the X, Y and Z values.
    Rows are collected until column 0 exceeds the current limit (which
    starts at 2 and grows by 2) or the last row is reached; the collected
    window is then turned into a flat feature row: the file number minus
    one, 19 statistics per axis, the norm of the three axis means, three
    cross-correlations and 100 FFT frequency bins of the file.

    All rows are written to ``features.csv`` and returned as an array.
    """
    dados = []
    for x in range(1, 23):
        fich = np.loadtxt("%d.csv" % x, delimiter=",")
        n_rows = len(fich)
        last = n_rows - 1
        # The frequency bins depend only on the number of rows in the file,
        # so they are computed once per file rather than once per window.
        freq_bins = np.resize(np.fft.fftfreq(n_rows), (100,))

        auxAcX, auxAcY, auxAcZ = [], [], []
        valor = 2
        for y in range(n_rows):
            if fich[y][0] <= valor and y != last:
                auxAcX.append(fich[y, 1])
                auxAcY.append(fich[y, 2])
                auxAcZ.append(fich[y, 3])
            elif fich[y, 0] > valor or y == last:
                # NOTE(review): the row that triggers the flush is not added
                # to any window (decrementing the loop variable has no
                # effect in a `for` loop) - confirm whether it should start
                # the next window.
                try:
                    aux = [[x - 1]
                           + _axis_features(auxAcX)
                           + _axis_features(auxAcY)
                           + _axis_features(auxAcZ)
                           + [mt.sqrt(np.mean(auxAcX)**2
                                      + np.mean(auxAcY)**2
                                      + np.mean(auxAcZ)**2),
                              np.correlate(auxAcX, auxAcY),
                              np.correlate(auxAcX, auxAcZ),
                              np.correlate(auxAcZ, auxAcY),
                              freq_bins]]
                    dados.append(list(deepflatten(aux)))
                except ValueError:  # raised if the window is empty
                    pass
                auxAcX, auxAcY, auxAcZ = [], [], []
                valor = valor + 2

    with open("features.csv", "w+") as my_csv:
        csvWriter = csv.writer(my_csv, delimiter=',')
        csvWriter.writerows(dados)
    return np.array(dados)
def autocorrelation_all(series):
    """
    Compute the auto-correlation of `series`, requesting as many lags as the
    series has elements
    """
    max_lag = len(series)
    return stattools.acf(series, nlags=max_lag)
def arima_model(self):
    """Fit an ARIMA(3, 1, 3) model to ``self.df`` and build a forecast report.

    The first column of ``self.df`` is used as the timestamp and the second
    as the target.  The data is split with ``TimeSeriesSplit`` (the last
    split is used), the training part is log-transformed, decomposed and
    differenced, ACF/PACF plots are drawn, the model is fitted and its
    forecasts are compared with the test part.  A CSV report is written and
    the collected results are returned.

    Returns a list: ``[trdata, dataset_des, trdata_des, variance,
    variance_seasonal, variance_trend, variance_residual, s, MAE, MSE,
    RMSE, mape, r2, df]``.
    """
    # To Identify No.of Rows In a DataSet
    no_rows = len(self.df)

    #To Assign DataSet Column Names to Variables
    Total_cloumns = self.df.columns
    inp_column = Total_cloumns[0]
    out_column = Total_cloumns[1]

    #To Index The Date Column
    inp_col = pd.to_datetime(self.df[inp_column])
    Dataset = self.df.set_index([inp_column])

    #To Split The Dataset Into Train & Test
    x = self.df[inp_column]
    y = self.df[out_column]
    tscv = TimeSeriesSplit()
    TimeSeriesSplit(max_train_size=None, n_splits=5)
    # Only the split produced by the last iteration is used afterwards.
    for train_index, test_index in tscv.split(Dataset):
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]

    #To Make Train DataSet
    train_timestamp = pd.DataFrame(x_train)
    train_timestamp_1 = train_timestamp.rename(columns={0: inp_column},
                                               inplace=True)
    train_usedspace = pd.DataFrame(y_train)
    # NOTE(review): this renames train_timestamp again, not train_usedspace -
    # confirm which frame was meant.
    train_usedspace_1 = train_timestamp.rename(columns={0: out_column},
                                               inplace=True)
    trdata = pd.concat([train_timestamp, train_usedspace], axis=1)
    trdata = trdata.set_index([inp_column])

    #To Identify No.of Rows For Y-Train
    rows_ytrain = len(y_train)

    #To Identify No.of Rows For Y-Test
    rows_ytest = len(y_test)

    #To Describe The Entire DataSet
    dataset_des = Dataset.describe()

    #To Describe The Training DataSet
    trdata_des = trdata.describe()

    #Apply Log Transform For Training DataSet & Remove NaN Values
    Dataset_logScale = np.log(trdata)
    datasetLogDiffshifting = Dataset_logScale - Dataset_logScale.shift()
    datasetLogDiffshifting.dropna(inplace=True)

    #components of time-series(Plot Trend, Seasonal And Residuals)
    decomposition = seasonal_decompose(Dataset_logScale, period=3)
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid

    plt.show()
    plt.plot(Dataset_logScale, label='Original')
    plt.legend(loc='best')
    plt.subplot(412)
    plt.plot(trend, label='Trend')
    plt.legend(loc='best')
    plt.subplot(413)
    plt.plot(seasonal, label='Seasonality')
    plt.legend(loc='best')
    plt.subplot(414)
    plt.plot(residual, label='Residuals')
    plt.legend(loc='best')
    plt.tight_layout()

    decomposedLogData = residual
    decomposedLogData.dropna(inplace=True)

    #To Identify Variance
    seasonal_max = seasonal.max()
    seasonal_min = seasonal.min()
    trend_max = trend.max()
    trend_min = trend.min()
    variance = (seasonal_max - seasonal_min) / (trend_max - trend_min) * 100

    #To Find Variance Of Seasonal
    variance_seasonal = np.var(seasonal)

    #To Find Variance Of Trend
    variance_trend = np.var(trend)

    #To Find Variance Of Residuals
    variance_residual = np.var(residual)

    #Auto co-relation and Partial Auto Co-relation Functions
    lag_acf = acf(datasetLogDiffshifting, nlags=20)
    lag_pacf = pacf(datasetLogDiffshifting, nlags=20,
                    method='ols')  #ols=ordinary least square method

    #plot ACF with the band drawn at both -1.96/sqrt(n) and +1.96/sqrt(n):
    plt.subplot(121)
    plt.plot(lag_acf)
    plt.axhline(y=0, linestyle='--', color='gray')
    plt.axhline(y=-1.96 / np.sqrt(len(datasetLogDiffshifting)),
                linestyle='--',
                color='gray')
    plt.axhline(y=1.96 / np.sqrt(len(datasetLogDiffshifting)),
                linestyle='--',
                color='gray')
    plt.title('AutoCorrelation Function')
    st.pyplot()

    #plot PACF with the same lower and upper bounds:
    plt.subplot(122)
    plt.plot(lag_pacf)
    plt.axhline(y=0, linestyle='--', color='gray')
    plt.axhline(y=-1.96 / np.sqrt(len(datasetLogDiffshifting)),
                linestyle='--',
                color='gray')
    plt.axhline(y=1.96 / np.sqrt(len(datasetLogDiffshifting)),
                linestyle='--',
                color='gray')
    plt.title('Partial AutoCorrelation Function')
    plt.tight_layout()
    st.pyplot()

    #Apply AR Model:
    print(Dataset_logScale)
    model = ARIMA(Dataset_logScale, order=(3, 1, 3))
    results_AR = model.fit(disp=-1)
    plt.plot(datasetLogDiffshifting)
    plt.plot(results_AR.fittedvalues, color='red')
    #residual sum of square
    plt.title('RSS: %.4f' % sum(
        (results_AR.fittedvalues - datasetLogDiffshifting[out_column])**2))

    #Apply MA Model:
    model = ARIMA(Dataset_logScale, order=(3, 1, 3))  #moving Average Model
    results_MA = model.fit(disp=-1)
    plt.plot(datasetLogDiffshifting)
    plt.plot(results_MA.fittedvalues, color='red')
    plt.title('RSS: %.4f' % sum(
        (results_MA.fittedvalues - datasetLogDiffshifting[out_column])**2))
    st.pyplot()

    #Integrate Both As ARIMA Model:
    model = ARIMA(Dataset_logScale, order=(3, 1, 3))  #plotting for ARIMA
    results_ARIMA = model.fit(disp=-1)
    plt.plot(datasetLogDiffshifting)
    plt.plot(results_ARIMA.fittedvalues, color='red')
    plt.title('RSS: %.4f' % sum(
        (results_ARIMA.fittedvalues - datasetLogDiffshifting[out_column])**
        2))
    st.pyplot()

    #Fitting ARIMA Model And Converting Cumulative Sum
    predictions_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues,
                                       copy=True)  #fitting ARIMA model
    predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum(
    )  #converted to cumulative sum
    predictions_ARIMA_log = pd.Series(Dataset_logScale[out_column].iloc[0],
                                      index=Dataset_logScale.index)
    predictions_ARIMA_log = predictions_ARIMA_log.add(
        predictions_ARIMA_diff_cumsum, fill_value=0)

    #Predictions Of ARIMA Model (these replace the values computed above)
    pred = results_ARIMA.predict(start=1, end=rows_ytrain)
    predictions_ARIMA_diff = pd.Series(pred, copy=True)
    predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
    predictions_ARIMA_log = pd.Series(Dataset_logScale.iloc[0])
    predictions_ARIMA_log = predictions_ARIMA_log.add(
        predictions_ARIMA_diff_cumsum, fill_value=0)
    predictions_ARIMA = np.exp(predictions_ARIMA_log)
    s = pd.DataFrame(predictions_ARIMA)
    s = s.reset_index()

    #Forecasted Plot For ARIMA Model
    st.write("Forecasted Plot For ARIMA Model")
    p = results_ARIMA.plot_predict(1, 550)
    plt.xlabel('Timestamp', fontsize=14, color='b')
    plt.ylabel('Sales', fontsize=14, color='b')
    plt.title('Forecast ', fontsize=20, color='black')
    plt.legend(['Forecasted Data', 'Input Data'], loc='upper left')
    # NOTE(review): this line at y=1000 lies outside the (0, 12) y-range set
    # on the next line - confirm the intended threshold.
    plt.axhline(y=1000, color='r', linestyle='-')
    plt.ylim(0, 12)
    plt.show()
    st.pyplot()

    #Forecasted Results Of Y-test For ARIMA Model
    forecast = results_ARIMA.forecast(steps=rows_ytest)[0]
    y_pred = (forecast * 100) / 2
    from sklearn import metrics
    MAE = (metrics.mean_absolute_error(y_test, y_pred))  #To Find MAE Value
    MSE = (metrics.mean_squared_error(y_test, y_pred))  #To Find MSE Value
    RMSE = (np.sqrt(metrics.mean_squared_error(y_test, y_pred))
            )  #To Find RMSE Value

    def mean_absolute_percentage_error(y_true, y_pred):  #To Find MAPE Value
        y_true, y_pred = np.array(y_true), np.array(y_pred)
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    mape = mean_absolute_percentage_error(y_test, y_pred)
    from sklearn.metrics import r2_score  #To Find R2 Value
    r2 = r2_score(y_test, y_pred)

    #To Make DataFrame and Rename For Forecasted log Results Of Y-Test
    s = s.drop(columns=["index"])
    s.rename(columns={0: out_column}, inplace=True)

    #To Find Fitted Values
    train_y = y_train.to_frame()
    train_y = np.log(train_y)
    fitted_values = ((train_y - s) / train_y) * 100
    fitted_values_1 = fitted_values.rename(
        columns={out_column: "Fitted_values"}, inplace=True)

    #To Find Predicted Values
    predicted_values = pd.DataFrame(y_pred)
    predicted_values1 = predicted_values.rename(
        columns={0: "Predicted_values"}, inplace=True)

    #To Find Forecasted Values For A Quarter-Period
    quarter_period = 90
    forecasted_days = rows_ytest + quarter_period
    forecast = results_ARIMA.forecast(steps=forecasted_days)[0]
    for_val = (forecast * 100) / 2

    #Assign Name To Forecasted Values and Make into DataFrame
    forecast = pd.DataFrame(for_val)
    forecast.rename(columns={0: "Forecasted_values"}, inplace=True)
    forecast = forecast.iloc[50:]

    #Convert all DataFrames into Numpy.array
    fitted_values = fitted_values.Fitted_values.to_numpy()
    predicted_values = predicted_values.Predicted_values.to_numpy()
    forecasted_values = forecast.Forecasted_values.to_numpy()
    timestamp = self.df.Date.to_numpy()
    # NOTE(review): `data` is not defined in this method - presumably a
    # module-level DataFrame; confirm (self.df may have been intended).
    actual_values = data.Monthly_sales_total.to_numpy()

    #To Make Alignment For Final Report (zero padding in front)
    length = len(fitted_values)
    an_array = np.empty(length)
    an_array[:] = 0
    final_predicted = np.concatenate((an_array, predicted_values))
    length1 = len(actual_values)
    an_array1 = np.empty(length1)
    an_array1[:] = 0
    final_forecasted = np.concatenate((an_array1, forecasted_values))

    #To Make All Results Into The DataFrame
    report_columns = {
        'Date': timestamp,
        'Actual_values': actual_values,
        'Fitted_values': fitted_values,
        'Predicted_values': final_predicted,
        'Forecasted_values': final_forecasted
    }
    df = pd.DataFrame.from_dict(report_columns, orient='index')
    # NOTE(review): the transposed frame is discarded, so the report keeps
    # one row per key - confirm whether `df = df.transpose()` was intended.
    df.transpose()

    #To Generate CSV Report File
    df.to_csv(
        r'C:\Users\nkatakamsetty\Desktop\Demand_Forecast\final_report.csv',
        index=True)
    return [
        trdata, dataset_des, trdata_des, variance, variance_seasonal,
        variance_trend, variance_residual, s, MAE, MSE, RMSE, mape, r2, df
    ]
plt.show()

# Figure 6: the differenced series, followed by its summary via plotstats.
plt.figure(6)
plt.plot(differencing)
plotstats(differencing)
plt.show()

print('ACF and PACF with series stationarized')
# Library ACF / PACF plots of the differenced series, each on its own figure.
pyplot.figure()
plot_acf(differencing, ax=pyplot.gca(), lags=20)
pyplot.figure()
plot_pacf(differencing, ax=pyplot.gca(), lags=20)
pyplot.show()

# Raw ACF / PACF values for the manual plots below.
lag_acf = acf(differencing, nlags=20)
lag_pacf = pacf(differencing, nlags=20, method='ols')

#Temporary test ACF and PACF
plt.figure(13)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
# NOTE(review): the bounds use len(series) although the ACF was computed on
# `differencing` - confirm which length is intended.
plt.axhline(y=-1.96 / np.sqrt(len(series)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(series)), linestyle='--', color='gray')
plt.title('Autocorrelation function for PETR4 - ARIMA (0,1,1)')
plt.show()

#Plot PACF:
plt.figure(14)
def plot_acf(x, ax=None, lags=None, alpha=.05, use_vlines=True, unbiased=False,
             fft=False, **kwargs):
    """Plot the autocorrelation function

    Plots lags on the horizontal and the correlations on vertical axis.

    Parameters
    ----------
    x : array_like
        Array of time-series values
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    lags : int or array_like, optional
        If an int, lags ``0, 1, ..., lags`` are plotted. If array_like, it is
        cast to integers and exactly those lags are plotted. If not given,
        ``lags=np.arange(len(x))`` is used.
    alpha : scalar, optional
        If a number is given, the confidence intervals for the given level are
        returned. For instance if alpha=.05, 95 % confidence intervals are
        returned where the standard deviation is computed according to
        Bartlett's formula. If None, no confidence intervals are plotted.
    use_vlines : bool, optional
        If True, vertical lines and markers are plotted.
        If False, only markers are plotted.  The default marker is 'o'; it can
        be overridden with a ``marker`` kwarg.
    unbiased : bool
        If True, then denominators for autocovariance are n-k, otherwise n
    fft : bool, optional
        If True, computes the ACF via FFT.
    **kwargs : kwargs, optional
        Optional keyword arguments that are directly passed on to the
        Matplotlib ``plot`` and ``axhline`` functions (and to ``vlines`` when
        ``use_vlines`` is True).

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    matplotlib.pyplot.xcorr
    matplotlib.pyplot.acorr
    mpl_examples/pylab_examples/xcorr_demo.py

    Notes
    -----
    Adapted from matplotlib's `xcorr`.

    Data are plotted as ``plot(lags, corr, **kwargs)``
    """
    fig, ax = utils.create_mpl_ax(ax)

    # Positions to pick out of the computed ACF; stays None when every lag
    # from 0 to nlags is plotted.
    selected = None
    if lags is None:
        lags = np.arange(len(x))
        nlags = len(lags) - 1
    elif np.isscalar(lags):
        nlags = lags
        lags = np.arange(lags + 1)  # +1 for zero lag
    else:
        # Explicit lag values: compute up to the largest one requested and
        # keep only the requested entries afterwards.
        lags = np.asarray(lags).astype(int)
        nlags = int(lags.max())
        selected = lags

    confint = None
    # acf has different return type based on alpha
    if alpha is None:
        acf_x = acf(x, nlags=nlags, alpha=alpha, fft=fft,
                    unbiased=unbiased)
    else:
        acf_x, confint = acf(x, nlags=nlags, alpha=alpha, fft=fft,
                             unbiased=unbiased)

    if selected is not None:
        acf_x = acf_x[selected]
        if confint is not None:
            confint = confint[selected]

    if use_vlines:
        ax.vlines(lags, [0], acf_x, **kwargs)
        ax.axhline(**kwargs)

    # Marker defaults are applied only after vlines/axhline were drawn, so
    # they affect the ``plot`` call alone.
    kwargs.setdefault('marker', 'o')
    kwargs.setdefault('markersize', 5)
    kwargs.setdefault('linestyle', 'None')
    ax.margins(.05)
    ax.plot(lags, acf_x, **kwargs)
    ax.set_title("Autocorrelation")

    if confint is not None:
        # center the confidence interval TODO: do in acf?
        ax.fill_between(lags, confint[:, 0] - acf_x, confint[:, 1] - acf_x,
                        alpha=.25)

    return fig
def test_acf_fft_dataframe():
    """Regression test for #322: FFT acf of a one-column DataFrame is 1-D."""
    sunactivity = sunspots.load_pandas().data[['SUNACTIVITY']]
    acf_values = acf(sunactivity, fft=True)
    assert_equal(acf_values.ndim, 1)
def setup_class(cls):
    """Store the reference values and the FFT-based acf result on the class.

    Reads ``cls.results`` and ``cls.x``; sets ``cls.acf``, ``cls.qstat`` and
    ``cls.res1``.
    """
    reference = cls.results
    cls.acf = reference['acvarfft']
    cls.qstat = reference['Q1']
    # 40 lags with the Q statistic requested, computed through the FFT path.
    acf_options = dict(nlags=40, qstat=True, fft=True)
    cls.res1 = acf(cls.x, **acf_options)
def setup_class(cls):
    """Store the reference values and the non-FFT acf result on the class.

    Reads ``cls.results`` and ``cls.x``; sets ``cls.acf``, ``cls.qstat``,
    ``cls.res1`` and ``cls.confint_res``.
    """
    reference = cls.results
    cls.acf = reference['acvar']
    # cls.acf = np.concatenate(([1.], cls.acf))
    cls.qstat = reference['Q1']
    # 40 lags with Q statistic and confidence intervals at alpha=.05.
    acf_options = dict(nlags=40, qstat=True, alpha=.05, fft=False)
    cls.res1 = acf(cls.x, **acf_options)
    # Lower/upper reference confidence bounds as a plain array.
    bound_columns = ['acvar_lb', 'acvar_ub']
    cls.confint_res = reference[bound_columns].values
tau = np.zeros((nbeta, 3)) for b in range(nbeta): beta[b] = beta_low + b * (beta_high - beta_low) / (nbeta - 1) for name in range(len(Observables)): Obs_mean = np.zeros((nbeta)) Obs_var = np.zeros((nbeta)) # fileO=("%s/beta_%d/%s.npy" %(BASEDIR, b, Observables[name])) # Obs=np.load(fileO) file = h5py.File('%s/beta_%d/Output.h5' % (BASEDIR, b), 'r') Obs = np.asarray(file['Measurements']['%s' % (Observables[name])]) A_Obs = acf(Obs, nlags=int(len(Obs) / 10), fft=True) # A_Obs=acf(Obs, fft=True) # fig, ax1 = plt.subplots(1, 1) # ax1.set_title(r"$L=%s; beta=%s$" %(L[l], beta[b]) ) # ax1.set_xlabel(r"$t$") # ax1.set_ylabel(r"$Autocorr$") # ax1.plot(A_Obs[:], "-") # ax1.grid() # plt.show() temp = np.where(A_Obs[:] < 0.1) print(Observables[name], b, temp) tmax_int = 10 * temp[0][0] temp_tau = [] time_int = 1000 tmax_int = max(time_int, tmax_int)
ts_data_decompose = residual
ts_data_decompose.dropna(inplace=True)


# In[50]:

# Forecasting a Time Series
# ACF and PACF plots:
from statsmodels.tsa.stattools import acf, pacf

lag_acf = acf(ts_data, nlags=20)
lag_pacf = pacf(ts_data, nlags=20, method='ols')

# Half-width of the +/-1.96/sqrt(n) band drawn on both panels.
_ts_band = 1.96 / np.sqrt(len(ts_data))

# Plot ACF (left panel): zero line, lower bound, upper bound.
plt.subplot(121)
plt.plot(lag_acf)
for _level in (0, -_ts_band, _ts_band):
    plt.axhline(y=_level, linestyle='--', color='gray')
plt.title('Autocorrelation Function')

# Plot PACF (right panel): zero line and lower bound.
plt.subplot(122)
plt.plot(lag_pacf)
for _level in (0, -_ts_band):
    plt.axhline(y=_level, linestyle='--', color='gray')
#plt.tight_layout()
#
## There can be cases where an observation simply consists of trend and
## seasonality. In that case there won't be any residual component and it
## would be null or NaN, so such cases are removed as well.
#decomposedLogData = residual
#decomposedLogData.dropna(inplace=True)
#test_stationarity(decomposedLogData)
#
#
#decomposedLogData = residual
#decomposedLogData.dropna(inplace=True)
#test_stationarity(decomposedLogData)

#-------------for main_meter only------------------
_main_meter = datasetLogDiffShifting['main_meter']
lag_acf = acf(_main_meter, nlags=20)
lag_pacf = pacf(_main_meter, nlags=20, method='ols')

#Plot ACF:
plt.subplot(121)
plt.plot(lag_acf)
# Zero line and lower bound in gray, upper bound in red; the band width uses
# the number of rows of the whole frame.
_meter_band = 1.96 / np.sqrt(len(datasetLogDiffShifting))
_reference_lines = ((0, 'gray'), (-_meter_band, 'gray'), (_meter_band, 'red'))
for _level, _line_color in _reference_lines:
    plt.axhline(y=_level, linestyle='--', color=_line_color)
plt.title('Autocorrelation Function')

#Plot PACF
def acf_coefs(x, maxlag=100):
    """Return the autocorrelation coefficients of ``x`` as a flat array.

    ``x`` is flattened first. The number of lags requested from ``acf`` is
    ``maxlag``, capped at ``len(x) - 1`` of the flattened input.
    """
    flat = np.asarray(x).ravel()
    last_lag = len(flat) - 1
    nlags = np.minimum(last_lag, maxlag)
    coefs = acf(flat, nlags=nlags)
    return coefs.ravel()
tmp = dill.load(f) tmp['x'] = tmp['x'][:TMAX] tmp['y'] = tmp['y'][:TMAX] x.append(tmp['x']) y.append(tmp['y']) covs, means = evaluation.mean_hit_rate(tmp['x'], tmp['y'], n_covs=501) xm.append(covs) ym.append(means) return x, y, xm, ym if __name__ == '__main__': x, y, xm, ym = load_compare_data() m0 = evaluation.interpolated_hit_rate(x[0], y[0], 0.05) m1 = evaluation.interpolated_hit_rate(x[2], y[2], 0.05) d = m0 - m1 acorr = acf(d, nlags=21) # Confidence interval: z value / sqrt(sample size) ci = np.sqrt(2) * erfinv(0.95) / np.sqrt(d.size) fig = plt.figure() ax = fig.add_subplot(111) ax.plot(acorr, 'k-') ax.plot([0, len(acorr)], [ci, ci], 'k--') ax.plot([0, len(acorr)], [-ci, -ci], 'k--') plt.xlabel('Lag (days)', size=FONTSIZE) plt.ylabel('Autocorrelation', size=FONTSIZE) plt.tight_layout()
end) returns = RJ['close_price'].pct_change().dropna() returns.name = 'return' # returns.plot() # returns.column = ['return'] # print(returns) # 计算自相关系数 # acfs = stattools.acf(returns) # # print(acfs) # # 偏自相关系数 # pacfs = stattools.pacf(returns) # # print(pacfs) # # 自相关性图 # plot_acf(returns,use_vlines=True,lags=30) # # 偏自相关性图 # plot_pacf(returns,use_vlines=True,lags=30) # plot_acf(RJ['close_price'],use_vlines=True,lags=30) # # plt.show() # 单位根检验 # adfRJ= ADF(returns) # print(adfRJ.summary().as_text()) # adfClose = ADF(RJ['close_price']) # print(adfClose.summary().as_text()) # 白噪声检验 LB = stattools.q_stat(stattools.acf(returns), len(returns)) print(LB)
def modelverification(ts, forecast1, actual1):
    """Print forecast-accuracy metrics and show residual diagnostics.

    Parameters
    ----------
    ts : object
        Fitted model result; only its ``resid`` attribute is read.
    forecast1 : array-like or DataFrame
        Forecast values. Wrapped in a DataFrame and re-indexed; the column at
        position 1 after ``reset_index`` (the first data column) is compared.
    actual1 : Series or DataFrame
        Observed values. After ``reset_index`` the column at position 1 is
        compared against the forecast.

    Returns
    -------
    None
        Metrics and the Ljung-Box result are printed; figures are shown with
        ``plt.show()``.
    """
    forecast_frame = pd.DataFrame(forecast1).reset_index()
    actual_frame = actual1.reset_index()
    # Position 0 holds the former index, position 1 the first data column.
    forecast = forecast_frame[forecast_frame.columns[1]]
    actual = actual_frame[actual_frame.columns[1]]

    error = forecast - actual
    abs_error = np.abs(error)
    mape = np.mean(abs_error/np.abs(actual))  # MAPE (as a fraction, not %)
    me = np.mean(error)                       # ME
    mae = np.mean(abs_error)                  # MAE
    mpe = np.mean(error/actual)               # MPE
    rmse = np.mean(error**2)**.5              # RMSE
    corr = np.corrcoef(forecast, actual)[0, 1]  # corr
    print("mape:", mape)
    print("me:", me)
    print("mae:", mae)
    print("mpe:", mpe)
    print("RMSE:", rmse)
    print("Correlation:", corr)

    # Validation through residuals
    resd_ar = pd.DataFrame(ts.resid)
    plt.figure(figsize=(10, 5))
    plt.plot(resd_ar, color='red')
    plt.title("Residual plot of Model")
    plt.show()
    print("Mean of Residual is:\n")
    print(resd_ar.mean())
    # DataFrame.hist opens its own figure, so the size is given to it
    # directly; a preceding plt.figure() call would only leave an empty
    # figure behind.
    resd_ar.hist(figsize=(10, 5))
    plt.title("Histogram of Residual plot of Model")
    plt.show()

    # ACF / PACF of the residuals with the +/-1.96/sqrt(n) band
    lag_acf = acf(ts.resid, nlags=20)
    lag_pacf = pacf(ts.resid, nlags=20, method='ols')
    band = 1.96/np.sqrt(len(ts.resid))
    panels = (
        (121, lag_acf, 'Auto Correlation Function'),
        (122, lag_pacf, 'Partial Auto Correlation Function'),
    )
    plt.figure(figsize=(15, 5))
    for position, values, panel_title in panels:
        plt.subplot(position)
        plt.plot(values)
        plt.axhline(y=0, linestyle='--', color='grey')
        plt.axhline(y=-band, linestyle='--', color='red')
        plt.axhline(y=band, linestyle='--', color='red')
        plt.title(panel_title)
    plt.tight_layout()
    plt.show()

    # ------------- Ljung-Box test -----------
    ltest = sm.stats.acorr_ljungbox(ts.resid, lags=[10])
    print(ltest)
plt.plot(Blockchain_df[['Close']].pct_change()) plt.show() ''''CORRELATION AND AUTOCORRELATION (ACF & PLOTTING)''' Correlation = Blockchain_df['USD/EUR'].corr(Blockchain_df['USD/CHF']) print('\n This is the correlation between both:', Correlation) # Imported libraries for Autocorrelation: from statsmodels.tsa.stattools import acf from statsmodels.graphics.tsaplots import plot_acf Autocorrelation = Blockchain_df['Breakeven Inflation Rate'].autocorr() print('\n This is the Autocorrelation:', Autocorrelation) Acf = acf(Blockchain_df['Breakeven Inflation Rate']) print('\n This is the ACF: ', Acf) print('\n This is the lenght:', len(Acf)) Blockchain_df_acf_plot = 0 if Blockchain_df_acf_plot == 1: plot_acf(Blockchain_df['Breakeven Inflation Rate'], lags=20, alpha=0.5) plt.show() '''WHITE NOISE & GAUSSIAN WHITE NOISE''' Blockchain_df_WN = 0 if Blockchain_df_WN == 1: fig, axs = plt.subplots(nrows=3, ncols=1) Blockchain_df[['Breakeven Inflation Rate']].plot(ax=axs[0]) Blockchain_df[['Breakeven Inflation Rate']].plot(kind='hist', alpha=0.8,
def __init__(self):
    """Store the reference values and the FFT-based acf result.

    Reads ``self.results`` and ``self.x``; sets ``self.acf``, ``self.qstat``
    and ``self.res1``.
    """
    reference = self.results
    self.acf = reference['acvarfft']
    self.qstat = reference['Q1']
    # 40 lags with the Q statistic requested, computed through the FFT path.
    acf_options = dict(nlags=40, qstat=True, fft=True)
    self.res1 = acf(self.x, **acf_options)
from statsmodels import regression from statsmodels.tsa.arima_process import arma_generate_sample, arma_impulse_response from statsmodels.tsa.arima_process import arma_acovf, arma_acf from statsmodels.tsa.arima.model import ARIMA from statsmodels.tsa.stattools import acf, acovf from statsmodels.graphics.tsaplots import plot_acf ar = [1., -0.6] #ar = [1., 0.] ma = [1., 0.4] #ma = [1., 0.4, 0.6] #ma = [1., 0.] mod = '' #'ma2' x = arma_generate_sample(ar, ma, 5000) x_acf = acf(x)[:10] x_ir = arma_impulse_response(ar, ma) #print x_acf[:10] #print x_ir[:10] #irc2 = np.correlate(x_ir,x_ir,'full')[len(x_ir)-1:] #print irc2[:10] #print irc2[:10]/irc2[0] #print irc2[:10-1] / irc2[1:10] #print x_acf[:10-1] / x_acf[1:10] # detrend helper from matplotlib.mlab def detrend(x, key=None): if key is None or key == 'constant': return detrend_mean(x)
# Exploratory script: load HS300 data, compute ACF/PACF of the returns, plot
# them, and print an ADF unit-root test on the returns.
# NOTE(review): plot_acf below is presumably supplied by this wildcard import.
from statsmodels.graphics.tsaplots import *
import matplotlib.pyplot as plt
from arch.unitroot import ADF
from statsmodels.tsa import arima_model
HS300_data=pd.read_csv("./data/HS300.csv")
# Index the frame by the parsed 'date' column.
HS300_data.index=pd.to_datetime(HS300_data['date'])
SH_ret=HS300_data['ret_cur']
SH_close=HS300_data['close']
# Bare inspection expressions: their values are discarded when this runs as
# a plain script.
type(SH_ret)
SH_ret.head()
## Autocorrelation coefficients
acfs=stattools.acf(SH_ret)
## Partial autocorrelation coefficients
pacfs=stattools.pacf(SH_ret)
plot_acf(SH_ret,use_vlines=True,lags=30)
# NOTE(review): no new figure is opened between the plot calls below, so they
# may all draw on whichever axes is current -- confirm this is intended.
SH_ret.plot()
plt.title('return')
SH_close.plot()
plt.title('close price')
# ADF test on the return series.
adfSH_ret=ADF(SH_ret)
print(adfSH_ret)
# Plot the raw values and their natural log on the same axes.
npa = df.to_numpy()
logdata = np.log(npa)
plt.plot(npa, color = 'blue', marker = "o")
plt.plot(logdata, color = 'red', marker = "o")
plt.title("numpy.log()")
plt.xlabel("x");plt.ylabel("logdata")
#plt.show()

# Autocorrelation
from statsmodels.tsa.stattools import acf
diffdata = df.value.diff()
diffdata[0] = df.value[0] # reset 1st elem (diff() leaves it as NaN)
acfdata = acf(diffdata,unbiased=True,nlags=50)
plt.bar(np.arange(len(acfdata)),acfdata)
# plt.show must be called; the bare name is a no-op and displays nothing.
plt.show()

# or, alternatively, with the statsmodels plotting helper
import statsmodels.api as sm
diffdata = df.value.diff()
diffdata[0] = df.value[0] # reset 1st elem
sm.graphics.tsa.plot_acf(diffdata, lags=100)
plt.title("aoooooto")
plt.show()

#division , train and test set
#cutpoint = int(0.7*len(diffdata))
# Fill the remaining rows of the ADF summary table from the test result `t`.
output['value']['滞后数'] = t[2]
output['value']['Number of Observations Used'] = t[3]
for _level in ('1%', '5%', '10%'):
    output['value']['Critical Value(%s)' % _level] = t[4][_level]
print(output)
# The p-value is very small, so the series can be treated as stationary.

# White-noise test
_ts1 = data['ts1'].dropna()
output2 = acorr_ljungbox(_ts1, boxpierce=True, lags=[6, 12], return_df=True)
print(output2)
# The white-noise test shows the differenced series still has some
# correlation, so an ARMA model can be used.

# 3. Compute the autocorrelation and partial autocorrelation coefficients
lag_acf = acf(_ts1, nlags=10, fft=False)
lag_pacf = pacf(_ts1, nlags=10, method='ols')
# fig, axes = plt.subplots(1,2, figsize=(20,5))
# plot_acf(data['ts1'].dropna(), lags=10, ax=axes[0])
# plot_pacf(data['ts1'].dropna(), lags=10, ax=axes[1], method='ols')
# plt.show(block=True)
order_trend = arma_order_select_ic(_ts1)
print(order_trend['bic_min_order'])  # the choice here matches the book

# 4. Fit
result_trend = ARIMA(data['index'], (0, 1, 1)).fit()
print(result_trend.params)
# The remaining steps are the same as for ARMA.
def test_acf():
    """Compare ``tsa.acf`` (lags 0..20, unbiased=False) with stored references.

    Both samples are checked to 8 decimals; the original author left an open
    question about why the agreement is not tighter.
    """
    cases = (
        (x100, mlacf.acf100),
        (x1000, mlacf.acf1000),
    )
    for sample, reference in cases:
        computed = tsa.acf(sample, unbiased=False)[:21]
        assert_array_almost_equal(reference.ravel(), computed, 8)
def plot_acf(x, ax=None, lags=None, alpha=.05, use_vlines=True, unbiased=False,
             fft=False, title='Autocorrelation', zero=True,
             vlines_kwargs=None, **kwargs):
    """Draw the autocorrelation function of ``x``.

    Lags run along the horizontal axis, correlations along the vertical one.

    Parameters
    ----------
    x : array_like
        Time-series values.
    ax : Matplotlib AxesSubplot instance, optional
        Axes to draw into; when omitted a new figure is created.
    lags : int or array_like, optional
        Lag values for the horizontal axis, given either as an int or as an
        array of lags. When omitted, ``lags=np.arange(len(corr))`` is used.
    alpha : scalar, optional
        Level of the confidence intervals, e.g. ``alpha=.05`` gives 95 %
        intervals with the standard deviation from Bartlett's formula.
        ``None`` disables the intervals.
    use_vlines : bool, optional
        True draws vertical lines and markers, False draws markers only. The
        marker defaults to 'o' and can be replaced with a ``marker`` kwarg.
    unbiased : bool
        True uses n-k as autocovariance denominator, otherwise n.
    fft : bool, optional
        True computes the ACF via FFT.
    title : str, optional
        Plot title, 'Autocorrelation' by default.
    zero : bool, optional
        Whether the 0-lag autocorrelation is included. Default is True.
    vlines_kwargs : dict, optional
        Keyword arguments forwarded to ``vlines``.
    **kwargs : kwargs, optional
        Keyword arguments forwarded to the Matplotlib ``plot`` and
        ``axhline`` calls.

    Returns
    -------
    fig : Matplotlib figure instance
        The newly created figure when `ax` is None, otherwise the figure
        `ax` belongs to.

    See Also
    --------
    matplotlib.pyplot.xcorr
    matplotlib.pyplot.acorr
    mpl_examples/pylab_examples/xcorr_demo.py

    Notes
    -----
    Adapted from matplotlib's `xcorr`.

    Data are plotted as ``plot(lags, corr, **kwargs)``

    ``kwargs`` applies both to the line tracing the autocorrelations and to
    the horizontal line at 0, so its entries must be valid Line2D options.

    ``vlines_kwargs`` applies to the vertical lines joining each
    autocorrelation to the axis, so its entries must be valid
    LineCollection options.
    """
    fig, ax = utils.create_mpl_ax(ax)

    lags, nlags, irregular = _prepare_data_corr_plot(x, lags, zero)
    if vlines_kwargs is None:
        vlines_kwargs = {}

    # acf yields a bare array when alpha is None and an (acf, confint) pair
    # otherwise, so the result is unpacked accordingly.
    acf_result = acf(x, nlags=nlags, alpha=alpha, fft=fft, unbiased=unbiased)
    if alpha is None:
        acf_x, confint = acf_result, None
    else:
        acf_x, confint = acf_result

    _plot_corr(ax, title, acf_x, confint, lags, irregular, use_vlines,
               vlines_kwargs, **kwargs)

    return fig
def naive_plus(series, horizon):
    """Forecast by blending the last observation with the series mean.

    For each ``h`` in ``range(horizon)`` the forecast is
    ``rho[h] * last + (1 - rho[h]) * mean``, where ``rho`` is the
    autocorrelation function of ``series``, ``last`` its final observation
    and ``mean`` its mean.

    Parameters
    ----------
    series : array_like
        Observed values (list, ndarray or pandas Series).
    horizon : int
        Number of forecast values to produce.

    Returns
    -------
    numpy.ndarray
        Array of ``horizon`` forecast values.
    """
    rho = acf(series, nlags=horizon + 1)
    # Take the final observation by position: ``series[-1]`` is a label
    # lookup on a pandas Series with an integer index and raises KeyError.
    last_value = np.asarray(series)[-1]
    # Loop-invariant, so computed once.
    mean_value = np.mean(series)
    # NOTE(review): h starts at 0, so the first forecast uses rho[0] and the
    # two highest computed lags are never read -- confirm the offset is
    # intended.
    return np.array([
        rho[h] * last_value + (1 - rho[h]) * mean_value
        for h in range(horizon)
    ])
# Build an MA series of order 2 with mean 2 from the noise series xma.
y5 = 2 + xma + 0.8 * np.roll(xma, -1) + 0.6 * np.roll(
    xma, -2)  # + 0.6 *np.roll(xma,-3)

# Side by side: the underlying series (left) and the MA series (right).
plt.figure(figsize=(16, 7))
for _position, _values in ((121, xma), (122, y5)):
    plt.subplot(_position)
    plt.plot(_values)
plt.show()

# calling acf function from stattools
lag_acf = acf(y5, nlags=50)

# Plot ACF with the zero line and the +/-1.96/sqrt(n) band:
plt.figure(figsize=(16, 7))
plt.plot(lag_acf, marker="o")
_y5_band = 1.96 / np.sqrt(len(y5))
for _level in (0, -_y5_band, _y5_band):
    plt.axhline(y=_level, linestyle='--', color='gray')
plt.title('Autocorrelation Function')
plt.xlabel('number of lags')
plt.ylabel('correlation')
plt.tight_layout()
plt.show()

# calling pacf function from stattools
lag_pacf = pacf(y5, nlags=50, method='ols')
# Log series on its own, then together with its 12-period rolling mean.
plt.plot(ts_log)
plt.show()
moving_avg = ts_log.rolling(12).mean()
plt.plot(ts_log)
plt.plot(moving_avg, color='red')
plt.show()

# Deviation from the rolling mean; the leading rows are NaN until the
# 12-period window is full, so they are dropped before testing.
ts_log_moving_avg_diff = ts_log - moving_avg
print(ts_log_moving_avg_diff.head(12))
ts_log_moving_avg_diff.dropna(inplace=True)
test_stationarity(ts_log_moving_avg_diff)

# First difference of the log series.
ts_log_diff = ts_log - ts_log.shift()
plt.plot(ts_log_diff)
plt.show()
ts_log_diff.dropna(inplace=True)
test_stationarity(ts_log_diff)

lag_acf = acf(ts_log_diff, nlags=20)
lag_pacf = pacf(ts_log_diff, nlags=20, method='ols')

# ACF and PACF are shown one after the other, each with the zero line and
# the +/-1.96/sqrt(n) band.
_diff_band = 1.96 / np.sqrt(len(ts_log_diff))
_panels = (
    (121, lag_acf, 'Autocorrelation Function'),
    (122, lag_pacf, 'Partial Autocorrelation Function'),
)
for _position, _values, _panel_title in _panels:
    plt.subplot(_position)
    plt.plot(_values)
    for _level in (0, -_diff_band, _diff_band):
        plt.axhline(y=_level, linestyle='--', color='gray')
    plt.title(_panel_title)
    plt.show()