def test_pandasacovf():
    # acovf must give identical results whether it receives a pd.Series
    # or the underlying ndarray.
    # TODO: GH reference?
    # TODO: Same test for other functions?
    series = pd.Series(list(range(1, 11)))
    from_series = acovf(series, fft=False)
    from_ndarray = acovf(series.values, fft=False)
    assert_allclose(from_series, from_ndarray, rtol=1e-12)
def test_acovf_fft_vs_convolution():
    # The FFT-based and the direct (convolution) acovf implementations
    # must agree for every demean/unbiased combination.
    np.random.seed(1)
    series = np.random.normal(size=100)
    # TODO: parametrize?
    for demean in [True, False]:
        for unbiased in [True, False]:
            via_fft = acovf(series, demean=demean, unbiased=unbiased,
                            fft=True)
            via_conv = acovf(series, demean=demean, unbiased=unbiased,
                             fft=False)
            assert_almost_equal(via_fft, via_conv, decimal=7)
def test_acovf2d():
    # acovf on a squeezable pandas object should match acovf on its values,
    # while genuinely 2-D ndarray input must raise ValueError.
    dta = sunspots.load_pandas().data
    # pd.DatetimeIndex(start=..., end=..., freq=...) was deprecated in
    # pandas 0.23 and removed in 0.25; pd.date_range is the supported
    # range constructor and produces the identical index.
    dta.index = pd.date_range(start='1700', end='2009', freq='A')[:309]
    del dta["YEAR"]
    res = acovf(dta, fft=False)
    assert_equal(res, acovf(dta.values, fft=False))

    X = np.random.random((10, 2))
    with pytest.raises(ValueError):
        acovf(X, fft=False)
def test_acovf_nlags(acovf_data, unbiased, demean, fft, missing):
    # GH#4937
    # Truncating with nlag must agree with slicing the full acovf result.
    kwargs = dict(unbiased=unbiased, demean=demean, fft=fft,
                  missing=missing)
    full = acovf(acovf_data, **kwargs)
    limited = acovf(acovf_data, nlag=10, **kwargs)
    assert_allclose(full[:11], limited)
def test_acovf_nlags_missing(acovf_data, unbiased, demean, fft, missing):
    # GH#4937
    # Same truncation check as test_acovf_nlags, but with NaNs injected
    # so every `missing` policy is exercised.
    acovf_data = acovf_data.copy()
    acovf_data[1:3] = np.nan
    kwargs = dict(unbiased=unbiased, demean=demean, fft=fft,
                  missing=missing)
    full = acovf(acovf_data, **kwargs)
    limited = acovf(acovf_data, nlag=10, **kwargs)
    assert_allclose(full[:11], limited)
def levinson_durbin(s, nlags=10, isacov=False):
    """
    Levinson-Durbin recursion for autoregressive processes

    Parameters
    ----------
    s : array_like
        If isacov is False, then this is the time series. If isacov is
        True then this is interpreted as autocovariance starting with
        lag 0.
    nlags : integer
        largest lag to include in recursion or order of the
        autoregressive process
    isacov : boolean
        flag to indicate whether the first argument, s, contains the
        autocovariances or the data series.

    Returns
    -------
    sigma_v : float
        estimate of the error variance
    arcoefs : ndarray
        estimate of the autoregressive coefficients for a model
        including nlags
    pacf : ndarray
        partial autocorrelation function
    sigma : ndarray
        entire sigma array from intermediate result, last value is
        sigma_v
    phi : ndarray
        entire phi array from intermediate result, last column contains
        autoregressive coefficients for AR(nlags)

    Notes
    -----
    This function returns currently all results, but maybe we drop sigma
    and phi from the returns.

    If this function is called with the time series (isacov=False), then
    the sample autocovariance function is calculated with the default
    options (biased, no fft).
    """
    s = np.asarray(s)
    order = nlags

    # Autocovariances gamma_0 .. gamma_order, either given directly or
    # estimated from the data with default acovf options.
    if isacov:
        acov = s
    else:
        acov = acovf(s, fft=False)[:order + 1]  # TODO: not tested

    # coeffs[j, k] holds the j-th AR coefficient of the order-k model;
    # err_var[k] is the corresponding innovation variance.
    coeffs = np.zeros((order + 1, order + 1), 'd')
    err_var = np.zeros(order + 1)

    # Order-1 model initializes the recursion.
    coeffs[1, 1] = acov[1] / acov[0]
    err_var[1] = acov[0] - coeffs[1, 1] * acov[1]

    for lag in range(2, order + 1):
        # Reflection coefficient for the order-`lag` model.
        resid = acov[lag] - np.dot(coeffs[1:lag, lag - 1],
                                   acov[1:lag][::-1])
        coeffs[lag, lag] = resid / err_var[lag - 1]
        # Update the lower-order coefficients in one vectorized step
        # (equivalent to the classical element-wise loop).
        coeffs[1:lag, lag] = (coeffs[1:lag, lag - 1] -
                              coeffs[lag, lag] *
                              coeffs[1:lag, lag - 1][::-1])
        err_var[lag] = err_var[lag - 1] * (1 - coeffs[lag, lag]**2)

    sigma_v = err_var[-1]
    arcoefs = coeffs[1:, -1]
    # The diagonal of coeffs is the PACF; lag 0 is 1 by convention.
    pacf_ = np.diag(coeffs).copy()
    pacf_[0] = 1.
    return sigma_v, arcoefs, pacf_, err_var, coeffs  # return everything
def pacf(x, nlags=40, method='ywunbiased', alpha=None):
    """
    Partial autocorrelation estimate

    Parameters
    ----------
    x : 1d array
        observations of time series for which pacf is calculated
    nlags : int
        largest lag for which the pacf is returned
    method : str
        specifies which method for the calculations to use:

        - 'yw' or 'ywunbiased' : Yule-Walker with bias correction in
          denominator for acovf. Default.
        - 'ywm' or 'ywmle' : Yule-Walker without bias correction
        - 'ols' : regression of time series on lags of it and on constant
        - 'ols-inefficient' : regression of time series on lags using a
          single common sample to estimate all pacf coefficients
        - 'ols-unbiased' : regression of time series on lags with a bias
          adjustment
        - 'ld' or 'ldunbiased' : Levinson-Durbin recursion with bias
          correction
        - 'ldb' or 'ldbiased' : Levinson-Durbin recursion without bias
          correction

    alpha : float, optional
        If a number is given, the confidence intervals for the given level
        are returned.  For instance if alpha=.05, 95 % confidence intervals
        are returned where the standard deviation is computed according to
        1/sqrt(len(x))

    Returns
    -------
    pacf : 1d array
        partial autocorrelations, nlags + 1 elements, including lag zero
    confint : array, optional
        Confidence intervals for the PACF.  Returned if alpha is not None.

    See also
    --------
    sm2.tsa.autocov.acf
    sm2.tsa.autocov.pacf_yw
    sm2.tsa.autocov.pacf_burg
    sm2.tsa.stattools.pacf_ols

    Notes
    -----
    Based on simulation evidence across a range of low-order ARMA models,
    the best methods based on root MSE are Yule-Walker (MLW),
    Levinson-Durbin (MLE) and Burg, respectively. The estimators with the
    lowest bias included these three in addition to OLS and OLS-unbiased.

    Yule-Walker (unbiased) and Levinson-Durbin (unbiased) performed
    consistently worse than the other options.
    """
    if method in ('ols', 'ols-inefficient', 'ols-unbiased'):
        # GH#5153
        efficient = 'inefficient' not in method
        unbiased = 'unbiased' in method
        ret = pacf_ols(x, nlags=nlags, efficient=efficient,
                       unbiased=unbiased)
    elif method in ('yw', 'ywu', 'ywunbiased', 'yw_unbiased'):
        ret = pacf_yw(x, nlags=nlags, method='unbiased')
    elif method in ('ywm', 'ywmle', 'yw_mle'):
        ret = pacf_yw(x, nlags=nlags, method='mle')
    elif method in ('ld', 'ldu', 'ldunbiased', 'ld_unbiased'):
        acv = acovf(x, unbiased=True, fft=False)
        ld_ = levinson_durbin(acv, nlags=nlags, isacov=True)
        ret = ld_[2]

    # FIXME: inconsistent naming with ywmle
    elif method in ('ldb', 'ldbiased', 'ld_biased'):
        acv = acovf(x, unbiased=False, fft=False)
        ld_ = levinson_durbin(acv, nlags=nlags, isacov=True)
        ret = ld_[2]
    else:  # pragma: no cover
        raise ValueError('method not available')

    if alpha is not None:
        # Normal-approximation confidence bands; variance 1/n for all
        # lags >= 1.
        varacf = 1. / len(x)
        interval = stats.norm.ppf(1. - alpha / 2.) * np.sqrt(varacf)
        confint = np.column_stack((ret - interval, ret + interval))
        # Lag 0 pacf is identically 1, so its interval has zero width.
        confint[0] = ret[0]
        return ret, confint
    else:
        return ret
def test_acovf_warns(acovf_data):
    # GH#4937
    # Calling acovf without an explicit fft argument should emit a
    # FutureWarning about the upcoming change of default.
    with pytest.warns(FutureWarning):
        acovf(acovf_data)
def test_acovf_error(acovf_data):
    # GH#4937
    # An nlag larger than the series length must be rejected.
    with pytest.raises(ValueError):
        acovf(acovf_data, nlag=250, fft=False)