def test_forecast():
    # Check `forecast` / `get_forecast` for list-based and pandas-based endog.
    # Every comparison below is against ten values equal to 2.
    n_steps = 10
    expected = np.ones((n_steps,)) * 2

    # Array-like endog without any date information
    plain_res = MLEModel([1, 2], **kwargs).filter([])
    point_forecast = plain_res.forecast(steps=n_steps)
    assert_allclose(point_forecast, expected)
    assert_allclose(plain_res.get_forecast(steps=n_steps).predicted_mean,
                    point_forecast)

    # Pandas endog with a monthly (month-start) date index
    monthly_index = pd.date_range('1960-01-01', periods=2, freq='MS')
    dated_endog = pd.Series([1, 2], index=monthly_index)
    dated_res = MLEModel(dated_endog, **kwargs).filter([])
    # `steps` given as an integer and as an end-date string
    assert_allclose(dated_res.forecast(steps=n_steps), expected)
    assert_allclose(dated_res.forecast(steps='1960-12-01'), expected)
    assert_allclose(dated_res.get_forecast(steps=n_steps).predicted_mean,
                    expected)
def test_diagnostics_nile_durbinkoopman():
    # Diagnostic tests on the Nile dataset, compared against the values in
    # Durbin and Koopman (2012): parameter values are reported on page 37 and
    # the test statistics on page 40.
    data = nile.data.load_pandas().data
    data.index = pd.date_range('1871-01-01', '1970-01-01', freq='AS')

    model = MLEModel(data['volume'],
                     k_states=1,
                     initialization='approximate_diffuse',
                     initial_variance=1e15,
                     loglikelihood_burn=1)

    # Set the [0, 0] element of each system matrix to the reference values
    # (applied in this fixed order).
    reference_values = (
        ('design', 1),
        ('obs_cov', 15099.),
        ('transition', 1),
        ('selection', 1),
        ('state_cov', 1469.1),
    )
    for matrix_name, value in reference_values:
        model.ssm[matrix_name, 0, 0] = value

    results = model.filter([])

    # Ljung-Box
    # Note: only 3 digits provided in the reference paper
    ljung_box = results.test_serial_correlation(method='ljungbox', lags=9)
    assert_allclose(ljung_box[0, 0, -1], [8.84], atol=1e-2)

    # Jarque-Bera
    # Note: The book reports 0.09 for Kurtosis, because it is reporting the
    # statistic less the mean of the Kurtosis distribution (which is 3).
    jarque_bera = results.test_normality(method='jarquebera')[0]
    observed = [jarque_bera[position] for position in (0, 2, 3)]
    assert_allclose(observed, [0.05, -0.03, 3.09], atol=1e-2)

    # Heteroskedasticity
    # Note: only 2 digits provided in the book
    break_var = results.test_heteroskedasticity(method='breakvar')
    assert_allclose(break_var[0, 0], [0.61], atol=1e-2)
def test_diagnostics_nile_eviews():
    # Diagnostic tests on the Nile dataset, compared against the values in
    # "Fitting State Space Models with EViews" (Van den Bossche 2011,
    # Journal of Statistical Software).
    #   - parameter values: Figure 2
    #   - Ljung-Box and Jarque-Bera statistics and p-values: Figure 5
    # The Heteroskedasticity statistic is not provided in that paper.
    data = nile.data.load_pandas().data
    data.index = pd.date_range('1871-01-01', '1970-01-01', freq='AS')

    model = MLEModel(data['volume'],
                     k_states=1,
                     initialization='approximate_diffuse',
                     initial_variance=1e15,
                     loglikelihood_burn=1)

    # Set the [0, 0] element of each system matrix; the two covariance
    # entries are exponentials of the reference values.
    reference_values = (
        ('design', 1),
        ('obs_cov', np.exp(9.600350)),
        ('transition', 1),
        ('selection', 1),
        ('state_cov', np.exp(7.348705)),
    )
    for matrix_name, value in reference_values:
        model.ssm[matrix_name, 0, 0] = value

    results = model.filter([])

    # Ljung-Box
    # Note: only 3 digits provided in the reference paper
    ljung_box = results.test_serial_correlation(method='ljungbox', lags=10)
    assert_allclose(ljung_box[0, :, -1], [13.117, 0.217], atol=1e-3)

    # Jarque-Bera
    jarque_bera = results.test_normality(method='jarquebera')
    assert_allclose(jarque_bera[0, :2], [0.041686, 0.979373], atol=1e-5)
def test_transform():
    # For a plain MLEModel the parameter transforms are expected to be no-ops.
    plain_model = MLEModel([1, 2], **kwargs)

    # Direct transform / untransform leave the values unchanged
    assert_allclose(plain_model.transform_params([2, 3]), [2, 3])
    assert_allclose(plain_model.untransform_params([2, 3]), [2, 3])

    # Smoke test: each of these methods accepts `transformed=False`
    for method_name in ('filter', 'update', 'loglike', 'loglikeobs'):
        getattr(plain_model, method_name)([], transformed=False)

    # Note that the dummy model is an SARIMAX instance, and the two parameters
    # are variances
    sarimax_model, _ = get_dummy_mod(fit=False)

    # Direct transform / untransform: the assertions pair 2 <-> 4 and 3 <-> 9
    assert_allclose(sarimax_model.transform_params([2, 3]), [4, 9])
    assert_allclose(sarimax_model.untransform_params([4, 9]), [2, 3])

    # Transformation inside `filter`: parameters flagged as already
    # transformed are stored unchanged, untransformed ones are transformed
    already_transformed = sarimax_model.filter([2, 3], transformed=True)
    assert_allclose(already_transformed.params, [2, 3])

    needs_transform = sarimax_model.filter([2, 3], transformed=False)
    assert_allclose(needs_transform.params, [4, 9])
def test_predict():
    # Check the defaults of `predict` / `get_prediction` and the handling of
    # string values passed to the `dynamic` option.
    dates = pd.date_range(start='1980-01-01', end='1981-01-01', freq='AS')
    # `pd.Series` is used because the `pd.TimeSeries` alias is not available
    # in current pandas releases.
    endog = pd.Series([1, 2], index=dates)
    mod = MLEModel(endog, **kwargs)
    res = mod.filter([])

    # Test that predict with start=None, end=None does prediction with full
    # dataset
    predict = res.predict()
    assert_equal(predict.shape, (mod.nobs,))
    assert_allclose(res.get_prediction().predicted_mean, predict)

    # Test a string value to the dynamic option
    assert_allclose(res.predict(dynamic='1981-01-01'), res.predict())

    # Test an invalid date string value to the dynamic option
    assert_raises(ValueError, res.predict, dynamic='1982-01-01')

    # Test for passing a string to predict when dates are not set
    mod = MLEModel([1, 2], **kwargs)
    res = mod.filter([])
    assert_raises(ValueError, res.predict, dynamic='string')
def test_basic_endog():
    # Test various types of basic python endog inputs (e.g. lists, scalars...)

    # Non-array-like inputs cannot be used at all; the failure comes from
    # checks in the Statsmodels base classes
    for scalar in (1, 'a', True):
        assert_raises(ValueError, MLEModel, endog=scalar, k_states=1)

    # Single-element lists of int, float and bool are all accepted and the
    # filter results compare equal to [[1]]
    for single in ([1], [1.], [True]):
        results = MLEModel(single, **kwargs).filter([])
        assert_equal(results.filter_results.endog, [[1]])

    # A list of strings can be used to construct the model, but filtering
    # raises an error due to the inability to coerce a string to numeric
    string_model = MLEModel(['a'], **kwargs)
    assert_raises(ValueError, string_model.filter, [])

    # Check that different iterable types give the expected result
    for iterable in ([1., 2.], [[1.], [2.]], (1., 2.)):
        checked_model = check_endog(iterable, **kwargs)
        checked_model.filter([])
def test_params():
    # Check the default `start_params` / `param_names` behavior and that both
    # can be overridden through the private attributes.
    model = MLEModel([1, 2], **kwargs)

    # By default, accessing start_params raises NotImplementedError (the
    # lambda defers the attribute access until assert_raises calls it)
    assert_raises(NotImplementedError, lambda: model.start_params)
    # But param names are by default an empty array
    assert_equal(model.param_names, [])

    # We can set them in the object if we want
    model._start_params = [1]
    model._param_names = ['a']

    assert_equal(model.start_params, [1])
    assert_equal(model.param_names, ['a'])
def test_summary():
    # Check that the text of `res.summary()` contains the sample dates and the
    # default model name.
    dates = pd.date_range(start='1980-01-01', end='1984-01-01', freq='AS')
    # `pd.Series` is used because the `pd.TimeSeries` alias is not available
    # in current pandas releases.
    endog = pd.Series([1, 2, 3, 4, 5], index=dates)
    mod = MLEModel(endog, **kwargs)
    res = mod.filter([])

    # Get the summary
    txt = str(res.summary())

    # Test res.summary when the model has dates
    # (raw strings keep `\s` from being parsed as a string escape sequence;
    # the resulting patterns are unchanged)
    assert_equal(re.search(r'Sample:\s+01-01-1980', txt) is not None, True)
    assert_equal(re.search(r'\s+- 01-01-1984', txt) is not None, True)

    # Test res.summary when `model_name` was not provided
    assert_equal(re.search(r'Model:\s+MLEModel', txt) is not None, True)
def test_filter():
    # Check the type of object returned by `filter` and the covariance type
    # recorded on the full results object.
    model = MLEModel(np.array([1., 2.]), **kwargs)

    # With return_ssm=True the state space filter results are returned
    ssm_output = model.filter([], return_ssm=True)
    assert_equal(isinstance(ssm_output, kalman_filter.FilterResults), True)

    # Otherwise the full results object is returned: first with no explicit
    # covariance type (checked to be 'opg'), then with a specific one
    cases = (
        ({}, 'opg'),
        ({'cov_type': 'oim'}, 'oim'),
    )
    for filter_options, expected_cov_type in cases:
        full_output = model.filter([], **filter_options)
        assert_equal(isinstance(full_output, MLEResultsWrapper), True)
        assert_equal(full_output.cov_type, expected_cov_type)
def check_endog(endog, nobs=2, k_endog=1, **kwargs):
    # Build an MLEModel from `endog` (extra keyword arguments are forwarded to
    # the constructor), assert the layout of both copies of the data, and
    # return the model.
    model = MLEModel(endog, **kwargs)

    # `model.endog` is the Statsmodels version of the data; it should be
    # 2-dim, C-contiguous and long-shaped: (nobs, k_endog), i.e. (2, 1) with
    # the default arguments
    sm_endog = model.endog
    assert_equal(sm_endog.ndim, 2)
    assert_equal(sm_endog.flags['C_CONTIGUOUS'], True)
    assert_equal(sm_endog.shape, (nobs, k_endog))

    # `model.ssm.endog` is the state space version of the data; it should be
    # 2-dim, F-contiguous and wide-shaped: (k_endog, nobs), i.e. (1, 2) with
    # the default arguments
    ssm_endog = model.ssm.endog
    assert_equal(ssm_endog.ndim, 2)
    assert_equal(ssm_endog.flags['F_CONTIGUOUS'], True)
    assert_equal(ssm_endog.shape, (k_endog, nobs))
    # ... and it should share data with `model.endog`
    assert_equal(model.ssm.endog.base is model.endog, True)

    return model
def test_numpy_endog():
    # Test various types of numpy endog inputs

    def expect_layout(array, ndim, c_contiguous, f_contiguous, shape):
        # Sanity-check the constructed input array before handing it over
        assert_equal(array.ndim, ndim)
        assert_equal(array.flags['C_CONTIGUOUS'], c_contiguous)
        assert_equal(array.flags['F_CONTIGUOUS'], f_contiguous)
        assert_equal(array.shape, shape)

    # Check behavior of the link maintained between the passed array and the
    # arrays held by the model
    original = np.array([1., 2.])
    model = MLEModel(original, **kwargs)
    assert_equal(model.endog.base is not model.data.orig_endog, True)
    assert_equal(model.endog.base is not original, True)
    assert_equal(model.data.orig_endog.base is not original, True)
    original[0] = 2
    # there is no link to model.endog: it still holds the old values
    assert_equal(model.endog, np.r_[1, 2].reshape(2, 1))
    # there remains a link to model.data.orig_endog: it compares equal to the
    # modified array
    assert_equal(model.data.orig_endog, original)

    # Check behavior with different memory layouts / shapes

    # Example (failure): 0-dim array
    zero_dim = np.array(1.)
    # raises error due to len(endog) failing in Statsmodels base classes
    assert_raises(TypeError, check_endog, zero_dim, **kwargs)

    # Example : 1-dim array, both C- and F-contiguous, length 2
    one_dim = np.array([1., 2.])
    expect_layout(one_dim, 1, True, True, (2,))
    check_endog(one_dim, **kwargs).filter([])

    # Example : 2-dim array, C-contiguous, long-shaped: (nobs, k_endog)
    c_long = np.array([1., 2.]).reshape(2, 1)
    expect_layout(c_long, 2, True, False, (2, 1))
    check_endog(c_long, **kwargs).filter([])

    # Example : 2-dim array, C-contiguous, wide-shaped: (k_endog, nobs)
    c_wide = np.array([1., 2.]).reshape(1, 2)
    expect_layout(c_wide, 2, True, False, (1, 2))
    # raises error because arrays are always interpreted as
    # (nobs, k_endog), which means that k_endog=2 is incompatible with shape
    # of design matrix (1, 1)
    assert_raises(ValueError, check_endog, c_wide, **kwargs)

    # Example : 2-dim array, F-contiguous, long-shaped (nobs, k_endog)
    f_long = np.array([1., 2.]).reshape(1, 2).transpose()
    expect_layout(f_long, 2, False, True, (2, 1))
    check_endog(f_long, **kwargs).filter([])

    # Example : 2-dim array, F-contiguous, wide-shaped (k_endog, nobs)
    f_wide = np.array([1., 2.]).reshape(2, 1).transpose()
    expect_layout(f_wide, 2, False, True, (1, 2))
    # raises error because arrays are always interpreted as
    # (nobs, k_endog), which means that k_endog=2 is incompatible with shape
    # of design matrix (1, 1)
    assert_raises(ValueError, check_endog, f_wide, **kwargs)

    # Example (failure): 3-dim array
    three_dim = np.array([1., 2.]).reshape(2, 1, 1)
    # raises error due to direct ndim check in Statsmodels base classes
    assert_raises(ValueError, check_endog, three_dim, **kwargs)

    # Example : np.array with 2 columns
    # Update kwargs for k_endog=2
    kwargs2 = {
        'k_states': 1,
        'design': [[1], [0.]],
        'obs_cov': [[1, 0], [0, 1]],
        'transition': [[1]],
        'selection': [[1]],
        'state_cov': [[1]],
        'initialization': 'approximate_diffuse'
    }
    two_columns = np.array([[1., 2.], [3., 4.]])
    check_endog(two_columns, k_endog=2, **kwargs2).filter([])