def test_forecast():
    """Check `forecast` and `get_forecast` for list and pandas endog."""
    n_ahead = 10
    expected = np.ones((n_ahead, )) * 2

    # Numpy
    results = MLEModel([1, 2], **kwargs).filter([])
    point_forecast = results.forecast(steps=n_ahead)
    assert_allclose(point_forecast, expected)
    assert_allclose(
        results.get_forecast(steps=n_ahead).predicted_mean, point_forecast)

    # Pandas
    monthly_index = pd.date_range('1960-01-01', periods=2, freq='MS')
    series = pd.Series([1, 2], index=monthly_index)
    results = MLEModel(series, **kwargs).filter([])
    assert_allclose(results.forecast(steps=n_ahead), expected)
    # An end date given as a string must produce the same ten forecasts
    assert_allclose(results.forecast(steps='1960-12-01'), expected)
    assert_allclose(
        results.get_forecast(steps=n_ahead).predicted_mean, expected)
def setup_class(cls, which='none', **kwargs):
    """
    Build the model and its conventional / univariate smoother output.

    Parameters
    ----------
    which : str, optional
        Pattern of observations overwritten with NaN before the model is
        created: 'all', 'partial' or 'mixed'. Any other value (including the
        default 'none') leaves the data untouched.
    **kwargs
        Forwarded to the `MLEModel` constructor.

    Notes
    -----
    Sets `cls.desired`, `cls.model`, `cls.conventional_results`,
    `cls.conventional_sim`, `cls.univariate_results` and
    `cls.univariate_sim`.
    """
    # Results
    # Build the path from its components so that separators are consistent
    # on every platform.
    path = os.path.join(current_path, 'results',
                        'results_smoothing_generalobscov_R.csv')
    cls.desired = pd.read_csv(path)

    # Data
    dta = datasets.macrodata.load_pandas().data
    dta.index = pd.date_range(start='1959-01-01', end='2009-7-01', freq='QS')
    obs = dta[['realgdp', 'realcons', 'realinv']].diff().iloc[1:]

    if which == 'all':
        obs.iloc[:50, :] = np.nan
        obs.iloc[119:130, :] = np.nan
    elif which == 'partial':
        obs.iloc[0:50, 0] = np.nan
        obs.iloc[119:130, 0] = np.nan
    elif which == 'mixed':
        obs.iloc[0:50, 0] = np.nan
        obs.iloc[19:70, 1] = np.nan
        obs.iloc[39:90, 2] = np.nan
        obs.iloc[119:130, 0] = np.nan
        obs.iloc[119:130, 2] = np.nan

    # Create the model
    mod = MLEModel(obs, k_states=3, k_posdef=3, **kwargs)
    mod['design'] = np.eye(3)
    mod['obs_cov'] = np.array([[609.0746647855, 0., 0.],
                               [0., 1.8774916622, 0.],
                               [0., 0., 124.6768281675]])
    mod['transition'] = np.array(
        [[-0.8110473405, 1.8005304445, 1.0215975772],
         [-1.9846632699, 2.4091302213, 1.9264449765],
         [0.9181658823, -0.2442384581, -0.6393462272]])
    mod['selection'] = np.eye(3)
    mod['state_cov'] = np.array(
        [[1552.9758843938, 612.7185121905, 877.6157204992],
         [612.7185121905, 467.8739411204, 70.608037339],
         [877.6157204992, 70.608037339, 900.5440385836]])
    mod.initialize_approximate_diffuse(1e6)
    cls.model = mod.ssm

    # Conventional filtering, smoothing, and simulation smoothing
    cls.model.filter_conventional = True
    cls.conventional_results = cls.model.smooth()
    # One variate per observation and state disturbance, for every period
    n_disturbance_variates = (
        (cls.model.k_endog + cls.model.k_posdef) * cls.model.nobs)
    cls.conventional_sim = cls.model.simulation_smoother(
        disturbance_variates=np.zeros(n_disturbance_variates),
        initial_state_variates=np.zeros(cls.model.k_states))

    # Univariate filtering, smoothing, and simulation smoothing
    cls.model.filter_univariate = True
    cls.univariate_results = cls.model.smooth()
    cls.univariate_sim = cls.model.simulation_smoother(
        disturbance_variates=np.zeros(n_disturbance_variates),
        initial_state_variates=np.zeros(cls.model.k_states))
def setup_class(cls, which, dtype=float, alternate_timing=False, **kwargs):
    """
    Prepare the general-obs-cov model and its smoother output.

    `which` selects the block of observations set to NaN ('all', 'partial'
    or 'mixed'; anything else leaves the data as is). `dtype` and
    `alternate_timing` are accepted but not used here; remaining keyword
    arguments are forwarded to `MLEModel`.
    """
    # Results
    results_file = os.path.join(
        current_path, 'results', 'results_smoothing_generalobscov_R.csv')
    cls.desired = pd.read_csv(results_file)

    # Data
    macro = datasets.macrodata.load_pandas().data
    macro.index = pd.date_range(
        start='1959-01-01', end='2009-7-01', freq='QS')
    obs = macro[['realgdp', 'realcons', 'realinv']].diff().iloc[1:]

    # (rows, columns) blocks set to missing for each scenario
    every_col = slice(None)
    missing_blocks = {
        'all': [(slice(None, 50), every_col), (slice(119, 130), every_col)],
        'partial': [(slice(0, 50), 0), (slice(119, 130), 0)],
        'mixed': [(slice(0, 50), 0), (slice(19, 70), 1), (slice(39, 90), 2),
                  (slice(119, 130), 0), (slice(119, 130), 2)],
    }
    for rows, cols in missing_blocks.get(which, ()):
        obs.iloc[rows, cols] = np.nan

    # Create the model
    mod = MLEModel(obs, k_states=3, k_posdef=3, **kwargs)
    mod['design'] = np.eye(3)
    X = np.arange(1, 10).reshape((3, 3)) / 10.
    mod['obs_cov'] = X.dot(X.T)
    mod['transition'] = np.eye(3)
    mod['selection'] = np.eye(3)
    mod['state_cov'] = np.eye(3)
    mod.initialize_approximate_diffuse(1e6)
    cls.model = mod.ssm

    n_disturbance_variates = (
        (cls.model.k_endog + cls.model.k_posdef) * cls.model.nobs
    )

    def zero_variates():
        # Fresh all-zero inputs for each simulation smoother
        return {
            'disturbance_variates': np.zeros(n_disturbance_variates),
            'initial_state_variates': np.zeros(cls.model.k_states),
        }

    # Conventional filtering, smoothing, and simulation smoothing
    cls.model.filter_conventional = True
    cls.conventional_results = cls.model.smooth()
    cls.conventional_sim = cls.model.simulation_smoother(**zero_variates())

    # Univariate filtering, smoothing, and simulation smoothing
    cls.model.filter_univariate = True
    cls.univariate_results = cls.model.smooth()
    cls.univariate_sim = cls.model.simulation_smoother(**zero_variates())
def test_predict():
    """Check `predict` defaults, a string `dynamic`, and undated models."""
    index = pd.date_range(start='1980-01-01', end='1981-01-01', freq='AS')
    mod = MLEModel(pd.Series([1, 2], index=index), **kwargs)
    res = mod.filter([])

    # With start=None and end=None, prediction covers the full dataset
    in_sample = res.predict()
    assert_equal(in_sample.shape, (mod.nobs, ))
    assert_allclose(res.get_prediction().predicted_mean, in_sample)

    # A date string is accepted for the dynamic option
    assert_allclose(res.predict(dynamic='1981-01-01'), res.predict())

    # Test an invalid date string value to the dynamic option
    # assert_raises(ValueError, res.predict, dynamic='1982-01-01')

    # A string passed to predict when the model has no dates raises
    undated_res = MLEModel([1, 2], **kwargs).filter([])
    assert_raises(KeyError, undated_res.predict, dynamic='string')
def test_basic_endog():
    """Check model creation and filtering with basic Python endog inputs."""
    # Non-array_like inputs are rejected; this fails due to checks in the
    # Statsmodels base classes
    for not_array_like in (1, 'a', True):
        assert_raises(ValueError, MLEModel, endog=not_array_like, k_states=1)

    # Single-element lists of different types all end up as [[1]]
    for single in ([1], [1.], [True]):
        res = MLEModel(single, **kwargs).filter([])
        assert_equal(res.filter_results.endog, [[1]])

    # A string cannot be coerced to numeric, so filtering raises an error
    mod = MLEModel(['a'], **kwargs)
    assert_raises(ValueError, mod.filter, [])

    # Different iterable types give the expected result
    for endog in ([1., 2.], [[1.], [2.]], (1., 2.)):
        mod = check_endog(endog, **kwargs)
        mod.filter([])
def test_params():
    """Check the default and user-set `start_params` / `param_names`."""
    mod = MLEModel([1, 2], **kwargs)

    def read_start_params():
        return mod.start_params

    # Reading start_params raises NotImplementedError by default
    assert_raises(NotImplementedError, read_start_params)
    # Parameter names, however, default to an empty array
    assert_equal(mod.param_names, [])

    # Both can be set directly on the object
    mod._start_params = [1]
    mod._param_names = ['a']

    assert_equal(mod.start_params, [1])
    assert_equal(mod.param_names, ['a'])
def create_model(cls, obs, **kwargs):
    """
    Return an `MLEModel` for `obs` with two states and fixed matrices.

    Extra keyword arguments are forwarded to the `MLEModel` constructor.
    The model uses approximate diffuse initialization with variance 1e6.
    """
    # Create the model with typical state space
    mod = MLEModel(obs, k_states=2, k_posdef=2, **kwargs)

    # (name, value) pairs, assigned in this order
    system_matrices = (
        ('design', np.array([[-32.47143586, 17.33779024],
                             [-7.40264169, 1.69279859],
                             [-209.04702853, 125.2879374]])),
        ('obs_cov', np.diag(
            np.array([0.0622668, 1.95666886, 58.37473642]))),
        ('transition', np.array([[0.29935707, 0.33289005],
                                 [-0.7639868, 1.2844237]])),
        ('selection', np.eye(2)),
        ('state_cov', np.array([[1.2, -0.25],
                                [-0.25, 1.1]])),
    )
    for name, value in system_matrices:
        mod[name] = value

    mod.initialize_approximate_diffuse(1e6)
    return mod
def test_summary():
    """Check that the results summary reports the sample dates and model."""
    dates = pd.date_range(start='1980-01-01', end='1984-01-01', freq='AS')
    endog = pd.Series([1, 2, 3, 4, 5], index=dates)
    mod = MLEModel(endog, **kwargs)
    res = mod.filter([])

    # Get the summary
    txt = str(res.summary())

    # Patterns are raw strings: `\s` in a plain literal is an invalid escape
    # sequence and triggers a warning on modern Python versions.

    # Test res.summary when the model has dates
    assert_equal(re.search(r'Sample:\s+01-01-1980', txt) is not None, True)
    assert_equal(re.search(r'\s+- 01-01-1984', txt) is not None, True)

    # Test res.summary when `model_name` was not provided
    assert_equal(re.search(r'Model:\s+MLEModel', txt) is not None, True)
def test_filter():
    """Check the return types and `cov_type` produced by `filter`."""
    mod = MLEModel(np.array([1., 2.]), **kwargs)

    # With return_ssm=True the underlying ssm results object is returned
    ssm_res = mod.filter([], return_ssm=True)
    assert_equal(isinstance(ssm_res, kalman_filter.FilterResults), True)

    # Otherwise the full results object is returned; first with the default
    # covariance type, then with a specific one
    cases = (
        ({}, 'opg'),
        ({'cov_type': 'oim'}, 'oim'),
    )
    for filter_kwargs, expected_cov_type in cases:
        res = mod.filter([], **filter_kwargs)
        assert_equal(isinstance(res, MLEResultsWrapper), True)
        assert_equal(res.cov_type, expected_cov_type)
def check_endog(endog, nobs=2, k_endog=1, **kwargs):
    """
    Create an `MLEModel` from `endog`, verify its data layout, return it.

    Parameters
    ----------
    endog : array_like
        Data passed to `MLEModel`.
    nobs, k_endog : int, optional
        Expected number of observations and of endogenous variables.
    **kwargs
        Forwarded to the `MLEModel` constructor.
    """
    # create the model
    mod = MLEModel(endog, **kwargs)

    # `mod.endog` is the Statsmodels version of the data: it should be
    # 2-dim, C-contiguous and long-shaped, i.e. (nobs, k_endog) == (2, 1)
    # with the default arguments
    long_data = mod.endog
    assert_equal(long_data.ndim, 2)
    assert_equal(long_data.flags['C_CONTIGUOUS'], True)
    assert_equal(long_data.shape, (nobs, k_endog))

    # `mod.ssm.endog` is the state space version of the data: it should be
    # 2-dim, F-contiguous and wide-shaped, i.e. (k_endog, nobs) == (1, 2)
    # with the default arguments
    wide_data = mod.ssm.endog
    assert_equal(wide_data.ndim, 2)
    assert_equal(wide_data.flags['F_CONTIGUOUS'], True)
    assert_equal(wide_data.shape, (k_endog, nobs))

    # ... and it should share data with mod.endog
    assert_equal(mod.ssm.endog.base is mod.endog, True)

    return mod
def test_init_matrices_time_varying():
    """Check that time-varying system matrices can be set via `__init__`."""
    nobs, k_endog, k_states, k_posdef = 10, 2, 3, 1

    def ramp(*shape):
        # Floats 0., 1., 2., ... arranged into the requested shape
        return np.reshape(np.arange(np.prod(shape)) * 1.0, shape)

    names = ('obs_intercept', 'design', 'obs_cov', 'state_intercept',
             'transition', 'selection', 'state_cov')
    matrices = {
        'obs_intercept': ramp(k_endog, nobs),
        'design': ramp(k_endog, k_states, nobs),
        'obs_cov': ramp(k_endog, k_endog, nobs),
        'state_intercept': ramp(k_states, nobs),
        'transition': ramp(k_states, k_states, nobs),
        'selection': ramp(k_states, k_posdef, nobs),
        'state_cov': ramp(k_posdef, k_posdef, nobs),
    }

    endog = np.zeros((10, 2))
    mod = MLEModel(endog, k_states=k_states, k_posdef=k_posdef, **matrices)

    # Each matrix read back from the model must equal what was passed in
    for name in names:
        assert_allclose(mod[name], matrices[name])
def test_predict():
    """Check `predict` defaults, the `dynamic` option and `full_results`."""
    dates = pd.date_range(start='1980-01-01', end='1981-01-01', freq='AS')
    # `pd.TimeSeries` has been removed from pandas; `pd.Series` with a date
    # index is the replacement.
    endog = pd.Series([1, 2], index=dates)
    mod = MLEModel(endog, **kwargs)
    res = mod.filter([])

    # Test that predict with start=None, end=None does prediction with full
    # dataset
    assert_equal(res.predict().shape, (mod.k_endog, mod.nobs))

    # Test a string value to the dynamic option
    assert_allclose(res.predict(dynamic='1981-01-01'), res.predict())

    # Test an invalid date string value to the dynamic option
    assert_raises(ValueError, res.predict, dynamic='1982-01-01')

    # Test predict with full results
    assert_equal(
        isinstance(res.predict(full_results=True),
                   kalman_filter.FilterResults),
        True)
def test_summary():
    """Check the summary text and that it survives failing diagnostics."""
    dates = pd.date_range(start='1980-01-01', end='1984-01-01', freq='AS')
    endog = pd.Series([1, 2, 3, 4, 5], index=dates)
    mod = MLEModel(endog, **kwargs)
    res = mod.filter([])

    # Get the summary
    txt = str(res.summary())

    # Patterns are raw strings: `\s` in a plain literal is an invalid escape
    # sequence and triggers a warning on modern Python versions.

    # Test res.summary when the model has dates
    assert_equal(re.search(r'Sample:\s+01-01-1980', txt) is not None, True)
    assert_equal(re.search(r'\s+- 01-01-1984', txt) is not None, True)

    # Test res.summary when `model_name` was not provided
    assert_equal(re.search(r'Model:\s+MLEModel', txt) is not None, True)

    # Smoke test that summary still works when diagnostic tests fail
    res.filter_results._standardized_forecasts_error[:] = np.nan
    res.summary()
    res.filter_results._standardized_forecasts_error = 1
    res.summary()
    res.filter_results._standardized_forecasts_error = 'a'
    res.summary()
def test_numpy_endog():
    """Check model creation with numpy endog of various layouts/shapes."""

    def assert_layout(arr, shape, contiguity_flag):
        # Verify rank, the one contiguity flag that is checked, and shape
        assert_equal(arr.ndim, len(shape))
        assert_equal(arr.flags[contiguity_flag], True)
        assert_equal(arr.shape, shape)

    # Behavior of the link maintained between the passed `endog` array and
    # the arrays held by the model
    endog = np.array([1., 2.])
    mod = MLEModel(endog, **kwargs)
    assert_equal(mod.endog.base is not mod.data.orig_endog, True)
    assert_equal(mod.endog.base is not endog, True)
    assert_equal(mod.data.orig_endog.base is not endog, True)
    endog[0] = 2
    # the change is not reflected in mod.endog: there is no link
    assert_equal(mod.endog, np.r_[1, 2].reshape(2, 1))
    # but it is reflected in mod.data.orig_endog: the link remains
    assert_equal(mod.data.orig_endog, endog)

    # Behavior with different memory layouts / shapes

    # Example (failure): 0-dim array
    scalar_array = np.array(1.)
    # raises error due to len(endog) failing in Statsmodels base classes
    assert_raises(TypeError, check_endog, scalar_array, **kwargs)

    # Example : 1-dim array, both C- and F-contiguous, length 2
    flat = np.array([1., 2.])
    assert_layout(flat, (2, ), 'C_CONTIGUOUS')
    assert_equal(flat.flags['F_CONTIGUOUS'], True)
    check_endog(flat, **kwargs).filter([])

    # In the 2-dim examples below only one contiguity flag is asserted: on
    # newer numpy these arrays are (rightly) both C and F contiguous, so the
    # other flag is deliberately left unchecked.

    # Example : 2-dim array, C-contiguous, long-shaped: (nobs, k_endog)
    c_long = np.array([1., 2.]).reshape(2, 1)
    assert_layout(c_long, (2, 1), 'C_CONTIGUOUS')
    check_endog(c_long, **kwargs).filter([])

    # Example : 2-dim array, C-contiguous, wide-shaped: (k_endog, nobs)
    c_wide = np.array([1., 2.]).reshape(1, 2)
    assert_layout(c_wide, (1, 2), 'C_CONTIGUOUS')
    # raises error because arrays are always interpreted as
    # (nobs, k_endog), which means that k_endog=2 is incompatible with shape
    # of design matrix (1, 1)
    assert_raises(ValueError, check_endog, c_wide, **kwargs)

    # Example : 2-dim array, F-contiguous, long-shaped (nobs, k_endog)
    f_long = np.array([1., 2.]).reshape(1, 2).transpose()
    assert_layout(f_long, (2, 1), 'F_CONTIGUOUS')
    check_endog(f_long, **kwargs).filter([])

    # Example : 2-dim array, F-contiguous, wide-shaped (k_endog, nobs)
    f_wide = np.array([1., 2.]).reshape(2, 1).transpose()
    assert_layout(f_wide, (1, 2), 'F_CONTIGUOUS')
    # raises error for the same reason as the C-contiguous wide-shaped case:
    # k_endog=2 is incompatible with shape of design matrix (1, 1)
    assert_raises(ValueError, check_endog, f_wide, **kwargs)

    # Example (failure): 3-dim array
    cube = np.array([1., 2.]).reshape(2, 1, 1)
    # raises error due to direct ndim check in Statsmodels base classes
    assert_raises(ValueError, check_endog, cube, **kwargs)

    # Example : np.array with 2 columns
    # Model arguments updated for k_endog=2
    two_column_kwargs = {
        'k_states': 1, 'design': [[1], [0.]], 'obs_cov': [[1, 0], [0, 1]],
        'transition': [[1]], 'selection': [[1]], 'state_cov': [[1]],
        'initialization': 'approximate_diffuse'
    }
    two_columns = np.array([[1., 2.], [3., 4.]])
    check_endog(two_columns, k_endog=2, **two_column_kwargs).filter([])
def setup_class(cls, dtype=float, alternate_timing=False, **kwargs):
    """
    Build the bivariate GDP / unemployment model and its smoother output.

    `alternate_timing` switches between transforming the initial state
    covariance with the transition matrix (False) and setting
    `timing_init_filtered` on the model (True). `dtype` is accepted but not
    used here; remaining keyword arguments are forwarded to `MLEModel`.
    """
    cls.true = results_kalman_filter.uc_bi
    cls.true_states = pd.DataFrame(cls.true['states'])

    # GDP and Unemployment, Quarterly, 1948.1 - 1995.3
    quarters = pd.date_range('1947-01-01', '1995-07-01', freq='QS')
    data = pd.DataFrame(
        cls.true['data'], index=quarters, columns=['GDP', 'UNEMP'])[4:]
    data['GDP'] = np.log(data['GDP'])
    data['UNEMP'] = (data['UNEMP'] / 100)

    k_states = 6
    cls.mlemodel = MLEModel(data, k_states=k_states, **kwargs)
    cls.model = cls.mlemodel.ssm
    ssm = cls.model

    # Statespace representation
    ssm.design[:, :, 0] = [[1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1]]
    fixed_transition_idx = ([0, 0, 1, 1, 2, 3, 4, 5],
                            [0, 4, 1, 2, 1, 2, 4, 5],
                            [0, 0, 0, 0, 0, 0, 0, 0])
    ssm.transition[fixed_transition_idx] = [1, 1, 0, 0, 1, 1, 1, 1]
    ssm.selection = np.eye(ssm.k_states)

    # Update matrices with given parameters
    params = np.array(cls.true['parameters'])
    (sigma_v, sigma_e, sigma_w, sigma_vl, sigma_ec,
     phi_1, phi_2, alpha_1, alpha_2, alpha_3) = params

    ssm.design[([1, 1, 1], [1, 2, 3], [0, 0, 0])] = [
        alpha_1, alpha_2, alpha_3]
    ssm.transition[([1, 1], [1, 2], [0, 0])] = [phi_1, phi_2]
    ssm.obs_cov[1, 1, 0] = sigma_ec**2
    # Diagonal of the state covariance, at index 0 of the last axis
    state_cov_diag = (
        np.diag_indices(k_states) + (np.zeros(k_states, dtype=int), ))
    ssm.state_cov[state_cov_diag] = [
        sigma_v**2, sigma_e**2, 0, 0, sigma_w**2, sigma_vl**2]

    # Initialization
    initial_state = np.zeros((k_states, ))
    initial_state_cov = np.eye(k_states) * 100

    # Initialization: modification
    if alternate_timing:
        ssm.timing_init_filtered = True
    else:
        transition = ssm.transition[:, :, 0]
        initial_state_cov = np.dot(
            np.dot(transition, initial_state_cov), transition.T)
    ssm.initialize_known(initial_state, initial_state_cov)

    n_disturbance_variates = (ssm.k_endog + ssm.k_posdef) * ssm.nobs

    def zero_variates():
        # Fresh all-zero inputs for each simulation smoother
        return {
            'disturbance_variates': np.zeros(n_disturbance_variates),
            'initial_state_variates': np.zeros(cls.model.k_states),
        }

    # Conventional filtering, smoothing, and simulation smoothing
    ssm.filter_conventional = True
    cls.conventional_results = ssm.smooth()
    cls.conventional_sim = ssm.simulation_smoother(**zero_variates())

    # Univariate filtering, smoothing, and simulation smoothing
    ssm.filter_univariate = True
    cls.univariate_results = ssm.smooth()
    cls.univariate_sim = ssm.simulation_smoother(**zero_variates())
def test_forecast():
    """Check that a 10-step forecast from endog [1, 2] is all 2s."""
    n_ahead = 10
    results = MLEModel([1, 2], **kwargs).filter([])
    # A single row holding the ten expected forecast values
    expected = [[2] * n_ahead]
    assert_allclose(results.forecast(steps=n_ahead), expected)
def setup_class(cls, which='mixed', *args, **kwargs):
    """
    Build collapsed, conventional and augmented versions of one model.

    `which` selects the block of observations set to NaN ('all', 'partial'
    or 'mixed'; anything else leaves the data as is). Positional `args` are
    accepted but not used here. Keyword arguments are forwarded to
    `MLEModel`; `filter_collapsed` is dropped from them before the augmented
    model is created.
    """
    # Data
    macro = datasets.macrodata.load_pandas().data
    macro.index = pd.date_range(
        start='1959-01-01', end='2009-7-01', freq='QS')
    levels = macro[['realgdp', 'realcons', 'realinv']]
    obs = np.log(levels).diff().iloc[1:] * 400

    # (rows, columns) blocks set to missing for each scenario
    every_col = slice(None)
    missing_blocks = {
        'all': [(slice(None, 50), every_col), (slice(119, 130), every_col)],
        'partial': [(slice(0, 50), 0), (slice(119, 130), 0)],
        'mixed': [(slice(0, 50), 0), (slice(19, 70), 1), (slice(39, 90), 2),
                  (slice(119, 130), 0), (slice(119, 130), 2)],
    }
    for rows, cols in missing_blocks.get(which, ()):
        obs.iloc[rows, cols] = np.nan

    # Values shared by both models; a fresh array is built for every use
    design = [[-32.47143586, 17.33779024],
              [-7.40264169, 1.69279859],
              [-209.04702853, 125.2879374]]
    obs_variances = [0.0622668, 1.95666886, 58.37473642]
    transition = [[0.29935707, 0.33289005],
                  [-0.7639868, 1.2844237]]
    state_cov = [[1.2, -0.25],
                 [-0.25, 1.1]]

    # Create the model with typical state space
    mod = MLEModel(obs, k_states=2, k_posdef=2, **kwargs)
    mod['design'] = np.array(design)
    mod['obs_cov'] = np.diag(np.array(obs_variances))
    mod['transition'] = np.array(transition)
    mod['selection'] = np.eye(2)
    mod['state_cov'] = np.array(state_cov)
    mod.initialize_approximate_diffuse(1e6)
    cls.model = mod.ssm

    n_disturbance_variates = (
        (cls.model.k_endog + cls.model.k_posdef) * cls.model.nobs
    )

    def zero_variates():
        # Fresh all-zero inputs for each simulation smoother
        return {
            'disturbance_variates': np.zeros(n_disturbance_variates),
            'initial_state_variates': np.zeros(cls.model.k_states),
        }

    # Collapsed filtering, smoothing, and simulation smoothing
    cls.model.filter_collapsed = True
    cls.results_b = cls.model.smooth()
    cls.sim_b = cls.model.simulation_smoother(**zero_variates())

    # Conventional filtering, smoothing, and simulation smoothing
    cls.model.filter_collapsed = False
    cls.results_a = cls.model.smooth()
    cls.sim_a = cls.model.simulation_smoother(**zero_variates())

    # Create the model with augmented state space
    kwargs.pop('filter_collapsed', None)
    mod = MLEModel(obs, k_states=4, k_posdef=2, **kwargs)
    mod['design', :3, :2] = np.array(design)
    mod['obs_cov'] = np.diag(np.array(obs_variances))
    mod['transition', :2, :2] = np.array(transition)
    mod['transition', 2:, :2] = np.eye(2)
    mod['selection', :2, :2] = np.eye(2)
    mod['state_cov'] = np.array(state_cov)
    mod.initialize_approximate_diffuse(1e6)
    cls.augmented_model = mod.ssm
    cls.augmented_results = mod.ssm.smooth()