def test_set_bounds_for_Q_robust(self):
    """With robust bounds, the lower bound is 0.001 on the Q slice only."""
    self.robust_bounds = True
    q_slice = slice(10, 30)

    # All entries unbounded (None) except those covered by the slice.
    wanted = np.full(100, None, dtype=object)
    wanted[q_slice] = 0.001

    smo._set_bounds_for_Q(self, q_slice)
    aae(self.lower_bound, wanted)
def test_likelihood_value():
    """Regression test for the likelihood of the CHS example model.

    The model is built from the Stata dataset and the JSON model
    specification. The log likelihood contributions are evaluated at the
    start parameters read from csv, multiplied along axis 0 on the level
    scale, floored at 1e-250 and compared in logs with a pickled result of
    an earlier run.
    """
    df = pd.read_stata("skillmodels/tests/estimation/chs_test_ex2.dta")
    df.set_index(["id", "period"], inplace=True)

    with open("skillmodels/tests/estimation/test_model2.json") as j:
        model_dict = json.load(j)

    mod = SkillModel(
        model_dict=model_dict, dataset=df, estimator="chs",
        model_name="test_model"
    )
    args = mod.likelihood_arguments_dict()

    params_df = pd.read_csv(
        "skillmodels/tests/estimation/like_reg_params_new.csv")
    # Assign the cleaned columns back instead of calling inplace methods on
    # a column selection. The chained inplace form operates on a temporary
    # under pandas Copy-on-Write and would leave params_df unchanged.
    params_df["name2"] = params_df["name2"].fillna("")
    params_df["name1"] = params_df["name1"].replace("0", 0)
    params_df.set_index(
        ["category", "period", "name1", "name2"], inplace=True)
    mod.start_params = params_df

    full_params = mod.generate_full_start_params()["value"]
    log_like_contributions = log_likelihood_contributions(full_params, **args)
    like_contributions = np.exp(log_like_contributions)

    # Floor the likelihood so that the log below stays finite.
    small = 1e-250
    like_vec = np.prod(like_contributions, axis=0)
    like_vec[like_vec < small] = small
    res = np.log(like_vec)

    # NOTE: pickle is only acceptable here because the fixture is a trusted
    # file that lives in the repository.
    in_path = "skillmodels/tests/estimation/regression_test_fixture.pickle"
    with open(in_path, "rb") as p:
        last_result = pickle.load(p)

    aaae(res, last_result)
def test_predict_ff_mocked_same_result_in_second(self, mock_tsp, mock_pp):
    """Predicting the final factors twice gives the same result.

    This makes sure that arrays are copied where necessary, i.e. that the
    first call does not change the inputs seen by the second call.
    """
    self.likelihood_arguments_dict = Mock(return_value=self.lh_args)
    mock_tsp.side_effect = fake_tsp

    first_run = smo._predict_final_factors(self, self.change)
    second_run = smo._predict_final_factors(self, self.change)

    aaae(first_run, second_run)
def test_set_bounds_P_zero_restricted_not_robust(self):
    """Restricted P_zero without robust bounds: lower bound 0.0 on the
    first four bound indices, None everywhere else."""
    self.restrict_P_zeros = True
    self.robust_bounds = False

    wanted = np.full(100, None, dtype=object)
    wanted[self.bound_indices[:4]] = 0.0

    smo._set_bounds_for_P_zero(self, slice(10, 20))
    aae(self.lower_bound, wanted)
def test_params_slice_deltas(self):
    """The general params slice is requested once per period, with the
    number of True entries in that period's boolean deltas array."""
    all_free = np.ones((4, 2), dtype=bool)
    partly_free = np.ones((6, 3), dtype=bool)
    # The first two rows are not free, which leaves 12 True entries.
    partly_free[:2] = False

    self._deltas_bool = Mock(
        return_value=[all_free, partly_free, all_free])
    self._general_params_slice = Mock()

    smo._params_slice_for_deltas(self, 'short')

    wanted_calls = [call(length) for length in (8, 12, 8)]
    self._general_params_slice.assert_has_calls(wanted_calls)
def test_set_bounds_for_X_zero(self):
    """Only positions 16 and 20 of the lower bound are set to zero."""
    self.lower_bound = np.full(100, None, dtype=object)

    wanted = self.lower_bound.copy()
    for position in (16, 20):
        wanted[position] = 0

    x_zero_slice = slice(10, 22)
    smo._set_bounds_for_X_zero(self, params_slice=x_zero_slice)

    aae(self.lower_bound, wanted)
def test_loadings_intercepts_transparams_anchparams_and_xzeros(self):
    """Compare WA estimates with the parameters used to simulate the data.

    Data are generated with a very small measurement and transition noise
    scale (1e-5). The loadings, intercepts, initial means, gammas and
    anchoring parameters returned by ``_calculate_wa_quantities`` are then
    compared with the values that went into the simulation.
    """
    # Simulation settings.
    self.nobs = 5000
    self.base_meas_sd = 0.00001
    self.base_trans_sd = 0.00001
    self.anch_sd = 0.1

    # Noise parameters derived from the settings.
    self.true_meas_sd = self.true_loadings * self.base_meas_sd
    self.true_meas_var = self.true_meas_sd**2
    sd_steps = np.arange(start=0.2, step=0.1, stop=0.75)
    self.true_trans_sd = self.base_trans_sd * sd_steps.reshape(
        self.nperiods - 1, 2)
    self.true_trans_var = self.true_trans_sd**2

    # Initial covariance matrix and its upper triangle.
    self.true_cov_matrix = np.array([
        [1.44, 0.05, 0.1],
        [0.05, 2.25, 0.0],
        [0.1, 0.0, 4.0],
    ])
    self.true_P_zero = self.true_cov_matrix[np.triu_indices(self.nfac)]

    self.y_data = generate_test_data(
        nobs=self.nobs,
        factors=self.factor_names,
        periods=self.periods,
        included_positions=self.included_positions,
        meas_names=self.meas_names,
        initial_mean=self.true_X_zero,
        initial_cov=self.true_cov_matrix,
        intercepts=self.true_intercepts,
        loadings=self.true_loadings,
        meas_sd=self.true_meas_sd,
        gammas=self.true_gammas,
        trans_sd=self.true_trans_sd,
        anch_intercept=self.anch_intercept,
        anch_loadings=self.anch_loadings,
        anch_sd=self.anch_sd,
    )

    wa_model = SkillModel(
        model_name='no_squares_translog',
        dataset_name='test_data',
        model_dict=model_dict,
        dataset=self.y_data,
        estimator='wa',
    )

    # Entries prefixed with an underscore are not checked in this test.
    (storage_df, est_X_zero, _est_P_zero, est_gammas, _trans_vars,
     est_anch_intercept, est_anch_loadings, _anch_variance) = \
        wa_model._calculate_wa_quantities()

    aaae(storage_df['loadings'].values, self.true_loadings, decimal=3)
    aaae(storage_df['intercepts'].values, self.true_intercepts, decimal=3)
    aaae(est_X_zero, self.true_X_zero, decimal=1)
    for est, true in zip(est_gammas, self.true_gammas):
        aaae(est, true, decimal=3)
    assert_almost_equal(est_anch_intercept, 3.0, places=1)
    aaae(est_anch_loadings, self.anch_loadings, decimal=2)
def test_likelihood_value(model, params, data, model_name):
    """Regression test of the likelihood against a pickled result.

    The log likelihood contributions are exponentiated, multiplied along
    axis 0, floored at 1e-250 and compared in logs with the result stored
    for ``model_name``.
    """
    skill_model = SkillModel(model_dict=model, dataset=data)
    start_values = skill_model.generate_full_start_params(params)["value"]
    like_kwargs = skill_model.likelihood_arguments_dict()

    contributions = np.exp(
        log_likelihood_contributions(start_values, **like_kwargs))

    # Floor tiny likelihood values so that the log stays finite.
    floor = 1e-250
    likelihood = np.prod(contributions, axis=0)
    too_small = likelihood < floor
    likelihood[too_small] = floor
    calculated = np.log(likelihood)

    in_path = f"skillmodels/tests/regression/{model_name}_result.pickle"
    with open(in_path, "rb") as fixture_file:
        stored = pickle.load(fixture_file)

    aaae(calculated, stored)
def test_marginal_effect_outcome_anch_outcome(self):
    """The marginal effect outcome is 4 for all ten entries when the
    effect is measured on the anchoring outcome."""
    self.me_on = 'anch_outcome'
    self.me_anchor_on = True
    self.anchoring = True

    wanted = np.full(10, 4.0)
    result = smo._marginal_effect_outcome(self, self.change)

    aaae(result, wanted)
def test_predict_ff_intermediate_false_mocked(self, mock_tsp, mock_pp):
    """Without intermediate results only the final factors are returned:
    12 in the first column and 4 in the second."""
    self.likelihood_arguments_dict = Mock(return_value=self.lh_args)
    mock_tsp.side_effect = fake_tsp

    wanted = np.full((10, 2), 4.0)
    wanted[:, 0] = 12

    result = smo._predict_final_factors(self, self.change)
    aaae(result, wanted)
def test_all_bootstrap_params(self):
    """The bootstrap parameters form a frame with one row per replication
    and one column per parameter."""
    # Row r holds the value float(r) in all three columns.
    rows = [[float(rep)] * 3 for rep in range(3)]
    wanted = pd.DataFrame(
        data=rows,
        index=['rep_0', 'rep_1', 'rep_2'],
        columns=['p1', 'p2', 'p3'],
    )

    result = smo.all_bootstrap_params(self, params=np.ones(3))

    assert_frame_equal(result, wanted)
def test_all_variables_for_iv_equations_constant_factor(self):
    """For period 1 and factor 'f1' the second measurement list consists
    of copied measurements, all carrying the 'test' suffix."""
    wanted = [
        ['y11_test', 'y12_test'],
        ['y07_copied_test', 'y08_copied_test'],
    ]
    result = smo.all_variables_for_iv_equations(self, 1, 'f1', 'test')
    assert_equal(result, wanted)
def test_expand_params(self, mock_pt):
    """Short-to-long transformation fills the transition coefficient,
    X_zero and P_zero parts with the output of the mocked transforms and
    leaves the first five entries at zero."""
    mock_pt.transform_params_for_trans_coeffs.return_value = np.ones(9)
    mock_pt.transform_params_for_X_zero.return_value = np.arange(3)
    mock_pt.transform_params_for_P_zero.return_value = np.ones(3) * 17

    wanted = np.array([0] * 5 + [1] * 9 + list(range(3)) + [17] * 3)
    result = smo._transform_params(self, np.zeros(18), 'short_to_long')

    aae(result, wanted)
def test_initial_delta_with_controls_and_constants(self):
    """The initial deltas are zero arrays of shape (6, 3), (3, 4), (4, 3)."""
    wanted_shapes = [(6, 3), (3, 4), (4, 3)]
    wanted = [np.zeros(shape) for shape in wanted_shapes]

    result = SkillModel._initial_delta(self)

    for got, want in zip(result, wanted):
        aae(got, want)
def test_initial_trans_coeffs(self, mock_tf):
    """The initial transition coefficients are zero arrays whose second
    dimension is the number of coefficients reported by the mocked
    transition functions."""
    mock_tf.nr_coeffs_second_func.return_value = 10
    mock_tf.nr_coeffs_first_func.return_value = 3

    wanted = [np.zeros(shape) for shape in [(2, 3), (2, 10)]]
    result = smo._initial_trans_coeffs(self)

    for got, want in zip(result, wanted):
        aae(got, want)
def test_extended_meas_coeffs_no_constant_factor_and_intercepts_case(self):
    """Extended intercepts in period 0 are a Series named 'intercepts'
    indexed by the measurements y01 to y04."""
    wanted = pd.Series(
        data=[0.8, 1.2, 1.6, 2.0],
        name='intercepts',
        index=['y01', 'y02', 'y03', 'y04'],
    )
    result = smo.extended_meas_coeffs(self, 'intercepts', 0)
    assert_series_equal(result, wanted)
def test_extendend_meas_coeffs_constant_factor_and_loadings_case(self):
    """Extended loadings in period 1 are a Series named 'loadings' that
    also contains entries for the copied measurements."""
    wanted = pd.Series(
        data=[2.2, 2.6, 1.4, 1.8],
        name='loadings',
        index=['y11', 'y12', 'y03_copied', 'y04_copied'],
    )
    result = smo.extended_meas_coeffs(self, 'loadings', 1)
    assert_series_equal(result, wanted)
def test_indepvar_permutations(self):
    """The independent variables are all pairs made of one variable from
    each of the two lists of the mocked ``all_variables_for_iv_equations``."""
    self.all_variables_for_iv_equations = Mock(
        return_value=[['y1', 'y2'], ['y3', 'y4']])

    wanted = [
        [first, second]
        for first in ('y1', 'y2')
        for second in ('y3', 'y4')
    ]

    permutations = smo.variable_permutations_for_iv_equations(self, 1, 1)
    assert_equal(permutations[0], wanted)
def test_residual_measurements(self):
    """The residual measurements of period 1 come back as a frame with the
    columns 'm1_resid' and 'm2_resid'."""
    wanted = pd.DataFrame(
        np.array([[1.5, 2], [0.5, -2]]),
        columns=['m1_resid', 'm2_resid'],
    )
    result = smo.residual_measurements(self, period=1)
    assert_frame_equal(result, wanted)
def test_x_zero_names_short_params(self):
    """In the short parameter names the f3 entries from the second block
    onwards carry a 'diff_' prefix."""
    wanted = [
        'X_zero__0__f1', 'X_zero__0__f2',
        'X_zero__0__f3', 'X_zero__0__f4',
        'X_zero__1__f1', 'X_zero__1__f2',
        'diff_X_zero__1__f3', 'X_zero__1__f4',
        'X_zero__2__f1', 'X_zero__2__f2',
        'diff_X_zero__2__f3', 'X_zero__2__f4',
    ]
    result = smo._X_zero_names(self, params_type='short')
    assert_equal(result, wanted)
def test_generate_bs_samples(self):
    """With the numpy seed fixed at 495 the generated bootstrap samples
    equal three known lists of ids."""
    wanted = [
        ['id_1', 'id_1', 'id_1'],
        ['id_0', 'id_2', 'id_2'],
        ['id_2', 'id_2', 'id_1'],
    ]

    # Seed right before sampling so that the draws are reproducible.
    np.random.seed(495)
    result = smo._generate_bs_samples(self)

    assert_equal(result, wanted)
def test_set_bounds_for_trans_coeffs(self, mock_tf):
    """Bounds of the first transition function end up in the bound arrays.

    Only ``bounds_first_func`` exists on the mock. The test expects a lower
    bound of 0 at position 0, an upper bound of 1 at position 2 and no
    other changes.
    """
    first_lower = np.array([0, None, None], dtype=object)
    first_upper = np.array([None, None, 1], dtype=object)
    mock_tf.bounds_first_func.return_value = (first_lower, first_upper)
    # Deleting the attribute makes attribute access on the mock fail.
    del mock_tf.bounds_second_func

    slices = [[slice(0, 3)] * 2, [slice(3, 13), slice(13, 23)]]

    wanted_lower = self.lower_bound.copy()
    wanted_upper = self.upper_bound.copy()
    wanted_lower[0] = 0
    wanted_upper[2] = 1

    smo._set_bounds_for_trans_coeffs(self, slices)

    aae(self.lower_bound, wanted_lower)
    aae(self.upper_bound, wanted_upper)
def test_initial_w(mocker):  # noqa
    """The initial weights have shape (nobs, nemf) and all equal 1 / 3
    when there are three mixture elements."""
    mocker.nemf = 3
    mocker.nobs = 10

    wanted = np.full((10, 3), 1.0 / 3)
    result = SkillModel._initial_w(mocker)

    aae(result, wanted)
def test_generate_start_factors_cov_cholesky(self):
    """The sample covariance of 200000 generated start factors matches
    ``self.exp_cov`` to two decimals when ``cholesky_of_P_zero`` is set."""
    self.cholesky_of_P_zero = True
    self.me_params = np.array([5, 10, 1, 0.1, 1.99749844])
    self.nobs = 200000

    start_factors = smo._generate_start_factors(self)
    sample_cov = pd.DataFrame(start_factors).cov().values

    aaae(sample_cov, self.exp_cov, decimal=2)
def test_loadings_intercepts_transparams_anchparams_and_xzeros(self):
    """WA estimates recover the parameters of almost noise-free data.

    The data are simulated with a noise scale of 1e-5 for measurement and
    transition shocks. The estimated loadings, intercepts, initial means,
    gammas and anchoring parameters are then compared with the values that
    were used in the simulation.
    """
    self.nobs = 5000
    self.base_meas_sd = 0.00001
    self.base_trans_sd = 0.00001
    self.anch_sd = 0.1

    self.true_meas_sd = self.true_loadings * self.base_meas_sd
    self.true_meas_var = self.true_meas_sd ** 2

    trans_sd_grid = np.arange(start=0.2, step=0.1, stop=0.75).reshape(
        self.nperiods - 1, 2)
    self.true_trans_sd = self.base_trans_sd * trans_sd_grid
    self.true_trans_var = self.true_trans_sd ** 2

    self.true_cov_matrix = np.array(
        [[1.44, 0.05, 0.1], [0.05, 2.25, 0.0], [0.1, 0.0, 4.0]])
    upper_triangle = np.triu_indices(self.nfac)
    self.true_P_zero = self.true_cov_matrix[upper_triangle]

    # Collect the simulation inputs first, then generate the dataset.
    simulation_kwargs = dict(
        nobs=self.nobs,
        factors=self.factor_names,
        periods=self.periods,
        included_positions=self.included_positions,
        meas_names=self.meas_names,
        initial_mean=self.true_X_zero,
        initial_cov=self.true_cov_matrix,
        intercepts=self.true_intercepts,
        loadings=self.true_loadings,
        meas_sd=self.true_meas_sd,
        gammas=self.true_gammas,
        trans_sd=self.true_trans_sd,
        anch_intercept=self.anch_intercept,
        anch_loadings=self.anch_loadings,
        anch_sd=self.anch_sd,
    )
    self.y_data = generate_test_data(**simulation_kwargs)

    wa_model = SkillModel(
        model_name='no_squares_translog', dataset_name='test_data',
        model_dict=model_dict, dataset=self.y_data, estimator='wa')

    wa_quantities = wa_model._calculate_wa_quantities()
    # Positions 2, 4 and 7 (P_zero, transition variances and anchoring
    # variance) are not checked in this test.
    (storage_df, x_zero_est, _p_zero_est, gammas_est, _trans_vars_est,
     anch_intercept_est, anch_loadings_est, _anch_var_est) = wa_quantities

    loadings_est = storage_df['loadings']
    intercepts_est = storage_df['intercepts']

    aaae(loadings_est.values, self.true_loadings, decimal=3)
    aaae(intercepts_est.values, self.true_intercepts, decimal=3)
    aaae(x_zero_est, self.true_X_zero, decimal=1)
    for estimated, true in zip(gammas_est, self.true_gammas):
        aaae(estimated, true, decimal=3)
    assert_almost_equal(anch_intercept_est, 3.0, places=1)
    aaae(anch_loadings_est, self.anch_loadings, decimal=2)
def test_initial_deltas_without_controls_besides_constant(self):
    """Without controls each initial deltas array has a single column."""
    self.controls = [[], [], []]

    wanted = [
        np.array([3, 0, 0, 4, 0, 0]).reshape(6, 1),
        np.array([5, 6, 0]).reshape(3, 1),
        np.array([7, 0, 0, 0]).reshape(4, 1),
    ]
    result = smo._initial_deltas(self)

    for got, want in zip(result, wanted):
        aae(got, want)
def test_anch_outcome_from_final_factors_with_linear_anchoring(self):
    """With linear anchoring and a mocked ``_anchor_final_factors`` the
    anchoring outcome is 3.6 for all ten entries."""
    self.anchoring_update_type = 'linear'
    self.anchoring = True
    self._anchor_final_factors = Mock(
        return_value=self.exp_anchored_factors)

    wanted = np.full(10, 3.6)
    result = smo._anchoring_outcome_from_final_factors(
        self, self.final_factors, self.al, self.ai)

    aae(result, wanted)
def test_initial_deltas_with_controls_and_constants(self):
    """With controls only the first column of the initial deltas holds
    non-zero entries."""
    wanted = [
        np.array([
            [3, 0, 0],
            [0, 0, 0],
            [0, 0, 0],
            [4, 0, 0],
            [0, 0, 0],
            [0, 0, 0],
        ]),
        np.array([
            [5, 0, 0, 0],
            [6, 0, 0, 0],
            [0, 0, 0, 0],
        ]),
        np.array([
            [7, 0, 0],
            [0, 0, 0],
            [0, 0, 0],
            [0, 0, 0],
        ]),
    ]

    result = smo._initial_deltas(self)

    for got, want in zip(result, wanted):
        aae(got, want)
def test_deltas_bool_without_controls_besides_constants(self):
    """Without controls each boolean deltas array is a single column."""
    self.controls = [[], [], []]

    columns = [
        [False, True, True, False, True, True],
        [False, False, True],
        [False, True, True, True],
    ]
    # Turn each flat list into a column vector.
    wanted = [np.array(col)[:, None] for col in columns]

    result = smo._deltas_bool(self)

    for got, want in zip(result, wanted):
        aae(got, want)
def test_predict_ff_intermediate_true_mocked(self, mock_tsp, mock_pp):
    """With the third argument set to True the prediction returns one
    array of factors per step."""
    self.likelihood_arguments_dict = Mock(return_value=self.lh_args)
    mock_tsp.side_effect = fake_tsp

    first_step = np.ones((10, 2))
    second_step = np.full((10, 2), 2.0)
    second_step[:, 0] = 4
    wanted = [first_step, second_step]

    result = smo._predict_final_factors(self, self.change, True)

    for got, want in zip(result, wanted):
        aaae(got, want)
def test_initial_p_normal_filters(p_mocker):
    """Without square root filters the initial P arrays are zero arrays of
    shape (10, 2, 3, 3) and (20, 3, 3), and a change to the first one
    shows up in the second one."""
    p_mocker.square_root_filters = False

    wanted = [np.zeros(shape) for shape in [(10, 2, 3, 3), (20, 3, 3)]]
    result = SkillModel._initial_p(p_mocker)

    for got, want in zip(result, wanted):
        aae(got, want)

    # The second array has to point to the same data as the first, so an
    # in-place change of the first must be visible in the second.
    four_dim, three_dim = result
    four_dim += 1
    aae(three_dim, np.ones_like(three_dim))
def test_P_zero_names_long(self):
    """The long P_zero names cover the upper triangle of the factor
    covariance matrix, row by row."""
    factor_names = ['f1', 'f2', 'f3']
    self.nemf = 1
    self.nfac = 3
    self.factors = factor_names
    self.restrict_P_zeros = False
    self.cholesky_of_P_zero = False

    template = 'P_zero__0__{}__{}'
    wanted = [
        template.format(row_fac, col_fac)
        for pos, row_fac in enumerate(factor_names)
        for col_fac in factor_names[pos:]
    ]

    result = smo._P_zero_names(self, params_type='long')
    assert_equal(result, wanted)
def test_trans_coeffs_names(self, mock_tf):
    """Names from ``coeff_names_first_func`` are used as they are; for the
    second function, which has no names function on the mock, the test
    expects 'trans_coeff__...' names."""
    mock_tf.coeff_names_first_func.return_value = ['epsilon', 'psi', 'pi']
    mock_tf.nr_coeffs_second_func.return_value = 2
    # Deleting the attribute makes attribute access on the mock fail.
    del mock_tf.coeff_names_second_func

    wanted = [
        'epsilon',
        'psi',
        'pi',
        'trans_coeff__0__f2__0',
        'trans_coeff__0__f2__1',
        'trans_coeff__1__f2__0',
        'trans_coeff__1__f2__1',
    ]
    result = smo._trans_coeffs_names(self, params_type='short')

    assert_equal(result, wanted)
def test_select_bootstrap_data(self):
    """The data selected for replication 0 contain 'id_0' once and 'id_1'
    twice, with three periods each."""
    period_col = [0.0, 1.0, 2.0] * 3
    arange_col = [0.0, 1.0, 2.0] + [3.0, 4.0, 5.0] * 2

    wanted = pd.DataFrame(
        data=np.array([period_col, arange_col]).T,
        columns=['period', 'arange'],
    )
    wanted['id'] = ['id_0'] * 3 + ['id_1'] * 6

    result = smo._select_bootstrap_data(self, 0)

    assert_frame_equal(result, wanted)
def test_instrument_permutations(self):
    """The second element of the permutations holds the instruments, and
    the expected names carry no '_resid' suffix."""
    self.all_variables_for_iv_equations = Mock(
        return_value=[['y1_resid', 'y2_resid'], ['y3_resid', 'y4_resid']])

    wanted = [
        [['y2'], ['y4']],
        [['y2'], ['y3']],
        [['y1'], ['y4']],
        [['y1'], ['y3']],
    ]

    permutations = smo.variable_permutations_for_iv_equations(self, 1, 1)
    assert_equal(permutations[1], wanted)
def test_params_slices_for_trans_coeffs(self, mock_tf):
    """Check the returned slices and the calls that produced them.

    The general params slice is requested with lengths 3, 10 and 10, and
    the mocked coefficient counters are called with the included factors
    and the params type.
    """
    mock_tf.nr_coeffs_first_func.return_value = 3
    mock_tf.nr_coeffs_second_func.return_value = 10
    self._general_params_slice = Mock(
        side_effect=[slice(0, 3), slice(3, 13), slice(13, 23)])

    result = smo._params_slice_for_trans_coeffs(self, params_type='short')

    self._general_params_slice.assert_has_calls(
        [call(3), call(10), call(10)])

    first_func_call = call(
        included_factors=['f1', 'f2'], params_type='short')
    mock_tf.nr_coeffs_first_func.assert_has_calls([first_func_call])

    second_func_call = call(included_factors=['f2'], params_type='short')
    mock_tf.nr_coeffs_second_func.assert_has_calls([second_func_call] * 2)

    wanted = [[slice(0, 3)] * 2, [slice(3, 13), slice(13, 23)]]
    assert_equal(wanted, result)
def test_initial_x(mocker):  # noqa
    """The initial x arrays are zero arrays of shape (10, 2, 3) and
    (20, 3), and a change to the first one shows up in the second one."""
    mocker.nfac = 3
    mocker.nemf = 2
    mocker.nobs = 10

    three_dim, two_dim = SkillModel._initial_x(mocker)

    aae(three_dim, np.zeros((10, 2, 3)))
    aae(two_dim, np.zeros((20, 3)))

    # An in-place change of the first array must be visible in the second.
    three_dim += 1
    aae(two_dim, np.ones((20, 3)))
def test_initial_trans_coeffs(mocker):  # noqa
    """The initial transition coefficients are zero arrays with one row
    per transition (nperiods - 1) and one column per index tuple of the
    factor's transition function."""
    mocker.nperiods = 5
    mocker.factors = ["fac1", "fac2", "fac3"]
    mocker.transition_names = ["linear", "linear", "log_ces"]
    mocker.included_factors = [["fac1", "fac2"], ["fac2"], ["fac2", "fac3"]]

    # Patch the index tuple functions: four entries for linear, three for
    # log_ces.
    mocker.patch(
        "skillmodels.estimation.skill_model.tf.index_tuples_linear"
    ).return_value = [0, 1, 2, 3]
    mocker.patch(
        "skillmodels.estimation.skill_model.tf.index_tuples_log_ces"
    ).return_value = [0, 1, 2]

    wanted = [np.zeros(shape) for shape in [(4, 4), (4, 4), (4, 3)]]
    result = SkillModel._initial_trans_coeffs(mocker)

    for got, want in zip(result, wanted):
        aae(got, want)
def test_julier_scaling_factor(self):
    """The sigma scaling factor is approximately 2.34520787991."""
    result = SkillModel.sigma_scaling_factor(self)
    assert_almost_equal(result, 2.34520787991)
def test_pzero_and_measurement_variances(self):
    """Compare estimated P_zero and variances with the simulation inputs.

    Data are simulated with measurement noise scale 0.15, a transition
    noise scale of 1e-50 and anchoring sd 0.4. The estimated P_zero, the
    first nine measurement error variances and the anchoring standard
    deviation are compared with the values used in the simulation.
    """
    self.nobs = 20000
    self.true_gammas = [
        [[1.1, 0.01, 0.02, 0.0, 0.0, 0.0, 0.5],
         [1.2, 0.03, 0.03, 0.0, 0.0, 0.0, 0.6],
         [1.3, 0.05, 0.04, 0.0, 0.0, 0.0, 0.7]],
        [[1.05, 0.01, 0.0, 0.25],
         [1.15, 0.04, 0.0, 0.75],
         [1.25, 0.07, 0.0, 1.25]],
        np.zeros((3, 0)),
    ]

    # Noise scales.
    self.base_meas_sd = 0.15
    self.base_trans_sd = 1e-50
    self.anch_sd = 0.4

    self.true_meas_sd = self.true_loadings * self.base_meas_sd
    self.true_meas_var = self.true_meas_sd**2

    trans_sd_grid = np.arange(start=0.2, step=0.1, stop=0.75).reshape(
        self.nperiods - 1, 2)
    self.true_trans_sd = self.base_trans_sd * trans_sd_grid
    self.true_trans_var = self.true_trans_sd**2

    self.true_cov_matrix = np.array([
        [1.0, 0.05, 0.05],
        [0.05, 1.0, 0.05],
        [0.05, 0.05, 1.0],
    ])
    self.true_P_zero = self.true_cov_matrix[np.triu_indices(self.nfac)]

    simulation_kwargs = dict(
        nobs=self.nobs,
        factors=self.factor_names,
        periods=self.periods,
        included_positions=self.included_positions,
        meas_names=self.meas_names,
        initial_mean=self.true_X_zero,
        initial_cov=self.true_cov_matrix,
        intercepts=self.true_intercepts,
        loadings=self.true_loadings,
        meas_sd=self.true_meas_sd,
        gammas=self.true_gammas,
        trans_sd=self.true_trans_sd,
        anch_intercept=self.anch_intercept,
        anch_loadings=self.anch_loadings,
        anch_sd=self.anch_sd,
    )
    self.y_data = generate_test_data(**simulation_kwargs)

    wa_model = SkillModel(
        model_name='no_squares_translog', dataset_name='test_data',
        model_dict=model_dict, dataset=self.y_data, estimator='wa')

    # Only the storage frame, P_zero and the anchoring variance are checked.
    (storage_df, _x_zero_est, p_zero_est, _gammas_est, _trans_vars_est,
     _anch_intercept_est, _anch_loadings_est, anch_var_est) = \
        wa_model._calculate_wa_quantities()

    epsilon_vars_est = storage_df['meas_error_variances'].values

    aaae(p_zero_est, self.true_P_zero, decimal=2)
    aaae(epsilon_vars_est[:9], self.true_meas_var[:9], decimal=2)
    assert_almost_equal(np.sqrt(anch_var_est), self.anch_sd, places=1)
def test_initial_q(mocker):  # noqa
    """The initial Q is a zero array of shape (nperiods - 1, nfac, nfac)."""
    mocker.nfac = 3
    mocker.nperiods = 5

    result = SkillModel._initial_q(mocker)

    aae(result, np.zeros((4, 3, 3)))
def test_initial_r(mocker):  # noqa
    """The initial R is a zero vector with one entry per update."""
    mocker.nupdates = 8
    wanted = np.zeros(8)
    result = SkillModel._initial_r(mocker)
    aae(result, wanted)
def test_initial_h(mocker):  # noqa
    """The initial H is a zero array of shape (nupdates, nfac)."""
    mocker.nupdates = 10
    mocker.nfac = 5

    wanted = np.zeros((10, 5))
    result = SkillModel._initial_h(mocker)

    aae(result, wanted)
def test_initial_delta_without_controls_besides_constant(self):
    """Without controls the initial deltas are single-column zero arrays
    with 6, 3 and 4 rows."""
    self.controls = [[], [], []]

    wanted = [np.zeros((nrows, 1)) for nrows in (6, 3, 4)]
    result = SkillModel._initial_delta(self)

    for got, want in zip(result, wanted):
        aae(got, want)
def test_julier_sigma_weight_construction(self):
    """The first element returned by ``sigma_weights`` equals the
    'julier_wm' fixture."""
    wanted = self.fixtures["julier_wm"]
    result = SkillModel.sigma_weights(self)
    aae(result[0], wanted)