def test6():
    """Check the comparison file after a fit that performs zero iterations.

    Extends test5: with ``MAXITER`` set to zero and ``START`` set to
    ``"init"``, the entries of ``comparison.grmpy.info`` for the observed
    sample and for the simulated samples (start and finish) are compared
    with each other for every section of the file.
    """
    for _ in range(5):
        constr = {
            "DETERMINISTIC": False,
            "MAXITER": 0,
            "AGENTS": 15000,
            "START": "init",
            "SAME_SIZE": True,
        }
        init = generate_random_dict(constr)
        # Overwrite two of the distribution parameters before the
        # specification is printed again.
        init["DIST"]["params"][1] = 0.0
        init["DIST"]["params"][5] = 1.0
        print_dict(init)

        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")

        desc = read_desc("comparison.grmpy.info")
        for section in ["ALL", "TREATED", "UNTREATED"]:
            entries = desc[section]
            # All reported sample sizes have to coincide.
            np.testing.assert_equal(len(set(entries["Number"])), 1)
            # NOTE(review): the third argument of these numpy helpers is
            # ``decimal`` (number of decimals), not an absolute tolerance, so
            # 0.001 makes the comparison very loose -- confirm the intended
            # precision.
            np.testing.assert_almost_equal(
                entries["Observed Sample"],
                entries["Simulated Sample (finish)"],
                decimal=0.001,
            )
            np.testing.assert_array_almost_equal(
                entries["Simulated Sample (finish)"],
                entries["Simulated Sample (start)"],
                decimal=0.001,
            )
def test1():
    """Repeatedly generate a random init specification and simulate from it.

    Each of the ten rounds draws a random dictionary, passes it to
    ``print_dict`` and then runs ``simulate`` on ``test.grmpy.ini``.
    """
    for _ in range(10):
        print_dict(generate_random_dict())
        simulate('test.grmpy.ini')
def test5():
    """Run simulation and estimation with the iteration limit set to zero."""
    for _ in range(10):
        # A fresh constraint dictionary is built for every round.
        generate_random_dict({'DETERMINISTIC': False, 'MAXITER': 0})
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
def test3():
    """Simulate repeatedly from random specifications with a single agent.

    With only one agent the simulated sample contains either only treated or
    only untreated observations; the test passes if ``simulate`` runs through.
    """
    single_agent = constraints(probability=0.0, agents=1)
    for _ in range(10):
        generate_random_dict(single_agent)
        simulate('test.grmpy.ini')
def test5():
    """Run simulation and estimation with the iteration limit set to zero.

    The constraints are rebuilt through ``constraints`` in every round.
    """
    for _ in range(10):
        generate_random_dict(constraints(probability=0.0, maxiter=0))
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
def test5():
    """Run simulation and fitting with the iteration limit set to zero."""
    for _ in range(5):
        # A fresh constraint dictionary is built for every round.
        generate_random_dict({"DETERMINISTIC": False, "MAXITER": 0})
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")
def test4():
    """Check that the estimation runs with the Powell optimizer selected.

    Five random specifications are generated with 10000 agents, start values
    taken from the init file and the Powell algorithm requested as optimizer;
    each one is simulated and estimated.
    """
    for _ in range(5):
        # Constraint keys are upper case throughout this file ('AGENTS',
        # 'MAXITER', 'START', ...) and the optimizer label is spelled
        # 'SCIPY-POWELL' in the ``constraints``-based variant of this test.
        # NOTE(review): assumes ``generate_random_dict`` looks up the
        # upper-case keys 'START' and 'OPTIMIZER' -- confirm.
        constr = {
            'DETERMINISTIC': False,
            'AGENTS': 10000,
            'START': 'init',
            'OPTIMIZER': 'SCIPY-POWELL',
        }
        generate_random_dict(constr)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
def test4():
    """Check that the estimation runs with the Powell optimizer selected.

    The constraints request 10000 agents, ``'init'`` start values and the
    ``'SCIPY-POWELL'`` optimizer.
    """
    for _ in range(5):
        powell_setup = constraints(
            probability=0.0,
            agents=10000,
            start='init',
            optimizer='SCIPY-POWELL',
        )
        generate_random_dict(powell_setup)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
def test9():
    """Run the full pipeline with state-specific covariates and overlap.

    Ensures that generating a random initialization file, reading it,
    simulating and estimating all work when the constraints allow a different
    number of covariates for each treatment state and the occurrence of
    cost-benefit shifters.
    """
    for _ in range(5):
        # The sample size key is 'AGENTS' in every other test of this file;
        # the former 'AGENT' spelling did not match it.
        # NOTE(review): assumes ``generate_random_dict`` reads 'AGENTS' --
        # confirm.
        constr = {
            'DETERMINISTIC': False,
            'AGENTS': 1000,
            'STATE_DIFF': True,
            'OVERLAP': True,
        }
        generate_random_dict(constr)
        read('test.grmpy.ini')
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')

    cleanup()
def test5():
    """Check that the MTE is constant across quantiles for equal covariances.

    The entry at position 2 of the distribution parameters is overwritten with
    the entry at position 4 before the covariance matrix is constructed;
    ``mte_information`` is then expected to return one unique value over the
    whole quantile grid.
    """
    for _ in range(10):
        generate_random_dict()
        spec = read('test.grmpy.ini')

        # Impose an identical covariance between the random components of the
        # potential outcomes and the random component determining choice.
        dist = spec['DIST']['all']
        dist[2] = dist[4]

        beta_untreated = spec['UNTREATED']['all']
        beta_treated = spec['TREATED']['all']

        cov = construct_covariance_matrix(spec)
        df = simulate('test.grmpy.ini')
        covariates = df.filter(regex=r'^X\_', axis=1)

        quantiles = [0.01] + list(np.arange(0.05, 1, 0.05)) + [0.99]
        mte = mte_information(beta_treated, beta_untreated, cov, quantiles,
                              covariates)

        # A single unique value means the effect does not vary by quantile.
        np.testing.assert_equal(len(set(mte)), 1)
def test1():
    """Check the relationships between the columns of the simulated data.

    Runs once with ``DETERMINISTIC`` set to ``True`` and once with ``False``.
    In both settings the potential outcomes must equal the linear index of the
    respective covariates plus the unobservable, and the observed outcome must
    equal the potential outcome of the realized treatment state.
    """
    constr = dict()
    for is_deterministic in (True, False):
        constr['DETERMINISTIC'] = is_deterministic
        for _ in range(10):
            generate_random_dict(constr)
            df = simulate('test.grmpy.ini')
            spec = read('test.grmpy.ini')

            # 'order' holds one-based positions into 'varnames'.
            names = spec['varnames']
            cols_treated = [names[i - 1] for i in spec['TREATED']['order']]
            cols_untreated = [names[i - 1] for i in spec['UNTREATED']['order']]

            index_treated = spec['TREATED']['all'] * df[cols_treated]
            y_treated = pd.DataFrame.sum(index_treated, axis=1) + df.U1

            index_untreated = spec['UNTREATED']['all'] * df[cols_untreated]
            y_untreated = pd.DataFrame.sum(index_untreated, axis=1) + df.U0

            np.testing.assert_array_almost_equal(df.Y1, y_treated, decimal=5)
            np.testing.assert_array_almost_equal(df.Y0, y_untreated, decimal=5)

            treated = df.D == 1
            untreated = df.D == 0
            np.testing.assert_array_equal(df.Y[treated], df.Y1[treated])
            np.testing.assert_array_equal(df.Y[untreated], df.Y0[untreated])
def test1():
    """Check the relationships between the columns of the simulated data.

    Runs once in a deterministic and once in a non-deterministic setting. In
    both settings the potential outcomes must equal the linear index of the
    respective covariates plus the unobservable, and the observed outcome must
    equal the potential outcome of the realized treatment state.
    """
    constr = dict()
    # Both settings have to be exercised; previously the second pass set the
    # flag to True as well, so the non-deterministic case never ran.
    for is_deterministic in (True, False):
        constr["DETERMINISTIC"] = is_deterministic
        for _ in range(10):
            generate_random_dict(constr)
            df = simulate("test.grmpy.yml")
            dict_ = read("test.grmpy.yml")

            x_treated = df[dict_["TREATED"]["order"]]
            y_treated = (
                pd.DataFrame.sum(dict_["TREATED"]["params"] * x_treated, axis=1)
                + df.U1
            )

            x_untreated = df[dict_["UNTREATED"]["order"]]
            y_untreated = (
                pd.DataFrame.sum(
                    dict_["UNTREATED"]["params"] * x_untreated, axis=1
                )
                + df.U0
            )

            np.testing.assert_array_almost_equal(df.Y1, y_treated, decimal=5)
            np.testing.assert_array_almost_equal(df.Y0, y_untreated, decimal=5)
            np.testing.assert_array_equal(df.Y[df.D == 1], df.Y1[df.D == 1])
            np.testing.assert_array_equal(df.Y[df.D == 0], df.Y0[df.D == 0])
def test4():
    """Check that the estimation runs with the Powell optimizer selected.

    Five random specifications are generated with 10000 agents, start values
    taken from the init file and the Powell algorithm requested as optimizer;
    each one is simulated and fitted.
    """
    for _ in range(5):
        # Constraint keys are upper case throughout this file ("AGENTS",
        # "MAXITER", "START", ...) and the optimizer label is spelled
        # "SCIPY-POWELL" in the ``constraints``-based variant of this test.
        # NOTE(review): assumes ``generate_random_dict`` looks up the
        # upper-case keys "START" and "OPTIMIZER" -- confirm.
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 10000,
            "START": "init",
            "OPTIMIZER": "SCIPY-POWELL",
        }
        generate_random_dict(constr)
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")
def check_vault(num_tests=100):
    """Check the regression vault that is distributed as part of the package.

    Parameters
    ----------
    num_tests : int, optional
        Number of vault entries to evaluate. If it exceeds the number of
        stored entries, a notice is printed and the complete vault is run.

    For every selected entry the stored specification is printed, the data
    are simulated, and both the criterion function value at the init start
    values and the sum over the simulated data frame are compared with the
    stored numbers.
    """
    fname = (os.path.dirname(grmpy.__file__) +
             "/test/resources/old_regression_vault.grmpy.json")
    # Close the vault file deterministically instead of leaking the handle.
    with open(fname) as vault:
        tests = json.load(vault)

    if num_tests > len(tests):
        print("The specified number of evaluations is larger than the number"
              " of entries in the regression_test vault.\n"
              "Therefore the test runs the complete test battery.")
    else:
        # Draw without replacement so that no entry is evaluated twice; this
        # is always feasible here because num_tests <= len(tests).
        picked = np.random.choice(len(tests), num_tests, replace=False)
        tests = [tests[i] for i in picked]

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_transformation(dict_))
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        np.testing.assert_almost_equal(criteria_, criteria)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)

    cleanup("regression")
def test5():
    """Check that the MTE is constant across quantiles for equal covariances.

    The entry at position 2 of the distribution parameters is overwritten with
    the entry at position 4 before the covariance matrix is constructed;
    ``mte_information`` is then expected to return one unique value over the
    whole quantile grid.
    """
    for _ in range(10):
        generate_random_dict()
        spec = read("test.grmpy.yml")

        # Impose an identical covariance between the random components of the
        # potential outcomes and the random component determining choice.
        dist = spec["DIST"]["params"]
        dist[2] = dist[4]

        beta_untreated = spec["UNTREATED"]["params"]
        beta_treated = spec["TREATED"]["params"]

        cov = construct_covariance_matrix(spec)
        df = simulate("test.grmpy.yml")

        # Every covariate that enters either outcome equation, without
        # duplicates.
        labels = list(set(spec["TREATED"]["order"] + spec["UNTREATED"]["order"]))
        covariates = df[labels]

        quantiles = [0.01] + list(np.arange(0.05, 1, 0.05)) + [0.99]
        mte = mte_information(
            beta_treated, beta_untreated, cov, quantiles, covariates, spec
        )

        # A single unique value means the effect does not vary by quantile.
        np.testing.assert_equal(len(set(mte)), 1)
def test9():
    """Run the full pipeline with state-specific covariates and overlap.

    Ensures that generating a random initialization file, reading it,
    simulating and fitting all work when the constraints allow a different
    number of covariates for each treatment state and the occurrence of
    cost-benefit shifters.
    """
    for _ in range(5):
        # The sample size key is "AGENTS" in every other test of this file;
        # the former "AGENT" spelling did not match it.
        # NOTE(review): assumes ``generate_random_dict`` reads "AGENTS" --
        # confirm.
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 1000,
            "STATE_DIFF": True,
            "OVERLAP": True,
        }
        generate_random_dict(constr)
        read("test.grmpy.yml")
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")

    cleanup()
def simulate_test_data():
    """Simulate the tutorial data set and return it with its init dictionary.

    Returns
    -------
    tuple
        ``(dict_, data)`` as returned by ``check_append_constant`` when called
        with ``semipar=True`` on the tutorial specification.
    """
    tutorial = TEST_RESOURCES_DIR + "/tutorial.grmpy.yml"
    simulated = simulate(tutorial)
    spec = read(tutorial)
    return check_append_constant(tutorial, spec, simulated, semipar=True)
def create_vault(num_tests=100, seed=123):
    """Create a new regression vault.

    Parameters
    ----------
    num_tests : int, optional
        Number of random specifications stored in the vault.
    seed : int, optional
        Seed passed to ``np.random.seed`` before any specification is drawn.

    Each entry is a pair of the sum over the simulated data frame and the
    generated dictionary. The result is written to
    ``regression_vault.grmpy.json`` in the current working directory.
    """
    np.random.seed(seed)

    tests = []
    for _ in range(num_tests):
        dict_ = generate_random_dict()
        df = simulate('test.grmpy.ini')
        stat = np.sum(df.sum())
        tests.append((stat, dict_))
        cleanup()

    # The context manager guarantees the vault is flushed and closed.
    with open('regression_vault.grmpy.json', 'w') as vault:
        json.dump(tests, vault)
def test_rslt_dictionary():
    """Compare the semiparametric tutorial estimates with stored results.

    Simulates and fits the semipar tutorial specification and checks the
    entries of the returned dictionary against the pickled reference results
    in the test resources directory.
    """
    fname = TEST_RESOURCES_DIR + "/tutorial-semipar.grmpy.yml"
    simulate(fname)
    rslt = fit(fname, semipar=True)

    # The pickle ships with the test resources; the context manager closes
    # the file handle instead of leaking it.
    with open(TEST_RESOURCES_DIR + "/tutorial-semipar-results.pkl", "rb") as file_:
        expected_rslt = pickle.load(file_)

    np.testing.assert_equal(rslt["quantiles"], expected_rslt["quantiles"])
    np.testing.assert_almost_equal(rslt["mte"], expected_rslt["mte"], 7)
    np.testing.assert_almost_equal(rslt["mte_u"], expected_rslt["mte_u"], 7)
    np.testing.assert_almost_equal(rslt["mte_min"], expected_rslt["mte_min"], 5)
    np.testing.assert_almost_equal(rslt["mte_max"], expected_rslt["mte_max"], 5)
    np.testing.assert_almost_equal(rslt["b0"], expected_rslt["b0"], 7)
    np.testing.assert_almost_equal(rslt["b1"], expected_rslt["b1"], 7)
def check_vault():
    """Check the complete regression vault distributed with the package.

    For every stored entry the specification is printed, the data are
    simulated, and the sum over the simulated data frame is compared with the
    stored statistic.
    """
    fname = os.path.dirname(
        grmpy.__file__) + '/test/resources/regression_vault.grmpy.json'
    # Close the vault file deterministically instead of leaking the handle.
    with open(fname) as vault:
        tests = json.load(vault)

    # Each entry carries three fields; the third (criterion value) is not
    # evaluated here.
    for stat, dict_, _ in tests:
        print_dict(dict_)
        df = simulate('test.grmpy.ini')
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)

    cleanup('regression')
def test6():
    """Check the descriptives file after an estimation with zero iterations.

    Extends test5: with ``maxiter=0`` and ``start='init'`` the entries of
    ``descriptives.grmpy.txt`` for the observed sample and for the simulated
    samples (start and finish) must be equal in every section.
    """
    for _ in range(5):
        setup = constraints(probability=0.0, maxiter=0, agents=1000,
                            start='init')
        generate_random_dict(setup)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')

        desc = read_desc('descriptives.grmpy.txt')
        for label in ['All', 'Treated', 'Untreated']:
            entries = desc[label]
            # All reported sample sizes have to coincide.
            np.testing.assert_equal(len(set(entries['Number'])), 1)
            np.testing.assert_array_equal(
                entries['Observed Sample'],
                entries['Simulated Sample (finish)'])
            np.testing.assert_array_equal(
                entries['Simulated Sample (finish)'],
                entries['Simulated Sample (start)'])

    cleanup()
def test2():
    """Check the simulated relationships when coefficients are set to zero.

    Five setups are covered in each round: all coefficients zero, only the
    treated, only the untreated, only the cost coefficients zero, and both
    outcome equations zero at the same time.
    """
    # Setups that zero out more than one section; every other setup name is
    # itself the single section to zero out.
    combined = {
        'ALL': ['TREATED', 'UNTREATED', 'COST'],
        'TREATED & UNTREATED': ['TREATED', 'UNTREATED'],
    }
    setups = ['ALL', 'TREATED', 'UNTREATED', 'COST', 'TREATED & UNTREATED']

    for _ in range(10):
        for setup in setups:
            spec = generate_random_dict(constraints(probability=0.0))
            for section in combined.get(setup, [setup]):
                spec[section]['coeff'] = np.zeros(len(spec[section]['coeff']))
            print_dict(spec)

            init = read('test.grmpy.ini')
            df = simulate('test.grmpy.ini')
            x = df.filter(regex=r'^X\_', axis=1)

            treated = df.D == 1
            untreated = df.D == 0

            if setup == 'ALL':
                np.testing.assert_array_equal(df.Y1, df.U1)
                np.testing.assert_array_equal(df.Y0, df.U0)
            elif setup == 'TREATED & UNTREATED':
                np.testing.assert_array_equal(df.Y1, df.U1)
                np.testing.assert_array_equal(df.Y0, df.U0)
                np.testing.assert_array_equal(df.Y[treated], df.U1[treated])
                np.testing.assert_array_equal(df.Y[untreated], df.U0[untreated])
            elif setup == 'TREATED':
                index_untreated = init['UNTREATED']['all'] * x
                y_untreated = pd.DataFrame.sum(index_untreated, axis=1) + df.U0
                np.testing.assert_array_almost_equal(df.Y0, y_untreated,
                                                     decimal=5)
                np.testing.assert_array_equal(df.Y1, df.U1)
            elif setup == 'UNTREATED':
                index_treated = init['TREATED']['all'] * x
                y_treated = pd.DataFrame.sum(index_treated, axis=1) + df.U1
                np.testing.assert_array_almost_equal(df.Y1, y_treated,
                                                     decimal=5)
                np.testing.assert_array_equal(df.Y0, df.U0)
            else:
                # Only the cost coefficients are zero: both outcome equations
                # keep their full linear index.
                index_treated = init['TREATED']['all'] * x
                y_treated = pd.DataFrame.sum(index_treated, axis=1) + df.U1
                index_untreated = init['UNTREATED']['all'] * x
                y_untreated = pd.DataFrame.sum(index_untreated, axis=1) + df.U0
                np.testing.assert_array_almost_equal(df.Y1, y_treated,
                                                     decimal=5)
                np.testing.assert_array_almost_equal(df.Y0, y_untreated,
                                                     decimal=5)

            # These relationships are checked for every setup.
            np.testing.assert_array_equal(df.Y[treated], df.Y1[treated])
            np.testing.assert_array_equal(df.Y[untreated], df.Y0[untreated])
            np.testing.assert_array_almost_equal(df.V, (df.UC - df.U1 + df.U0))
def test3():
    """Simulate with a single agent and check the resulting start values.

    With one agent the sample holds only treated or only untreated
    observations. The test asserts that ``start_values`` in ``"auto"`` mode
    agrees with the init values stored under ``AUX`` once the last six and
    the last four entries, respectively, are dropped. Per the original test
    description this fallback is due to perfect separation.
    """
    constr = {"AGENTS": 1, "DETERMINISTIC": False}
    for _ in range(10):
        generate_random_dict(constr)
        spec = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")

        auto_start = start_values(spec, df, "auto")
        expected = spec["AUX"]["init_values"][:-6]
        np.testing.assert_equal(expected, auto_start[:-4])
def test6():
    """Check the comparison file after an estimation with zero iterations.

    Extends test5: with ``MAXITER`` set to zero and ``START`` set to
    ``'init'``, the entries of ``comparison.grmpy.txt`` for the observed
    sample and for the simulated samples (start and finish) are compared
    with each other for every section of the file.
    """
    for _ in range(5):
        constr = {
            'DETERMINISTIC': False,
            'MAXITER': 0,
            'AGENTS': 10000,
            'START': 'init',
            'SAME_SIZE': True,
        }
        init = generate_random_dict(constr)
        # Overwrite two of the distribution parameters before the
        # specification is printed again.
        init['DIST']['all'][1] = 0.0
        init['DIST']['all'][5] = 1.0
        print_dict(init)

        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')

        desc = read_desc('comparison.grmpy.txt')
        for label in ['All', 'Treated', 'Untreated']:
            entries = desc[label]
            # All reported sample sizes have to coincide.
            np.testing.assert_equal(len(set(entries['Number'])), 1)
            # NOTE(review): the third argument of these numpy helpers is
            # ``decimal`` (number of decimals), not an absolute tolerance, so
            # 0.001 makes the comparison very loose -- confirm the intended
            # precision.
            np.testing.assert_almost_equal(
                entries['Observed Sample'],
                entries['Simulated Sample (finish)'],
                decimal=0.001)
            np.testing.assert_array_almost_equal(
                entries['Simulated Sample (finish)'],
                entries['Simulated Sample (start)'],
                decimal=0.001)
def test3():
    """Simulate with a single agent and check the resulting start values.

    With one agent the sample holds only treated or only untreated
    observations. The test asserts that ``start_values`` in ``'auto'`` mode
    agrees with the init values stored under ``AUX`` once the last six and
    the last four entries, respectively, are dropped. Per the original test
    description this fallback is due to perfect separation.
    """
    constr = {'AGENTS': 1, 'DETERMINISTIC': False}
    for _ in range(10):
        generate_random_dict(constr)
        spec = read('test.grmpy.ini')
        df = simulate('test.grmpy.ini')

        auto_start = start_values(spec, df, 'auto')
        expected = spec['AUX']['init_values'][:-6]
        np.testing.assert_equal(expected, auto_start[:-4])
def test10():
    """Check that ``start_values`` returns the init file values for 'init'.

    The coefficients of the treated, untreated and choice sections are
    concatenated in that order and compared with the start values after
    their last four entries are dropped.
    """
    for _ in range(10):
        generate_random_dict({'DETERMINISTIC': False})
        spec = read('test.grmpy.ini')

        expected = [
            value
            for section in ('TREATED', 'UNTREATED', 'CHOICE')
            for value in spec[section]['all']
        ]

        df = simulate('test.grmpy.ini')
        actual = start_values(spec, df, 'init')[:-4]

        np.testing.assert_array_equal(expected, actual)
def test9():
    """Check that ``start_values`` returns the init file values for "init".

    The parameters of the treated, untreated and choice sections are
    concatenated in that order and compared with the start values after
    their last four entries are dropped.
    """
    for _ in range(10):
        generate_random_dict({"DETERMINISTIC": False})
        spec = read("test.grmpy.yml")

        expected = [
            value
            for section in ("TREATED", "UNTREATED", "CHOICE")
            for value in spec[section]["params"]
        ]

        df = simulate("test.grmpy.yml")
        actual = start_values(spec, df, "init")[:-4]

        np.testing.assert_array_equal(expected, actual)
def test_common_support():
    """Check the common support when every propensity score equals 0.5.

    A constant ``prop_score`` column of 0.5 is attached to the simulated
    tutorial data; ``_define_common_support`` is then expected to return
    ``[0.5, 0.5]``, i.e. both bounds coincide.
    """
    tutorial = TEST_RESOURCES_DIR + "/tutorial.grmpy.yml"
    data = simulate(tutorial)
    spec = read(tutorial)

    # Identical treatment propensity for every observation.
    constant_score = pd.Series(np.ones(len(data))) * 0.5
    data.loc[:, "prop_score"] = constant_score

    support = _define_common_support(spec, data)
    np.testing.assert_equal(support, [0.5, 0.5])
def test11():
    """Check that zero iterations leave the internal start values unchanged.

    The init start values are passed through ``backward_transformation`` and
    compared with the ``x_internal`` entry that ``estimate`` stores under
    ``AUX`` when ``MAXITER`` is zero and ``START`` is ``'init'``.
    """
    for _ in range(10):
        constr = {
            'DETERMINISTIC': False,
            'AGENTS': 1000,
            'MAXITER': 0,
            'START': 'init',
        }
        generate_random_dict(constr)

        spec = read('test.grmpy.ini')
        df = simulate('test.grmpy.ini')
        expected = backward_transformation(start_values(spec, df, 'init'))

        result = estimate('test.grmpy.ini')
        np.testing.assert_equal(expected, result['AUX']['x_internal'])
def create_vault(num_tests=100, seed=123):
    """Create a new regression vault.

    Parameters
    ----------
    num_tests : int, optional
        Number of random specifications stored in the vault.
    seed : int, optional
        Seed passed to ``np.random.seed`` before any specification is drawn.

    Each entry is a triple of the sum over the simulated data frame, the
    generated dictionary and the criterion function value evaluated at the
    init start values. The result is written to
    ``regression_vault.grmpy.json`` in the current working directory.
    """
    np.random.seed(seed)

    tests = []
    for _ in range(num_tests):
        dict_ = generate_random_dict()
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        stat = np.sum(df.sum())
        tests.append((stat, dict_, criteria))
        cleanup()

    # The context manager guarantees the vault is flushed and closed.
    with open("regression_vault.grmpy.json", "w") as vault:
        json.dump(tests, vault)