def test1():
    """Verify that the simulated data satisfy the model relationships in both a
    deterministic and a non-deterministic setting.
    """
    constr = dict()
    for case in ['deterministic', 'undeterministic']:
        # DETERMINISTIC is switched on only for the deterministic case.
        constr['DETERMINISTIC'] = case == 'deterministic'
        for _ in range(10):
            generate_random_dict(constr)
            df = simulate('test.grmpy.ini')
            dict_ = read('test.grmpy.ini')
            # Reconstruct both potential outcomes from the covariates, the
            # coefficients and the unobservables.
            labels_treated = [
                dict_['varnames'][i - 1] for i in dict_['TREATED']['order']
            ]
            x_treated = df[labels_treated]
            y_treated = pd.DataFrame.sum(
                dict_['TREATED']['all'] * x_treated, axis=1) + df.U1
            labels_untreated = [
                dict_['varnames'][i - 1] for i in dict_['UNTREATED']['order']
            ]
            x_untreated = df[labels_untreated]
            y_untreated = pd.DataFrame.sum(
                dict_['UNTREATED']['all'] * x_untreated, axis=1) + df.U0
            np.testing.assert_array_almost_equal(df.Y1, y_treated, decimal=5)
            np.testing.assert_array_almost_equal(df.Y0, y_untreated, decimal=5)
            # The observed outcome must coincide with the potential outcome of
            # the realized treatment state.
            np.testing.assert_array_equal(df.Y[df.D == 1], df.Y1[df.D == 1])
            np.testing.assert_array_equal(df.Y[df.D == 0], df.Y0[df.D == 0])
def test5():
    """Ensure that simulation works when Cov(U1, V) equals Cov(U0, V) and check
    that the mte_information function then returns one identical value for every
    quantile.
    """
    for _ in range(10):
        generate_random_dict()
        init_dict = read("test.grmpy.yml")
        # Force identical covariance between each potential-outcome shock and
        # the choice shock.
        init_dict["DIST"]["params"][2] = init_dict["DIST"]["params"][4]
        # Distribute information.
        beta_untreated = init_dict["UNTREATED"]["params"]
        beta_treated = init_dict["TREATED"]["params"]
        # Construct auxiliary information.
        cov = construct_covariance_matrix(init_dict)
        df = simulate("test.grmpy.yml")
        covariates = set(
            init_dict["TREATED"]["order"] + init_dict["UNTREATED"]["order"])
        x = df[list(covariates)]
        quantiles = [0.01] + list(np.arange(0.05, 1, 0.05)) + [0.99]
        mte = mte_information(
            beta_treated, beta_untreated, cov, quantiles, x, init_dict)
        # The marginal treatment effect should collapse to a single value.
        np.testing.assert_equal(len(set(mte)), 1)
def test5():
    """Ensure that simulation works when Cov(U1, V) equals Cov(U0, V) and check
    that the mte_information function then returns one identical value for every
    quantile.
    """
    for _ in range(10):
        generate_random_dict()
        init_dict = read('test.grmpy.ini')
        # Force identical covariance between each potential-outcome shock and
        # the choice shock.
        init_dict['DIST']['all'][2] = init_dict['DIST']['all'][4]
        # Distribute information.
        beta_untreated = init_dict['UNTREATED']['all']
        beta_treated = init_dict['TREATED']['all']
        # Construct auxiliary information.
        cov = construct_covariance_matrix(init_dict)
        df = simulate('test.grmpy.ini')
        x = df.filter(regex=r'^X\_', axis=1)
        quantiles = [0.01] + list(np.arange(0.05, 1, 0.05)) + [0.99]
        mte = mte_information(beta_treated, beta_untreated, cov, quantiles, x)
        # The marginal treatment effect should collapse to a single value.
        np.testing.assert_equal(len(set(mte)), 1)
def test1():
    """The first test tests whether the relationships in the simulated datasets
    are appropriate in a deterministic and an un-deterministic setting.

    Bug fix: the else-branch previously set DETERMINISTIC to True as well, so
    the un-deterministic case was never actually exercised.
    """
    constr = dict()
    for case in ["deterministic", "undeterministic"]:
        # True only for the deterministic case (the else-branch used to set
        # True by mistake).
        constr["DETERMINISTIC"] = case == "deterministic"
        for _ in range(10):
            generate_random_dict(constr)
            df = simulate("test.grmpy.yml")
            dict_ = read("test.grmpy.yml")
            # Reconstruct both potential outcomes from covariates, coefficients
            # and unobservables and compare with the simulated columns.
            x_treated = df[dict_["TREATED"]["order"]]
            y_treated = (
                pd.DataFrame.sum(dict_["TREATED"]["params"] * x_treated, axis=1)
                + df.U1
            )
            x_untreated = df[dict_["UNTREATED"]["order"]]
            y_untreated = (
                pd.DataFrame.sum(dict_["UNTREATED"]["params"] * x_untreated, axis=1)
                + df.U0
            )
            np.testing.assert_array_almost_equal(df.Y1, y_treated, decimal=5)
            np.testing.assert_array_almost_equal(df.Y0, y_untreated, decimal=5)
            # The observed outcome must equal the potential outcome of the
            # realized treatment state.
            np.testing.assert_array_equal(df.Y[df.D == 1], df.Y1[df.D == 1])
            np.testing.assert_array_equal(df.Y[df.D == 0], df.Y0[df.D == 0])
def test5():
    """Check that the estimation process runs through when maxiter is zero."""
    for _ in range(10):
        constr = {'DETERMINISTIC': False, 'MAXITER': 0}
        generate_random_dict(constr)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
def test3():
    """Check that the simulation process works when the sample consists of a
    single agent, i.e. only treated or only untreated individuals.
    """
    constr = constraints(probability=0.0, agents=1)
    for _ in range(10):
        generate_random_dict(constr)
        simulate('test.grmpy.ini')
def test5():
    """Check that the estimation process runs through when maxiter is zero."""
    for _ in range(10):
        generate_random_dict(constraints(probability=0.0, maxiter=0))
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
def test5():
    """Check that the estimation process runs through when maxiter is zero."""
    for _ in range(5):
        constr = {"DETERMINISTIC": False, "MAXITER": 0}
        generate_random_dict(constr)
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")
def test8():
    """Printing a read-in initialization file and reading it back again must
    leave the dictionary unchanged.
    """
    for _ in range(10):
        generate_random_dict()
        before = read('test.grmpy.ini')
        print_dict(before)
        after = read('test.grmpy.ini')
        np.testing.assert_equal(before, after)
def test4():
    """Check that the estimation process works when the Powell algorithm is
    chosen as the optimizer.
    """
    for _ in range(5):
        constr = constraints(
            probability=0.0, agents=10000, start='init',
            optimizer='SCIPY-POWELL')
        generate_random_dict(constr)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
def test4():
    """Check that the estimation process works when the Powell algorithm is
    chosen as the optimizer.
    """
    for _ in range(5):
        constr = {
            'DETERMINISTIC': False,
            'AGENTS': 10000,
            'start': 'init',
            'optimizer': 'SCIPY-Powell',
        }
        generate_random_dict(constr)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
def test3():
    """Check that simulation works with a single (non-deterministic) agent and
    that the start values then fall back to the initialization-file values
    because of perfect separation.
    """
    constr = {"AGENTS": 1, "DETERMINISTIC": False}
    for _ in range(10):
        generate_random_dict(constr)
        dict_ = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        start = start_values(dict_, df, "auto")
        # With perfect separation the automatic start values must equal the
        # initialization-file coefficients.
        np.testing.assert_equal(dict_["AUX"]["init_values"][:-6], start[:-4])
def test3():
    """Check that simulation works with a single (non-deterministic) agent and
    that the start values then fall back to the initialization-file values
    because of perfect separation.
    """
    constr = {'AGENTS': 1, 'DETERMINISTIC': False}
    for _ in range(10):
        generate_random_dict(constr)
        dict_ = read('test.grmpy.ini')
        df = simulate('test.grmpy.ini')
        start = start_values(dict_, df, 'auto')
        # With perfect separation the automatic start values must equal the
        # initialization-file coefficients.
        np.testing.assert_equal(dict_['AUX']['init_values'][:-6], start[:-4])
def test9():
    """This test ensures that the random initialization file generating process,
    the read-in process and the simulation process work if the constraints
    function allows for different numbers of covariates for each treatment state
    and the occurrence of cost-benefit shifters.

    Bug fix: the constraint key was misspelled as 'AGENT', so the intended
    sample size of 1000 was silently ignored; the unused loop variable `i` is
    also replaced by `_`.
    """
    for _ in range(5):
        constr = dict()
        # NOTE(review): 'AGENTS' replaces the former 'AGENT' typo -- every other
        # test in this file uses 'AGENTS'; confirm against generate_random_dict.
        constr['DETERMINISTIC'], constr['AGENTS'], constr['STATE_DIFF'] = \
            False, 1000, True
        constr['OVERLAP'] = True
        generate_random_dict(constr)
        read('test.grmpy.ini')
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
    cleanup()
def test4():
    """Check that the estimation process works when the Powell algorithm is
    chosen as the optimizer.
    """
    for _ in range(5):
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 10000,
            "start": "init",
            "optimizer": "SCIPY-Powell",
        }
        generate_random_dict(constr)
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")
def test6():
    """In addition to test5 this checks that the comparison file provides the
    expected output when maxiter is zero and the estimation starts from the
    initialization-file values.
    """
    for _ in range(5):
        constr = {
            "DETERMINISTIC": False,
            "MAXITER": 0,
            "AGENTS": 15000,
            "START": "init",
            "SAME_SIZE": True,
        }
        dict_ = generate_random_dict(constr)
        # Pin two entries of the distribution parameters -- presumably the
        # mean/sd entries of the observed outcome's error term (TODO confirm).
        dict_["DIST"]["params"][1], dict_["DIST"]["params"][5] = 0.0, 1.0
        print_dict(dict_)
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")
        dict_ = read_desc("comparison.grmpy.info")
        for section in ["ALL", "TREATED", "UNTREATED"]:
            # Every row of the section reports the same number of observations.
            np.testing.assert_equal(len(set(dict_[section]["Number"])), 1)
            np.testing.assert_almost_equal(
                dict_[section]["Observed Sample"],
                dict_[section]["Simulated Sample (finish)"],
                0.001,
            )
            np.testing.assert_array_almost_equal(
                dict_[section]["Simulated Sample (finish)"],
                dict_[section]["Simulated Sample (start)"],
                0.001,
            )
def test9():
    """start_values must return the initialization-file coefficients when the
    start option is set to 'init'.
    """
    for _ in range(10):
        generate_random_dict({"DETERMINISTIC": False})
        dict_ = read("test.grmpy.yml")
        expected = []
        for key_ in ["TREATED", "UNTREATED", "CHOICE"]:
            expected += list(dict_[key_]["params"])
        df = simulate("test.grmpy.yml")
        x0 = start_values(dict_, df, "init")[:-4]
        np.testing.assert_array_equal(expected, x0)
def test10():
    """start_values must return the initialization-file coefficients when the
    start option is set to 'init'.
    """
    for _ in range(10):
        generate_random_dict({'DETERMINISTIC': False})
        dict_ = read('test.grmpy.ini')
        expected = []
        for key_ in ['TREATED', 'UNTREATED', 'CHOICE']:
            expected += list(dict_[key_]['all'])
        df = simulate('test.grmpy.ini')
        x0 = start_values(dict_, df, 'init')[:-4]
        np.testing.assert_array_equal(expected, x0)
def test4():
    """Generate a random init file, print it, import it again and compare the
    entries of the generated and the imported dictionaries.
    """
    for _ in range(10):
        gen_dict = generate_random_dict()
        init_file_name = gen_dict['SIMULATION']['source']
        print_dict(gen_dict, init_file_name)
        imp_dict = read(init_file_name + '.grmpy.ini')
        for key_ in ['TREATED', 'UNTREATED', 'COST', 'DIST']:
            # Coefficients must survive the print/read round trip up to the
            # precision used when printing the file.
            np.testing.assert_array_almost_equal(gen_dict[key_]['coeff'],
                                                 imp_dict[key_]['all'],
                                                 decimal=4)
            if key_ in ['TREATED', 'UNTREATED', 'COST']:
                for i in range(len(gen_dict[key_]['types'])):
                    if isinstance(gen_dict[key_]['types'][i], str):
                        # Plain string type labels must match exactly.
                        if not gen_dict[key_]['types'][i] == \
                                imp_dict[key_]['types'][i]:
                            raise AssertionError()
                    elif isinstance(gen_dict[key_]['types'][i], list):
                        # List entries look like (label, value) -- presumably a
                        # binary type with its fraction; TODO confirm. Compare
                        # the label exactly and the value up to print precision.
                        if not gen_dict[key_]['types'][i][0] == \
                                imp_dict[key_]['types'][i][0]:
                            raise AssertionError()
                        np.testing.assert_array_almost_equal(
                            gen_dict[key_]['types'][i][1],
                            imp_dict[key_]['types'][i][1], 4)
        for key_ in ['source', 'agents', 'seed']:
            # The simulation metadata must round-trip exactly.
            if not gen_dict['SIMULATION'][key_] == imp_dict['SIMULATION'][key_]:
                raise AssertionError()
def test11():
    """When the maximum number of iterations is zero the estimation result must
    equal the (back-transformed) starting values.
    """
    for _ in range(10):
        constr = {'DETERMINISTIC': False, 'AGENTS': 1000,
                  'MAXITER': 0, 'START': 'init'}
        generate_random_dict(constr)
        init_dict = read('test.grmpy.ini')
        df = simulate('test.grmpy.ini')
        x0 = backward_transformation(start_values(init_dict, df, 'init'))
        rslt = estimate('test.grmpy.ini')
        np.testing.assert_equal(x0, rslt['AUX']['x_internal'])
def test1():
    """Repeatedly generate, print and simulate random init files to check the
    consistency of the generation process and the subsequent simulation.
    """
    for _ in range(10):
        print_dict(generate_random_dict())
        simulate('test.grmpy.ini')
def test8():
    """A UserError must be raised when wrong inputs are passed to the package's
    functions and methods.
    """
    constr = {'DETERMINISTIC': False, 'AGENTS': 1000}
    generate_random_dict(constr)
    df = simulate('test.grmpy.ini')
    bad_input = []
    dict_ = read('test.grmpy.ini')
    # Point the estimation file to a non-existing data source.
    dict_['ESTIMATION']['file'] = 'data.grmpy.ini'
    print_dict(dict_, 'false_data')
    pytest.raises(UserError, estimate, 'tast.grmpy.ini')
    pytest.raises(UserError, estimate, 'false_data.grmpy.ini')
    pytest.raises(UserError, simulate, 'tast.grmpy.ini')
    pytest.raises(UserError, read, 'tast.grmpy.ini')
    pytest.raises(UserError, start_values, bad_input, df, 'init')
    pytest.raises(UserError, generate_random_dict, bad_input)
def test8():
    """A UserError must be raised when wrong inputs are passed to the package's
    functions and methods.
    """
    constr = {"DETERMINISTIC": False, "AGENTS": 1000}
    generate_random_dict(constr)
    df = simulate("test.grmpy.yml")
    dict_ = read("test.grmpy.yml")
    bad_input = list()
    # Point the estimation file to a non-existing data source.
    dict_["ESTIMATION"]["file"] = "data.grmpy.yml"
    print_dict(dict_, "false_data")
    pytest.raises(UserError, fit, "tast.grmpy.yml")
    pytest.raises(UserError, fit, "false_data.grmpy.yml")
    pytest.raises(UserError, simulate, "tast.grmpy.yml")
    pytest.raises(UserError, read, "tast.grmpy.yml")
    pytest.raises(UserError, start_values, bad_input, df, "init")
    pytest.raises(UserError, generate_random_dict, bad_input)
def test10():
    """When the maximum number of iterations is zero the estimation result must
    equal the (back-transformed) starting values.
    """
    for _ in range(10):
        constr = {"DETERMINISTIC": False, "AGENTS": 1000,
                  "MAXITER": 0, "START": "init"}
        generate_random_dict(constr)
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        x0 = backward_transformation(start_values(init_dict, df, "init"))
        rslt = fit("test.grmpy.yml")
        np.testing.assert_equal(x0, rslt["AUX"]["x_internal"])
def test9():
    """This test ensures that the random initialization file generating process,
    the read-in process and the simulation process work if the constraints
    function allows for different numbers of covariates for each treatment state
    and the occurrence of cost-benefit shifters.

    Bug fix: the constraint key was misspelled as 'AGENT', so the intended
    sample size of 1000 was silently ignored.
    """
    for _ in range(5):
        constr = dict()
        # NOTE(review): "AGENTS" replaces the former "AGENT" typo -- every other
        # test in this file uses "AGENTS"; confirm against generate_random_dict.
        constr["DETERMINISTIC"], constr["AGENTS"], constr["STATE_DIFF"] = (
            False,
            1000,
            True,
        )
        constr["OVERLAP"] = True
        generate_random_dict(constr)
        read("test.grmpy.yml")
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")
    cleanup()
def test10():
    """When the maximum number of iterations is zero the fitted parameters must
    equal the (back-transformed) starting values.
    """
    for _ in range(10):
        constr = {"DETERMINISTIC": False, "AGENTS": 1000, "MAXITER": 0,
                  "START": "init", "OPTIMIZER": "BFGS"}
        generate_random_dict(constr)
        dict_ = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, dict_)
        x0 = start_values(dict_, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        x0 = backward_transformation(x0)
        rslt = fit("test.grmpy.yml")
        np.testing.assert_equal(x0, rslt["opt_rslt"]["params"].values)
def test3():
    """The criterion function evaluated at the simulated and at the 'estimated'
    sample must be (approximately) equal when both samples contain an identical
    number of individuals.
    """
    for _ in range(5):
        constr = {'DETERMINISTIC': False, 'AGENTS': 1000, 'START': 'init',
                  'OPTIMIZER': 'SCIPY-BFGS', 'SAME_SIZE': True}
        generate_random_dict(constr)
        df_sim = simulate('test.grmpy.ini')
        rslt = estimate('test.grmpy.ini')
        init_dict = read('test.grmpy.ini')
        df_est = simulate_estimation(init_dict, rslt)
        start = start_values(init_dict, df_sim, 'init')
        criteria = [calculate_criteria(init_dict, data, start)
                    for data in (df_sim, df_est)]
        np.testing.assert_allclose(criteria[1], criteria[0], rtol=0.1)
def test4():
    """Generate a random init file, print it, import it again and compare the
    entries in both dictionaries.

    Bug fix: the covariate-order check previously compared each dictionary's
    'order' entry with itself (`dict_[section]["order"] == dict_[section]["order"]`,
    always true); it now compares the generated against the imported dictionary.
    Bare `raise AssertionError` statements are normalized to `raise
    AssertionError()`.
    """
    for _ in range(10):
        gen_dict = generate_random_dict()
        init_file_name = gen_dict["SIMULATION"]["source"]
        print_dict(gen_dict, init_file_name)
        imp_dict = read(init_file_name + ".grmpy.yml")
        for section in ["TREATED", "UNTREATED", "CHOICE", "DIST"]:
            # Coefficients must survive the print/read round trip up to the
            # precision used when printing the file.
            np.testing.assert_array_almost_equal(
                gen_dict[section]["params"], imp_dict[section]["params"],
                decimal=4)
            if section in ["TREATED", "UNTREATED", "CHOICE"]:
                # The covariate order must round-trip exactly ...
                if gen_dict[section]["order"] != imp_dict[section]["order"]:
                    raise AssertionError()
                for dict_ in (gen_dict, imp_dict):
                    # ... contain no duplicates and start with the intercept X1.
                    if len(dict_[section]["order"]) != len(
                            set(dict_[section]["order"])):
                        raise AssertionError()
                    if dict_[section]["order"][0] != "X1":
                        raise AssertionError()
        for variable in gen_dict["VARTYPES"].keys():
            if variable not in imp_dict["VARTYPES"].keys():
                raise AssertionError()
            if gen_dict["VARTYPES"][variable] != imp_dict["VARTYPES"][variable]:
                raise AssertionError()
        if gen_dict["VARTYPES"]["X1"] != "nonbinary":
            raise AssertionError()
        for subkey in ["source", "agents", "seed"]:
            if not gen_dict["SIMULATION"][subkey] == imp_dict["SIMULATION"][subkey]:
                raise AssertionError()
        for subkey in [
            "agents",
            "file",
            "optimizer",
            "start",
            "maxiter",
            "dependent",
            "indicator",
            "comparison",
            "output_file",
        ]:
            if not gen_dict["ESTIMATION"][subkey] == imp_dict["ESTIMATION"][subkey]:
                raise AssertionError()
def test14():
    """This test checks whether our gradient functions work properly.

    Bug fix: a leftover debug `print(init_dict["AUX"])` is removed; it cluttered
    the pytest output and had no effect on the assertions. The docstring typo
    'wether' is corrected.
    """
    constr = {"AGENTS": 10000, "DETERMINISTIC": False}
    for _ in range(10):
        generate_random_dict(constr)
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        num_treated = X1.shape[1]
        num_untreated = X1.shape[1] + X0.shape[1]
        x0 = start_values(init_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        x0_back = backward_transformation(x0)
        # Complex-step approximation of the log-likelihood gradient versus the
        # analytic gradient.
        llh_gradient_approx = approx_fprime_cs(
            x0_back,
            log_likelihood,
            args=(X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated, None, False),
        )
        llh_gradient = gradient_hessian(x0_back, X1, X0, Z1, Z0, Y1, Y0)
        # Same comparison for the minimizing interface (internal parameters).
        min_inter_approx = approx_fprime_cs(
            x0,
            minimizing_interface,
            args=(X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated, None, False),
        )
        _, min_inter_gradient = log_likelihood(
            x0_back, X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated, None, True
        )
        np.testing.assert_array_almost_equal(
            min_inter_approx, min_inter_gradient, decimal=5
        )
        np.testing.assert_array_almost_equal(
            llh_gradient_approx, llh_gradient, decimal=5
        )
    cleanup()
def create_vault(num_tests=100, seed=123):
    """Create a new regression vault.

    For each of *num_tests* random model specifications the simulated data's
    aggregate sum is stored together with the specification, then the whole
    collection is written to 'regression_vault.grmpy.json'.

    Bug fix: the output file handle returned by `open` was never closed; the
    dump now happens inside a `with` block so the file is flushed and closed
    deterministically.
    """
    np.random.seed(seed)  # make the vault reproducible
    tests = []
    for _ in range(num_tests):
        dict_ = generate_random_dict()
        df = simulate('test.grmpy.ini')
        stat = np.sum(df.sum())
        tests += [(stat, dict_)]
    cleanup()
    with open('regression_vault.grmpy.json', 'w') as outfile:
        json.dump(tests, outfile)