Example #1
def test6():
    """Additionally to test5 this test checks if the comparison file provides the
    expected output when maxiter is set to zero and the estimation process uses the
    initialization file values as start values.
    """
    for _ in range(5):
        constr = dict()
        constr["DETERMINISTIC"], constr["MAXITER"], constr[
            "AGENTS"] = False, 0, 15000
        constr["START"], constr["SAME_SIZE"] = "init", True
        dict_ = generate_random_dict(constr)
        dict_["DIST"]["params"][1], dict_["DIST"]["params"][5] = 0.0, 1.0
        print_dict(dict_)
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")
        dict_ = read_desc("comparison.grmpy.info")
        for section in ["ALL", "TREATED", "UNTREATED"]:
            np.testing.assert_equal(len(set(dict_[section]["Number"])), 1)
            np.testing.assert_almost_equal(
                dict_[section]["Observed Sample"],
                dict_[section]["Simulated Sample (finish)"],
                0.001,
            )
            np.testing.assert_array_almost_equal(
                dict_[section]["Simulated Sample (finish)"],
                dict_[section]["Simulated Sample (start)"],
                0.001,
            )
Example #2
def test1():
    """The test runs a loop to check the consistency of the random init file generating process
    and the following simulation.
    """
    for _ in range(10):
        dict_ = generate_random_dict()
        print_dict(dict_)
        simulate('test.grmpy.ini')
Example #3
def test5():
    """The test checks if the estimation process works properly when maxiter is set to zero."""
    for _ in range(10):
        constr = dict()
        constr['DETERMINISTIC'], constr['MAXITER'] = False, 0
        generate_random_dict(constr)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
Example #4
def test3():
    """The fourth test checks whether the simulation process works if there are only treated or
    untreated Agents by setting the number of agents to one.
    """
    constr = constraints(probability=0.0, agents=1)
    for _ in range(10):
        generate_random_dict(constr)
        simulate('test.grmpy.ini')
Example #5
def test5():
    """The test checks if the estimation process works properly when maxiter is set to
    zero.
    """
    for _ in range(10):
        constr = constraints(probability=0.0, maxiter=0)
        generate_random_dict(constr)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
Example #6
def test5():
    """The test checks if the estimation process works properly when maxiter is set to
    zero.
    """
    for _ in range(5):
        constr = dict()
        constr["DETERMINISTIC"], constr["MAXITER"] = False, 0
        generate_random_dict(constr)
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")
Example #7
def test4():
    """The test checks if the estimation process works if the Powell algorithm is specified as
    the optimizer option.
    """
    for _ in range(5):
        constr = dict()
        constr['DETERMINISTIC'], constr['AGENTS'], constr[
            'start'] = False, 10000, 'init'
        constr['optimizer'] = 'SCIPY-Powell'
        generate_random_dict(constr)

        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
Example #8
def test4():
    """The test checks if the estimation process works if the Powell algorithm is specified as
    the optimizer option.
    """
    for _ in range(5):
        constr = constraints(probability=0.0,
                             agents=10000,
                             start='init',
                             optimizer='SCIPY-POWELL')
        generate_random_dict(constr)

        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
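The 'SCIPY-POWELL' option presumably dispatches to scipy.optimize.minimize with method="Powell". As a point of reference, here is a minimal sketch of that derivative-free optimizer on a toy criterion, independent of grmpy (the criterion and starting point are made up for illustration):

import numpy as np
from scipy.optimize import minimize


def criterion(x):
    """Toy criterion with its minimum at (1, -2)."""
    return (x[0] - 1.0) ** 2 + (x[1] + 2.0) ** 2


# Powell is derivative-free, which makes it a convenient choice for criterion
# functions without analytical gradients.
rslt = minimize(criterion, x0=np.zeros(2), method="Powell")
np.testing.assert_array_almost_equal(rslt.x, [1.0, -2.0], decimal=4)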
Example #9
def test9():
    """This test ensures that the random initialization file generating process, the read in process
    and the simulation process works if the constraints function allows for different number of co-
    variates for each treatment state and the occurence of cost-benefit shifters."""
    for _ in range(5):
        constr = dict()
        constr['DETERMINISTIC'], constr['AGENT'], constr[
            'STATE_DIFF'] = False, 1000, True
        constr['OVERLAP'] = True
        generate_random_dict(constr)
        read('test.grmpy.ini')
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')

    cleanup()
Example #10
def test5():
    """The tests checks if the simulation process works even if the covariance between U1 and V
    and U0 and V is equal. Further the test ensures that the mte_information function returns
    the same value for each quantile.
    """
    for _ in range(10):
        generate_random_dict()
        init_dict = read('test.grmpy.ini')

        # We impose that the covariance between the random components of the potential
        # outcomes and the random component determining choice is identical.
        init_dict['DIST']['all'][2] = init_dict['DIST']['all'][4]

        # Distribute information
        coeffs_untreated = init_dict['UNTREATED']['all']
        coeffs_treated = init_dict['TREATED']['all']

        # Construct auxiliary information
        cov = construct_covariance_matrix(init_dict)

        df = simulate('test.grmpy.ini')
        x = df.filter(regex=r'^X\_', axis=1)
        q = [0.01] + list(np.arange(0.05, 1, 0.05)) + [0.99]
        mte = mte_information(coeffs_treated, coeffs_untreated, cov, q, x)

        # We simply test that there is a single unique value for the marginal treatment effect.
        np.testing.assert_equal(len(set(mte)), 1)
Example #11
def test1():
    """The first test tests whether the relationships in the simulated datasets are appropriate
    in a deterministic and an un-deterministic setting.
    """
    constr = dict()
    for case in ['deterministic', 'undeterministic']:
        if case == 'deterministic':
            constr['DETERMINISTIC'] = True
        else:
            constr['DETERMINISTIC'] = False
        for _ in range(10):
            generate_random_dict(constr)
            df = simulate('test.grmpy.ini')
            dict_ = read('test.grmpy.ini')
            x_treated = df[[
                dict_['varnames'][i - 1] for i in dict_['TREATED']['order']
            ]]
            y_treated = pd.DataFrame.sum(dict_['TREATED']['all'] * x_treated,
                                         axis=1) + df.U1
            x_untreated = df[[
                dict_['varnames'][i - 1] for i in dict_['UNTREATED']['order']
            ]]
            y_untreated = pd.DataFrame.sum(
                dict_['UNTREATED']['all'] * x_untreated, axis=1) + df.U0

            np.testing.assert_array_almost_equal(df.Y1, y_treated, decimal=5)
            np.testing.assert_array_almost_equal(df.Y0, y_untreated, decimal=5)
            np.testing.assert_array_equal(df.Y[df.D == 1], df.Y1[df.D == 1])
            np.testing.assert_array_equal(df.Y[df.D == 0], df.Y0[df.D == 0])
Example #12
def test1():
    """The first test tests whether the relationships in the simulated datasets are
    appropriate in a deterministic and an un-deterministic setting.
    """
    constr = dict()
    for case in ["deterministic", "undeterministic"]:
        if case == "deterministic":
            constr["DETERMINISTIC"] = True
        else:
            constr["DETERMINISTIC"] = True
        for _ in range(10):
            generate_random_dict(constr)
            df = simulate("test.grmpy.yml")
            dict_ = read("test.grmpy.yml")
            x_treated = df[dict_["TREATED"]["order"]]
            y_treated = (pd.DataFrame.sum(
                dict_["TREATED"]["params"] * x_treated, axis=1) + df.U1)
            x_untreated = df[dict_["UNTREATED"]["order"]]
            y_untreated = (pd.DataFrame.sum(
                dict_["UNTREATED"]["params"] * x_untreated, axis=1) + df.U0)

            np.testing.assert_array_almost_equal(df.Y1, y_treated, decimal=5)
            np.testing.assert_array_almost_equal(df.Y0, y_untreated, decimal=5)
            np.testing.assert_array_equal(df.Y[df.D == 1], df.Y1[df.D == 1])
            np.testing.assert_array_equal(df.Y[df.D == 0], df.Y0[df.D == 0])
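The assertions in test1 boil down to the switching-regression identity Y = D * Y1 + (1 - D) * Y0. A minimal, self-contained sketch of that identity on synthetic data (illustrative only, not the grmpy simulator):

import numpy as np
import pandas as pd

# Synthetic stand-in for a simulated sample; column names mirror the tests above.
rng = np.random.default_rng(0)
df = pd.DataFrame({"Y1": rng.normal(size=100), "Y0": rng.normal(size=100)})
df["D"] = rng.integers(0, 2, size=100)
df["Y"] = df["D"] * df["Y1"] + (1 - df["D"]) * df["Y0"]

# The observed outcome equals the potential outcome of the realized treatment state.
np.testing.assert_array_equal(df.Y[df.D == 1], df.Y1[df.D == 1])
np.testing.assert_array_equal(df.Y[df.D == 0], df.Y0[df.D == 0])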
Example #13
def test4():
    """The test checks if the estimation process works if the Powell algorithm is
    specified as the optimizer option.
    """
    for _ in range(5):
        constr = dict()
        constr["DETERMINISTIC"], constr["AGENTS"], constr["start"] = (
            False,
            10000,
            "init",
        )
        constr["optimizer"] = "SCIPY-Powell"
        generate_random_dict(constr)

        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")
Example #14
def check_vault(num_tests=100):
    """This function checks the complete regression vault that is distributed as part of
    the package.
    """
    fname = (os.path.dirname(grmpy.__file__) +
             "/test/resources/old_regression_vault.grmpy.json")
    tests = json.load(open(fname))

    if num_tests > len(tests):
        print("The specified number of evaluations is larger than the number"
              " of entries in the regression_test vault.\n"
              "Therefore the test runs the complete test battery.")
    else:
        tests = [tests[i] for i in np.random.choice(len(tests), num_tests)]

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_transformation(dict_))
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        np.testing.assert_almost_equal(criteria_, criteria)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
        cleanup("regression")
Example #15
def test5():
    """The tests checks if the simulation process works even if the covariance between
    U1 and V and U0 and V is equal. Further the test ensures that the mte_information
    function returns the same value for each quantile.
    """
    for _ in range(10):
        generate_random_dict()
        init_dict = read("test.grmpy.yml")

        # We impose that the covariance between the random components of the potential
        # outcomes and the random component determining choice is identical.
        init_dict["DIST"]["params"][2] = init_dict["DIST"]["params"][4]

        # Distribute information
        coeffs_untreated = init_dict["UNTREATED"]["params"]
        coeffs_treated = init_dict["TREATED"]["params"]

        # Construct auxiliary information
        cov = construct_covariance_matrix(init_dict)

        df = simulate("test.grmpy.yml")

        x = df[list(
            set(init_dict["TREATED"]["order"] +
                init_dict["UNTREATED"]["order"]))]

        q = [0.01] + list(np.arange(0.05, 1, 0.05)) + [0.99]
        mte = mte_information(coeffs_treated, coeffs_untreated, cov, q, x,
                              init_dict)

        # We simply test that there is a single unique value for the marginal treatment
        #  effect.
        np.testing.assert_equal(len(set(mte)), 1)
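Why a single value is expected: under joint normality of (U1, U0, V), the unobserved part of the MTE at a point v is E[U1 - U0 | V = v] = (Cov(U1, V) - Cov(U0, V)) / Var(V) * v, which vanishes for every v once the two covariances coincide, leaving only the covariate part. A minimal sketch of that arithmetic, with illustrative numbers:

import numpy as np

# Illustrative covariances of (U1, V) and (U0, V) and variance of V.
cov_u1_v, cov_u0_v, var_v = 0.3, 0.3, 1.0

# Slope of E[U1 - U0 | V = v] under joint normality.
slope = (cov_u1_v - cov_u0_v) / var_v

# The unobserved component of the MTE on a grid of values of V.
v_grid = np.linspace(-2, 2, 5)
unobserved_part = slope * v_grid

# With equal covariances the component is identically zero, so the MTE is
# constant across quantiles.
assert np.allclose(unobserved_part, 0.0)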
Example #16
def test9():
    """This test ensures that the random initialization file generating process, the
    read in process and the simulation process works if the constraints function allows
    for different number of covariates for each treatment state and the occurence of
    cost-benefit shifters."""
    for _ in range(5):
        constr = dict()
        constr["DETERMINISTIC"], constr["AGENT"], constr["STATE_DIFF"] = (
            False,
            1000,
            True,
        )
        constr["OVERLAP"] = True
        generate_random_dict(constr)
        read("test.grmpy.yml")
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")

    cleanup()
Example #17
def simulate_test_data():
    """
    Simulate test dict_ and data.
    """
    fname = TEST_RESOURCES_DIR + "/tutorial.grmpy.yml"
    data = simulate(fname)
    dict_ = read(fname)
    dict_, data = check_append_constant(
        TEST_RESOURCES_DIR + "/tutorial.grmpy.yml", dict_, data, semipar=True
    )

    return dict_, data
Example #18
def create_vault(num_tests=100, seed=123):
    """This function creates a new regression vault."""
    np.random.seed(seed)

    tests = []
    for _ in range(num_tests):
        dict_ = generate_random_dict()
        df = simulate('test.grmpy.ini')
        stat = np.sum(df.sum())
        tests += [(stat, dict_)]
        cleanup()

    json.dump(tests, open('regression_vault.grmpy.json', 'w'))
Example #19
def test_rslt_dictionary():
    """
    This test checks if the elements of the estimation dictionary are equal
    to their expected values when the initialization file of the
    semipar tutorial is used.
    """
    fname = TEST_RESOURCES_DIR + "/tutorial-semipar.grmpy.yml"
    simulate(fname)

    rslt = fit(fname, semipar=True)
    expected_rslt = pickle.load(
        open(TEST_RESOURCES_DIR + "/tutorial-semipar-results.pkl", "rb"))

    np.testing.assert_equal(rslt["quantiles"], expected_rslt["quantiles"])
    np.testing.assert_almost_equal(rslt["mte"], expected_rslt["mte"], 7)
    np.testing.assert_almost_equal(rslt["mte_u"], expected_rslt["mte_u"], 7)
    np.testing.assert_almost_equal(rslt["mte_min"], expected_rslt["mte_min"],
                                   5)
    np.testing.assert_almost_equal(rslt["mte_max"], expected_rslt["mte_max"],
                                   5)
    np.testing.assert_almost_equal(rslt["b0"], expected_rslt["b0"], 7)
    np.testing.assert_almost_equal(rslt["b1"], expected_rslt["b1"], 7)
Example #20
def check_vault():
    """This function checks the complete regression vault that is distributed as part of the
    package.
    """
    fname = os.path.dirname(
        grmpy.__file__) + '/test/resources/regression_vault.grmpy.json'
    tests = json.load(open(fname))

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_)
        df = simulate('test.grmpy.ini')
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
        cleanup('regression')
Example #21
def test6():
    """Additionally to test5 this test checks if the descriptives file provides the expected
    output when maxiter is set to zero and the estimation process uses the initialization file
    values as start values.
    """
    for _ in range(5):
        constr = constraints(probability=0.0,
                             maxiter=0,
                             agents=1000,
                             start='init')
        generate_random_dict(constr)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
        dict_ = read_desc('descriptives.grmpy.txt')
        for key_ in ['All', 'Treated', 'Untreated']:
            np.testing.assert_equal(len(set(dict_[key_]['Number'])), 1)
            np.testing.assert_array_equal(
                dict_[key_]['Observed Sample'],
                dict_[key_]['Simulated Sample (finish)'])
            np.testing.assert_array_equal(
                dict_[key_]['Simulated Sample (finish)'],
                dict_[key_]['Simulated Sample (start)'])
    cleanup()
Example #22
def test2():
    """The third test  checks whether the relationships hold if the coefficients are zero in
    different setups.
    """
    for _ in range(10):
        for i in ['ALL', 'TREATED', 'UNTREATED', 'COST', 'TREATED & UNTREATED']:
            constr = constraints(probability=0.0)
            dict_ = generate_random_dict(constr)

            if i == 'ALL':
                for key_ in ['TREATED', 'UNTREATED', 'COST']:
                    dict_[key_]['coeff'] = np.array([0.] * len(dict_[key_]['coeff']))
            elif i == 'TREATED & UNTREATED':
                for key_ in ['TREATED', 'UNTREATED']:
                    dict_[key_]['coeff'] = np.array([0.] * len(dict_[key_]['coeff']))
            else:
                dict_[i]['coeff'] = np.array([0.] * len(dict_[i]['coeff']))

            print_dict(dict_)
            dict_ = read('test.grmpy.ini')
            df = simulate('test.grmpy.ini')
            x = df.filter(regex=r'^X\_', axis=1)

            if i == 'ALL':
                np.testing.assert_array_equal(df.Y1, df.U1)
                np.testing.assert_array_equal(df.Y0, df.U0)
            elif i == 'TREATED & UNTREATED':
                np.testing.assert_array_equal(df.Y1, df.U1)
                np.testing.assert_array_equal(df.Y0, df.U0)
                np.testing.assert_array_equal(df.Y[df.D == 1], df.U1[df.D == 1])
                np.testing.assert_array_equal(df.Y[df.D == 0], df.U0[df.D == 0])
            elif i == 'TREATED':
                y_untreated = pd.DataFrame.sum(dict_['UNTREATED']['all'] * x, axis=1) + df.U0
                np.testing.assert_array_almost_equal(df.Y0, y_untreated, decimal=5)
                np.testing.assert_array_equal(df.Y1, df.U1)

            elif i == 'UNTREATED':
                y_treated = pd.DataFrame.sum(dict_['TREATED']['all'] * x, axis=1) + df.U1
                np.testing.assert_array_almost_equal(df.Y1, y_treated, decimal=5)
                np.testing.assert_array_equal(df.Y0, df.U0)
            else:
                y_treated = pd.DataFrame.sum(dict_['TREATED']['all'] * x, axis=1) + df.U1
                y_untreated = pd.DataFrame.sum(dict_['UNTREATED']['all'] * x, axis=1) + df.U0
                np.testing.assert_array_almost_equal(df.Y1, y_treated, decimal=5)
                np.testing.assert_array_almost_equal(df.Y0, y_untreated, decimal=5)

            np.testing.assert_array_equal(df.Y[df.D == 1], df.Y1[df.D == 1])
            np.testing.assert_array_equal(df.Y[df.D == 0], df.Y0[df.D == 0])
            np.testing.assert_array_almost_equal(df.V, (df.UC - df.U1 + df.U0))
Example #23
def test3():
    """The fourth test checks whether the simulation process works if there are only
    treated or untreated Agents by setting the number of agents to one. Additionally the
    test checks if the start values for the estimation process are set to the init-
    ialization file values due to perfect separation.
    """
    constr = dict()
    constr["AGENTS"], constr["DETERMINISTIC"] = 1, False
    for _ in range(10):
        generate_random_dict(constr)
        dict_ = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        start = start_values(dict_, df, "auto")
        np.testing.assert_equal(dict_["AUX"]["init_values"][:(-6)],
                                start[:(-4)])
Example #24
def test6():
    """Additionally to test5 this test checks if the comparison file provides the expected
    output when maxiter is set to zero and the estimation process uses the initialization file
    values as start values.
    """
    for _ in range(5):
        constr = dict()
        constr['DETERMINISTIC'], constr['MAXITER'], constr[
            'AGENTS'] = False, 0, 10000
        constr['START'], constr['SAME_SIZE'] = 'init', True
        dict_ = generate_random_dict(constr)
        dict_['DIST']['all'][1], dict_['DIST']['all'][5] = 0.0, 1.0
        print_dict(dict_)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
        dict_ = read_desc('comparison.grmpy.txt')
        for key_ in ['All', 'Treated', 'Untreated']:
            np.testing.assert_equal(len(set(dict_[key_]['Number'])), 1)
            np.testing.assert_almost_equal(
                dict_[key_]['Observed Sample'],
                dict_[key_]['Simulated Sample (finish)'], 0.001)
            np.testing.assert_array_almost_equal(
                dict_[key_]['Simulated Sample (finish)'],
                dict_[key_]['Simulated Sample (start)'], 0.001)
Example #25
def test3():
    """The fourth test checks whether the simulation process works if there are only treated or un-
    treated Agents by setting the number of agents to one. Additionally the test checks if the start
    values for the estimation process are set to the initialization file values due to perfect
    separation.
    """
    constr = dict()
    constr['AGENTS'], constr['DETERMINISTIC'] = 1, False
    for _ in range(10):
        generate_random_dict(constr)
        dict_ = read('test.grmpy.ini')
        df = simulate('test.grmpy.ini')
        start = start_values(dict_, df, 'auto')
        np.testing.assert_equal(dict_['AUX']['init_values'][:(-6)],
                                start[:(-4)])
Example #26
def test10():
    """This test checks if the start_values function returns the init file values if the start
    option is set to init.
    """
    for _ in range(10):
        constr = dict()
        constr['DETERMINISTIC'] = False
        generate_random_dict(constr)
        dict_ = read('test.grmpy.ini')
        true = []
        for key_ in ['TREATED', 'UNTREATED', 'CHOICE']:
            true += list(dict_[key_]['all'])
        df = simulate('test.grmpy.ini')
        x0 = start_values(dict_, df, 'init')[:-4]

        np.testing.assert_array_equal(true, x0)
Example #27
def test9():
    """This test checks if the start_values function returns the init file values if the
    start option is set to init.
    """
    for _ in range(10):
        constr = dict()
        constr["DETERMINISTIC"] = False
        generate_random_dict(constr)
        dict_ = read("test.grmpy.yml")
        true = []
        for key_ in ["TREATED", "UNTREATED", "CHOICE"]:
            true += list(dict_[key_]["params"])
        df = simulate("test.grmpy.yml")
        x0 = start_values(dict_, df, "init")[:-4]

        np.testing.assert_array_equal(true, x0)
Example #28
def test_common_support():
    """
    Test whether the common support is degenerate (a single point) when the
    treatment propensity is 0.5 for everyone.
    """
    fname = TEST_RESOURCES_DIR + "/tutorial.grmpy.yml"
    data = simulate(fname)
    dict_ = read(fname)

    prop_score = pd.Series(np.ones(len(data))) * 0.5
    data.loc[:, "prop_score"] = prop_score

    estimated_support = _define_common_support(dict_, data)
    expected_support = [0.5, 0.5]

    np.testing.assert_equal(estimated_support, expected_support)
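The internals of _define_common_support are not shown here; the sketch below illustrates the usual overlap rule such a check relies on (an assumption about the implementation, not taken from the library): the support is the intersection of the treated and untreated propensity-score ranges, which collapses to a single point when the score is constant.

import numpy as np
import pandas as pd

# Toy sample: treatment indicator and a constant propensity score of 0.5.
data = pd.DataFrame({"D": [1, 0, 1, 0], "prop_score": [0.5, 0.5, 0.5, 0.5]})

# Overlap rule: the support is bounded below by the larger of the two group
# minima and above by the smaller of the two group maxima.
treated = data.loc[data.D == 1, "prop_score"]
untreated = data.loc[data.D == 0, "prop_score"]
support = [max(treated.min(), untreated.min()),
           min(treated.max(), untreated.max())]

# With a constant score the interval degenerates to a single point.
assert support == [0.5, 0.5]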
Example #29
def test11():
    """This test checks if the refactor auxiliary function returns an unchanged init file if the
    maximum number of iterations is set to zero.
    """

    for _ in range(10):
        constr = dict()
        constr['DETERMINISTIC'], constr['AGENTS'] = False, 1000
        constr['MAXITER'], constr['START'] = 0, 'init'
        generate_random_dict(constr)
        init_dict = read('test.grmpy.ini')
        df = simulate('test.grmpy.ini')
        start = start_values(init_dict, df, 'init')
        start = backward_transformation(start)
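        # With MAXITER set to zero the optimizer performs no updates, so the estimate
        # should coincide with the back-transformed start values.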
        rslt = estimate('test.grmpy.ini')

        np.testing.assert_equal(start, rslt['AUX']['x_internal'])
Example #30
def create_vault(num_tests=100, seed=123):
    """This function creates a new regression vault."""
    np.random.seed(seed)

    tests = []
    for _ in range(num_tests):
        dict_ = generate_random_dict()
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        stat = np.sum(df.sum())
        tests += [(stat, dict_, criteria)]
        cleanup()

    json.dump(tests, open("regression_vault.grmpy.json", "w"))