예제 #1
0
def _update_correlation_structure(file, model_dict, rho):
    """
    Rewrite the initialization file implied by a valid model specification
    with an updated correlation structure among the unobservables.

    The new specification overwrites the input init file in place.

    Parameters
    ----------
    file: yaml
        grmpy initialization file.
    model_dict: dict
        grmpy initialization dictionary, the output of grmpy.read()
    rho: float
        The correlation coefficient between U_1 and V, which
        takes values between [0, -1). Values closer to -1 denote a larger
        degree of essential heterogeneity in the sample.
    """
    # Baseline standard deviations taken from the model dictionary.
    sd_u1 = model_dict["DIST"]["params"][0]
    sd_v = model_dict["DIST"]["params"][-1]

    # The initialization file stores the covariance implied by rho,
    # not the correlation itself.
    model_dict["DIST"]["params"][2] = rho * sd_v * sd_u1

    # Overwrite the original init file with the updated specification.
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
예제 #2
0
def test6():
    """In addition to test5, this test checks that the comparison file
    provides the expected output when maxiter is set to zero and the
    estimation process uses the initialization file values as start values.
    """
    for _ in range(5):
        constr = {
            "DETERMINISTIC": False,
            "MAXITER": 0,
            "AGENTS": 15000,
            "START": "init",
            "SAME_SIZE": True,
        }
        dict_ = generate_random_dict(constr)
        # Pin down the distribution entries so start and finish samples align.
        dict_["DIST"]["params"][1] = 0.0
        dict_["DIST"]["params"][5] = 1.0
        print_dict(dict_)
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")
        dict_ = read_desc("comparison.grmpy.info")
        for section in ["ALL", "TREATED", "UNTREATED"]:
            # All sample counts within a section must coincide.
            np.testing.assert_equal(len(set(dict_[section]["Number"])), 1)
            np.testing.assert_almost_equal(
                dict_[section]["Observed Sample"],
                dict_[section]["Simulated Sample (finish)"],
                0.001,
            )
            np.testing.assert_array_almost_equal(
                dict_[section]["Simulated Sample (finish)"],
                dict_[section]["Simulated Sample (start)"],
                0.001,
            )
예제 #3
0
def check_vault(num_tests=100):
    """This function checks the complete regression vault that is distributed
    as part of the package.

    Parameters
    ----------
    num_tests: int, default 100
        Number of randomly selected vault entries to evaluate. If this
        exceeds the vault size, the complete test battery is run instead.
    """
    fname = os.path.join(
        os.path.dirname(grmpy.__file__),
        "test",
        "resources",
        "old_regression_vault.grmpy.json",
    )
    # Use a context manager so the file handle is closed deterministically
    # (the original json.load(open(fname)) leaked the handle).
    with open(fname) as vault:
        tests = json.load(vault)

    if num_tests > len(tests):
        print("The specified number of evaluations is larger than the number"
              " of entries in the regression_test vault.\n"
              "Therefore the test runs the complete test battery.")
    else:
        tests = [tests[i] for i in np.random.choice(len(tests), num_tests)]

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_transformation(dict_))
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        # Both the criterion value and the data checksum must replicate.
        np.testing.assert_almost_equal(criteria_, criteria)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
        cleanup("regression")
예제 #4
0
def test4():
    """Test the random init file generating process and the import process.
    A random init file is generated, imported again, and the entries of both
    dictionaries are compared.
    """
    for _ in range(10):
        gen_dict = generate_random_dict()
        fname = gen_dict['SIMULATION']['source']
        print_dict(gen_dict, fname)
        imp_dict = read(fname + '.grmpy.ini')

        for key_ in ['TREATED', 'UNTREATED', 'COST', 'DIST']:
            np.testing.assert_array_almost_equal(
                gen_dict[key_]['coeff'], imp_dict[key_]['all'], decimal=4)
            if key_ in ['TREATED', 'UNTREATED', 'COST']:
                for i in range(len(gen_dict[key_]['types'])):
                    gen_type = gen_dict[key_]['types'][i]
                    imp_type = imp_dict[key_]['types'][i]
                    if isinstance(gen_type, str):
                        # Plain type labels must round-trip unchanged.
                        if gen_type != imp_type:
                            raise AssertionError()
                    elif isinstance(gen_type, list):
                        # [label, values] entries: compare the label exactly
                        # and the values up to rounding.
                        if gen_type[0] != imp_type[0]:
                            raise AssertionError()
                        np.testing.assert_array_almost_equal(
                            gen_type[1], imp_type[1], 4)

        for key_ in ['source', 'agents', 'seed']:
            if gen_dict['SIMULATION'][key_] != imp_dict['SIMULATION'][key_]:
                raise AssertionError()
예제 #5
0
def test1():
    """Repeatedly generate a random init file and run the subsequent
    simulation to check the consistency of both steps.
    """
    for _ in range(10):
        init_dict = generate_random_dict()
        print_dict(init_dict)
        simulate('test.grmpy.ini')
예제 #6
0
def test8():
    """Round-trip check: generating, reading, printing, and re-reading an
    initialization file must yield identical dictionaries.
    """
    for _ in range(10):
        generate_random_dict()
        first = read('test.grmpy.ini')
        print_dict(first)
        second = read('test.grmpy.ini')
        np.testing.assert_equal(first, second)
예제 #7
0
def refactor_results(dict_, file):
    """Transfer estimation results from *dict_* into the pseudo specification
    read from *file* and print the result to the init file stem 'test'."""
    pseudo = read(file)

    for section in ['TREATED', 'UNTREATED', 'COST', 'DIST']:
        if section == 'DIST':
            # The last six internal values parameterize the distribution.
            pseudo['DIST']['coeff'] = dict_['AUX']['x_internal'][-6:]
            continue
        pseudo[section]['coeff'] = dict_[section]['all'].tolist()
        del pseudo[section]['all']

    print_dict(pseudo, 'test')
예제 #8
0
def test4():
    """This test checks the random init file generating process and the import
    process. It generates a random init file, imports it again and compares
    the entries in both dictionaries.
    """
    for _ in range(10):
        gen_dict = generate_random_dict()
        init_file_name = gen_dict["SIMULATION"]["source"]
        print_dict(gen_dict, init_file_name)
        imp_dict = read(init_file_name + ".grmpy.yml")
        dicts = [gen_dict, imp_dict]
        for section in ["TREATED", "UNTREATED", "CHOICE", "DIST"]:
            np.testing.assert_array_almost_equal(gen_dict[section]["params"],
                                                 imp_dict[section]["params"],
                                                 decimal=4)
            if section in ["TREATED", "UNTREATED", "CHOICE"]:
                # BUG FIX: the original compared dict_[section]["order"] to
                # itself inside the loop, which is vacuously true. The intent
                # is that the order survives the print/read round trip.
                if gen_dict[section]["order"] != imp_dict[section]["order"]:
                    raise AssertionError()
                for dict_ in dicts:
                    # No duplicate covariates are allowed.
                    if len(dict_[section]["order"]) != len(
                            set(dict_[section]["order"])):
                        raise AssertionError()
                    # The first covariate is always X1.
                    if dict_[section]["order"][0] != "X1":
                        raise AssertionError()

        for variable in gen_dict["VARTYPES"].keys():
            if variable not in imp_dict["VARTYPES"].keys():
                raise AssertionError()

            if gen_dict["VARTYPES"][variable] != imp_dict["VARTYPES"][variable]:
                raise AssertionError()

        if gen_dict["VARTYPES"]["X1"] != "nonbinary":
            raise AssertionError()

        for subkey in ["source", "agents", "seed"]:
            if gen_dict["SIMULATION"][subkey] != imp_dict["SIMULATION"][subkey]:
                raise AssertionError()

        for subkey in [
                "agents",
                "file",
                "optimizer",
                "start",
                "maxiter",
                "dependent",
                "indicator",
                "comparison",
                "output_file",
        ]:
            if gen_dict["ESTIMATION"][subkey] != imp_dict["ESTIMATION"][subkey]:
                raise AssertionError()
예제 #9
0
def update_correlation_structure(model_dict, rho):
    """Update the correlation structure among the unobservables of a valid
    model specification and print the result to an initialization file."""

    # Standard deviations of V and U_1 from the baseline specification.
    params = model_dict["DIST"]["params"]
    sd_v, sd_u = params[-1], params[0]

    # The init file stores the implied covariance rather than rho itself.
    params[2] = rho * sd_v * sd_u

    # Write the specification to the init file stem 'reliability'.
    print_dict(model_dict, "reliability")
예제 #10
0
파일: run.py 프로젝트: lnsongxf/grmpy
def check_vault():
    """This function checks the complete regression vault that is distributed
    as part of the package.
    """
    fname = os.path.join(
        os.path.dirname(grmpy.__file__),
        'test', 'resources', 'regression_vault.grmpy.json')
    # Use a context manager so the file handle is closed deterministically
    # (the original json.load(open(fname)) leaked the handle).
    with open(fname) as vault:
        tests = json.load(vault)

    for test in tests:
        # The stored criterion value is not re-checked here, only the data
        # checksum of the simulated sample.
        stat, dict_, _ = test
        print_dict(dict_)
        df = simulate('test.grmpy.ini')
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
        cleanup('regression')
예제 #11
0
def test2():
    """Check that the simulated outcome relationships hold when coefficients
    are set to zero in different setups (every section at once, single
    sections, or the treated/untreated pair).
    """
    for _ in range(10):
        for i in ['ALL', 'TREATED', 'UNTREATED', 'COST', 'TREATED & UNTREATED']:
            constr = constraints(probability=0.0)
            dict_ = generate_random_dict(constr)

            # Zero out the coefficients of the targeted section(s).
            if i == 'ALL':
                for key_ in ['TREATED', 'UNTREATED', 'COST']:
                    dict_[key_]['coeff'] = np.array([0.] * len(dict_[key_]['coeff']))
            elif i == 'TREATED & UNTREATED':
                for key_ in ['TREATED', 'UNTREATED']:
                    dict_[key_]['coeff'] = np.array([0.] * len(dict_[key_]['coeff']))
            else:
                dict_[i]['coeff'] = np.array([0.] * len(dict_[i]['coeff']))

            print_dict(dict_)
            dict_ = read('test.grmpy.ini')
            df = simulate('test.grmpy.ini')
            # Covariate columns in the simulated data are prefixed with 'X_'.
            x = df.filter(regex=r'^X\_', axis=1)

            # With zero coefficients the potential outcomes collapse to the
            # corresponding unobservables.
            if i == 'ALL':
                np.testing.assert_array_equal(df.Y1, df.U1)
                np.testing.assert_array_equal(df.Y0, df.U0)
            elif i == 'TREATED & UNTREATED':
                np.testing.assert_array_equal(df.Y1, df.U1)
                np.testing.assert_array_equal(df.Y0, df.U0)
                np.testing.assert_array_equal(df.Y[df.D == 1], df.U1[df.D == 1])
                np.testing.assert_array_equal(df.Y[df.D == 0], df.U0[df.D == 0])
            elif i == 'TREATED':
                # The untreated outcome must still obey the linear index.
                y_untreated = pd.DataFrame.sum(dict_['UNTREATED']['all'] * x, axis=1) + df.U0
                np.testing.assert_array_almost_equal(df.Y0, y_untreated, decimal=5)
                np.testing.assert_array_equal(df.Y1, df.U1)

            elif i == 'UNTREATED':
                # The treated outcome must still obey the linear index.
                y_treated = pd.DataFrame.sum(dict_['TREATED']['all'] * x, axis=1) + df.U1
                np.testing.assert_array_almost_equal(df.Y1, y_treated, decimal=5)
                np.testing.assert_array_equal(df.Y0, df.U0)
            else:
                y_treated = pd.DataFrame.sum(dict_['TREATED']['all'] * x, axis=1) + df.U1
                y_untreated = pd.DataFrame.sum(dict_['UNTREATED']['all'] * x, axis=1) + df.U0
                np.testing.assert_array_almost_equal(df.Y1, y_treated, decimal=5)
                np.testing.assert_array_almost_equal(df.Y0, y_untreated, decimal=5)

            # Observed outcome is the potential outcome of the realized state,
            # and V is defined by the cost-benefit identity.
            np.testing.assert_array_equal(df.Y[df.D == 1], df.Y1[df.D == 1])
            np.testing.assert_array_equal(df.Y[df.D == 0], df.Y0[df.D == 0])
            np.testing.assert_array_almost_equal(df.V, (df.UC - df.U1 + df.U0))
예제 #12
0
def test1():
    """
    Simple equality test for the results of R's locpoly function and grmpy's
    locpoly function, based on the mock data set from Carneiro et al (2011).
    """
    init_dict = read(TEST_RESOURCES_DIR + "/replication_semipar.yml")
    init_dict["ESTIMATION"]["file"] = (
        TEST_RESOURCES_DIR + "/aer-replication-mock.pkl"
    )
    print_dict(init_dict, TEST_RESOURCES_DIR + "/replication_semipar")
    rslt = fit(
        TEST_RESOURCES_DIR + "/replication_semipar.grmpy.yml", semipar=True
    )

    # Reference results produced by R's locpoly.
    expected_mte_u = pd.read_pickle(
        TEST_RESOURCES_DIR + "/replication-results-mte_u.pkl"
    )

    np.testing.assert_array_almost_equal(rslt["mte_u"], expected_mte_u, 6)
예제 #13
0
def test2():
    """This test runs a random selection of five regression tests from the
    package's regression test vault.
    """
    fname = os.path.join(
        os.path.dirname(grmpy.__file__),
        'test', 'resources', 'regression_vault.grmpy.json')
    # Use a context manager so the file handle is closed deterministically
    # (the original json.load(open(fname)) leaked the handle).
    with open(fname) as vault:
        tests = json.load(vault)

    for i in np.random.choice(range(len(tests)), size=5):
        stat, dict_, criteria = tests[i]
        print_dict(dict_)
        df = simulate('test.grmpy.ini')
        init_dict = read('test.grmpy.ini')
        start = start_values(init_dict, df, 'init')
        criteria_ = calculate_criteria(init_dict, df, start)
        # Both the criterion value and the data checksum must replicate.
        np.testing.assert_array_almost_equal(criteria, criteria_)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
예제 #14
0
def update_correlation_structure(model_dict, rho):
    """Update the correlation structure among the unobservables of a valid
    model specification and print the result to an initialization file."""

    dist = model_dict['DIST']['all']
    # Standard deviations of V and U_1 from the baseline specification.
    sd_v, sd_u = dist[-1], dist[0]

    # The init file stores the implied covariance rather than rho itself.
    dist[2] = rho * sd_v * sd_u

    # Replace the integer order entries by the corresponding variable names
    # before printing the specification to 'reliability'.
    varnames = model_dict['varnames']
    for section in ['TREATED', 'UNTREATED', 'CHOICE']:
        model_dict[section]['order'] = [
            varnames[j - 1] for j in model_dict[section]['order']
        ]
    print_dict(model_dict, 'reliability')
예제 #15
0
def test8():
    """Check that a UserError occurs if wrong inputs are specified for
    different functions/methods.
    """
    constr = {"DETERMINISTIC": False, "AGENTS": 1000}
    generate_random_dict(constr)
    df = simulate("test.grmpy.yml")
    dict_ = read("test.grmpy.yml")
    invalid = list()
    # Point the estimation at a data file that does not match the spec.
    dict_["ESTIMATION"]["file"] = "data.grmpy.yml"
    print_dict(dict_, "false_data")
    # Misspelled file names and inconsistent specs must raise UserError.
    pytest.raises(UserError, fit, "tast.grmpy.yml")
    pytest.raises(UserError, fit, "false_data.grmpy.yml")
    pytest.raises(UserError, simulate, "tast.grmpy.yml")
    pytest.raises(UserError, read, "tast.grmpy.yml")
    pytest.raises(UserError, start_values, invalid, df, "init")
    pytest.raises(UserError, generate_random_dict, invalid)
예제 #16
0
def test8():
    """Check that a UserError occurs if wrong inputs are specified for
    different functions/methods.
    """
    constr = {'DETERMINISTIC': False, 'AGENTS': 1000}
    generate_random_dict(constr)
    df = simulate('test.grmpy.ini')
    invalid = []
    dict_ = read('test.grmpy.ini')
    # Point the estimation at a data file that does not match the spec.
    dict_['ESTIMATION']['file'] = 'data.grmpy.ini'
    print_dict(dict_, 'false_data')
    # Misspelled file names and inconsistent specs must raise UserError.
    pytest.raises(UserError, estimate, 'tast.grmpy.ini')
    pytest.raises(UserError, estimate, 'false_data.grmpy.ini')
    pytest.raises(UserError, simulate, 'tast.grmpy.ini')
    pytest.raises(UserError, read, 'tast.grmpy.ini')
    pytest.raises(UserError, start_values, invalid, df, 'init')
    pytest.raises(UserError, generate_random_dict, invalid)
예제 #17
0
def test2():
    """This test runs a random selection of five regression tests from the
    package's regression test vault.
    """
    fname = TEST_RESOURCES_DIR + '/regression_vault.grmpy.json'
    # Use a context manager so the file handle is closed deterministically
    # (the original json.load(open(fname)) leaked the handle).
    with open(fname) as vault:
        tests = json.load(vault)
    random_choice = np.random.choice(range(len(tests)), 5)
    tests = [tests[i] for i in random_choice]

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_)
        df = simulate('test.grmpy.ini')
        init_dict = read('test.grmpy.ini')
        start = start_values(init_dict, df, 'init')
        criteria_ = calculate_criteria(init_dict, df, start)
        # Both the data checksum and the criterion value must replicate.
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
        np.testing.assert_array_almost_equal(criteria, criteria_)
예제 #18
0
def test_replication_carneiro():
    """
    Check the equality of the results of R's locpoly function and grmpy's
    locpoly function on the mock data set from Carneiro et al (2011). Both
    mte_u and the final mte are compared.
    """
    init_dict = read(TEST_RESOURCES_DIR + "/replication_semipar.yml")
    init_dict["ESTIMATION"]["file"] = (
        TEST_RESOURCES_DIR + "/aer-replication-mock.pkl"
    )
    print_dict(init_dict, TEST_RESOURCES_DIR + "/replication_semipar")
    rslt = fit(
        TEST_RESOURCES_DIR + "/replication_semipar.grmpy.yml", semipar=True
    )

    # Reference results produced by R's locpoly.
    expected_mte_u = pd.read_pickle(
        TEST_RESOURCES_DIR + "/replication-results-mte_u.pkl"
    )
    expected_mte = pd.read_pickle(
        TEST_RESOURCES_DIR + "/replication-results-mte.pkl"
    )

    np.testing.assert_array_almost_equal(rslt["mte_u"], expected_mte_u, 6)
    np.testing.assert_array_almost_equal(rslt["mte"], expected_mte, 6)
예제 #19
0
def test2():
    """This test runs a random selection of five regression tests from our old
    regression test battery.
    """
    fname = TEST_RESOURCES_DIR + "/old_regression_vault.grmpy.json"
    # Use a context manager so the file handle is closed deterministically
    # (the original json.load(open(fname)) leaked the handle).
    with open(fname) as vault:
        tests = json.load(vault)
    random_choice = np.random.choice(range(len(tests)), 5)
    tests = [tests[i] for i in random_choice]

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_transformation(dict_))
        df = simulate("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)

        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0,
                                       start)
        # Both the data checksum and the criterion value must replicate.
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
        np.testing.assert_array_almost_equal(criteria, criteria_)
예제 #20
0
def update_tutorial(file, rho=None):
    """Rewrite the grmpy tutorial file so that it corresponds to a
    parameterization with essential heterogeneity."""

    # Draw the two correlation coefficients when none are supplied.
    if rho is None:
        rho = [
            np.random.uniform(0.3, 0.7, 1),
            np.random.uniform(-0.1, -0.345, 1),
        ]

    init_dict = read(file)
    init_dict["SIMULATION"]["source"] = "data_eh"

    params = init_dict["DIST"]["params"]
    sd1, sd0, sdv = params[0], params[3], params[-1]

    # Convert the correlations into the covariance entries stored in the
    # initialization file.
    params[2] = sd1 * sdv * rho[0]
    params[-2] = sd0 * sdv * rho[1]

    print_dict(init_dict, "files/tutorial_eh")
예제 #21
0
def test3():
    """Check that the criteria function value of the simulated and the
    'estimated' sample is equal when both samples include an identical
    number of individuals.
    """
    for _ in range(5):
        constr = constraints(probability=0.0,
                             agents=10000,
                             start='init',
                             optimizer='SCIPY-BFGS')
        init = generate_random_dict(constr)
        print_dict(init)

        df1 = simulate('test.grmpy.ini')
        rslt = estimate('test.grmpy.ini')
        init_dict = read('test.grmpy.ini')
        df2 = simulate_estimation(init_dict, rslt, df1)
        start = start_values(init_dict, df1, 'init')

        # Evaluate the criterion on both samples at the same start values.
        criteria = [calculate_criteria(init_dict, data, start)
                    for data in [df1, df2]]
        np.testing.assert_allclose(criteria[1], criteria[0], rtol=0.1)
예제 #22
0
def test6():
    """In addition to test5, this test checks that the comparison file
    provides the expected output when maxiter is set to zero and the
    estimation process uses the initialization file values as start values.
    """
    for _ in range(5):
        constr = {
            'DETERMINISTIC': False,
            'MAXITER': 0,
            'AGENTS': 10000,
            'START': 'init',
            'SAME_SIZE': True,
        }
        dict_ = generate_random_dict(constr)
        # Pin down the distribution entries so start and finish samples align.
        dict_['DIST']['all'][1] = 0.0
        dict_['DIST']['all'][5] = 1.0
        print_dict(dict_)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
        desc = read_desc('comparison.grmpy.txt')
        for key_ in ['All', 'Treated', 'Untreated']:
            # All sample counts within a section must coincide.
            np.testing.assert_equal(len(set(desc[key_]['Number'])), 1)
            np.testing.assert_almost_equal(
                desc[key_]['Observed Sample'],
                desc[key_]['Simulated Sample (finish)'], 0.001)
            np.testing.assert_array_almost_equal(
                desc[key_]['Simulated Sample (finish)'],
                desc[key_]['Simulated Sample (start)'], 0.001)
예제 #23
0
def print_model_dict(model_dict, fname='mc_init'):
    """This function prints a model specification.

    Thin wrapper around print_dict: writes *model_dict* to the
    initialization file stem *fname* (default 'mc_init').
    """
    print_dict(model_dict, fname)
예제 #24
0
파일: run.py 프로젝트: lnsongxf/grmpy
from grmpy.test.random_init import generate_random_dict
from grmpy.test.random_init import print_dict
import grmpy

# We simply specify a minimum number of minutes for our package to run with different requests.
MINUTES = 1

# Wall-clock deadline for the stress loop below.
end_time = datetime.datetime.now() + datetime.timedelta(minutes=MINUTES)
counter = 1
while True:
    # Stop once the requested running time has elapsed.
    if datetime.datetime.now() >= end_time:
        break

    print('\n Iteration ', counter)

    # Draw a fresh random specification and write it to disk.
    dict_ = generate_random_dict()
    print_dict(dict_)

    grmpy.simulate('test.grmpy.ini')

    # This is a temporary fix so that the determination of starting values by PROBIT does
    # not work if we have a perfect separation.
    try:
        grmpy.estimate('test.grmpy.ini')
    except statsmodels.tools.sm_exceptions.PerfectSeparationError:
        print('separation error, skip')
    # Remove untracked files produced during this iteration.
    subprocess.check_call(['git', 'clean', '-d', '-f'])

    counter += 1
예제 #25
0
def test7():
    """This test ensures that the estimation process returns a UserError if
    one tries to execute an estimation process with initialization file values
    as start values for a deterministic setting, and that other malformed
    specifications are rejected by the check routines.
    """
    # Fixture files, each triggering a specific specification error.
    fname_falsespec1 = TEST_RESOURCES_DIR + "/test_falsespec1.grmpy.yml"
    fname_falsespec2 = TEST_RESOURCES_DIR + "/test_falsespec2.grmpy.yml"
    fname_noparams = TEST_RESOURCES_DIR + "/test_noparams.grmpy.yml"
    fname_binary = TEST_RESOURCES_DIR + "/test_binary.grmpy.yml"
    fname_vzero = TEST_RESOURCES_DIR + "/test_vzero.grmpy.yml"
    fname_possd = TEST_RESOURCES_DIR + "/test_npsd.grmpy.yml"
    fname_zero = TEST_RESOURCES_DIR + "/test_zero.grmpy.yml"

    for _ in range(5):
        # A deterministic setting must be rejected by fit and the checks.
        constr = dict()
        constr["AGENTS"], constr["DETERMINISTIC"] = 1000, True
        generate_random_dict(constr)
        dict_ = read("test.grmpy.yml")
        pytest.raises(UserError, check_sim_distribution, dict_)
        pytest.raises(UserError, fit, "test.grmpy.yml")

        # A duplicated covariate ("X1" appearing twice in CHOICE's order)
        # must be rejected by simulation and estimation.
        generate_random_dict(constr)
        dict_ = read("test.grmpy.yml")
        if len(dict_["CHOICE"]["order"]) == 1:
            dict_["CHOICE"]["params"] = list(dict_["CHOICE"]["params"])
            dict_["CHOICE"]["params"] += [1.000]
            dict_["CHOICE"]["order"] += [2]

        dict_["CHOICE"]["order"][1] = "X1"
        print_dict(dict_)
        pytest.raises(UserError, check_sim_init_dict, dict_)
        pytest.raises(UserError, simulate, "test.grmpy.yml")
        pytest.raises(UserError, fit, "test.grmpy.yml")

        # Zero agents is an invalid simulation request.
        constr["AGENTS"] = 0
        generate_random_dict(constr)
        dict_ = read("test.grmpy.yml")
        pytest.raises(UserError, check_sim_init_dict, dict_)
        pytest.raises(UserError, simulate, "test.grmpy.yml")

        # Non-finite start values must be rejected.
        length = np.random.randint(2, 100)
        array = np.random.rand(length, 1)
        subsitute = np.random.randint(0, len(array) - 1)
        array[subsitute] = np.inf
        pytest.raises(UserError, check_start_values, array)

    # Non-positive-semidefinite covariance matrix.
    dict_ = read(fname_possd)
    pytest.raises(UserError, check_sim_init_dict, dict_)
    pytest.raises(UserError, simulate, fname_possd)

    # Degenerate (zero) distribution parameters.
    dict_ = read(fname_zero)
    pytest.raises(UserError, check_sim_distribution, dict_)
    pytest.raises(UserError, fit, fname_zero)

    dict_ = read(fname_vzero)
    pytest.raises(UserError, check_sim_distribution, dict_)
    pytest.raises(UserError, fit, fname_vzero)

    # Missing parameter section.
    dict_ = read(fname_noparams)
    pytest.raises(UserError, check_sim_distribution, dict_)
    pytest.raises(UserError, fit, fname_noparams)

    # Misspecified sections.
    dict_ = read(fname_falsespec1)
    pytest.raises(UserError, check_sim_init_dict, dict_)
    pytest.raises(UserError, fit, fname_noparams)

    dict_ = read(fname_falsespec2)
    pytest.raises(UserError, check_sim_init_dict, dict_)
    pytest.raises(UserError, fit, fname_noparams)

    # Binary-variable special configuration is flagged but still rejected.
    dict_ = read(fname_binary)
    status, _ = check_special_conf(dict_)
    np.testing.assert_equal(status, True)
    pytest.raises(UserError, check_sim_init_dict, dict_)
    pytest.raises(UserError, fit, fname_noparams)
예제 #26
0
def monte_carlo(file, which, grid_points=10):
    """This function estimates various effect parameters for
    increasing presence of essential heterogeneity, which is reflected
    by increasing correlation between U_1 and V.

    Parameters
    ----------
    file: yaml
        grmpy initialization file.
    which: string
        Label selecting either conventional average effects or a specific
        estimation approach for the ATE (see the if/elif chain below).
    grid_points: int, default 10
        Number of rho values on [0, -0.99] at which the parameter of
        interest is evaluated.

    Returns
    -------
    effects: list
        One entry per grid point: an (ATE, TT) tuple for
        "conventional_average_effects", otherwise a scalar estimate.
    """
    # simulate a new data set with essential heterogeneity present
    model_dict = read(file)
    original_correlation = model_dict["DIST"]["params"][2]

    model_dict["DIST"]["params"][2] = -0.191
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    effects = []

    # Loop over different correlations between V and U_1
    for rho in np.linspace(0.00, -0.99, grid_points):
        # Readjust the initialization file values to add correlation
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        update_correlation_structure(file, model_spec, rho)
        sim_spec = read(file)
        # Simulate a data set and specify exogenous and endogenous variables
        df_mc = create_data(file)
        treated = df_mc["D"] == 1
        Xvar = df_mc[X]
        instr = sim_spec["CHOICE"]["order"]
        instr = [i for i in instr if i != "const"]

        # We calculate our parameter of interest
        label = which.lower()

        if label == "conventional_average_effects":
            ATE = np.mean(df_mc["Y1"] - df_mc["Y0"])
            TT = np.mean(df_mc["Y1"].loc[treated] - df_mc["Y0"].loc[treated])
            stat = (ATE, TT)

        elif label in ["random", "randomization"]:
            # Naive mean comparison between treated and untreated outcomes.
            random = np.mean(df_mc[df_mc.D == 1]["Y"]) - np.mean(
                df_mc[df_mc.D == 0]["Y"])
            stat = random

        elif label in ["ordinary_least_squares", "ols"]:
            results = sm.OLS(df_mc["Y"], df_mc[["const", "D"]]).fit()
            stat = results.params[1]

        elif label in ["instrumental_variables", "iv"]:
            iv = IV2SLS(df_mc["Y"], Xvar, df_mc["D"], df_mc[instr]).fit()
            stat = iv.params["D"]

        elif label in ["grmpy", "grmpy-par"]:
            rslt = grmpy.fit(file)
            beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
            stat = np.dot(np.mean(Xvar), beta_diff)

        elif label in ["grmpy-semipar", "grmpy-liv"]:
            rslt = grmpy.fit(file, semipar=True)

            y0_fitted = np.dot(rslt["X"], rslt["b0"])
            y1_fitted = np.dot(rslt["X"], rslt["b1"])

            mte_x_ = y1_fitted - y0_fitted
            mte_u = rslt["mte_u"]

            # NOTE: a quantile grid (`us`) was computed here in the original
            # but never used; it has been removed as dead code.
            mte_mat = np.zeros((len(mte_x_), len(mte_u)))

            for i in range(len(mte_x_)):
                for j in range(len(mte_u)):
                    mte_mat[i, j] = mte_x_[i] + mte_u[j]

            ate_tilde_p = np.mean(mte_mat, axis=1)
            stat = ate_tilde_p.mean()

        else:
            raise NotImplementedError

        effects += [stat]

    # Restore original init file
    model_dict = read(file)
    model_dict["DIST"]["params"][2] = original_correlation
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    return effects
예제 #27
0
def monte_carlo(file, which, grid_points=10):
    """
    This function conducts a Monte Carlo simulation to compare
    the true and estimated treatment parameters for increasing
    (absolute) correlation between U_1 and V (i.e essential
    heterogeneity).

    In the example here, the correlation between U_1 and V becomes
    increasingly more negative. As we consider the absolute value
    of the correlation coefficient, values closer to -1
    (or in the analogous case closer to +1)
    denote a higher degree of essential heterogeneity.

    The results of the Monte Carlo simulation can be used
    to evaluate the performance of different estimation strategies
    in the presence of essential heterogeneity.

    Depending on the specification of *which*, either the true ATE
    and TT, or an estimate of the ATE are returned.

    Options for *which*:

        Comparison of ATE and TT
        - "conventional_average_effects"

        Different estimation strategies for ATE
        - "randomization" ("random")
        - "ordinary_least_squares" ("ols")
        - "instrumental_variables" ("iv")
        - "grmpy_par" ("grmpy")
        - "grmpy_semipar"("grmpy-liv")

    Post-estimation: To plot the comparison between the true ATE
    and the respective parameter, use the function
    - plot_effects() for *which* = "conventional_average_effects", and
    - plot_estimates() else.

    Parameters
    ----------
    file: yaml
        grmpy initialization file, provides information for the simulation process.
    which: string
        String denoting whether conventional average effects shall be computed
        or, alternatively, which estimation approach shall be implemented for the ATE.
    grid_points: int, default 10
        Number of different values for rho, the correlation coefficient
        between U_1 and V, on the interval [0, -1), along which the parameters
        shall be evaluated.

    Returns
    -------
    effects: list
        If *which* = "conventional_average_effects",
            list of length *grid_points* x 2 containing the true ATE and TT.
        Else, list of length *grid_points* x 1 containing an estimate
            of the ATE.
    """
    # simulate a new data set with essential heterogeneity present
    model_dict = read(file)
    original_correlation = model_dict["DIST"]["params"][2]

    model_dict["DIST"]["params"][2] = -0.191
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    effects = []

    # Loop over different correlations between U_1 and V
    for rho in np.linspace(0.00, -0.99, grid_points):
        # Readjust the initialization file values to add correlation
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        _update_correlation_structure(file, model_spec, rho)
        sim_spec = read(file)
        # Simulate a data set and specify exogenous and endogenous variables
        df_mc = _create_data(file)
        treated = df_mc["D"] == 1
        Xvar = df_mc[X]
        instr = sim_spec["CHOICE"]["order"]
        instr = [i for i in instr if i != "const"]

        # We calculate our parameter of interest
        label = which.lower()

        if label == "conventional_average_effects":
            ATE = np.mean(df_mc["Y1"] - df_mc["Y0"])
            TT = np.mean(df_mc["Y1"].loc[treated] - df_mc["Y0"].loc[treated])
            stat = (ATE, TT)

        elif label in ["randomization", "random"]:
            # Naive mean comparison between treated and untreated outcomes.
            random = np.mean(df_mc[df_mc.D == 1]["Y"]) - np.mean(
                df_mc[df_mc.D == 0]["Y"]
            )
            stat = random

        elif label in ["ordinary_least_squares", "ols"]:
            results = sm.OLS(df_mc["Y"], df_mc[["const", "D"]]).fit()
            stat = results.params[1]

        elif label in ["instrumental_variables", "iv"]:
            iv = IV2SLS(df_mc["Y"], Xvar, df_mc["D"], df_mc[instr]).fit()
            stat = iv.params["D"]

        elif label in ["grmpy", "grmpy-par"]:
            rslt = grmpy.fit(file)
            beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
            stat = np.dot(np.mean(Xvar), beta_diff)

        elif label in ["grmpy-semipar", "grmpy-liv"]:
            rslt = grmpy.fit(file, semipar=True)

            y0_fitted = np.dot(rslt["X"], rslt["b0"])
            y1_fitted = np.dot(rslt["X"], rslt["b1"])

            mte_x_ = y1_fitted - y0_fitted
            mte_u = rslt["mte_u"]

            # NOTE: a quantile grid (`us`) was computed here in the original
            # but never used; it has been removed as dead code.
            mte_mat = np.zeros((len(mte_x_), len(mte_u)))

            for i in range(len(mte_x_)):
                for j in range(len(mte_u)):
                    mte_mat[i, j] = mte_x_[i] + mte_u[j]

            ate_tilde_p = np.mean(mte_mat, axis=1)
            stat = ate_tilde_p.mean()

        else:
            raise NotImplementedError

        effects += [stat]

    # Restore original init file
    model_dict = read(file)
    model_dict["DIST"]["params"][2] = original_correlation
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    return effects
예제 #28
0
def test2():
    """Verify that the simulated outcome identities continue to hold when the
    coefficient vectors of different model sections are forced to zero
    (legacy ``.ini`` interface with ``'all'``/``'order'`` keys).
    """
    setups = ['ALL', 'TREATED', 'UNTREATED', 'CHOICE', 'TREATED & UNTREATED']

    for _ in range(10):
        for setup in setups:
            # Draw a random, non-deterministic model specification.
            dict_ = generate_random_dict({'DETERMINISTIC': False})

            # Determine which sections get their coefficients zeroed out.
            if setup == 'ALL':
                targets = ['TREATED', 'UNTREATED', 'CHOICE']
            elif setup == 'TREATED & UNTREATED':
                targets = ['TREATED', 'UNTREATED']
            else:
                targets = [setup]

            for section in targets:
                dict_[section]['all'] = np.zeros(len(dict_[section]['all']))

            print_dict(dict_)

            dict_ = read('test.grmpy.ini')
            df = simulate('test.grmpy.ini')

            names = dict_['varnames']
            x_treated = df[[names[pos - 1]
                            for pos in dict_['TREATED']['order']]]
            x_untreated = df[[names[pos - 1]
                              for pos in dict_['UNTREATED']['order']]]

            if setup in ('ALL', 'TREATED & UNTREATED'):
                # Both outcome equations are zeroed, so the potential
                # outcomes collapse to the unobservables.
                np.testing.assert_array_equal(df.Y1, df.U1)
                np.testing.assert_array_equal(df.Y0, df.U0)
                if setup == 'TREATED & UNTREATED':
                    np.testing.assert_array_equal(df.Y[df.D == 1],
                                                  df.U1[df.D == 1])
                    np.testing.assert_array_equal(df.Y[df.D == 0],
                                                  df.U0[df.D == 0])
            elif setup == 'TREATED':
                expected_y0 = (dict_['UNTREATED']['all'] *
                               x_untreated).sum(axis=1) + df.U0
                np.testing.assert_array_almost_equal(df.Y0, expected_y0,
                                                     decimal=5)
                np.testing.assert_array_equal(df.Y1, df.U1)

            elif setup == 'UNTREATED':
                expected_y1 = (dict_['TREATED']['all'] *
                               x_treated).sum(axis=1) + df.U1
                np.testing.assert_array_almost_equal(df.Y1, expected_y1,
                                                     decimal=5)
                np.testing.assert_array_equal(df.Y0, df.U0)
            else:
                # Only CHOICE is zeroed; both outcome equations stay intact.
                expected_y1 = (dict_['TREATED']['all'] *
                               x_treated).sum(axis=1) + df.U1
                expected_y0 = (dict_['UNTREATED']['all'] *
                               x_untreated).sum(axis=1) + df.U0
                np.testing.assert_array_almost_equal(df.Y1, expected_y1,
                                                     decimal=5)
                np.testing.assert_array_almost_equal(df.Y0, expected_y0,
                                                     decimal=5)

            # Observed outcome must coincide with the realized potential one.
            np.testing.assert_array_equal(df.Y[df.D == 1], df.Y1[df.D == 1])
            np.testing.assert_array_equal(df.Y[df.D == 0], df.Y0[df.D == 0])
예제 #29
0
def test2():
    """Verify that the simulated outcome identities continue to hold when the
    coefficient vectors of different model sections are forced to zero
    (``.yml`` interface with ``"params"``/``"order"`` keys).
    """
    cases = ["ALL", "TREATED", "UNTREATED", "CHOICE", "TREATED & UNTREATED"]

    for _ in range(10):
        for case in cases:
            # Draw a random, non-deterministic model specification.
            dict_ = generate_random_dict({"DETERMINISTIC": False})

            # Select the sections whose coefficients are set to zero.
            if case == "ALL":
                zero_sections = ["TREATED", "UNTREATED", "CHOICE"]
            elif case == "TREATED & UNTREATED":
                zero_sections = ["TREATED", "UNTREATED"]
            else:
                zero_sections = [case]

            for sec in zero_sections:
                dict_[sec]["params"] = np.zeros(len(dict_[sec]["params"]))

            print_dict(dict_)

            dict_ = read("test.grmpy.yml")
            df = simulate("test.grmpy.yml")
            x_treated = df[dict_["TREATED"]["order"]]
            x_untreated = df[dict_["UNTREATED"]["order"]]

            if case in ("ALL", "TREATED & UNTREATED"):
                # Both outcome equations are zeroed, so the potential
                # outcomes collapse to the unobservables.
                np.testing.assert_array_equal(df.Y1, df.U1)
                np.testing.assert_array_equal(df.Y0, df.U0)
                if case == "TREATED & UNTREATED":
                    np.testing.assert_array_equal(df.Y[df.D == 1],
                                                  df.U1[df.D == 1])
                    np.testing.assert_array_equal(df.Y[df.D == 0],
                                                  df.U0[df.D == 0])
            elif case == "TREATED":
                expected_y0 = (dict_["UNTREATED"]["params"] *
                               x_untreated).sum(axis=1) + df.U0
                np.testing.assert_array_almost_equal(df.Y0, expected_y0,
                                                     decimal=5)
                np.testing.assert_array_equal(df.Y1, df.U1)

            elif case == "UNTREATED":
                expected_y1 = (dict_["TREATED"]["params"] *
                               x_treated).sum(axis=1) + df.U1
                np.testing.assert_array_almost_equal(df.Y1, expected_y1,
                                                     decimal=5)
                np.testing.assert_array_equal(df.Y0, df.U0)
            else:
                # Only CHOICE is zeroed; both outcome equations stay intact.
                expected_y1 = (dict_["TREATED"]["params"] *
                               x_treated).sum(axis=1) + df.U1
                expected_y0 = (dict_["UNTREATED"]["params"] *
                               x_untreated).sum(axis=1) + df.U0
                np.testing.assert_array_almost_equal(df.Y1, expected_y1,
                                                     decimal=5)
                np.testing.assert_array_almost_equal(df.Y0, expected_y0,
                                                     decimal=5)

            # Observed outcome must coincide with the realized potential one.
            np.testing.assert_array_equal(df.Y[df.D == 1], df.Y1[df.D == 1])
            np.testing.assert_array_equal(df.Y[df.D == 0], df.Y0[df.D == 0])
예제 #30
0
def test7():
    """Check that invalid model specifications raise a ``UserError``.

    Covers several distinct failure modes: a deterministic setup used for
    estimation, duplicated/invalid ``order`` indices, a zero number of
    agents, inconsistent binary/categorical specifications across sections,
    and a set of pre-built broken init files from the test resources.
    """
    # Pre-built invalid init files shipped with the test resources.
    fname_diff_categorical = TEST_RESOURCES_DIR + '/test_categorical_diff.grmpy.ini'
    fname_categorical = TEST_RESOURCES_DIR + '/test_categorical.grmpy.ini'
    fname_diff_binary = TEST_RESOURCES_DIR + '/test_binary_diff.grmpy.ini'
    fname_vzero = TEST_RESOURCES_DIR + '/test_vzero.grmpy.ini'
    fname_possd = TEST_RESOURCES_DIR + '/test_npsd.grmpy.ini'
    fname_zero = TEST_RESOURCES_DIR + '/test_zero.grmpy.ini'

    for _ in range(5):
        # A deterministic setup must be rejected by the estimation checks.
        constr = dict()
        constr['AGENTS'], constr['DETERMINISTIC'] = 1000, True
        generate_random_dict(constr)
        dict_ = read('test.grmpy.ini')
        pytest.raises(UserError, check_init_file, dict_)
        pytest.raises(UserError, estimate, 'test.grmpy.ini')

        # Duplicated covariate order index in CHOICE must be rejected by
        # simulation and estimation alike.
        generate_random_dict(constr)
        dict_ = read('test.grmpy.ini')
        if len(dict_['CHOICE']['order']) == 1:
            # Ensure CHOICE has at least two covariates so an index clash
            # can be constructed below.
            dict_['CHOICE']['all'] = list(dict_['CHOICE']['all'])
            dict_['CHOICE']['all'] += [1.000]
            dict_['CHOICE']['order'] += [2]
            dict_['CHOICE']['types'] += ['nonbinary']

        # Force a duplicate: second covariate gets the same order index (1).
        dict_['CHOICE']['order'][1] = 1
        print_dict(dict_)
        pytest.raises(UserError, check_initialization_dict, dict_)
        pytest.raises(UserError, simulate, 'test.grmpy.ini')
        pytest.raises(UserError, estimate, 'test.grmpy.ini')

        # Zero agents is an invalid sample size for simulation.
        constr['AGENTS'] = 0
        generate_random_dict(constr)
        dict_ = read('test.grmpy.ini')
        pytest.raises(UserError, check_initialization_dict, dict_)
        pytest.raises(UserError, simulate, 'test.grmpy.ini')

        # All section pairs/triples that must agree on covariate typing.
        tests = []
        tests += [['TREATED', 'UNTREATED'], ['TREATED', 'CHOICE'],
                  ['UNTREATED', 'CHOICE']]
        tests += [['TREATED', 'UNTREATED', 'CHOICE']]

        for combi in tests:
            constr['STATE_DIFF'], constr['OVERLAP'] = True, True
            generate_random_dict(constr)
            dict_ = read('test.grmpy.ini')
            for j in combi:

                if len(dict_[j]['order']) == 1:
                    # Pad the section so index 1 exists for manipulation.
                    dict_[j]['all'] = list(dict_[j]['all'])
                    dict_[j]['all'] += [1.000]
                    dict_[j]['order'] += [2]
                    dict_[j]['types'] += ['nonbinary']
                else:
                    pass
                # Point the second covariate to an out-of-range order index
                # and redeclare it binary with a random fraction, creating a
                # type mismatch across the sections in `combi`.
                dict_[j]['order'][1] = len(dict_['AUX']['types']) + 1

                frac = np.random.uniform(0.1, 0.8)
                dict_[j]['types'][1] = ['binary', frac]

            print_dict(dict_)

            # Reading the manipulated file back must already fail.
            pytest.raises(UserError, read, 'test.grmpy.ini')

    # Non-positive-semidefinite covariance matrix.
    dict_ = read(fname_possd)
    pytest.raises(UserError, check_initialization_dict, dict_)
    pytest.raises(UserError, simulate, fname_possd)

    # Invalid categorical specification.
    dict_ = read(fname_categorical)
    pytest.raises(UserError, check_initialization_dict, dict_)
    pytest.raises(UserError, simulate, fname_categorical)

    # All-zero coefficients are invalid start values for estimation.
    dict_ = read(fname_zero)
    pytest.raises(UserError, check_init_file, dict_)
    pytest.raises(UserError, estimate, fname_zero)

    # Zero variance of V is invalid for estimation.
    dict_ = read(fname_vzero)
    pytest.raises(UserError, check_init_file, dict_)
    pytest.raises(UserError, estimate, fname_vzero)

    # Binary specification differing across sections.
    dict_ = read(fname_diff_binary)
    pytest.raises(UserError, check_initialization_dict, dict_)
    pytest.raises(UserError, estimate, fname_diff_binary)

    # Categorical specification differing across sections.
    dict_ = read(fname_diff_categorical)
    pytest.raises(UserError, check_initialization_dict, dict_)
    pytest.raises(UserError, estimate, fname_diff_categorical)