コード例 #1
0
def check_vault(num_tests=100):
    """Check the regression vault that is distributed as part of the package.

    Each stored test is re-simulated and both the criterion value and the
    checksum of the simulated data are compared against the stored results.

    Parameters
    ----------
    num_tests : int
        Number of randomly chosen vault entries to evaluate.  If it exceeds
        the vault size, the complete battery is run instead.
    """
    fname = (os.path.dirname(grmpy.__file__) +
             "/test/resources/old_regression_vault.grmpy.json")
    # Use a context manager so the file handle is closed deterministically.
    with open(fname) as file_:
        tests = json.load(file_)

    if num_tests > len(tests):
        print("The specified number of evaluations is larger than the number"
              " of entries in the regression_test vault.\n"
              "Therefore the test runs the complete test battery.")
    else:
        # Sample without replacement so no vault entry is evaluated twice.
        tests = [tests[i] for i in
                 np.random.choice(len(tests), num_tests, replace=False)]

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_transformation(dict_))
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        np.testing.assert_almost_equal(criteria_, criteria)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
        cleanup("regression")
コード例 #2
0
ファイル: run.py プロジェクト: fagan2888/grmpy
def run(args):
    """Run the property test battery.

    Depending on the command line arguments this either reproduces a single
    test run (``is_check``) or keeps drawing random property tests until the
    requested time budget is exhausted.

    Parameters
    ----------
    args
        Raw command line arguments; normalised into a dictionary by
        ``distribute_command_line_arguments``.
    """
    args = distribute_command_line_arguments(args)

    test_dict = collect_tests()

    # rslt[module][test] holds a pair [num_successes, num_failures].
    rslt = dict()
    for module in test_dict.keys():
        rslt[module] = dict()
        for test in test_dict[module]:
            rslt[module][test] = [0, 0]

    cleanup()

    if args['is_check']:
        # Reproduce a single run: the seed deterministically fixes the
        # module/test selection below.
        np.random.seed(args['seed'])
        module = choose_module(test_dict)
        test = np.random.choice(test_dict[module])
        run_property_test(module, test)

    else:
        err_msg = []

        start, timeout = datetime.now(), timedelta(hours=args['hours'])

        # Pre-bind start/timeout so the progress printer only needs the
        # mutable result containers on each call.
        print_rslt = functools.partial(print_rslt_ext, start, timeout)
        print_rslt(rslt, err_msg)

        while True:

            # A fresh seed per iteration; recorded on failure so the run can
            # be reproduced later via the check mode above.
            seed = random.randrange(1, 100000)
            dirname = get_random_string()
            np.random.seed(seed)
            module = choose_module(test_dict)
            test = np.random.choice(test_dict[module])

            try:
                run_property_test(module, test, dirname)
                rslt[module][test][0] += 1
            except Exception:
                # Count the failure and keep the battery running; the full
                # traceback is stored alongside the reproducing seed.
                rslt[module][test][1] += 1
                msg = traceback.format_exc()
                err_msg += [(module, test, seed, msg)]

            # NOTE(review): presumably run_property_test chdirs into dirname;
            # step back out before removing the scratch directory — confirm.
            os.chdir('../')

            shutil.rmtree(dirname)

            print_rslt(rslt, err_msg)

            if timeout < datetime.now() - start:
                break

        finish(rslt)
コード例 #3
0
ファイル: run.py プロジェクト: lnsongxf/grmpy
def create_vault(num_tests=100, seed=123):
    """Create a new regression vault.

    Parameters
    ----------
    num_tests : int
        Number of random model specifications to simulate and store.
    seed : int
        Seed for the random number generator, ensuring reproducibility.
    """
    np.random.seed(seed)

    tests = []
    for _ in range(num_tests):
        dict_ = generate_random_dict()
        df = simulate('test.grmpy.ini')
        stat = np.sum(df.sum())
        tests += [(stat, dict_)]
        cleanup()

    # Use a context manager so the write handle is flushed and closed
    # deterministically instead of being leaked.
    with open('regression_vault.grmpy.json', 'w') as file_:
        json.dump(tests, file_)
コード例 #4
0
ファイル: run.py プロジェクト: lnsongxf/grmpy
def check_vault():
    """Check the complete regression vault that is distributed as part of the
    package.

    Every stored specification is re-simulated and the checksum of the
    resulting data is compared against the stored value.
    """
    fname = os.path.dirname(
        grmpy.__file__) + '/test/resources/regression_vault.grmpy.json'
    # Use a context manager so the file handle is closed deterministically.
    with open(fname) as file_:
        tests = json.load(file_)

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_)
        df = simulate('test.grmpy.ini')
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
        cleanup('regression')
コード例 #5
0
def test9():
    """This test ensures that the random initialization file generating
    process, the read in process and the simulation process work if the
    constraints function allows for different numbers of covariates for each
    treatment state and the occurrence of cost-benefit shifters."""
    for _ in range(5):
        # A dict literal is clearer than the original tuple-unpack assignment
        # and avoids the unused loop variable.
        constr = {
            'DETERMINISTIC': False,
            'AGENT': 1000,
            'STATE_DIFF': True,
            'OVERLAP': True,
        }
        generate_random_dict(constr)
        read('test.grmpy.ini')
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')

    cleanup()
コード例 #6
0
def create_vault(num_tests=100, seed=123):
    """Create a new regression vault.

    For each entry a random model is generated and simulated; the checksum of
    the simulated data and the criterion value at the start values are stored
    together with the model specification.

    Parameters
    ----------
    num_tests : int
        Number of random model specifications to simulate and store.
    seed : int
        Seed for the random number generator, ensuring reproducibility.
    """
    np.random.seed(seed)

    tests = []
    for _ in range(num_tests):
        dict_ = generate_random_dict()
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        stat = np.sum(df.sum())
        tests += [(stat, dict_, criteria)]
        cleanup()

    # Use a context manager so the write handle is flushed and closed
    # deterministically instead of being leaked.
    with open("regression_vault.grmpy.json", "w") as file_:
        json.dump(tests, file_)
コード例 #7
0
ファイル: test_integration.py プロジェクト: fagan2888/grmpy
def test9():
    """Ensure that random initialization-file generation, read-in and
    simulation work when the constraints allow a different number of
    covariates per treatment state and the occurence of cost-benefit
    shifters."""
    for _ in range(5):
        constr = {
            "DETERMINISTIC": False,
            "AGENT": 1000,
            "STATE_DIFF": True,
            "OVERLAP": True,
        }
        generate_random_dict(constr)
        read("test.grmpy.yml")
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")

    cleanup()
コード例 #8
0
def test13():
    """This test checks if our data import process is able to handle .txt,
    .dta and .pkl files.

    All three formats must yield the same checksum and the same column order.
    """

    pkl = TEST_RESOURCES_DIR + '/data.grmpy.pkl'
    dta = TEST_RESOURCES_DIR + '/data.grmpy.dta'
    txt = TEST_RESOURCES_DIR + '/data.grmpy.txt'

    real_sum = -3211.20122
    real_column_values = [
        'Y', 'D', 'X1', 'X2', 'X3', 'X5', 'X4', 'Y1', 'Y0', 'U1', 'U0', 'V'
    ]

    for data in [pkl, dta, txt]:
        df = read_data(data)
        # 'total' avoids shadowing the builtin sum().
        total = np.sum(df.sum())
        columns = list(df)
        np.testing.assert_array_almost_equal(total, real_sum, decimal=5)
        np.testing.assert_equal(columns, real_column_values)

    cleanup()
コード例 #9
0
def test14():
    """Check that the analytical gradient functions agree with a numerical
    approximation of the derivatives."""
    constr = {"AGENTS": 10000, "DETERMINISTIC": False}

    for _ in range(10):

        generate_random_dict(constr)
        init_dict = read("test.grmpy.yml")
        print(init_dict["AUX"])
        df = simulate("test.grmpy.yml")
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        num_treated = X1.shape[1]
        num_untreated = X1.shape[1] + X0.shape[1]

        x0 = start_values(init_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        x0_back = backward_transformation(x0)

        # Both numerical approximations share the same trailing arguments.
        common_args = (X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated,
                       None, False)

        grad_llh_num = approx_fprime_cs(
            x0_back, log_likelihood, args=common_args)
        grad_llh_ana = gradient_hessian(x0_back, X1, X0, Z1, Z0, Y1, Y0)

        grad_min_num = approx_fprime_cs(
            x0, minimizing_interface, args=common_args)
        _, grad_min_ana = log_likelihood(
            x0_back, X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated,
            None, True)

        np.testing.assert_array_almost_equal(
            grad_min_num, grad_min_ana, decimal=5)
        np.testing.assert_array_almost_equal(
            grad_llh_num, grad_llh_ana, decimal=5)

    cleanup()
コード例 #10
0
def test6():
    """The cholesky decomposition followed by the re-composition must
    reproduce the original covariance matrix.  A positive semi-definite
    input matrix is drawn from a Wishart distribution, decomposed,
    reconstructed, and compared with the input.
    """
    fake_dict = {'DIST': {'all': []}, 'AUX': {'init_values': []}}
    for _ in range(20):
        draw = wishart.rvs(df=10, scale=np.identity(3), size=1)
        params = draw[np.triu_indices(3)]
        # The diagonal entries (positions 0, 3, 5 of the upper triangle)
        # enter as standard deviations, hence the square root.
        for idx in (0, 3, 5):
            params[idx] **= 0.5
        fake_dict['DIST']['all'] = params
        fake_dict['AUX']['init_values'] = params
        cov_before = construct_covariance_matrix(fake_dict)
        x0, _ = provide_cholesky_decom(fake_dict, [], 'init')
        transformed = backward_cholesky_transformation(x0, test=True)
        transformed = adjust_output_cholesky(transformed)
        fake_dict['DIST']['all'] = transformed
        cov_after = construct_covariance_matrix(fake_dict)
        np.testing.assert_array_almost_equal(cov_before, cov_after)
    cleanup()
コード例 #11
0
def test6():
    """Extends test5: when maxiter is zero and the estimation starts from the
    initialization-file values, the descriptives file must report identical
    observed and simulated samples.
    """
    for _ in range(5):
        constr = constraints(
            probability=0.0, maxiter=0, agents=1000, start='init')
        generate_random_dict(constr)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
        info = read_desc('descriptives.grmpy.txt')
        for section in ('All', 'Treated', 'Untreated'):
            # Sample size must be constant across the table rows.
            np.testing.assert_equal(len(set(info[section]['Number'])), 1)
            # With zero iterations the start and finish simulations coincide
            # with the observed sample.
            comparisons = (
                ('Observed Sample', 'Simulated Sample (finish)'),
                ('Simulated Sample (finish)', 'Simulated Sample (start)'),
            )
            for left, right in comparisons:
                np.testing.assert_array_equal(
                    info[section][left], info[section][right])
    cleanup()
コード例 #12
0
    ax.plot(GRID, abs_, label="Absence", linestyle="--")

    ax.set_ylim([1.5, 4.5])

    plt.legend()

    plt.tight_layout()
    plt.savefig(OUTPUT_DIR + "/fig-eh-marginal-effect.png", dpi=300)


if __name__ == "__main__":
    coeffs_untreated = init_dict["UNTREATED"]["params"]
    coeffs_treated = init_dict["TREATED"]["params"]
    cov = construct_covariance_matrix(init_dict)
    df = simulate(RESOURCE_DIR + filename)
    x = df[init_dict["TREATED"]["order"]]
    MTE_pres = mte_information(coeffs_treated, coeffs_untreated, cov, GRID, x,
                               init_dict)

    para_diff = coeffs_treated - coeffs_untreated

    # The per-grid-point value does not depend on the grid point, so the
    # branch is loop-invariant: decide once and replicate across the grid
    # instead of re-evaluating it inside the loop.
    if cov[2, 2] == 0.00:
        MTE_abs = ["---"] * len(GRID)
    else:
        MTE_abs = [np.mean(np.dot(para_diff, x.T))] * len(GRID)

    plot_marginal_treatment_effect(MTE_pres, MTE_abs)
    cleanup()
コード例 #13
0
ファイル: draft.py プロジェクト: lnsongxf/grmpy
# Location of the regression vault inside the given directory.
file_dir = os.path.join(directory, 'regression_vault.grmpy.json')

# Create the vault: simulate one model per seed and store the checksum,
# the specification and the criterion value at the start values.
if True:
    tests = []
    for seed in seeds:
        np.random.seed(seed)
        constr = constraints(0.0)
        dict_ = generate_random_dict(constr)
        df = simulate('test.grmpy.ini')
        stat = np.sum(df.sum())
        init_dict = read('test.grmpy.ini')
        start = start_values(init_dict, df, 'init')
        criteria = calculate_criteria(init_dict, df, start)
        tests += [(stat, dict_, criteria)]
    # Use a context manager so the write handle is flushed and closed
    # deterministically instead of being leaked.
    with open(file_dir, 'w') as file_:
        json.dump(tests, file_)

# Check the vault: re-simulate every stored test and compare criterion value
# and data checksum against the stored results.
if True:
    with open(file_dir, 'r') as file_:
        tests = json.load(file_)

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_)
        init_dict = read('test.grmpy.ini')
        df = simulate('test.grmpy.ini')
        start = start_values(init_dict, df, 'init')
        criteria_ = calculate_criteria(init_dict, df, start)
        np.testing.assert_array_almost_equal(criteria, criteria_)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)

cleanup('regression')
コード例 #14
0
# Create the vault: simulate one model per seed and store the checksum,
# the specification and the criterion value at the start values.
if True:
    tests = []
    for seed in seeds:
        np.random.seed(seed)
        constr = dict()
        constr["DETERMINISTIC"], constr["CATEGORICAL"] = False, False
        dict_ = generate_random_dict(constr)
        df = simulate("test.grmpy.yml")
        stat = np.sum(df.sum())
        init_dict = read("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        criteria = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
        tests += [(stat, dict_, criteria)]
    # Use a context manager so the write handle is flushed and closed
    # deterministically instead of being leaked.
    with open(file_dir, "w") as file_:
        json.dump(tests, file_)

# Check the vault: re-simulate every stored test and compare criterion value
# and data checksum against the stored results.
if True:
    with open(file_dir) as file_:
        tests = json.load(file_)

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_)
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        # Mirror the creation step above: calculate_criteria expects the
        # processed design matrices, not the raw dataframe.
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0,
                                       start)
        np.testing.assert_array_almost_equal(criteria, criteria_)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)

cleanup("regression")
コード例 #15
0
def test13():
    """This test checks if functions that affect the estimation output
    adjustment work as intended.

    It verifies that non-finite parameter vectors are detected, that the
    output-processing helpers fall back to the starting values, and that the
    standard-error machinery returns NaNs for a degenerate point.
    """
    for _ in range(5):
        generate_random_dict({"DETERMINISTIC": False})
        df = simulate("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        # Bug fix: the original passed the builtin `dict` instead of the
        # simulated dataframe `df` (cf. every sibling call to start_values).
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        init_dict["AUX"]["criteria"] = calculate_criteria(
            init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
        init_dict["AUX"]["starting_values"] = backward_transformation(start)

        aux_dict1 = {"crit": {"1": 10}}

        # A fully non-finite parameter/SE vector; p- and t-values at any
        # index must propagate the NaNs.
        x0, se = [np.nan] * len(start), [np.nan] * len(start)
        index = np.random.randint(0, len(x0) - 1)
        x0[index], se[index] = np.nan, np.nan

        p_values, t_values = calculate_p_values(se, x0, df.shape[0])
        np.testing.assert_array_equal([p_values[index], t_values[index]],
                                      [np.nan, np.nan])

        # With a "notfinite" flag the processed output must fall back to the
        # stored starting values and criterion.
        x_processed, crit_processed, _ = process_output(
            init_dict, aux_dict1, x0, "notfinite")

        np.testing.assert_equal(
            [x_processed, crit_processed],
            [
                init_dict["AUX"]["starting_values"],
                init_dict["AUX"]["criteria"]
            ],
        )

        # The finite start vector passes the check, the NaN vector does not.
        check1, flag1 = check_rslt_parameters(init_dict, X1, X0, Z1, Z0, Y1,
                                              Y0, aux_dict1, start)
        check2, flag2 = check_rslt_parameters(init_dict, X1, X0, Z1, Z0, Y1,
                                              Y0, aux_dict1, x0)

        np.testing.assert_equal([check1, flag1], [False, None])
        np.testing.assert_equal([check2, flag2], [True, "notfinite"])

        opt_rslt = {
            "fun": 1.0,
            "success": 1,
            "status": 1,
            "message": "msg",
            "nfev": 10000,
        }
        rslt = adjust_output(opt_rslt,
                             init_dict,
                             start,
                             X1,
                             X0,
                             Z1,
                             Z0,
                             Y1,
                             Y0,
                             dict_=aux_dict1)
        np.testing.assert_equal(rslt["crit"], opt_rslt["fun"])
        np.testing.assert_equal(rslt["warning"][0], "---")

        # A near-singular point must yield NaN standard errors, Hessian,
        # confidence intervals, and test statistics.
        x_linalign = [0.0000000000000001] * len(x0)
        num_treated = init_dict["AUX"]["num_covars_treated"]
        num_untreated = num_treated + init_dict["AUX"]["num_covars_untreated"]
        se, hess_inv, conf_interval, p_values, t_values, _ = calculate_se(
            x_linalign, init_dict, X1, X0, Z1, Z0, Y1, Y0, num_treated,
            num_untreated)
        np.testing.assert_equal(se, [np.nan] * len(x0))
        np.testing.assert_equal(hess_inv, np.full((len(x0), len(x0)), np.nan))
        np.testing.assert_equal(conf_interval, [[np.nan, np.nan]] * len(x0))
        np.testing.assert_equal(t_values, [np.nan] * len(x0))
        np.testing.assert_equal(p_values, [np.nan] * len(x0))

    cleanup()