Example #1
0
def check_vault(num_tests=100):
    """Check entries of the regression vault that is distributed with the package.

    Each vault entry is a triple ``(stat, dict_, criteria)``. For every selected
    entry the stored values are compared against freshly computed ones:
    ``criteria`` against the result of ``calculate_criteria`` and ``stat``
    against the total sum of the simulated data frame.

    Parameters
    ----------
    num_tests : int, optional
        Number of vault entries to evaluate. If it exceeds the number of stored
        entries, a notice is printed and the complete vault is evaluated.

    Raises
    ------
    AssertionError
        If a recomputed value deviates from the stored one.
    """
    fname = os.path.join(
        os.path.dirname(grmpy.__file__),
        "test",
        "resources",
        "old_regression_vault.grmpy.json",
    )
    # Use a context manager so the file handle is closed deterministically.
    with open(fname) as infile:
        tests = json.load(infile)

    if num_tests > len(tests):
        print("The specified number of evaluations is larger than the number"
              " of entries in the regression_test vault.\n"
              "Therefore the test runs the complete test battery.")
    else:
        # NOTE: np.random.choice samples with replacement by default, so the
        # same vault entry may be evaluated more than once.
        tests = [tests[i] for i in np.random.choice(len(tests), num_tests)]

    for stat, dict_, criteria in tests:
        # Presumably writes the "test.grmpy.yml" file read below - TODO confirm.
        print_dict(dict_transformation(dict_))
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        np.testing.assert_almost_equal(criteria_, criteria)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
        cleanup("regression")
Example #2
0
def create_vault(num_tests=100, seed=123):
    """Create a new regression vault and store it as JSON.

    For ``num_tests`` randomly generated specifications the function simulates
    a data set, evaluates the criterion function at the start values and
    records the triple ``(stat, dict_, criteria)``, where ``stat`` is the total
    sum of the simulated data frame. The collected triples are written to
    ``regression_vault.grmpy.json`` in the current working directory.

    Parameters
    ----------
    num_tests : int, optional
        Number of vault entries to generate.
    seed : int, optional
        Seed passed to ``np.random.seed`` before the entries are generated.
    """
    np.random.seed(seed)

    tests = []
    for _ in range(num_tests):
        # Presumably also writes the "test.grmpy.yml" file read below - TODO confirm.
        dict_ = generate_random_dict()
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        stat = np.sum(df.sum())
        tests.append((stat, dict_, criteria))
        cleanup()

    # Close the handle explicitly so the vault is flushed to disk on return
    # rather than whenever the file object happens to be garbage collected.
    with open("regression_vault.grmpy.json", "w") as outfile:
        json.dump(tests, outfile)
Example #3
0
def test2():
    """Run a random selection of five regression tests from our old regression
    test battery.

    Each selected vault entry ``(stat, dict_, criteria)`` is compared against
    freshly computed values: ``stat`` against the total sum of the simulated
    data frame and ``criteria`` against the result of ``calculate_criteria``.
    """
    fname = TEST_RESOURCES_DIR + "/old_regression_vault.grmpy.json"
    # Use a context manager so the file handle is closed deterministically.
    with open(fname) as infile:
        tests = json.load(infile)

    # Five draws (with replacement, the np.random.choice default) from the
    # index range of the vault.
    random_choice = np.random.choice(len(tests), 5)
    tests = [tests[i] for i in random_choice]

    for stat, dict_, criteria in tests:
        # Presumably writes the "test.grmpy.yml" file used below - TODO confirm.
        print_dict(dict_transformation(dict_))
        df = simulate("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)

        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0,
                                       start)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
        np.testing.assert_array_almost_equal(criteria, criteria_)
Example #4
0
def simulate(init_file):
    """Simulate a sample from a user-specified generalized Roy model.

    The specification is read from ``init_file`` and checked for consistency.
    NumPy's global random generator is then seeded with the value stored under
    ``["SIMULATION"]["seed"]`` before unobservables, covariates and outcomes
    are simulated, in that order. Unless the specification is flagged as
    ``"DETERMINISTIC"``, the criterion function is evaluated at the ``"init"``
    start values and stored under ``init_dict["AUX"]["criteria_value"]``.

    Parameters
    ----------
    init_file
        Initialization file, forwarded unchanged to ``read_simulation``.

    Returns
    -------
    The simulated data set as returned by ``write_output``.
    """
    init_dict = read_simulation(init_file)

    # Basic consistency checks of the user's request come first.
    check_sim_init_dict(init_dict)

    # Seeding here makes the draws below recomputable.
    np.random.seed(init_dict["SIMULATION"]["seed"])

    # The order of these calls matters: they run after the seeding above.
    unobservables = simulate_unobservables(init_dict)
    covariates = simulate_covariates(init_dict)
    outcomes = simulate_outcomes(init_dict, covariates, unobservables)

    # Write the output file; the returned frame is the one used from here on.
    df = write_output(init_dict, outcomes)

    # The criterion value is only computed for non-deterministic models.
    if not init_dict["DETERMINISTIC"]:
        processed = process_data(df, init_dict)
        D, X1, X0, Z1, Z0, Y1, Y0 = processed
        x0 = start_values(init_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        criteria_value = calculate_criteria(x0, X1, X0, Z1, Z0, Y1, Y0)
        init_dict["AUX"]["criteria_value"] = criteria_value

    # Print the log file.
    print_info(init_dict, df)

    return df
Example #5
0
def test3():
    """Compare the criterion value of the simulated and the 'estimated' sample.

    Both samples contain an identical number of individuals, so their
    criterion function values, evaluated at the same start values, are
    required to agree up to a relative tolerance of 0.1.
    """
    for _ in range(5):
        # Keys are listed in the order in which the constraints are set.
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 1000,
            "START": "init",
            "OPTIMIZER": "SCIPY-BFGS",
            "SAME_SIZE": True,
        }
        generate_random_dict(constr)

        df1 = simulate("test.grmpy.yml")
        rslt = fit("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        _, df2 = simulate_estimation(rslt)
        start = start_values(init_dict, df1, "init")

        def _criterion(sample):
            # Evaluate the criterion function on one data set at ``start``.
            _, X1, X0, Z1, Z0, Y1, Y0 = process_data(sample, init_dict)
            return calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)

        crit_simulated = _criterion(df1)
        crit_estimated = _criterion(df2)
        np.testing.assert_allclose(crit_estimated, crit_simulated, rtol=0.1)
Example #6
0
# Location of the regression vault: it lives next to this script.
directory = os.path.dirname(__file__)
file_dir = os.path.join(directory, "old_regression_vault.grmpy.json")

# Step 1: (re)create the regression vault. Set the condition to False to skip.
# NOTE(review): ``seeds`` is not defined in the visible part of the file; it
# is assumed to be an iterable of integer seeds defined earlier.
if True:
    tests = []
    for seed in seeds:
        np.random.seed(seed)
        constr = {"DETERMINISTIC": False, "CATEGORICAL": False}
        dict_ = generate_random_dict(constr)
        df = simulate("test.grmpy.yml")
        stat = np.sum(df.sum())
        init_dict = read("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        criteria = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
        tests.append((stat, dict_, criteria))
    # Close the handle explicitly so the vault is fully written to disk before
    # it is read back in the checking step below.
    with open(file_dir, "w") as outfile:
        json.dump(tests, outfile)

# Step 2: check the stored vault. Set the condition to False to skip.
if True:
    with open(file_dir) as infile:
        tests = json.load(infile)

    for stat, dict_, criteria in tests:
        print_dict(dict_)
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        # The criterion must be evaluated exactly as during vault creation,
        # i.e. on the processed data arrays rather than on the raw data frame.
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
        np.testing.assert_array_almost_equal(criteria, criteria_)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
Example #7
0
def test13():
    """Check that the functions adjusting the estimation output work as intended.

    For five randomly generated, non-deterministic specifications the test
    verifies that

    * ``calculate_p_values`` returns NaN for NaN inputs,
    * ``process_output`` falls back to the stored starting values and
      criterion value when called with the ``"notfinite"`` flag,
    * ``check_rslt_parameters`` flags a NaN parameter vector as ``"notfinite"``
      but accepts the start values,
    * ``adjust_output`` passes through the optimizer's criterion value, and
    * ``calculate_se`` returns NaN-filled results for the near-zero parameter
      vector ``x_linalign``.
    """
    for _ in range(5):
        generate_random_dict({"DETERMINISTIC": False})
        df = simulate("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        # The start values are derived from the simulated data frame ``df``
        # (the builtin ``dict`` type was passed here by mistake before).
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        init_dict["AUX"]["criteria"] = calculate_criteria(
            init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
        init_dict["AUX"]["starting_values"] = backward_transformation(start)

        aux_dict1 = {"crit": {"1": 10}}

        # Parameter and standard-error vectors consisting of NaN only. The
        # random draw is kept because it consumes state of the global RNG.
        x0, se = [np.nan] * len(start), [np.nan] * len(start)
        index = np.random.randint(0, len(x0) - 1)
        x0[index], se[index] = np.nan, np.nan

        p_values, t_values = calculate_p_values(se, x0, df.shape[0])
        np.testing.assert_array_equal([p_values[index], t_values[index]],
                                      [np.nan, np.nan])

        x_processed, crit_processed, _ = process_output(
            init_dict, aux_dict1, x0, "notfinite")

        np.testing.assert_equal(
            [x_processed, crit_processed],
            [
                init_dict["AUX"]["starting_values"],
                init_dict["AUX"]["criteria"]
            ],
        )

        check1, flag1 = check_rslt_parameters(init_dict, X1, X0, Z1, Z0, Y1,
                                              Y0, aux_dict1, start)
        check2, flag2 = check_rslt_parameters(init_dict, X1, X0, Z1, Z0, Y1,
                                              Y0, aux_dict1, x0)

        np.testing.assert_equal([check1, flag1], [False, None])
        np.testing.assert_equal([check2, flag2], [True, "notfinite"])

        # Hand-built stand-in for an optimizer result.
        opt_rslt = {
            "fun": 1.0,
            "success": 1,
            "status": 1,
            "message": "msg",
            "nfev": 10000,
        }
        rslt = adjust_output(opt_rslt,
                             init_dict,
                             start,
                             X1,
                             X0,
                             Z1,
                             Z0,
                             Y1,
                             Y0,
                             dict_=aux_dict1)
        np.testing.assert_equal(rslt["crit"], opt_rslt["fun"])
        np.testing.assert_equal(rslt["warning"][0], "---")

        # A near-zero parameter vector for which every output of calculate_se
        # is expected to be NaN (see the assertions below).
        x_linalign = [0.0000000000000001] * len(x0)
        num_treated = init_dict["AUX"]["num_covars_treated"]
        num_untreated = num_treated + init_dict["AUX"]["num_covars_untreated"]
        se, hess_inv, conf_interval, p_values, t_values, _ = calculate_se(
            x_linalign, init_dict, X1, X0, Z1, Z0, Y1, Y0, num_treated,
            num_untreated)
        np.testing.assert_equal(se, [np.nan] * len(x0))
        np.testing.assert_equal(hess_inv, np.full((len(x0), len(x0)), np.nan))
        np.testing.assert_equal(conf_interval, [[np.nan, np.nan]] * len(x0))
        np.testing.assert_equal(t_values, [np.nan] * len(x0))
        np.testing.assert_equal(p_values, [np.nan] * len(x0))

    cleanup()
Example #8
0
def test13():
    """Exercise the helpers that adjust and post-process the estimation output.

    For five randomly generated, non-deterministic specifications the test
    checks ``process_output``, ``check_rslt_parameters``, ``adjust_output``
    and ``calculate_se`` against the values they are expected to return for
    NaN-only and zero-only parameter vectors.
    """
    for _ in range(5):
        generate_random_dict({"DETERMINISTIC": False})
        df = simulate("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")

        # The six data arrays are always passed on together, in this order.
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        arrays = (X1, X0, Z1, Z0, Y1, Y0)

        rslt_cont = create_rslt_df(init_dict)
        start = start_values(init_dict, D, *arrays, "init")
        init_dict["AUX"]["criteria"] = calculate_criteria(start, *arrays)
        init_dict["AUX"]["starting_values"] = backward_transformation(start)

        aux_dict1 = {"crit": {"1": 10}}

        # Two independent NaN-only vectors. The random draw is retained since
        # it consumes state of the global random number generator.
        num_params = len(start)
        x0 = [np.nan] * num_params
        se = [np.nan] * num_params
        index = np.random.randint(0, num_params - 1)
        x0[index] = np.nan
        se[index] = np.nan

        # With the "notfinite" flag the stored start values and criterion
        # value are expected back.
        x_processed, crit_processed, _ = process_output(
            init_dict, aux_dict1, x0, "notfinite"
        )
        expected_fallback = [
            init_dict["AUX"]["starting_values"],
            init_dict["AUX"]["criteria"],
        ]
        np.testing.assert_equal([x_processed, crit_processed], expected_fallback)

        # The start values pass the check, the NaN vector is flagged.
        check1, flag1 = check_rslt_parameters(start, *arrays, aux_dict1)
        check2, flag2 = check_rslt_parameters(x0, *arrays, aux_dict1)

        np.testing.assert_equal([check1, flag1], [False, None])
        np.testing.assert_equal([check2, flag2], [True, "notfinite"])

        # Hand-built stand-in for an optimizer result.
        opt_rslt = {
            "x": start,
            "fun": 1.0,
            "success": 1,
            "status": 1,
            "message": "msg",
            "nit": 10000,
        }

        rslt = adjust_output(
            opt_rslt, init_dict, rslt_cont, start, "BFGS", "init", *arrays, aux_dict1
        )
        opt_info = rslt["opt_info"]
        np.testing.assert_equal(opt_info["crit"], opt_rslt["fun"])
        np.testing.assert_equal(opt_info["warning"][0], "---")

        # For an all-zero parameter vector every output is expected to be NaN.
        x_linalign = [0] * num_params
        outputs = calculate_se(x_linalign, 1, *arrays)
        se, hess_inv, ci_low, ci_up, p_values, t_values, _ = outputs

        nan_vector = [np.nan] * num_params
        nan_matrix = np.full((num_params, num_params), np.nan)
        np.testing.assert_equal(se, nan_vector)
        np.testing.assert_equal(hess_inv, nan_matrix)
        np.testing.assert_equal(ci_low, nan_vector)
        np.testing.assert_equal(ci_up, nan_vector)
        np.testing.assert_equal(t_values, nan_vector)
        np.testing.assert_equal(p_values, nan_vector)