def check_vault(num_tests=100):
    """Check the complete regression vault distributed as part of the package.

    Parameters
    ----------
    num_tests : int
        Number of vault entries to evaluate. If it exceeds the number of
        stored entries, the complete battery is run instead.
    """
    fname = (os.path.dirname(grmpy.__file__) +
             "/test/resources/old_regression_vault.grmpy.json")
    # Use a context manager so the file handle is closed deterministically
    # instead of leaking until garbage collection.
    with open(fname) as file_:
        tests = json.load(file_)

    if num_tests > len(tests):
        print("The specified number of evaluations is larger than the number"
              " of entries in the regression_test vault.\n"
              "Therefore the test runs the complete test battery.")
    else:
        # NOTE(review): np.random.choice samples WITH replacement here, so
        # the same vault entry may be evaluated more than once — confirm
        # whether replace=False was intended.
        tests = [tests[i] for i in np.random.choice(len(tests), num_tests)]

    for test in tests:
        stat, dict_, criteria = test
        # Re-materialize the stored model specification, simulate, and
        # re-evaluate the criterion at the init-file start values.
        print_dict(dict_transformation(dict_))
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        np.testing.assert_almost_equal(criteria_, criteria)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
    cleanup("regression")
def test3():
    """Check that the simulation also works when the sample contains a single
    agent (hence only treated or only untreated individuals), and that the
    estimation start values fall back to the initialization-file values
    because of perfect separation.
    """
    constraints = {"AGENTS": 1, "DETERMINISTIC": False}
    for _ in range(10):
        generate_random_dict(constraints)
        model_dict = read("test.grmpy.yml")
        data = simulate("test.grmpy.yml")
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(data, model_dict)
        start = start_values(model_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        expected = model_dict["AUX"]["init_values"][:-6]
        np.testing.assert_equal(expected, start[:-4])
def test9():
    """Verify that start_values hands back exactly the initialization-file
    parameters when the start option is set to "init".
    """
    for _ in range(10):
        constraints = {"DETERMINISTIC": False}
        generate_random_dict(constraints)
        model_dict = read("test.grmpy.yml")

        # Collect the parameters of all three equations in file order.
        expected = []
        for section in ["TREATED", "UNTREATED", "CHOICE"]:
            expected.extend(model_dict[section]["params"])

        data = simulate("test.grmpy.yml")
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(data, model_dict)
        # Drop the trailing distributional parameters before comparing.
        x0 = start_values(model_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")[:-4]
        np.testing.assert_array_equal(expected, x0)
def create_vault(num_tests=100, seed=123):
    """Create a new regression vault.

    Parameters
    ----------
    num_tests : int
        Number of random model specifications stored in the vault.
    seed : int
        Seed for numpy's RNG, fixed for reproducibility.
    """
    np.random.seed(seed)
    tests = []
    for _ in range(num_tests):
        dict_ = generate_random_dict()
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        stat = np.sum(df.sum())
        tests += [(stat, dict_, criteria)]
        # Remove simulation artifacts after each iteration.
        # NOTE(review): original indentation was lost — confirm cleanup()
        # belongs inside the loop rather than after it.
        cleanup()
    # Use a context manager so the vault file is flushed and closed
    # deterministically instead of leaking the handle.
    with open("regression_vault.grmpy.json", "w") as file_:
        json.dump(tests, file_)
def test10():
    """Check that the refactor auxiliary function leaves the init file
    untouched when the maximum number of iterations is zero.
    """
    for _ in range(10):
        constraints = {
            "DETERMINISTIC": False,
            "AGENTS": 1000,
            "MAXITER": 0,
            "START": "init",
            "OPTIMIZER": "BFGS",
        }
        generate_random_dict(constraints)
        model_dict = read("test.grmpy.yml")
        data = simulate("test.grmpy.yml")
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(data, model_dict)
        start = start_values(model_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        start = backward_transformation(start)
        rslt = fit("test.grmpy.yml")
        # With zero iterations the fitted parameters must equal the
        # (back-transformed) start values.
        np.testing.assert_equal(start, rslt["opt_rslt"]["params"].values)
def test2():
    """Run a random selection of five regression tests from our old
    regression test battery.
    """
    fname = TEST_RESOURCES_DIR + "/old_regression_vault.grmpy.json"
    # Context manager closes the vault file deterministically instead of
    # leaking the handle.
    with open(fname) as file_:
        tests = json.load(file_)
    random_choice = np.random.choice(range(len(tests)), 5)
    tests = [tests[i] for i in random_choice]
    for test in tests:
        stat, dict_, criteria = test
        # Re-create the stored specification, simulate, and re-evaluate the
        # criterion at the init-file start values.
        print_dict(dict_transformation(dict_))
        df = simulate("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0,
                                       start)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
        np.testing.assert_array_almost_equal(criteria, criteria_)
def test14():
    """This test checks whether our gradient functions work properly."""
    constr = {"AGENTS": 10000, "DETERMINISTIC": False}
    for _ in range(10):
        generate_random_dict(constr)
        init_dict = read("test.grmpy.yml")
        # Removed leftover debug print of init_dict["AUX"].
        df = simulate("test.grmpy.yml")
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        num_treated = X1.shape[1]
        num_untreated = X1.shape[1] + X0.shape[1]
        x0 = start_values(init_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        x0_back = backward_transformation(x0)

        # Analytical log-likelihood gradient vs. complex-step approximation.
        llh_gradient_approx = approx_fprime_cs(
            x0_back,
            log_likelihood,
            args=(X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated, None,
                  False),
        )
        llh_gradient = gradient_hessian(x0_back, X1, X0, Z1, Z0, Y1, Y0)

        # Gradient of the minimizing interface in the transformed space.
        min_inter_approx = approx_fprime_cs(
            x0,
            minimizing_interface,
            args=(X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated, None,
                  False),
        )
        # NOTE(review): the analytical counterpart comes from
        # log_likelihood(..., True) evaluated at the back-transformed point —
        # confirm this pairing is intentional.
        _, min_inter_gradient = log_likelihood(x0_back, X1, X0, Z1, Z0, Y1,
                                               Y0, num_treated, num_untreated,
                                               None, True)

        np.testing.assert_array_almost_equal(min_inter_approx,
                                             min_inter_gradient, decimal=5)
        np.testing.assert_array_almost_equal(llh_gradient_approx,
                                             llh_gradient, decimal=5)
    cleanup()
def simulate(init_file):
    """Simulate a user-specified version of the generalized Roy model.

    Parameters
    ----------
    init_file : str
        Path to the initialization file.

    Returns
    -------
    df : the simulated sample as returned by write_output.
    """
    init_dict = read_simulation(init_file)

    # Basic consistency checks on the user's request.
    check_sim_init_dict(init_dict)

    # Seed numpy's RNG so runs are reproducible.
    np.random.seed(init_dict["SIMULATION"]["seed"])

    # Draw the unobservables and covariates, then the implied outcomes.
    unobservables = simulate_unobservables(init_dict)
    covariates = simulate_covariates(init_dict)
    df = simulate_outcomes(init_dict, covariates, unobservables)

    # Persist the simulated sample.
    df = write_output(init_dict, df)

    # For non-deterministic models, attach the criterion value evaluated at
    # the initialization-file parameters.
    if not init_dict["DETERMINISTIC"]:
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        init_dict["AUX"]["criteria_value"] = calculate_criteria(
            x0, X1, X0, Z1, Z0, Y1, Y0
        )

    # Write the log file.
    print_info(init_dict, df)

    return df
def test3():
    """Verify that the criterion function value agrees between the simulated
    sample and a sample simulated at the estimation result, provided both
    samples contain an identical number of individuals.
    """
    for _ in range(5):
        constraints = {
            "DETERMINISTIC": False,
            "AGENTS": 1000,
            "START": "init",
            "OPTIMIZER": "SCIPY-BFGS",
            "SAME_SIZE": True,
        }
        generate_random_dict(constraints)
        df1 = simulate("test.grmpy.yml")
        rslt = fit("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        _, df2 = simulate_estimation(rslt)
        start = start_values(init_dict, df1, "init")

        # Evaluate the criterion on both samples at the same start values.
        criteria = []
        for data in [df1, df2]:
            _, X1, X0, Z1, Z0, Y1, Y0 = process_data(data, init_dict)
            value = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0,
                                       start)
            criteria.append(value)
        np.testing.assert_allclose(criteria[1], criteria[0], rtol=0.1)
# Build a fresh regression vault from random seeds, then immediately
# re-check every stored entry.
seeds = np.random.randint(0, 1000, size=NUM_TESTS)
directory = os.path.dirname(__file__)
file_dir = os.path.join(directory, "old_regression_vault.grmpy.json")

# --- creation step (removed pointless `if True:` wrapper) ---
tests = []
for seed in seeds:
    np.random.seed(seed)
    constr = dict()
    constr["DETERMINISTIC"], constr["CATEGORICAL"] = False, False
    dict_ = generate_random_dict(constr)
    df = simulate("test.grmpy.yml")
    stat = np.sum(df.sum())
    init_dict = read("test.grmpy.yml")
    start = start_values(init_dict, df, "init")
    _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
    criteria = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
    tests += [(stat, dict_, criteria)]
# Context manager flushes and closes the vault file deterministically.
with open(file_dir, "w") as file_:
    json.dump(tests, file_)

# --- verification step ---
with open(file_dir) as file_:
    tests = json.load(file_)
for test in tests:
    stat, dict_, criteria = test
    print_dict(dict_)
    init_dict = read("test.grmpy.yml")
    df = simulate("test.grmpy.yml")
    start = start_values(init_dict, df, "init")
    # Mirror the creation step above: calculate_criteria needs the
    # processed arrays, not the raw DataFrame (the original passed df).
    _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
    criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
    np.testing.assert_array_almost_equal(criteria, criteria_)
def test13():
    """This test checks if functions that affect the estimation output
    adjustment work as intended.
    """
    for _ in range(5):
        generate_random_dict({"DETERMINISTIC": False})
        df = simulate("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        # BUG FIX: the builtin `dict` type was passed instead of the
        # simulated DataFrame `df`.
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        init_dict["AUX"]["criteria"] = calculate_criteria(
            init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
        init_dict["AUX"]["starting_values"] = backward_transformation(start)
        aux_dict1 = {"crit": {"1": 10}}
        x0, se = [np.nan] * len(start), [np.nan] * len(start)
        index = np.random.randint(0, len(x0) - 1)
        # NOTE(review): x0 and se are already all-NaN, so this assignment is
        # redundant — confirm whether a finite value was intended here.
        x0[index], se[index] = np.nan, np.nan
        p_values, t_values = calculate_p_values(se, x0, df.shape[0])
        np.testing.assert_array_equal([p_values[index], t_values[index]],
                                      [np.nan, np.nan])

        # A non-finite flag must make process_output fall back to the
        # starting values and their criterion value.
        x_processed, crit_processed, _ = process_output(
            init_dict, aux_dict1, x0, "notfinite")
        np.testing.assert_equal(
            [x_processed, crit_processed],
            [
                init_dict["AUX"]["starting_values"],
                init_dict["AUX"]["criteria"]
            ],
        )
        check1, flag1 = check_rslt_parameters(init_dict, X1, X0, Z1, Z0, Y1,
                                              Y0, aux_dict1, start)
        check2, flag2 = check_rslt_parameters(init_dict, X1, X0, Z1, Z0, Y1,
                                              Y0, aux_dict1, x0)
        np.testing.assert_equal([check1, flag1], [False, None])
        np.testing.assert_equal([check2, flag2], [True, "notfinite"])

        opt_rslt = {
            "fun": 1.0,
            "success": 1,
            "status": 1,
            "message": "msg",
            "nfev": 10000,
        }
        rslt = adjust_output(opt_rslt, init_dict, start, X1, X0, Z1, Z0, Y1,
                             Y0, dict_=aux_dict1)
        np.testing.assert_equal(rslt["crit"], opt_rslt["fun"])
        np.testing.assert_equal(rslt["warning"][0], "---")

        # Near-singular parameters must yield all-NaN standard errors and
        # derived statistics.
        x_linalign = [0.0000000000000001] * len(x0)
        num_treated = init_dict["AUX"]["num_covars_treated"]
        num_untreated = num_treated + init_dict["AUX"]["num_covars_untreated"]
        se, hess_inv, conf_interval, p_values, t_values, _ = calculate_se(
            x_linalign, init_dict, X1, X0, Z1, Z0, Y1, Y0, num_treated,
            num_untreated)
        np.testing.assert_equal(se, [np.nan] * len(x0))
        np.testing.assert_equal(hess_inv, np.full((len(x0), len(x0)), np.nan))
        np.testing.assert_equal(conf_interval, [[np.nan, np.nan]] * len(x0))
        np.testing.assert_equal(t_values, [np.nan] * len(x0))
        np.testing.assert_equal(p_values, [np.nan] * len(x0))
    cleanup()
def test13():
    """This test checks if functions that affect the estimation output
    adjustment work as intended.
    """
    for _ in range(5):
        generate_random_dict({"DETERMINISTIC": False})
        data = simulate("test.grmpy.yml")
        model_dict = read("test.grmpy.yml")
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(data, model_dict)
        rslt_cont = create_rslt_df(model_dict)
        start = start_values(model_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        model_dict["AUX"]["criteria"] = calculate_criteria(
            start, X1, X0, Z1, Z0, Y1, Y0)
        model_dict["AUX"]["starting_values"] = backward_transformation(start)
        crit_cache = {"crit": {"1": 10}}
        num_params = len(start)
        nan_params = [np.nan] * num_params
        nan_se = [np.nan] * num_params
        pos = np.random.randint(0, num_params - 1)
        nan_params[pos], nan_se[pos] = np.nan, np.nan

        # A non-finite flag makes process_output fall back to the starting
        # values and their criterion value.
        x_processed, crit_processed, _ = process_output(
            model_dict, crit_cache, nan_params, "notfinite")
        np.testing.assert_equal(
            [x_processed, crit_processed],
            [
                model_dict["AUX"]["starting_values"],
                model_dict["AUX"]["criteria"]
            ],
        )
        check1, flag1 = check_rslt_parameters(start, X1, X0, Z1, Z0, Y1, Y0,
                                              crit_cache)
        check2, flag2 = check_rslt_parameters(nan_params, X1, X0, Z1, Z0, Y1,
                                              Y0, crit_cache)
        np.testing.assert_equal([check1, flag1], [False, None])
        np.testing.assert_equal([check2, flag2], [True, "notfinite"])

        opt_rslt = {
            "x": start,
            "fun": 1.0,
            "success": 1,
            "status": 1,
            "message": "msg",
            "nit": 10000,
        }
        rslt = adjust_output(
            opt_rslt,
            model_dict,
            rslt_cont,
            start,
            "BFGS",
            "init",
            X1,
            X0,
            Z1,
            Z0,
            Y1,
            Y0,
            crit_cache,
        )
        np.testing.assert_equal(rslt["opt_info"]["crit"], opt_rslt["fun"])
        np.testing.assert_equal(rslt["opt_info"]["warning"][0], "---")

        # Degenerate (all-zero) parameters yield all-NaN standard errors and
        # derived statistics.
        zero_params = [0] * num_params
        (
            se,
            hess_inv,
            conf_interval_low,
            conf_interval_up,
            p_values,
            t_values,
            _,
        ) = calculate_se(zero_params, 1, X1, X0, Z1, Z0, Y1, Y0)
        np.testing.assert_equal(se, [np.nan] * num_params)
        np.testing.assert_equal(hess_inv,
                                np.full((num_params, num_params), np.nan))
        np.testing.assert_equal(conf_interval_low, [np.nan] * num_params)
        np.testing.assert_equal(conf_interval_up, [np.nan] * num_params)
        np.testing.assert_equal(t_values, [np.nan] * num_params)
        np.testing.assert_equal(p_values, [np.nan] * num_params)