Example No. 1
# edit PYTHONPATH
import sys

sys.path.insert(0, 'grmpy')

# project library
import grmpy as gp

from tests._auxiliary import random_init
from tools.economics.clsAgent import AgentCls
from tools.optimization.estimation import _load_data, _object_negative_log_likelihood
# Generate a random request

if False:  # demonstration block, disabled on purpose; set to True to execute
    init_dict = gp.process('init.ini')

    # Simulate synthetic sample
    gp.simulate(init_dict)

    # Load dataset
    Y, D, X, Z, agent_objs = _load_data(init_dict)

    # Evaluate the negative log-likelihood at the starting values
    _object_negative_log_likelihood(init_dict, agent_objs)

    # Process the initialization file again for the estimation run
    init_dict = gp.process('init.ini')

    # Simulate synthetic sample
    gp.simulate(init_dict)

    # Estimate model
    rslt = gp.estimate(init_dict)
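
Since the whole block above sits behind an `if False` guard, nothing runs as written. A minimal live sketch of the same workflow (assuming an init.ini exists in the working directory):

import sys
sys.path.insert(0, 'grmpy')
import grmpy as gp

init_dict = gp.process('init.ini')   # parse the initialization file
gp.simulate(init_dict)               # write a synthetic sample to disk
rslt = gp.estimate(init_dict)        # fit the model on the simulated data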
Example No. 2
import datetime
import subprocess

import statsmodels.tools.sm_exceptions

from grmpy.test.random_init import generate_random_dict
from grmpy.test.random_init import print_dict
import grmpy

# We specify a minimum number of minutes during which the package is run on different random requests.
MINUTES = 1

end_time = datetime.datetime.now() + datetime.timedelta(minutes=MINUTES)
counter = 1
while True:
    if datetime.datetime.now() >= end_time:
        break

    print('\n Iteration ', counter)

    # Draw a random model specification and write it to disk.
    dict_ = generate_random_dict()
    print_dict(dict_)

    grmpy.simulate('test.grmpy.ini')

    # Temporary fix: the determination of starting values by a PROBIT model fails
    # under perfect separation, so we simply skip such samples.
    try:
        grmpy.estimate('test.grmpy.ini')
    except statsmodels.tools.sm_exceptions.PerfectSeparationError:
        print('separation error, skip')
    # Discard all files generated during this iteration.
    subprocess.check_call(['git', 'clean', '-d', '-f'])

    counter += 1
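
For reproducible runs, the seed can be fixed before entering the loop. A minimal sketch, assuming generate_random_dict draws from NumPy's global generator (an assumption, not documented above):

import numpy as np

np.random.seed(123)  # assumption: generate_random_dict uses np.random internally
dict_ = generate_random_dict()
print_dict(dict_)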
Example No. 3
def monte_carlo(file, which, grid_points=10):
    """This function estimates various effect parameters for
    increasing presence of essential heterogeneity, which is reflected
    by increasing correlation between U_1 and V.
    """
    # simulate a new data set with essential heterogeneity present
    model_dict = read(file)
    original_correlation = model_dict["DIST"]["params"][2]

    model_dict["DIST"]["params"][2] = -0.191
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    effects = []

    # Loop over different correlations between V and U_1
    for rho in np.linspace(0.00, -0.99, grid_points):
        # effects["rho"] += [rho]
        # Readjust the initialization file values to add correlation
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        update_correlation_structure(file, model_spec, rho)
        sim_spec = read(file)
        # Simulate a data set and specify exogenous and endogenous variables
        df_mc = create_data(file)
        treated = df_mc["D"] == 1
        Xvar = df_mc[X]
        instr = sim_spec["CHOICE"]["order"]
        instr = [i for i in instr if i != "const"]

        # We calculate our parameter of interest
        label = which.lower()

        if label == "conventional_average_effects":
            ATE = np.mean(df_mc["Y1"] - df_mc["Y0"])
            TT = np.mean(df_mc["Y1"].loc[treated] - df_mc["Y0"].loc[treated])
            stat = (ATE, TT)

        elif label in ["random", "randomization"]:
            random = np.mean(df_mc[df_mc.D == 1]["Y"]) - np.mean(
                df_mc[df_mc.D == 0]["Y"])
            stat = random

        elif label in ["ordinary_least_squares", "ols"]:
            results = sm.OLS(df_mc["Y"], df_mc[["const", "D"]]).fit()
            stat = results.params[1]

        elif label in ["instrumental_variables", "iv"]:
            iv = IV2SLS(df_mc["Y"], Xvar, df_mc["D"], df_mc[instr]).fit()
            stat = iv.params["D"]

        elif label in ["grmpy", "grmpy-par"]:
            rslt = grmpy.fit(file)
            beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
            stat = np.dot(np.mean(Xvar), beta_diff)

        elif label in ["grmpy-semipar", "grmpy-liv"]:
            rslt = grmpy.fit(file, semipar=True)

            y0_fitted = np.dot(rslt["X"], rslt["b0"])
            y1_fitted = np.dot(rslt["X"], rslt["b1"])

            mte_x_ = y1_fitted - y0_fitted
            mte_u = rslt["mte_u"]

            # Build the MTE surface over individuals i and quantiles j:
            # mte_mat[i, j] = mte_x_[i] + mte_u[j]
            mte_mat = np.zeros((len(mte_x_), len(mte_u)))

            for i in range(len(mte_x_)):
                for j in range(len(mte_u)):
                    mte_mat[i, j] = mte_x_[i] + mte_u[j]

            # Average over the u quantiles, then over individuals, to obtain the ATE estimate
            ate_tilde_p = np.mean(mte_mat, axis=1)
            stat = ate_tilde_p.mean()

        else:
            raise NotImplementedError

        effects += [stat]

    # Restore original init file
    model_dict = read(file)
    model_dict["DIST"]["params"][2] = original_correlation
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    return effects
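
A hypothetical call (the file name is assumed) that collects the true effects over the rho grid and separates them for inspection:

effects = monte_carlo("tutorial.grmpy.yml", "conventional_average_effects", grid_points=10)
ates, tts = zip(*effects)  # each entry of effects is an (ATE, TT) tuple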
Example No. 4
"""This module contains a tutorial illustrating the basic capabilities of the grmpy package."""
import grmpy

grmpy.simulate('tutorial.grmpy.ini')
grmpy.estimate('tutorial.grmpy.ini')
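
Both calls also return their results. As in Example No. 1, the estimation output can be captured; whether simulate returns the simulated sample as well is an assumption here:

df = grmpy.simulate('tutorial.grmpy.ini')    # assumption: returns the simulated sample
rslt = grmpy.estimate('tutorial.grmpy.ini')  # cf. rslt = gp.estimate(...) in Example No. 1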
Example No. 5
def monte_carlo(file, which, grid_points=10):
    """
    This function conducts a Monte Carlo simulation to compare
    the true and estimated treatment parameters for increasing
    (absolute) correlation between U_1 and V (i.e. essential
    heterogeneity).

    In the example here, the correlation between U_1 and V becomes
    increasingly more negative. As we consider the absolute value
    of the correlation coefficient, values closer to -1
    (or in the analogous case closer to +1)
    denote a higher degree of essential heterogeneity.

    The results of the Monte Carlo simulation can be used
    to evaluate the performance of different estimation strategies
    in the presence of essential heterogeneity.

    Depending on the specification of *which*, either the true ATE
    and TT or an estimate of the ATE is returned.

    Options for *which*:

        Comparison of ATE and TT
        - "conventional_average_effects"

        Different estimation strategies for ATE
        - "randomization" ("random")
        - "ordinary_least_squares" ("ols")
        - "instrumental_variables" ("iv")
        - "grmpy_par" ("grmpy")
        - "grmpy_semipar"("grmpy-liv")

    Post-estimation: To plot the comparison between the true ATE
    and the respective parameter, use the function
    - plot_effects() for *which* = "conventional_average_effects", and
    - plot_estimates() otherwise.

    Parameters
    ----------
    file: str
        Path to the grmpy initialization file (YAML); provides information
        for the simulation process.
    which: str
        Denotes whether conventional average effects shall be computed or,
        alternatively, which estimation approach shall be implemented for the ATE.
    grid_points: int, default 10
        Number of different values for rho, the correlation coefficient
        between U_1 and V, on the interval (-1, 0], along which the parameters
        shall be evaluated.

    Returns
    -------
    effects: list
        If *which* = "conventional_average_effects",
            a list of length *grid_points* whose entries are (ATE, TT) tuples.
        Else, a list of length *grid_points* containing an estimate
            of the ATE for each value of rho.
    """
    # simulate a new data set with essential heterogeneity present
    model_dict = read(file)
    original_correlation = model_dict["DIST"]["params"][2]

    model_dict["DIST"]["params"][2] = -0.191
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    effects = []

    # Loop over different correlations between U_1 and V
    for rho in np.linspace(0.00, -0.99, grid_points):
        # effects["rho"] += [rho]
        # Readjust the initialization file values to add correlation
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        _update_correlation_structure(file, model_spec, rho)
        sim_spec = read(file)
        # Simulate a data set and specify exogenous and endogenous variables
        df_mc = _create_data(file)
        treated = df_mc["D"] == 1
        Xvar = df_mc[X]
        instr = sim_spec["CHOICE"]["order"]
        instr = [i for i in instr if i != "const"]

        # We calculate our parameter of interest
        label = which.lower()

        if label == "conventional_average_effects":
            ATE = np.mean(df_mc["Y1"] - df_mc["Y0"])
            TT = np.mean(df_mc["Y1"].loc[treated] - df_mc["Y0"].loc[treated])
            stat = (ATE, TT)

        elif label in ["randomization", "random"]:
            random = np.mean(df_mc[df_mc.D == 1]["Y"]) - np.mean(
                df_mc[df_mc.D == 0]["Y"]
            )
            stat = random

        elif label in ["ordinary_least_squares", "ols"]:
            results = sm.OLS(df_mc["Y"], df_mc[["const", "D"]]).fit()
            stat = results.params[1]

        elif label in ["instrumental_variables", "iv"]:
            iv = IV2SLS(df_mc["Y"], Xvar, df_mc["D"], df_mc[instr]).fit()
            stat = iv.params["D"]

        elif label in ["grmpy", "grmpy-par"]:
            rslt = grmpy.fit(file)
            beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
            stat = np.dot(np.mean(Xvar), beta_diff)

        elif label in ["grmpy-semipar", "grmpy-liv"]:
            rslt = grmpy.fit(file, semipar=True)

            y0_fitted = np.dot(rslt["X"], rslt["b0"])
            y1_fitted = np.dot(rslt["X"], rslt["b1"])

            mte_x_ = y1_fitted - y0_fitted
            mte_u = rslt["mte_u"]

            # Build the MTE surface over individuals i and quantiles j:
            # mte_mat[i, j] = mte_x_[i] + mte_u[j]
            mte_mat = np.zeros((len(mte_x_), len(mte_u)))

            for i in range(len(mte_x_)):
                for j in range(len(mte_u)):
                    mte_mat[i, j] = mte_x_[i] + mte_u[j]

            # Average over the u quantiles, then over individuals, to obtain the ATE estimate
            ate_tilde_p = np.mean(mte_mat, axis=1)
            stat = ate_tilde_p.mean()

        else:
            raise NotImplementedError

        effects += [stat]

    # Restore original init file
    model_dict = read(file)
    model_dict["DIST"]["params"][2] = original_correlation
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    return effects
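
A hypothetical end-to-end use (the file name is assumed), following the docstring's post-estimation note:

estimates = monte_carlo("tutorial.grmpy.yml", "ols", grid_points=10)
# Per the docstring, plot_estimates() can then compare these estimates with
# the true ATE; its exact signature is not shown in the example above.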