Ejemplo n.º 1
0
def monte_carlo(file, which, grid_points=10):
    """Estimate treatment-effect parameters for increasing degrees of
    essential heterogeneity.

    Essential heterogeneity is reflected by an increasingly negative
    correlation between U_1 and V. For each correlation value rho on a
    grid over [0, -0.99], a data set is simulated and the statistic
    selected via *which* is computed.

    Parameters
    ----------
    file : str
        Path to the grmpy initialization file (".grmpy.yml").
    which : str
        Statistic to compute (case-insensitive). One of
        "conventional_average_effects" (true ATE and TT),
        "randomization" / "random", "ordinary_least_squares" / "ols",
        "instrumental_variables" / "iv", "grmpy" / "grmpy-par",
        "grmpy-semipar" / "grmpy-liv".
    grid_points : int, default 10
        Number of rho values on the interval [0, -0.99].

    Returns
    -------
    effects : list
        One entry per rho value: an (ATE, TT) tuple for
        "conventional_average_effects", otherwise a scalar ATE estimate.

    Raises
    ------
    NotImplementedError
        If *which* matches none of the supported options.
    """
    # Remember the original U_1-V correlation so the init file can be
    # restored once the study is finished.
    model_dict = read(file)
    original_correlation = model_dict["DIST"]["params"][2]

    # Simulate a new data set with essential heterogeneity present.
    model_dict["DIST"]["params"][2] = -0.191
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    effects = []
    # Loop-invariant: normalize the requested statistic once.
    label = which.lower()

    # Loop over different correlations between V and U_1.
    for rho in np.linspace(0.00, -0.99, grid_points):
        # Readjust the initialization file values to add correlation.
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        update_correlation_structure(file, model_spec, rho)
        sim_spec = read(file)
        # Simulate a data set and specify exogenous and endogenous variables.
        df_mc = create_data(file)
        treated = df_mc["D"] == 1
        Xvar = df_mc[X]
        instr = [i for i in sim_spec["CHOICE"]["order"] if i != "const"]

        # We calculate our parameter of interest.
        if label == "conventional_average_effects":
            ATE = np.mean(df_mc["Y1"] - df_mc["Y0"])
            TT = np.mean(df_mc["Y1"].loc[treated] - df_mc["Y0"].loc[treated])
            stat = (ATE, TT)

        elif label in ["random", "randomization"]:
            # Naive difference in mean outcomes between treated and untreated.
            stat = np.mean(df_mc[df_mc.D == 1]["Y"]) - np.mean(
                df_mc[df_mc.D == 0]["Y"]
            )

        elif label in ["ordinary_least_squares", "ols"]:
            results = sm.OLS(df_mc["Y"], df_mc[["const", "D"]]).fit()
            stat = results.params[1]

        elif label in ["instrumental_variables", "iv"]:
            iv = IV2SLS(df_mc["Y"], Xvar, df_mc["D"], df_mc[instr]).fit()
            stat = iv.params["D"]

        elif label in ["grmpy", "grmpy-par"]:
            rslt = grmpy.fit(file)
            beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
            stat = np.dot(np.mean(Xvar), beta_diff)

        elif label in ["grmpy-semipar", "grmpy-liv"]:
            rslt = grmpy.fit(file, semipar=True)

            y0_fitted = np.dot(rslt["X"], rslt["b0"])
            y1_fitted = np.dot(rslt["X"], rslt["b1"])

            mte_x_ = y1_fitted - y0_fitted
            mte_u = np.asarray(rslt["mte_u"])

            # The MTE is additively separable in its observable and
            # unobservable parts; build the full matrix via broadcasting
            # instead of a Python double loop.
            mte_mat = mte_x_[:, np.newaxis] + mte_u[np.newaxis, :]

            stat = np.mean(mte_mat, axis=1).mean()

        else:
            raise NotImplementedError(
                "Unknown option for 'which': {}".format(which)
            )

        effects += [stat]

    # Restore original init file.
    model_dict = read(file)
    model_dict["DIST"]["params"][2] = original_correlation
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    return effects
Ejemplo n.º 2
0
def monte_carlo(file, which, grid_points=10):
    """
    This function conducts a Monte Carlo simulation to compare
    the true and estimated treatment parameters for increasing
    (absolute) correlation between U_1 and V (i.e essential
    heterogeneity).

    In the example here, the correlation between U_1 and V becomes
    increasingly more negative. As we consider the absolute value
    of the correlation coefficient, values closer to -1
    (or in the analogous case closer to +1)
    denote a higher degree of essential heterogeneity.

    The results of the Monte Carlo simulation can be used
    to evaluate the performance of different estimation strategies
    in the presence of essential heterogeneity.

    Depending on the specification of *which*, either the true ATE
    and TT, or an estimate of the ATE are returned.

    Options for *which* (case-insensitive):

        Comparison of ATE and TT
        - "conventional_average_effects"

        Different estimation strategies for ATE
        - "randomization" ("random")
        - "ordinary_least_squares" ("ols")
        - "instrumental_variables" ("iv")
        - "grmpy-par" ("grmpy")
        - "grmpy-semipar" ("grmpy-liv")

    Post-estimation: To plot the comparison between the true ATE
    and the respective parameter, use the function
    - plot_effects() for *which* = "conventional_average_effects", and
    - plot_estimates() else.

    Parameters
    ----------
    file: yaml
        grmpy initialization file, provides information for the simulation process.
    which: string
        String denoting whether conventional average effects shall be computed
        or, alternatively, which estimation approach shall be implemented for the ATE.
    grid_points: int, default 10
        Number of different values for rho, the correlation coefficient
        between U_1 and V, on the interval [0, -1), along which the parameters
        shall be evaluated.

    Returns
    -------
    effects: list
        If *which* = "conventional_average_effects",
            list of length *grid_points* x 2 containing the true ATE and TT.
        Else, list of length *grid_points* x 1 containing an estimate
            of the ATE.

    Raises
    ------
    NotImplementedError
        If *which* matches none of the supported options.
    """
    # Remember the original U_1-V correlation so the init file can be
    # restored once the study is finished.
    model_dict = read(file)
    original_correlation = model_dict["DIST"]["params"][2]

    # Simulate a new data set with essential heterogeneity present.
    model_dict["DIST"]["params"][2] = -0.191
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    effects = []
    # Loop-invariant: normalize the requested statistic once.
    label = which.lower()

    # Loop over different correlations between U_1 and V.
    for rho in np.linspace(0.00, -0.99, grid_points):
        # Readjust the initialization file values to add correlation.
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        _update_correlation_structure(file, model_spec, rho)
        sim_spec = read(file)
        # Simulate a data set and specify exogenous and endogenous variables.
        df_mc = _create_data(file)
        treated = df_mc["D"] == 1
        Xvar = df_mc[X]
        instr = [i for i in sim_spec["CHOICE"]["order"] if i != "const"]

        # We calculate our parameter of interest.
        if label == "conventional_average_effects":
            ATE = np.mean(df_mc["Y1"] - df_mc["Y0"])
            TT = np.mean(df_mc["Y1"].loc[treated] - df_mc["Y0"].loc[treated])
            stat = (ATE, TT)

        elif label in ["randomization", "random"]:
            # Naive difference in mean outcomes between treated and untreated.
            stat = np.mean(df_mc[df_mc.D == 1]["Y"]) - np.mean(
                df_mc[df_mc.D == 0]["Y"]
            )

        elif label in ["ordinary_least_squares", "ols"]:
            results = sm.OLS(df_mc["Y"], df_mc[["const", "D"]]).fit()
            stat = results.params[1]

        elif label in ["instrumental_variables", "iv"]:
            iv = IV2SLS(df_mc["Y"], Xvar, df_mc["D"], df_mc[instr]).fit()
            stat = iv.params["D"]

        elif label in ["grmpy", "grmpy-par"]:
            rslt = grmpy.fit(file)
            beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
            stat = np.dot(np.mean(Xvar), beta_diff)

        elif label in ["grmpy-semipar", "grmpy-liv"]:
            rslt = grmpy.fit(file, semipar=True)

            y0_fitted = np.dot(rslt["X"], rslt["b0"])
            y1_fitted = np.dot(rslt["X"], rslt["b1"])

            mte_x_ = y1_fitted - y0_fitted
            mte_u = np.asarray(rslt["mte_u"])

            # The MTE is additively separable in its observable and
            # unobservable parts; build the full matrix via broadcasting
            # instead of a Python double loop.
            mte_mat = mte_x_[:, np.newaxis] + mte_u[np.newaxis, :]

            stat = np.mean(mte_mat, axis=1).mean()

        else:
            raise NotImplementedError(
                "Unknown option for 'which': {}".format(which)
            )

        effects += [stat]

    # Restore original init file.
    model_dict = read(file)
    model_dict["DIST"]["params"][2] = original_correlation
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    return effects