Exemple #1
0
def simulate(init_file):
    """Simulate a user-specified version of the generalized Roy model.

    The initialization file is parsed with ``read``. Unobservables, covariates and
    outcomes are simulated from the resulting dictionary, passed to ``write_output``
    and reported with ``print_info``. The object returned by ``write_output`` is
    returned to the caller.
    """
    init_dict = read(init_file)

    # Fix the random seed first so that repeated runs are recomputable.
    np.random.seed(init_dict['SIMULATION']['seed'])

    # Unobservables of the model.
    U, V = simulate_unobservables(init_dict)

    # Observables of the model: one draw for the 'TREATED' and one for the 'COST' key.
    X = simulate_covariates(init_dict, 'TREATED')
    Z = simulate_covariates(init_dict, 'COST')

    # Endogenous variables of the model.
    Y, D, Y_1, Y_0 = simulate_outcomes(init_dict, X, Z, U)

    # Write the output file.
    df = write_output(init_dict, Y, D, X, Z, Y_1, Y_0, U, V)

    # The criterion function value is only stored when the 'DETERMINISTIC' entry
    # is exactly False (identity check, not truthiness).
    if init_dict['DETERMINISTIC'] is False:
        start = start_values(init_dict, df, 'init')
        criteria_value = calculate_criteria(init_dict, df, start)
        init_dict['AUX']['criteria_value'] = criteria_value

    # Print the log file.
    print_info(init_dict, df)

    return df
Exemple #2
0
def simulate_estimation(init_dict, rslt, data_frame, start=False):
    """Simulate new sample(s) based on the estimated coefficients.

    If ``start is True``, ``process_results`` is expected to return two dictionaries
    and one sample is simulated for each of them, reusing the ``X_`` and ``Z_``
    columns of ``data_frame`` as covariates; both samples are returned as a tuple.
    Otherwise a single dictionary is used, covariates are simulated from it, and
    one sample is returned.
    """
    seed = init_dict['SIMULATION']['seed']

    # Determine the parametrization(s) and read in / simulate the observables.
    if start is True:
        first_dict, second_dict = process_results(init_dict, rslt, start)
        parametrizations = [first_dict, second_dict]
        X = data_frame.filter(regex=r'^X\_')
        Z = data_frame.filter(regex=r'^Z\_')
    else:
        only_dict = process_results(init_dict, rslt, start)
        parametrizations = [only_dict]
        X = simulate_covariates(only_dict, 'TREATED')
        Z = simulate_covariates(only_dict, 'COST')

    samples = []
    for params in parametrizations:
        # Reset the seed before every simulation so each one starts from the same
        # random state.
        np.random.seed(seed)

        # Only the first element of the unobservables is needed here.
        U, _ = simulate_unobservables(params)

        # Simulate the endogenous variables and collect the resulting sample.
        Y, D, Y_1, Y_0 = simulate_outcomes(params, X, Z, U)
        samples.append(write_output_estimation(Y, D, X, Z, Y_1, Y_0))

    if start is True:
        return samples[0], samples[1]
    return samples[0]
Exemple #3
0
def simulate(init_file):
    """Simulate a user-specified version of the generalized Roy model.

    The initialization file is parsed with ``read`` and checked with
    ``check_initialization_dict``. Unobservables, covariates and outcomes are then
    simulated, passed to ``write_output`` and reported with ``print_info``. The
    object returned by ``write_output`` is returned to the caller.
    """
    init_dict = read(init_file)

    # Basic consistency checks regarding the user's request.
    check_initialization_dict(init_dict)

    # Fix the random seed first so that repeated runs are recomputable.
    np.random.seed(init_dict['SIMULATION']['seed'])

    # Unobservables, then observables of the model.
    U, V = simulate_unobservables(init_dict)
    X = simulate_covariates(init_dict)

    # Endogenous variables of the model.
    Y, D, Y_1, Y_0 = simulate_outcomes(init_dict, X, U, V)

    # Write the output file.
    df = write_output(init_dict, Y, D, X, Y_1, Y_0, U, V)

    # Store the criterion function value unless the 'DETERMINISTIC' entry is truthy.
    if not init_dict['DETERMINISTIC']:
        start = start_values(init_dict, df, 'init')
        criteria_value = calculate_criteria(init_dict, df, start)
        init_dict['AUX']['criteria_value'] = criteria_value

    # Print the log file.
    print_info(init_dict, df)

    return df
Exemple #4
0
def simulate_estimation(init_dict, rslt, start=False):
    """Simulate new sample(s) based on the estimated coefficients.

    One sample is always simulated from ``process_results(init_dict, rslt)``. If
    ``start`` is truthy, an additional sample based on
    ``process_results(init_dict, None)`` is simulated first and both samples are
    returned as a tuple; otherwise only the single sample is returned.
    """
    seed = init_dict['SIMULATION']['seed']
    labels = init_dict['varnames']

    # Determine the parametrization(s); the one built with ``None`` comes first.
    parametrizations = []
    if start:
        parametrizations.append(process_results(init_dict, None))
    rslt_dict = process_results(init_dict, rslt)
    parametrizations.append(rslt_dict)

    samples = []
    for params in parametrizations:
        # Reset the seed before every simulation so each one starts from the same
        # random state.
        np.random.seed(seed)

        U, V = simulate_unobservables(params)
        # NOTE(review): covariates are always drawn from ``rslt_dict`` rather than
        # the current parametrization - presumably intentional; confirm.
        X = simulate_covariates(rslt_dict)

        # Simulate the endogenous variables and collect the resulting sample.
        Y, D, Y_1, Y_0 = simulate_outcomes(params, X, U, V)
        samples.append(write_output_estimation(labels, Y, D, X, Y_1, Y_0, init_dict))

    return (samples[0], samples[1]) if start else samples[0]
Exemple #5
0
def weights_treatment_parameters(init_dict, GRID):
    """Calculate the weights for the special case in Heckman & Vytlacil (2005)
    Figure 1B.

    Parameters
    ----------
    init_dict : dict
        Initialization dictionary. ``init_dict['TREATED']['all']`` and
        ``init_dict['UNTREATED']['all']`` are read here; the dictionary is also
        passed on to ``construct_covariance_matrix``, ``simulate_covariates`` and
        ``mte_information``.
    GRID
        Ignored. The argument is kept for interface compatibility only; the
        evaluation grid is always the 99 equally spaced points from 0.01 to 0.99.

    Returns
    -------
    tuple
        ``(ate_weights, tt_weights, tut_weights, mte)`` where the three weight
        arrays are evaluated on the grid and ``mte`` is whatever
        ``mte_information`` returns.
    """
    # NOTE: the caller-supplied grid is deliberately overwritten, exactly as
    # before, so that the grid always matches the integration bounds used below.
    GRID = np.linspace(0.01, 0.99, num=99, endpoint=True)

    coeffs_untreated = init_dict['UNTREATED']['all']
    coeffs_treated = init_dict['TREATED']['all']
    cov = construct_covariance_matrix(init_dict)

    # Only the first two columns of the simulated covariates are used.
    x = simulate_covariates(init_dict)
    x = x[:, :2]

    # We take the specified distribution for the cost shifters from the paper.
    cost_mean, cost_sd = -0.0026, np.sqrt(0.270)
    v_mean, v_sd = 0.00, np.sqrt(cov[2, 2])

    def tut_integrand(point):
        """Unscaled TUT weight at ``point``: cost CDF at the matching V-quantile."""
        eval_point = norm.ppf(point, loc=v_mean, scale=v_sd)
        return norm.cdf(eval_point, loc=cost_mean, scale=cost_sd)

    def tt_integrand(point):
        """Unscaled TT weight at ``point``: complement of the unscaled TUT weight."""
        return 1 - tut_integrand(point)

    ate_weights = np.tile(1.0, len(GRID))
    tut_weights = tut_integrand(GRID)
    tt_weights = 1 - tut_weights

    # Scaling so that the weights integrate to one. Each set of weights must be
    # divided by the integral of its *own* unscaled weight function: the TT
    # weights are the complement of the TUT weights, so their normalizing
    # constant is the integral of the complement, not of the TUT integrand.
    tut_scaling = quad(tut_integrand, 0.01, 0.99)[0]
    tut_weights /= tut_scaling

    tt_scaling = quad(tt_integrand, 0.01, 0.99)[0]
    tt_weights /= tt_scaling

    mte = mte_information(coeffs_treated, coeffs_untreated, cov, GRID, x,
                          init_dict)

    return ate_weights, tt_weights, tut_weights, mte
Exemple #6
0
def simulate_estimation(rslt):
    """Simulate two new samples based on the estimated coefficients.

    ``process_results(rslt)`` yields two dictionaries (``start`` and ``finish``).
    One sample is simulated for each, both starting from the same seed, and the
    pair is returned in that order.
    """
    seed = rslt["SIMULATION"]["seed"]

    # Determine the two parametrizations to simulate from.
    start, finish = process_results(rslt)

    def _draw_sample(params):
        # Reset the seed so every simulation starts from the same random state.
        np.random.seed(seed)
        unobservables = simulate_unobservables(params)
        covariates = simulate_covariates(params)
        # Simulate the endogenous variables.
        return simulate_outcomes(params, covariates, unobservables)

    start_sample = _draw_sample(start)
    finish_sample = _draw_sample(finish)

    return start_sample, finish_sample
Exemple #7
0
def simulate(init_file):
    """Simulate a user-specified version of the generalized Roy model.

    The initialization file is parsed with ``read_simulation`` and checked with
    ``check_sim_init_dict``. Unobservables, covariates and outcomes are then
    simulated, passed to ``write_output`` and reported with ``print_info``. The
    object returned by ``write_output`` is returned to the caller.
    """
    init_dict = read_simulation(init_file)

    # Basic consistency checks regarding the user's request.
    check_sim_init_dict(init_dict)

    # Fix the random seed first so that repeated runs are recomputable.
    np.random.seed(init_dict["SIMULATION"]["seed"])

    # Unobservables, then observables of the model.
    U = simulate_unobservables(init_dict)
    X = simulate_covariates(init_dict)

    # Endogenous variables of the model, then the output file.
    outcomes = simulate_outcomes(init_dict, X, U)
    df = write_output(init_dict, outcomes)

    # Store the criterion function value unless the "DETERMINISTIC" entry is truthy.
    if not init_dict["DETERMINISTIC"]:
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        start = start_values(init_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        criteria_value = calculate_criteria(start, X1, X0, Z1, Z0, Y1, Y0)
        init_dict["AUX"]["criteria_value"] = criteria_value

    # Print the log file.
    print_info(init_dict, df)

    return df
Exemple #8
0
        index = GRID.index(round(xtick, 2))
        height = mte[index]
        ax.text(x=xtick - 0.002, y=height - 0.1, s='[', fontsize=30)
    for xtick in [0.39, 0.79]:
        index = GRID.index(round(xtick, 2))
        height = mte[index]
        ax.text(x=xtick - 0.01, y=height - 0.1, s=']', fontsize=30)

    ax.set_xlabel('$u_S$')
    ax.set_ylabel(r'$B^{MTE}$')

    ax.set_xticks([0, 0.2, 0.4, 0.6, 0.8, 1])
    ax.set_xticklabels([0, '$p_1$', '$p_2$', '$p_3$', '$p_4$', 1])
    ax.tick_params(axis='x', which='major')
    ax.set_ylim([1.5, 4.5])

    plt.tight_layout()
    plt.savefig(OUTPUT_DIR + '/fig-local-average-treatment.png')


if __name__ == '__main__':
    # Script entry point: build the MTE from the model specification and plot it.
    # ``init_dict`` and ``GRID`` are not defined in this excerpt; they are expected
    # to exist at module level.
    coeffs_untreated = init_dict['UNTREATED']['all']
    coeffs_treated = init_dict['TREATED']['all']
    cov = construct_covariance_matrix(init_dict)
    x = simulate_covariates(init_dict)
    # Keep only the first two columns of the simulated covariates.
    x = x[:, :2]

    mte = mte_information(coeffs_treated, coeffs_untreated, cov, GRID, x, init_dict)

    plot_local_average_treatment(mte)
Exemple #9
0
    ax.set_xlabel("$u_S$")
    ax.plot(GRID, pres, label='Presence')
    ax.plot(GRID, abs_, label='Absence', linestyle='--')

    ax.set_ylim([1.5, 4.5])

    plt.legend()

    plt.tight_layout()
    plt.savefig(ppj("OUT_FIGURES", 'fig-eh-marginal-effect.png'))


if __name__ == '__main__':
    # Script entry point: compute the two series handed to
    # ``plot_marginal_treatment_effect`` and plot them. ``init_dict`` and ``GRID``
    # are not defined in this excerpt; they are expected to exist at module level.
    coeffs_untreated = init_dict['UNTREATED']['all']
    coeffs_treated = init_dict['TREATED']['all']
    cov = construct_covariance_matrix(init_dict)
    x = simulate_covariates(init_dict, 'TREATED')
    save_data(x)

    MTE_pres = mte_information(coeffs_treated, coeffs_untreated, cov, GRID, x)

    para_diff = coeffs_treated - coeffs_untreated

    # The second series does not depend on the grid point: every entry is either
    # the placeholder '---' (when cov[2, 2] is zero) or the mean of
    # ``para_diff . x^T``. Compute that single value once and repeat it for every
    # grid point instead of re-evaluating it inside a loop.
    if cov[2, 2] == 0.00:
        abs_value = '---'
    else:
        abs_value = np.mean(np.dot(para_diff, x.T))
    MTE_abs = [abs_value] * len(GRID)

    plot_marginal_treatment_effect(MTE_pres, MTE_abs)