def blp(y, d, prop, b_hat, s_hat, print_table=True):
    """Fit the Best Linear Predictor (BLP) regression and return its ATE/HET terms.

    The outcome is regressed on a constant, ``b_hat``, ``d - prop`` and
    ``(d - prop) * (s_hat - mean(s_hat))`` with weights
    ``1 / (prop * (1 - prop))``.

    Parameters
    ----------
    y : ndarray
        vector of outcomes
    d : ndarray
        treatment indicator
    prop : ndarray
        treatment propensity
    b_hat : ndarray
        baseline regressor; enters the model as the ``b0`` column
        (presumably an estimate of the baseline outcome -- TODO confirm)
    s_hat : ndarray
        score that is demeaned and interacted with ``d - prop``
        (presumably the estimated treatment effect -- TODO confirm)
    print_table : bool, optional
        Toggle results table, by default True

    Returns
    -------
    dict
        ``"ate"``: coefficient on ``d - prop``;
        ``"het"``: coefficient on ``(d - prop) * (s_hat - mean(s_hat))``
    """
    column_names = ["const.", "b0", "ate", "het"]

    # Regression weights
    weights = (prop * (1 - prop)) ** (-1)

    # The propensity-centred treatment indicator feeds both the ATE column
    # and, interacted with the demeaned score, the heterogeneity column.
    d_centered = d - prop
    s_centered = s_hat - np.mean(s_hat)
    design = np.column_stack(
        (
            np.repeat(1, repeats=len(y)),  # constant
            b_hat,  # baseline b0
            d_centered,  # average treatment effect ate
            d_centered * s_centered,  # heterogeneity het
        )
    )

    # NOTE(review): if WLS is statsmodels' WLS, its weights keyword is
    # `weights`, not `w`; an unrecognised `w` would leave the fit unweighted.
    # Confirm against the WLS imported by this file.
    fitted = WLS(endog=y, exog=design, w=weights).fit()

    if print_table:
        print(fitted.summary(xname=column_names))

    return {
        name: fitted.params[column_names.index(name)] for name in ("ate", "het")
    }
def gates(y, d, prop, s_hat, q=10, print_table=True):
    """Calculate Group Average Treatment Effects (GATES).

    Observations are binned by ``s_hat`` via ``quantile_grid``; the outcome
    is then regressed on the group dummies and on the group dummies
    interacted with ``d - prop``, using weights ``1 / (prop * (1 - prop))``.

    Parameters
    ----------
    y : ndarray
        vector of outcomes
    d : ndarray
        treatment indicator
    prop : ndarray
        treatment propensity
    s_hat : ndarray
        estimated treatment effect
    q : int, optional
        number of groups, by default 10
    print_table : bool, optional
        toggle results table, by default True

    Returns
    -------
    dict
        ``"coef_baseline"``: coefficients on the group dummies;
        ``"coef_treatment"``: coefficients on the dummies times ``d - prop``;
        ``"bin_values"``: the ``bin_edges`` returned by ``quantile_grid``;
        ``"bin_count"``: number of observations assigned to each group
    """
    # Define groups. A tiny random perturbation is added to break ties.
    # NOTE(review): a 1e-16 perturbation is below float64 resolution once
    # |s_hat| exceeds roughly 1, so ties between such values survive.
    jitter = 1e-16 * np.random.uniform(size=len(s_hat))
    # presumably: per-observation group index, group edge values, and the
    # matching percentiles (0-100) -- TODO confirm against quantile_grid
    bin_indices, bin_edges, bin_pct = quantile_grid(x=s_hat + jitter, q=q)

    # Dummy coding: one column per group, a single 1 per row
    n_obs = len(s_hat)
    s_onehot = np.zeros((n_obs, len(bin_edges)))
    s_onehot[np.arange(0, n_obs), bin_indices] = 1

    # Model matrix: group dummies, then dummies scaled row-wise by d - prop.
    # The shape is passed positionally because the `newshape` keyword of
    # np.reshape is deprecated in recent NumPy releases.
    d_centered = np.reshape(d - prop, (-1, 1))
    x_reg = np.column_stack((s_onehot, s_onehot * d_centered))
    w_reg = (prop * (1 - prop)) ** (-1)  # weights

    # Row labels for the summary table, one baseline/treatment pair per group
    groups = list(zip(bin_pct.tolist(), bin_edges.tolist()))
    labels_baseline = [f"Baseline: p={p / 100:.2f} ({x:.2f})" for p, x in groups]
    labels_treatment = [f"Treatment: p={p / 100:.2f} ({x:.2f})" for p, x in groups]
    labels = labels_baseline + labels_treatment

    # Run weighted least squares
    # NOTE(review): if WLS is statsmodels' WLS, its weights keyword is
    # `weights`, not `w`; an unrecognised `w` would leave the fit unweighted.
    # Confirm against the WLS imported by this file.
    wls = WLS(endog=y, exog=x_reg, w=w_reg).fit()

    if print_table:
        print(wls.summary(xname=labels))

    n_baseline = len(labels_baseline)
    return {
        "coef_baseline": wls.params[:n_baseline],
        "coef_treatment": wls.params[n_baseline:],
        "bin_values": bin_edges,
        "bin_count": np.sum(s_onehot, axis=0),
    }