def test_SensitivityRandomCause():
    y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)

    # Generate the dataset format required by the sensitivity analysis
    INFERENCE_FEATURES = ["feature_" + str(i) for i in range(NUM_FEATURES)]
    df = pd.DataFrame(X, columns=INFERENCE_FEATURES)
    df[TREATMENT_COL] = treatment
    df[OUTCOME_COL] = y
    df[SCORE_COL] = e

    # Fit a BaseXLearner and return the sensitivity analysis summary report
    learner = BaseXLearner(LinearRegression())
    sens = SensitivityRandomCause(
        df=df,
        inference_features=INFERENCE_FEATURES,
        p_col=SCORE_COL,
        treatment_col=TREATMENT_COL,
        outcome_col=OUTCOME_COL,
        learner=learner,
    )

    sens_summary = sens.summary(method="Random Cause")
    print(sens_summary)
def test_SensitivitySelectionBias():
    y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)

    # Generate the dataset format required by the sensitivity analysis
    INFERENCE_FEATURES = ["feature_" + str(i) for i in range(NUM_FEATURES)]
    df = pd.DataFrame(X, columns=INFERENCE_FEATURES)
    df[TREATMENT_COL] = treatment
    df[OUTCOME_COL] = y
    df[SCORE_COL] = e

    # Fit a BaseXLearner and return the sensitivity analysis summary report
    learner = BaseXLearner(LinearRegression())
    sens = SensitivitySelectionBias(
        df,
        INFERENCE_FEATURES,
        p_col=SCORE_COL,
        treatment_col=TREATMENT_COL,
        outcome_col=OUTCOME_COL,
        learner=learner,
        confound="alignment",
        alpha_range=None,
    )

    lls_bias_alignment, partial_rsqs_bias_alignment = sens.causalsens()
    print(lls_bias_alignment, partial_rsqs_bias_alignment)

    # Plot the results by confounding vector, with confidence intervals for the ATE
    sens.plot(lls_bias_alignment, ci=True)
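# Hedged sketch: beyond the raw-confounding plot above, SensitivitySelectionBias
# also exposes a partial R^2 view of the same causalsens() results. The plotting
# arguments below (type='r.squared', partial_rsqs_d, partial_rsqs) follow
# causalml's sensitivity plotting API; verify them against the installed version.
def sketch_plot_selection_bias_rsquared(sens, lls_bias_alignment, partial_rsqs_bias_alignment):
    # Plot the results by r-squared, with partial r-squared per individual feature
    sens.plot(
        lls_bias_alignment,
        partial_rsqs_d=partial_rsqs_bias_alignment,
        type="r.squared",
        partial_rsqs=True,
    )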
def test_Sensitivity():
    y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)

    # Generate the dataset format required by the sensitivity analysis
    INFERENCE_FEATURES = ['feature_' + str(i) for i in range(NUM_FEATURES)]
    df = pd.DataFrame(X, columns=INFERENCE_FEATURES)
    df[TREATMENT_COL] = treatment
    df[OUTCOME_COL] = y
    df[SCORE_COL] = e

    # Fit a BaseXLearner and return the sensitivity analysis summary report
    learner = BaseXLearner(LinearRegression())
    sens = Sensitivity(
        df=df,
        inference_features=INFERENCE_FEATURES,
        p_col=SCORE_COL,
        treatment_col=TREATMENT_COL,
        outcome_col=OUTCOME_COL,
        learner=learner,
    )

    # Check the sensitivity summary report across all supported methods
    sens_summary = sens.sensitivity_analysis(
        methods=[
            'Placebo Treatment',
            'Random Cause',
            'Subset Data',
            'Random Replace',
            'Selection Bias',
        ],
        sample_size=0.5,
    )
    print(sens_summary)
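# Hedged sketch of asserting on the summary rather than printing it. This assumes
# sensitivity_analysis returns a pandas DataFrame with a 'Method' column covering
# each requested method (labels may carry suffixes, hence the substring match);
# verify against the installed causalml version.
def sketch_check_sensitivity_summary(sens_summary):
    requested = ['Placebo Treatment', 'Random Cause', 'Subset Data',
                 'Random Replace', 'Selection Bias']
    reported = sens_summary['Method'].astype(str).tolist()
    # every requested method should show up at least once in the report
    assert all(any(name in row for row in reported) for name in requested)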
def test_alignment_att():
    y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)

    alpha = np.quantile(y, 0.25)
    adj = alignment_att(alpha, e, treatment)
    assert y.shape == adj.shape
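# Hedged sketch: the sibling confounding functions in causalml's sensitivity
# module share the same (alpha, p, treatment) -> adjustment contract, so the
# analogous shape checks look like this. The import path
# causalml.metrics.sensitivity is an assumption to verify.
def sketch_confound_adjustment_shapes():
    from causalml.metrics.sensitivity import one_sided, alignment, one_sided_att

    y, X, treatment, tau, b, e = synthetic_data(mode=1, n=1000, p=NUM_FEATURES, sigma=1.0)
    alpha = np.quantile(y, 0.25)
    for confound in (one_sided, alignment, one_sided_att):
        adj = confound(alpha, e, treatment)
        assert y.shape == adj.shape  # one adjustment value per observation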
def test_synthetic_data():
    # All four simulation modes must return aligned arrays of equal length
    for mode in (1, 2, 3, 4):
        y, X, treatment, tau, b, e = synthetic_data(mode=mode, n=N_SAMPLE, p=8, sigma=.1)
        assert (y.shape[0] == X.shape[0]
                and y.shape[0] == treatment.shape[0]
                and y.shape[0] == tau.shape[0]
                and y.shape[0] == b.shape[0]
                and y.shape[0] == e.shape[0])
def _generate_data():
    # `generated` is a free variable from an enclosing scope (see the fixture
    # sketch below); as written it never flips to True, so each call regenerates
    # the same dataset deterministically via the fixed seed.
    if not generated:
        np.random.seed(RANDOM_SEED)
        data = synthetic_data(mode=1, n=N_SAMPLE, p=8, sigma=.1)
    return data
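# A hedged sketch of the enclosing pytest fixture that supplies `generated` to
# the closure above, patterned on a typical conftest.py memoization setup. The
# fixture name and scope are assumptions, not a confirmed API; RANDOM_SEED,
# N_SAMPLE, and synthetic_data come from this module's scope.
import pytest


@pytest.fixture(scope='module')
def generate_regression_data():
    generated = False  # guard flag closed over by _generate_data

    def _generate_data():
        if not generated:
            np.random.seed(RANDOM_SEED)
            data = synthetic_data(mode=1, n=N_SAMPLE, p=8, sigma=.1)
        return data

    yield _generate_data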
''' Day 52 Uber CausalML '''
import logging

from causalml.inference.meta import LRSRegressor
from causalml.inference.meta import XGBTRegressor, MLPTRegressor
from causalml.inference.meta import BaseXRegressor
from causalml.dataset import synthetic_data

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# synthetic_data returns (y, X, treatment, tau, b, e); only the first three are needed here
y, X, treatment, _, _, _ = synthetic_data(mode=1, n=1000, p=5, sigma=1.0)

lr = LRSRegressor()
te, lb, ub = lr.estimate_ate(X, treatment, y)
logger.info(
    'Average Treatment Effect (Linear Regression): {:.2f} ({:.2f}, {:.2f})'.format(
        te[0], lb[0], ub[0]))

xg = XGBTRegressor(random_state=42)
te, lb, ub = xg.estimate_ate(X, treatment, y)
logger.info(
    'Average Treatment Effect (XGBoost): {:.2f} ({:.2f}, {:.2f})'.format(
        te[0], lb[0], ub[0]))

nn = MLPTRegressor(hidden_layer_sizes=(10, 10),
                   learning_rate_init=.1,
                   early_stopping=True,
                   random_state=42)
te, lb, ub = nn.estimate_ate(X, treatment, y)
logger.info(
    'Average Treatment Effect (Neural Network (MLP)): {:.2f} ({:.2f}, {:.2f})'.format(
        te[0], lb[0], ub[0]))
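# BaseXRegressor is imported above but unused so far. A minimal sketch of the
# X-learner ATE estimate, patterned on the causalml README: the propensity score
# e from synthetic_data is kept this time and passed as p. Using XGBRegressor as
# the base learner is an assumption about where this snippet was headed.
from xgboost import XGBRegressor

y, X, treatment, _, _, e = synthetic_data(mode=1, n=1000, p=5, sigma=1.0)
xl = BaseXRegressor(learner=XGBRegressor(random_state=42))
te, lb, ub = xl.estimate_ate(X, treatment, y, p=e)
logger.info(
    'Average Treatment Effect (BaseXRegressor using XGBoost): {:.2f} ({:.2f}, {:.2f})'.format(
        te[0], lb[0], ub[0]))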