def test_propensity_truncation(self):
    """Check truncation and the rejection of illegal clipping bounds.

    Delegates the truncation check itself to
    ``ensure_truncation(test_weights=False)``, then verifies that every
    illegal ``clip_min`` / ``clip_max`` combination listed below raises an
    ``AssertionError``, both in ``compute_propensity`` and in the ``IPW``
    constructor.
    """
    self.ensure_truncation(test_weights=False)

    # Bound combinations expected to be rejected by compute_propensity.
    illegal_on_compute = (
        {"clip_min": 0.6},
        {"clip_max": 0.4},
        {"clip_min": 0.6, "clip_max": 0.9},
        {"clip_min": 0.1, "clip_max": 0.4},
    )
    with self.subTest("Illegal truncation values assertion on compute"):
        for bounds in illegal_on_compute:
            with self.assertRaises(AssertionError):
                self.estimator.compute_propensity(
                    self.data_r_80["X"], self.data_r_80["a"], **bounds
                )

    # Bound combinations expected to be rejected by the IPW constructor.
    illegal_on_init = (
        {"clip_min": 0.6},
        {"clip_max": 0.4},
        {"clip_min": 0.1, "clip_max": 0.4},
        {"clip_min": 0.6, "clip_max": 0.9},
    )
    with self.subTest("Illegal truncation values assertion on initialization"):
        for bounds in illegal_on_init:
            with self.assertRaises(AssertionError):
                IPW(LogisticRegression(), **bounds)
def setUpClass(cls):
    """Create the datasets and the IPW estimator shared by the tests.

    Stores four ``{"X": ..., "a": ...}`` dictionaries on the class
    (``data_r_100``, ``data_r_80``, ``data_cat_r_100``, ``data_cat_r_80``)
    and an ``IPW`` instance as ``cls.estimator``.
    """
    # Settings common to both generated datasets; only ``flip_y`` differs.
    shared_settings = dict(
        n_features=1,
        n_informative=1,
        n_redundant=0,
        n_repeated=0,
        n_classes=2,
        n_clusters_per_class=1,
        class_sep=10.0,
    )

    # Generated data without label flipping.
    features, assignment = make_classification(flip_y=0.0, **shared_settings)
    cls.data_r_100 = {"X": pd.DataFrame(features), "a": pd.Series(assignment)}

    # Generated data with flip_y=0.2.
    features, assignment = make_classification(flip_y=0.2, **shared_settings)
    cls.data_r_80 = {"X": pd.DataFrame(features), "a": pd.Series(assignment)}

    # Categorical data that maps x=0->a=0 and x=1->a=1.
    binary = pd.Series([0] * 50 + [1] * 50)
    cls.data_cat_r_100 = {"X": binary.to_frame(), "a": binary}

    # Same mapping, except that 10 of the 50 samples in each treatment
    # group carry the opposite covariate value.
    noisy_covariate = pd.Series([0] * 40 + [1] * 10 + [1] * 40 + [0] * 10)
    noisy_assignment = pd.Series([0] * 50 + [1] * 50)
    cls.data_cat_r_80 = {"X": noisy_covariate.to_frame(), "a": noisy_assignment}

    # The large C avoids regularization of the model.
    unregularized = LogisticRegression(C=1e6, solver='lbfgs')
    cls.estimator = IPW(
        unregularized, clip_min=0.05, clip_max=0.95, use_stabilized=False
    )
def setUpClass(cls):
    """Run the base setup, then install a ``DoublyRobustIpFeature`` estimator.

    The estimator combines a ``Standardization`` outcome model with an
    unstabilized ``IPW`` weight model and is stored as ``cls.estimator``.
    """
    TestDoublyRobustBase.setUpClass()

    # The large C avoids regularization of the propensity model.
    propensity_learner = LogisticRegression(C=1e6, solver='lbfgs')
    weight_model = IPW(propensity_learner, use_stabilized=False)

    # NOTE(review): the ``normalize`` argument was removed from
    # LinearRegression in recent scikit-learn releases - confirm the
    # supported version range.
    outcome_model = Standardization(LinearRegression(normalize=True))

    cls.estimator = DoublyRobustIpFeature(outcome_model, weight_model)
def __init__(self,
             outcome_model=None,
             prop_score_model=None,
             doubly_robust_type='vanilla',
             standardization_type='standardization',
             trim_weights=False,
             trim_eps=None,
             stabilized=False):
    """Build a doubly robust causallib estimator and wrap it.

    Args:
        outcome_model: Learner handed to the standardization model. When
            ``None`` (the default) a fresh ``LinearRegression()`` is
            created for this instance.
        prop_score_model: Learner handed to ``IPW``. When ``None`` (the
            default) a fresh ``LogisticRegression()`` is created for this
            instance.
        doubly_robust_type: Key into ``STR_TO_DOUBLY_ROBUST``; must be one
            of ``DOUBLY_ROBUST_TYPES``.
        standardization_type: Key into ``STR_TO_STANDARDIZATION``.
        trim_weights: When true and ``trim_eps`` is ``None``, ``TRIM_EPS``
            is used as the truncation value.
        trim_eps: Forwarded to ``IPW`` as ``truncate_eps``. It is forwarded
            even when ``trim_weights`` is false.
        stabilized: Forwarded to ``IPW`` as ``use_stabilized``.

    Raises:
        ValueError: If ``doubly_robust_type`` or ``standardization_type``
            is not a recognized key.
    """
    # Estimator instances used as default argument values are created once,
    # at function definition time, and would be shared (including any state
    # written to them later) by every object built with the defaults.
    # Create them per instance instead.
    if outcome_model is None:
        outcome_model = LinearRegression()
    if prop_score_model is None:
        prop_score_model = LogisticRegression()

    if doubly_robust_type not in DOUBLY_ROBUST_TYPES:
        raise ValueError(
            'Invalid double_robust_type. Valid types: {}'.format(
                list(DOUBLY_ROBUST_TYPES)))
    if standardization_type not in STR_TO_STANDARDIZATION:
        raise ValueError(
            'Invalid standardization_type. Valid types: {}'.format(
                list(STR_TO_STANDARDIZATION.keys())))

    if trim_weights and trim_eps is None:
        trim_eps = TRIM_EPS

    ipw = IPW(learner=prop_score_model,
              truncate_eps=trim_eps,
              use_stabilized=stabilized)
    standardization = STR_TO_STANDARDIZATION[standardization_type](
        outcome_model)
    doubly_robust = STR_TO_DOUBLY_ROBUST[doubly_robust_type](
        outcome_model=standardization, weight_model=ipw)
    super().__init__(causallib_estimator=doubly_robust)
def calc_outcome_adaptive_lasso_single_lambda(A, Y, X, Lambda,
                                              gamma_convergence_factor):
    """Estimate the ATE with the outcome adaptive lasso for a single Lambda.

    Args:
        A: Exposure; must expose ``.shape`` and ``.values``.
        Y: Outcome.
        X: Covariates.
        Lambda: Penalty strength; the propensity model uses ``C=1 / Lambda``.
        gamma_convergence_factor: Constant entering the gamma exponent.

    Returns:
        A tuple ``(effect, x_coefs, weights)``: the estimated effect, the
        outcome-regression coefficients of the covariates, and the inverse
        propensity weights.
    """
    n_samples = A.shape[0]

    # Gamma is derived from Lambda and the convergence factor.
    gamma = 2 * (1 + gamma_convergence_factor - log(Lambda, n_samples))

    # Outcome regression on [exposure, covariates]; the exposure occupies
    # the first column, so its coefficient is dropped below.
    design = np.hstack([A.values.reshape(-1, 1), X])
    outcome_regression = LinearRegression(fit_intercept=True).fit(design, Y)
    x_coefs = outcome_regression.coef_[1:]

    # Outcome adaptive penalization weights, applied to the covariates
    # themselves rather than to the penalty term.
    penalty_weights = (np.abs(x_coefs))**(-1 * gamma)
    X_w = X / penalty_weights

    # L1-penalized logistic propensity model on the rescaled covariates.
    propensity_learner = LogisticRegression(solver='liblinear',
                                            penalty='l1',
                                            C=1 / Lambda)
    ipw = IPW(propensity_learner, use_stabilized=False).fit(X_w, A)

    # Inverse propensity weights, weighted population outcomes, and ATE.
    ip_weights = ipw.compute_weights(X_w, A)
    outcomes = ipw.estimate_population_outcome(X_w, A, Y, w=ip_weights)
    effect = ipw.estimate_effect(outcomes[1], outcomes[0])
    return effect, x_coefs, ip_weights
def setUpClass(self):
    """Fit a doubly robust model on NHEFS data and build its evaluators.

    Stores the loaded data, the fitted ``DoublyRobustVanilla`` model, and
    one evaluator each for its weight model and its outcome model.
    """
    self.data = load_nhefs()

    weight_model = IPW(LogisticRegression(solver="liblinear"),
                       truncate_eps=0.05)
    outcome_model = StratifiedStandardization(LinearRegression())

    self.dr = DoublyRobustVanilla(outcome_model, weight_model)
    self.dr.fit(self.data.X, self.data.a, self.data.y)

    # The evaluators wrap the sub-models held by the fitted estimator.
    self.prp_evaluator = PropensityEvaluator(self.dr.weight_model)
    self.out_evaluator = OutcomeEvaluator(self.dr.outcome_model)
def calc_ate_vanilla_ipw(A, Y, X):
    """Estimate the ATE with stabilized IPW on an L1 logistic propensity model.

    Args:
        A: Exposure.
        Y: Outcome.
        X: Covariates.

    Returns:
        The first element of the effect returned by ``IPW.estimate_effect``.
    """
    propensity_learner = LogisticRegression(solver='liblinear',
                                            penalty='l1',
                                            C=1e2,
                                            max_iter=500)
    model = IPW(propensity_learner, use_stabilized=True).fit(X, A)

    sample_weights = model.compute_weights(X, A)
    population_outcomes = model.estimate_population_outcome(
        X, A, Y, w=sample_weights)
    treated, control = population_outcomes[1], population_outcomes[0]
    return model.estimate_effect(treated, control)[0]
def ensure_many_models(self, clip_min=None, clip_max=None):
    """Smoke-test fitting and outcome estimation across many learner pairs.

    Every propensity classifier is wrapped in ``IPW`` (using ``clip_min`` /
    ``clip_max``) and paired with every outcome regressor wrapped in
    ``Standardization``. Each pair is fitted and asked for individual
    outcomes inside its own sub-test; the only expectation is that nothing
    raises.
    """
    from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
    from sklearn.neural_network import MLPRegressor
    from sklearn.linear_model import ElasticNet, RANSACRegressor, HuberRegressor, PassiveAggressiveRegressor
    from sklearn.neighbors import KNeighborsRegressor
    from sklearn.svm import SVR, LinearSVR
    from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
    from sklearn.neural_network import MLPClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.exceptions import ConvergenceWarning

    def short_name(learner):
        # Text before the first "(" of the learner's string form.
        return str(learner).split("(", maxsplit=1)[0]

    warnings.filterwarnings('ignore', category=ConvergenceWarning)
    data = self.create_uninformative_ox_dataset()

    propensity_learners = [
        GradientBoostingClassifier(n_estimators=10),
        RandomForestClassifier(n_estimators=100),
        MLPClassifier(hidden_layer_sizes=(5, )),
        KNeighborsClassifier(n_neighbors=20),
    ]
    for propensity_learner in propensity_learners:
        weight_model = IPW(propensity_learner,
                           clip_min=clip_min,
                           clip_max=clip_max)
        propensity_learner_name = short_name(propensity_learner)

        # Fresh outcome learners are created for every propensity learner.
        outcome_learners = [
            GradientBoostingRegressor(n_estimators=10),
            RandomForestRegressor(n_estimators=10),
            MLPRegressor(hidden_layer_sizes=(5, )),
            ElasticNet(),
            RANSACRegressor(),
            HuberRegressor(),
            PassiveAggressiveRegressor(),
            KNeighborsRegressor(),
            SVR(),
            LinearSVR(),
        ]
        for outcome_learner in outcome_learners:
            outcome_learner_name = short_name(outcome_learner)
            outcome_model = Standardization(outcome_learner)
            description = "Test fit & predict using {} & {}".format(
                propensity_learner_name, outcome_learner_name)
            with self.subTest(description):
                model = self.estimator.__class__(outcome_model, weight_model)
                model.fit(data["X"],
                          data["a"],
                          data["y"],
                          refit_weight_model=False)
                model.estimate_individual_outcome(data["X"], data["a"])
                self.assertTrue(True)  # Fit did not crash
def test_ipw_matches_causallib(linear_data_pandas):
    """Check that ``IPWEstimator`` yields the same ATE as causallib's ``IPW``.

    The reference effect is computed directly with ``IPW`` on a default
    ``LogisticRegression`` and compared for exact equality with
    ``IPWEstimator.estimate_ate()``.
    """
    w, t, y = linear_data_pandas

    # Reference value computed directly with causallib.
    reference = IPW(learner=LogisticRegression())
    reference.fit(w, t)
    outcomes = reference.estimate_population_outcome(
        w, t, y, treatment_values=[0, 1])
    expected_effect = reference.estimate_effect(outcomes[1], outcomes[0])[0]

    # Value produced by the wrapper under test.
    wrapper = IPWEstimator()
    wrapper.fit(w, t, y)
    actual_effect = wrapper.estimate_ate()

    assert actual_effect == expected_effect
def __init__(self,
             prop_score_model=None,
             trim_weights=False,
             trim_eps=None,
             stabilized=False):
    """Create the underlying ``IPW`` model and reset the stored data.

    Args:
        prop_score_model: Learner handed to ``IPW``. When ``None`` (the
            default) a fresh ``LogisticRegression()`` is created for this
            instance.
        trim_weights: When true and ``trim_eps`` is ``None``, ``TRIM_EPS``
            is used as the truncation value.
        trim_eps: Forwarded to ``IPW`` as ``truncate_eps``. It is forwarded
            even when ``trim_weights`` is false.
        stabilized: Forwarded to ``IPW`` as ``use_stabilized``.
    """
    # An estimator instance used as a default argument value is created
    # once, at function definition time, and would be shared by every
    # object built with the default. Create one per instance instead.
    if prop_score_model is None:
        prop_score_model = LogisticRegression()

    if trim_weights and trim_eps is None:
        trim_eps = TRIM_EPS

    self.ipw = IPW(learner=prop_score_model,
                   truncate_eps=trim_eps,
                   use_stabilized=stabilized)
    # Start as None; nothing in this constructor fills them in.
    self.w = None
    self.t = None
    self.y = None
def test_pipeline_learner(self):
    """Check that ``IPW`` works with a scikit-learn pipeline as its learner.

    Construction, fitting, and weight computation each run in their own
    sub-test; each passes as long as no exception is raised.
    """
    from sklearn.preprocessing import StandardScaler, MinMaxScaler
    from sklearn.pipeline import make_pipeline

    pipeline_steps = (
        StandardScaler(),
        MinMaxScaler(),
        LogisticRegression(solver='lbfgs'),
    )
    learner = make_pipeline(*pipeline_steps)

    with self.subTest("Test initialization with pipeline learner"):
        self.estimator = IPW(learner)
        self.assertTrue(True)  # Dummy assert for not thrown exception

    with self.subTest("Test fit with pipeline learner"):
        self.estimator.fit(self.data_r_100["X"], self.data_r_100["a"])
        self.assertTrue(True)  # Dummy assert for not thrown exception

    with self.subTest("Test 'predict' with pipeline learner"):
        self.estimator.compute_weights(self.data_r_100["X"],
                                       self.data_r_100["a"])
        self.assertTrue(True)  # Dummy assert for not thrown exception
def test_forcing_probability_learner(self):
    """Check that building ``IPW`` around ``SVC()`` raises ``AttributeError``."""
    # SVC serves as an arbitrary model with decision_function instead of
    # predict_proba.
    from sklearn.svm import SVC

    # The lambda defers building both objects until assertRaises calls it.
    self.assertRaises(AttributeError, lambda: IPW(SVC()))
def test_many_models(self):
    """Exercise fitting across many propensity and outcome learner pairs.

    For every propensity classifier wrapped in ``IPW``, the outcome
    regressors in the first group are expected to fit without raising,
    while those in the second group are expected to raise ``TypeError``
    on ``fit``.
    """
    from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
    from sklearn.neural_network import MLPRegressor
    from sklearn.linear_model import ElasticNet, RANSACRegressor, HuberRegressor, PassiveAggressiveRegressor
    from sklearn.neighbors import KNeighborsRegressor
    from sklearn.svm import SVR, LinearSVR
    from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
    from sklearn.neural_network import MLPClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.exceptions import ConvergenceWarning

    def short_name(learner):
        # Text before the first "(" of the learner's string form.
        return str(learner).split("(", maxsplit=1)[0]

    warnings.filterwarnings('ignore', category=ConvergenceWarning)
    data = self.create_uninformative_ox_dataset()

    propensity_learners = [
        GradientBoostingClassifier(n_estimators=10),
        RandomForestClassifier(n_estimators=100),
        MLPClassifier(hidden_layer_sizes=(5, )),
        KNeighborsClassifier(n_neighbors=20),
    ]
    for propensity_learner in propensity_learners:
        weight_model = IPW(propensity_learner)
        propensity_learner_name = short_name(propensity_learner)

        # Group 1: learners for which fitting is expected to succeed.
        fitting_learners = [
            GradientBoostingRegressor(n_estimators=10),
            RandomForestRegressor(n_estimators=10),
            RANSACRegressor(),
            HuberRegressor(),
            SVR(),
            LinearSVR(),
        ]
        for outcome_learner in fitting_learners:
            outcome_learner_name = short_name(outcome_learner)
            outcome_model = Standardization(outcome_learner)
            description = "Test fit using {} & {}".format(
                propensity_learner_name, outcome_learner_name)
            with self.subTest(description):
                model = self.estimator.__class__(outcome_model, weight_model)
                model.fit(data["X"],
                          data["a"],
                          data["y"],
                          refit_weight_model=False)
                self.assertTrue(True)  # Fit did not crash

        # Group 2: learners for which fitting is expected to fail.
        failing_learners = [
            MLPRegressor(hidden_layer_sizes=(5, )),
            # ElasticNet(),  # supports sample_weights since v0.23, remove to support v<0.23
            PassiveAggressiveRegressor(),
            KNeighborsRegressor(),
        ]
        for outcome_learner in failing_learners:
            outcome_learner_name = short_name(outcome_learner)
            outcome_model = Standardization(outcome_learner)
            description = "Test fit using {} & {}".format(
                propensity_learner_name, outcome_learner_name)
            with self.subTest(description):
                model = self.estimator.__class__(outcome_model, weight_model)
                with self.assertRaises(TypeError):
                    # Joffe forces learning with sample_weights,
                    # not all ML models support that and so calling should fail
                    model.fit(data["X"],
                              data["a"],
                              data["y"],
                              refit_weight_model=False)
def init(self, reduced, importance_sampling):
    """Build the ``TMLE`` estimator and store it as ``self._estimator``.

    Args:
        reduced: Forwarded to ``TMLE`` as ``reduced``.
        importance_sampling: Forwarded to ``TMLE`` as
            ``importance_sampling``.
    """
    outcome_model = Standardization(self.outcome_model_cont)
    weight_model = IPW(self.treatment_model)
    self._estimator = TMLE(
        outcome_model,
        weight_model,
        reduced=reduced,
        importance_sampling=importance_sampling,
    )