Code example #1
File: test_ipw.py  Project: IBM/causallib
    def test_propensity_truncation(self):
        self.ensure_truncation(test_weights=False)

        with self.subTest("Illegal truncation values assertion on compute"):
            with self.assertRaises(AssertionError):
                self.estimator.compute_propensity(self.data_r_80["X"],
                                                  self.data_r_80["a"],
                                                  clip_min=0.6)
            with self.assertRaises(AssertionError):
                self.estimator.compute_propensity(self.data_r_80["X"],
                                                  self.data_r_80["a"],
                                                  clip_max=0.4)
            with self.assertRaises(AssertionError):
                self.estimator.compute_propensity(self.data_r_80["X"],
                                                  self.data_r_80["a"],
                                                  clip_min=0.6,
                                                  clip_max=0.9)
            with self.assertRaises(AssertionError):
                self.estimator.compute_propensity(self.data_r_80["X"],
                                                  self.data_r_80["a"],
                                                  clip_min=0.1,
                                                  clip_max=0.4)

        with self.subTest(
                "Illegal truncation values assertion on initialization"):
            with self.assertRaises(AssertionError):
                IPW(LogisticRegression(), clip_min=0.6)
            with self.assertRaises(AssertionError):
                IPW(LogisticRegression(), clip_max=0.4)
            with self.assertRaises(AssertionError):
                IPW(LogisticRegression(), clip_min=0.1, clip_max=0.4)
            with self.assertRaises(AssertionError):
                IPW(LogisticRegression(), clip_min=0.6, clip_max=0.9)
Code example #2
File: test_ipw.py  Project: IBM/causallib
    def setUpClass(cls):
        # Data:
        X, a = make_classification(n_features=1,
                                   n_informative=1,
                                   n_redundant=0,
                                   n_repeated=0,
                                   n_classes=2,
                                   n_clusters_per_class=1,
                                   flip_y=0.0,
                                   class_sep=10.0)
        cls.data_r_100 = {"X": pd.DataFrame(X), "a": pd.Series(a)}
        X, a = make_classification(n_features=1,
                                   n_informative=1,
                                   n_redundant=0,
                                   n_repeated=0,
                                   n_classes=2,
                                   n_clusters_per_class=1,
                                   flip_y=0.2,
                                   class_sep=10.0)
        cls.data_r_80 = {"X": pd.DataFrame(X), "a": pd.Series(a)}

        # Data that maps x=0->a=0 and x=1->a=1:
        X = pd.Series([0] * 50 + [1] * 50)
        cls.data_cat_r_100 = {"X": X.to_frame(), "a": X}

        # Data that maps x=0->a=0 and x=1->a=1, but 10% of x=0->a=1 and 10% of x=1->a=0:
        X = pd.Series([0] * 40 + [1] * 10 + [1] * 40 + [0] * 10).to_frame()
        a = pd.Series([0] * 50 + [1] * 50)
        cls.data_cat_r_80 = {"X": X, "a": a}

        # Avoids regularization of the model:
        cls.estimator = IPW(LogisticRegression(C=1e6, solver='lbfgs'),
                            clip_min=0.05,
                            clip_max=0.95,
                            use_stabilized=False)
Code example #3
    def setUpClass(cls):
        TestDoublyRobustBase.setUpClass()
        # Avoids regularization of the model:
        ipw = IPW(LogisticRegression(C=1e6, solver='lbfgs'),
                  use_stabilized=False)
        std = Standardization(LinearRegression(normalize=True))
        cls.estimator = DoublyRobustIpFeature(std, ipw)
Code example #4
    def __init__(self,
                 outcome_model=LinearRegression(),
                 prop_score_model=LogisticRegression(),
                 doubly_robust_type='vanilla',
                 standardization_type='standardization',
                 trim_weights=False,
                 trim_eps=None,
                 stabilized=False):

        if doubly_robust_type not in DOUBLY_ROBUST_TYPES:
            raise ValueError(
                'Invalid doubly_robust_type. Valid types: {}'.format(
                    list(DOUBLY_ROBUST_TYPES)))
        if standardization_type not in STR_TO_STANDARDIZATION.keys():
            raise ValueError(
                'Invalid standardization_type. Valid types: {}'.format(
                    list(STR_TO_STANDARDIZATION.keys())))

        if trim_weights and trim_eps is None:
            trim_eps = TRIM_EPS
        ipw = IPW(learner=prop_score_model,
                  truncate_eps=trim_eps,
                  use_stabilized=stabilized)

        standardization = STR_TO_STANDARDIZATION[standardization_type](
            outcome_model)
        doubly_robust = STR_TO_DOUBLY_ROBUST[doubly_robust_type](
            outcome_model=standardization, weight_model=ipw)

        super().__init__(causallib_estimator=doubly_robust)
Code example #5
def calc_outcome_adaptive_lasso_single_lambda(A, Y, X, Lambda,
                                              gamma_convergence_factor):
    """Calculate ATE with the outcome adaptive lasso"""
    n = A.shape[0]  # number of samples
    # extract gamma according to Lambda and gamma_convergence_factor
    gamma = 2 * (1 + gamma_convergence_factor - log(Lambda, n))
    # fit regression from covariates X and exposure A to outcome Y
    lr = LinearRegression(fit_intercept=True).fit(
        np.hstack([A.values.reshape(-1, 1), X]), Y)
    # extract the coefficients of the covariates
    x_coefs = lr.coef_[1:]
    # calculate outcome adaptive penalization weights
    weights = (np.abs(x_coefs))**(-1 * gamma)
    # apply the penalization to the covariates themselves
    X_w = X / weights
    # fit logistic propensity score model from penalized covariates to the exposure
    ipw = IPW(LogisticRegression(solver='liblinear',
                                 penalty='l1',
                                 C=1 / Lambda),
              use_stabilized=False).fit(X_w, A)
    # compute inverse propensity weighting and calculate ATE
    weights = ipw.compute_weights(X_w, A)
    outcomes = ipw.estimate_population_outcome(X_w, A, Y, w=weights)
    effect = ipw.estimate_effect(outcomes[1], outcomes[0])
    return effect, x_coefs, weights
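A minimal usage sketch (not part of the original project) showing how the function above might be called. It assumes the module-level imports of the original file are available (numpy as np, pandas as pd, math.log, sklearn's LinearRegression and LogisticRegression, and causallib's IPW); the synthetic data and the Lambda and gamma_convergence_factor values are illustrative assumptions only.

import numpy as np
import pandas as pd
from sklearn.datasets import make_classification

# Synthetic example: 5 covariates, a binary exposure, and an outcome with an
# assumed treatment effect of 2.0 (all values chosen only for illustration).
X_np, a_np = make_classification(n_samples=200, n_features=5, n_informative=3,
                                 n_redundant=0, random_state=0)
X = pd.DataFrame(X_np)
A = pd.Series(a_np)
Y = pd.Series(2.0 * a_np + X_np[:, 0] + np.random.default_rng(0).normal(size=200))

# Lambda and gamma_convergence_factor are hypothetical tuning values.
effect, x_coefs, ip_weights = calc_outcome_adaptive_lasso_single_lambda(
    A, Y, X, Lambda=0.1, gamma_convergence_factor=2)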
Code example #6
    def setUpClass(self):
        self.data = load_nhefs()
        ipw = IPW(LogisticRegression(solver="liblinear"), truncate_eps=0.05)
        std = StratifiedStandardization(LinearRegression())
        self.dr = DoublyRobustVanilla(std, ipw)
        self.dr.fit(self.data.X, self.data.a, self.data.y)
        self.prp_evaluator = PropensityEvaluator(self.dr.weight_model)
        self.out_evaluator = OutcomeEvaluator(self.dr.outcome_model)
Code example #7
def calc_ate_vanilla_ipw(A, Y, X):
    ipw = IPW(LogisticRegression(solver='liblinear',
                                 penalty='l1',
                                 C=1e2,
                                 max_iter=500),
              use_stabilized=True).fit(X, A)
    weights = ipw.compute_weights(X, A)
    outcomes = ipw.estimate_population_outcome(X, A, Y, w=weights)
    effect = ipw.estimate_effect(outcomes[1], outcomes[0])
    return effect[0]
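A brief usage sketch (not from the original source), assuming the function above plus pandas and sklearn are importable; the synthetic data below is an illustrative assumption only.

import pandas as pd
from sklearn.datasets import make_classification

X_np, a_np = make_classification(n_samples=500, n_features=3, n_informative=2,
                                 n_redundant=0, random_state=1)
X = pd.DataFrame(X_np)
A = pd.Series(a_np)
Y = pd.Series(1.0 * a_np + X_np[:, 0])  # outcome with an assumed unit treatment effect
ate = calc_ate_vanilla_ipw(A, Y, X)  # IPW-weighted difference in population outcomes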
Code example #8
File: test_doublyrobust.py  Project: IBM/causallib
    def ensure_many_models(self, clip_min=None, clip_max=None):
        from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
        from sklearn.neural_network import MLPRegressor
        from sklearn.linear_model import ElasticNet, RANSACRegressor, HuberRegressor, PassiveAggressiveRegressor
        from sklearn.neighbors import KNeighborsRegressor
        from sklearn.svm import SVR, LinearSVR

        from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
        from sklearn.neural_network import MLPClassifier
        from sklearn.neighbors import KNeighborsClassifier

        from sklearn.exceptions import ConvergenceWarning
        warnings.filterwarnings('ignore', category=ConvergenceWarning)

        data = self.create_uninformative_ox_dataset()
        for propensity_learner in [
                GradientBoostingClassifier(n_estimators=10),
                RandomForestClassifier(n_estimators=100),
                MLPClassifier(hidden_layer_sizes=(5, )),
                KNeighborsClassifier(n_neighbors=20)
        ]:
            weight_model = IPW(propensity_learner,
                               clip_min=clip_min,
                               clip_max=clip_max)
            propensity_learner_name = str(propensity_learner).split(
                "(", maxsplit=1)[0]
            for outcome_learner in [
                    GradientBoostingRegressor(n_estimators=10),
                    RandomForestRegressor(n_estimators=10),
                    MLPRegressor(hidden_layer_sizes=(5, )),
                    ElasticNet(),
                    RANSACRegressor(),
                    HuberRegressor(),
                    PassiveAggressiveRegressor(),
                    KNeighborsRegressor(),
                    SVR(),
                    LinearSVR()
            ]:
                outcome_learner_name = str(outcome_learner).split(
                    "(", maxsplit=1)[0]
                outcome_model = Standardization(outcome_learner)

                with self.subTest("Test fit & predict using {} & {}".format(
                        propensity_learner_name, outcome_learner_name)):
                    model = self.estimator.__class__(outcome_model,
                                                     weight_model)
                    model.fit(data["X"],
                              data["a"],
                              data["y"],
                              refit_weight_model=False)
                    model.estimate_individual_outcome(data["X"], data["a"])
                    self.assertTrue(True)  # Fit did not crash
Code example #9
def test_ipw_matches_causallib(linear_data_pandas):
    w, t, y = linear_data_pandas
    causallib_ipw = IPW(learner=LogisticRegression())
    causallib_ipw.fit(w, t)
    potential_outcomes = causallib_ipw.estimate_population_outcome(
        w, t, y, treatment_values=[0, 1])
    causallib_effect = causallib_ipw.estimate_effect(potential_outcomes[1],
                                                     potential_outcomes[0])[0]

    ipw = IPWEstimator()
    ipw.fit(w, t, y)
    our_effect = ipw.estimate_ate()
    assert our_effect == causallib_effect
Code example #10
File: ipw_estimator.py  Project: bradyneal/realcause
    def __init__(self,
                 prop_score_model=LogisticRegression(),
                 trim_weights=False,
                 trim_eps=None,
                 stabilized=False):
        if trim_weights and trim_eps is None:
            trim_eps = TRIM_EPS
        self.ipw = IPW(learner=prop_score_model,
                       truncate_eps=trim_eps,
                       use_stabilized=stabilized)
        self.w = None
        self.t = None
        self.y = None
Code example #11
File: test_ipw.py  Project: IBM/causallib
    def test_pipeline_learner(self):
        from sklearn.preprocessing import StandardScaler, MinMaxScaler
        from sklearn.pipeline import make_pipeline
        learner = make_pipeline(StandardScaler(), MinMaxScaler(),
                                LogisticRegression(solver='lbfgs'))
        with self.subTest("Test initialization with pipeline learner"):
            self.estimator = IPW(learner)
            self.assertTrue(True)  # Dummy assert for not thrown exception

        with self.subTest("Test fit with pipeline learner"):
            self.estimator.fit(self.data_r_100["X"], self.data_r_100["a"])
            self.assertTrue(True)  # Dummy assert for not thrown exception

        with self.subTest("Test 'predict' with pipeline learner"):
            self.estimator.compute_weights(self.data_r_100["X"],
                                           self.data_r_100["a"])
            self.assertTrue(True)  # Dummy assert for not thrown exception
Code example #12
File: test_ipw.py  Project: IBM/causallib
    def test_forcing_probability_learner(self):
        from sklearn.svm import SVC  # Arbitrary model with decision_function instead of predict_proba
        with self.assertRaises(AttributeError):
            IPW(SVC())
Code example #13
    def test_many_models(self):
        from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
        from sklearn.neural_network import MLPRegressor
        from sklearn.linear_model import ElasticNet, RANSACRegressor, HuberRegressor, PassiveAggressiveRegressor
        from sklearn.neighbors import KNeighborsRegressor
        from sklearn.svm import SVR, LinearSVR

        from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
        from sklearn.neural_network import MLPClassifier
        from sklearn.neighbors import KNeighborsClassifier

        from sklearn.exceptions import ConvergenceWarning
        warnings.filterwarnings('ignore', category=ConvergenceWarning)

        data = self.create_uninformative_ox_dataset()

        for propensity_learner in [
                GradientBoostingClassifier(n_estimators=10),
                RandomForestClassifier(n_estimators=100),
                MLPClassifier(hidden_layer_sizes=(5, )),
                KNeighborsClassifier(n_neighbors=20)
        ]:
            weight_model = IPW(propensity_learner)
            propensity_learner_name = str(propensity_learner).split(
                "(", maxsplit=1)[0]
            for outcome_learner in [
                    GradientBoostingRegressor(n_estimators=10),
                    RandomForestRegressor(n_estimators=10),
                    RANSACRegressor(),
                    HuberRegressor(),
                    SVR(),
                    LinearSVR()
            ]:
                outcome_learner_name = str(outcome_learner).split(
                    "(", maxsplit=1)[0]
                outcome_model = Standardization(outcome_learner)

                with self.subTest("Test fit using {} & {}".format(
                        propensity_learner_name, outcome_learner_name)):
                    model = self.estimator.__class__(outcome_model,
                                                     weight_model)
                    model.fit(data["X"],
                              data["a"],
                              data["y"],
                              refit_weight_model=False)
                    self.assertTrue(True)  # Fit did not crash

            for outcome_learner in [
                    MLPRegressor(hidden_layer_sizes=(5, )),
                    # ElasticNet(),  # supports sample_weights since v0.23, remove to support v<0.23
                    PassiveAggressiveRegressor(),
                    KNeighborsRegressor()
            ]:
                outcome_learner_name = str(outcome_learner).split(
                    "(", maxsplit=1)[0]
                outcome_model = Standardization(outcome_learner)

                with self.subTest("Test fit using {} & {}".format(
                        propensity_learner_name, outcome_learner_name)):
                    model = self.estimator.__class__(outcome_model,
                                                     weight_model)
                    with self.assertRaises(TypeError):
                        # Joffe forces learning with sample_weights,
                        # not all ML models support that and so calling should fail
                        model.fit(data["X"],
                                  data["a"],
                                  data["y"],
                                  refit_weight_model=False)
Code example #14
File: test_tmle.py  Project: IBM/causallib
    def init(self, reduced, importance_sampling):
        self._estimator = TMLE(
            Standardization(self.outcome_model_cont),
            IPW(self.treatment_model),
            reduced=reduced, importance_sampling=importance_sampling,
        )