Ejemplo n.º 1
0
class TestStandardization(TestStandardizationCommon):
    @classmethod
    def setUpClass(cls):
        TestStandardizationCommon.setUpClass()
        # Avoids regularization of the model:
        cls.estimator = Standardization(LinearRegression(normalize=True))

    def setUp(self):
        self.estimator.fit(self.data_lin["X"], self.data_lin["a"],
                           self.data_lin["y"])

    def test_is_fitted(self):
        self.assertTrue(hasattr(self.estimator.learner, "coef_"))

    def test_effect_estimation(self):
        with self.subTest("Check by model coefficient:"):
            self.assertAlmostEqual(self.estimator.learner.coef_[0],
                                   self.data_lin["beta"],
                                   places=5)
        self.ensure_effect_estimation()

    def test_observed_prediction(self):
        self.ensure_observed_prediction()

    def test_counterfactual_outcomes(self):
        self.ensure_counterfactual_outcomes()

    def test_treatment_encoding(self):
        self.estimator = Standardization(LinearRegression(),
                                         encode_treatment=True)
        a = self.data_lin["a"].replace({0: "p", 1: "q"})
        self.estimator.fit(self.data_lin["X"], a, self.data_lin["y"])
        with self.subTest("Treatment encoder created:"):
            self.assertTrue(hasattr(self.estimator, "treatment_encoder_"))
        with self.subTest("Treatment categories properly encoded"):
            self.assertSetEqual(
                {"p", "q"},
                set(*self.estimator.treatment_encoder_.categories_))
        with self.subTest("Fitted model has the right size"):
            self.assertEqual(len(self.estimator.learner.coef_),
                             self.data_lin["X"].shape[1] + a.nunique())

    def test_pipeline_learner(self):
        self.ensure_pipeline_learner()

    def test_many_models(self):
        self.ensure_many_models()
Ejemplo n.º 2
0
def test_standardization_matches_causallib(linear_data_pandas):
    w, t, y = linear_data_pandas
    causallib_standardization = Standardization(LinearRegression())
    causallib_standardization.fit(w, t, y)
    individual_potential_outcomes = causallib_standardization.estimate_individual_outcome(
        w, t)
    causallib_ite_estimates = individual_potential_outcomes[
        1] - individual_potential_outcomes[0]
    mean_potential_outcomes = causallib_standardization.estimate_population_outcome(
        w, t)
    causallib_ate_estimate = mean_potential_outcomes[
        1] - mean_potential_outcomes[0]

    standardization = StandardizationEstimator()
    standardization.fit(w, t, y)
    assert causallib_ate_estimate == standardization.estimate_ate()
    pd.testing.assert_series_equal(causallib_ite_estimates,
                                   standardization.estimate_ite())
Ejemplo n.º 3
0
class TestStandardizationClassification(TestStandardizationCommon):
    @classmethod
    def setUpClass(cls):
        # Three-class outcome, since decision_function might return a vector when n_classes=2, and we wish to check the
        # matrix form of the output behaves as expected:
        X, y = make_classification(n_features=3,
                                   n_informative=2,
                                   n_redundant=0,
                                   n_repeated=0,
                                   n_classes=3,
                                   n_clusters_per_class=1,
                                   flip_y=0.0,
                                   class_sep=10.0)
        X, a = X[:, :-1], X[:, -1]
        a = (a > np.median(a)).astype(int)
        cls.data_3cls = {
            "X": pd.DataFrame(X),
            "a": pd.Series(a),
            "y": pd.Series(y)
        }

        # X, y = make_classification(n_features=2, n_informative=1, n_redundant=0, n_repeated=0, n_classes=2,
        #                            n_clusters_per_class=1, flip_y=0.0, class_sep=10.0)
        # X, a = X[:, :-1], X[:, -1]
        # a = (a > np.median(a)).astype(int)
        # cls.data_2cls = {"X": pd.DataFrame(X), "a": pd.Series(a), "y": pd.Series(y)}

    def verify_individual_multiclass_output(self):
        self.estimator.fit(self.data_3cls["X"], self.data_3cls["a"],
                           self.data_3cls["y"])
        ind_outcome = self.estimator.estimate_individual_outcome(
            self.data_3cls["X"], self.data_3cls["a"])

        with self.subTest("Output size, # samples:"):
            self.assertEqual(self.data_3cls["X"].shape[0],
                             ind_outcome.shape[0])
        with self.subTest("Output size, # predictions:"):
            with self.subTest(
                    "Output's multiindex level names are describing treatment and outcome"
            ):
                self.assertEqual(["a", "y"], ind_outcome.columns.names)
            with self.subTest(
                    "Output's number of predictions is the same as number of outcome and treatment values"
            ):
                self.assertEqual(
                    self.data_3cls["a"].nunique() *
                    self.data_3cls["y"].nunique(), ind_outcome.shape[1])
                self.assertEqual(
                    self.data_3cls["a"].nunique(),
                    ind_outcome.columns.get_level_values("a").unique().size)
                self.assertEqual(
                    self.data_3cls["y"].nunique(),
                    ind_outcome.columns.get_level_values("y").unique().size)
        return ind_outcome

    def test_predict_proba(self):
        self.estimator = Standardization(LogisticRegression(C=1e6,
                                                            solver='lbfgs'),
                                         predict_proba=True)
        ind_outcome = self.verify_individual_multiclass_output()
        with self.subTest("Test results are probabilities - sum to 1:"):
            for treatment_value, y_pred in ind_outcome.groupby(level="a",
                                                               axis="columns"):
                pd.testing.assert_series_equal(
                    pd.Series(1.0, index=y_pred.index),
                    y_pred.sum(axis="columns"))

    def test_decision_function(self):
        self.estimator = Standardization(SVC(decision_function_shape='ovr'),
                                         predict_proba=True)
        self.verify_individual_multiclass_output()

    def test_predict(self):
        self.estimator = Standardization(LogisticRegression(C=1e6,
                                                            solver='lbfgs'),
                                         predict_proba=False)
        self.estimator.fit(self.data_3cls["X"], self.data_3cls["a"],
                           self.data_3cls["y"])
        ind_outcome = self.estimator.estimate_individual_outcome(
            self.data_3cls["X"], self.data_3cls["a"])
        with self.subTest("Output size, # predictions:"):
            self.assertEqual(self.data_3cls["a"].nunique(),
                             ind_outcome.shape[1])
            self.assertNotEqual(self.data_3cls["y"].nunique(),
                                ind_outcome.shape[1])