class TestStandardization(TestStandardizationCommon):
    """Standardization tests that use a linear-regression outcome model.

    The ``data_lin`` fixture and the ``ensure_*`` checks are not defined in
    this class; they are expected to be provided by
    ``TestStandardizationCommon``.
    """

    @classmethod
    def setUpClass(cls):
        """Build the shared fixtures and the estimator used by the tests."""
        TestStandardizationCommon.setUpClass()
        # Ordinary least squares applies no regularization, so the fitted
        # coefficient can be compared directly with the true effect.
        # The former `normalize=True` argument is intentionally not passed:
        # scikit-learn deprecated it in 1.0 and removed it in 1.2, and for
        # plain least squares the coefficients in the original feature scale
        # are the same with or without it.
        cls.estimator = Standardization(LinearRegression())

    def setUp(self):
        """Refit the shared estimator on the linear data before each test."""
        self.estimator.fit(self.data_lin["X"], self.data_lin["a"],
                           self.data_lin["y"])

    def test_is_fitted(self):
        """The wrapped learner exposes ``coef_`` once ``fit`` has run."""
        self.assertTrue(hasattr(self.estimator.learner, "coef_"))

    def test_effect_estimation(self):
        """The first coefficient matches ``data_lin["beta"]`` to 5 places."""
        with self.subTest("Check by model coefficient:"):
            self.assertAlmostEqual(self.estimator.learner.coef_[0],
                                   self.data_lin["beta"], places=5)
        self.ensure_effect_estimation()

    def test_observed_prediction(self):
        """Run the inherited observed-prediction check."""
        self.ensure_observed_prediction()

    def test_counterfactual_outcomes(self):
        """Run the inherited counterfactual-outcomes check."""
        self.ensure_counterfactual_outcomes()

    def test_treatment_encoding(self):
        """Fit with string-labelled treatments and inspect the encoder."""
        # Assigning on the instance shadows the class-level estimator for
        # this test only.
        self.estimator = Standardization(LinearRegression(),
                                         encode_treatment=True)
        a = self.data_lin["a"].replace({0: "p", 1: "q"})
        self.estimator.fit(self.data_lin["X"], a, self.data_lin["y"])

        with self.subTest("Treatment encoder created:"):
            self.assertTrue(hasattr(self.estimator, "treatment_encoder_"))

        with self.subTest("Treatment categories properly encoded"):
            # `categories_` is unpacked into `set`, which therefore receives
            # the single array of categories of the one treatment column.
            self.assertSetEqual(
                {"p", "q"},
                set(*self.estimator.treatment_encoder_.categories_))

        with self.subTest("Fitted model has the right size"):
            # One coefficient per covariate plus one per treatment category.
            self.assertEqual(len(self.estimator.learner.coef_),
                             self.data_lin["X"].shape[1] + a.nunique())

    def test_pipeline_learner(self):
        """Run the inherited pipeline-learner check."""
        self.ensure_pipeline_learner()

    def test_many_models(self):
        """Run the inherited many-models check."""
        self.ensure_many_models()
def test_standardization_matches_causallib(linear_data_pandas):
    """Check ``StandardizationEstimator`` against causallib's ``Standardization``.

    Both estimators are fitted on the same ``(w, t, y)`` triple taken from the
    ``linear_data_pandas`` fixture. The causallib reference effects are
    computed as the outcome under treatment ``1`` minus the outcome under
    treatment ``0``, at the population level (ATE) and per individual (ITE),
    and compared with ``estimate_ate()`` and ``estimate_ite()``.
    """
    import math

    w, t, y = linear_data_pandas

    # Reference implementation.
    causallib_standardization = Standardization(LinearRegression())
    causallib_standardization.fit(w, t, y)

    individual_potential_outcomes = (
        causallib_standardization.estimate_individual_outcome(w, t))
    causallib_ite_estimates = (
        individual_potential_outcomes[1] - individual_potential_outcomes[0])

    mean_potential_outcomes = (
        causallib_standardization.estimate_population_outcome(w, t))
    causallib_ate_estimate = (
        mean_potential_outcomes[1] - mean_potential_outcomes[0])

    # Implementation under test.
    standardization = StandardizationEstimator()
    standardization.fit(w, t, y)
    ate_estimate = standardization.estimate_ate()

    # Compare with a tight tolerance instead of exact float equality, so the
    # check does not depend on floating-point summation order. The ITE
    # comparison below is tolerance-based as well.
    assert math.isclose(causallib_ate_estimate, ate_estimate,
                        rel_tol=1e-9, abs_tol=1e-12), (
        f"ATE mismatch: causallib={causallib_ate_estimate!r}, "
        f"estimator={ate_estimate!r}")
    pd.testing.assert_series_equal(causallib_ite_estimates,
                                   standardization.estimate_ite())
class TestStandardizationClassification(TestStandardizationCommon):
    """Standardization tests for a three-class outcome."""

    @classmethod
    def setUpClass(cls):
        """Create the three-class fixture ``data_3cls`` used by the tests."""
        # Three-class outcome, since decision_function might return a vector
        # when n_classes=2, and we wish to check the matrix form of the
        # output behaves as expected.
        # A fixed seed keeps the generated fixture identical between runs.
        X, y = make_classification(n_features=3, n_informative=2,
                                   n_redundant=0, n_repeated=0, n_classes=3,
                                   n_clusters_per_class=1, flip_y=0.0,
                                   class_sep=10.0, random_state=0)
        # The last generated column is split at its median into a binary
        # treatment; the remaining columns are kept as covariates.
        X, a = X[:, :-1], X[:, -1]
        a = (a > np.median(a)).astype(int)
        cls.data_3cls = {
            "X": pd.DataFrame(X),
            "a": pd.Series(a),
            "y": pd.Series(y),
        }
        # NOTE: the binary-outcome fixture below is currently disabled.
        # X, y = make_classification(n_features=2, n_informative=1, n_redundant=0, n_repeated=0, n_classes=2,
        #                            n_clusters_per_class=1, flip_y=0.0, class_sep=10.0)
        # X, a = X[:, :-1], X[:, -1]
        # a = (a > np.median(a)).astype(int)
        # cls.data_2cls = {"X": pd.DataFrame(X), "a": pd.Series(a), "y": pd.Series(y)}

    def verify_individual_multiclass_output(self):
        """Fit ``self.estimator`` on ``data_3cls`` and check the output layout.

        Returns:
            The individual-outcome frame produced by the estimator, so that
            callers can run further assertions on it.
        """
        self.estimator.fit(self.data_3cls["X"], self.data_3cls["a"],
                           self.data_3cls["y"])
        ind_outcome = self.estimator.estimate_individual_outcome(
            self.data_3cls["X"], self.data_3cls["a"])

        n_treatments = self.data_3cls["a"].nunique()
        n_outcomes = self.data_3cls["y"].nunique()

        with self.subTest("Output size, # samples:"):
            self.assertEqual(self.data_3cls["X"].shape[0],
                             ind_outcome.shape[0])

        with self.subTest("Output size, # predictions:"):
            with self.subTest(
                    "Output's multiindex level names are describing treatment and outcome"
            ):
                self.assertEqual(["a", "y"], ind_outcome.columns.names)
            with self.subTest(
                    "Output's number of predictions is the same as number of outcome and treatment values"
            ):
                # One column per (treatment value, outcome class) pair.
                self.assertEqual(n_treatments * n_outcomes,
                                 ind_outcome.shape[1])
                self.assertEqual(
                    n_treatments,
                    ind_outcome.columns.get_level_values("a").unique().size)
                self.assertEqual(
                    n_outcomes,
                    ind_outcome.columns.get_level_values("y").unique().size)
        return ind_outcome

    def test_predict_proba(self):
        """Per-treatment class predictions sum to one for every sample."""
        self.estimator = Standardization(
            LogisticRegression(C=1e6, solver='lbfgs'), predict_proba=True)
        ind_outcome = self.verify_individual_multiclass_output()

        with self.subTest("Test results are probabilities - sum to 1:"):
            # Select the columns of each treatment value explicitly rather
            # than through `groupby(..., axis="columns")`, whose `axis`
            # argument is deprecated in recent pandas releases.
            treatment_values = ind_outcome.columns.get_level_values("a").unique()
            for treatment_value in treatment_values:
                y_pred = ind_outcome.xs(treatment_value, axis="columns",
                                        level="a")
                pd.testing.assert_series_equal(
                    pd.Series(1.0, index=y_pred.index),
                    y_pred.sum(axis="columns"))

    def test_decision_function(self):
        """Check the output layout with an ``SVC`` learner.

        NOTE(review): presumably this exercises the estimator's use of
        ``decision_function`` for a learner built without probability
        estimates - confirm against ``Standardization``.
        """
        self.estimator = Standardization(SVC(decision_function_shape='ovr'),
                                         predict_proba=True)
        self.verify_individual_multiclass_output()

    def test_predict(self):
        """With ``predict_proba=False`` there is one column per treatment."""
        self.estimator = Standardization(
            LogisticRegression(C=1e6, solver='lbfgs'), predict_proba=False)
        self.estimator.fit(self.data_3cls["X"], self.data_3cls["a"],
                           self.data_3cls["y"])
        ind_outcome = self.estimator.estimate_individual_outcome(
            self.data_3cls["X"], self.data_3cls["a"])

        with self.subTest("Output size, # predictions:"):
            n_columns = ind_outcome.shape[1]
            self.assertEqual(self.data_3cls["a"].nunique(), n_columns)
            # The width must not be the number of outcome classes.
            self.assertNotEqual(self.data_3cls["y"].nunique(), n_columns)