Ejemplo n.º 1
0
    def test_drlearner(self):
        """Exercise attribute updates and ``refit_final`` on the linear DR learners.

        For ``LinearDRLearner`` and ``SparseLinearDRLearner`` in turn, the test
        fits with cached values, swaps the nuisance models and fits again,
        checks that ``multitask_model_final`` and ``model_final`` reject
        assignment with ``ValueError``, changes final-stage settings and
        applies them with ``refit_final``, and finally checks that
        ``refit_final`` raises ``AssertionError`` after a fit performed with
        ``cache_values=False``.
        """
        y, T, X, W = self._get_data()

        for est in [
                LinearDRLearner(random_state=123),
                SparseLinearDRLearner(random_state=123)
        ]:
            est.fit(y, T, X=X, W=W, cache_values=True)
            np.testing.assert_equal(est.model_regression, 'auto')

            # Replace the nuisance models and fit again from scratch.
            est.model_regression = LinearRegression()
            est.model_propensity = LogisticRegression(random_state=123)
            est.fit(y, T, X=X, W=W, cache_values=True)
            assert isinstance(est.model_regression, LinearRegression)

            # These two attributes are expected to reject assignment.
            with pytest.raises(ValueError):
                est.multitask_model_final = True
            with pytest.raises(ValueError):
                est.model_final = LinearRegression()

            # Final-stage settings are changed, then applied via refit_final().
            est.min_propensity = .1
            est.mc_iters = 2
            est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
            est.refit_final()
            assert isinstance(est.featurizer_, PolynomialFeatures)
            np.testing.assert_equal(est.mc_iters, 2)

            # Flipping fit_cate_intercept alone must leave the already-fitted
            # intercept untouched; it only takes effect on the next refit.
            intcpt = est.intercept_(T=1)
            est.fit_cate_intercept = False
            np.testing.assert_equal(est.intercept_(T=1), intcpt)
            est.refit_final()
            # Query the same accessor used above (``intercept_``) so that the
            # AttributeError reflects the refit model having no intercept.
            # A misspelled attribute name would raise AttributeError no matter
            # how the estimator is configured, making the check vacuous.
            with pytest.raises(AttributeError):
                est.intercept_(T=1)

            # Without cached values the final stage cannot be refit.
            est.fit(y, T, X=X, W=W, cache_values=False)
            with pytest.raises(AssertionError):
                est.refit_final()
Ejemplo n.º 2
0
    def test_can_summarize(self):
        """Smoke-test ``summary`` on a fitted LinearDML and a fitted LinearDRLearner.

        No assertions are made on the summary contents; the test passes as
        long as fitting and summarizing complete without raising.
        """
        data = TestInference

        # LinearDML: treatment passed as-is, summary with default arguments.
        dml = LinearDML(model_t=LinearRegression(),
                        model_y=LinearRegression())
        fitted_dml = dml.fit(data.Y, data.T, data.X, data.W)
        fitted_dml.summary()

        # LinearDRLearner: treatment binarized via ``T > 0``, no CATE
        # intercept, bootstrap inference, summary requested for treatment 1.
        dr_learner = LinearDRLearner(model_regression=LinearRegression(),
                                     model_propensity=LogisticRegression(),
                                     fit_cate_intercept=False)
        fitted_dr = dr_learner.fit(data.Y,
                                   data.T > 0,
                                   data.X,
                                   data.W,
                                   inference=BootstrapInference(5))
        fitted_dr.summary(1)
Ejemplo n.º 3
0
 def test_store_dataframe_name(self):
     """Check that names carried by pandas inputs are stored on the fitted estimator.

     The outcome and treatment are passed as named ``pd.Series`` and the
     features and controls as ``pd.DataFrame`` objects with explicit column
     names; the estimator returned by ``.dowhy.fit`` is then expected to
     expose those names through ``_common_causes``, ``_effect_modifiers``,
     ``_treatment`` and ``_outcome``.
     """
     # The fifth value returned by the data helper is not used in this test.
     raw_y, raw_t, raw_x, raw_w, _ = self._get_data()

     outcome_name = "outcome"
     treatment_name = "treatment"
     modifier_names = ["feature"]
     control_names = ["control1", "control2", "control3", "control4"]

     outcome = pd.Series(raw_y, name=outcome_name)
     treatment = pd.Series(raw_t, name=treatment_name)
     modifiers = pd.DataFrame(raw_x, columns=modifier_names)
     controls = pd.DataFrame(raw_w, columns=control_names)

     est = LinearDRLearner().dowhy.fit(outcome, treatment, modifiers, controls)

     # (attribute on the fitted estimator, names it is expected to hold)
     expectations = [
         ("_common_causes", modifier_names + control_names),
         ("_effect_modifiers", modifier_names),
         ("_treatment", [treatment_name]),
         ("_outcome", [outcome_name]),
     ]
     for attribute, expected_names in expectations:
         np.testing.assert_array_equal(getattr(est, attribute), expected_names)
Ejemplo n.º 4
0
 def test_dr_random_state(self):
     """Run the shared random-state check against each DR learner variant.

     Every estimator is built with ``random_state=123`` (as are the nested
     forest models) and handed to ``TestRandomState._test_random_state``
     together with the generated data.
     """
     Y, T, X, W, X_test = self._make_data(500, 2)

     generic_dr = DRLearner(
         model_final=RandomForestRegressor(max_depth=3,
                                           n_estimators=10,
                                           min_samples_leaf=100,
                                           bootstrap=True,
                                           random_state=123),
         cv=2,
         random_state=123)
     linear_dr = LinearDRLearner(random_state=123)
     sparse_dr = SparseLinearDRLearner(cv=2, random_state=123)
     forest_dr = ForestDRLearner(
         model_regression=RandomForestRegressor(n_estimators=10,
                                                max_depth=4,
                                                random_state=123),
         model_propensity=RandomForestClassifier(n_estimators=10,
                                                 max_depth=4,
                                                 random_state=123),
         cv=2,
         random_state=123)

     for est in [generic_dr, linear_dr, sparse_dr, forest_dr]:
         TestRandomState._test_random_state(est, X_test, Y, T, X=X, W=W)
Ejemplo n.º 5
0
 def test_mean_pred_stderr(self):
     """Test that mean_pred_stderr is not None when the estimator's final stage is linear.

     Checked on a ``LinearDML`` and a ``LinearDRLearner``, both using a
     degree-2 polynomial featurizer. The population summaries of the
     marginal-effect and effect inferences must expose a standard error,
     while the coefficient inference is expected to report ``None``.
     """
     data = TestInference
     Y, T, X, W = data.Y, data.T, data.X, data.W

     def make_featurizer():
         # A fresh featurizer per estimator so no fitted state is shared.
         return PolynomialFeatures(degree=2, include_bias=False)

     dml = LinearDML(model_t=LinearRegression(),
                     model_y=LinearRegression(),
                     featurizer=make_featurizer())
     dr_learner = LinearDRLearner(model_regression=LinearRegression(),
                                  model_propensity=LogisticRegression(),
                                  featurizer=make_featurizer())

     for est in [dml, dr_learner]:
         est.fit(Y, T, X=X, W=W)

         cme_summary = est.const_marginal_effect_inference(X).population_summary()
         assert cme_summary.mean_pred_stderr is not None

         # For effect inference the value is only non-None when T1 is a
         # constant or a list of constants (here the default T1 is used).
         effect_summary = est.effect_inference(X).population_summary()
         assert effect_summary.mean_pred_stderr is not None

         # The DR learner's coefficient inference is requested per treatment.
         if est.__class__.__name__ == "LinearDRLearner":
             coef_kwargs = {"T": 1}
         else:
             coef_kwargs = {}
         assert est.coef__inference(**coef_kwargs).mean_pred_stderr is None
Ejemplo n.º 6
0
    def test_summary_discrete(self):
        """Tests the inference results summary for discrete treatment estimators.

        For bootstrap and ``'auto'`` inference, a fresh ``LinearDRLearner`` is
        fitted for each scenario and the row labels of its ``summary(T=1)``
        tables are compared with the expected feature names:

        * polynomial featurizer, default and user-supplied input names;
        * no featurizer, default and user-supplied input names;
        * no featurizer, estimator wrapped in ``self._NoFeatNamesEst``,
          default and user-supplied input names.
        """
        data = TestInference

        def make_poly():
            return PolynomialFeatures(degree=2, include_bias=False)

        def fit_fresh(featurizer, inference, wrap=False):
            # Build and fit a new estimator so scenarios never share state.
            est = LinearDRLearner(model_regression=LinearRegression(),
                                  model_propensity=LogisticRegression(),
                                  featurizer=featurizer)
            if wrap:
                # Wrapper is defined elsewhere in the test class; judging by
                # its name it hides feature-name support -- TODO confirm.
                est = self._NoFeatNamesEst(est)
            est.fit(data.Y, data.T, data.X, data.W, inference=inference)
            return est

        def row_labels(table):
            # First column of the table, skipping the header row.
            return np.asarray(table.data)[1:, 0]

        for inference in [BootstrapInference(n_bootstrap_samples=5), 'auto']:
            # Test inference results when `cate_feature_names` does not exist

            # Polynomial featurizer, default input names.
            summary = fit_fresh(make_poly(), inference).summary(T=1)
            coef_labels = row_labels(summary.tables[0])
            input_names = get_input_columns(data.X)
            expected = make_poly().fit(data.X).get_feature_names(input_names)
            np.testing.assert_array_equal(coef_labels, expected)
            intercept_labels = row_labels(summary.tables[1])
            np.testing.assert_array_equal(intercept_labels, ['cate_intercept'])

            # Polynomial featurizer, user-supplied input names.
            est = fit_fresh(make_poly(), inference)
            custom_names = ['Q' + str(i) for i in range(data.d_x)]
            summary = est.summary(T=1, feature_names=custom_names)
            coef_labels = row_labels(summary.tables[0])
            expected = make_poly().fit(data.X).get_feature_names(
                input_features=custom_names)
            np.testing.assert_array_equal(coef_labels, expected)

            # No featurizer: first the plain estimator, then the wrapped one,
            # each with default names followed by user-supplied names.
            for wrap in (False, True):
                est = fit_fresh(None, inference, wrap=wrap)
                summary = est.summary(T=1)
                coef_labels = row_labels(summary.tables[0])
                np.testing.assert_array_equal(
                    coef_labels, ['X' + str(i) for i in range(data.d_x)])

                est = fit_fresh(None, inference, wrap=wrap)
                custom_names = ['Q' + str(i) for i in range(data.d_x)]
                summary = est.summary(T=1, feature_names=custom_names)
                coef_labels = row_labels(summary.tables[0])
                np.testing.assert_array_equal(coef_labels, custom_names)