    def test_ol_no_score_final(self):
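        # Minimal orthogonal-learner example: the nuisance stage residualizes Y and T
        # on W with cross-fitting, and the final stage regresses the Y residuals on the
        # T residuals. ModelFinal defines no score method, so the fitted estimator's
        # score_ attribute should remain None.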
        class ModelNuisance:
            def __init__(self, model_t, model_y):
                self._model_t = model_t
                self._model_y = model_y

            def fit(self, Y, T, W=None):
                self._model_t.fit(W, T)
                self._model_y.fit(W, Y)
                return self

            def predict(self, Y, T, W=None):
                return Y - self._model_y.predict(W), T - self._model_t.predict(W)

        class ModelFinal:
            def __init__(self):
                return

            def fit(self, Y, T, W=None, nuisances=None):
                Y_res, T_res = nuisances
                self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)
                return self

            def predict(self, X=None):
                return self.model.coef_[0]

        np.random.seed(123)
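        # Data generating process: T = X[:, 0], W = X[:, 1:], and
        # y = T + W[:, 0] + N(0, sigma^2), so the true constant marginal effect is 1.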
        X = np.random.normal(size=(10000, 3))
        sigma = 0.1
        y = X[:, 0] + X[:, 1] + np.random.normal(0, sigma, size=(10000, ))
        est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(),
                            n_splits=2, discrete_treatment=False, discrete_instrument=False,
                            categories='auto', random_state=None)
        est.fit(y, X[:, 0], W=X[:, 1:])
        np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3)
        np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3)
        np.testing.assert_array_almost_equal(est.effect(T0=0, T1=10), np.ones(1) * 10, decimal=2)
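        # ModelFinal has no score method, so no cross-validated final score is stored.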
        assert est.score_ is None
        np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3)
Example #2
    def test_ol_nuisance_scores(self):
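        # Same setup as test_ol_no_score_final, but the nuisance model also exposes a
        # score method; the estimator should then record one score per cross-fitting
        # split for each entry of the returned (y_score, t_score) tuple in
        # nuisance_scores_.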
        class ModelNuisance:
            def __init__(self, model_t, model_y):
                self._model_t = model_t
                self._model_y = model_y

            def fit(self, Y, T, W=None):
                self._model_t.fit(W, T)
                self._model_y.fit(W, Y)
                return self

            def predict(self, Y, T, W=None):
                return Y - self._model_y.predict(W), T - self._model_t.predict(W)

            def score(self, Y, T, W=None):
                return (self._model_y.score(W, Y), self._model_t.score(W, T))

        class ModelFinal:

            def __init__(self):
                return

            def fit(self, Y, T, W=None, nuisances=None):
                Y_res, T_res = nuisances
                self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)
                return self

            def predict(self, X=None):
                return self.model.coef_[0]

        np.random.seed(123)
        X = np.random.normal(size=(10000, 3))
        sigma = 0.1
        y = X[:, 0] + X[:, 1] + np.random.normal(0, sigma, size=(10000,))
        est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(),
                            n_splits=2, discrete_treatment=False, discrete_instrument=False,
                            categories='auto', random_state=None)
        est.fit(y, X[:, 0], W=X[:, 1:])
        np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3)
        np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3)
        np.testing.assert_array_almost_equal(est.effect(T0=0, T1=10), np.ones(1) * 10, decimal=2)
        np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3)
        nuisance_scores_y = est.nuisance_scores_[0]
        nuisance_scores_t = est.nuisance_scores_[1]
        assert len(nuisance_scores_y) == len(nuisance_scores_t) == 2  # as many scores as splits
        # y scores should be positive, since W predicts Y somewhat
        # t scores might not be, since W and T are uncorrelated
        np.testing.assert_array_less(0, nuisance_scores_y)
Example #3
    def test_ol_discrete_treatment(self):
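        # Discrete-treatment variant: with discrete_treatment=True the treatment is
        # one-hot encoded (baseline category dropped) before it reaches the nuisance
        # model, the treatment model is a classifier, and the treatment residual is
        # T minus the predicted propensities.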
        class ModelNuisance:
            def __init__(self, model_t, model_y):
                self._model_t = model_t
                self._model_y = model_y

            def fit(self, Y, T, W=None):
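                # T arrives one-hot encoded with the baseline category dropped, so this
                # maps each row back to an integer class label for the classifier.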
                self._model_t.fit(W, np.matmul(T, np.arange(1, T.shape[1] + 1)))
                self._model_y.fit(W, Y)
                return self

            def predict(self, Y, T, W=None):
                return Y - self._model_y.predict(W), T - self._model_t.predict_proba(W)[:, 1:]

        class ModelFinal:

            def __init__(self):
                return

            def fit(self, Y, T, W=None, nuisances=None):
                Y_res, T_res = nuisances
                self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)
                return self

            def predict(self):
                # theta needs to be of dimension (1, d_t) if T is (n, d_t)
                return np.array([[self.model.coef_[0]]])

            def score(self, Y, T, W=None, nuisances=None):
                Y_res, T_res = nuisances
                return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2)

        np.random.seed(123)
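        # Data generating process: binary T ~ Bernoulli(expit(X[:, 0])) and
        # y = T + X[:, 0] + N(0, sigma^2), so the true effect is 1 and the final-stage
        # score should be close to sigma**2.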
        X = np.random.normal(size=(10000, 3))
        import scipy.special
        from sklearn.linear_model import LogisticRegression
        T = np.random.binomial(1, scipy.special.expit(X[:, 0]))
        sigma = 0.01
        y = T + X[:, 0] + np.random.normal(0, sigma, size=(10000,))
        est = _OrthoLearner(ModelNuisance(LogisticRegression(solver='lbfgs'), LinearRegression()), ModelFinal(),
                            n_splits=2, discrete_treatment=True, discrete_instrument=False,
                            categories='auto', random_state=None)
        est.fit(y, T, W=X)
        np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3)
        np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3)
        np.testing.assert_almost_equal(est.score(y, T, W=X), sigma**2, decimal=3)
        np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3)
Example #4
    def test_ol(self):
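        # End-to-end check with a continuous treatment: the nuisance and final models
        # are the same simple residualizers as above, and the final-stage score should
        # recover the noise variance sigma**2. The later blocks repeat the fit with
        # non-keyword arguments, a custom KFold splitter, and an explicit list of folds.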

        class ModelNuisance:
            def __init__(self, model_t, model_y):
                self._model_t = model_t
                self._model_y = model_y

            def fit(self, Y, T, W=None):
                self._model_t.fit(W, T)
                self._model_y.fit(W, Y)
                return self

            def predict(self, Y, T, W=None):
                return Y - self._model_y.predict(W), T - self._model_t.predict(W)

        class ModelFinal:

            def __init__(self):
                return

            def fit(self, Y, T, W=None, nuisances=None):
                Y_res, T_res = nuisances
                self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)
                return self

            def predict(self, X=None):
                return self.model.coef_[0]

            def score(self, Y, T, W=None, nuisances=None):
                Y_res, T_res = nuisances
                return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2)

        np.random.seed(123)
        X = np.random.normal(size=(10000, 3))
        sigma = 0.1
        y = X[:, 0] + X[:, 1] + np.random.normal(0, sigma, size=(10000,))
        est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(),
                            n_splits=2, discrete_treatment=False, discrete_instrument=False, categories='auto',
                            random_state=None)
        est.fit(y, X[:, 0], W=X[:, 1:])
        np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3)
        np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3)
        np.testing.assert_array_almost_equal(est.effect(T0=0, T1=10), np.ones(1) * 10, decimal=2)
        np.testing.assert_almost_equal(est.score(y, X[:, 0], W=X[:, 1:]), sigma**2, decimal=3)
        np.testing.assert_almost_equal(est.score_, sigma**2, decimal=3)
        np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3)
        # Nuisance model has no score method, so nuisance_scores_ should be none
        assert est.nuisance_scores_ is None

        # Test non keyword based calls to fit
        np.random.seed(123)
        X = np.random.normal(size=(10000, 3))
        sigma = 0.1
        y = X[:, 0] + X[:, 1] + np.random.normal(0, sigma, size=(10000,))
        est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(),
                            n_splits=2, discrete_treatment=False, discrete_instrument=False,
                            categories='auto', random_state=None)
        # test non-array inputs
        est.fit(list(y), list(X[:, 0]), X=None, W=X[:, 1:])
        np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3)
        np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3)
        np.testing.assert_array_almost_equal(est.effect(T0=0, T1=10), np.ones(1) * 10, decimal=2)
        np.testing.assert_almost_equal(est.score(y, X[:, 0], None, X[:, 1:]), sigma**2, decimal=3)
        np.testing.assert_almost_equal(est.score_, sigma**2, decimal=3)
        np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3)

        # Test custom splitter
        np.random.seed(123)
        X = np.random.normal(size=(10000, 3))
        sigma = 0.1
        y = X[:, 0] + X[:, 1] + np.random.normal(0, sigma, size=(10000,))
        est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(),
                            n_splits=KFold(n_splits=3),
                            discrete_treatment=False, discrete_instrument=False,
                            categories='auto', random_state=None)
        est.fit(y, X[:, 0], X=None, W=X[:, 1:])
        np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3)
        np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3)
        np.testing.assert_array_almost_equal(est.effect(T0=0, T1=10), np.ones(1) * 10, decimal=2)
        np.testing.assert_almost_equal(est.score(y, X[:, 0], W=X[:, 1:]), sigma**2, decimal=3)
        np.testing.assert_almost_equal(est.score_, sigma**2, decimal=3)
        np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3)

        # Test incomplete set of test folds
        np.random.seed(123)
        X = np.random.normal(size=(10000, 3))
        sigma = 0.1
        y = X[:, 0] + X[:, 1] + np.random.normal(0, sigma, size=(10000,))
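        # A single explicit (train, test) pair whose test fold covers only half of the
        # samples, exercising the code path where the supplied folds do not cover the
        # full dataset.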
        folds = [(np.arange(X.shape[0] // 2), np.arange(X.shape[0] // 2, X.shape[0]))]
        est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(),
                            n_splits=folds, discrete_treatment=False,
                            discrete_instrument=False, categories='auto', random_state=None)
        est.fit(y, X[:, 0], X=None, W=X[:, 1:])
        np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3)
        np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3)
        np.testing.assert_array_almost_equal(est.effect(T0=0, T1=10), np.ones(1) * 10, decimal=2)
        np.testing.assert_almost_equal(est.score(y, X[:, 0], W=X[:, 1:]), sigma**2, decimal=3)
        np.testing.assert_almost_equal(est.score_, sigma**2, decimal=3)
        np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3)