コード例 #1
0
ファイル: test_bootstrap.py プロジェクト: ytknzw/EconML
    def test_internal_options(self):
        """Test that the internal use of bootstrap within an estimator using custom options works."""
        x = np.random.normal(size=(1000, 2))
        z = np.random.normal(size=(1000, 1))
        t = np.random.normal(size=(1000, 1))
        t2 = np.random.normal(size=(1000, 1))
        # Slice with 0:1 so the first feature stays a (1000, 1) column. Indexing with a bare 0
        # yields shape (1000,), which broadcasts against t into a (1000, 1000) outcome matrix.
        y = x[:, 0:1] * 0.5 + t + np.random.normal(size=(1000, 1))

        opts = BootstrapOptions(50, 2)

        est = NonparametricTwoStageLeastSquares(PolynomialFeatures(2),
                                                PolynomialFeatures(2),
                                                PolynomialFeatures(2),
                                                None,
                                                inference=opts)
        est.fit(y, t, x, None, z)

        # every shape check below compares against the same effect call, so evaluate it once
        effect_shape = np.shape(est.effect(x, t, t2))

        # test that we can get an interval for the same attribute for the bootstrap as the original,
        # with the same shape for the lower and upper bounds
        lower, upper = est.effect_interval(x, t, t2)
        for bound in [lower, upper]:
            self.assertEqual(effect_shape, np.shape(bound))
        # the two bounds must not coincide everywhere
        self.assertFalse(np.allclose(lower, upper))

        # test that we can do the same thing once we provide percentile bounds
        lower, upper = est.effect_interval(x, t, t2, lower=10, upper=90)
        for bound in [lower, upper]:
            self.assertEqual(effect_shape, np.shape(bound))
        self.assertFalse(np.allclose(lower, upper))
コード例 #2
0
    def test_2sls(self):
        """Fit the estimator at several Hermite degree settings and print diagnostics.

        For every degree combination the method records the mean squared difference
        between ``p_fresh * x_fresh`` and the estimated effect on freshly drawn data,
        plus the marginal effect at three fixed points. Both lists are printed;
        nothing is asserted.
        """
        n = 50000
        column = (n, 1)

        # training sample
        noise = np.random.uniform(low=-0.5, high=0.5, size=column)
        instrument = np.random.uniform(size=column)
        feature = np.random.uniform(size=column) + noise
        treatment = feature + instrument * noise + np.random.uniform(size=column)
        outcome = treatment * feature + noise

        losses = []
        marg_effs = []

        # fresh sample used only for evaluation
        z_fresh = np.random.uniform(size=column)
        e_fresh = np.random.uniform(low=-0.5, high=0.5, size=column)
        x_fresh = np.random.uniform(size=column) + e_fresh
        p_fresh = x_fresh + z_fresh * e_fresh + np.random.uniform(size=column)

        degree_grid = [(0, 0, 0), (1, 1, 1), (5, 5, 5), (10, 10, 10),
                       (3, 3, 10), (10, 10, 3)]
        for t_deg, x_deg, z_deg in degree_grid:
            estimator = NonparametricTwoStageLeastSquares(
                HermiteFeatures(t_deg),
                HermiteFeatures(x_deg),
                HermiteFeatures(z_deg),
                HermiteFeatures(t_deg, shift=1))
            estimator.fit(outcome, treatment, feature, instrument)

            # effect of moving from an all-zero treatment to p_fresh
            baseline = np.zeros(shape(p_fresh))
            predicted = estimator.effect(x_fresh, baseline, p_fresh)
            squared_error = np.square(p_fresh * x_fresh - predicted)
            losses.append(np.mean(squared_error))

            first_points = np.array([[0.3], [0.5], [0.7]])
            second_points = np.array([[0.4], [0.6], [0.2]])
            marg_effs.append(estimator.marginal_effect(first_points, second_points))

        print("losses: {}".format(losses))
        print("marg_effs: {}".format(marg_effs))
コード例 #3
0
    def test_marg_eff(self):
        """Check that effect and marginal effect are recovered on noiseless data."""
        X = np.random.normal(size=(5000, 2))
        Z = np.random.normal(size=(5000, 2))
        W = np.random.normal(size=(5000, 1))
        # Note: no noise, just testing that we can exactly recover when we ought to be able to
        # Row-wise scalar cross product of the 2-element vectors in X and Z, written out
        # explicitly because np.cross on 2-element vectors is deprecated as of NumPy 2.0.
        xz_cross = X[:, 0] * Z[:, 1] - X[:, 1] * Z[:, 0]
        T = np.hstack([xz_cross.reshape(-1, 1) + W,
                       (np.prod(X, axis=1) + np.prod(Z, axis=1)).reshape(-1, 1)])
        Y = X * T + X**2

        est = NonparametricTwoStageLeastSquares(
            t_featurizer=PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
            x_featurizer=PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
            z_featurizer=PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
            dt_featurizer=DPolynomialFeatures(degree=2, interaction_only=False, include_bias=True))

        est.fit(Y, T, X, W, Z)

        # pick some arbitrary X
        X_test = np.array([[0.3, 0.7],
                           [0.2, 0.1]])
        eff = est.effect(X_test)  # effect = (X * 1 + X^2) - (X * 0 + X^2) = X
        np.testing.assert_almost_equal(eff, X_test)

        # pick some arbitrary T
        T_test = np.array([[-0.3, 0.1],
                           [0.6, -1.2]])
        marg_eff = est.marginal_effect(T_test, X_test)  # marg effect_{i,j} = X_i if i=j, 0 otherwise
        # expected result: one (outcome, treatment) matrix per test row, with X_test on the diagonal
        marg_eff_truth = np.zeros((X_test.shape[0], Y.shape[1], T.shape[1]))
        marg_eff_truth[:, range(X.shape[1]), range(X.shape[1])] = X_test[:, :]
        np.testing.assert_almost_equal(marg_eff, marg_eff_truth)
コード例 #4
0
    def test_2sls_shape(self):
        """Check the shapes returned by effect and marginal_effect across input dimensions.

        A dimension of -1 stands for a flat length-n vector instead of an (n, d) matrix.
        """
        n = 100

        def draw(dim):
            # negative dim -> flat vector of length n, otherwise an (n, dim) matrix
            if dim >= 0:
                return np.random.normal(size=(n, dim))
            return np.random.normal(size=(n,))

        d_w = 1
        for d_t in [-1, 1, 2]:
            # number of treatment columns, counting a flat vector as one column
            n_t = max(d_t, 1)
            t_suffix = (d_t,) if d_t > 0 else ()
            for d_y in [-1, 1, 2]:
                y_suffix = (d_y,) if d_y > 0 else ()
                for d_x in [1, 5]:
                    for d_z in [1, 2]:
                        # only exercise cases where Z has at least as many columns as T
                        if d_z < n_t:
                            continue

                        T = draw(d_t)
                        Y = draw(d_y)
                        X = draw(d_x)
                        Z = draw(d_z)
                        W = draw(d_w)

                        est = NonparametricTwoStageLeastSquares(
                            t_featurizer=PolynomialFeatures(),
                            x_featurizer=PolynomialFeatures(),
                            z_featurizer=PolynomialFeatures(),
                            dt_featurizer=DPolynomialFeatures())
                        est.fit(Y, T, X=X, W=W, Z=Z)

                        eff = est.effect(X)
                        marg_eff = est.marginal_effect(T, X)

                        # flat inputs contribute no trailing axis to the expected shapes
                        effect_shape = (n,) + y_suffix
                        leading = n if d_x else 1
                        marginal_effect_shape = (leading,) + y_suffix + t_suffix

                        self.assertEqual(shape(marg_eff), marginal_effect_shape)
                        self.assertEqual(shape(eff), effect_shape)
コード例 #5
0
    def test_internal_options(self):
        """Test that the internal use of bootstrap within an estimator using custom options works."""
        n_rows = 1000
        x = np.random.normal(size=(n_rows, 2))
        z = np.random.normal(size=(n_rows, 1))
        t = np.random.normal(size=(n_rows, 1))
        t2 = np.random.normal(size=(n_rows, 1))
        y_noise = np.random.normal(size=(n_rows, 1))
        y = x[:, 0:1] * 0.5 + t + y_noise

        opts = BootstrapInference(50, 2)

        featurizers = [PolynomialFeatures(2), PolynomialFeatures(2), PolynomialFeatures(2)]
        est = NonparametricTwoStageLeastSquares(*featurizers, None)
        est.fit(y, t, x, None, z, inference=opts)

        # point estimate that every interval below is compared against
        eff = est.effect(x, t, t2)

        # first pass: default interval; second pass: interval with an explicit alpha.
        # Each pass carries the minimum fraction of estimates that must lie inside the bounds.
        scenarios = [({}, 0.7), ({"alpha": 0.2}, 0.65)]
        for interval_kwargs, min_coverage in scenarios:
            lower, upper = est.effect_interval(x, T0=t, T1=t2, **interval_kwargs)

            # both bounds must have the same shape as the point estimate
            for bound in (lower, upper):
                self.assertEqual(np.shape(eff), np.shape(bound))

            # test that the lower and upper bounds differ
            assert (lower <= upper).all()
            assert (lower < upper).any()

            # test that the estimated effect is usually within the bounds
            inside = np.logical_and(lower <= eff, eff <= upper)
            assert np.mean(inside) >= min_coverage
コード例 #6
0
    def test_2sls(self):
        """Fit the estimator with controls at several Hermite degree settings and print diagnostics.

        For every degree combination the method records the mean squared difference
        between ``p_fresh * x_fresh`` and the estimated effect on freshly drawn data,
        plus the marginal effect at three fixed points. Both lists are printed;
        nothing is asserted.
        """
        n = 50000
        d_w, d_z, d_x, d_t, d_y = 2, 1, 1, 1, 1

        # training sample
        e = np.random.uniform(low=-0.5, high=0.5, size=(n, d_x))
        z = np.random.uniform(size=(n, 1))
        w = np.random.uniform(size=(n, d_w))
        # random coefficients mapping w into the treatment (a) and the outcome (b)
        a = np.random.normal(size=(d_w, d_t))
        b = np.random.normal(size=(d_w, d_y))
        x = np.random.uniform(size=(n, d_x)) + e
        p_noise = np.random.uniform(size=(n, d_t))
        p = x + z * e + w @ a + p_noise
        y = p * x + e + w @ b

        # fresh sample used only for evaluation
        z_fresh = np.random.uniform(size=(n, d_z))
        e_fresh = np.random.uniform(low=-0.5, high=0.5, size=(n, d_x))
        x_fresh = np.random.uniform(size=(n, d_x)) + e_fresh
        # not used below; the draw is kept so the sequence of random numbers is unchanged
        w_fresh = np.random.uniform(size=(n, d_w))
        p_fresh = x_fresh + z_fresh * e_fresh + np.random.uniform(size=(n, d_t))

        losses = []
        marg_effs = []

        degree_grid = [(0, 0, 0), (1, 1, 1), (5, 5, 5), (10, 10, 10), (3, 3, 10), (10, 10, 3)]
        for t_deg, x_deg, z_deg in degree_grid:
            estimator = NonparametricTwoStageLeastSquares(
                t_featurizer=HermiteFeatures(t_deg),
                x_featurizer=HermiteFeatures(x_deg),
                z_featurizer=HermiteFeatures(z_deg),
                dt_featurizer=HermiteFeatures(t_deg, shift=1))
            estimator.fit(y, p, X=x, W=w, Z=z)

            # effect of moving from an all-zero treatment to p_fresh
            baseline = np.zeros(shape(p_fresh))
            predicted = estimator.effect(x_fresh, baseline, p_fresh)
            squared_error = np.square(p_fresh * x_fresh - predicted)
            losses.append(np.mean(squared_error))

            first_points = np.array([[0.3], [0.5], [0.7]])
            second_points = np.array([[0.4], [0.6], [0.2]])
            marg_effs.append(estimator.marginal_effect(first_points, second_points))

        print("losses: {}".format(losses))
        print("marg_effs: {}".format(marg_effs))