Beispiel #1
0
    def test_comp_with_statsmodels(self):
        """Compare confidence intervals and standard errors with statsmodels in the un-weighted case.

        `OLS` and `StatsModelsOLS` are fit on the same data for every covariance
        type in ('nonrobust', 'HC0', 'HC1'), with and without an intercept where
        the scenario allows it.  For a one-dimensional `y` the two fitted
        estimators are handed to `_compare_classes`; for a multi-dimensional `y`
        each output column of the joint `OLS` fit is compared with a
        `StatsModelsOLS` fit on that column alone (coefficients, intercepts,
        predictions and their intervals, absolute tolerance 1e-12).
        """
        np.random.seed(123)

        n = 1000
        d = 3
        cov_types = ['nonrobust', 'HC0', 'HC1']

        def fit_both(X, y, fit_intercept, cov_type):
            """Fit `OLS` and `StatsModelsOLS` on (X, y) with the same settings and return both."""
            est = OLS(fit_intercept=fit_intercept, cov_type=cov_type).fit(X, y)
            lr = StatsModelsOLS(fit_intercept=fit_intercept,
                                fit_args={'cov_type': cov_type, 'use_t': False}).fit(X, y)
            return est, lr

        def draw_test_points():
            """Draw random binary rows of width `d` and keep the distinct ones."""
            return np.unique(np.random.binomial(1, .5, size=(n, d)), axis=0)

        def check_close(actual, expected, template, *context):
            """Assert `actual` and `expected` agree elementwise to within 1e-12."""
            assert np.all(np.abs(actual - expected) < 1e-12), template.format(*context, actual, expected)

        # NOTE: the order of the np.random calls below determines the generated
        # data under the fixed seed, so the draws are kept in their original order.

        # Single dimensional output y: binary features with a treatment-like term
        X = np.random.binomial(1, .8, size=(n, d))
        T = np.random.binomial(1, .5 * X[:, 0] + .25, size=(n,))

        def true_effect(x):
            return x[:, 0] + .5
        y = true_effect(X) * T + X[:, 0] + X[:, 2] + np.random.normal(0, 1, size=(n,))
        X_test = draw_test_points()
        for fit_intercept in [True, False]:
            for cov_type in cov_types:
                est, lr = fit_both(X, y, fit_intercept, cov_type)
                _compare_classes(est, lr, X_test)

        # Single dimensional output y: gaussian features
        X = np.random.normal(0, 1, size=(n, d))
        y = X[:, 0] + X[:, 2] + np.random.normal(0, 1, size=(n,))
        X_test = draw_test_points()
        for fit_intercept in [True, False]:
            for cov_type in cov_types:
                est, lr = fit_both(X, y, fit_intercept, cov_type)
                _compare_classes(est, lr, X_test)

        # Tiny design without intercept: every row appears twice, with targets shifted by -1 and +1
        X = np.eye(d)
        y = np.concatenate((X[:, 0] - 1, X[:, 0] + 1))
        X = np.vstack([X, X])
        X_test = draw_test_points()
        for cov_type in cov_types:
            for alpha in [.01, .05, .1]:
                est, lr = fit_both(X, y, False, cov_type)
                _compare_classes(est, lr, X_test, alpha=alpha)

        # Tiny design with intercept: all-ones and all-zeros rows are added to the identity rows
        X = np.vstack([np.eye(d), np.ones((1, d)), np.zeros((1, d))])
        y = np.concatenate((X[:, 0] - 1, X[:, 0] + 1))
        X = np.vstack([X, X])
        X_test = draw_test_points()
        for cov_type in cov_types:
            est, lr = fit_both(X, y, True, cov_type)
            _compare_classes(est, lr, X_test)

        # Multi-dimensional output y (the X_test drawn above is reused here)
        for p in np.arange(1, 4):
            X = np.random.binomial(1, .8, size=(n, d))
            T = np.random.binomial(1, .5 * X[:, 0] + .25, size=(n,))

            def true_effect(x):
                return np.hstack([x[:, [0]] + .5 + t for t in range(p)])
            # The factor multiplying the noise is identically one.
            y = true_effect(X) * T.reshape(-1, 1) + X[:, [0] * p] + \
                (0 * X[:, [0] * p] + 1) * np.random.normal(0, 1, size=(n, p))

            for cov_type in cov_types:
                for fit_intercept in [True, False]:
                    # The fits do not depend on alpha, so they are done once per configuration.
                    est = OLS(fit_intercept=fit_intercept, cov_type=cov_type).fit(X, y)
                    lr = [StatsModelsOLS(fit_intercept=fit_intercept,
                                         fit_args={'cov_type': cov_type, 'use_t': False}).fit(X, y[:, t])
                          for t in range(p)]
                    pred = est.predict(X_test)
                    for alpha in [.01, .05, .2]:
                        coef_int = np.array(est.coef__interval(alpha=alpha))
                        intercept_int = np.array(est.intercept__interval(alpha=alpha))
                        pred_int = np.array(est.predict_interval(X_test, alpha=alpha))
                        for t in range(p):
                            label = (cov_type, fit_intercept, t)
                            check_close(est.coef_[t], lr[t].coef_,
                                        "{}, {}, {}: {}, {}", *label)
                            check_close(coef_int[:, t], lr[t].coef__interval(alpha=alpha),
                                        "{}, {}, {}: {} vs {}", *label)
                            check_close(est.intercept_[t], lr[t].intercept_,
                                        "{}, {}, {}: {} vs {}", *label)
                            check_close(intercept_int[:, t], lr[t].intercept__interval(alpha=alpha),
                                        "{}, {}, {}: {} vs {}", *label)
                            check_close(pred[:, t], lr[t].predict(X_test),
                                        "{}, {}, {}: {} vs {}", *label)
                            check_close(pred_int[:, :, t], lr[t].predict_interval(X_test, alpha=alpha),
                                        "{}, {}, {}: {} vs {}", *label)
Beispiel #2
0
    def test_inference(self):
        """Check that the expected standard errors and confidence intervals are recovered in known examples.

        Every scenario uses a small hand-built design based on the identity
        matrix, so the expected coefficient vector is (1, 0, ..., 0) and the
        expected standard errors are known in closed form.  Expected interval
        bounds are the .025 / .975 quantiles of a normal distribution centred
        at the expected estimate.  All comparisons use an absolute tolerance
        of 1e-12.
        """
        tol = 1e-12
        d = 3
        unit_coef = [1] + [0] * (d - 1)  # expected coefficients in every scenario

        def coef_bound(q, scale):
            """Return the `q`-quantile bound expected for each coefficient given a common std error `scale`."""
            return np.array([scipy.stats.norm.ppf(q, loc=1, scale=scale)] +
                            [scipy.stats.norm.ppf(q, loc=0, scale=scale)] * (d - 1))

        # 1-d output, exact fit without intercept: zero variance, degenerate intervals
        X = np.eye(d)
        y = X[:, 0]
        est = OLS(fit_intercept=False).fit(X, y)
        exact_interval = np.array([unit_coef, unit_coef])
        assert np.all(np.abs(est.coef_ - unit_coef) <= tol), "{}, {}".format(est.coef_, unit_coef)
        assert np.all(np.abs(est.coef__interval() - exact_interval) <= tol),\
            "{}, {}".format(est.coef__interval(), exact_interval)
        assert np.all(est.coef_stderr_ <= tol)
        assert np.all(est._param_var <= tol)

        # 1-d output, exact fit with intercept
        X = np.vstack([np.eye(d), np.ones((1, d)), np.zeros((1, d))])
        y = X[:, 0]
        est = OLS(fit_intercept=True).fit(X, y)
        assert np.all(np.abs(est.coef_ - np.array(unit_coef)) <= tol), "{}, {}".format(est.coef_, unit_coef)
        assert np.all(np.abs(est.coef__interval() - exact_interval) <= tol),\
            "{}, {}".format(est.coef__interval(), exact_interval)
        assert np.all(est.coef_stderr_ <= tol)
        assert np.all(est._param_var <= tol)
        assert np.abs(est.intercept_) <= tol
        assert np.all(np.abs(est.intercept__interval()) <= tol)

        # 1-d output, every row duplicated with targets shifted by -1 and +1: unit standard errors
        X = np.eye(d)
        y = np.concatenate((X[:, 0] - 1, X[:, 0] + 1))
        X = np.vstack([X, X])
        est = OLS(fit_intercept=False).fit(X, y)
        lower, upper = coef_bound(.025, 1), coef_bound(.975, 1)
        assert np.all(np.abs(est.coef_ - unit_coef) <= tol), "{}, {}".format(est.coef_, unit_coef)
        assert np.all(np.abs(est.coef_stderr_ - np.array([1] * d)) <= tol)
        assert np.all(np.abs(est.coef__interval()[0] - lower) <= tol),\
            "{}, {}".format(est.coef__interval()[0], lower)
        assert np.all(np.abs(est.coef__interval()[1] - upper) <= tol),\
            "{}, {}".format(est.coef__interval()[1], upper)

        # 2-d output, no intercept: the same target is replicated across p output columns
        p = 4
        X = np.eye(d)
        y = np.vstack((X[:, [0] * p] - 1, X[:, [0] * p] + 1))
        X = np.vstack([X, X])
        est = OLS(fit_intercept=False).fit(X, y)
        lower, upper = coef_bound(.025, 1), coef_bound(.975, 1)
        for t in range(p):
            assert np.all(np.abs(est.coef_[t] - unit_coef) <= tol), "{}, {}".format(est.coef_[t], unit_coef)
            assert np.all(np.abs(est.coef_stderr_[t] - np.array([1] * d)) <= tol), "{}".format(est.coef_stderr_[t])
            assert np.all(np.abs(est.coef__interval()[0][t] - lower) <= tol),\
                "{}, {}".format(est.coef__interval()[0][t], lower)
            assert np.all(np.abs(est.coef__interval()[1][t] - upper) <= tol),\
                "{}, {}".format(est.coef__interval()[1][t], upper)
            # One placeholder for one value: a two-placeholder template here would raise
            # IndexError while building the failure message and hide the real assertion.
            assert np.all(np.abs(est.intercept_[t]) <= tol), "{}".format(est.intercept_[t])
            assert np.all(np.abs(est.intercept_stderr_[t]) <= tol), "{}".format(est.intercept_stderr_[t])
            assert np.all(np.abs(est.intercept__interval()[0][t]) <= tol),\
                "{}".format(est.intercept__interval()[0][t])

        # 2-d output, with intercept: coefficient std errors are sqrt(2), intercept std error is 1
        p = 4
        X = np.vstack([np.eye(d), np.zeros((1, d))])
        y = np.vstack((X[:, [0] * p] - 1, X[:, [0] * p] + 1))
        X = np.vstack([X, X])
        est = OLS(fit_intercept=True).fit(X, y)
        coef_scale = np.sqrt(2)
        lower, upper = coef_bound(.025, coef_scale), coef_bound(.975, coef_scale)
        intercept_lower = scipy.stats.norm.ppf(.025, loc=0, scale=1)
        for t in range(p):
            assert np.all(np.abs(est.coef_[t] - unit_coef) <= tol), "{}, {}".format(est.coef_[t], unit_coef)
            assert np.all(np.abs(est.coef_stderr_[t] - np.array([coef_scale] * d)) <= tol),\
                "{}".format(est.coef_stderr_[t])
            assert np.all(np.abs(est.coef__interval()[0][t] - lower) <= tol),\
                "{}, {}".format(est.coef__interval()[0][t], lower)
            assert np.all(np.abs(est.coef__interval()[1][t] - upper) <= tol),\
                "{}, {}".format(est.coef__interval()[1][t], upper)
            # Same single-placeholder message as in the no-intercept loop.
            assert np.all(np.abs(est.intercept_[t]) <= tol), "{}".format(est.intercept_[t])
            assert np.all(np.abs(est.intercept_stderr_[t] - 1) <= tol), "{}".format(est.intercept_stderr_[t])
            assert np.all(np.abs(est.intercept__interval()[0][t] - intercept_lower) <= tol),\
                "{}, {}".format(est.intercept__interval()[0][t], intercept_lower)