Ejemplo n.º 1
0
    def test_regularized_weights(self):
        """Regularized OLS on duplicated rows must agree with WLS and GLS.

        OLS is given the first sample twice.  WLS is given every row once,
        with weight 2 on the first sample, and GLS is given every row once
        with ``sigma = diag(1 / weights)``.  The fitted parameters are
        compared to three decimals for each ``L1_wt``/``alpha`` combination.
        """
        np.random.seed(1432)
        x_first = np.random.normal(size=(100, 3))
        y_first = x_first[:, 0] + x_first[:, 1] + np.random.normal(size=100)
        x_second = np.random.normal(size=(100, 3))
        y_second = (x_second[:, 0] + x_second[:, 1]
                    + np.random.normal(size=100))

        # Design in which the first sample literally appears twice.
        exog_dup = np.vstack((x_first, x_first, x_second))
        endog_dup = np.concatenate((y_first, y_first, y_second))

        # Same information, with the repetition expressed through weights.
        exog_once = np.vstack((x_first, x_second))
        endog_once = np.concatenate((y_first, y_second))
        wgts = np.ones(200)
        wgts[:100] = 2
        sigma = np.diag(1 / wgts)

        # TODO: parametrize?
        settings = [(l1, pen) for l1 in [0, 0.5, 1] for pen in [0, 1]]
        for l1_weight, penalty in settings:
            fit_kwds = dict(L1_wt=l1_weight, alpha=penalty)

            res_ols = OLS(endog_dup, exog_dup).fit_regularized(**fit_kwds)
            res_wls = WLS(endog_once, exog_once,
                          weights=wgts).fit_regularized(**fit_kwds)
            res_gls = GLS(endog_once, exog_once,
                          sigma=sigma).fit_regularized(**fit_kwds)

            for other in (res_wls, res_gls):
                assert_almost_equal(res_ols.params, other.params, decimal=3)
Ejemplo n.º 2
0
    def test_fixed_scale(self):
        """Check the ``'fixed_scale'`` covariance type on OLS and WLS results.

        The OLS and WLS results must agree with each other, and for WLS the
        reported covariance, standard errors, p-values and t-values must
        match a ``t_test`` that is handed ``normalized_cov_params`` directly.
        The same check is repeated for a model fitted with ``cov_type``
        passed to ``fit``.
        """
        cov_type = 'fixed_scale'
        kwds = {}

        def check_against_t_test(res):
            # t_test on every parameter, using the unscaled covariance.
            tt = res.t_test(np.eye(len(res.params)),
                            cov_p=res.normalized_cov_params)
            assert_allclose(res.cov_params(), res.normalized_cov_params,
                            rtol=1e-13)
            assert_allclose(res.bse, tt.sd, rtol=1e-13)
            assert_allclose(res.pvalues, tt.pvalue, rtol=1e-13)
            assert_allclose(res.tvalues, tt.tvalue, rtol=1e-13)

        res_ols = self.res_ols.get_robustcov_results(cov_type, **kwds)
        res_wls = self.res_wls.get_robustcov_results(cov_type, **kwds)
        assert_allclose(res_ols.params, res_wls.params, rtol=1e-13)
        assert_allclose(res_ols.cov_params(), res_wls.cov_params(),
                        rtol=1e-13)
        assert_allclose(res_ols.bse, res_wls.bse, rtol=1e-13)
        assert_allclose(res_ols.pvalues, res_wls.pvalues, rtol=1e-12)

        check_against_t_test(res_wls)

        # using cov_type in fit
        base = self.res_wls.model
        refit = WLS(base.endog, base.exog, weights=base.weights)
        check_against_t_test(refit.fit(cov_type=cov_type, cov_kwds=kwds))
Ejemplo n.º 3
0
def test_cov_type_fixed_scale():
    """Check WLS standard errors with and without ``cov_type='fixed scale'``.

    This is a unit test from scipy curvefit for the `absolute_sigma` keyword.
    With the default covariance the standard errors must not depend on a
    common rescaling of the weights; with ``'fixed scale'`` they must scale
    with it, and the ``scale`` entry of ``cov_kwds`` must have the same effect
    as rescaling the weights.
    """
    xdata = np.array([0, 1, 2, 3, 4, 5])
    ydata = np.array([1, 1, 5, 7, 8, 12])
    sigma = np.array([1, 2, 1, 2, 1, 2])

    # slope column first, constant second
    xdata = np.column_stack((xdata, np.ones(len(xdata))))
    weights = 1. / sigma**2

    # Default covariance: scale is estimated from the residuals.
    res = WLS(ydata, xdata, weights=weights).fit()
    assert_allclose(res.bse,
                    [0.20659803, 0.57204404],
                    rtol=1e-3)

    # Rescaled weights with the default covariance give the same standard
    # errors.  (The original repeated the previous fit verbatim here, which
    # checked nothing new.)
    res = WLS(ydata, xdata, weights=weights / 9.).fit()
    assert_allclose(res.bse,
                    [0.20659803, 0.57204404],
                    rtol=1e-3)

    # Fixed scale: the weights are taken at face value.
    res = WLS(ydata, xdata, weights=weights).fit(cov_type='fixed scale')
    assert_allclose(res.bse,
                    [0.30714756, 0.85045308],
                    rtol=1e-3)

    # Dividing the weights by 9 triples the fixed-scale standard errors ...
    res = WLS(ydata, xdata, weights=weights / 9.).fit(cov_type='fixed scale')
    assert_allclose(res.bse,
                    [3 * 0.30714756, 3 * 0.85045308],
                    rtol=1e-3)

    # ... and so does keeping the weights and passing scale=9 instead.
    res = WLS(ydata, xdata, weights=weights).fit(cov_type='fixed scale',
                                                 cov_kwds={'scale': 9})
    assert_allclose(res.bse,
                    [3 * 0.30714756, 3 * 0.85045308],
                    rtol=1e-3)
Ejemplo n.º 4
0
    def setup_class(cls):
        """Fit WLS on longley with a scalar weight and an equal weight array.

        ``res1`` uses the scalar weight ``1/3``; ``res2`` uses a list holding
        ``1/3`` once per observation.
        """
        dta = datasets.longley.load(as_pandas=False)
        dta.exog = add_constant(dta.exog, prepend=True)
        endog, exog = dta.endog, dta.exog

        # scalar weight
        cls.res1 = WLS(endog, exog, weights=1. / 3).fit()

        # the same weight spelled out for every observation
        n_obs = len(endog)
        cls.res2 = WLS(endog, exog, weights=[1 / 3.] * n_obs).fit()
Ejemplo n.º 5
0
    def setup_class(cls):
        """Collect WLS and GLS fits on longley with rescaled weights.

        ``cls.results`` holds four fits: WLS with weights ``w`` and
        ``0.01 * w``, and GLS with ``sigma`` given as the vector
        ``100 / w`` and as the diagonal matrix ``diag(0.1 / w)``.
        """
        data = datasets.longley.load(as_pandas=False)
        data.exog = add_constant(data.exog, prepend=False)
        y, X = data.endog, data.exog

        np.random.seed(5)
        w = np.random.uniform(0.5, 1, y.shape[0])
        inv_w = 1. / w

        cls.results = [
            WLS(y, X, w).fit(),
            WLS(y, X, 0.01 * w).fit(),
            GLS(y, X, 100 * inv_w).fit(),
            GLS(y, X, np.diag(0.1 * inv_w)).fit(),
        ]
Ejemplo n.º 6
0
 def test_equivalence_unweighted(self, check):
     """``_MinimalWLS`` without weights must reproduce ``WLS`` (data set 1).

     ``check`` is passed on as both ``check_endog`` and ``check_weights``.
     """
     endog, exog = self.endog1, self.exog1
     full = WLS(endog, exog).fit()
     minimal_model = _MinimalWLS(endog, exog,
                                 check_endog=check, check_weights=check)
     minimal = minimal_model.fit()
     for attr in ('params', 'resid'):
         assert_allclose(getattr(full, attr), getattr(minimal, attr))
Ejemplo n.º 7
0
 def test_equivalence_unweighted2(self, check):
     """``_MinimalWLS`` without weights must reproduce ``WLS`` (data set 2).

     ``check`` is passed on as both ``check_endog`` and ``check_weights``.
     """
     # TODO: Better name than 1 vs 2?
     endog, exog = self.endog2, self.exog2
     full = WLS(endog, exog).fit()
     minimal_model = _MinimalWLS(endog, exog,
                                 check_endog=check, check_weights=check)
     minimal = minimal_model.fit()
     for attr in ('params', 'resid'):
         assert_allclose(getattr(full, attr), getattr(minimal, attr))
Ejemplo n.º 8
0
def test_wls_missing():
    """With ``missing='drop'`` WLS must drop rows from endog, exog and weights.

    Two entries of the response are set to NaN; afterwards all three arrays
    held by the model are expected to have 70 rows.
    """
    data = datasets.ccard.load(as_pandas=False)
    # Inject the missing values directly into the loaded response array.
    data.endog[[10, 25]] = np.nan
    inverse_col = 1 / data.exog[:, 2]

    mod = WLS(data.endog, data.exog, weights=inverse_col, missing='drop')
    for arr in (mod.endog, mod.exog, mod.weights):
        assert arr.shape[0] == 70
Ejemplo n.º 9
0
def test_wls_example():
    """Docstring example for WLS, checked against values from R.

    There was a note about a bug in this example; it should be fixed now.
    """
    Y = [1, 3, 4, 5, 2, 3, 4]
    one_to_seven = list(range(1, 8))
    # regressor 1..7 with the constant appended as the last column
    X = add_constant(one_to_seven, prepend=False)
    fitted = WLS(Y, X, weights=list(range(1, 8))).fit()

    # taken from R lm.summary
    expected_fvalue = 0.127337843215
    expected_scale = 2.44608530786**2
    assert_almost_equal(fitted.fvalue, expected_fvalue, 6)
    assert_almost_equal(fitted.scale, expected_scale, 6)
Ejemplo n.º 10
0
    def setup_class(cls):
        """Build random data and the reference WLS fit on a column subset.

        Reads ``cls.idx_uc`` (columns kept in the reference model) and
        ``cls.fit_kwargs`` (passed to ``fit``), stores the data, weights and
        reference model/results on the class, then calls ``cls.init()``.
        """
        nobs, k_exog = 100, 5
        np.random.seed(987125)
        x = add_constant(np.random.randn(nobs, k_exog - 1))
        cls.aweights = np.random.randint(1, 10, nobs)

        noise = 2 * np.random.randn(nobs)
        y = x.sum(1) / 2 + noise

        cls.endog = y
        cls.exog = x
        cls.idx_p_uc = np.array(cls.idx_uc)
        xc = x[:, cls.idx_uc]
        cls.exogc = xc

        # Reference model: the contribution 0.5 * x[:, 1] is moved to the
        # left-hand side and only the columns in idx_uc are estimated.
        reference = WLS(y - 0.5 * x[:, 1], xc, weights=cls.aweights)
        reference.exog_names[:] = ['const', 'x2', 'x3', 'x4']
        cls.mod2 = reference
        cls.res2 = cls.mod2.fit(**cls.fit_kwargs)

        cls.init()
Ejemplo n.º 11
0
    def setup_class(cls):
        """Fit WLS on simulated heteroscedastic data (from example wls.py).

        The response is generated from a constant, ``x`` and ``(x - 5)**2``;
        the noise is multiplied by 3 for the last 40% of the observations.
        The model is then fitted on the constant and ``x`` only, with weights
        ``1 / w``, and stored as ``cls.res_wls``.
        """
        nsample = 50
        x = np.linspace(0, 20, nsample)
        X = np.column_stack((x, (x - 5)**2))

        X = add_constant(X)
        beta = [5., 0.5, -0.01]
        sig = 0.5
        w = np.ones(nsample)
        w[int(nsample * 6. / 10):] = 3
        y_true = np.dot(X, beta)
        # The original drew the noise from the unseeded global RNG, so every
        # run fitted different data.  A local, seeded generator makes the
        # fixture reproducible without touching the global random state.
        rng = np.random.RandomState(0)
        e = rng.normal(size=nsample)
        y = y_true + sig * w * e
        # keep only the first two columns of the constant-augmented design
        X = X[:, [0, 1]]

        # WLS knowing the true variance ratio of heteroscedasticity
        mod_wls = WLS(y, X, weights=1. / w)
        cls.res_wls = mod_wls.fit()
Ejemplo n.º 12
0
 def setup_class(cls):
     """Collect OLS, WLS and GLS fits on longley that all use unit weights.

     ``cls.results`` holds, in order: OLS, WLS with weights of ones, GLS with
     ``sigma`` given as the vector ``100 * ones`` and GLS with ``sigma`` given
     as the diagonal matrix ``diag(0.1 * ones)``.
     """
     data = datasets.longley.load(as_pandas=False)
     data.exog = add_constant(data.exog, prepend=False)
     y, X = data.endog, data.exog
     unit = np.ones(y.shape[0])

     cls.results = [
         OLS(y, X).fit(),
         WLS(y, X, unit).fit(),
         GLS(y, X, 100 * unit).fit(),
         GLS(y, X, np.diag(0.1 * unit)).fit(),
     ]
Ejemplo n.º 13
0
def test_wls_tss():
    """OLS on repeated rows and WLS on counted rows share ``centered_tss``.

    The OLS data contain six rows; the WLS data contain the three distinct
    rows with weights 2, 1 and 3, i.e. how often each row occurs.
    """
    y_full = np.array([22, 22, 22, 23, 23, 23])
    X_full = [[1, 0], [1, 0], [1, 1], [0, 1], [0, 1], [0, 1]]
    res_ols = OLS(y_full, add_constant(X_full, prepend=False)).fit()

    y_distinct = np.array([22, 22, 23.])
    X_distinct = [[1, 0], [1, 1], [0, 1]]
    counts = np.array([2, 1, 3.])
    res_wls = WLS(y_distinct, add_constant(X_distinct, prepend=False),
                  weights=counts).fit()

    assert_equal(res_ols.centered_tss, res_wls.centered_tss)
Ejemplo n.º 14
0
def test_fvalue_only_constant():
    """GH#3642: with only a constant, fvalue and f_pvalue should be np.nan.

    Checked for OLS with a HAC covariance (``maxlags=3``) and for WLS with
    an HC1 covariance.
    """
    nobs = 20
    np.random.seed(2)
    const_only = np.ones(nobs)
    y = np.random.randn(nobs)

    res_ols = OLS(y, const_only).fit(cov_type='hac',
                                     cov_kwds={'maxlags': 3})
    assert np.isnan(res_ols.fvalue)
    assert np.isnan(res_ols.f_pvalue)
    # 2018-03-05 the res.summary() smoke-test from upstream is disabled here

    res_wls = WLS(y, const_only).fit(cov_type='HC1')
    assert np.isnan(res_wls.fvalue)
    assert np.isnan(res_wls.f_pvalue)
Ejemplo n.º 15
0
def test_fvalue_implicit_constant():
    """GH#2444: if the constant is implicit, fvalue and f_pvalue are nan.

    The design has two indicator columns, ``x > 0`` and its complement, so
    every row sums to one.  Checked for OLS and WLS with an HC1 covariance.
    """
    nobs = 100
    np.random.seed(2)
    draws = np.random.randn(nobs, 1)
    # column 0 flags positive draws, column 1 flags the rest
    x = ((draws > 0) == [True, False]).astype(int)
    y = x.sum(1) + np.random.randn(nobs)

    # 2018-03-05 the res.summary() smoke-test from upstream is disabled here
    for model_cls in (OLS, WLS):
        res = model_cls(y, x).fit(cov_type='HC1')
        assert np.isnan(res.fvalue)
        assert np.isnan(res.f_pvalue)
Ejemplo n.º 16
0
    def setup_class(cls):
        """Fit WLS on ccard with rescaled weights and adjust the reference.

        ``cls.res1`` is the WLS fit.  ``cls.res2`` is read from the class (it
        is not created here) and its ``aic``, ``bic`` and ``llf`` are
        modified in place.
        """
        dta = datasets.ccard.load(as_pandas=False)
        dta.exog = add_constant(dta.exog, prepend=False)

        nobs = 72.
        raw_weights = 1 / dta.exog[:, 2]
        # for comparison with stata analytic weights
        scaled_weights = (raw_weights * nobs) / raw_weights.sum()

        cls.res1 = WLS(dta.endog, dta.exog, weights=scaled_weights).fit()

        # correction because we use different definition for loglike/llf;
        # it has to be computed before res2.llf itself is adjusted below
        corr_ic = 2 * (cls.res1.llf - cls.res2.llf)
        cls.res2.aic -= corr_ic
        cls.res2.bic -= corr_ic
        log_weights = np.log(cls.res1.model.weights)
        cls.res2.llf += 0.5 * np.sum(log_weights)
Ejemplo n.º 17
0
    def setup_class(cls):
        """Fit WLS on the first 200 grunfeld rows and store panel indices.

        Sets ``cls.res1`` (WLS of endog on value, capital and a constant with
        weights ``1 / value``), ``cls.groups`` (integer firm codes),
        ``cls.time`` (year minus the smallest year, as int) and ``cls.tidx``
        (ten consecutive ``(start, stop)`` bounds of length 20).
        """
        dtapa = grunfeld.data.load_pandas()
        # Stata example/data seems to miss last firm
        # TODO: Is the comment above (from upstream) actionable?
        dtapa_endog = dtapa.endog[:200]
        dtapa_exog = dtapa.exog[:200]
        exog = add_constant(dtapa_exog[['value', 'capital']], prepend=False)
        # asserts don't work for pandas
        cls.res1 = WLS(dtapa_endog, exog,
                       weights=1 / dtapa_exog['value']).fit()

        # Use the 1-D column so that the inverse index is 1-D on every NumPy
        # version (a 2-D input gives a 2-D inverse on NumPy 2.x).
        firm_col = np.asarray(dtapa_exog['firm'], 'S20')
        _, firm_id = np.unique(firm_col, return_inverse=True)
        cls.groups = firm_id

        # time indicator in range(max Ti)
        # Subtract out of place: an array obtained from a DataFrame may be
        # read-only or share memory with the loaded data set.
        year = np.asarray(dtapa_exog['year'])
        cls.time = (year - year.min()).astype(int)

        # nw_panel function requires interval bounds
        cls.tidx = [(i * 20, 20 * (i + 1)) for i in range(10)]
Ejemplo n.º 18
0
def test_finite_weight_sigma(bad_value, use_pandas):
    """GH#4969: WLS and GLS must raise when weights or sigma hold bad values.

    The last two entries of an array of ones are set to ``bad_value``; the
    same object is passed as ``weights`` to WLS and as ``sigma`` to GLS,
    optionally wrapped in a ``pd.Series``.  The error message must name the
    offending argument.
    """
    endog = np.random.randn(100)
    exog = np.random.randn(100, 2)

    # One array serves as both the weights and the sigma argument.
    shared = np.ones(100)
    shared[-2:] = bad_value
    if use_pandas:
        shared = pd.Series(shared)

    for model_cls, keyword in ((WLS, 'weights'), (GLS, 'sigma')):
        with pytest.raises(MissingDataError) as err:
            model_cls(endog, exog, **{keyword: shared})

        assert err.type is MissingDataError
        assert keyword in err.value.args[0]
Ejemplo n.º 19
0
def test_predict_se():
    """Check ``wls_prediction_std`` for consistency and against direct math.

    This test doesn't use reference values.  The prediction standard errors
    and interval bounds are compared with a direct calculation from the
    fitted results, first for OLS and then for WLS, and the results for the
    accepted shapes of ``exog`` and ``weights`` are compared with each other.
    """
    # generate dataset
    nsample = 50
    x1 = np.linspace(0, 20, nsample)
    x = np.c_[x1, (x1 - 5)**2, np.ones(nsample)]
    np.random.seed(0)
    # TODO: Upstream had commented-out seeds 9876789, 9876543;
    # figure out why 0 is used instead of those
    beta = [0.5, -0.01, 5.]
    y_true2 = np.dot(x, beta)
    w = np.ones(nsample)
    w[int(nsample * 6. / 10):] = 3
    sig = 0.5
    y2 = y_true2 + sig * w * np.random.normal(size=nsample)
    x2 = x[:, [0, 2]]

    # stats.t.isf(0.05/2., 50 - 2)
    q = 2.0106347546964458

    # estimate OLS
    res2 = OLS(y2, x2).fit()

    # direct calculation
    covb = res2.cov_params()
    predvar = res2.mse_resid + (x2 * np.dot(covb, x2.T).T).sum(1)
    predstd = np.sqrt(predvar)

    prstd_full, iv_l, iv_u = wls_prediction_std(res2)
    np.testing.assert_almost_equal(prstd_full, predstd, 15)

    ci_half = q * predstd
    assert_allclose(iv_u, res2.fittedvalues + ci_half, rtol=1e-12)
    assert_allclose(iv_l, res2.fittedvalues - ci_half, rtol=1e-12)

    # Explicit exog for the first three rows must reproduce the in-sample
    # values.  (The original compared prstd with a slice of itself, which
    # could never fail.)
    prstd, iv_l, iv_u = wls_prediction_std(res2, x2[:3, :])
    assert_allclose(prstd, prstd_full[:3], rtol=1e-12)
    assert_allclose(iv_u, res2.fittedvalues[:3] + ci_half[:3], rtol=1e-12)
    assert_allclose(iv_l, res2.fittedvalues[:3] - ci_half[:3], rtol=1e-12)

    # check WLS
    res3 = WLS(y2, x2, 1. / w).fit()

    # direct calculation
    covb = res3.cov_params()
    predvar = res3.mse_resid * w + (x2 * np.dot(covb, x2.T).T).sum(1)
    predstd = np.sqrt(predvar)

    prstd_full, iv_l, iv_u = wls_prediction_std(res3)
    np.testing.assert_almost_equal(prstd_full, predstd, 15)

    ci_half = q * predstd
    assert_allclose(iv_u, res3.fittedvalues + ci_half, rtol=1e-12)
    assert_allclose(iv_l, res3.fittedvalues - ci_half, rtol=1e-12)

    # testing shapes of exog: one row given as 2-D and as 1-D, two rows with
    # a scalar weight and with one weight per row.  The results are checked
    # for shape and against each other instead of against themselves.
    prstd_row = wls_prediction_std(res3, x2[-1:, :], weights=3.)[0]
    assert np.shape(prstd_row) == (1,)
    prstd_vec = wls_prediction_std(res3, x2[-1, :], weights=3.)[0]
    assert np.shape(prstd_vec) == (1,)
    assert_allclose(prstd_vec, prstd_row, rtol=1e-12)

    prstd_two = wls_prediction_std(res3, x2[-2:, :], weights=3.)[0]
    assert np.shape(prstd_two) == (2,)
    assert_allclose(prstd_two[-1:], prstd_row, rtol=1e-12)

    prstd_two_w = wls_prediction_std(res3, x2[-2:, :], weights=[3, 3])[0]
    assert np.shape(prstd_two_w) == (2,)
    assert_allclose(prstd_two_w, prstd_two, rtol=1e-12)

    # The first three observations were fitted with weight 1, which is what
    # the direct calculation above uses for them.
    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[:3, :])
    assert_allclose(prstd, prstd_full[:3], rtol=1e-12)
    assert_allclose(iv_u, res3.fittedvalues[:3] + ci_half[:3], rtol=1e-12)
    assert_allclose(iv_l, res3.fittedvalues[:3] - ci_half[:3], rtol=1e-12)

    # use wrong size for exog
    with pytest.raises(ValueError):
        wls_prediction_std(res3, x2[-1, 0], weights=3.)

    # check some weight values: a weight of 1 / wv must add
    # scale * (wv - 1) to the variance obtained with the default weight
    sew1 = wls_prediction_std(res3, x2[-3:, :])[0]**2
    for wv in np.linspace(0.5, 3, 5):
        sew = wls_prediction_std(res3, x2[-3:, :], weights=1. / wv)[0]**2
        assert_allclose(sew, sew1 + res3.scale * (wv - 1))
Ejemplo n.º 20
0
 def setup_class(cls):
     """Fit WLS on one observation, with the scalar weight 1."""
     single_obs = (1,)
     cls.exog = np.ones(single_obs)
     cls.endog = np.ones(single_obs)
     cls.wls_res = WLS(cls.endog, cls.exog, weights=1).fit()
Ejemplo n.º 21
0
 def test_wrong_size_weights(self):
     """Passing a 10 x 10 array as weights must raise ``ValueError``."""
     bad_weights = np.ones((10, 10))
     with pytest.raises(ValueError):
         WLS(self.endog, self.exog, weights=bad_weights)
Ejemplo n.º 22
0
 def setup_class(cls):
     """Fit OLS (``res1``) and WLS without weights (``res2``) on longley."""
     data = datasets.longley.load(as_pandas=False)
     data.exog = add_constant(data.exog, prepend=False)
     endog, exog = data.endog, data.exog
     cls.res1 = OLS(endog, exog).fit()
     cls.res2 = WLS(endog, exog).fit()