Example #1
def break_test(endog, exog, nbreaks, trim=0.15, vcov=None):
    """
    Test that `nbreaks` exists in the sample.

    TODO again, better cache the SSRs
    TODO add support for p-value calculation (Hansen 1997)

    Parameters
    ----------
    endog : array-like
        The endogenous variable.
    exog : array-like
        The exogenous matrix.
    nbreaks : integer
        The number of breakpoints in the null hypothesis
    trim : float or int, optional
        If a float, the minimum percentage of observations in each regime,
        if an integer, the minimum number of observations in each regime.
    vcov : callback, optional
        Optionally provide a callback to modify the variance / covariance
        matrix used in calculating the test statistic.
    
    Returns
    -------
    fstat : float
        The test statistic.
    crits : iterable
        The critical values.
    """
    nobs = len(endog)
    if trim < 1:
        trim = int(np.floor(trim * nobs))

    exog = np.asarray(exog)
    # TODO Is there a better way to test for and fix this? (the problem is
    #      that if the exog argument is a list, so that exog is 1dim,
    #      np.concatenate fails to create a matrix, instead just makes a
    #      long vector)
    if exog.ndim == 1:
        exog = exog[:, None]

    breakpoints, ssr = find_breakpoints(endog, exog, nbreaks, trim)

    built_exog, regime_indicators, nobs_regimes = build_exog(exog, breakpoints)
    res = OLS(endog, built_exog).fit()

    # number of parameters subject to break (hard-coded to the entire exog for
    # now) and number of parameters not subject to break (hard-coded to zero)
    q = exog.shape[1]
    p = 0
    # R picks out differences of coefficients in adjacent regimes, so that
    # R.dot(params) stacks delta_{i+1} - delta_i; note this assumes q == 1,
    # otherwise the shapes below do not conform
    R = np.zeros((nbreaks, nbreaks + 1))
    R[np.diag_indices(nbreaks)] = -1
    R[np.arange(nbreaks), np.arange(1, nbreaks + 1)] = 1
    Rd = R.dot(res.params[:, None])

    V = vcov(res) if vcov else res.cov_params()

    const = (nobs - (nbreaks+1)*q - p) / (nobs*nbreaks*q)
    fstat = const * Rd.T.dot(np.linalg.inv(R.dot(V).dot(R.T))).dot(Rd)

    # the quadratic form above is a 1x1 array; return it as a scalar
    return fstat[0, 0]
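
Since `find_breakpoints` and `build_exog` are not shown in these examples, here is a self-contained sketch of just the restriction matrix and the Wald quadratic form used above, with illustrative toy values (two breaks, a single regressor):

import numpy as np

nbreaks = 2
R = np.zeros((nbreaks, nbreaks + 1))
R[np.diag_indices(nbreaks)] = -1
R[np.arange(nbreaks), np.arange(1, nbreaks + 1)] = 1
# R == [[-1, 1, 0], [0, -1, 1]]: each row is the difference of coefficients
# in adjacent regimes, so R.dot(params) == 0 means "no change across regimes"

params = np.array([1.0, 1.5, 0.9])   # toy regime coefficients
V = 0.01 * np.eye(3)                 # toy covariance of the coefficients
Rd = R.dot(params[:, None])
wald = Rd.T.dot(np.linalg.inv(R.dot(V).dot(R.T))).dot(Rd)
print(wald[0, 0])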
Example #2
def ocsb_test_value(diff_series, x_reg, period):
    try:
        fit = OLS(diff_series, x_reg).fit()
    except ValueError:
        # the regression model cannot be fit
        return -np.inf

    # t statistic of the "x2" coefficient: estimate over its standard error
    # (cov_params() is a labeled DataFrame when x_reg has named columns)
    se_x2 = np.sqrt(fit.cov_params()["x2"]["x2"])
    return fit.params["x2"] / se_x2
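
A hedged, self-contained sketch of the same pattern: with a pandas exog, cov_params() comes back as a labeled DataFrame, so a coefficient's t statistic is its estimate divided by the square root of its own covariance entry. The column names and data below are illustrative, not from the source.

import numpy as np
import pandas as pd
from statsmodels.regression.linear_model import OLS

rng = np.random.default_rng(0)
x_reg = pd.DataFrame({"x1": rng.normal(size=100), "x2": rng.normal(size=100)})
y = 0.5 * x_reg["x1"] + 0.2 * x_reg["x2"] + rng.normal(size=100)
fit = OLS(y, x_reg).fit()
t_x2 = fit.params["x2"] / np.sqrt(fit.cov_params()["x2"]["x2"])
print(t_x2, fit.tvalues["x2"])  # the two values agree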
Example #3
    def test_compatibility(self):
        """Hypothesis test for the compatibility of the prior mean with the data
        """
        # TODO: should we store the OLS results? not needed so far, but maybe cache
        #params_ols = np.linalg.pinv(self.model.exog).dot(self.model.endog)
        #res = self.wald_test(self.model.r_matrix, q_matrix=self.model.q_matrix, use_f=False)
        # from scratch
        res_ols = OLS(self.model.endog, self.model.exog).fit()
        r_mat = self.model.r_matrix
        r_diff = self.model.q_matrix - r_mat.dot(res_ols.params)[:, None]
        ols_cov_r = res_ols.cov_params(r_matrix=r_mat)
        statistic = r_diff.T.dot(np.linalg.solve(ols_cov_r + self.model.sigma_prior, r_diff))
        from scipy import stats
        df = np.linalg.matrix_rank(self.model.sigma_prior)  # same as r_mat.shape[0]
        pvalue = stats.chi2.sf(statistic, df)
        # TODO: return a results class
        return statistic, pvalue, df
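
A minimal self-contained sketch of the same compatibility statistic, assuming toy values for the restriction matrix, prior mean, and prior covariance (all illustrative):

import numpy as np
from scipy import stats
from statsmodels.regression.linear_model import OLS

rng = np.random.default_rng(0)
nobs, k = 100, 3
exog = np.column_stack([np.ones(nobs), rng.normal(size=(nobs, 2))])
endog = exog @ [1.0, 0.5, -0.2] + rng.normal(size=nobs)
res_ols = OLS(endog, exog).fit()

r_mat = np.eye(k)                              # prior restricts all coefficients
q_prior = np.array([1.0, 0.5, -0.2])[:, None]  # prior mean
sigma_prior = 0.1 * np.eye(k)                  # prior covariance

r_diff = q_prior - r_mat.dot(res_ols.params)[:, None]
cov_r = res_ols.cov_params(r_matrix=r_mat)
statistic = r_diff.T.dot(np.linalg.solve(cov_r + sigma_prior, r_diff)).item()
df = np.linalg.matrix_rank(sigma_prior)
print(statistic, stats.chi2.sf(statistic, df))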
Example #4
    def test_compatibility(self):
        """Hypothesis test for the compatibility of the prior mean with the data
        """
        # TODO: should we store the OLS results? not needed so far, but maybe cache
        #params_ols = np.linalg.pinv(self.model.exog).dot(self.model.endog)
        #res = self.wald_test(self.model.r_matrix, q_matrix=self.model.q_matrix, use_f=False)
        # from scratch
        res_ols = OLS(self.model.endog, self.model.exog).fit()
        r_mat = self.model.r_matrix
        r_diff = self.model.q_matrix - r_mat.dot(res_ols.params)[:, None]
        ols_cov_r = res_ols.cov_params(r_matrix=r_mat)
        statistic = r_diff.T.dot(np.linalg.solve(ols_cov_r + self.model.sigma_prior, r_diff))
        from scipy import stats
        df = np.linalg.matrix_rank(self.model.sigma_prior)  # same as r_mat.shape[0]
        pvalue = stats.chi2.sf(statistic, df)
        # TODO: return a results class
        return statistic, pvalue, df
Example #5
    def test_combine_subset_regression(self):
        # split sample into two, use first sample as prior for second
        endog = self.endog
        exog = self.exog
        nobs = len(endog)

        n05 = nobs // 2
        np.random.seed(987125)
        # shuffle to get random subsamples
        shuffle_idx = np.random.permutation(np.arange(nobs))
        ys = endog[shuffle_idx]
        xs = exog[shuffle_idx]
        k = 10
        res_ols0 = OLS(ys[:n05], xs[:n05, :k]).fit()
        res_ols1 = OLS(ys[n05:], xs[n05:, :k]).fit()

        w = res_ols1.scale / res_ols0.scale  # approximately 1.01
        mod_1 = TheilGLS(ys[n05:],
                         xs[n05:, :k],
                         r_matrix=np.eye(k),
                         q_matrix=res_ols0.params,
                         sigma_prior=w * res_ols0.cov_params())
        res_1p = mod_1.fit(cov_type='data-prior')
        res_1s = mod_1.fit(cov_type='sandwich')
        res_olsf = OLS(ys, xs[:, :k]).fit()

        assert_allclose(res_1p.params, res_olsf.params, rtol=1e-9)
        corr_fact = np.sqrt(res_1p.scale / res_olsf.scale)
        # correct for differences in the scale computation
        assert_allclose(res_1p.bse, res_olsf.bse * corr_fact, rtol=1e-3)

        # regression test, does not verify numbers against an external reference
        # open question: why are these smaller than OLS on the full sample?
        # in a larger sample, nobs=600, these were close to full-sample OLS
        bse1 = np.array([
            0.26589869, 0.15224812, 0.38407399, 0.75679949, 0.66084200,
            0.54174080, 0.53697607, 0.66006377, 0.38228551, 0.53920485
        ])
        assert_allclose(res_1s.bse, bse1, rtol=1e-7)
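
A hedged sketch of the same combine-by-prior idea on simulated data; this assumes TheilGLS lives at statsmodels.sandbox.regression.penalized, as in the statsmodels sandbox:

import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.sandbox.regression.penalized import TheilGLS  # sandbox path

rng = np.random.default_rng(12345)
nobs, k = 200, 3
x = rng.normal(size=(nobs, k))
y = x @ np.ones(k) + rng.normal(size=nobs)

# the first half of the sample becomes the prior for the second half
res0 = OLS(y[:100], x[:100]).fit()
mod = TheilGLS(y[100:], x[100:], r_matrix=np.eye(k),
               q_matrix=res0.params, sigma_prior=res0.cov_params())
res = mod.fit(cov_type='data-prior')
print(res.params)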
Example #6
    def test_combine_subset_regression(self):
        # split sample into two, use first sample as prior for second
        endog = self.endog
        exog = self.exog
        nobs = len(endog)

        n05 = nobs // 2
        np.random.seed(987125)
        # shuffle to get random subsamples
        shuffle_idx = np.random.permutation(np.arange(nobs))
        ys = endog[shuffle_idx]
        xs = exog[shuffle_idx]
        k = 10
        res_ols0 = OLS(ys[:n05], xs[:n05, :k]).fit()
        res_ols1 = OLS(ys[n05:], xs[n05:, :k]).fit()

        w = res_ols1.scale / res_ols0.scale  # approximately 1.01
        mod_1 = TheilGLS(ys[n05:],
                         xs[n05:, :k],
                         r_matrix=np.eye(k),
                         q_matrix=res_ols0.params,
                         sigma_prior=w * res_ols0.cov_params())
        res_1p = mod_1.fit(cov_type='data-prior')
        res_1s = mod_1.fit(cov_type='sandwich')
        res_olsf = OLS(ys, xs[:, :k]).fit()

        assert_allclose(res_1p.params, res_olsf.params, rtol=1e-9)
        corr_fact = 0.96156318  # correct for differences in the scale computation
        assert_allclose(res_1p.bse, res_olsf.bse * corr_fact, rtol=1e-3)

        # regression test, does not verify numbers against an external reference
        # open question: why are these smaller than OLS on the full sample?
        # in a larger sample, nobs=600, these were close to full-sample OLS
        bse1 = np.array([
            0.27609914, 0.15808869, 0.39880789, 0.78583194, 0.68619331,
            0.56252314, 0.55757562, 0.68538523, 0.39695081, 0.55988991
        ])
        assert_allclose(res_1s.bse, bse1, rtol=1e-7)
Example #7
    def test_combine_subset_regression(self):
        # split sample into two, use first sample as prior for second
        endog = self.endog
        exog = self.exog
        nobs = len(endog)

        n05 = nobs // 2
        np.random.seed(987125)
        # shuffle to get random subsamples
        shuffle_idx = np.random.permutation(np.arange(nobs))
        ys = endog[shuffle_idx]
        xs = exog[shuffle_idx]
        k = 10
        res_ols0 = OLS(ys[:n05], xs[:n05, :k]).fit()
        res_ols1 = OLS(ys[n05:], xs[n05:, :k]).fit()

        w = res_ols1.scale / res_ols0.scale  # approximately 1.01
        mod_1 = TheilGLS(ys[n05:], xs[n05:, :k], r_matrix=np.eye(k),
                         q_matrix=res_ols0.params,
                         sigma_prior=w * res_ols0.cov_params())
        res_1p = mod_1.fit(cov_type='data-prior')
        res_1s = mod_1.fit(cov_type='sandwich')
        res_olsf = OLS(ys, xs[:, :k]).fit()

        assert_allclose(res_1p.params, res_olsf.params, rtol=1e-9)
        corr_fact = np.sqrt(res_1p.scale / res_olsf.scale)
        # correct for differences in the scale computation
        assert_allclose(res_1p.bse, res_olsf.bse * corr_fact, rtol=1e-3)

        # regression test, does not verify numbers
        # especially why are these smaller than OLS on full sample
        # in larger sample, nobs=600, those were close to full OLS
        bse1 = np.array([
            0.26589869,  0.15224812,  0.38407399,  0.75679949,  0.66084200,
            0.54174080,  0.53697607,  0.66006377,  0.38228551,  0.53920485])
        assert_allclose(res_1s.bse, bse1, rtol=1e-7)
Example #8
def test_predict_se():
    # this test doesn't use reference values;
    # it checks consistency across options and compares to a direct calculation

    # generate dataset
    nsample = 50
    x1 = np.linspace(0, 20, nsample)
    x = np.c_[x1, (x1 - 5)**2, np.ones(nsample)]
    np.random.seed(0)
    beta = [0.5, -0.01, 5.]
    y_true2 = np.dot(x, beta)
    w = np.ones(nsample)
    w[int(nsample * 6. / 10):] = 3
    sig = 0.5
    y2 = y_true2 + sig * w * np.random.normal(size=nsample)
    x2 = x[:, [0, 2]]

    # estimate OLS
    res2 = OLS(y2, x2).fit()

    #direct calculation
    covb = res2.cov_params()
    predvar = res2.mse_resid + (x2 * np.dot(covb, x2.T).T).sum(1)
    predstd = np.sqrt(predvar)

    prstd, iv_l, iv_u = wls_prediction_std(res2)
    np.testing.assert_almost_equal(prstd, predstd, 15)

    # q = stats.t.isf(0.05 / 2., 50 - 2)
    q = 2.0106347546964458
    ci_half = q * predstd
    np.testing.assert_allclose(iv_u, res2.fittedvalues + ci_half, rtol=1e-12)
    np.testing.assert_allclose(iv_l, res2.fittedvalues - ci_half, rtol=1e-12)

    prstd, iv_l, iv_u = wls_prediction_std(res2, x2[:3, :])
    np.testing.assert_equal(prstd, prstd[:3])
    np.testing.assert_allclose(iv_u,
                               res2.fittedvalues[:3] + ci_half[:3],
                               rtol=1e-12)
    np.testing.assert_allclose(iv_l,
                               res2.fittedvalues[:3] - ci_half[:3],
                               rtol=1e-12)

    # check WLS
    res3 = WLS(y2, x2, 1. / w).fit()

    #direct calculation
    covb = res3.cov_params()
    predvar = res3.mse_resid * w + (x2 * np.dot(covb, x2.T).T).sum(1)
    predstd = np.sqrt(predvar)

    prstd, iv_l, iv_u = wls_prediction_std(res3)
    np.testing.assert_almost_equal(prstd, predstd, 15)

    # q = stats.t.isf(0.05 / 2., 50 - 2)
    q = 2.0106347546964458
    ci_half = q * predstd
    np.testing.assert_allclose(iv_u, res3.fittedvalues + ci_half, rtol=1e-12)
    np.testing.assert_allclose(iv_l, res3.fittedvalues - ci_half, rtol=1e-12)

    # testing shapes of exog
    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[-1:, :], weights=3.)
    np.testing.assert_equal(prstd, prstd[-1])
    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[-1, :], weights=3.)
    np.testing.assert_equal(prstd, prstd[-1])

    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[-2:, :], weights=3.)
    np.testing.assert_equal(prstd, prstd[-2:])

    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[-2:, :], weights=[3, 3])
    np.testing.assert_equal(prstd, prstd[-2:])

    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[:3, :])
    np.testing.assert_equal(prstd, prstd[:3])
    np.testing.assert_allclose(iv_u,
                               res3.fittedvalues[:3] + ci_half[:3],
                               rtol=1e-12)
    np.testing.assert_allclose(iv_l,
                               res3.fittedvalues[:3] - ci_half[:3],
                               rtol=1e-12)

    #use wrong size for exog
    #prstd, iv_l, iv_u = wls_prediction_std(res3, x2[-1,0], weights=3.)
    np.testing.assert_raises(ValueError,
                             wls_prediction_std,
                             res3,
                             x2[-1, 0],
                             weights=3.)

    # check some weight values
    sew1 = wls_prediction_std(res3, x2[-3:, :])[0]**2
    for wv in np.linspace(0.5, 3, 5):

        sew = wls_prediction_std(res3, x2[-3:, :], weights=1. / wv)[0]**2
        np.testing.assert_allclose(sew, sew1 + res3.scale * (wv - 1))
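
A self-contained sketch of the direct calculation the test checks against: the prediction variance is the residual variance plus the parameter-uncertainty term x V x', where V is the parameter covariance.

import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.sandbox.regression.predstd import wls_prediction_std

rng = np.random.default_rng(0)
x = np.column_stack([np.linspace(0, 10, 40), np.ones(40)])
y = x @ [0.5, 2.0] + rng.normal(size=40)
res = OLS(y, x).fit()

# row-wise x_i V x_i' gives the parameter-uncertainty part of the variance
predvar = res.mse_resid + (x * x.dot(res.cov_params())).sum(1)
prstd, iv_l, iv_u = wls_prediction_std(res)
print(np.allclose(prstd, np.sqrt(predvar)))  # True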
Example #9
def test_predict_se():
    # this test doesn't use reference values;
    # it checks consistency across options and compares to a direct calculation

    # generate dataset
    nsample = 50
    x1 = np.linspace(0, 20, nsample)
    x = np.c_[x1, (x1 - 5)**2, np.ones(nsample)]
    np.random.seed(0)
    beta = [0.5, -0.01, 5.]
    y_true2 = np.dot(x, beta)
    w = np.ones(nsample)
    w[int(nsample * 6. / 10):] = 3
    sig = 0.5
    y2 = y_true2 + sig * w * np.random.normal(size=nsample)
    x2 = x[:,[0,2]]

    # estimate OLS
    res2 = OLS(y2, x2).fit()

    #direct calculation
    covb = res2.cov_params()
    predvar = res2.mse_resid + (x2 * np.dot(covb, x2.T).T).sum(1)
    predstd = np.sqrt(predvar)

    prstd, iv_l, iv_u = wls_prediction_std(res2)
    np.testing.assert_almost_equal(prstd, predstd, 15)

    # q = stats.t.isf(0.05 / 2., 50 - 2)
    q = 2.0106347546964458
    ci_half = q * predstd
    np.testing.assert_allclose(iv_u, res2.fittedvalues + ci_half, rtol=1e-12)
    np.testing.assert_allclose(iv_l, res2.fittedvalues - ci_half, rtol=1e-12)

    prstd, iv_l, iv_u = wls_prediction_std(res2, x2[:3,:])
    np.testing.assert_equal(prstd, prstd[:3])
    np.testing.assert_allclose(iv_u, res2.fittedvalues[:3] + ci_half[:3],
                               rtol=1e-12)
    np.testing.assert_allclose(iv_l, res2.fittedvalues[:3] - ci_half[:3],
                               rtol=1e-12)


    # check WLS
    res3 = WLS(y2, x2, 1. / w).fit()

    #direct calculation
    covb = res3.cov_params()
    predvar = res3.mse_resid * w + (x2 * np.dot(covb, x2.T).T).sum(1)
    predstd = np.sqrt(predvar)

    prstd, iv_l, iv_u = wls_prediction_std(res3)
    np.testing.assert_almost_equal(prstd, predstd, 15)

    # q = stats.t.isf(0.05 / 2., 50 - 2)
    q = 2.0106347546964458
    ci_half = q * predstd
    np.testing.assert_allclose(iv_u, res3.fittedvalues + ci_half, rtol=1e-12)
    np.testing.assert_allclose(iv_l, res3.fittedvalues - ci_half, rtol=1e-12)

    # testing shapes of exog
    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[-1:,:], weights=3.)
    np.testing.assert_equal(prstd, prstd[-1])
    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[-1,:], weights=3.)
    np.testing.assert_equal(prstd, prstd[-1])

    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[-2:,:], weights=3.)
    np.testing.assert_equal(prstd, prstd[-2:])

    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[-2:,:], weights=[3, 3])
    np.testing.assert_equal(prstd, prstd[-2:])

    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[:3,:])
    np.testing.assert_equal(prstd, prstd[:3])
    np.testing.assert_allclose(iv_u, res3.fittedvalues[:3] + ci_half[:3],
                               rtol=1e-12)
    np.testing.assert_allclose(iv_l, res3.fittedvalues[:3] - ci_half[:3],
                               rtol=1e-12)


    #use wrong size for exog
    #prstd, iv_l, iv_u = wls_prediction_std(res3, x2[-1,0], weights=3.)
    np.testing.assert_raises(ValueError, wls_prediction_std, res3, x2[-1,0],
                             weights=3.)

    # check some weight values
    sew1 = wls_prediction_std(res3, x2[-3:,:])[0]**2
    for wv in np.linspace(0.5, 3, 5):

        sew = wls_prediction_std(res3, x2[-3:,:], weights=1. / wv)[0]**2
        np.testing.assert_allclose(sew, sew1 + res3.scale * (wv - 1))
Example #10
    # fragment from a sandbox script: numpy (np), nobs, OLS and the het_* /
    # _neweywestcov helpers are defined earlier in the original file
    x = np.ones((nobs, 2))
    x[:, 1] = np.arange(nobs) / 20.
    y = x.sum(1) + 1.01*(1+1.5*(x[:,1]>10))*np.random.rand(nobs)
    print(het_goldfeldquandt(y,x, 1))

    y = x.sum(1) + 1.01*(1+0.5*(x[:,1]>10))*np.random.rand(nobs)
    print(het_goldfeldquandt(y,x, 1))

    y = x.sum(1) + 1.01*(1-0.5*(x[:,1]>10))*np.random.rand(nobs)
    print(het_goldfeldquandt(y,x, 1))

    print(het_breuschpagan(y,x))
    print(het_white(y,x))

    f, fp, fo = het_goldfeldquandt(y,x, 1)
    print(f, fp)
    resgq = het_goldfeldquandt(y,x, 1, retres=True)
    print(resgq)

    #this is just a syntax check:
    print(_neweywestcov(y, x))

    resols1 = OLS(y, x).fit()
    print(_neweywestcov(resols1.resid, x))
    print(resols1.cov_params())
    print(resols1.HC0_se)
    print(resols1.cov_HC0)

    y = x.sum(1) + 10.*(1-0.5*(x[:,1]>10))*np.random.rand(nobs)
    print(HetGoldfeldQuandt().run(y,x, 1, alternative='dec'))
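
A hedged sketch of the same diagnostics against the current public statsmodels API; note that in statsmodels.stats.diagnostic the Breusch-Pagan test takes OLS residuals (the sandbox fragment above passes y directly, an older convention):

import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.stats.diagnostic import het_breuschpagan, het_goldfeldquandt

rng = np.random.default_rng(0)
nobs = 200
x = np.column_stack([np.ones(nobs), np.arange(nobs) / 20.])
y = x.sum(1) + (1 + 1.5 * (x[:, 1] > 5)) * rng.normal(size=nobs)
res = OLS(y, x).fit()

print(het_breuschpagan(res.resid, x))   # LM stat, LM p-value, F stat, F p-value
print(het_goldfeldquandt(y, x, idx=1))  # F stat, p-value, ordering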
Example #11
    # fragment from a script: exog, endog, res2 and res3_ols are defined
    # earlier in the original file
    #transf = TransformRestriction(np.eye(exog.shape[1])[:2], res2.params[:2] / 2)
    transf3 = TransformRestriction([[0, 0, 0, 1, 0], [0, 0, 0, 0, 1]], [0, 1])
    exog3_st = transf3.reduce(exog)
    res3 = OLS(endog, exog3_st).fit()
    # need to correct for constant/offset in the optimization
    res3 = OLS(endog - exog.dot(transf3.constant.squeeze()), exog3_st).fit()
    params = transf3.expand(res3.params).squeeze()
    assert_allclose(params[:-2], res3_ols.params, rtol=1e-13)
    print(res3.params)
    print(params)
    print(res3_ols.params)
    print(res3_ols.bse)
    # the following raises `ValueError: cannot test a constant constraint`
    #tt = res3.t_test(transf3.transf_mat, transf3.constant.squeeze())
    #print tt.sd
    cov_params3 = transf3.transf_mat.dot(res3.cov_params()).dot(transf3.transf_mat.T)
    bse3 = np.sqrt(np.diag(cov_params3))
    print(bse3)

    tp = transform_params_constraint(res2.params, res2.normalized_cov_params,
                                     transf3.R, transf3.q)
    tp = transform_params_constraint(res2.params, res2.cov_params(), transf3.R, transf3.q)

    import statsmodels.api as sm
    rand_data = sm.datasets.randhie.load(as_pandas=False)
    rand_exog = rand_data.exog.view(float).reshape(len(rand_data.exog), -1)
    rand_exog = sm.add_constant(rand_exog, prepend=False)


    # Fit Poisson model:
    poisson_mod0 = sm.Poisson(rand_data.endog, rand_exog)
Example #12
    # fragment from a script: exog, endog, res2 and res3_ols are defined
    # earlier in the original file
    #transf = TransformRestriction(np.eye(exog.shape[1])[:2], res2.params[:2] / 2)
    transf3 = TransformRestriction([[0, 0, 0, 1, 0], [0, 0, 0, 0, 1]], [0, 1])
    exog3_st = transf3.reduce(exog)
    res3 = OLS(endog, exog3_st).fit()
    # need to correct for constant/offset in the optimization
    res3 = OLS(endog - exog.dot(transf3.constant.squeeze()), exog3_st).fit()
    params = transf3.expand(res3.params).squeeze()
    assert_allclose(params[:-2], res3_ols.params, rtol=1e-13)
    print(res3.params)
    print(params)
    print(res3_ols.params)
    print(res3_ols.bse)
    # the following raises `ValueError: can't test a constant constraint`
    #tt = res3.t_test(transf3.transf_mat, transf3.constant.squeeze())
    #print tt.sd
    cov_params3 = transf3.transf_mat.dot(res3.cov_params()).dot(transf3.transf_mat.T)
    bse3 = np.sqrt(np.diag(cov_params3))
    print(bse3)

    tp = transform_params_constraint(res2.params, res2.normalized_cov_params,
                                     transf3.R, transf3.q)
    tp = transform_params_constraint(res2.params, res2.cov_params(), transf3.R, transf3.q)


    from statsmodels.discrete.discrete_model import Poisson
    import statsmodels.api as sm
    rand_data = sm.datasets.randhie.load(as_pandas=False)
    rand_exog = rand_data.exog.view(float).reshape(len(rand_data.exog), -1)
    rand_exog = sm.add_constant(rand_exog, prepend=False)
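
A hedged, self-contained sketch of the reduce/expand round trip used in these fragments, via the private statsmodels helper exercised above (statsmodels.base._constraints is internal API, so treat the import path as an assumption):

import numpy as np
from statsmodels.base._constraints import TransformRestriction
from statsmodels.regression.linear_model import OLS

rng = np.random.default_rng(0)
nobs = 100
exog = np.column_stack([np.ones(nobs), rng.normal(size=(nobs, 2))])
endog = exog @ [1.0, 0.5, 0.5] + rng.normal(size=nobs)

transf = TransformRestriction([[0, 1, -1]], [0])  # restriction: b1 - b2 = 0
exog_st = transf.reduce(exog)
# correct for the constant/offset implied by the restriction, as above
res = OLS(endog - exog.dot(np.squeeze(transf.constant)), exog_st).fit()
params = np.squeeze(transf.expand(res.params))
print(params)  # b1 and b2 are equal by construction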

Example #13
    def test_combine_subset_regression(self):
        # split sample into two, use first sample as prior for second
        endog = self.endog
        exog = self.exog
        nobs = len(endog)

        n05 = nobs // 2
        np.random.seed(987125)
        # shuffle to get random subsamples
        shuffle_idx = np.random.permutation(np.arange(nobs))
        ys = endog[shuffle_idx]
        xs = exog[shuffle_idx]
        k = 10
        res_ols0 = OLS(ys[:n05], xs[:n05, :k]).fit()
        res_ols1 = OLS(ys[n05:], xs[n05:, :k]).fit()

        w = res_ols1.scale / res_ols0.scale  # approximately 1.01
        mod_1 = TheilGLS(ys[n05:], xs[n05:, :k], r_matrix=np.eye(k),
                         q_matrix=res_ols0.params,
                         sigma_prior=w * res_ols0.cov_params())
        res_1p = mod_1.fit(cov_type='data-prior')
        res_1s = mod_1.fit(cov_type='sandwich')
        res_olsf = OLS(ys, xs[:, :k]).fit()

        assert_allclose(res_1p.params, res_olsf.params, rtol=1e-9)
        corr_fact = 0.96156318  # correct for differences in the scale computation
        assert_allclose(res_1p.bse, res_olsf.bse * corr_fact, rtol=1e-3)

        # regression test, does not verify numbers against an external reference
        # open question: why are these smaller than OLS on the full sample?
        # in a larger sample, nobs=600, these were close to full-sample OLS
        bse1 = np.array([
            0.27609914,  0.15808869,  0.39880789,  0.78583194,  0.68619331,
            0.56252314,  0.55757562,  0.68538523,  0.39695081,  0.55988991])
        assert_allclose(res_1s.bse, bse1, rtol=1e-7)
Example #14
    # fragment from a sandbox script: numpy (np), nobs, OLS and the het_* /
    # _neweywestcov helpers are defined earlier in the original file
    x = np.ones((nobs, 2))
    x[:, 1] = np.arange(nobs) / 20.
    y = x.sum(1) + 1.01*(1+1.5*(x[:,1]>10))*np.random.rand(nobs)
    print(het_goldfeldquandt(y,x, 1))

    y = x.sum(1) + 1.01*(1+0.5*(x[:,1]>10))*np.random.rand(nobs)
    print(het_goldfeldquandt(y,x, 1))

    y = x.sum(1) + 1.01*(1-0.5*(x[:,1]>10))*np.random.rand(nobs)
    print(het_goldfeldquandt(y,x, 1))

    print(het_breuschpagan(y,x))
    print(het_white(y,x))

    f, fp, fo = het_goldfeldquandt(y,x, 1)
    print(f, fp)
    resgq = het_goldfeldquandt(y,x, 1, retres=True)
    print(resgq)

    #this is just a syntax check:
    print(_neweywestcov(y, x))

    resols1 = OLS(y, x).fit()
    print(_neweywestcov(resols1.resid, x))
    print(resols1.cov_params())
    print(resols1.HC0_se)
    print(resols1.cov_HC0)

    y = x.sum(1) + 10.*(1-0.5*(x[:,1]>10))*np.random.rand(nobs)
    print(HetGoldfeldQuandt().run(y,x, 1, alternative='dec'))
Example #15
    def test_ols_noncentrality(self):
        k = self.k_groups

        res_ols = OLS(self.y, self.ex).fit()
        nobs_t = res_ols.model.nobs

        # constraint
        c_equal = -np.eye(k)[1:]
        c_equal[:, 0] = 1
        v = np.zeros(c_equal.shape[0])

        # noncentrality at estimated parameters
        wt = res_ols.wald_test(c_equal, scalar=True)
        df_num, df_denom = wt.df_num, wt.df_denom

        cov_p = res_ols.cov_params()

        nc_wt = wald_test_noncent_generic(res_ols.params,
                                          c_equal,
                                          v,
                                          cov_p,
                                          diff=None,
                                          joint=True)
        assert_allclose(nc_wt, wt.statistic * wt.df_num, rtol=1e-13)

        nc_wt2 = wald_test_noncent(res_ols.params,
                                   c_equal,
                                   v,
                                   res_ols,
                                   diff=None,
                                   joint=True)
        assert_allclose(nc_wt2, nc_wt, rtol=1e-13)

        es_ols = nc_wt / nobs_t
        es_oneway = smo.effectsize_oneway(res_ols.params,
                                          res_ols.scale,
                                          self.nobs,
                                          use_var="equal")
        assert_allclose(es_ols, es_oneway, rtol=1e-13)

        alpha = 0.05
        pow_ols = smpwr.ftest_power(np.sqrt(es_ols),
                                    df_denom,
                                    df_num,
                                    alpha,
                                    ncc=1)
        pow_oneway = smpwr.ftest_anova_power(np.sqrt(es_oneway),
                                             nobs_t,
                                             alpha,
                                             k_groups=k,
                                             df=None)
        assert_allclose(pow_ols, pow_oneway, rtol=1e-13)

        # noncentrality at other params
        params_alt = res_ols.params * 0.75
        # compute constraint value so we can get noncentrality from wald_test
        v_off = _offset_constraint(c_equal, res_ols.params, params_alt)
        wt_off = res_ols.wald_test((c_equal, v + v_off), scalar=True)
        nc_wt_off = wald_test_noncent_generic(params_alt,
                                              c_equal,
                                              v,
                                              cov_p,
                                              diff=None,
                                              joint=True)
        assert_allclose(nc_wt_off,
                        wt_off.statistic * wt_off.df_num,
                        rtol=1e-13)

        # check vectorized version, joint=False
        nc_wt_vec = wald_test_noncent_generic(params_alt,
                                              c_equal,
                                              v,
                                              cov_p,
                                              diff=None,
                                              joint=False)
        for i in range(c_equal.shape[0]):
            nc_wt_i = wald_test_noncent_generic(
                params_alt,
                c_equal[i:i + 1],  # noqa
                v[i:i + 1],
                cov_p,
                diff=None,  # noqa
                joint=False)
            assert_allclose(nc_wt_vec[i], nc_wt_i, rtol=1e-13)
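
A self-contained sketch of the identity this test relies on: the joint Wald noncentrality (R b - v)' (R V R')^{-1} (R b - v), evaluated at the estimated parameters, equals the Wald F statistic times df_num.

import numpy as np
from statsmodels.regression.linear_model import OLS

rng = np.random.default_rng(0)
x = np.column_stack([np.ones(50), rng.normal(size=(50, 2))])
y = x @ [1.0, 0.5, -0.5] + rng.normal(size=50)
res = OLS(y, x).fit()

R = np.eye(3)[1:]   # constraint: b1 = b2 = 0
v = np.zeros(2)
wt = res.wald_test(R, scalar=True)

diff = R.dot(res.params) - v
nc = diff.dot(np.linalg.solve(R.dot(res.cov_params()).dot(R.T), diff))
print(np.allclose(nc, wt.statistic * wt.df_num))  # True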
Example #16
def sequential_break_test(endog, exog, nbreaks_null=0, trim=0.15, vcov=None):
    """
    Test that one more break exists in the sample, given that there are at
    least `nbreaks_null` breaks.

    TODO obviously in sequential estimation of breakpoints, this will right now
        recalculate the SSRs for the segments of the model many times. Easy
        optimization is possible
    TODO add support for dates, and return breakdates as well
    TODO add support for p-value calculation (Hansen 1997)
    TODO add support for different trimming values when calculating the null
         model and when estimating the additional break (see Footnote 4,
         Bai and Perron 2003)

    Parameters
    ----------
    endog : array-like
        The endogenous variable.
    exog : array-like
        The exogenous matrix.
    nbreaks_null : integer
        The number of breakpoints in the null hypothesis
    trim : float or int, optional
        If a float, the minimum percentage of observations in each regime,
        if an integer, the minimum number of observations in each regime.
    vcov : callback, optional
        Optionally provide a callback to modify the variance / covariance
        matrix used in calculating the test statistic.
    
    Returns
    -------
    fstat : float
        The test statistic.
    crits : iterable
        The critical values.
    """
    nobs = len(endog)
    if trim < 1:
        trim = int(np.floor(trim * nobs))

    exog = np.asarray(exog)
    # TODO Is there a better way to test for and fix this? (the problem is
    #      that if the exog argument is a list, so that exog is 1dim,
    #      np.concatenate fails to create a matrix, instead just makes a
    #      long vector)
    if exog.ndim == 1:
        exog = exog[:, None]
    
    # TODO add a test to make sure trim is consistent with the number of
    #      breaks in both the null and alternative hypotheses
    
    # Estimate the breakpoints under the null
    breakpoints, ssr_null = find_breakpoints(endog, exog, nbreaks_null, trim)

    # Get the indices for the start and end of each segment; materialize the
    # list, since it is indexed by segment number below (a bare zip would be
    # a one-shot iterator on Python 3)
    segments = list(zip((0,) + breakpoints, breakpoints + (nobs,)))

    # For each segment (there are nbreaks_null+1), estimate an additional
    # breakpoint
    optimal_segment = None
    new_breakpoints = None
    min_ssr = np.inf  # note: the np.Inf alias was removed in NumPy 2.0
    for segment in range(nbreaks_null+1):
        start, end = segments[segment]
        # Add one to the end, since breakpoint is actually the last observation
        # in the previous regime
        end += 1
        segment_endog = endog[start:end]
        segment_exog = exog[start:end]

        # TODO this involves re-calculating SSR for lots of segments. Should
        #      use a cache of the upper-triangular set of SSRs from the
        #      find_breakpoints estimation above
        try:
            breakpoint, ssr = find_breakpoints(segment_endog, segment_exog, 1,
                                               trim)

            if ssr < min_ssr:
                min_ssr = ssr
                optimal_segment = segment
                new_breakpoints = breakpoint
        except InvalidRegimeError:
            pass

    if optimal_segment is None:
        raise InvalidRegimeError('no segment admits an additional break under'
                                 ' the given trimming')

    # Find the parameters
    start, end = segments[optimal_segment]
    end += 1
    segment_exog, _, _ = build_exog(exog[start:end], new_breakpoints)
    res = OLS(endog[start:end], segment_exog).fit()

    # Calculate the test statistic
    nbreaks = 1
    # number of parameters subject to break (hard-coded to the entire exog for
    # now) and number of parameters not subject to break (hard-coded to zero)
    q = exog.shape[1]
    p = 0
    # R picks out the difference of coefficients in adjacent regimes; note
    # this assumes q == 1, otherwise the shapes below do not conform
    R = np.zeros((nbreaks, nbreaks + 1))
    R[np.diag_indices(nbreaks)] = -1
    R[np.arange(nbreaks), np.arange(1, nbreaks + 1)] = 1
    Rd = R.dot(res.params[:, None])

    V = vcov(res) if vcov else res.cov_params()

    nobs_segment = end - start
    const = (nobs_segment - (nbreaks+1)*q - p) / (nobs_segment*nbreaks*q)
    fstat = const * Rd.T.dot(np.linalg.inv(R.dot(V).dot(R.T))).dot(Rd)

    # the quadratic form above is a 1x1 array; return it as a scalar
    return fstat[0, 0]
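
Both break_test and sequential_break_test accept a `vcov` callback. A minimal sketch of what such a callback might look like, swapping in the heteroskedasticity-robust covariance that statsmodels results objects expose as cov_HC1 (the callback name is hypothetical):

import numpy as np
from statsmodels.regression.linear_model import OLS

def vcov_hc1(res):
    # hypothetical callback: return a robust parameter covariance instead of
    # the default res.cov_params()
    return res.cov_HC1

# demo on a toy fit; break_test itself is not called here, since its helpers
# (find_breakpoints, build_exog) are not shown in these examples
rng = np.random.default_rng(0)
x = np.column_stack([np.ones(50), rng.normal(size=50)])
y = x @ [1.0, 2.0] + rng.normal(size=50)
res = OLS(y, x).fit()
print(vcov_hc1(res))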