def test_crossed_logit_vb_formula():

    data = gen_crossed_logit_pandas(10, 10, 1, 2)

    fml = "y ~ fe"
    fml_vc = {"a": "0 + C(a)", "b": "0 + C(b)"}
    glmm1 = BinomialBayesMixedGLM.from_formula(fml, fml_vc, data, vcp_p=0.5)
    rslt1 = glmm1.fit_vb()

    glmm2 = BinomialBayesMixedGLM(glmm1.endog,
                                  glmm1.exog,
                                  glmm1.exog_vc,
                                  glmm1.ident,
                                  vcp_p=0.5)
    rslt2 = glmm2.fit_vb()

    assert_allclose(rslt1.params, rslt2.params, atol=1e-4)

    rslt1.summary()
    rslt2.summary()

    # Both fits use VB, so cov_params() returns a 1-d array of posterior
    # variances, which must all be positive.
    for rslt in rslt1, rslt2:
        cp = rslt.cov_params()
        p = len(rslt.params)
        assert_equal(cp.shape, np.r_[p, ])
        assert_equal(cp > 0, np.ones(p, dtype=bool))
def test_elbo_grad():
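    # Verify the analytic ELBO gradient (vb_elbo_grad) against a numerical
    # gradient of vb_elbo, for binomial and Poisson families with simple
    # and crossed random-effects designs.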

    for f in range(2):
        for j in range(2):

            if f == 0:
                if j == 0:
                    y, exog_fe, exog_vc, ident = gen_simple_logit(10, 10, 2)
                else:
                    y, exog_fe, exog_vc, ident = gen_crossed_logit(
                        10, 10, 1, 2)
            elif f == 1:
                if j == 0:
                    y, exog_fe, exog_vc, ident = gen_simple_poisson(
                        10, 10, 0.5)
                else:
                    y, exog_fe, exog_vc, ident = gen_crossed_poisson(
                        10, 10, 1, 0.5)

            exog_vc = sparse.csr_matrix(exog_vc)

            if f == 0:
                glmm1 = BinomialBayesMixedGLM(y, exog_fe, exog_vc, ident,
                                              vcp_p=0.5)
            else:
                glmm1 = PoissonBayesMixedGLM(y, exog_fe, exog_vc, ident,
                                             vcp_p=0.5)

            rslt1 = glmm1.fit_map()

            # Evaluate the gradient at the MAP estimate, at the origin,
            # and at a random point.
            for k in range(3):

                if k == 0:
                    vb_mean = rslt1.params
                    vb_sd = np.ones_like(vb_mean)
                elif k == 1:
                    vb_mean = np.zeros(len(vb_mean))
                    vb_sd = np.ones_like(vb_mean)
                else:
                    vb_mean = np.random.normal(size=len(vb_mean))
                    vb_sd = np.random.uniform(1, 2, size=len(vb_mean))

                mean_grad, sd_grad = glmm1.vb_elbo_grad(vb_mean, vb_sd)

                def elbo(vec):
                    n = len(vec) // 2
                    return glmm1.vb_elbo(vec[:n], vec[n:])

                x = np.concatenate((vb_mean, vb_sd))
                g1 = approx_fprime(x, elbo, 1e-5)
                n = len(x) // 2

                mean_grad_n = g1[:n]
                sd_grad_n = g1[n:]

                assert_allclose(mean_grad, mean_grad_n, atol=1e-2,
                                rtol=1e-2)
                assert_allclose(sd_grad, sd_grad_n, atol=1e-2,
                                rtol=1e-2)
Example #6
def test_crossed_logit_map():

    y, exog_fe, exog_vc, ident = gen_crossed_logit(10, 10, 1, 2)
    exog_vc = sparse.csr_matrix(exog_vc)

    glmm = BinomialBayesMixedGLM(y, exog_fe, exog_vc, ident, vcp_p=0.5)
    rslt = glmm.fit_map()

    # At the MAP estimate, the log-posterior gradient should be zero.
    assert_allclose(glmm.logposterior_grad(rslt.params),
                    np.zeros_like(rslt.params),
                    atol=1e-4)

    # Check dimensions and PSD status of cov_params
    cp = rslt.cov_params()
    p = len(rslt.params)
    assert_equal(cp.shape, np.r_[p, p])
    np.linalg.cholesky(cp)
def test_crossed_logit_vb():

    y, exog_fe, exog_vc, ident = gen_crossed_logit(10, 10, 1, 2)

    glmm1 = BinomialBayesMixedGLM(
        y, exog_fe, exog_vc, ident, vcp_p=0.5, fe_p=0.5)
    rslt1 = glmm1.fit_map()

    glmm2 = BinomialBayesMixedGLM(
        y, exog_fe, exog_vc, ident, vcp_p=0.5, fe_p=0.5)
    rslt2 = glmm2.fit_vb(mean=rslt1.params)

    rslt1.summary()
    rslt2.summary()

    assert_allclose(
        rslt1.params[0:5],
        np.r_[-5.43073978e-01, -2.46197518e+00, -2.36582801e+00,
              -9.64030461e-03, 2.32701078e-03],
        rtol=1e-4,
        atol=1e-4)

    assert_allclose(
        rslt1.cov_params().flat[0:5],
        np.r_[4.12927123e-02, -2.04448923e-04, 4.64829219e-05, 1.20377543e-04,
              -1.45003234e-04],
        rtol=1e-4,
        atol=1e-4)

    assert_allclose(
        rslt2.params[0:5],
        np.r_[-0.70834417, -0.3571011, 0.19126823, -0.36074489, 0.058976],
        rtol=1e-4,
        atol=1e-4)

    assert_allclose(
        rslt2.cov_params()[0:5],
        np.r_[0.05212492, 0.04729656, 0.03916944, 0.25921842, 0.25782576],
        rtol=1e-4,
        atol=1e-4)

    for rslt in rslt1, rslt2:
        cp = rslt.cov_params()
        p = len(rslt.params)
        if rslt is rslt1:
            # MAP fit: full posterior covariance matrix, which must be PSD.
            assert_equal(cp.shape, np.r_[p, p])
            np.linalg.cholesky(cp)
        else:
            # VB fit: factorized posterior, so a 1-d array of variances.
            assert_equal(cp.shape, np.r_[p, ])
            assert_equal(cp > 0, np.ones(p, dtype=bool))
def test_scale_map():
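    # With fixed effects already standardized, fit_map should give the same
    # estimates whether or not internal scaling (scale_fe) is used.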

    y, exog_fe, exog_vc, ident = gen_simple_logit(10, 10, 0)
    exog_fe -= exog_fe.mean(0)
    exog_fe /= exog_fe.std(0)
    exog_vc = sparse.csr_matrix(exog_vc)

    rslts = []
    for scale_fe in False, True:
        glmm = BinomialBayesMixedGLM(
            y, exog_fe, exog_vc, ident, vcp_p=0.5, fe_p=0.5)
        rslt = glmm.fit_map(scale_fe=scale_fe)
        rslts.append(rslt)

    assert_allclose(rslts[0].params, rslts[1].params, rtol=1e-4)
def test_doc_examples():

    np.random.seed(8767)
    n = 200
    m = 20
    data = pd.DataFrame({"Year": np.random.uniform(0, 1, n),
                         "Village": np.random.randint(0, m, n)})
    data['year_cen'] = data['Year'] - data.Year.mean()

    # Binomial outcome
    lpr = np.random.normal(size=m)[data.Village]
    lpr += np.random.normal(size=m)[data.Village] * data.year_cen
    y = (np.random.uniform(size=n) < 1 / (1 + np.exp(-lpr)))
    data["y"] = y.astype(np.int)

    # These lines should agree with the example in the class docstring.
    random = {"a": '0 + C(Village)', "b": '0 + C(Village)*year_cen'}
    model = BinomialBayesMixedGLM.from_formula(
                 'y ~ year_cen', random, data)
    result = model.fit_vb()
    _ = result

    # Poisson outcome
    lpr = np.random.normal(size=m)[data.Village]
    lpr += np.random.normal(size=m)[data.Village] * data.year_cen
    data["y"] = np.random.poisson(np.exp(lpr))

    # These lines should agree with the example in the class docstring.
    random = {"a": '0 + C(Village)', "b": '0 + C(Village)*year_cen'}
    model = PoissonBayesMixedGLM.from_formula(
                 'y ~ year_cen', random, data)
    result = model.fit_vb()
    _ = result
Example #13
def fit_mixed_lm(subjects_data, formula, random_factors_formulas=None):

    model = BinomialBayesMixedGLM.from_formula(formula,
                                               random_factors_formulas,
                                               subjects_data)
    result = model.fit_vb()

    return model, result
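
# Hypothetical usage of fit_mixed_lm (the data frame and column names are
# illustrative, not from the original source):
# model, result = fit_mixed_lm(subjects_data, "correct ~ condition",
#                              {"subject": "0 + C(subject)"})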
    def _train(self, X, y):
        # Initialize the output
        mapping = {}

        # Estimate target type, if necessary
        if self.binomial_target is None:
            if len(y.unique()) <= 2:
                binomial_target = True
            else:
                binomial_target = False
        else:
            binomial_target = self.binomial_target

        # The estimation need not fully converge; seeding the RNG makes
        # repeated runs at least converge to the same value.
        np.random.seed(2001)

        for switch in self.ordinal_encoder.category_mapping:
            col = switch.get('col')
            values = switch.get('mapping')
            data = self._rename_and_merge(X, y, col)

            try:
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore")
                    if binomial_target:
                        # Classification, returns (regularized) log odds per category as stored in vc_mean
                        # Note: md.predict() returns: output = fe_mean + vcp_mean + vc_mean[category]
                        md = bgmm.from_formula('target ~ 1', {'a': '0 + C(feature)'}, data).fit_vb()
                        index_names = [int(float(re.sub(r'C\(feature\)\[(\S+)\]', r'\1', index_name))) for index_name in md.model.vc_names]
                        estimate = pd.Series(md.vc_mean, index=index_names)
                    else:
                        # Regression, returns (regularized) mean deviation of the observation's category from the global mean
                        md = smf.mixedlm('target ~ 1', data, groups=data['feature']).fit()
                        tmp = dict()
                        for key, value in md.random_effects.items():
                            tmp[key] = value[0]
                        estimate = pd.Series(tmp)
            except np.linalg.LinAlgError:
                # Singular matrix -> just return all zeros
                estimate = pd.Series(np.zeros(len(values)), index=values)

            # Ignore unique columns. This helps to prevent overfitting on id-like columns
            if len(X[col].unique()) == len(y):
                estimate[:] = 0

            # The ordinal encoder codes unknown categories as -1 and missing
            # values as -2; fill those sentinel entries here.
            if self.handle_unknown == 'return_nan':
                estimate.loc[-1] = np.nan
            elif self.handle_unknown == 'value':
                estimate.loc[-1] = 0

            if self.handle_missing == 'return_nan':
                estimate.loc[values.loc[np.nan]] = np.nan
            elif self.handle_missing == 'value':
                estimate.loc[-2] = 0

            mapping[col] = estimate

        return mapping
Example #18
def test_simple_logit_map():

    y, exog_fe, exog_vc, ident = gen_simple_logit(10, 10, 2)
    exog_vc = sparse.csr_matrix(exog_vc)

    glmm = BinomialBayesMixedGLM(y, exog_fe, exog_vc, ident, vcp_p=0.5)
    rslt = glmm.fit_map()

    assert_allclose(glmm.logposterior_grad(rslt.params),
                    np.zeros_like(rslt.params),
                    atol=1e-3)

    # Test the predict method: with linear=True it returns the linear
    # predictor, otherwise fitted probabilities, which must lie in [0, 1].
    for linear in False, True:
        for exog in None, exog_fe:
            pr1 = rslt.predict(linear=linear, exog=exog)
            pr2 = glmm.predict(rslt.params, linear=linear, exog=exog)
            assert_allclose(pr1, pr2)
            if not linear:
                assert_equal(pr1.min() >= 0, True)
                assert_equal(pr1.max() <= 1, True)
Example #20
def test_simple_logit_vb():

    y, exog_fe, exog_vc, ident = gen_simple_logit(10, 10, 0)
    exog_vc = sparse.csr_matrix(exog_vc)

    glmm1 = BinomialBayesMixedGLM(
        y, exog_fe, exog_vc, ident, vcp_p=0.5, fe_p=0.5)
    rslt1 = glmm1.fit_map()

    glmm2 = BinomialBayesMixedGLM(
        y, exog_fe, exog_vc, ident, vcp_p=0.5, fe_p=0.5)
    rslt2 = glmm2.fit_vb(rslt1.params)

    rslt1.summary()
    rslt2.summary()

    assert_allclose(
        rslt1.params[0:5],
        np.r_[0.75330405, -0.71643228, -2.49091288, -0.00959806, 0.00450254],
        rtol=1e-4,
        atol=1e-4)

    assert_allclose(
        rslt2.params[0:5],
        np.r_[0.79338836, -0.7599833, -0.64149356, -0.24772884, 0.10775366],
        rtol=1e-4,
        atol=1e-4)

    for rslt in rslt1, rslt2:
        cp = rslt.cov_params()
        p = len(rslt.params)
        if rslt is rslt1:
            # MAP fit: full posterior covariance matrix, which must be PSD.
            assert_equal(cp.shape, np.r_[p, p])
            np.linalg.cholesky(cp)
        else:
            # VB fit: factorized posterior, so a 1-d array of variances.
            assert_equal(cp.shape, np.r_[p, ])
            assert_equal(cp > 0, np.ones(p, dtype=bool))
def test_logit_map_crossed_formula():

    data = gen_crossed_logit_pandas(10, 10, 1, 0.5)

    fml = "y ~ fe"
    fml_vc = {"a": "0 + C(a)", "b": "0 + C(b)"}
    glmm = BinomialBayesMixedGLM.from_formula(fml, fml_vc, data, vcp_p=0.5)
    rslt = glmm.fit_map()

    assert_allclose(glmm.logposterior_grad(rslt.params),
                    np.zeros_like(rslt.params),
                    atol=1e-4)
    rslt.summary()

    r = rslt.random_effects("a")
    assert_allclose(r.iloc[0, :].values,
                    np.r_[-0.02004904, 0.094014],
                    atol=1e-4)

    # Check dimensions and PSD status of cov_params
    cm = rslt.cov_params()
    p = rslt.params.shape[0]
    assert_equal(list(cm.shape), [p, p])
    np.linalg.cholesky(cm)
Example #30
    df = get_data(group)
    for outcome in "bucketacc", "bucketcomp":

        fml = get_formula(adj_time=adj_time)
        fmx = outcome + " ~ " + fml

        yl = {
            "bucketacc": "target accuracy",
            "bucketcomp": "competitor accuracy"
        }[outcome]

        vcx = get_vcf(vcs, adj_time)

        model = BinomialBayesMixedGLM.from_formula(
            fmx, vcx, df, vcp_p=3, fe_p=3)

        with open(group + ".pkl", "rb") as fid:
            pars = pickle.load(fid)
        if adj_time:
            tm = pars["tm_adj"]
            ts = pars["ts_adj"]
        else:
            tm = pars["tm"]
            ts = pars["ts"]

        if use_vb:
            params = pd.read_csv(
                "%s_params_%d_%s_%s_vb.csv" % (group, vcs, outcome, adjs))
        else:
select_cols = [
    "play_id", "game_id", "touchdown", "yards_gained", "turnover", "posteam",
    "defteam", "yardline_100", "half_seconds_remaining", "play_type",
    "shotgun", "no_huddle", "qb_dropback", "pass_length", "pass_location",
    "run_location", "run_gap", "field_goal_result", "opp_fg_prob",
    "opp_td_prob", "fumble_forced", "fumble_not_forced", "fumble_lost",
    "penalty"
]

nfl_rush_2019 = nfl_2019[nfl_2019["play_type"] == "rush"]

# first, fit rush outcome models

# 1 - penalty
# Note: vc_formulas must be a dict mapping names to one-sided formulas;
# the key names below are illustrative.
rush_penalty_mod = BinomialBayesMixedGLM.from_formula(
    'penalty ~ shotgun + no_huddle + qb_dropback + run_location + run_gap',
    {'rusher': '0 + rusher_id', 'def': '0 + def_id'},
    data=nfl_rush_2019)

rush_penalty_result = rush_penalty_mod.fit_vb()

# 2 - rushing yards
rush_yard_mod = PoissonBayesMixedGLM.from_formula(
    'yards_gained ~ shotgun + no_huddle + qb_dropback + run_location + run_gap',
    {'rusher': '0 + rusher_id', 'def': '0 + def_id'},
    data=nfl_rush_2019)

rush_yard_result = rush_yard_mod.fit_vb()

# 3 - rushing turnovers (fumbles)
rush_turnover_mod = BinomialBayesMixedGLM.from_formula(
    'turnover ~ shotgun + no_huddle + qb_dropback + run_location + run_gap',
    {'rusher': '0 + rusher_id', 'def': '0 + def_id'},
    data=nfl_rush_2019)

rush_turnover_result = rush_turnover_mod.fit_vb()
Example #32
            vcn = ["Sample", "Exon", "Gene", "Person"]
            fml = "Imprinted ~ KidRank + C(Lib) + Boy"

            vc_fml = {"Sample": "0 + C(Sample)", "Exon": "0 + C(Exon)",
                      "Gene": "0 + C(Gene)", "Person": "0 + C(Person)"}

            if kc == 3:
                fml += " + Pat"
                fml = fml.replace("Pat", "Pat01")
                fml = fml.replace("C(Lib)", "C(Lib)*Pat01")
                fml = fml.replace("KidRank", "KidRank*Pat01")

            dy = dx.drop("PlacentaWeight", axis=1)

            if kc != 3:
                model = BinomialBayesMixedGLM.from_formula(fml, vc_fml, dy, vcp_p=3, fe_p=3)
            else:
                ident = []
                exog_vc = []

                for g in dy.Gene.unique():
                    ident.append(genecode[g])
                    exog_vc.append((dy.Gene == g).astype(int))

                for e in dy.Exon.unique():
                    ident.append(exoncode[e])
                    exog_vc.append((dy.Exon == e).astype(int))

                for p in dy.Person.unique():
                    ident.append(4)
                    exog_vc.append((dy.Person == p).astype(int))
Example #33
def glmm_model(data, features, y, random_effects):
    model = BinomialBayesMixedGLM.from_formula(f'{y} ~ {features}',
                                               random_effects, data)
    result = model.fit_vb()
    return result
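
# Hypothetical usage of glmm_model (the data frame and column names are
# illustrative): a binomial GLMM of accuracy on condition with a random
# intercept per subject.
# result = glmm_model(df, 'condition', 'accuracy',
#                     {'subject': '0 + C(subject)'})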
Example #34
# Recode severity ('轻微' = mild -> 0, '严重' = severe -> 1) and treatment
# ('标准' = standard -> 0, '新药' = new drug -> 1).
tmp['诊断严重程度'] = tmp['诊断严重程度'].replace({'轻微': 0, '严重': 1})
tmp['治疗'] = tmp['治疗'].replace({'标准': 0, '新药': 1})
tmp = tmp.reset_index(drop=True)
# Rename Chinese columns to pinyin: 周数 = week, 值 = value, 组 = group.
tmp = tmp.rename(columns={
    '周数': 'zhous',
    '值': 'zhi',
    '组': 'zu',
    '诊断严重程度': 'severity',
    '治疗': 'drug'
})
tmp.to_csv(r"D:/书籍资料整理/属性数据分析/抑郁症治疗_展开.csv")
random = {"a": '0 + C(zu)'}

model = BinomialBayesMixedGLM.from_formula(
    'zhi ~ severity + drug + zhous + drug:zhous', random, tmp)
result = model.fit_vb()
# The results broadly match those in the book; the difference is presumably
# that the book uses Gauss-Hermite quadrature while statsmodels uses a
# Bayesian (variational) method. Note that the reported values are
# variances, so take the square root to get standard deviations.
print(result.summary())

data = pd.read_csv(r"D:/书籍资料整理/属性数据分析/老鼠.csv")
random = {"a": '0 + C(簇)'}

model = BinomialBayesMixedGLM.from_formula('死亡 ~ C(组) ', random, data)
result = model.fit_vb()
result.summary()
Example #35
import numpy as np
from statsmodels.genmod.bayes_mixed_glm import (BinomialBayesMixedGLM,
                                                PoissonBayesMixedGLM)
import pandas as pd
from scipy import sparse
from numpy.testing import assert_allclose, assert_equal
from scipy.optimize import approx_fprime

np.random.seed(8767)
n = 200
m = 20
data = pd.DataFrame({
    "Year": np.random.uniform(0, 1, n),
    "Village": np.random.randint(0, m, n)
})
data['year_cen'] = data['Year'] - data.Year.mean()

# Binomial outcome
lpr = np.random.normal(size=m)[data.Village]
lpr += np.random.normal(size=m)[data.Village] * data.year_cen
y = (np.random.uniform(size=n) < 1 / (1 + np.exp(-lpr)))
data["y"] = y.astype(int)

# These lines should agree with the example in the class docstring.
random = {"a": '0 + C(Village)'}

print(data)
model = BinomialBayesMixedGLM.from_formula('y ~ year_cen', random, data)
result = model.fit_vb()
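
# A brief follow-up sketch (assuming the fit above): fit_vb approximates the
# posterior with independent Gaussians, so cov_params() returns a 1-d array
# of posterior variances rather than a full covariance matrix.
print(result.summary())
post_sd = np.sqrt(result.cov_params())  # posterior standard deviations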