def test_glmpoisson_screening():
    """Check that variable screening reproduces the oracle Poisson fit.

    An oracle ``GLMPenalized`` model is fit on the truly nonzero columns
    returned by ``_get_poisson_data``.  ``VariableScreening`` is then
    started from a constant-only model with ``x[:, 1:]`` as candidate
    regressors.  The selected indices (after sorting) must equal the true
    ones, the final fit must report convergence, and the final parameters
    must agree with the oracle parameters to ``atol=5e-6``.
    """

    def _labels(indices):
        # one label per index; the first entry is relabelled as the constant
        labels = ['var%4d' % ii for ii in indices]
        labels[0] = 'const'
        return labels

    endog, exog, idx_true, beta_true = _get_poisson_data()

    names_true = _labels(idx_true)
    parameters = pd.DataFrame(beta_true[idx_true], index=names_true,
                              columns=['true'])

    # oracle fit: only the columns that are truly nonzero
    exog_oracle = pd.DataFrame(exog[:, idx_true], columns=names_true)
    oracle_model = GLMPenalized(endog, exog_oracle, family=family.Poisson())
    parameters['oracle'] = oracle_model.fit().params

    # screening starts from a model that contains only a column of ones
    start_model = GLMPenalized(endog, np.ones(len(endog)),
                               family=family.Poisson())
    res_screen = VariableScreening(start_model).screen_exog(exog[:, 1:],
                                                            maxiter=10)

    assert_equal(np.sort(res_screen.idx_nonzero), idx_true)

    names_final = _labels(res_screen.idx_nonzero)
    res_final = res_screen.results_final

    # smoke test
    res_final.summary(xname=names_final)
    res_screen.results_pen.summary()
    assert_equal(res_final.mle_retvals['converged'], True)

    # align the final estimates with the oracle estimates by label
    final = pd.Series(res_final.params, index=names_final, name='final')
    parameters = parameters.join(final, how='outer')

    assert_allclose(parameters['oracle'], parameters['final'], atol=5e-6)
Exemple #2
0
    def _initialize(cls):
        """Fit a plain Poisson GLM and a GLMPenalized with zero penalty.

        Stores the ``GLM`` fit as ``cls.res2`` and the ``GLMPenalized`` fit
        (``pen_weight`` set to 0, optimizer ``bfgs``) as ``cls.res1``, and
        sets ``cls.atol`` to 5e-6.
        """
        endog, exog = cls.y, cls.x

        # reference: unpenalized GLM
        cls.res2 = GLM(endog, exog, family=family.Poisson()).fit()

        # penalized model with the penalty weight switched off
        pen_model = GLMPenalized(endog, exog, family=family.Poisson(),
                                 penal=cls.penalty)
        pen_model.pen_weight = 0
        cls.res1 = pen_model.fit(method='bfgs', maxiter=100, disp=0)

        cls.atol = 5e-6
Exemple #3
0
    def _initialize(cls):
        """Fit an oracle Poisson GLM and a penalized GLM on all columns.

        ``cls.res2`` is a ``GLM`` fit restricted to the first
        ``cls.k_nonzero`` columns of ``cls.x``.  ``cls.res1`` is a
        ``GLMPenalized`` fit on every column with ``pen_weight`` scaled by
        1.5 and ``penal.tau`` set to 0.05.  ``cls.exog_index`` is the slice
        covering the first ``k_nonzero`` positions and ``cls.atol`` is 5e-3.
        """
        endog, exog = cls.y, cls.x
        k_nonzero = cls.k_nonzero

        # oracle model: only the leading k_nonzero columns
        exog_oracle = exog[:, :k_nonzero]
        cls.res2 = GLM(endog, exog_oracle, family=family.Poisson()).fit()

        pen_model = GLMPenalized(endog, exog, family=family.Poisson(),
                                 penal=cls.penalty)
        pen_model.pen_weight *= 1.5  # same as discrete Poisson
        pen_model.penal.tau = 0.05
        cls.res1 = pen_model.fit(method='bfgs', maxiter=100)

        cls.exog_index = slice(k_nonzero)
        cls.atol = 5e-3
    def setup_class(cls):
        """Fit a Poisson GLMGam with B-spline smoothers on the autos data.

        Builds centered cubic B-splines for ``weight`` (df=12) and ``hp``
        (df=10), fits ``city_mpg ~ fuel + drive`` twice (default optimizer
        with ``use_t=False``, and ``newton`` with ``use_t=True``), and stores
        the fits together with the reference results and tolerances on
        ``cls``.
        """
        # s_scale is same as before
        s_scale = np.array([2.443955e-06, 0.007945455])
        cls.s_scale = s_scale
        sp = np.array([40491.3940640059, 232455.530262537])
        # penalization weights handed to GLMGam as ``alpha``
        penalty_weights = 1 / s_scale * sp / 2

        cls.exog = patsy.dmatrix('fuel + drive', data=df_autos)

        spline_data = df_autos[['weight', 'hp']].values
        bs = BSplines(
            spline_data,
            df=[12, 10],
            degree=[3, 3],
            variable_names=['weight', 'hp'],
            constraints='center',
            include_intercept=True,
        )

        model = GLMGam.from_formula(
            'city_mpg ~ fuel + drive',
            df_autos,
            smoother=bs,
            family=family.Poisson(),
            alpha=penalty_weights,
        )

        res_default = model.fit(use_t=False)
        cls.res1a = res_default
        cls.res1b = model.fit(method='newton', use_t=True)

        # compare the unwrapped results of the first fit with the reference
        cls.res1 = res_default._results
        cls.res2 = results_mpg_bs_poisson.mpg_bs_poisson

        cls.rtol_fitted = 1e-8
        cls.covp_corrfact = 1  # not needed
    def setup_class(cls):
        """Configure the Poisson case and run the shared initialization.

        Runs the parent ``setup_class`` first, then stores the Poisson family
        and the Poisson random-variate function on ``cls`` before calling
        ``cls.init()``.
        """
        # initialize DGP
        super(TestGAMPoisson, cls).setup_class()

        cls.rvs = stats.poisson.rvs
        cls.family = family.Poisson()

        cls.init()
Exemple #6
0
    def __init__(self):
        """Configure the Poisson case and run the shared initialization.

        Runs the parent ``__init__`` first, then stores the Poisson family
        and the Poisson random-variate function on the instance before
        calling ``self.init()``.
        """
        # Zero-argument super() resolves against the class this method is
        # defined in.  ``super(self.__class__, self)`` resolves against the
        # runtime class of ``self`` instead, so it recurses forever as soon
        # as this class is subclassed.
        super().__init__()  # initialize DGP

        self.family = family.Poisson()
        self.rvs = stats.poisson.rvs

        self.init()
Exemple #7
0
    def _initialize(cls):
        """Fit PoissonPenalized and GLMPenalized with identical settings.

        Both models use ``cls.penalty``, have ``pen_weight`` scaled by 1.5
        and ``penal.tau`` set to 0.05, and are fit with ``bfgs`` and an
        ``HC0`` covariance.  The ``PoissonPenalized`` fit is stored as
        ``cls.res2`` and the ``GLMPenalized`` fit as ``cls.res1``.
        ``cls.exog_index`` selects everything and ``cls.atol`` is 1e-4.
        """
        endog, exog = cls.y, cls.x
        fit_kwds = dict(cov_type='HC0', method='bfgs', maxiter=100, disp=0)

        def _tune_and_fit(model):
            # apply the shared penalty settings, then fit
            model.pen_weight *= 1.5  # same as discrete Poisson 1.5
            model.penal.tau = 0.05
            return model.fit(**fit_kwds)

        cls.res2 = _tune_and_fit(
            PoissonPenalized(endog, exog, penal=cls.penalty))

        cls.res1 = _tune_and_fit(
            GLMPenalized(endog, exog, family=family.Poisson(),
                         penal=cls.penalty))

        cls.exog_index = slice(None)

        cls.atol = 1e-4
    def __init__(self,
                 family_name='normal',
                 link_name='identity',
                 fam_params=None):
        """Constructor.

        Parameters
        ----------
        family_name : str
            Distribution family, matched case-insensitively. One of
            ``'normal'``, ``'binomial'`` or ``'poisson'``.
        link_name : str or None
            Link function, matched case-insensitively. One of ``'logit'``,
            ``'log'``, ``'identity'``, ``'sqrt'`` or ``'probit'``. If empty
            or ``None``, no ``link`` argument is passed to the family
            constructor and ``self.link`` is ``None``.
        fam_params : optional
            Passed as the second argument of ``np.random.normal`` by the
            sampler of the ``'normal'`` family; not used by the other
            families.

        Raises
        ------
        ValueError
            If ``link_name`` (when given) or ``family_name`` is not one of
            the supported names.
        """
        # Store link
        self.link_name = link_name
        family_kwargs = {}
        if link_name:
            link_key = link_name.lower()
            if link_key not in ('logit', 'log', 'identity', 'sqrt', 'probit'):
                # previously this left ``self.link`` unset and failed later
                # with an unrelated AttributeError
                raise ValueError('unknown link_name: %r' % (link_name,))
            # each supported link is an attribute of ``L`` with the same
            # lower-case name
            self.link = getattr(L, link_key)
            family_kwargs['link'] = self.link
        else:
            # no link requested: the family constructor is called without one
            self.link = None

        # Store family
        self.family_name = family_name
        family_key = family_name.lower() if family_name else ''
        if family_key == 'normal':
            self.family = F.Gaussian(**family_kwargs)

            def rand(x):
                return np.random.normal(x, fam_params)
        elif family_key == 'binomial':
            self.family = F.Binomial(**family_kwargs)

            def rand(x):
                return np.random.binomial(1, x)
        elif family_key == 'poisson':
            self.family = F.Poisson(**family_kwargs)

            def rand(x):
                return np.random.poisson(x)
        else:
            # previously ``rand`` stayed unbound and the assignment below
            # raised UnboundLocalError
            raise ValueError('unknown family_name: %r' % (family_name,))

        self.rand = rand
        self.in_columns = None
        self.out_columns = None
    plt.title('gam.AdditiveModel')

# NOTE(review): `example`, `y`, `d`, `x1`, `f1`, `standardize` and `GAM` are
# not defined in this part of the script; they presumably come from earlier
# lines -- confirm before moving this code.

# Example 2: GAM with a Binomial family, timed.
if example == 2:
    print("binomial")
    f = family.Binomial()
    # one Bernoulli draw per element of the inverse-link transform of y
    b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)])
    b.shape = y.shape
    m = GAM(b, d, family=f)
    # toc is taken before the fit and tic after it, so the printed value is
    # the elapsed time of the fit
    toc = time.time()
    m.fit(b)
    tic = time.time()
    print(tic - toc)

# Example 3: GAM with a Poisson family, timed.
if example == 3:
    print("Poisson")
    f = family.Poisson()
    # rescale y so that its maximum is 3
    y = y / y.max() * 3
    # NOTE(review): yp is not used in the visible part of the script; the
    # same transform is recomputed on the next statement
    yp = f.link.inverse(y)
    # one Poisson draw per element of the inverse-link transform of y,
    # stored as float
    p = np.asarray([scipy.stats.poisson.rvs(p) for p in f.link.inverse(y)],
                   float)
    p.shape = y.shape
    m = GAM(p, d, family=f)
    # toc is taken before the fit and tic after it (see example 2)
    toc = time.time()
    m.fit(p)
    tic = time.time()
    print(tic - toc)

# Plot the standardized first smoother of the fitted model (red) against the
# standardized f1, both evaluated at x1; `m` is whichever model was assigned
# above.
plt.figure()
plt.plot(x1, standardize(m.smoothers[0](x1)), 'r')
plt.plot(x1, standardize(f1(x1)), linewidth=2)
plt.figure()