Example #1
0
 def __init__(self,
              endog,
              exog,
              smoothers=None,
              family=None):
     '''
     Initialize the model through both parent initializers.

     Parameters
     ----------
     endog
         Forwarded to ``GLM.__init__``.
     exog
         Forwarded to ``AdditiveModel.__init__`` and ``GLM.__init__``.
     smoothers : optional
         Forwarded to ``AdditiveModel.__init__``.
     family : optional
         Forwarded to both parent initializers. When None (the default),
         a new ``families.Gaussian()`` is created for this instance.
     '''
     # A default written in the signature is evaluated once, when the
     # function is defined, and would be shared by every instance that
     # relies on it; build the default per call instead.
     if family is None:
         family = families.Gaussian()
     #TODO: inconsistent super __init__
     AdditiveModel.__init__(self, exog, smoothers=smoothers, family=family)
     GLM.__init__(self, endog, exog, family=family)
     assert self.family is family  #make sure we got the right family
Example #2
0
    def __init__(self):
        '''
        Fit an Inverse Gaussian GLM with a log link on the InvGaussLog data.

        The fitted results go to ``res1``; the ``InvGaussLog`` instance that
        supplied ``endog``/``exog`` is kept as ``res2``.
        '''
        from results.results_glm import InvGaussLog

        # Test Precisions
        self.decimal_aic_R = -10  # Big difference vs R.
        self.decimal_resids = DECIMAL_3

        benchmark = InvGaussLog()
        log_link_family = sm.families.InverseGaussian(
            link=sm.families.links.log)
        model = GLM(benchmark.endog, benchmark.exog, family=log_link_family)
        self.res1 = model.fit()
        self.res2 = benchmark
Example #3
0
    def __init__(self):
        '''
        Fit a Gaussian GLM with a log link on simulated data.

        The regressors are a constant, ``x`` and ``x**2`` for 100
        observations; the response adds seeded (54321) normal noise to an
        exponential mean. Fitted results are stored in ``res1`` and a
        ``GaussianLog`` instance in ``res2``.
        '''
        from results.results_glm import GaussianLog

        # Test Precision
        self.decimal_aic_R = DECIMAL_0
        self.decimal_aic_Stata = DECIMAL_2
        self.decimal_loglike = DECIMAL_0
        self.decimal_null_deviance = DECIMAL_1

        nobs = 100
        np.random.seed(54321)
        x = np.arange(nobs)
        constant = np.ones((nobs, 1))
        self.X = np.c_[constant, x, x**2]
        #        y = 1.0 - .02*x - .001*x**2 + 0.001 * np.random.randn(nobs)
        mean = np.exp(-(-1.0 + 0.02*x + 0.0001*x**2))
        noise = 0.001 * np.random.randn(nobs)
        self.lny = mean + noise

        log_link_family = sm.families.Gaussian(sm.families.links.log)
        model = GLM(self.lny, self.X, family=log_link_family)
        self.res1 = model.fit()
        self.res2 = GaussianLog()
Example #4
0
    def __init__(self):
        '''
        Build simulated data and fit a Gaussian family GLM with log link.

        Stores the design matrix in ``X``, the simulated response in
        ``lny``, the fit in ``res1`` and a ``GaussianLog`` instance in
        ``res2``.
        '''
        # Test Precision
        self.decimal_aic_R = DECIMAL_0
        self.decimal_aic_Stata = DECIMAL_2
        self.decimal_loglike = DECIMAL_0
        self.decimal_null_deviance = DECIMAL_1

        n_obs = 100
        grid = np.arange(n_obs)
        grid_sq = grid**2
        np.random.seed(54321)
        self.X = np.c_[np.ones((n_obs, 1)), grid, grid_sq]
        # exponential mean plus small seeded normal noise
        signal = np.exp(-(-1.0 + 0.02*grid + 0.0001*grid_sq))
        self.lny = signal + 0.001 * np.random.randn(n_obs)

        gauss_log = sm.families.Gaussian(sm.families.links.log)
        self.res1 = GLM(self.lny, self.X, family=gauss_log).fit()

        from results.results_glm import GaussianLog
        self.res2 = GaussianLog()
Example #5
0
    def __init__(self):
        '''
        Fit a Gaussian GLM with an inverse_power link on simulated data.

        Stores the design matrix in ``X``, the simulated response in
        ``y_inv``, the fit in ``res1`` and a ``GaussianInverse`` instance in
        ``res2``.
        '''
        from results.results_glm import GaussianInverse

        # Test Precisions
        self.decimal_bic = DECIMAL_1
        self.decimal_aic_R = DECIMAL_1
        self.decimal_aic_Stata = DECIMAL_3
        self.decimal_loglike = DECIMAL_1
        self.decimal_resids = DECIMAL_3

        nobs = 100
        x = np.arange(nobs)
        x_sq = x**2
        np.random.seed(54321)
        # ``y`` is not used below, but its randn call consumes nobs draws
        # from the seeded generator before the noise for y_inv is drawn,
        # so it must stay in place to keep y_inv unchanged.
        y = 1.0 + 2.0 * x + x_sq + 0.1 * np.random.randn(nobs)
        self.X = np.c_[np.ones((nobs, 1)), x, x_sq]
        inverse_mean = (1. + .02*x + .001*x_sq)**-1
        self.y_inv = inverse_mean + .001 * np.random.randn(nobs)

        inverse_family = sm.families.Gaussian(sm.families.links.inverse_power)
        self.res1 = GLM(self.y_inv, self.X, family=inverse_family).fit()
        self.res2 = GaussianInverse()
Example #6
0
def test_prefect_pred():
    '''
    Check that a Binomial GLM fit on the filtered iris data raises
    PerfectSeparationError.

    The data are read from ``results/iris.csv`` next to this file; the last
    column is used as the response and rows where it equals 2 are dropped.
    '''
    results_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               'results')
    iris = np.genfromtxt(os.path.join(results_dir, 'iris.csv'),
                         delimiter=",", skip_header=1)
    labels = iris[:, -1]
    features = iris[:, :-1]
    # one mask, applied to both the regressors and the response
    keep = labels != 2
    design = add_constant(features[keep], prepend=True)
    model = GLM(labels[keep], design, family=sm.families.Binomial())
    assert_raises(PerfectSeparationError, model.fit)
Example #7
0
    def __init__(self):
        '''
        Fit an Inverse Gaussian GLM with an identity link on Medpar1 data.

        The fit is stored in ``res1`` and an ``InvGaussIdentity`` instance
        in ``res2``.
        '''
        from results.results_glm import Medpar1
        from results.results_glm import InvGaussIdentity

        # Test Precisions
        self.decimal_aic_R = -10  #TODO: Big difference vs R
        self.decimal_fittedvalues = DECIMAL_3
        self.decimal_params = DECIMAL_3

        medpar = Medpar1()
        identity_family = sm.families.InverseGaussian(
            link=sm.families.links.identity)
        model = GLM(medpar.endog, medpar.exog, family=identity_family)
        self.res1 = model.fit()
        self.res2 = InvGaussIdentity()
Example #8
0
    def __init__(self):
        '''
        Fit a Gamma GLM with a log link on the CancerLog reference data.

        The fit is stored in ``res1``; the ``CancerLog`` instance that
        supplied ``endog``/``exog`` is kept as ``res2``.
        '''
        from results.results_glm import CancerLog

        # Test Precisions
        self.decimal_resids = DECIMAL_3
        self.decimal_aic_R = DECIMAL_0
        self.decimal_fittedvalues = DECIMAL_3

        benchmark = CancerLog()
        gamma_log = sm.families.Gamma(link=sm.families.links.log)
        model = GLM(benchmark.endog, benchmark.exog, family=gamma_log)
        self.res1 = model.fit()
        self.res2 = benchmark
Example #9
0
    def __init__(self):
        '''
        Simulate data and fit a Gaussian family GLM with inverse_power link.

        ``X`` holds a constant, ``x`` and ``x**2`` for 100 observations and
        ``y_inv`` the simulated response. The fit is stored in ``res1`` and
        a ``GaussianInverse`` instance in ``res2``.
        '''
        # Test Precisions
        self.decimal_bic = DECIMAL_1
        self.decimal_aic_R = DECIMAL_1
        self.decimal_aic_Stata = DECIMAL_3
        self.decimal_loglike = DECIMAL_1
        self.decimal_resids = DECIMAL_3

        n_obs = 100
        grid = np.arange(n_obs)
        np.random.seed(54321)
        # Unused afterwards; kept because this randn call advances the
        # seeded generator by n_obs draws before y_inv's noise is drawn.
        y = 1.0 + 2.0 * grid + grid**2 + 0.1 * np.random.randn(n_obs)
        intercept = np.ones((n_obs, 1))
        self.X = np.c_[intercept, grid, grid**2]
        noise = .001 * np.random.randn(n_obs)
        self.y_inv = (1. + .02 * grid + .001 * grid**2)**-1 + noise

        link = sm.families.links.inverse_power
        inverse_model = GLM(self.y_inv, self.X,
                            family=sm.families.Gaussian(link))
        self.res1 = inverse_model.fit()

        from results.results_glm import GaussianInverse
        self.res2 = GaussianInverse()
Example #10
0
    def __init__(self):
        '''
        Fit a Gamma GLM with an identity link on the CancerIdentity data.

        The fit is stored in ``res1``; the ``CancerIdentity`` instance that
        supplied ``endog``/``exog`` is kept as ``res2``.
        '''
        from results.results_glm import CancerIdentity

        # Test Precisions
        self.decimal_resids = -100  #TODO Very off from Stata?
        self.decimal_params = DECIMAL_2
        self.decimal_aic_R = DECIMAL_0
        self.decimal_loglike = DECIMAL_1

        benchmark = CancerIdentity()
        gamma_identity = sm.families.Gamma(link=sm.families.links.identity)
        model = GLM(benchmark.endog, benchmark.exog, family=gamma_identity)
        self.res1 = model.fit()
        self.res2 = benchmark
Example #11
0
 def setupClass(cls):
     '''
     Fit a Poisson GLM with a constant exposure of 100 on the cpunish data.

     Column 3 of ``exog`` is log-transformed and a constant is added before
     fitting. After the fit, ``log(100)`` is added to the last parameter.
     The fit is stored on the class as ``res1`` and a ``Cpunish`` instance
     as ``res2``.
     '''
     from gwstatsmodels.datasets.cpunish import load
     from results.results_glm import Cpunish

     dataset = load()
     dataset.exog[:, 3] = np.log(dataset.exog[:, 3])
     dataset.exog = add_constant(dataset.exog)
     exposure = [100] * len(dataset.endog)
     poisson_model = GLM(dataset.endog,
                         dataset.exog,
                         family=sm.families.Poisson(),
                         exposure=exposure)
     cls.res1 = poisson_model.fit()
     # add exposure back in to param to make the results the same
     cls.res1.params[-1] += np.log(100)
     cls.res2 = Cpunish()
Example #12
0
    def __init__(self):
        '''
        Tests Poisson family with canonical log link.

        Test results were obtained by R.

        Column 3 of ``exog`` is log-transformed and a constant is added
        before fitting. The dataset is kept as ``data``, the fit as ``res1``
        and a ``Cpunish`` instance as ``res2``.
        '''
        from gwstatsmodels.datasets.cpunish import load
        from results.results_glm import Cpunish

        dataset = load()
        self.data = dataset
        dataset.exog[:, 3] = np.log(dataset.exog[:, 3])
        dataset.exog = add_constant(dataset.exog)
        poisson_model = GLM(dataset.endog, dataset.exog,
                            family=sm.families.Poisson())
        self.res1 = poisson_model.fit()
        self.res2 = Cpunish()
Example #13
0
    def __init__(self):
        '''
        Test Gaussian family with canonical identity link

        Loads the longley dataset, adds a constant to ``exog`` and fits a
        Gaussian GLM. The dataset is kept as ``data``, the fit as ``res1``
        and a ``Longley`` instance as ``res2``.
        '''
        from gwstatsmodels.datasets.longley import load
        from results.results_glm import Longley

        # Test Precisions
        self.decimal_resids = DECIMAL_3
        self.decimal_params = DECIMAL_2
        self.decimal_bic = DECIMAL_0
        self.decimal_bse = DECIMAL_3

        dataset = load()
        self.data = dataset
        dataset.exog = add_constant(dataset.exog)
        gaussian_model = GLM(dataset.endog, dataset.exog,
                             family=sm.families.Gaussian())
        self.res1 = gaussian_model.fit()
        self.res2 = Longley()
Example #14
0
    def __init__(self):
        '''
        Test Binomial family with canonical logit link using star98 dataset.

        A constant is added to ``exog`` before fitting. The fit is stored
        in ``res1`` and a ``Star98`` instance in ``res2``.
        '''
        from gwstatsmodels.datasets.star98 import load
        from results.results_glm import Star98

        self.decimal_resids = DECIMAL_1
        self.decimal_bic = DECIMAL_2

        dataset = load()
        dataset.exog = add_constant(dataset.exog)
        binomial_model = GLM(dataset.endog, dataset.exog,
                             family=sm.families.Binomial())
        self.res1 = binomial_model.fit()
        #NOTE: if you want to replicate with RModel
        #res2 = RModel(data.endog[:,0]/trials, data.exog, r.glm,
        #        family=r.binomial, weights=trials)

        self.res2 = Star98()
Example #15
0
    def init(self):
        '''
        Simulate a response, fit a GAM and a GLM to it, and collect results.

        Reads ``nobs``, ``y_true``, ``x``, ``exog``, ``family``, ``rvs`` and,
        when present, ``scale`` from the instance. Stores the true mean
        (``mu_true``), the GAM model and results (``mod_gam``, ``res_gam``),
        and two result holders: ``res1``, a ``Dummy`` filled from the GAM,
        and ``res2``, the GLM results with prediction attributes attached.
        '''
        family = self.family
        x, exog = self.x, self.exog
        # fall back to a scale of 1 when the instance defines none
        scale = self.scale if hasattr(self, 'scale') else 1

        mu_true = family.link.inverse(self.y_true)
        self.mu_true = mu_true

        np.random.seed(8765993)
        #y_obs = np.asarray([stats.poisson.rvs(p) for p in mu], float)
        y_obs = self.rvs(mu_true, scale=scale, size=self.nobs)  #this should work
        #TODO: y_obs is twice __init__ and fit
        gam_model = GAM(y_obs, x, family=family)
        gam_model.fit(y_obs, maxiter=100)
        res_gam = gam_model.results
        self.res_gam = res_gam  #attached for debugging
        self.mod_gam = gam_model  #attached for debugging

        res_glm = GLM(y_obs, exog, family=family).fit()

        #Note: there still are some naming inconsistencies
        gam_holder = Dummy()  #for gam model
        self.res1 = gam_holder
        #reuse existing glm results as the benchmark, will add additional
        self.res2 = res_glm

        #eta in GLM terminology
        res_glm.y_pred = res_glm.model.predict(res_glm.params, exog,
                                               linear=True)
        gam_holder.y_pred = res_gam.predict(x)
        gam_holder.y_predshort = res_gam.predict(x[:10])  #, linear=True)

        #mu
        res_glm.mu_pred = res_glm.model.predict(res_glm.params, exog,
                                                linear=False)
        gam_holder.mu_pred = res_gam.mu

        #parameters
        smoothers = gam_model.smoothers
        slopes = []
        for smoother in smoothers:
            slopes.extend(smoother.params[1:])
        # NOTE(review): the constant adds params[1] of each smoother, the
        # same element that opens that smoother's slopes above -- confirm
        # that params[0] was not intended.
        const = res_gam.alpha + sum(smoother.params[1]
                                    for smoother in smoothers)
        gam_holder.params = np.array([const] + slopes)
Example #16
0
    def __init__(self):
        '''
        Tests Gamma family with canonical inverse link (power -1)

        Loads the scotland dataset, adds a constant to ``exog`` and fits a
        Gamma GLM. The fit is stored in ``res1`` and a ``Scotvote``
        instance, with ``aic_R`` increased by 2, in ``res2``.
        '''
        from gwstatsmodels.datasets.scotland import load
        from results.results_glm import Scotvote

        # Test Precisions
        self.decimal_aic_R = -1  #TODO: off by about 1, we are right with Stata
        self.decimal_resids = DECIMAL_2

        dataset = load()
        dataset.exog = add_constant(dataset.exog)
        gamma_model = GLM(dataset.endog, dataset.exog,
                          family=sm.families.Gamma())
        self.res1 = gamma_model.fit()

        #        res2 = RModel(data.endog, data.exog, r.glm, family=r.Gamma)
        benchmark = Scotvote()
        # R doesn't count degree of freedom for scale with gamma
        benchmark.aic_R += 2
        self.res2 = benchmark
Example #17
0
    def __init__(self):
        '''
        Tests the Inverse Gaussian family in GLM.

        Notes
        -----
        Used the rndivgx.ado file provided by Hardin and Hilbe to
        generate the data.  Results are read from model_results, which
        were obtained by running R_ig.s

        The fit is stored in ``res1``; the ``InvGauss`` instance that
        supplied ``endog``/``exog`` is kept as ``res2``.
        '''
        from results.results_glm import InvGauss

        # Test Precisions
        self.decimal_aic_R = DECIMAL_0
        self.decimal_loglike = DECIMAL_0

        benchmark = InvGauss()
        inv_gauss_model = GLM(benchmark.endog, benchmark.exog,
                              family=sm.families.InverseGaussian())
        self.res1 = inv_gauss_model.fit()
        self.res2 = benchmark
Example #18
0
    def __init__(self):
        '''
        Test Negative Binomial family with canonical log link

        Column 2 of ``exog`` is log-transformed, an interaction of columns
        2 and 1 is appended, and a constant is added before fitting. The
        dataset is kept as ``data``, the fit as ``res1`` and a ``Committee``
        instance, with ``aic_R`` increased by 2, as ``res2``.
        '''
        from gwstatsmodels.datasets.committee import load
        from results.results_glm import Committee

        # Test Precision
        # NOTE(review): both decimal_resid and decimal_resids are set here;
        # confirm whether the singular name is read anywhere.
        self.decimal_resid = DECIMAL_1
        self.decimal_params = DECIMAL_3
        self.decimal_resids = -1  # 1 % mismatch at 0
        self.decimal_fittedvalues = DECIMAL_1

        dataset = load()
        self.data = dataset
        # the log transform must precede the interaction, which uses the
        # transformed column
        dataset.exog[:, 2] = np.log(dataset.exog[:, 2])
        interaction = dataset.exog[:, 2] * dataset.exog[:, 1]
        dataset.exog = np.column_stack((dataset.exog, interaction))
        dataset.exog = add_constant(dataset.exog)
        negbin_model = GLM(dataset.endog, dataset.exog,
                           family=sm.families.NegativeBinomial())
        self.res1 = negbin_model.fit()

        benchmark = Committee()
        # They don't count a degree of freedom for the scale
        benchmark.aic_R += 2
        self.res2 = benchmark
Example #19
0
    plt.legend(loc='upper left')
    plt.title('gam.GAM Poisson')

    counter = 2
    for ii, xx in zip(['z', 'x1', 'x2'], [z, x[:, 0], x[:, 1]]):
        sortidx = np.argsort(xx)
        #plt.figure()
        plt.subplot(2, 2, counter)
        plt.plot(xx[sortidx], p[sortidx], 'k.', alpha=0.5)
        plt.plot(xx[sortidx], yp[sortidx], 'b.', label='true')
        plt.plot(xx[sortidx], y_pred[sortidx], 'r.', label='GAM')
        plt.legend(loc='upper left')
        plt.title('gam.GAM Poisson ' + ii)
        counter += 1

    res = GLM(p, exog_reduced, family=f).fit()

    #plot component, compared to true component
    x1 = x[:, 0]
    x2 = x[:, 1]
    f1 = exog[:, :order + 1].sum(1) - 1  #take out constant
    f2 = exog[:, order + 1:].sum(1) - 1
    plt.figure()
    #Note: need to correct for constant which is indeterminatedly distributed
    #plt.plot(x1, m.smoothers[0](x1)-m.smoothers[0].params[0]+1, 'r')
    #better would be subtract f(0) m.smoothers[0](np.array([0]))
    plt.plot(x1, f1, linewidth=2)
    plt.plot(x1, m.smoothers[0](x1) - m.smoothers[0].params[0], 'r')

    plt.figure()
    plt.plot(x2, f2, linewidth=2)
Example #20
0
 def __init__(self):
     '''
     Fit a Binomial GLM on the Lbw reference data.

     The ``Lbw`` instance that supplies ``endog``/``exog`` is kept as
     ``res2`` and the fit as ``res1``.
     '''
     from results.results_glm import Lbw

     benchmark = Lbw()
     self.res2 = benchmark
     binomial_model = GLM(benchmark.endog, benchmark.exog,
                          family=sm.families.Binomial())
     self.res1 = binomial_model.fit()
Example #21
0
 def __init__(self, endog, exog, smoothers=None, family=None):
     '''
     Initialize the model through both parent initializers.

     Parameters
     ----------
     endog
         Forwarded to ``GLM.__init__``.
     exog
         Forwarded to ``AdditiveModel.__init__`` and ``GLM.__init__``.
     smoothers : optional
         Forwarded to ``AdditiveModel.__init__``.
     family : optional
         Forwarded to both parent initializers. When None (the default),
         a new ``families.Gaussian()`` is created for this instance.
     '''
     # A default written in the signature is evaluated once, when the
     # function is defined, and would be shared by every instance that
     # relies on it; build the default per call instead.
     if family is None:
         family = families.Gaussian()
     #TODO: inconsistent super __init__
     AdditiveModel.__init__(self, exog, smoothers=smoothers, family=family)
     GLM.__init__(self, endog, exog, family=family)
     assert self.family is family  #make sure we got the right family