def test_attribute_writable_resettable():
    """
    Regression test for mutables and class constructors.
    """
    data = sm.datasets.longley.load()
    endog, exog = data.endog, data.exog
    glm_model = sm.GLM(endog, exog)
    assert_equal(glm_model.family.link.power, 1.0)
    glm_model.family.link.power = 2.
    assert_equal(glm_model.family.link.power, 2.0)
    glm_model2 = sm.GLM(endog, exog)
    assert_equal(glm_model2.family.link.power, 1.0)
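# A minimal illustrative sketch (hypothetical, not statsmodels code) of the hazard the
# regression test above guards against: a mutable default shared at class level leaks
# state across instances, while an object created inside the constructor does not.
def test_mutable_default_sketch():
    class SharedDefault(object):
        link_power = [1.0]              # class-level mutable shared by all instances

    class PerInstanceDefault(object):
        def __init__(self):
            self.link_power = [1.0]     # fresh object created in the constructor

    a, b = SharedDefault(), SharedDefault()
    a.link_power[0] = 2.0
    assert_equal(b.link_power[0], 2.0)  # mutation leaks across instances

    c, d = PerInstanceDefault(), PerInstanceDefault()
    c.link_power[0] = 2.0
    assert_equal(d.link_power[0], 1.0)  # isolated, as the GLM regression test expects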
def __init__(self):
    # generate artificial data
    np.random.seed(98765678)
    nobs = 200
    rvs = np.random.randn(nobs, 6)
    data_exog = rvs
    data_exog = sm.add_constant(data_exog)
    xbeta = 1 + 0.1 * rvs.sum(1)
    data_endog = np.random.poisson(np.exp(xbeta))

    # estimate discretemod.Poisson as benchmark
    self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

    mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
    self.res_glm = mod_glm.fit()

    # estimate generic MLE
    #self.mod = PoissonGMLE(data_endog, data_exog)
    #res = self.mod.fit()
    offset = self.res_discrete.params[0] * data_exog[:, 0]  # 1d ???
    #self.res = PoissonOffsetGMLE(data_endog, data_exog[:, 1:], offset=offset).fit(start_params=np.ones(6)/2., method='nm')
    modo = PoissonOffsetGMLE(data_endog, data_exog[:, 1:], offset=offset)
    self.res = modo.fit(start_params=0.9 * self.res_discrete.params[1:],
                        method='nm', disp=0)
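# A hedged usage sketch; the helper name below is hypothetical and not part of the
# original test class. After __init__ runs, the offset model's slope estimates should
# roughly match the benchmark discrete Poisson slopes, because the constant term's
# contribution has been moved into the offset.
import numpy as np

def check_offset_matches_benchmark(case, rtol=0.05):
    # `case` is an instance of the test class whose __init__ is shown above
    np.testing.assert_allclose(case.res.params,
                               case.res_discrete.params[1:],
                               rtol=rtol)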
def __init__(self):
    # generate artificial data
    np.random.seed(98765678)
    nobs = 200
    rvs = np.random.randn(nobs, 6)
    data_exog = rvs
    data_exog = sm.add_constant(data_exog)
    xbeta = 0.1 + 0.1 * rvs.sum(1)
    data_endog = np.random.poisson(np.exp(xbeta))

    # estimate discretemod.Poisson as benchmark
    self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

    mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
    self.res_glm = mod_glm.fit()

    # estimate generic MLE
    self.mod = PoissonGMLE(data_endog, data_exog)
    self.res = self.mod.fit(start_params=0.9 * self.res_discrete.params,
                            method='nm', disp=0)
from scipy import stats
import numpy

data = numpy.loadtxt("../doc/gelman/arm2/police/frisk_with_noise.dat", skiprows=7)
X = numpy.zeros((3, len(data[:, 0])))
print X.shape

arrests = data[:, 2]
arrests[arrests == 0] = 1
arrests = numpy.log(arrests)
stops = data[:, 0]
stops[stops == 0.0] = .0001

X[0, :] = arrests                        # log(arrests)
X[1, :] = data[:, 4]                     # eth
X[2, :] = numpy.ones(len(data[:, 0]))    # constant

glm = sm.GLM(stops, X.T, family=sm.families.Poisson())
res = glm.fit()
print "res.deviance=" + str(res.deviance)
print "res.scale=" + str(res.scale)
print "res.params=" + str(res.params)
print "res.pearson_chi2=" + str(res.pearson_chi2)
print "res.df_model=" + str(res.df_model)
print "res.null_deviance=" + str(res.null_deviance)
print "res.t()=" + str(res.t())
#print data_endog

modp = MyPoisson(data_endog, data_exog)
resp = modp.fit()
print resp.params
print resp.bse

from scikits.statsmodels.discretemod import Poisson
resdp = Poisson(data_endog, data_exog).fit()
print '\ncompare with discretemod'
print 'compare params'
print resdp.params - resp.params
print 'compare bse'
print resdp.bse - resp.bse

gmlp = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
resgp = gmlp.fit()
'''
this creates a warning; bse is double defined ???
c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\decorators.py:105: CacheWriteWarning: The attribute 'bse' cannot be overwritten
  warnings.warn(errmsg, CacheWriteWarning)
'''
print '\ncompare with GLM'
print 'compare params'
print resgp.params - resp.params
print 'compare bse'
print resgp.bse - resp.bse

# mean of the Poisson distribution
lam = np.exp(np.dot(data_exog, resp.params))
predmean = stats.poisson.stats(lam, moments='m')
print np.max(np.abs(predmean - lam))
from pandas import DataFrame

data = sm.datasets.longley.load()
df = DataFrame(data.exog, columns=data.exog_name)
y = data.endog
# data.exog = sm.add_constant(data.exog)
df['intercept'] = 1.
olsresult = sm.OLS(y, df).fit()
rlmresult = sm.RLM(y, df).fit()
# olswrap = RegressionResultsWrapper(olsresult)
# rlmwrap = RLMResultsWrapper(rlmresult)

data = sm.datasets.wfs.load()
# get offset
offset = np.log(data.exog[:, -1])
exog = data.exog[:, :-1]

# convert dur to dummy
exog = sm.tools.categorical(exog, col=0, drop=True)
# drop reference category
# convert res to dummy
exog = sm.tools.categorical(exog, col=0, drop=True)
# convert edu to dummy
exog = sm.tools.categorical(exog, col=0, drop=True)
# drop reference categories and add intercept
exog = sm.add_constant(exog[:, [1, 2, 3, 4, 5, 7, 8, 10, 11, 12]])

endog = np.round(data.endog)
mod = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()
# glmwrap = GLMResultsWrapper(mod)
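# A hedged sketch: recent statsmodels versions let the log-exposure computed above be
# passed directly through GLM's `offset` keyword instead of being folded into exog.
# Whether the scikits.statsmodels version used here already accepts this keyword is an
# assumption.
mod_offset = sm.GLM(endog, exog, family=sm.families.Poisson(), offset=offset)
res_offset = mod_offset.fit()
print res_offset.params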
# The percent of students taking college credit courses "PCTAF",
# The percentage of charter schools in the district "PCTCHRT",
# The percent of schools in the district operating year round "PCTYRRND",
# The following are interaction terms "PERMINTE_AVYRSEXP", "PERMINTE_AVSAL",
# "AVYRSEXP_AVSAL", "PERSPEN_PTRATIO", "PERSPEN_PCTAF", "PTRATIO_PCTAF",
# "PERMINTE_AVYRSEXP_AVSAL", "PERSPEN_PTRATIO_PCTAF"

data = sm.datasets.star98.load()
data.exog = sm.add_constant(data.exog)

print """The response variable is (success, failure). E.g., the first observation is """, data.endog[0]
print """Giving a total number of trials for this observation of """, data.endog[0].sum()

glm_binom = sm.GLM(data.endog, data.exog, family=sm.families.Binomial())
binom_results = glm_binom.fit()
print """The fitted values are """, binom_results.params
print """The corresponding t-values are """, binom_results.tvalues

# It is common in GLMs with interactions to compare first differences.
# We are interested in the difference of the impact of the explanatory variable
# on the response variable. This example uses interquartile differences for
# the percentage of low income households while holding the other values
# constant at their mean; a sketch of this computation follows below.
means = data.exog.mean(axis=0)
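# A hedged sketch of the interquartile first difference described above; it assumes the
# percentage of low income households ("LOWINC") is the first column of data.exog
# (index 0). Adjust the column index if the dataset orders the variables differently.
from scipy import stats

means25 = means.copy()
means25[0] = stats.scoreatpercentile(data.exog[:, 0], 25)
means75 = means.copy()
means75[0] = stats.scoreatpercentile(data.exog[:, 0], 75)
resp_25 = binom_results.predict(means25)
resp_75 = binom_results.predict(means75)
print """The interquartile first difference in the predicted response is """, resp_75 - resp_25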