def test_attribute_writable_resettable():
    """
    Regression test for mutables and class constructors.
    """
    data = sm.datasets.longley.load()
    endog, exog = data.endog, data.exog
    glm_model = sm.GLM(endog, exog)
    assert_equal(glm_model.family.link.power, 1.0)
    glm_model.family.link.power = 2.
    assert_equal(glm_model.family.link.power, 2.0)
    glm_model2 = sm.GLM(endog, exog)
    assert_equal(glm_model2.family.link.power, 1.0)
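# A minimal illustrative sketch (hypothetical, not statsmodels code) of the hazard the
# regression test above guards against: a mutable default shared at class level leaks
# state across instances, while an object created inside the constructor does not.
def test_mutable_default_sketch():
    class SharedDefault(object):
        link_power = [1.0]              # class-level mutable shared by all instances

    class PerInstanceDefault(object):
        def __init__(self):
            self.link_power = [1.0]     # fresh object created in the constructor

    a, b = SharedDefault(), SharedDefault()
    a.link_power[0] = 2.0
    assert_equal(b.link_power[0], 2.0)  # mutation leaks across instances

    c, d = PerInstanceDefault(), PerInstanceDefault()
    c.link_power[0] = 2.0
    assert_equal(d.link_power[0], 1.0)  # isolated, as the GLM regression test expects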
def __init__(self):
    # generate artificial data
    np.random.seed(98765678)
    nobs = 200
    rvs = np.random.randn(nobs, 6)
    data_exog = rvs
    data_exog = sm.add_constant(data_exog)
    xbeta = 1 + 0.1 * rvs.sum(1)
    data_endog = np.random.poisson(np.exp(xbeta))

    # estimate discretemod.Poisson as benchmark
    self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

    mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
    self.res_glm = mod_glm.fit()

    # estimate generic MLE
    #self.mod = PoissonGMLE(data_endog, data_exog)
    #res = self.mod.fit()
    offset = self.res_discrete.params[0] * data_exog[:, 0]  # 1d ???
    #self.res = PoissonOffsetGMLE(data_endog, data_exog[:, 1:], offset=offset).fit(start_params=np.ones(6)/2., method='nm')
    modo = PoissonOffsetGMLE(data_endog, data_exog[:, 1:], offset=offset)
    self.res = modo.fit(start_params=0.9 * self.res_discrete.params[1:],
                        method='nm', disp=0)
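# A hedged usage sketch; the helper name below is hypothetical and not part of the
# original test class. After __init__ runs, the offset model's slope estimates should
# roughly match the benchmark discrete Poisson slopes, because the constant term's
# contribution has been moved into the offset.
import numpy as np

def check_offset_matches_benchmark(case, rtol=0.05):
    # `case` is an instance of the test class whose __init__ is shown above
    np.testing.assert_allclose(case.res.params,
                               case.res_discrete.params[1:],
                               rtol=rtol)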
def __init__(self):
    # generate artificial data
    np.random.seed(98765678)
    nobs = 200
    rvs = np.random.randn(nobs, 6)
    data_exog = rvs
    data_exog = sm.add_constant(data_exog)
    xbeta = 0.1 + 0.1 * rvs.sum(1)
    data_endog = np.random.poisson(np.exp(xbeta))

    # estimate discretemod.Poisson as benchmark
    self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

    mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
    self.res_glm = mod_glm.fit()

    # estimate generic MLE
    self.mod = PoissonGMLE(data_endog, data_exog)
    self.res = self.mod.fit(start_params=0.9 * self.res_discrete.params,
                            method='nm', disp=0)
from scipy import stats
import numpy

data = numpy.loadtxt("../doc/gelman/arm2/police/frisk_with_noise.dat", skiprows=7)
X = numpy.zeros((3, len(data[:, 0])))
print X.shape

arrests = data[:, 2]
arrests[arrests == 0] = 1
arrests = numpy.log(arrests)
stops = data[:, 0]
stops[stops == 0.0] = .0001

X[0, :] = arrests                        # log(arrests)
X[1, :] = data[:, 4]                     # eth
X[2, :] = numpy.ones(len(data[:, 0]))    # constant

glm = sm.GLM(stops, X.T, family=sm.families.Poisson())
res = glm.fit()
print "res.deviance=" + str(res.deviance)
print "res.scale=" + str(res.scale)
print "res.params=" + str(res.params)
print "res.pearson_chi2=" + str(res.pearson_chi2)
print "res.df_model=" + str(res.df_model)
print "res.null_deviance=" + str(res.null_deviance)
print "res.t()=" + str(res.t())
#print data_endog

modp = MyPoisson(data_endog, data_exog)
resp = modp.fit()
print resp.params
print resp.bse

from scikits.statsmodels.discretemod import Poisson
resdp = Poisson(data_endog, data_exog).fit()
print '\ncompare with discretemod'
print 'compare params'
print resdp.params - resp.params
print 'compare bse'
print resdp.bse - resp.bse

gmlp = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
resgp = gmlp.fit()
'''
this creates a warning; bse is double defined ???
c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\decorators.py:105: CacheWriteWarning: The attribute 'bse' cannot be overwritten
  warnings.warn(errmsg, CacheWriteWarning)
'''
print '\ncompare with GLM'
print 'compare params'
print resgp.params - resp.params
print 'compare bse'
print resgp.bse - resp.bse

# mean of the Poisson distribution
lam = np.exp(np.dot(data_exog, resp.params))
predmean = stats.poisson.stats(lam, moments='m')
print np.max(np.abs(predmean - lam))
from pandas import DataFrame

data = sm.datasets.longley.load()
df = DataFrame(data.exog, columns=data.exog_name)
y = data.endog
# data.exog = sm.add_constant(data.exog)
df['intercept'] = 1.
olsresult = sm.OLS(y, df).fit()
rlmresult = sm.RLM(y, df).fit()
# olswrap = RegressionResultsWrapper(olsresult)
# rlmwrap = RLMResultsWrapper(rlmresult)

data = sm.datasets.wfs.load()
# get offset
offset = np.log(data.exog[:, -1])
exog = data.exog[:, :-1]

# convert dur to dummy
exog = sm.tools.categorical(exog, col=0, drop=True)
# drop reference category
# convert res to dummy
exog = sm.tools.categorical(exog, col=0, drop=True)
# convert edu to dummy
exog = sm.tools.categorical(exog, col=0, drop=True)
# drop reference categories and add intercept
exog = sm.add_constant(exog[:, [1, 2, 3, 4, 5, 7, 8, 10, 11, 12]])

endog = np.round(data.endog)
mod = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()
# glmwrap = GLMResultsWrapper(mod)
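# A hedged sketch: recent statsmodels versions let the log-exposure computed above be
# passed directly through GLM's `offset` keyword instead of being folded into exog.
# Whether the scikits.statsmodels version used here already accepts this keyword is an
# assumption.
mod_offset = sm.GLM(endog, exog, family=sm.families.Poisson(), offset=offset)
res_offset = mod_offset.fit()
print res_offset.params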
# The percent of students taking college credit courses "PCTAF",
# The percentage of charter schools in the district "PCTCHRT",
# The percent of schools in the district operating year round "PCTYRRND",
# The following are interaction terms "PERMINTE_AVYRSEXP", "PERMINTE_AVSAL",
# "AVYRSEXP_AVSAL", "PERSPEN_PTRATIO", "PERSPEN_PCTAF", "PTRATIO_PCTAF",
# "PERMINTE_AVYRSEXP_AVSAL", "PERSPEN_PTRATIO_PCTAF"

data = sm.datasets.star98.load()
data.exog = sm.add_constant(data.exog)

print """The response variable is (success, failure). E.g., the first observation is """, data.endog[0]
print """Giving a total number of trials for this observation of """, data.endog[0].sum()

glm_binom = sm.GLM(data.endog, data.exog, family=sm.families.Binomial())
binom_results = glm_binom.fit()
print """The fitted values are """, binom_results.params
print """The corresponding t-values are """, binom_results.tvalues

# It is common in GLMs with interactions to compare first differences.
# We are interested in the difference of the impact of the explanatory variable
# on the response variable. This example uses interquartile differences for
# the percentage of low income households while holding the other values
# constant at their mean; a sketch of this computation follows below.
means = data.exog.mean(axis=0)
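# A hedged sketch of the interquartile first difference described above; it assumes the
# percentage of low income households ("LOWINC") is the first column of data.exog
# (index 0). Adjust the column index if the dataset orders the variables differently.
from scipy import stats

means25 = means.copy()
means25[0] = stats.scoreatpercentile(data.exog[:, 0], 25)
means75 = means.copy()
means75[0] = stats.scoreatpercentile(data.exog[:, 0], 75)
resp_25 = binom_results.predict(means25)
resp_75 = binom_results.predict(means75)
print """The interquartile first difference in the predicted response is """, resp_75 - resp_25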