def null(self):
    """
    Fitted values of the constant-only (null) model.

    Regresses ``self._endog`` on a single column of ones using ``WLS``
    with ``self._data_weights`` as the weights, and returns the
    ``fittedvalues`` attribute of that fit.

    Returns
    -------
    The ``fittedvalues`` of the intercept-only ``WLS`` fit.
    """
    response = self._endog
    # Design matrix consisting of one constant column, one row per observation.
    constant_column = np.ones((len(response), 1))
    null_model = WLS(response, constant_column, weights=self._data_weights)
    null_fit = null_model.fit()
    return null_fit.fittedvalues
def fit(self, maxiter=100, method="IRLS", tol=1e-8, data_weights=1.0, scale=None):
    """
    Fit a generalized linear model for the model's family.

    The estimate is obtained by repeatedly solving a weighted least
    squares problem on a working response until the change in deviance
    between two successive iterations is no larger than `tol`, or until
    the iteration counter reaches `maxiter`.

    Parameters
    ----------
    maxiter : int, optional
        Upper bound on the iteration counter.  Default is 100.
    method : string
        Default is 'IRLS' for iteratively reweighted least squares.
        This is currently the only method available for GLM fit; the
        argument is accepted but not consulted by this method.
    tol : float
        Convergence tolerance on the absolute change in deviance.
        Default is 1e-8.
    data_weights : array-like or scalar, only used with Binomial
        Number of trials for each observation.  Used only for binomial
        data when `endog` is specified as a 2d array of
        (successes, failures).  A scalar greater than 1 is expanded to
        one value per row of `exog`.  Note that this argument will be
        dropped in the future.
    scale : string or float, optional
        `scale` can be 'X2', 'dev', or a float.
        The default value is None, which uses `X2` for Gamma, Gaussian,
        and Inverse Gaussian.  `X2` is Pearson's chi-square divided by
        `df_resid`.  The default is 1 for the Binomial and Poisson
        families.  `dev` is the deviance divided by df_resid.

    Returns
    -------
    GLMResults
        Results object built from the parameters and normalized
        covariance of the last weighted least squares fit and the
        estimated scale.  Its `bse` attribute is set to the square root
        of the diagonal of the scaled parameter covariance.

    Raises
    ------
    ValueError
        If non-scalar `data_weights` are given for a family other than
        Binomial, if the deviance at the starting values is NaN, or if
        the stopping rule is already satisfied before a single weighted
        least squares step has been taken (so there is nothing to
        return).

    Notes
    -----
    This method mutates the model: it sets `data_weights`, `scaletype`,
    `weights`, `scale` and `mu`, advances `iteration`, appends to
    `history["deviance"]`, and (for Binomial) replaces `endog` with the
    value returned by `family.initialize`.
    """
    is_binomial = isinstance(self.family, families.Binomial)

    if np.shape(data_weights) != () and not is_binomial:
        raise ValueError(
            "Data weights are only to be supplied for the Binomial family"
        )

    self.data_weights = data_weights
    if np.shape(self.data_weights) == () and self.data_weights > 1:
        # Expand a scalar trial count to one entry per observation.
        self.data_weights = self.data_weights * np.ones((self.exog.shape[0]))
    self.scaletype = scale

    if is_binomial:
        # This checks what kind of data is given for Binomial.  The family
        # will need a reference to endog if this is to be removed from the
        # preprocessing.
        self.endog = self.family.initialize(self.endog)

    mu = self.family.starting_mu(self.endog)
    wlsexog = self.exog
    eta = self.family.predict(mu)
    self.iteration += 1

    dev = self.family.deviance(self.endog, mu)
    if np.isnan(dev):
        raise ValueError(
            "The first guess on the deviance function returned a nan.  "
            "This could be a boundary problem and should be reported."
        )
    # First guess on the deviance is assumed to be scaled by 1.
    self.history["deviance"].append(dev)

    # Stays None when the loop below never executes, which lets us report a
    # clear error instead of failing on an unbound local further down.
    wls_results = None

    # NOTE(review): the stopping rule reads history["deviance"] at index
    # `iteration` and `iteration - 1`, so it assumes the history already held
    # an entry before the append above -- confirm against the initialization.
    while (
        np.fabs(
            self.history["deviance"][self.iteration]
            - self.history["deviance"][self.iteration - 1]
        )
        > tol
        and self.iteration < maxiter
    ):
        self.weights = data_weights * self.family.weights(mu)
        # Working response for this step; no offset term is applied.
        wlsendog = eta + self.family.link.deriv(mu) * (self.endog - mu)
        wls_results = WLS(wlsendog, wlsexog, self.weights).fit()
        # Updated linear predictor; no offset term is applied.
        eta = np.dot(self.exog, wls_results.params)
        mu = self.family.fitted(eta)
        self._update_history(wls_results, mu)
        self.scale = self.estimate_scale(mu)
        self.iteration += 1

    self.mu = mu

    if wls_results is None:
        raise ValueError(
            "No IRLS iteration was performed (check `maxiter` and `tol`), "
            "so there are no parameter estimates to return."
        )

    glm_results = GLMResults(
        self,
        wls_results.params,
        wls_results.normalized_cov_params,
        self.scale,
    )
    glm_results.bse = np.sqrt(np.diag(wls_results.cov_params(scale=self.scale)))
    return glm_results