def null(self):
    """
    Fitted values of the constant-only (null) model.

    The response ``self._endog`` is regressed on a single column of ones.
    If the model carries an ``offset`` attribute (checked first) or an
    ``exposure`` attribute, a GLM with the same family and that
    offset/exposure is fitted and its ``mu`` is returned.  Otherwise a
    weighted least squares fit using ``self._data_weights`` is used and
    its ``fittedvalues`` are returned.
    """
    model = self.model
    response = self._endog
    constant = np.ones((len(response), 1))

    # Pick the extra GLM keyword; `offset` takes precedence over `exposure`.
    if hasattr(model, 'offset'):
        extra = {'offset': model.offset}
    elif hasattr(model, 'exposure'):
        extra = {'exposure': model.exposure}
    else:
        # Neither is present: a weighted constant-only least squares fit.
        baseline = lm.WLS(response, constant, weights=self._data_weights)
        return baseline.fit().fittedvalues

    null_fit = GLM(response, constant, family=self.family, **extra).fit()
    return null_fit.mu
def fit(self, maxiter=100, method='IRLS', tol=1e-8, scale=None):
    '''
    Fits a generalized linear model for a given family.

    Parameters
    ----------
    maxiter : int, optional
        Default is 100.
    method : string
        Default is 'IRLS' for iteratively reweighted least squares.  This
        is currently the only method available for GLM fit; the argument
        is not inspected by this method.
    tol : float
        Convergence tolerance.  Default is 1e-8.
    scale : string or float, optional
        `scale` can be 'X2', 'dev', or a float
        The default value is None, which uses `X2` for Gamma, Gaussian,
        and Inverse Gaussian.
        `X2` is Pearson's chi-square divided by `df_resid`.
        The default is 1 for the Binomial and Poisson families.
        `dev` is the deviance divided by df_resid

    Returns
    -------
    results : GLMResultsWrapper
        Wraps a GLMResults built from the parameters and normalized
        covariance of the last weighted least squares step and from
        ``self.scale``.

    Raises
    ------
    ValueError
        If the deviance at the starting values is nan, or if the loop
        conditions (`tol`, `maxiter`) prevent even one IRLS step from
        running, in which case there is nothing to build results from.
    PerfectSeparationError
        If, for one-dimensional `endog`, the fitted mean reproduces
        `endog` (checked with ``np.allclose``) after an iteration.
    '''
    endog = self.endog
    if endog.ndim > 1 and endog.shape[1] == 2:
        data_weights = endog.sum(1)  # weights are total trials
    else:
        data_weights = np.ones((endog.shape[0]))
    self.data_weights = data_weights
    # NOTE(review): data_weights is always an array at this point, so this
    # scalar-broadcast branch looks unreachable; kept to preserve behavior.
    if np.shape(self.data_weights) == () and self.data_weights > 1:
        self.data_weights = self.data_weights * np.ones((endog.shape[0]))
    self.scaletype = scale
    if isinstance(self.family, families.Binomial):
        # this checks what kind of data is given for Binomial.
        # family will need a reference to endog if this is to be removed
        # from the preprocessing
        self.endog = self.family.initialize(self.endog)

    if hasattr(self, 'offset'):
        offset = self.offset
    elif hasattr(self, 'exposure'):
        offset = self.exposure
    else:
        offset = 0
    # TODO: would there ever be both an exposure and an offset?

    mu = self.family.starting_mu(self.endog)
    wlsexog = self.exog
    eta = self.family.predict(mu)
    self.iteration += 1
    dev = self.family.deviance(self.endog, mu)
    if np.isnan(dev):
        # Implicit concatenation keeps the message independent of how the
        # source lines happen to be indented.
        raise ValueError("The first guess on the deviance function "
                         "returned a nan. This could be a boundary "
                         "problem and should be reported.")
    # first guess on the deviance is assumed to be scaled by 1.
    self.history['deviance'].append(dev)

    # Sentinel so we can tell afterwards whether any IRLS step ran.
    wls_results = None
    while (np.fabs(self.history['deviance'][self.iteration]
                   - self.history['deviance'][self.iteration - 1]) > tol
           and self.iteration < maxiter):
        self.weights = data_weights * self.family.weights(mu)
        # Working response of the IRLS step, with the offset removed.
        wlsendog = (eta + self.family.link.deriv(mu) * (self.endog - mu)
                    - offset)
        wls_results = lm.WLS(wlsendog, wlsexog, self.weights).fit()
        eta = np.dot(self.exog, wls_results.params) + offset
        mu = self.family.fitted(eta)
        self._update_history(wls_results, mu)
        self.scale = self.estimate_scale(mu)
        self.iteration += 1
        if endog.squeeze().ndim == 1 and np.allclose(mu - endog, 0):
            msg = "Perfect separation detected, results not available"
            raise PerfectSeparationError(msg)

    if wls_results is None:
        # Previously this path failed with an UnboundLocalError on
        # `wls_results`; report the actual cause instead.
        raise ValueError("No IRLS iteration was performed (maxiter=%s, "
                         "tol=%s); results are not available"
                         % (maxiter, tol))

    self.mu = mu
    glm_results = GLMResults(self, wls_results.params,
                             wls_results.normalized_cov_params,
                             self.scale)
    return GLMResultsWrapper(glm_results)
def fit(self, maxiter=50, tol=1e-8, scale_est='mad', init=None, cov='H1',
        update_scale=True, conv='dev'):
    """
    Fits the model using iteratively reweighted least squares.

    The IRLS routine runs until the specified objective converges to `tol`
    or `maxiter` has been reached.

    Parameters
    ----------
    conv : string
        Indicates the convergence criteria.
        Available options are "coefs" (the coefficients), "weights" (the
        weights in the iteration), "resid" (the standardized residuals),
        and "dev" (the un-normalized log-likelihood for the M
        estimator).  The default is "dev".  Matching is
        case-insensitive.
    cov : string, optional
        'H1', 'H2', or 'H3'
        Indicates how the covariance matrix is estimated.  Default is 'H1'.
        See rlm.RLMResults for more information.
    init : string
        Specifies method for the initial estimates of the parameters.
        Default is None, which means that the least squares estimate
        is used.  Currently it is the only available choice.
    maxiter : int
        The maximum number of iterations to try. Default is 50.
    scale_est : string or HuberScale()
        'mad', 'stand_mad', or HuberScale()
        Indicates the estimate to use for scaling the weights in the IRLS.
        The default is 'mad' (median absolute deviation).  Other options
        are 'stand_mad' for the median absolute deviation standardized
        around the median and 'HuberScale' for Huber's proposal 2.
        Huber's proposal 2 has optional keyword arguments d, tol, and
        maxiter for specifying the tuning constant, the convergence
        tolerance, and the maximum number of iterations.
        See models.robust.scale for more information.
    tol : float
        The convergence tolerance of the estimate.  Default is 1e-8.
    update_scale : Bool
        If `update_scale` is false then the scale estimate for the
        weights is held constant over the iteration.  Otherwise, it is
        updated for each fit in the iteration.  Default is True.

    Returns
    -------
    results : object
        scikits.statsmodels.rlm.RLMresults

    Raises
    ------
    ValueError
        If `cov` or `conv` is not one of the supported options.
    """
    cov_name = cov.upper()
    if cov_name not in ("H1", "H2", "H3"):
        raise ValueError("Covariance matrix %s not understood" % cov)
    self.cov = cov_name

    conv = conv.lower()
    if conv not in ("weights", "coefs", "dev", "resid"):
        raise ValueError("Convergence argument %s not understood" % conv)

    self.scale_est = scale_est
    # Unweighted least squares provides the starting estimate.
    wls_results = lm.WLS(self.endog, self.exog).fit()
    if not init:
        self.scale = self._estimate_scale(wls_results.resid)

    self._update_history(wls_results)
    self.iteration = 1

    # Map the convergence option to the history series that is monitored.
    history_key = {'coefs': 'params',
                   'dev': 'deviance',
                   'resid': 'sresid',
                   'weights': 'weights'}[conv]
    criterion = self.history[history_key]

    # Keep iterating while ANY monitored element still changes by more than
    # `tol`.  The previous `np.all` test declared convergence of a vector
    # criterion as soon as a single element stopped moving.
    while (np.any(np.fabs(criterion[self.iteration]
                          - criterion[self.iteration - 1]) > tol)
           and self.iteration < maxiter):
        self.weights = self.M.weights(wls_results.resid / self.scale)
        wls_results = lm.WLS(self.endog, self.exog,
                             weights=self.weights).fit()
        # Truthiness rather than `is True`, so numpy booleans and other
        # truthy flags are honoured.
        if update_scale:
            self.scale = self._estimate_scale(wls_results.resid)
        self._update_history(wls_results)
        self.iteration += 1

    results = RLMResults(self, wls_results.params,
                         self.normalized_cov_params, self.scale)
    # norm is not changed in fit, no old state
    results.fit_options = dict(cov=cov_name, scale_est=scale_est,
                               norm=self.M.__class__.__name__, conv=conv)

    # Resetting self.cov / self.scale_est to None here for additional fits
    # causes an exception, so they are left set.
    # NOTE: iteration and history could contain wrong state with repeated
    # calls to fit.
    return RLMResultsWrapper(results)