コード例 #1
0
 def null(self):
     """
     Fitted values of the intercept-only (null) model.

     A single column of ones is used as the design matrix.  When the
     parent model carries an ``offset`` it is forwarded to a new GLM fit;
     otherwise, when it carries an ``exposure``, that is forwarded instead.
     With neither attribute present, a weighted least squares fit using
     ``self._data_weights`` is performed.

     Returns
     -------
     The ``mu`` attribute of the fitted GLM, or the ``fittedvalues`` of the
     WLS fit when the model has neither an offset nor an exposure.
     """
     response = self._endog
     parent = self.model
     intercept = np.ones((len(response), 1))

     # Checked in this order on purpose: an offset wins over an exposure.
     for name in ('offset', 'exposure'):
         if hasattr(parent, name):
             extra = {name: getattr(parent, name)}
             null_fit = GLM(response, intercept, family=self.family,
                            **extra).fit()
             return null_fit.mu

     null_wls = lm.WLS(response, intercept, weights=self._data_weights)
     return null_wls.fit().fittedvalues
コード例 #2
0
    def fit(self, maxiter=100, method='IRLS', tol=1e-8, scale=None):
        '''
        Fits a generalized linear model for a given family.

        Parameters
        ----------
        maxiter : int, optional
            Upper bound on the iteration counter.  Default is 100.
        method : string
            Default is 'IRLS' for iteratively reweighted least squares.  This
            is currently the only method available for GLM fit; the argument
            is accepted but not otherwise used.
        tol : float
            Convergence tolerance on the change in deviance between two
            successive iterations.  Default is 1e-8.
        scale : string or float, optional
            `scale` can be 'X2', 'dev', or a float
            The default value is None, which uses `X2` for Gamma, Gaussian,
            and Inverse Gaussian.
            `X2` is Pearson's chi-square divided by `df_resid`.
            The default is 1 for the Binomial and Poisson families.
            `dev` is the deviance divided by df_resid

        Returns
        -------
        GLMResultsWrapper
            Wraps a GLMResults built from the parameters and normalized
            covariance of the last weighted least squares step and the
            current scale estimate.

        Raises
        ------
        ValueError
            If the deviance at the starting values is NaN, or if the
            iteration loop never ran (so no estimates exist).
        PerfectSeparationError
            If, for one-dimensional `endog`, the fitted mean reproduces
            `endog` (as judged by ``np.allclose``).
        '''
        endog = self.endog
        if endog.ndim > 1 and endog.shape[1] == 2:
            data_weights = endog.sum(1)  # weights are total trials
        else:
            data_weights = np.ones((endog.shape[0]))
        self.data_weights = data_weights
        self.scaletype = scale
        if isinstance(self.family, families.Binomial):
            # this checks what kind of data is given for Binomial.
            # family will need a reference to endog if this is to be removed
            # from the preprocessing
            self.endog = self.family.initialize(self.endog)

        # An offset takes precedence over an exposure; only one is applied.
        if hasattr(self, 'offset'):
            offset = self.offset
        elif hasattr(self, 'exposure'):
            offset = self.exposure
        else:
            offset = 0
        #TODO: would there ever be both and exposure and an offset?

        mu = self.family.starting_mu(self.endog)
        wlsexog = self.exog
        eta = self.family.predict(mu)
        self.iteration += 1
        dev = self.family.deviance(self.endog, mu)
        if np.isnan(dev):
            raise ValueError("The first guess on the deviance function "
                             "returned a nan.  This could be a boundary "
                             "problem and should be reported.")
        # first guess on the deviance is assumed to be scaled by 1.
        self.history['deviance'].append(dev)

        # Stays None if the loop body never executes, so the failure can be
        # reported explicitly instead of surfacing as an UnboundLocalError.
        wls_results = None
        while (np.fabs(self.history['deviance'][self.iteration] -
                       self.history['deviance'][self.iteration - 1]) > tol
               and self.iteration < maxiter):
            self.weights = data_weights * self.family.weights(mu)
            # working response with the offset removed
            wlsendog = (eta + self.family.link.deriv(mu) * (self.endog - mu)
                        - offset)
            wls_results = lm.WLS(wlsendog, wlsexog, self.weights).fit()
            eta = np.dot(self.exog, wls_results.params) + offset
            mu = self.family.fitted(eta)
            self._update_history(wls_results, mu)
            self.scale = self.estimate_scale(mu)
            self.iteration += 1
            if endog.squeeze().ndim == 1 and np.allclose(mu - endog, 0):
                msg = "Perfect separation detected, results not available"
                raise PerfectSeparationError(msg)
        self.mu = mu
        if wls_results is None:
            raise ValueError("IRLS performed no iterations (check `maxiter` "
                             "and `tol`); no parameter estimates are "
                             "available.")
        glm_results = GLMResults(self, wls_results.params,
                                 wls_results.normalized_cov_params, self.scale)
        return GLMResultsWrapper(glm_results)
コード例 #3
0
    def fit(self,
            maxiter=50,
            tol=1e-8,
            scale_est='mad',
            init=None,
            cov='H1',
            update_scale=True,
            conv='dev'):
        """
        Fits the model using iteratively reweighted least squares.

        The IRLS routine runs until the specified objective converges to `tol`
        or `maxiter` has been reached.

        Parameters
        ----------
        conv : string
            Indicates the convergence criteria.
            Available options are "coefs" (the coefficients), "weights" (the
            weights in the iteration), "resid" (the standardized residuals),
            and "dev" (the un-normalized log-likelihood for the M
            estimator).  The default is "dev".  "resids" is accepted as an
            alias for "resid".  The value is case-insensitive.
        cov : string, optional
            'H1', 'H2', or 'H3' (case-insensitive).
            Indicates how the covariance matrix is estimated.  Default is 'H1'.
            See rlm.RLMResults for more information.
        init : string
            Specifies method for the initial estimates of the parameters.
            Default is None, which means that the least squares estimate
            is used.  Currently it is the only available choice.
        maxiter : int
            The maximum number of iterations to try. Default is 50.
        scale_est : string or HuberScale()
            'mad', 'stand_mad', or HuberScale()
            Indicates the estimate to use for scaling the weights in the IRLS.
            The default is 'mad' (median absolute deviation).  Other options
            are 'stand_mad' for the median absolute deviation standardized
            around the median and 'HuberScale' for Huber's proposal 2.
            Huber's proposal 2 has optional keyword arguments d, tol, and
            maxiter for specifying the tuning constant, the convergence
            tolerance, and the maximum number of iterations.
            See models.robust.scale for more information.
        tol : float
            The convergence tolerance of the estimate.  Default is 1e-8.
        update_scale : Bool
            If `update_scale` is False then the scale estimate for the
            weights is held constant over the iteration.  Otherwise, it
            is updated for each fit in the iteration.  Default is True.

        Returns
        -------
        results : object
            scikits.statsmodels.rlm.RLMresults

        Raises
        ------
        ValueError
            If `cov` or `conv` is not one of the supported options.
        """
        if cov.upper() not in ["H1", "H2", "H3"]:
            raise ValueError("Covariance matrix %s not understood" % cov)
        self.cov = cov.upper()

        conv = conv.lower()
        if conv == "resids":
            # Earlier documentation spelled this option "resids".
            conv = "resid"
        if conv not in ["weights", "coefs", "dev", "resid"]:
            raise ValueError("Convergence argument %s not understood"
                             % conv)

        self.scale_est = scale_est
        # Unweighted least squares supplies the starting estimates.
        wls_results = lm.WLS(self.endog, self.exog).fit()
        if not init:
            self.scale = self._estimate_scale(wls_results.resid)
        self._update_history(wls_results)
        self.iteration = 1

        # Map the convergence option to the history series that is monitored.
        history_key = {'coefs': 'params',
                       'dev': 'deviance',
                       'resid': 'sresid',
                       'weights': 'weights'}[conv]
        criterion = self.history[history_key]

        # Keep iterating while ANY monitored element still moves by more than
        # `tol`; requiring all of them to move would stop the loop as soon as
        # a single coefficient/weight/residual happened to settle.
        while (np.any(np.fabs(criterion[self.iteration] -
                              criterion[self.iteration - 1]) > tol)
               and self.iteration < maxiter):
            self.weights = self.M.weights(wls_results.resid / self.scale)
            wls_results = lm.WLS(self.endog, self.exog,
                                 weights=self.weights).fit()
            if update_scale is True:
                self.scale = self._estimate_scale(wls_results.resid)
            self._update_history(wls_results)
            self.iteration += 1
        results = RLMResults(self, wls_results.params,
                             self.normalized_cov_params, self.scale)

        results.fit_options = dict(cov=cov.upper(),
                                   scale_est=scale_est,
                                   norm=self.M.__class__.__name__,
                                   conv=conv)
        # norm is not changed in fit, no old state

        # NOTE: self.cov and self.scale_est are deliberately left set;
        # resetting them to None here was found to cause an exception.
        # The history is not reset in this method, so it could contain
        # wrong state with repeated calls to fit.
        return RLMResultsWrapper(results)