Ejemplo n.º 1
0
 def null(self):
     """
     Return the fitted values of an intercept-only weighted regression.

     The stored response (``self._endog``) is regressed by WLS on a single
     column of ones, using ``self._data_weights`` as the weights, and the
     fitted values of that regression are returned.
     """
     response = self._endog
     # Design matrix with one constant column: the model has only an intercept.
     intercept_only = np.ones((len(response), 1))
     constant_model = WLS(response, intercept_only, weights=self._data_weights)
     return constant_model.fit().fittedvalues
Ejemplo n.º 2
0
    def fit(self, maxiter=100, method="IRLS", tol=1e-8, data_weights=1.0, scale=None):
        """
        Fits a generalized linear model for a given family.

        Parameters
        ----------
        maxiter : int, optional
            Default is 100.
        method : string
            Default is 'IRLS' for iteratively reweighted least squares.  This
            is currently the only method available for GLM fit.
        tol : float
            Convergence tolerance.  Default is 1e-8.
        data_weights : array-like or scalar, only used with Binomial
            Number of trials for each observation. Used only for
            binomial data when `endog` is specified as a 2d array of
            (successes, failures). Note that this argument will be
            dropped in the future.
        scale : string or float, optional
            `scale` can be 'X2', 'dev', or a float
            The default value is None, which uses `X2` for Gamma, Gaussian,
            and Inverse Gaussian.
            `X2` is Pearson's chi-square divided by `df_resid`.
            The default is 1 for the Binomial and Poisson families.
            `dev` is the deviance divided by df_resid

        Returns
        -------
        GLMResults
            Results built from the parameters and normalized covariance of
            the last weighted least squares step and the current scale; its
            `bse` attribute is set to the square root of the diagonal of the
            scaled parameter covariance.

        Raises
        ------
        ValueError
            If non-scalar `data_weights` are given for a family other than
            Binomial, if the first deviance evaluation is nan, or if no
            IRLS step was performed so there are no estimates to return.

        Notes
        -----
        The fit updates the model in place: `data_weights`, `scaletype`,
        `iteration`, `history`, `weights`, `scale` and `mu` are set on
        `self`, and `endog` is replaced by `family.initialize(endog)` for
        the Binomial family.
        """
        if np.shape(data_weights) != () and not isinstance(self.family, families.Binomial):
            raise ValueError(
                "Data weights are only to be supplied for "
                "the Binomial family"
            )
        self.data_weights = data_weights
        if np.shape(self.data_weights) == () and self.data_weights > 1:
            # Expand a scalar weight larger than one to one entry per observation.
            self.data_weights = self.data_weights * np.ones((self.exog.shape[0]))
        self.scaletype = scale
        if isinstance(self.family, families.Binomial):
            # This checks what kind of data is given for Binomial.  The family
            # will need a reference to endog if this is to be removed from
            # the preprocessing.
            self.endog = self.family.initialize(self.endog)
        mu = self.family.starting_mu(self.endog)
        wlsexog = self.exog
        eta = self.family.predict(mu)
        # NOTE(review): the convergence test below indexes the deviance history
        # at `iteration` and `iteration - 1`, so this presumably relies on the
        # history already holding one initial entry -- confirm in __init__.
        self.iteration += 1
        dev = self.family.deviance(self.endog, mu)
        if np.isnan(dev):
            raise ValueError(
                "The first guess on the deviance function "
                "returned a nan.  This could be a boundary problem and should be reported."
            )
        # The first guess on the deviance is assumed to be scaled by 1.
        self.history["deviance"].append(dev)

        # Stays None when the loop body never runs (e.g. maxiter <= 1).
        wls_results = None
        while (
            np.fabs(self.history["deviance"][self.iteration] - self.history["deviance"][self.iteration - 1])
        ) > tol and self.iteration < maxiter:
            self.weights = data_weights * self.family.weights(mu)
            # Working response of the current reweighted least squares step.
            wlsendog = eta + self.family.link.deriv(mu) * (self.endog - mu)
            # - offset
            wls_results = WLS(wlsendog, wlsexog, self.weights).fit()
            eta = np.dot(self.exog, wls_results.params)  # + offset
            mu = self.family.fitted(eta)
            self._update_history(wls_results, mu)
            self.scale = self.estimate_scale(mu)
            self.iteration += 1
        self.mu = mu
        if wls_results is None:
            # Previously this surfaced as an opaque UnboundLocalError below.
            raise ValueError(
                "No IRLS iteration was performed (maxiter is too small or the "
                "deviance history already satisfies tol), so there are no "
                "parameter estimates to return."
            )
        glm_results = GLMResults(self, wls_results.params, wls_results.normalized_cov_params, self.scale)
        glm_results.bse = np.sqrt(np.diag(wls_results.cov_params(scale=self.scale)))
        return glm_results