예제 #1
0
    def test_influence(self):
        """Smoke test for ``MLEInfluence`` built from ``self.res1``.

        No numerical values are verified; the test only checks that each
        listed attribute and ``summary_frame`` can be evaluated without
        raising.
        """
        from statsmodels.stats.outliers_influence import MLEInfluence

        influence = MLEInfluence(self.res1)

        measure_names = (
            'cooks_distance',
            'd_fittedvalues',
            'd_fittedvalues_scaled',
            'd_params',
            'dfbetas',
            'hat_matrix_diag',
            'resid_studentized',
        )
        # Accessing the attribute is the whole check, the value is discarded.
        for name in measure_names:
            getattr(influence, name)

        influence.summary_frame()
예제 #2
0
    def test_influence(self):
        """Compare ``res1.get_influence()`` with a directly built MLEInfluence.

        Accessing the individual measures is only a smoke test. The summary
        frames of the two influence instances are required to agree up to
        ``rtol=1e-13`` and ``atol=1e-13``.
        """
        from statsmodels.stats.outliers_influence import MLEInfluence

        fitted = self.res1
        reference = MLEInfluence(fitted)
        influence = fitted.get_influence()

        measure_names = (
            'cooks_distance',
            'd_fittedvalues',
            'd_fittedvalues_scaled',
            'd_params',
            'dfbetas',
            'hat_matrix_diag',
            'resid_studentized',
        )
        # Smoke test only: each attribute must be computable.
        for name in measure_names:
            getattr(influence, name)

        summary = influence.summary_frame()
        summary_reference = reference.summary_frame()
        assert_allclose(summary, summary_reference, rtol=1e-13, atol=1e-13)
예제 #3
0
    def setup_class(cls):
        """Fit a binomial GLM on ``data_bin`` and store two influence objects.

        ``cls.infl1`` is obtained from ``get_influence()`` of the results,
        ``cls.infl0`` is an ``MLEInfluence`` built from the same results.
        """
        exog_names = ['const', 'log_rate', 'log_volumne']
        model = GLM(data_bin['constrict'], data_bin[exog_names],
                    family=families.Binomial())
        fitted = model.fit(attach_wls=True, atol=1e-10)

        cls.infl1 = fitted.get_influence()
        cls.infl0 = MLEInfluence(fitted)
예제 #4
0
    def get_influence(self):
        """
        Influence and outlier measures

        Some influence measures do not apply to zero inflated models, see
        the Notes section for details.

        Returns
        -------
        MLEInfluence
            The instance has methods to calculate the main influence and
            outlier measures as attributes.

        See Also
        --------
        statsmodels.stats.outliers_influence.MLEInfluence

        Notes
        -----
        ZeroInflated models have functions that are not differentiable
        with respect to sample endog if endog=0. As a consequence, the
        generalized leverage cannot be computed using its usual definition.

        Currently, neither the generalized leverage in the
        `hat_matrix_diag` attribute nor the studentized residuals are
        available. In the influence plot, the generalized leverage is
        replaced by a hat matrix diagonal that only takes the combined exog
        into account and is computed in the same way as for OLS. This is a
        measure for exog outliers, but it does not take specific features
        of the model into account.
        """
        # Same as the super method in DiscreteResults; it is only defined
        # here so that the docstring above can be attached.
        from statsmodels.stats.outliers_influence import MLEInfluence

        influence = MLEInfluence(self)
        return influence
예제 #5
0
    def get_influence(self):
        """
        Get an instance of MLEInfluence with influence and outlier measures

        Returns
        -------
        infl : MLEInfluence instance
            The instance has methods to calculate the main influence and
            outlier measures as attributes.

        See Also
        --------
        statsmodels.stats.outliers_influence.MLEInfluence

        Notes
        -----
        Support for multi-link and multi-exog models is still experimental
        in MLEInfluence. The interface and some definitions might still
        change.

        Difference to R betareg: betareg has the same general leverage as
        this model. However, betareg uses a linear approximation hat matrix
        to scale and studentize influence and residual statistics, while
        MLEInfluence uses the generalized leverage as hat_matrix_diag.
        Additionally, MLEInfluence uses pearson residuals for residual
        analysis.

        References
        ----------
        TODO: add references.

        """
        from statsmodels.stats.outliers_influence import MLEInfluence

        influence = MLEInfluence(self)
        return influence
예제 #6
0
    def setup_class(cls):
        """Fit a probit model on ``data_bin`` both as GLM and as Probit.

        ``cls.infl1`` is an ``MLEInfluence`` built directly from the GLM
        results, ``cls.infl0`` comes from ``get_influence()`` of the Probit
        results. Both fits use ``method="newton"`` with ``tol=1e-10``.
        """
        from statsmodels.discrete.discrete_model import Probit

        exog_names = ['const', 'log_rate', 'log_volumne']

        glm_model = GLM(
            data_bin['constrict'],
            data_bin[exog_names],
            family=families.Binomial(link=families.links.probit()),
        )
        glm_fit = glm_model.fit(method="newton", tol=1e-10)

        probit_model = Probit(data_bin['constrict'], data_bin[exog_names])
        probit_fit = probit_model.fit(method="newton", tol=1e-10)

        # infl1 is constructed explicitly instead of via res.get_influence()
        cls.infl1 = MLEInfluence(glm_fit)
        cls.infl0 = probit_fit.get_influence()
예제 #7
0
    def setup_class(cls):
        """Fit a GLM through the formula interface on the Duncan data.

        The arrays returned by ``get_duncan_data`` are combined into a
        DataFrame indexed by ``labels``. ``cls.infl1`` is obtained from
        ``get_influence()`` of the results, ``cls.infl0`` is an
        ``MLEInfluence`` built from the same results.
        """
        from .test_diagnostic import get_duncan_data

        endog, exog, labels = get_duncan_data()
        column_names = ['y', 'const', 'var1', 'var2']
        duncan = pd.DataFrame(
            np.column_stack((endog, exog)),
            columns=column_names,
            index=labels,
        )

        # "- 1" removes the implicit formula intercept; the data has its own
        # 'const' column. An array-based call, GLM(endog, exog).fit(), was
        # previously noted here as an alternative.
        model = GLM.from_formula('y ~ const + var1 + var2 - 1', duncan)
        fitted = model.fit()

        cls.infl1 = fitted.get_influence()
        cls.infl0 = MLEInfluence(fitted)
예제 #8
0
    def setup_class(cls):
        """Fit a binomial GLM on a small data set with two-column endog.

        The endog columns are ``yi`` and ``ni - yi`` with ``ni = 40`` for
        every row; exog is a column of ones plus ``1, ..., len(yi)``.
        ``cls.infl1`` is obtained from ``get_influence()`` of the results,
        ``cls.infl0`` is an ``MLEInfluence`` built from the same results.
        """
        counts = np.array([0, 2, 14, 19, 30])
        nobs = len(counts)
        totals = 40 * np.ones(nobs)
        regressor = np.arange(1, nobs + 1)

        design = np.column_stack((np.ones(nobs), regressor))
        response = np.column_stack((counts, totals - counts))

        model = GLM(response, design, family=families.Binomial())
        fitted = model.fit()

        cls.infl1 = fitted.get_influence()
        cls.infl0 = MLEInfluence(fitted)
        # NOTE(review): presumably the relative tolerance for the Cook's
        # distance comparison in the test methods - confirm against them.
        cls.cd_rtol = 5e-5