Python GLM.get_influence 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: statsmodels.genmod.generalized_linear_model

클래스/타입: GLM

메소드/함수: get_influence

hotexamples.com에서의 예제들: 8

Python GLM.get_influence - 8개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 statsmodels.genmod.generalized_linear_model.GLM.get_influence의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

GLM(30)

fit(30)

from_formula(15)

fit_constrained(8)

predict(5)

summary(5)

get_influence(4)

aicc(2)

bic(2)

cov_params(2)

aic(2)

params(2)

pvalues(2)

bse(1)

__init__(1)

llf(1)

llnull(1)

lrstat(1)

lrt_pval(1)

예제 #1

파일 보기

    def setup_class(cls):
        """Fit a binomial GLM on ``data_bin`` and store two influence objects.

        ``cls.infl1`` is obtained from the fitted results' ``get_influence``
        method; ``cls.infl0`` is an ``MLEInfluence`` built directly from the
        same fitted results.
        """
        endog = data_bin['constrict']
        exog = data_bin[['const', 'log_rate', 'log_volumne']]
        model = GLM(endog, exog, family=families.Binomial())
        fitted = model.fit(attach_wls=True, atol=1e-10)

        cls.infl1 = fitted.get_influence()
        cls.infl0 = MLEInfluence(fitted)

예제 #2

파일 보기

파일: test_influence.py 프로젝트: haribharadwaj/statsmodels

    def setup_class(cls):
        """Prepare the influence objects shared by the tests of this class.

        A binomial GLM is fitted to the ``constrict`` column of ``data_bin``
        with ``attach_wls=True`` and ``atol=1e-10``. The influence object
        returned by the results (``infl1``) and a directly constructed
        ``MLEInfluence`` (``infl0``) are stored on the class.
        """
        regressors = ['const', 'log_rate', 'log_volumne']
        glm_model = GLM(
            data_bin['constrict'],
            data_bin[regressors],
            family=families.Binomial(),
        )
        glm_results = glm_model.fit(attach_wls=True, atol=1e-10)

        cls.infl1 = glm_results.get_influence()
        cls.infl0 = MLEInfluence(glm_results)

예제 #3

파일 보기

    def setup_class(cls):
        """Fit a binomial GLM on a small two-column response and store results.

        The response has two columns, ``yi`` and ``40 - yi``; the design
        matrix is a column of ones next to the values ``1..len(yi)``.
        Stores ``infl1`` (from ``get_influence``), ``infl0`` (a direct
        ``MLEInfluence``) and the tolerance ``cd_rtol`` on the class.
        """
        counts = np.array([0, 2, 14, 19, 30])
        n_rows = len(counts)
        totals = 40 * np.ones(n_rows)
        x_values = np.arange(1, n_rows + 1)

        design = np.column_stack((np.ones(n_rows), x_values))
        response = np.column_stack((counts, totals - counts))

        model = GLM(response, design, family=families.Binomial())
        fitted = model.fit()

        cls.infl1 = fitted.get_influence()
        cls.infl0 = MLEInfluence(fitted)
        cls.cd_rtol = 5e-5

예제 #4

파일 보기

파일: test_influence.py 프로젝트: haribharadwaj/statsmodels

    def setup_class(cls):
        """Build the class-level influence fixtures from a five-row data set.

        A binomial GLM is fitted with a two-column response
        (``yi``, ``40 - yi``) and a design matrix made of a ones column and
        the index values ``1..5``. The resulting influence objects and the
        tolerance ``cd_rtol = 5e-5`` are attached to the class.
        """
        first_col = np.array([0, 2, 14, 19, 30])
        size = len(first_col)
        second_col = 40 * np.ones(size) - first_col

        ones_col = np.ones(size)
        index_col = np.arange(1, size + 1)

        glm_results = GLM(
            np.column_stack((first_col, second_col)),
            np.column_stack((ones_col, index_col)),
            family=families.Binomial(),
        ).fit()

        cls.infl1 = glm_results.get_influence()
        cls.infl0 = MLEInfluence(glm_results)
        cls.cd_rtol = 5e-5

예제 #5

파일 보기

def test_influence_glm_bernoulli():
    """Compare GLM influence measures with the values in ``results_sas_df``.

    Columns of the reference array are checked as follows: column 4 against
    the hat-matrix diagonal, columns 5-7 against ``dfbetas`` (and, scaled by
    the standard errors, against ``d_params``), column 8 against
    ``k_vars`` times Cook's distance, and column 9 against that quantity
    multiplied by ``1 - hat_matrix_diag``.
    """
    # example uses Finney's data and is used in Pregibon 1981
    reference = np.asarray(results_sas_df)

    endog = data_bin['constrict']
    exog = data_bin[['const', 'log_rate', 'log_volumne']]
    model = GLM(endog, exog, family=families.Binomial())
    fitted = model.fit(attach_wls=True, atol=1e-10)

    infl = fitted.get_influence(observed=False)

    k_vars = 3
    ref_dfbetas = reference[:, 5:8]
    assert_allclose(infl.dfbetas, ref_dfbetas, atol=1e-4)
    assert_allclose(infl.d_params, ref_dfbetas * fitted.bse.values, atol=1e-4)

    # Cook's distance rescaled by the number of parameters
    cooks_scaled = infl.cooks_distance[0] * k_vars
    assert_allclose(cooks_scaled, reference[:, 8], atol=6e-5)

    leverage = infl.hat_matrix_diag
    assert_allclose(leverage, reference[:, 4], atol=6e-5)

    c_bar = cooks_scaled * (1 - leverage)
    assert_allclose(c_bar, reference[:, 9], atol=6e-5)

예제 #6

파일 보기

파일: test_influence.py 프로젝트: haribharadwaj/statsmodels

def test_influence_glm_bernoulli():
    """Check influence diagnostics of a binomial GLM against reference values.

    The model is fitted to ``data_bin`` and the diagnostics from
    ``get_influence(observed=False)`` are compared, column by column, with
    the array built from ``results_sas_df``.
    """
    # example uses Finney's data and is used in Pregibon 1981
    expected = np.asarray(results_sas_df)

    columns = ['const', 'log_rate', 'log_volumne']
    glm_results = GLM(
        data_bin['constrict'],
        data_bin[columns],
        family=families.Binomial(),
    ).fit(attach_wls=True, atol=1e-10)

    infl = glm_results.get_influence(observed=False)

    k_vars = 3
    # columns 5..7: dfbetas; multiplied by bse they give the parameter change
    assert_allclose(infl.dfbetas, expected[:, 5:8], atol=1e-4)
    assert_allclose(
        infl.d_params,
        expected[:, 5:8] * glm_results.bse.values,
        atol=1e-4,
    )
    # column 8: Cook's distance times the number of parameters
    assert_allclose(
        infl.cooks_distance[0] * k_vars, expected[:, 8], atol=6e-5
    )
    # column 4: diagonal of the hat matrix
    assert_allclose(infl.hat_matrix_diag, expected[:, 4], atol=6e-5)

    # column 9: scaled Cook's distance times (1 - leverage)
    one_minus_h = 1 - infl.hat_matrix_diag
    c_bar = infl.cooks_distance[0] * k_vars * one_minus_h
    assert_allclose(c_bar, expected[:, 9], atol=6e-5)

예제 #7

파일 보기

#
# These measures are based on a one-step approximation to the results
# for deleting one observation. One-step approximations are usually accurate
# for small changes but underestimate the magnitude of large changes. Even
# though large changes are underestimated, they still show clearly the
# effect of influential observations.
#
# In this example observations 4 and 18 have a large standardized residual
# and large Cook's distance, but not a large leverage. Observation 13 has
# the largest leverage but only small Cook's distance and not a large
# studentized residual.
#
# Only the two observations 4 and 18 have a large impact on the parameter
# estimates.

# `res` is a fitted results object created earlier, outside this snippet.
# NOTE(review): observed=False presumably selects the expected rather than
# the observed Hessian -- confirm against the statsmodels documentation.
infl = res.get_influence(observed=False)

# Table of influence measures; the sorted top-10 slice below is a bare
# expression, so its value is only displayed in an interactive session or
# notebook and is otherwise discarded.
summ_df = infl.summary_frame()
summ_df.sort_values("cooks_d", ascending=False)[:10]

# Each plotting call returns a figure; `fig` is rebound every time, so only
# the last figure remains referenced at the end of the snippet.
fig = infl.plot_influence()
fig.tight_layout(pad=1.0)

# Index plot of Cook's distance with a threshold of twice its mean value.
fig = infl.plot_index(y_var="cooks",
                      threshold=2 * infl.cooks_distance[0].mean())
fig.tight_layout(pad=1.0)

# Index plot of the residuals with threshold 1.
fig = infl.plot_index(y_var="resid", threshold=1)
fig.tight_layout(pad=1.0)

# Index plot of dfbeta for the parameter at index 1 (no tight_layout call
# follows this last figure).
fig = infl.plot_index(y_var="dfbeta", idx=1, threshold=0.5)

예제 #8

파일 보기

파일: influence_glm_logit.py 프로젝트: bashtage/statsmodels

#
# These measures are based on a one-step approximation to the results
# for deleting one observation. One-step approximations are usually accurate
# for small changes but underestimate the magnitude of large changes. Even
# though large changes are underestimated, they still show clearly the
# effect of influential observations.
#
# In this example observations 4 and 18 have a large standardized residual
# and large Cook's distance, but not a large leverage. Observation 13 has
# the largest leverage but only small Cook's distance and not a large
# studentized residual.
#
# Only the two observations 4 and 18 have a large impact on the parameter
# estimates.

# `res` is a fitted results object created earlier, outside this snippet.
# NOTE(review): observed=False presumably selects the expected rather than
# the observed Hessian -- confirm against the statsmodels documentation.
infl = res.get_influence(observed=False)

# Table of influence measures; the sorted top-10 slice below is a bare
# expression, so its value is only displayed in an interactive session or
# notebook and is otherwise discarded.
summ_df = infl.summary_frame()
summ_df.sort_values('cooks_d', ascending=False)[:10]

# The return values of the plotting calls below are not kept.
infl.plot_influence()

# Index plot of Cook's distance with a threshold of twice its mean value.
infl.plot_index(y_var='cooks', threshold=2 * infl.cooks_distance[0].mean())

# Index plot of the residuals with threshold 1.
infl.plot_index(y_var='resid', threshold=1)

# Index plots of dfbeta for the parameters at index 1, 2 and 0, each with
# threshold 0.5.
infl.plot_index(y_var='dfbeta', idx=1, threshold=0.5)

infl.plot_index(y_var='dfbeta', idx=2, threshold=0.5)

infl.plot_index(y_var='dfbeta', idx=0, threshold=0.5)