Example 1
def test_warnings_raised():
    if sys.version_info < (3, 4):
        raise SkipTest
    weights = [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3]
    # faking aweights by using normalized freq_weights
    weights = np.array(weights)

    gid = np.arange(1, 17 + 1) // 2

    cov_kwds = {'groups': gid, 'use_correction': False}
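    # cpunish_data is loaded at module level in the statsmodels test suite;
    # combining freq_weights (or var_weights) with a cluster-robust
    # covariance is expected to raise a SpecificationWarning, which
    # record=True captures for the asserts below.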
    with warnings.catch_warnings(record=True) as w:
        res1 = GLM(cpunish_data.endog,
                   cpunish_data.exog,
                   family=sm.families.Poisson(),
                   freq_weights=weights).fit(cov_type='cluster',
                                             cov_kwds=cov_kwds)
        res1.summary()
        assert len(w) >= 1

    with warnings.catch_warnings(record=True) as w:
        res1 = GLM(cpunish_data.endog,
                   cpunish_data.exog,
                   family=sm.families.Poisson(),
                   var_weights=weights).fit(cov_type='cluster',
                                            cov_kwds=cov_kwds)
        res1.summary()
        assert len(w) >= 1
Example 2
def test_warnings_raised():
    weights = [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3]
    # faking aweights by using normalized freq_weights
    weights = np.array(weights)

    gid = np.arange(1, 17 + 1) // 2

    cov_kwds = {'groups': gid, 'use_correction': False}

    # Workaround for buggy pytest repeated-warning capture on Python 2.7
    warning_type = SpecificationWarning if PY3 else None
    with pytest.warns(warning_type):
        res1 = GLM(cpunish_data.endog,
                   cpunish_data.exog,
                   family=sm.families.Poisson(),
                   freq_weights=weights).fit(cov_type='cluster',
                                             cov_kwds=cov_kwds)
        res1.summary()

    with pytest.warns(warning_type):
        res1 = GLM(cpunish_data.endog,
                   cpunish_data.exog,
                   family=sm.families.Poisson(),
                   var_weights=weights).fit(cov_type='cluster',
                                            cov_kwds=cov_kwds)
        res1.summary()
Example 3
def test_warnings_raised():
    weights = [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3]
    # faking aweights by using normalized freq_weights
    weights = np.array(weights)

    gid = np.arange(1, 17 + 1) // 2

    cov_kwds = {'groups': gid, 'use_correction': False}

    with pytest.warns(SpecificationWarning):
        res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                   family=sm.families.Poisson(), freq_weights=weights
                   ).fit(cov_type='cluster', cov_kwds=cov_kwds)
        res1.summary()

    with pytest.warns(SpecificationWarning):
        res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                   family=sm.families.Poisson(), var_weights=weights
                   ).fit(cov_type='cluster', cov_kwds=cov_kwds)
        res1.summary()
Example 4
def test_warnings_raised():
    weights = [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3]
    # faking aweights by using normalized freq_weights
    weights = np.array(weights)

    gid = np.arange(1, 17 + 1) // 2

    cov_kwds = {'groups': gid, 'use_correction': False}

    # Workaround for buggy pytest repeated-warning capture on Python 2.7
    warning_type = SpecificationWarning if PY3 else None
    with pytest.warns(warning_type):
        res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                   family=sm.families.Poisson(), freq_weights=weights
                   ).fit(cov_type='cluster', cov_kwds=cov_kwds)
        res1.summary()

    with pytest.warns(warning_type):
        res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                   family=sm.families.Poisson(), var_weights=weights
                   ).fit(cov_type='cluster', cov_kwds=cov_kwds)
        res1.summary()
Example 5
def test_warnings_raised():
    if sys.version_info < (3, 4):
        raise SkipTest
    weights = [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3]
    # faking aweights by using normalized freq_weights
    weights = np.array(weights)

    gid = np.arange(1, 17 + 1) // 2

    cov_kwds = {'groups': gid, 'use_correction': False}
    with warnings.catch_warnings(record=True) as w:
        res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                   family=sm.families.Poisson(), freq_weights=weights
                   ).fit(cov_type='cluster', cov_kwds=cov_kwds)
        res1.summary()
        assert len(w) >= 1

    with warnings.catch_warnings(record=True) as w:
        res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                   family=sm.families.Poisson(), var_weights=weights
                   ).fit(cov_type='cluster', cov_kwds=cov_kwds)
        res1.summary()
        assert len(w) >= 1
Example 6
def reply_analysis_report(data_input_path, data_output_path):
    reply_analysis = prepare_data_reply_analysis(data_input_path,
                                                 data_output_path)

    score = reply_analysis.assign(
        tweet_negative_score=lambda df: df.apply(
            lambda x: x["tweet_score"]
            if x["tweet_label"] == "NEGATIVE" else 1 - x["tweet_score"],
            axis=1),
        trump_negative_score=lambda df: df.apply(
            lambda x: x["trump_score"]
            if x["trump_label"] == "NEGATIVE" else 1 - x["trump_score"],
            axis=1))
    # Put the scores computed in the assign above on the logit scale so the
    # scatter plot and the Pearson correlation use an unbounded axis
    logits = scipy.special.logit(
        score[["tweet_negative_score", "trump_negative_score"]])
    ax = logits.plot(kind="scatter",
                     x="trump_negative_score",
                     y="tweet_negative_score",
                     alpha=0.1)
    # DataFrames have no .save(); save the figure through the plot's Axes
    ax.get_figure().savefig("plots/logit_sentiment_score.png")
    print(
        "Naive Sentiment Score Calculation",
        scipy.stats.pearsonr(logits["trump_negative_score"],
                             logits["tweet_negative_score"]))

    tmp_data = reply_analysis[~reply_analysis["trump_label"].isnull()]
    x = sm.add_constant(tmp_data[[
        # "created_at_trump_day", "created_at_trump_month", "created_at_trump_year",
        "followers_count_norm",
        "friends_count_norm",
        "listed_count_norm",
        "statuses_count_norm"
    ]])
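    # Logistic regression of trump_label on normalized user covariates;
    # .cat.codes converts the label into the 0/1 response Binomial expects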
    res = GLM(tmp_data['trump_label'].astype("category").cat.codes,
              x,
              family=families.Binomial()).fit(attach_wls=True, atol=1e-10)
    print(res.summary())

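    # Average treatment effect of the sentiment of Trump's tweet on the
    # sentiment of the replies, estimated via backdoor adjustment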
    print(
        "ATE",
        backdoor_binary_respose_ate(reply_analysis, "trump_label", "NEGATIVE",
                                    "tweet_label", "NEGATIVE",
                                    "trump_created_at", "tweet_created_at",
                                    datetime.timedelta(minutes=0), "1D"))
Example 7
import os

import pandas as pd

import statsmodels.stats.tests.test_influence
from statsmodels.genmod import families
from statsmodels.genmod.generalized_linear_model import GLM

# Locate the example CSV shipped with the statsmodels test suite
test_module = statsmodels.stats.tests.test_influence.__file__
cur_dir = os.path.abspath(os.path.dirname(test_module))

file_name = "binary_constrict.csv"
file_path = os.path.join(cur_dir, "results", file_name)
df = pd.read_csv(file_path, index_col=0)

res = GLM(
    df["constrict"],
    df[["const", "log_rate", "log_volumne"]],
    family=families.Binomial(),
).fit(attach_wls=True, atol=1e-10)
print(res.summary())

# ## get the influence measures
#
# GLMResults has a `get_influence` method, similar to OLSResults, that
# returns an instance of the GLMInfluence class. This class has methods and
# (cached) attributes to inspect influence and outlier measures.
#
# These measures are based on a one-step approximation to the results of
# deleting one observation. One-step approximations are usually accurate
# for small changes but underestimate the magnitude of large changes. Even
# though large changes are underestimated, they still clearly show the
# effect of influential observations.
#
# In this example, observations 4 and 18 have a large standardized residual
# and large Cook's distance, but not a large leverage. Observation 13 has
# the largest leverage but only a small Cook's distance and not a large
# studentized residual.
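#
# A minimal sketch of how these measures can be accessed, assuming `res` is
# the fitted GLMResults from above (names follow the GLMInfluence docs):

infl = res.get_influence(observed=False)
cooks_d, _ = infl.cooks_distance      # Cook's distance for each observation
resid_stud = infl.resid_studentized   # one-step studentized residuals
leverage = infl.hat_matrix_diag       # leverage: diagonal of the hat matrix
infl.plot_influence()                 # residuals vs. leverage, sized by Cook's distance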