def test_warnings_raised():
    """Smoke test: summary() with cluster-robust cov + freq/var weights warns.

    Fits a Poisson GLM twice (once with ``freq_weights``, once with
    ``var_weights``) using a cluster covariance and asserts that calling
    ``summary()`` records at least one warning.
    """
    if sys.version_info < (3, 4):
        raise SkipTest
    weights = [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3]
    # faking aweights by using normalized freq_weights
    weights = np.array(weights)
    gid = np.arange(1, 17 + 1) // 2
    cov_kwds = {'groups': gid, 'use_correction': False}

    with warnings.catch_warnings(record=True) as w:
        # BUG FIX: without resetting the filter, a warning that was already
        # recorded in __warningregistry__ (e.g. by an earlier test run) is
        # suppressed by the default "once"-style filters, leaving ``w`` empty
        # and making the assertion flaky.
        warnings.simplefilter("always")
        res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                   family=sm.families.Poisson(),
                   freq_weights=weights
                   ).fit(cov_type='cluster', cov_kwds=cov_kwds)
        res1.summary()
        assert len(w) >= 1

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                   family=sm.families.Poisson(),
                   var_weights=weights
                   ).fit(cov_type='cluster', cov_kwds=cov_kwds)
        res1.summary()
        assert len(w) >= 1
def test_warnings_raised():
    """Smoke test: summary() with cluster-robust cov + freq/var weights warns.

    Fits a Poisson GLM twice (once with ``freq_weights``, once with
    ``var_weights``) using a cluster covariance and checks that a
    ``SpecificationWarning`` is emitted by ``summary()``.
    """
    weights = [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3]
    # faking aweights by using normalized freq_weights
    weights = np.array(weights)
    gid = np.arange(1, 17 + 1) // 2
    cov_kwds = {'groups': gid, 'use_correction': False}
    # BUG FIX: the old Python 2.7 workaround set the expected warning to None,
    # but ``pytest.warns(None)`` raises TypeError since pytest 7.0 and
    # Python 2 is EOL — always check for the specific SpecificationWarning.
    with pytest.warns(SpecificationWarning):
        res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                   family=sm.families.Poisson(),
                   freq_weights=weights
                   ).fit(cov_type='cluster', cov_kwds=cov_kwds)
        res1.summary()
    with pytest.warns(SpecificationWarning):
        res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                   family=sm.families.Poisson(),
                   var_weights=weights
                   ).fit(cov_type='cluster', cov_kwds=cov_kwds)
        res1.summary()
def test_warnings_raised():
    """Check that summary() warns (SpecificationWarning) when a cluster
    covariance is combined with freq_weights or var_weights on a Poisson GLM.
    """
    weights = np.array(
        [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
    # faking aweights by using normalized freq_weights
    gid = np.arange(1, 17 + 1) // 2
    cov_kwds = {'groups': gid, 'use_correction': False}
    # Same check for both weighting schemes: freq_weights first, then
    # var_weights, exactly as the two explicit contexts did.
    for weight_kwarg in ('freq_weights', 'var_weights'):
        with pytest.warns(SpecificationWarning):
            res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                       family=sm.families.Poisson(),
                       **{weight_kwarg: weights}
                       ).fit(cov_type='cluster', cov_kwds=cov_kwds)
            res1.summary()
def test_warnings_raised():
    """Check that summary() warns when a cluster covariance is combined with
    freq_weights or var_weights on a Poisson GLM.
    """
    raw_weights = [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3]
    # faking aweights by using normalized freq_weights
    weights = np.array(raw_weights)
    gid = np.arange(1, 17 + 1) // 2
    cov_kwds = {'groups': gid, 'use_correction': False}
    # Work around for buggy pytest repeated warning capture on Python 2.7
    warning_type = SpecificationWarning if PY3 else None
    # Run the identical check for both weighting schemes, freq_weights first.
    for weight_kwargs in ({'freq_weights': weights},
                          {'var_weights': weights}):
        with pytest.warns(warning_type):
            res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                       family=sm.families.Poisson(),
                       **weight_kwargs
                       ).fit(cov_type='cluster', cov_kwds=cov_kwds)
            res1.summary()
def test_warnings_raised():
    """Check that summary() records at least one warning when a cluster
    covariance is combined with freq_weights or var_weights on a Poisson GLM.

    Skipped on interpreters older than Python 3.4.
    """
    if sys.version_info < (3, 4):
        raise SkipTest
    weights = np.array(
        [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
    # faking aweights by using normalized freq_weights
    gid = np.arange(1, 17 + 1) // 2
    cov_kwds = {'groups': gid, 'use_correction': False}
    # Identical check for both weighting schemes, freq_weights first.
    for weight_kwargs in ({'freq_weights': weights},
                          {'var_weights': weights}):
        with warnings.catch_warnings(record=True) as w:
            res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                       family=sm.families.Poisson(),
                       **weight_kwargs
                       ).fit(cov_type='cluster', cov_kwds=cov_kwds)
            res1.summary()
            assert len(w) >= 1
def reply_analysis_report(data_input_path, data_output_path):
    """Print sentiment-correlation diagnostics for the reply-analysis data.

    Loads the prepared reply data, derives per-row "negative" sentiment
    scores for the tweet and for Trump's tweet, plots the logit-scale
    scatter of the two, prints their Pearson correlation, fits a Binomial
    GLM of the Trump label on user covariates, and prints a backdoor ATE.

    Parameters
    ----------
    data_input_path : path-like, forwarded to prepare_data_reply_analysis
    data_output_path : path-like, forwarded to prepare_data_reply_analysis
    """
    reply_analysis = prepare_data_reply_analysis(data_input_path,
                                                 data_output_path)
    score = reply_analysis.assign(
        tweet_negative_score=lambda df: df.apply(
            lambda x: x["tweet_score"]
            if x["tweet_label"] == "NEGATIVE"
            else 1 - x["tweet_score"],
            axis=1),
        trump_negative_score=lambda df: df.apply(
            lambda x: x["trump_score"]
            if x["trump_label"] == "NEGATIVE"
            else 1 - x["trump_score"],
            axis=1))
    # BUG FIX: the original indexed "negative_score_retweet" /
    # "negative_score_trump", which are not created by the assign() above —
    # use the freshly computed columns instead. NOTE(review): if
    # prepare_data_reply_analysis itself adds the old column names, restore
    # them — confirm against its output schema.
    logits = scipy.special.logit(
        score[["tweet_negative_score", "trump_negative_score"]])
    ax = logits.plot(kind="scatter",
                     x="trump_negative_score",
                     y="tweet_negative_score",
                     alpha=0.1)
    # BUG FIX: DataFrame has no .save(); persist the figure behind the Axes.
    ax.get_figure().savefig("plots/logit_sentiment_score.png")
    print(
        "Naive Sentiment Score Calculation",
        scipy.stats.pearsonr(logits["trump_negative_score"],
                             logits["tweet_negative_score"]))

    # Logistic regression of the Trump sentiment label on user covariates
    # (rows with a missing label are dropped first).
    tmp_data = reply_analysis[~reply_analysis["trump_label"].isnull()]
    x = sm.add_constant(tmp_data[[
        # "created_at_trump_day",
        "created_at_trump_month",
        "created_at_trump_year",
        "followers_count_norm",
        "friends_count_norm",
        "listed_count_norm",
        "statuses_count_norm"
    ]])
    res = GLM(tmp_data['trump_label'].astype("category").cat.codes,
              x,
              family=families.Binomial()).fit(attach_wls=True, atol=1e-10)
    print(res.summary())
    print(
        "ATE",
        backdoor_binary_respose_ate(reply_analysis,
                                    "trump_label", "NEGATIVE",
                                    "tweet_label", "NEGATIVE",
                                    "trump_created_at", "tweet_created_at",
                                    datetime.timedelta(minutes=0), "1D"))
import statsmodels.stats.tests.test_influence

# Locate the results data shipped alongside statsmodels' influence tests.
test_module = statsmodels.stats.tests.test_influence.__file__
# BUG FIX: was the duplicated assignment ``cur_dir = cur_dir = ...``.
cur_dir = os.path.abspath(os.path.dirname(test_module))

file_name = "binary_constrict.csv"
file_path = os.path.join(cur_dir, "results", file_name)
df = pd.read_csv(file_path, index_col=0)

# Binomial GLM of vessel constriction on the covariates from the CSV.
# NOTE: "log_volumne" is the (misspelled) column name as stored in the data
# file; it must match the CSV header, so it is kept as-is.
res = GLM(
    df["constrict"],
    df[["const", "log_rate", "log_volumne"]],
    family=families.Binomial(),
).fit(attach_wls=True, atol=1e-10)
print(res.summary())

# ## get the influence measures
#
# GLMResults has a `get_influence` method similar to OLSResults, that
# returns an instance of the GLMInfluence class. This class has methods and
# (cached) attributes to inspect influence and outlier measures.
#
# These measures are based on a one-step approximation to the results
# for deleting one observation. One-step approximations are usually accurate
# for small changes but underestimate the magnitude of large changes. Even
# though large changes are underestimated, they still show clearly the
# effect of influential observations
#
# In this example observation 4 and 18 have a large standardized residual
# and large Cook's distance, but not a large leverage. Observation 13 has
from statsmodels.genmod import families
import statsmodels.stats.tests.test_influence

# Locate the results data shipped alongside statsmodels' influence tests.
test_module = statsmodels.stats.tests.test_influence.__file__
# BUG FIX: was the duplicated assignment ``cur_dir = cur_dir = ...``.
cur_dir = os.path.abspath(os.path.dirname(test_module))

file_name = 'binary_constrict.csv'
file_path = os.path.join(cur_dir, 'results', file_name)
df = pd.read_csv(file_path, index_col=0)

# Binomial GLM of vessel constriction on the covariates from the CSV.
# NOTE: 'log_volumne' is the (misspelled) column name as stored in the data
# file; it must match the CSV header, so it is kept as-is.
res = GLM(
    df['constrict'],
    df[['const', 'log_rate', 'log_volumne']],
    family=families.Binomial()).fit(
        attach_wls=True, atol=1e-10)
print(res.summary())

# ## get the influence measures
#
# GLMResults has a `get_influence` method similar to OLSResults, that
# returns an instance of the GLMInfluence class. This class has methods and
# (cached) attributes to inspect influence and outlier measures.
#
# These measures are based on a one-step approximation to the results
# for deleting one observation. One-step approximations are usually accurate
# for small changes but underestimate the magnitude of large changes. Even
# though large changes are underestimated, they still show clearly the
# effect of influential observations
#
# In this example observation 4 and 18 have a large standardized residual
# and large Cook's distance, but not a large leverage. Observation 13 has