def test__turn_counts_into_matrix(self):
		"""Check the matrix built from the fixture's two count vectors.

		The counts come from ``self._get_counts()``; the result of
		``_turn_counts_into_matrix`` is compared against a fixed 4x2 array.
		"""
		in_category, out_of_category = self._get_counts()
		expected_matrix = np.array([
			[1, 100],
			[5, 510],
			[1, 100],
			[9, 199],
		])
		actual_matrix = LogOddsUninformativePriorScore._turn_counts_into_matrix(
			in_category, out_of_category
		)
		np.testing.assert_almost_equal(actual_matrix, expected_matrix)
	def test_get_score(self):
		"""Check ``get_score`` on the fixture counts against fixed values."""
		in_category, out_of_category = self._get_counts()
		# NOTE(review): a different expected array was left commented out in
		# the original test:
		#   [ 0.0590679,  0.1006782,  0.0590679, -0.1475645]
		expected_scores = np.array([0.4447054, 0.9433088, 0.4447054, -0.9971462])
		actual_scores = LogOddsUninformativePriorScore.get_score(
			in_category, out_of_category
		)
		np.testing.assert_almost_equal(actual_scores, expected_scores)
 def test_get_score(self):
     """Check ``get_score`` on the fixture counts against fixed values."""
     in_category, out_of_category = self._get_counts()
     # NOTE(review): a different expected array was left commented out in
     # the original test:
     #   [ 0.0590679,  0.1006782,  0.0590679, -0.1475645]
     expected_scores = np.array(
         [0.4447054, 0.9433088, 0.4447054, -0.9971462])
     actual_scores = LogOddsUninformativePriorScore.get_score(
         in_category, out_of_category)
     np.testing.assert_almost_equal(actual_scores, expected_scores)
Esempio n. 4
0
import spacy

from scattertext import SampleCorpora, produce_scattertext_explorer
from scattertext.CorpusFromPandas import CorpusFromPandas
from scattertext.termscoring.LogOddsUniformativePriorScore import LogOddsUninformativePriorScore

# Demo script: score terms from the 2012 convention sample data with the
# thresholded log-odds (uninformative prior) scorer and write an interactive
# scattertext explorer page to ./demo_insignificant_greyed_out.html.

# spaCy English pipeline handed to the corpus builder as its `nlp` callable.
nlp = spacy.load('en_core_web_sm')
convention_df = SampleCorpora.ConventionData2012.get_data()

# Build the corpus: categories come from the 'party' column, documents from
# the 'text' column.
corpus = CorpusFromPandas(convention_df,
                          category_col='party',
                          text_col='text',
                          nlp=nlp).build()
term_freq_df = corpus.get_term_freq_df()

# Thresholded scores for democrat vs. republican term frequencies.
# NOTE(review): the result is negated before plotting; presumably this orients
# the sign to the explorer's expectations for `category='democrat'` — confirm.
scores = -(LogOddsUninformativePriorScore.get_thresholded_score(
    term_freq_df['democrat freq'],
    term_freq_df['republican freq'],
    alpha_w=2.,
    threshold=0.1))

# NOTE(review): `gray_zero_scores=True` together with the output filename
# suggests zero-scored terms are drawn in grey — confirm against the
# scattertext documentation.
html = produce_scattertext_explorer(corpus,
                                    category='democrat',
                                    category_name='Democratic',
                                    not_category_name='Republican',
                                    scores=scores,
                                    sort_by_dist=False,
                                    gray_zero_scores=True,
                                    minimum_term_frequency=5,
                                    width_in_pixels=1000,
                                    metadata=convention_df['speaker'])

# Write the page as UTF-8 bytes; the context manager guarantees the handle is
# flushed and closed even if the write raises.
with open('./demo_insignificant_greyed_out.html', 'wb') as html_file:
    html_file.write(html.encode('utf-8'))
print('Open ./demo_insignificant_greyed_out.html in Chrome or Firefox.')
	def test__turn_pvals_into_scores(self):
		"""Check ``_turn_pvals_into_scores`` on five sample p-values."""
		sample_p_vals = np.array([0.01, 0.99, 0.5, 0.1, 0.9])
		expected_scores = [0.98, -0.98, -0., 0.8, -0.8]
		actual_scores = LogOddsUninformativePriorScore._turn_pvals_into_scores(
			sample_p_vals
		)
		np.testing.assert_almost_equal(actual_scores, expected_scores)
 def test__turn_counts_into_matrix(self):
     """Check the matrix built from the fixture's two count vectors.

     The counts come from ``self._get_counts()``; the result of
     ``_turn_counts_into_matrix`` is compared against a fixed 4x2 array.
     """
     in_category, out_of_category = self._get_counts()
     expected_matrix = np.array([
         [1, 100],
         [5, 510],
         [1, 100],
         [9, 199],
     ])
     actual_matrix = LogOddsUninformativePriorScore._turn_counts_into_matrix(
         in_category, out_of_category)
     np.testing.assert_almost_equal(actual_matrix, expected_matrix)
 def test__turn_pvals_into_scores(self):
     """Check ``_turn_pvals_into_scores`` on five sample p-values."""
     sample_p_vals = np.array([0.01, 0.99, 0.5, 0.1, 0.9])
     expected_scores = [0.98, -0.98, -0., 0.8, -0.8]
     actual_scores = LogOddsUninformativePriorScore._turn_pvals_into_scores(
         sample_p_vals)
     np.testing.assert_almost_equal(actual_scores, expected_scores)
 def test_get_score_threshold(self):
     """Check ``get_thresholded_score`` on the fixture counts.

     The expected result has zeros in the first and third positions and
     non-zero scores in the second and fourth.
     """
     in_category, out_of_category = self._get_counts()
     expected_scores = np.array([0., 0.9433088, 0., -0.9971462])
     actual_scores = LogOddsUninformativePriorScore.get_thresholded_score(
         in_category, out_of_category)
     np.testing.assert_almost_equal(actual_scores, expected_scores)
 def test_get_delta_hats(self):
     """Check ``get_delta_hats`` on the fixture counts against fixed values."""
     in_category, out_of_category = self._get_counts()
     expected_delta_hats = np.array(
         [-0.6095321, -1.0345766, -0.6095321, 1.5201005])
     actual_delta_hats = LogOddsUninformativePriorScore.get_delta_hats(
         in_category, out_of_category)
     np.testing.assert_almost_equal(actual_delta_hats, expected_delta_hats)