예제 #1
0
	def test_get_p_vals(self):
		"""Check that every p-value for the Hamlet / Jay-Z term counts lies in [0, 1]."""
		tdm = build_hamlet_jz_term_doc_mat()
		df = tdm.get_term_freq_df()
		# Two-column count matrix: one column per category frequency.
		X = df[['hamlet freq', 'jay-z/r. kelly freq']].values
		pvals = LogOddsRatioUninformativeDirichletPrior().get_p_vals(X)
		# The lower bound must hold for the smallest value and the upper bound
		# for the largest one; checking min() against 1 would let values above
		# 1 slip through unnoticed.
		self.assertGreaterEqual(min(pvals), 0)
		self.assertLessEqual(max(pvals), 1)
 def get_thresholded_score(cat_word_counts,
                           not_cat_word_counts,
                           alpha_w=0.01,
                           threshold=0.1):
     """Return rescaled scores, zeroing out those that are not extreme enough.

     The values returned by ``get_p_values_from_counts`` are mapped through
     ``v * 2 - 1``. A rescaled score is kept only when its magnitude exceeds
     ``1 - 2 * threshold``; every other entry is multiplied by a false mask
     value and therefore becomes zero.

     :param cat_word_counts: word counts for the category of interest
     :param not_cat_word_counts: word counts for everything else
     :param alpha_w: argument passed to LogOddsRatioUninformativeDirichletPrior
     :param threshold: tail width; the keep cutoff is ``1 - 2 * threshold``
     :return: rescaled scores with the non-extreme entries set to zero
     """
     prior = LogOddsRatioUninformativeDirichletPrior(alpha_w)
     p_values = prior.get_p_values_from_counts(cat_word_counts,
                                               not_cat_word_counts)
     # NOTE(review): presumably p-values in [0, 1], which would put the
     # rescaled scores in [-1, 1] -- confirm against get_p_values_from_counts.
     rescaled = p_values * 2 - 1
     cutoff = 1. - (threshold * 2)
     is_extreme = (rescaled < -cutoff) | (rescaled > cutoff)
     return rescaled * is_extreme
	def get_score(cat_word_counts, not_cat_word_counts, alpha_w=0.01):
		"""Score terms from the p-values of the log-odds-ratio prior model.

		The two count vectors are combined into a matrix, p-values are
		computed from it, and those p-values are converted into scores by
		``_turn_pvals_into_scores``.

		:param cat_word_counts: word counts for the category of interest
		:param not_cat_word_counts: word counts for everything else
		:param alpha_w: argument passed to LogOddsRatioUninformativeDirichletPrior
		:return: scores produced by ``_turn_pvals_into_scores``
		"""
		count_matrix = LogOddsUninformativePriorScore._turn_counts_into_matrix(
			cat_word_counts, not_cat_word_counts)
		prior = LogOddsRatioUninformativeDirichletPrior(alpha_w)
		p_values = prior.get_p_vals(count_matrix)
		return LogOddsUninformativePriorScore._turn_pvals_into_scores(p_values)
	def get_delta_hats(cat_word_counts, not_cat_word_counts, alpha_w=0.01):
		"""Return the log-odds-with-prior values for the given word counts.

		:param cat_word_counts: word counts for the category of interest
		:param not_cat_word_counts: word counts for everything else
		:param alpha_w: argument passed to LogOddsRatioUninformativeDirichletPrior
		:return: result of ``get_log_odds_with_prior`` on the count matrix
		"""
		prior = LogOddsRatioUninformativeDirichletPrior(alpha_w)
		count_matrix = LogOddsUninformativePriorScore._turn_counts_into_matrix(
			cat_word_counts, not_cat_word_counts)
		return prior.get_log_odds_with_prior(count_matrix)