'Reject, Negative' ], False).get_unigram_corpus().compact( st.ClassPercentageCompactor(term_count=5)))
# NOTE(review): the line above is the tail of a statement whose beginning is
# not visible in this excerpt; it is kept verbatim.

# Each stage prints a label followed by `time.time() - t0`.
# Presumably t0 is a time.time() timestamp taken at script start -- confirm.
print('finding priors', time.time() - t0, 's')

# The priors are built from `full_corpus`, whereas the four-square below is
# built from `corpus`.
priors = (st.PriorFactory(
    full_corpus, starting_count=0.01).use_all_categories().get_priors())

print('building four square', time.time() - t0, 's')

# Four category lists (a / not-a / b / not-b) are passed, each holding a
# single category name.  `term_ranker` is defined outside this excerpt.
four_square = st.FourSquare(corpus,
                            category_a_list=['Accept, Positive'],
                            not_category_a_list=['Reject, Negative'],
                            category_b_list=['Accept, Negative'],
                            not_category_b_list=['Reject, Positive'],
                            term_ranker=term_ranker,
                            # Scorer receives the priors computed above plus
                            # the positional arguments 500 and 'word'.
                            scorer=st.LogOddsRatioInformativeDirichletPrior(
                                priors, 500, 'word'),
                            # Human-readable text for each of the eight label
                            # keys; presumably shown in the rendered
                            # explorer -- confirm.
                            labels={
                                'a': 'Positive Reviews of Accepted Papers',
                                'b': 'Negative Reviews of Accepted Papers',
                                'not_a_and_not_b': 'Rejections',
                                'a_and_b': 'Acceptances',
                                'a_and_not_b': 'Positive Reviews',
                                'b_and_not_a': 'Negative Reviews',
                                'not_a': 'Negative Reviews of Rejected Papers',
                                'not_b': 'Positive Reviews of Rejected Papers',
                            })

print('making html', time.time() - t0, 's')

# NOTE(review): the argument list of this call continues beyond the visible
# excerpt, so the call is intentionally left open here.
html = st.produce_four_square_explorer(
    four_square=four_square,
    x_label='Pos-Neg',
    y_label='Accept-Reject',
# Each stage prints a label followed by `time.time() - t0`.
# Presumably t0 is a time.time() timestamp taken at script start -- confirm.
print('compacting', time.time() - t0, 's')

# The ranker is bound to the class object itself (it is not instantiated
# here) and is handed unchanged to both CompactTerms and FourSquare.
term_ranker = st.OncePerDocFrequencyRanker

# Rebind `corpus` to the result of CompactTerms(...).compact(), called with
# minimum_term_count=2 and the ranker above.
corpus = st.CompactTerms(corpus,
                         minimum_term_count=2,
                         term_ranker=term_ranker).compact()

print('finding priors', time.time() - t0, 's')

# The priors are built from `full_corpus` (defined outside this excerpt),
# not from the `corpus` that was just rebound.
priors = (st.PriorFactory(
    full_corpus, starting_count=0.01).use_all_categories().get_priors())

print('building four square', time.time() - t0, 's')

# Four category lists (a / not-a / b / not-b) are passed, each holding a
# single category name.
four_square = st.FourSquare(corpus,
                            category_a_list=['Accept, Positive'],
                            not_category_a_list=['Reject, Negative'],
                            category_b_list=['Accept, Negative'],
                            not_category_b_list=['Reject, Positive'],
                            term_ranker=term_ranker,
                            # Scorer receives the priors computed above plus
                            # the positional argument 10.
                            scorer=st.LogOddsRatioInformativeDirichletPrior(
                                priors, 10),
                            # Human-readable text for each of the eight label
                            # keys; presumably shown in the rendered
                            # explorer -- confirm.
                            labels={
                                'a': 'Positive Reviews of Accepted Papers',
                                'b': 'Negative Reviews of Accepted Papers',
                                'not_a_and_not_b': 'Rejections',
                                'a_and_b': 'Acceptances',
                                'a_and_not_b': 'Positive Reviews',
                                'b_and_not_a': 'Negative Reviews',
                                'not_a': 'Negative Reviews of Rejected Papers',
                                'not_b': 'Positive Reviews of Rejected Papers',
                            })

print('making html', time.time() - t0, 's')

# NOTE(review): the argument list of this call continues beyond the visible
# excerpt, so the call is intentionally left open here.
html = st.produce_four_square_explorer(
    four_square=four_square,
    x_label='Pos-Neg',
    y_label='Accept-Reject',