コード例 #1
0
    'Reject, Negative'
], False).get_unigram_corpus().compact(
    st.ClassPercentageCompactor(term_count=5)))

# Each stage is announced with the time elapsed so far (t0 is presumably a
# start timestamp captured earlier in the script -- confirm).
print('finding priors', time.time() - t0, 's')

# Priors are produced from full_corpus with every category included.
prior_factory = st.PriorFactory(full_corpus, starting_count=0.01)
priors = prior_factory.use_all_categories().get_priors()

print('building four square', time.time() - t0, 's')

# Scorer driven by the priors computed above.
# NOTE(review): 500 and 'word' are passed positionally; presumably the prior
# strength and the scale type -- confirm against the scattertext signature.
dirichlet_scorer = st.LogOddsRatioInformativeDirichletPrior(
    priors, 500, 'word')

# Display names for each category group and each pairwise combination.
quadrant_labels = {
    'a': 'Positive Reviews of Accepted Papers',
    'b': 'Negative Reviews of Accepted Papers',
    'not_a_and_not_b': 'Rejections',
    'a_and_b': 'Acceptances',
    'a_and_not_b': 'Positive Reviews',
    'b_and_not_a': 'Negative Reviews',
    'not_a': 'Negative Reviews of Rejected Papers',
    'not_b': 'Positive Reviews of Rejected Papers',
}

# Review polarity crossed with acceptance outcome: one category per corner.
four_square = st.FourSquare(
    corpus,
    category_a_list=['Accept, Positive'],
    not_category_a_list=['Reject, Negative'],
    category_b_list=['Accept, Negative'],
    not_category_b_list=['Reject, Positive'],
    term_ranker=term_ranker,
    scorer=dirichlet_scorer,
    labels=quadrant_labels,
)

print('making html', time.time() - t0, 's')
html = st.produce_four_square_explorer(
    four_square=four_square,
    x_label='Pos-Neg',
    y_label='Accept-Reject',
コード例 #2
0
# Each stage is announced with the time elapsed so far (t0 is presumably a
# start timestamp captured earlier in the script -- confirm).
print('compacting', time.time() - t0, 's')

# The same ranker class is used for compaction and for the four-square below.
term_ranker = st.OncePerDocFrequencyRanker
term_compactor = st.CompactTerms(
    corpus, minimum_term_count=2, term_ranker=term_ranker)
corpus = term_compactor.compact()

print('finding priors', time.time() - t0, 's')

# Priors are produced from full_corpus (not the compacted corpus above),
# with every category included.
prior_factory = st.PriorFactory(full_corpus, starting_count=0.01)
priors = prior_factory.use_all_categories().get_priors()

print('building four square', time.time() - t0, 's')

# Scorer driven by the priors computed above.
# NOTE(review): 10 is passed positionally; presumably the prior strength --
# confirm against the scattertext signature.
dirichlet_scorer = st.LogOddsRatioInformativeDirichletPrior(priors, 10)

# Display names for each category group and each pairwise combination.
quadrant_labels = {
    'a': 'Positive Reviews of Accepted Papers',
    'b': 'Negative Reviews of Accepted Papers',
    'not_a_and_not_b': 'Rejections',
    'a_and_b': 'Acceptances',
    'a_and_not_b': 'Positive Reviews',
    'b_and_not_a': 'Negative Reviews',
    'not_a': 'Negative Reviews of Rejected Papers',
    'not_b': 'Positive Reviews of Rejected Papers',
}

# Review polarity crossed with acceptance outcome: one category per corner.
four_square = st.FourSquare(
    corpus,
    category_a_list=['Accept, Positive'],
    not_category_a_list=['Reject, Negative'],
    category_b_list=['Accept, Negative'],
    not_category_b_list=['Reject, Positive'],
    term_ranker=term_ranker,
    scorer=dirichlet_scorer,
    labels=quadrant_labels,
)

print('making html', time.time() - t0, 's')
html = st.produce_four_square_explorer(
    four_square=four_square,
    x_label='Pos-Neg',
    y_label='Accept-Reject',