Python RankDifference Exemples, scattertext.RankDifference Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : demo_word_list_topic_model.py Projet : rutum/scattertext

import scattertext as st
import scattertext.interface.ProduceScattertextExplorer
from scattertext import RankDifference

# Load the ConventionData2012 sample data and attach a parsed version of each
# document's text, produced by st.whitespace_nlp_with_sentences.
convention_df = st.SampleCorpora.ConventionData2012.get_data()
convention_df['parse'] = convention_df['text'].apply(st.whitespace_nlp_with_sentences)

# Corpus categorized by the 'party' column, reduced to its stoplisted
# unigram form; this is the input for topic extraction below.
unigram_corpus = (
    st.CorpusFromParsedDocuments(
        convention_df,
        category_col='party',
        parsed_col='parse',
    )
    .build()
    .get_stoplisted_unigram_corpus()
)

# Build topics from a hand-picked list of seed terms, scoring candidate terms
# with RankDifference and keeping 20 terms per topic.
topic_model = (
    st.SentencesForTopicModeling(unigram_corpus)
    .get_topics_from_terms(
        ['obama', 'romney', 'democrats', 'republicans',
         'health', 'military', 'taxes', 'education',
         'olympics', 'auto', 'iraq', 'iran', 'israel'],
        scorer=RankDifference(),
        num_terms_per_topic=20,
    )
)

# Feature extractor backed by the topic model built above.
topic_feature_builder = st.FeatsFromTopicModel(topic_model)

# Rebuild the corpus from the same parsed documents, this time using the
# topic-model feature builder in place of the default feature extraction.
topic_corpus = st.CorpusFromParsedDocuments(
    convention_df,
    category_col='party',
    parsed_col='parse',
    feats_from_spacy_doc=topic_feature_builder,
).build()

html = scattertext.interface.ProduceScattertextExplorer.produce_scattertext_explorer(
	topic_corpus,
	category='democrat',
	category_name='Democratic',
	not_category_name='Republican',

Exemple #2

0

Afficher le fichier

Fichier : demo_word_list_topic_model.py Projet : Nimesh-Patel/Python_TextAnal_Visualization

import scattertext as st
from scattertext import RankDifference

# Load the ConventionData2012 sample data and attach a parsed version of each
# document's text, produced by st.whitespace_nlp_with_sentences.
convention_df = st.SampleCorpora.ConventionData2012.get_data()
convention_df['parse'] = convention_df['text'].apply(
    st.whitespace_nlp_with_sentences)

# Corpus categorized by the 'party' column, reduced to its stoplisted
# unigram form; this is the input for topic extraction below.
unigram_corpus = (st.CorpusFromParsedDocuments(
    convention_df, category_col='party',
    parsed_col='parse').build().get_stoplisted_unigram_corpus())

# Build topics from a hand-picked list of seed terms, scoring candidate terms
# with RankDifference and keeping 20 terms per topic.
topic_model = (
    st.SentencesForTopicModeling(unigram_corpus).get_topics_from_terms(
        [
            'obama', 'romney', 'democrats', 'republicans', 'health',
            'military', 'taxes', 'education', 'olympics', 'auto', 'iraq',
            'iran', 'israel'
        ],
        scorer=RankDifference(),
        num_terms_per_topic=20))

# Feature extractor backed by the topic model built above.
topic_feature_builder = st.FeatsFromTopicModel(topic_model)

# Rebuild the corpus from the same parsed documents, this time using the
# topic-model feature builder in place of the default feature extraction.
topic_corpus = st.CorpusFromParsedDocuments(
    convention_df,
    category_col='party',
    parsed_col='parse',
    feats_from_spacy_doc=topic_feature_builder).build()

html = st.produce_scattertext_explorer(
    topic_corpus,
    category='democrat',
    category_name='Democratic',
    not_category_name='Republican',

Exemple #3

0

Afficher le fichier

Fichier : demo_deltajsd.py Projet : weizhao-BME/scattertext

from scattertext import SampleCorpora, whitespace_nlp_with_sentences, produce_frequency_explorer, RankDifference
from scattertext.CorpusFromPandas import CorpusFromPandas

# NOTE(review): JSDCompactor and dense_rank are not imported in the visible
# lines of this example -- confirm they are brought into scope elsewhere.
convention_df = SampleCorpora.ConventionData2012.get_data()

# Unigram corpus categorized by the 'party' column, built from the raw 'text'
# column and then compacted with JSDCompactor(1000).
corpus = (
    CorpusFromPandas(
        convention_df,
        category_col='party',
        text_col='text',
        nlp=whitespace_nlp_with_sentences,
    )
    .build()
    .get_unigram_corpus()
    .compact(JSDCompactor(1000))
)


def _democratic_rank(freq_df):
    """Return dense_rank applied to the 'democrat' frequency column."""
    return dense_rank(freq_df['democrat'])


def _republican_rank(freq_df):
    """Return dense_rank applied to the 'republican' frequency column."""
    return dense_rank(freq_df['republican'])


def _rank_difference(freq_df):
    """Return RankDifference scores for 'democrat' against 'republican'."""
    return RankDifference().get_scores(freq_df['democrat'],
                                       freq_df['republican'])


# The empty suffix passed to get_term_freq_df is what makes the frequency
# columns addressable as plain 'democrat' / 'republican' in the helpers above.
term_etc_df = corpus.get_term_freq_df('').assign(
    DemocraticRank=_democratic_rank,
    RepublicanRank=_republican_rank,
    RankDiff=_rank_difference,
)

# JavaScript function source (held as a Python string) that renders a term's
# label followed by one "<br>"-separated line per extra per-term field.
# Each pair is (label shown to the reader, attribute read from x.etc).
_tooltip_fields = (
    ('Democratic Rank', 'DemocraticRank'),
    ('Republican Rank', 'RepublicanRank'),
    ('Rank Difference Score', 'RankDiff'),
)

# One JavaScript concatenation fragment per field; values are formatted
# with five decimal places via toFixed(5).
_tooltip_rows = [
    f' + "<br>{label}: " + x.etc.{field}.toFixed(5)'
    for label, field in _tooltip_fields
]

get_custom_term_html = (
    '(function(x) {return "Term: " + x.term + "<span class=topic_preview>"'
    + ' '.join(_tooltip_rows)
    + '+ "</span>" ;})'
)

html = produce_frequency_explorer(
    corpus,
    category='democrat',
    category_name='Democratic',
    not_category_name='Republican',
    minimum_term_frequency=0,