예제 #1
0
def create_scatter_text(writers, names, messages, nonames=False):
    """Build a scattertext explorer for two authors and save it as HTML.

    Args:
        writers: Indexable collection of author labels. ``writers[0]`` is
            used as the plotted category. ``writers[1]`` is only read when
            ``nonames`` is false, as the display name of the other side.
        names: Author label for each message; becomes the ``author`` column.
        messages: Message texts; becomes the ``message`` column, paired with
            ``names`` by position.
        nonames: When true, the plot is labelled ``"Author_0"`` and
            ``"Author_1"`` instead of the labels found in ``writers``.

    Returns:
        None. The generated HTML is written to ``./demo_compact.html``.
    """
    my_df = pd.DataFrame({"author": names, "message": messages})
    # The factory name is spelled exactly as this code has always called it;
    # do not "correct" the spelling without checking the library.
    nlp = st.tweet_tokenizier_factory(nltk.tokenize.TweetTokenizer())
    my_df['parse'] = my_df['message'].apply(nlp)

    corpus = st.CorpusFromParsedDocuments(
        my_df, category_col='author',
        parsed_col='parse').build().get_unigram_corpus().compact(
            st.AssociationCompactor(2000))

    # Only the two display names differ between the anonymised and the named
    # plot, so pick them first and share a single explorer call.
    if nonames:
        category_name, not_category_name = "Author_0", "Author_1"
    else:
        category_name, not_category_name = writers[0], writers[1]

    html = st.produce_scattertext_explorer(corpus,
                                           category=writers[0],
                                           category_name=category_name,
                                           not_category_name=not_category_name,
                                           minimum_term_frequency=0,
                                           pmi_threshold_coefficient=0,
                                           width_in_pixels=1000,
                                           transform=st.Scalers.dense_rank)

    # The context manager closes the file on exit, so no explicit close() is
    # needed. UTF-8 is pinned so the output does not depend on the platform's
    # default encoding.
    with open('./demo_compact.html', 'w', encoding='utf-8') as f:
        f.write(html)
import scattertext as st

# Load the 2012 convention sample data and add a `parse` column produced by
# scattertext's whitespace-based parser.
df = st.SampleCorpora.ConventionData2012.get_data().assign(
    parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))

# Build a corpus categorised by the `party` column, reduce it to unigrams and
# compact it with AssociationCompactor(2000).
corpus = st.CorpusFromParsedDocuments(
    df, category_col='party',
    parsed_col='parse').build().get_unigram_corpus().compact(
        st.AssociationCompactor(2000))

# Render the explorer HTML; this variant hides the diagonal and passes
# vertical_lines=0.5.
html = st.produce_scattertext_explorer(corpus,
                                       category='democrat',
                                       category_name='Democratic',
                                       not_category_name='Republican',
                                       minimum_term_frequency=0,
                                       pmi_threshold_coefficient=0,
                                       width_in_pixels=1000,
                                       metadata=corpus.get_df()['speaker'],
                                       transform=st.Scalers.dense_rank,
                                       show_diagonal=False,
                                       max_overlapping=3,
                                       vertical_lines=0.5)

# Write through a context manager so the handle is flushed and closed right
# away, and pin UTF-8 so the output does not depend on the platform default.
with open('./demo_vertical_lines.html', 'w', encoding='utf-8') as out_file:
    out_file.write(html)
print('open ./demo_vertical_lines.html in Chrome')
예제 #3
0
import scattertext as st
import pandas as pd

# Load the Rotten Tomatoes sample data and add a `parse` column produced by
# scattertext's whitespace-based parser.
df = st.SampleCorpora.RottenTomatoes.get_data()
df['parse'] = df['text'].apply(st.whitespace_nlp_with_sentences)

# Corpus categorised by the `category` column, reduced to unigrams and
# compacted with AssociationCompactor(1000).
corpus_builder = st.CorpusFromParsedDocuments(
    df, category_col='category', parsed_col='parse')
corpus = corpus_builder.build().get_unigram_corpus().compact(
    st.AssociationCompactor(1000))

# Project the term embeddings; this rebinds `corpus` and yields the axes that
# are handed to the explorer as its projection.
resolver = st.EmbeddingsResolver(corpus).set_embeddings_model()
corpus, axes = resolver.project_embeddings()
term_colors = st.CategoryColorAssigner(corpus).get_term_colors()

# Keyword arguments for the PCA explorer, collected in call order.
explorer_options = dict(
    category='fresh',
    not_categories=['rotten'],
    neutral_categories=['plot'],
    metadata=df['movie_name'],
    width_in_pixels=1000,
    show_axes=False,
    use_full_doc=True,
    projection=axes,
    term_colors=term_colors,
    show_characteristic=False,
    show_top_terms=False,
    unified_context=True,
    show_category_headings=True,
    show_cross_axes=False,
    include_term_category_counts=True,
    color_func="(function(d) {return modelInfo.term_colors[d.term]})",
)
html = st.produce_pca_explorer(corpus, **explorer_options)

# NOTE(review): only the target file name is set here; the visible snippet
# ends before `html` is written anywhere.
file_name = 'demo_unified_context.html'
예제 #4
0
    # Parse every convention speech with `nlp` and map the raw party labels to
    # their display names.
    # NOTE(review): `nlp` is not defined in this snippet; presumably a spaCy
    # pipeline with the PyTextRank component added -- confirm against the
    # surrounding code.
    convention_df = st.SampleCorpora.ConventionData2012.get_data().assign(
        parse=lambda df: df.text.apply(nlp),
        party=lambda df: df.party.apply(
            # Stray '#' characters had commented out the mapping's contents,
            # leaving the dict literal unclosed.
            {'democrat': 'Democratic',
             'republican': 'Republican'}.get
        )
    )
    # Build the corpus from the parsed documents using PyTextRank phrases as
    # features, then compact it (including non-text features).
    corpus = st.CorpusFromParsedDocuments(
        convention_df,
        category_col='party',
        parsed_col='parse',
        feats_from_spacy_doc=st.PyTextRankPhrases()
    ).build(
    ).compact(
        st.AssociationCompactor(2000, use_non_text_features=True)
    )
class Stats_Graph_Manager:
    """Placeholder manager type; construction takes no arguments and sets no state."""

    def __init__(self):
        """Create an instance without initialising any attributes."""
        return None
 
        
        




class graph:
    """Graph container whose constructor accepts an optional ``gdict``."""

    def __init__(self, gdict=None):
        """Accept an optional ``gdict``, defaulting to a new empty list.

        Args:
            gdict: Optional graph data; replaced by ``[]`` when omitted or
                ``None``.
        """
        # NOTE(review): the visible body only rebinds the local name and never
        # stores it on the instance; the class looks cut off here -- confirm
        # against the full source before relying on it.
        gdict = [] if gdict is None else gdict
예제 #5
0
import scattertext as st

# Load the 2012 convention sample data and add a `parse` column produced by
# scattertext's whitespace-based parser.
df = st.SampleCorpora.ConventionData2012.get_data().assign(
    parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)
)

# Build a corpus categorised by the `party` column, reduce it to unigrams and
# compact it with AssociationCompactor(2000).
corpus = st.CorpusFromParsedDocuments(
    df, category_col='party', parsed_col='parse'
).build().get_unigram_corpus().compact(st.AssociationCompactor(2000))

# Render the explorer HTML, attaching each document's speaker as metadata.
html = st.produce_scattertext_explorer(
    corpus,
    category='democrat',
    category_name='Democratic',
    not_category_name='Republican',
    minimum_term_frequency=0, pmi_threshold_coefficient=0,
    width_in_pixels=1000, metadata=corpus.get_df()['speaker'],
    transform=st.Scalers.dense_rank,
    show_diagonal=True,
    max_overlapping=3
)

# Write through a context manager so the handle is flushed and closed right
# away, and pin UTF-8 so the output does not depend on the platform default.
with open('./demo_compact.html', 'w', encoding='utf-8') as out_file:
    out_file.write(html)
print('open ./demo_compact.html in Chrome')