def interactive():
    """Page to allow nlp analysis from user input.

    Shows a text area and one checkbox per analysis (tokens, named
    entities, sentiment, summary). The result of every ticked analysis is
    written to the page, in that order.
    """
    input_text = st.text_area("Enter text", "Type here")
    token_cb = st.checkbox("Show tokens")
    ner_cb = st.checkbox("Show named entities")
    sentiment_cb = st.checkbox("Show sentiment")
    summary_cb = st.checkbox("Show Summary")

    if token_cb:
        st.write(az.tokenize(input_text))

    if ner_cb:
        named_entities = az.named_entity_recognization(input_text)
        if len(named_entities) > 0:
            # Only build the spaCy document when there is something to render.
            doc = az.get_nlp(input_text)
            html = spacy.displacy.render(doc, style="ent")
            # Newlines seem to mess with the rendering
            html = html.replace("\n", " ")
            # Adjacent literals keep the lines short without leaking
            # backslashes or stray whitespace into the markup.
            html_wrapper = (
                '<div style="overflow-x: auto; border: 1px solid #e6e9ef; '
                'border-radius: 0.25rem; padding: 1rem; '
                'margin-bottom: 2.5rem">{}</div>'
            )
            st.write(html_wrapper.format(html), unsafe_allow_html=True)
        else:
            st.info("No named entity recognized")

    if sentiment_cb:
        # Sentiment is computed on the lemmatized form of the input.
        sentiments = TextBlob(az.lemmatized_text(input_text))
        st.write(sentiments.sentiment)

    if summary_cb:
        st.write(sz.summarize_text(input_text))
def test_tokenize():
    """Check az.tokenize on one fixed sentence.

    The expected list is lowercased and omits "down", "into" and "of"
    from the input sentence.
    """
    sentence = "Test tokenize break down str into list of str correctly"
    wanted = ["test", "tokenize", "break", "str", "list", "str", "correctly"]
    assert az.tokenize(sentence) == wanted
def df_preprocess(df):
    """Build and preprocess (combine, normalize, tokenize) text.

    Every column after the first two is cast to str and joined per row
    with newlines into ``df["combined"]``. That text is passed through
    ``az.normalize`` into ``df[cts.NORMAL]``, which is in turn passed
    through ``az.tokenize`` into ``df[cts.TOKEN]``. The frame is modified
    in place and also returned.
    """

    def _join_cells(record):
        # one row of report cells -> a single newline-separated string
        return "\n".join(record.values.astype(str))

    # the first two columns hold non-report content, so skip them
    report_cols = df.columns[2:]
    df["combined"] = df[report_cols].apply(_join_cells, axis=1)

    df[cts.NORMAL] = df["combined"].apply(lambda text: az.normalize(text))
    df[cts.TOKEN] = df[cts.NORMAL].apply(lambda text: az.tokenize(text))
    return df
def df_preprocess(df):
    """Combine, normalize and tokenize the report text of ``df`` in place.

    Adds three columns to ``df``: ``cts.COMBINED`` (all columns after the
    first two, cast to str and newline-joined per row), ``cts.NORMAL``
    (``az.normalize`` of the combined text) and ``cts.TOKEN``
    (``az.tokenize`` of the normalized text). Nothing is returned.
    """
    # Columns 0 and 1 (student and assignment name) are not report content.
    report_part = df[df.columns[2:]]

    df[cts.COMBINED] = report_part.apply(
        lambda cells: "\n".join(cells.values.astype(str)),
        axis=1,
    )
    df[cts.NORMAL] = df[cts.COMBINED].apply(
        lambda combined: az.normalize(combined)
    )
    df[cts.TOKEN] = df[cts.NORMAL].apply(
        lambda normalized: az.tokenize(normalized)
    )
def interactive():
    """Page to allow nlp analysis from user input.

    Shows a text area followed by four checkboxes. Each ticked checkbox
    triggers its analysis of the entered text: tokens, named entities,
    sentiment and summary, in that order.
    """
    text = st.text_area("Enter text", "Type here")

    # Widgets are created up front, in the order they appear on the page.
    show_tokens = st.checkbox("Show tokens")
    show_entities = st.checkbox("Show named entities")
    show_sentiment = st.checkbox("Show sentiment")
    show_summary = st.checkbox("Show Summary")

    if show_tokens:
        st.write(az.tokenize(text))

    if show_entities:
        parsed = az.get_nlp(text)
        displacy_renderer(parsed)

    if show_sentiment:
        # Sentiment is computed on the lemmatized form of the input.
        blob = TextBlob(az.lemmatized_text(text))
        st.write(blob.sentiment)

    if show_summary:
        st.write(sz.summarize_text(text))
def test_tokenize_parametrize(input_text, expected):
    """Check that az.tokenize(input_text) equals expected for one case."""
    assert az.tokenize(input_text) == expected
def test_tokenize():
    """Test that tokenize breaks a sentence into the expected list of str."""
    expected_tokens = [
        "test",
        "tokenize",
        "break",
        "str",
        "list",
        "str",
        "correctly",
    ]
    actual_tokens = az.tokenize(
        "Test tokenize break down str into list of str correctly"
    )
    assert actual_tokens == expected_tokens