Code example #1
0
def interactive():
    """Interactive page: run the NLP analyses the user selects on
    free-form text typed into a text area."""
    user_text = st.text_area("Enter text", "Type here")
    show_tokens = st.checkbox("Show tokens")
    show_entities = st.checkbox("Show named entities")
    show_sentiment = st.checkbox("Show sentiment")
    show_summary = st.checkbox("Show Summary")

    # st.success("Running Analysis")
    # if st.button("Analysis"):
    if show_tokens:
        st.write(az.tokenize(user_text))
    if show_entities:
        entity_doc = az.get_nlp(user_text)
        entities = az.named_entity_recognization(user_text)
        if entities:
            markup = spacy.displacy.render(entity_doc, style="ent")
            # Newlines seem to mess with the rendering
            markup = markup.replace("\n", " ")
            wrapper = """<div style="overflow-x: auto; border: 1px solid \
        #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">\
        {}</div>"""
            st.write(wrapper.format(markup), unsafe_allow_html=True)
        else:
            st.info("No named entity recognized")
    if show_sentiment:
        st.write(TextBlob(az.lemmatized_text(user_text)).sentiment)
    if show_summary:
        st.write(sz.summarize_text(user_text))
Code example #2
0
def test_tokenize():
    """Check that tokenize splits a sentence into the expected word list
    (stop words dropped, words lowercased/stemmed by the nltk pipeline)."""
    sample = "Test tokenize break down str into list of str correctly"
    assert az.tokenize(sample) == [
        "test", "tokenize", "break", "str", "list", "str", "correctly"
    ]
Code example #3
0
def df_preprocess(df):
    """Build and preprocess (combine, normalize, tokenize) report text.

    Adds a combined-text column, a normalized column, and a tokenized
    column to *df*, then returns the (mutated) frame.
    """
    # filter out first two columns -- non-report content
    cols = df.columns[2:]
    # combining text into combined column; use the shared cts.COMBINED
    # constant instead of a hard-coded "combined" for consistency with
    # the downstream columns (cts.NORMAL, cts.TOKEN)
    df[cts.COMBINED] = df[cols].apply(
        lambda row: "\n".join(row.values.astype(str)), axis=1)
    # normalize (pass the function directly; no lambda wrapper needed)
    df[cts.NORMAL] = df[cts.COMBINED].apply(az.normalize)
    # tokenize
    df[cts.TOKEN] = df[cts.NORMAL].apply(az.tokenize)
    return df
Code example #4
0
def df_preprocess(df):
    """Build and preprocess (combine, normalize, tokenize) text.

    Adds combined, normalized, and tokenized columns to *df* in place
    and returns the modified frame so callers can chain on the result.
    """
    # filter out first two columns -- non-report content
    # (student and assignment name)
    cols = df.columns[2:]
    # combining text into combined column
    df[cts.COMBINED] = df[cols].apply(
        lambda row: "\n".join(row.values.astype(str)), axis=1)
    # normalize
    df[cts.NORMAL] = df[cts.COMBINED].apply(lambda row: az.normalize(row))
    # tokenize
    df[cts.TOKEN] = df[cts.NORMAL].apply(lambda row: az.tokenize(row))
    # return the frame explicitly; previously this fell off the end and
    # returned None, which breaks callers that assign the result
    return df
Code example #5
0
def interactive():
    """Interactive page: run user-selected NLP analyses on typed-in text."""
    text = st.text_area("Enter text", "Type here")
    want_tokens = st.checkbox("Show tokens")
    want_ner = st.checkbox("Show named entities")
    want_sentiment = st.checkbox("Show sentiment")
    want_summary = st.checkbox("Show Summary")

    # st.success("Running Analysis")
    # if st.button("Analysis"):
    if want_tokens:
        st.write(az.tokenize(text))
    if want_ner:
        displacy_renderer(az.get_nlp(text))
    if want_sentiment:
        st.write(TextBlob(az.lemmatized_text(text)).sentiment)
    if want_summary:
        st.write(sz.summarize_text(text))
Code example #6
0
def test_tokenize_parametrize(input_text, expected):
    """Parametrized check that tokenize yields the expected token list."""
    # parameter names are fixture/parametrize names; keep them unchanged
    assert az.tokenize(input_text) == expected
Code example #7
0
def test_tokenize():
    """Verify tokenize converts a sentence into its list of tokens."""
    text = "Test tokenize break down str into list of str correctly"
    tokens = az.tokenize(text)
    assert tokens == [
        "test",
        "tokenize",
        "break",
        "str",
        "list",
        "str",
        "correctly",
    ]