def main():
    """Run the "Wikipedia Citation Needed Predictor" Streamlit page.

    A sidebar menu switches between four views. The first three parse the
    entered text with the module-level ``nlp`` pipeline and hand the result
    to a ``spacy_streamlit`` visualizer; the fourth only collects its inputs
    in the code visible here.
    """
    st.title("Wikipedia Citation Needed Predictor")
    logo = Image.open(os.path.join('wiki_logo.png'))
    st.image(logo)

    pages = ["Home", "NER", "Classification", "Explain Prediction"]
    page = st.sidebar.selectbox("Menu", pages)

    def parsed_input():
        # Shared prompt: ask for text and run it through the pipeline.
        return nlp(st.text_area("Your Text", "Enter Text Here"))

    if page == "Home":
        st.subheader("Tokenization")
        doc = parsed_input()
        # The token table is only drawn once the button is pressed.
        if st.button("Tokenize"):
            token_columns = ['text', 'pos_', 'dep_', 'ent_type_']
            spacy_streamlit.visualize_tokens(doc, attrs=token_columns)
    elif page == "NER":
        st.subheader("Named Entity Recognition")
        doc = parsed_input()
        entity_labels = nlp.get_pipe('ner').labels
        spacy_streamlit.visualize_ner(doc, labels=entity_labels)
    elif page == "Classification":
        st.subheader("Citation Needed ")
        doc = parsed_input()
        spacy_streamlit.visualize_textcat(doc, title="Sentence Need Citation")
    elif page == "Explain Prediction":
        st.subheader("Why this predicted")
        text_to_explain = st.text_area("Your Text", "Enter Text Here")
        # Number of features to show; not consumed by the code visible here.
        feature_count = st.number_input(
            label="Num of features to visualize",
            min_value=1,
            max_value=7,
            step=1)
Beispiel #2
0
def single(selected_workshop, df, ner_labels):
    """Show token attributes and named entities for one workshop description.

    Args:
        selected_workshop: Title compared against ``df.title`` to find the row.
        df: Table exposing ``title`` and ``body`` columns as attributes.
        ner_labels: Entity labels forwarded to the NER visualizer.
    """
    st.write("Selected workshop: ", selected_workshop)

    # First body among the rows whose title matches the selection.
    matching_bodies = df[df.title == selected_workshop].body.to_list()
    description = matching_bodies[0]

    parsed = spacy_streamlit.process_text("en_core_web_md", description)
    spacy_streamlit.visualize_tokens(parsed, title="Token attributes")
    spacy_streamlit.visualize_ner(
        parsed,
        labels=ner_labels,
        title="Named entities",
    )
Beispiel #3
0
def main():
    """Serve a two-page Streamlit app: a token table and a named-entity view.

    The sidebar menu picks the page. Both pages parse the text area's content
    with the module-level ``nlp`` pipeline before anything is visualized.
    """
    st.title('Simple NLP app for tokenizing and Named Entity Recognition')
    pages = ['Home', 'NER']
    page = st.sidebar.selectbox('Menu', pages)

    if page == 'Home':
        st.subheader('Tokentization')
        user_text = st.text_area("Your text", 'Enter your text here')
        parsed = nlp(user_text)
        # Only draw the token table after an explicit click.
        if st.button('Tokenize'):
            columns = ['text', 'pos_', 'tag_', 'ent_type_']
            spacy_streamlit.visualize_tokens(parsed, attrs=columns)
    elif page == 'NER':
        st.subheader('Named Entity Recognition')
        user_text = st.text_area("Your text", 'Enter your text here')
        parsed = nlp(user_text)
        entity_labels = nlp.get_pipe('ner').labels
        spacy_streamlit.visualize_ner(parsed, labels=entity_labels)
Beispiel #4
0
def main():
    """A simple NLP app with spaCy-Streamlit.

    Shows the logo, then lets the sidebar menu switch between a token table
    ("Home") and a named-entity view ("NER"). Both pages parse the entered
    text with the module-level ``nlp`` pipeline.
    """
    # Indentation is uniform here: the original mixed one space-indented line
    # into a tab-indented body, which Python 3 rejects with a TabError.
    st.title("Spacy-Streamlit NLP App")
    our_image = Image.open('SpaCy_logo.svg.png')
    st.image(our_image)
    menu = ["Home", "NER"]
    choice = st.sidebar.selectbox("Menu", menu)
    if choice == "Home":
        st.subheader("Tokenization")
        raw_text = st.text_area("Your Text", "Enter Text Here")
        docx = nlp(raw_text)
        # The token table is drawn only after the button is pressed.
        if st.button("Tokenize"):
            spacy_streamlit.visualize_tokens(
                docx, attrs=['text', 'pos_', 'dep_', 'ent_type_'])
    elif choice == "NER":
        st.subheader("Named Entity Recognition")
        raw_text = st.text_area("Your Text", "Enter Text Here")
        docx = nlp(raw_text)
        spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)
Beispiel #5
0
def main():
    """Render the activity picker and the page for the chosen activity.

    The sidebar offers "Summary", "Named Entity Recognition", "Search" and
    "Keywords"; this function has branches for the first three only.
    """
    activities = ["Summary", "Named Entity Recognition", "Search", "Keywords"]
    selected = st.sidebar.selectbox("Select Activity", activities)

    if selected == "Summary":
        banner = """
	<div style="background-color:#16A085;"><p style="color:white;font-size:60px;">Text Summarizer</p></div>
	"""
        components.html(banner)
        source_text = st.text_area("Input Text For Summary", height=300)
        if st.button("summarize"):
            st.success(summary(source_text))
        # The slider is rendered, but its value is not used in this function.
        word_range = st.sidebar.slider("Summarize words Range", 25, 500)

    elif selected == "Named Entity Recognition":
        banner = """
	<div style="background-color:#16A085;"><p style="color:white;font-size:60px;">Text Tokenizer</p></div>
	"""
        components.html(banner)
        typed = st.text_area("write Text For Tokenizer")
        parsed = nlp(typed)
        # Three independent buttons, each drawing a different view of the doc.
        if st.button("Tokenizer"):
            token_columns = ['text', 'pos_', 'dep_', 'ent_type_']
            spacy_streamlit.visualize_tokens(parsed, attrs=token_columns)
        if st.button("NER"):
            spacy_streamlit.visualize_ner(
                parsed, labels=nlp.get_pipe('ner').labels)
        if st.button("Text Relationship"):
            spacy_streamlit.visualize_parser(parsed)

    elif selected == "Search":
        banner = """
	<div style="background-color:#16A085;"><p style="color:white;font-size:60px;,text-align:center;">Search Bar</p></div>
	"""
        components.html(banner)
        query = st.text_input("Search Anything")
        engine = Google(license=None)
        if st.button("search"):
            # Each hit is shown as its text followed by its URL.
            for hit in engine.search(query):
                st.write(hit.text)
                st.warning(hit.url)
Beispiel #6
0
def main():
    """Run the spaCy-Streamlit demo with a token page and a NER page.

    The sidebar menu selects the page. Each page parses the entered text with
    the module-level ``nlp`` pipeline and draws its visualization once the
    page's button is pressed.
    """
    st.title('Spacy-Streamlit NLP App')

    menu = ['Home', 'NER']
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == 'Home':
        st.subheader("Tokenization")
        raw_text = st.text_area("Your Text", 'Enter Text Here')
        docx = nlp(raw_text)
        if st.button('Tokenize'):
            spacy_streamlit.visualize_tokens(
                docx, attrs=['text', 'pos_', 'dep_', 'ent_type_'])

    elif choice == 'NER':
        st.subheader('Named Enitity Recognition')
        raw_text = st.text_area("Your Text", 'Enter Text Here')
        docx = nlp(raw_text)
        if st.button('Tokenize'):
            # ``.labels`` belongs to the pipe returned by get_pipe(); the
            # original applied it to the string 'ner' and raised
            # AttributeError as soon as the button was pressed.
            spacy_streamlit.visualize_ner(docx,
                                          labels=nlp.get_pipe('ner').labels)
Beispiel #7
0
def single():
    """Let the user pick one workshop and inspect its description with spaCy.

    Loads the workshop table through ``load_data``, offers the titles in the
    sidebar, then shows token attributes and named entities for the body of
    the selected workshop.
    """
    workshops = load_data("all-workshops-2021-02-04.csv")
    workshop_titles = workshops["title"].to_list()

    st.sidebar.subheader("Single workshop options")
    selected_workshop = st.sidebar.selectbox("Select workshop",
                                             workshop_titles)

    st.subheader(
        "Select a workshop in the sidebar to analyze token properties and named entities with spaCy."
    )
    st.write("Selected workshop: ", selected_workshop)

    # First body among the rows whose title matches the selection.
    matching_rows = workshops[workshops.title == selected_workshop]
    description = matching_rows.body.to_list()[0]

    # The pipeline is loaded here only to read the entity labels it supports.
    pipeline = spacy.load("en_core_web_md")
    entity_labels = pipeline.get_pipe("ner").labels

    parsed = spacy_streamlit.process_text("en_core_web_md", description)
    spacy_streamlit.visualize_tokens(parsed, title="Token attributes")
    spacy_streamlit.visualize_ner(
        parsed,
        labels=entity_labels,
        title="Named entities",
    )
0
def main():
    """Show the logo and a two-page spaCy demo: token table and NER view.

    The sidebar menu selects the page; both pages parse the entered text with
    the module-level ``nlp`` pipeline.
    """
    st.title("SpaCy Streamlit  APP")
    logo = Image.open(os.path.join('SpaCy_logo.svg.png'))
    st.image(logo)

    pages = ['Home', 'NER']
    page = st.sidebar.selectbox('Menu', pages)

    if page == 'Home':
        st.subheader('Home')
        typed = st.text_area('Your Docs', 'Enter Text')
        parsed = nlp(typed)
        # The token table is drawn only after the button is pressed.
        if st.button("Tokenize"):
            token_columns = ["text", "pos_", "dep_", "ent_type_"]
            spacy_streamlit.visualize_tokens(parsed, attrs=token_columns)

    elif page == 'NER':
        st.subheader('Named Entity Recognizer')
        typed = st.text_area('Your Text', 'Enter Text')
        parsed = nlp(typed)
        # No button gate on this page: entities are rendered on every run.
        entity_labels = nlp.get_pipe("ner").labels
        spacy_streamlit.visualize_ner(parsed, labels=entity_labels)
Beispiel #9
0
def main():
    """Run the tokenization / named-entity Streamlit page.

    The sidebar menu selects the view. Each view parses the entered text with
    the module-level ``nlp`` pipeline and draws its visualization once the
    view's button is pressed.
    """
    st.title('Tokenization and Named Entity Recognition APP')

    views = ['Home', 'NER']
    view = st.sidebar.selectbox('Selection Menu', views)

    if view == 'Home':
        st.subheader('Tokenization')
        typed = st.text_area('Please enter your text below')
        parsed = nlp(typed)
        if st.button('Tokenize'):
            # Default attribute set of the token visualizer is used here.
            spacy_streamlit.visualize_tokens(parsed)

    elif view == 'NER':
        st.subheader('Named Entity Recognition')
        typed = st.text_area('Please enter your text here')
        parsed = nlp(typed)
        if st.button('Show NER'):
            entity_labels = nlp.get_pipe('ner').labels
            spacy_streamlit.visualize_ner(parsed, labels=entity_labels)
Beispiel #10
0
def calc_main():
    """Render the "Nimbus Words" page: usage notes plus six text tools.

    The sidebar selectbox picks one of Summary, Tokenizer, Synonyms,
    Translator, Search or Spell Correction, and only that tool's widgets are
    drawn. The tools delegate to names provided elsewhere in the file
    (``summary``, ``summarize``, ``nlp``, ``wordnet``, ``TextBlob``,
    ``Google``, ``pluralize``, ``singularize``, ``comparative``,
    ``superlative``).
    """
    st.title("Nimbus Words")
    st.sidebar.header("Input Options")
    help_panel = st.beta_expander("How To Use This App")
    help_panel.markdown("""

    **Use the Dropdown Box located within the sidebar on the left to choose 1 of the 6 AI text editing features offered by Nimbus Words.** 

    1) **Summarizer:** Paste in text that will be summarized by our AI model. The first text box will do an automated summary of our program's recommended word count, and the second box beneath that will provide a summary of the exact word count you choose using the slider located within the sidebar.  

    2) **Tokenizer:** Paste in text that will be analyzed by our AI model. The **Tokenizer** button will provide a breakdown on each word within the phrase, for example 'Google' is an organization, or 'Jeff Bezos' is a proper noun. The **NER** button will display all named entities, for example 'Steve Jobs' is a person. The **Text Relationship** button will display a visual graph of the dependency each word has within a sentence or phrase. 

    3) **Synonyms:** Paste in text that will be analyzed by our AI model. The **Synonyms** button will provide you with synonyms to the inputted attribute. The **Definition** checkbox will provide definitions for the attribute. The **Example** checkbox will provide examples of the given attribute in a sentence.

    4) **Translator:** Paste in text that will be translated by our AI model. The **Translate** button will translate the inputted text into one of the many languages that we have provided, and we will automatically detect which language the inputted text is written in.

    5) **Search:** Paste in text that will be preprcoessed by our AI model. The **Search** button will do a filtered search for your input.

    6) **Spell Correction:** Paste in text that will be spell-checked by our AI model. The **Correct** button will offer a correct spelling for any grammatical error that are detected. The **Pluralize**, **Singularize**, **Comparative** and **Superlative** checkboxes do exactly as they say, and ouput those options for the input you provided. 

    """)

    tools = [
        "Summary", "Tokenizer", "Synonyms", "Translator", "Search",
        "Spell Correction"
    ]
    tool = st.sidebar.selectbox("Select Activity", tools)

    if tool == "Summary":
        st.title('AI Text Summarizer')
        # First box: summary produced by summary().
        auto_text = st.text_area("Input Text For Summary", height=300)
        if st.button("Summarize"):
            st.success(summary(auto_text))
        # Second box: summarize() with the word count chosen on the slider.
        word_budget = st.sidebar.slider("Summarize words Range", 25, 500)
        custom_text = st.text_area("Input Text For Summary", height=250)
        if st.button("Summarize with Custom Word Count"):
            st.warning(summarize(custom_text, word_count=word_budget))

    elif tool == "Tokenizer":
        st.title('Text Tokenizer')
        typed = st.text_area("write Text For Tokenizer")
        parsed = nlp(typed)
        if st.button("Tokenizer"):
            token_columns = ['text', 'pos_', 'dep_', 'ent_type_']
            spacy_streamlit.visualize_tokens(parsed, attrs=token_columns)
        if st.button("NER"):
            spacy_streamlit.visualize_ner(
                parsed, labels=nlp.get_pipe('ner').labels)
        if st.button("Text Relationship"):
            spacy_streamlit.visualize_parser(parsed)

    elif tool == "Synonyms":
        st.title('Synonym Generator')
        term = st.text_area("Enter Text")
        if st.button("Synonyms"):
            # One success box per lemma name of every synset of the term.
            for synset in wordnet.synsets(term):
                for lemma in synset.lemmas():
                    st.success(lemma.name())
        if st.checkbox("Definition"):
            for synset in wordnet.synsets(term):
                st.warning(synset.definition())
        if st.checkbox("Example"):
            for synset in wordnet.synsets(term):
                st.success(synset.examples())

    elif tool == "Translator":
        st.title('Speech Tranlation')
        typed = st.text_area("Enter Your Text For Translation", height=300)
        to_translate = TextBlob(typed)
        language_codes = ["en", "ta", "pa", "gu", "hi", "ur", "kn", "bn", "te"]
        target = st.selectbox("select", language_codes)
        if st.button("search"):
            st.success(to_translate.translate(to=target))

    elif tool == "Search":
        st.title('Web Search')
        query = st.text_input("Search Anything")
        engine = Google(license=None)
        if st.button("search"):
            # Each hit is shown as its text followed by its URL.
            for hit in engine.search(query):
                st.write(hit.text)
                st.warning(hit.url)

    elif tool == "Spell Correction":
        st.title('AI Spell Correction')
        typed = st.text_area("Enter Text Here")
        blob = TextBlob(typed)
        if st.button("Correct"):
            st.success(blob.correct())

        st.title('Pluralize & Singularize')
        noun = st.text_input("Enter a word For pluralize / singularize")
        if st.checkbox("Pluralize"):
            st.warning(pluralize(noun))
        if st.checkbox("Singularize"):
            st.warning(singularize(noun))

        st.title('Compartitive & Superlative')
        adjective = st.text_input("Enter Text For comparative & superlative")
        if st.checkbox("Comparative"):
            st.success(comparative(adjective))
        if st.checkbox("Superlative"):
            st.success(superlative(adjective))
Beispiel #11
0
def main():
    """Show the app title and the prompt asking which NLP service to use."""
    app_title = "NATURAL LANGUAGE PROCESSING WEB APPLICATION"
    service_prompt = "Choose the type of NLP service you like to use:"
    st.title(app_title)
    st.subheader(service_prompt)


# Tokenization

# Named Entity

# Page dispatch on ``choice``, which this section reads but does not define.
if choice == "Home":
    st.subheader("Tokenization")
    raw_text = st.text_area("your text", "Enter text here")
    docx = nlp(raw_text)
    # The button lives inside the "Home" branch so it can only fire when
    # ``docx`` exists; at top level it raised NameError on other pages.
    if st.button("Tokenize"):
        spacy_streamlit.visualize_tokens(docx,
                                         attrs=['text', 'lemma_', 'pos_'])

# Paired with the page test above rather than with the "Tokenize" button, so
# the NER page no longer depends on whether that button was pressed.
elif choice == "NER":
    st.subheader("Named Entity Recognition")
    raw_text = st.text_area("your text", "Enter text here")
    docx = nlp(raw_text)
    spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)

# Sentiment Analysis
if st.checkbox("Show Sentiment Analysis"):
    st.subheader("Sentiment of Your Text")
    message = st.text_area("Enter Your Text", "Type Here")
    # Nested under the checkbox: ``message`` only exists when the box is
    # ticked, so the button must not be reachable without it.
    if st.button("analyse"):
        blob = TextBlob(message)
        result_sentiment = blob.sentiment
        st.success(result_sentiment)
Beispiel #12
0
    choice = st.radio("Go to", nav_list)
    st.header("About App")
    st.info(
        """This App uses State of the Art Spacy Library along with Python.It uses Streamlit
           for implemention of beatiful and easy web app.
        """)

# Page heading, the shared text box, and the parsed document that every view
# below works on. ``choice`` and ``nlp`` are read here but defined outside
# this section.
st.markdown(""" ## Natural Language Processing using **SPACY** """)
raw_text = st.text_area("Text here")
doc = nlp(raw_text)

# NOTE(review): st.text_area presumably returns a string (empty when blank),
# in which case this guard always passes — confirm whether a truthiness check
# was intended.
if raw_text is not None:

    # Token attribute table, drawn once "Tokenize" is pressed.
    if choice == "Tokenization":
        if st.button("Tokenize"):
            spacy_streamlit.visualize_tokens(
                doc=doc, attrs=["text", "pos_", "dep_", "lemma_", "shape_"])

    # Named-entity view, drawn once "Analyze" is pressed.
    if choice == "Name Entity Recognition":
        if st.button("Analyze"):
            spacy_streamlit.visualize_ner(doc=doc,
                                          labels=nlp.get_pipe("ner").labels)

    # Sentence split: reports the sentence count and collects the sentences.
    if choice == "Sentence Segmentation":
        l = []
        if st.button("Segmentize"):
            st.write(
                f""" There are **{len(list(doc.sents))} Sentences** in this text dataset."""
            )
            for sent in doc.sents:
                l.append(sent)
            # Built but not used in the code visible here.
            d = {"Sentences": l}
Beispiel #13
0
def calc_main():
    """Render the "Nimbus Words" page with its six text tools.

    The sidebar selectbox picks one of Summary, Tokenizer, Synonyms,
    Translator, Search or Spell Correction, and only that tool's widgets are
    drawn. The tools delegate to names provided elsewhere in the file
    (``summary``, ``summarize``, ``nlp``, ``wordnet``, ``TextBlob``,
    ``Google``, ``pluralize``, ``singularize``, ``comparative``,
    ``superlative``).
    """
    st.write("Nimbus Words")
    st.sidebar.header("Input Options")

    tools = [
        "Summary", "Tokenizer", "Synonyms", "Translator", "Search",
        "Spell Correction"
    ]
    tool = st.sidebar.selectbox("Select Activity", tools)

    if tool == "Summary":
        st.title('AI Text Summarizer')
        # First box: summary produced by summary().
        auto_text = st.text_area("Input Text For Summary", height=300)
        if st.button("summarize"):
            st.success(summary(auto_text))
        # Second box: summarize() with the word count chosen on the slider.
        word_budget = st.sidebar.slider("Summarize words Range", 25, 500)
        custom_text = st.text_area("Input Text For Summary", height=250)
        if st.button("custom summarization"):
            st.warning(summarize(custom_text, word_count=word_budget))

    elif tool == "Tokenizer":
        st.title('Text Tokenizer')
        typed = st.text_area("write Text For Tokenizer")
        parsed = nlp(typed)
        if st.button("Tokenizer"):
            token_columns = ['text', 'pos_', 'dep_', 'ent_type_']
            spacy_streamlit.visualize_tokens(parsed, attrs=token_columns)
        if st.button("NER"):
            spacy_streamlit.visualize_ner(
                parsed, labels=nlp.get_pipe('ner').labels)
        if st.button("Text Relationship"):
            spacy_streamlit.visualize_parser(parsed)

    elif tool == "Synonyms":
        st.title('Synonym Generator')
        term = st.text_area("Enter Text")
        if st.button("Find"):
            # One success box per lemma name of every synset of the term.
            for synset in wordnet.synsets(term):
                for lemma in synset.lemmas():
                    st.success(lemma.name())
        if st.checkbox("Defination"):
            for synset in wordnet.synsets(term):
                st.warning(synset.definition())
        if st.checkbox("Example"):
            for synset in wordnet.synsets(term):
                st.success(synset.examples())

    elif tool == "Translator":
        st.title('Speech Tranlation')
        typed = st.text_area("Enter Your Text For Translation", height=300)
        to_translate = TextBlob(typed)
        language_codes = ["en", "ta", "pa", "gu", "hi", "ur", "kn", "bn", "te"]
        target = st.selectbox("select", language_codes)
        if st.button("search"):
            st.success(to_translate.translate(to=target))

    elif tool == "Search":
        st.title('Web Search')
        query = st.text_input("Search Anything")
        engine = Google(license=None)
        if st.button("search"):
            # Each hit is shown as its text followed by its URL.
            for hit in engine.search(query):
                st.write(hit.text)
                st.warning(hit.url)

    elif tool == "Spell Correction":
        st.title('AI Spell Correction')
        typed = st.text_area("Enter Text Here")
        blob = TextBlob(typed)
        if st.button("Correct"):
            st.success(blob.correct())

        st.title('Pluralize & Singularize')
        noun = st.text_input("Enter a word For pluralize / singularize")
        if st.checkbox("pluralize"):
            st.warning(pluralize(noun))
        if st.checkbox("singularize"):
            st.warning(singularize(noun))

        st.title('Compartitive & Superlative')
        adjective = st.text_input("Enter Text For comparative & superlative")
        if st.checkbox("comparative"):
            st.success(comparative(adjective))
        if st.checkbox("superlative"):
            st.success(superlative(adjective))
0
def main():
    """spaCy NLP Word Frequency App.

    Streamlit page with three sidebar activities:

    * "Tokenization & Word Count" - a sub-menu offering a token table, a word
      frequency table and a named-entity view.
    * "Word Similarity Check" - tables of the words two texts share (surface
      forms and lemmas) plus a cosine-similarity percentage.
    * "Extractive Text Summarization" - a summary from the chosen summarizer
      with character counts, reading-time estimates and a word cloud.

    Relies on names provided elsewhere in the file: ``st``, ``Image``,
    ``spacy``, ``spacy_streamlit``, ``pd``, ``re``, ``stopwords``,
    ``remove_punctuation``, ``remove_accented_chars``, ``summarize``,
    ``keywords``, the ``sumy_*`` summarizers, ``WordCloud`` and ``plt``.
    """

    def text_prep(raw_text):
        """Return ``raw_text`` cleaned, space-normalized and lower-cased."""
        raw_text = remove_punctuation(raw_text)
        raw_text = remove_accented_chars(raw_text)
        # A single pass suffices: both steps are idempotent, so repeating
        # them once per character (as before) only cost time.
        raw_text = re.sub('[^a-zA-Z0-9]', ' ', raw_text)
        raw_text = ' '.join(raw_text.split())
        return raw_text.lower()

    def count_words(words):
        """Count each string in ``words`` that is not in ``stopwords``."""
        word_frequencies = {}
        for word in words:
            if word not in stopwords:
                word_frequencies[word] = word_frequencies.get(word, 0) + 1
        return word_frequencies

    def matched_words_table(counts1, counts2):
        """Tabulate words present in both counts with their frequencies."""
        matched = pd.DataFrame()
        matched['Matched_Words'] = [x for x in counts1 if x in counts2]
        word_count1 = pd.DataFrame(list(counts1.items()),
                                   columns=['Matched_Words', 'Frequency_doc1'])
        word_count2 = pd.DataFrame(list(counts2.items()),
                                   columns=['Matched_Words', 'Frequency_doc2'])
        merged = pd.merge(matched, word_count1, on="Matched_Words")
        return pd.merge(merged, word_count2, on="Matched_Words")

    st.title("Natural Language Processing with spaCy")

    img = Image.open("letters.jpg")
    st.sidebar.image(img,
                     width=300,
                     caption='Image credit: towardsdatascience.com')
    menu = [
        "Tokenization & Word Count", "Word Similarity Check",
        "Extractive Text Summarization"
    ]
    choice = st.sidebar.selectbox("Select from below", menu)

    # ------------------------ Tokenization & Word Count ---------------------

    if choice == "Tokenization & Word Count":
        st.subheader("Tokenization & Word Count")
        # Kept in its own variable so the sidebar selection is not
        # overwritten by the sub-menu selection.
        sub_menu = ["Tokenization", "Word Count", "Entity Recognition"]
        sub_choice = st.selectbox("Select", sub_menu)

        if sub_choice == "Tokenization":
            st.subheader("Tokenization")
            raw_text = st.text_area("Your Text", "Enter Text Here")
            nlp = spacy.load('en_core_web_sm')
            docx = nlp(raw_text)
            if st.button("Tokenize"):
                att = ['text', 'lemma_', 'pos_', 'ent_type_']
                spacy_streamlit.visualize_tokens(docx, attrs=att)

        elif sub_choice == "Word Count":
            raw_text = st.text_area("Your Text", "Enter Text Here")
            raw_text = text_prep(raw_text)

            nlp = spacy.load('en_core_web_sm')
            docx = nlp(raw_text)

            # Build the table straight from the counts. The earlier version
            # bound the key/value views inside the counting loop, so input
            # with no countable word hit an unbound local.
            word_frequencies = count_words(word.text for word in docx)
            word_count = pd.DataFrame(list(word_frequencies.items()),
                                      columns=['Words', 'Frequency'])
            word_count = word_count.sort_values('Frequency', ascending=False)

            word_count = word_count.set_index('Words')
            # Frequency relative to the most frequent word.
            word_count['Frequency_%'] = word_count['Frequency'] / word_count[
                'Frequency'].max()

            if st.button("Word Count"):
                st.write(word_count)

        elif sub_choice == "Entity Recognition":
            st.subheader("Entity Recognition")
            raw_text = st.text_area("Your Text", "Enter Text Here")
            nlp = spacy.load('en_core_web_sm')
            docx = nlp(raw_text)
            if st.button("Entity Type"):
                spacy_streamlit.visualize_ner(
                    docx, labels=nlp.get_pipe('ner').labels)

    # --------------------------- Word Similarity Check ----------------------

    elif choice == "Word Similarity Check":
        st.subheader("Word Similarity Check")
        raw_text1 = st.text_area("First Text", "Paste/write your text here..")
        raw_text2 = st.text_area("Second Text", "Another chunk of text here..")

        raw_text1 = text_prep(raw_text1)
        raw_text2 = text_prep(raw_text2)

        # One load of the full pipeline; a preceding load with disabled
        # components was immediately overwritten and has been removed.
        nlp = spacy.load('en_core_web_sm')
        docx1 = nlp(raw_text1)
        docx2 = nlp(raw_text2)

        # Use the surface form where the lemma is the '-PRON-' placeholder.
        docx1_lem = [
            token.lemma_ if token.lemma_ != '-PRON-' else token.text
            for token in docx1
        ]
        docx2_lem = [
            token.lemma_ if token.lemma_ != '-PRON-' else token.text
            for token in docx2
        ]

        # Shared words by surface text (each document is counted once).
        df = matched_words_table(
            count_words(token.text for token in docx1),
            count_words(token.text for token in docx2))

        # Shared words by lemma.
        df_lem = matched_words_table(count_words(docx1_lem),
                                     count_words(docx2_lem))

        ## What percentage matches in both docs?
        text = [raw_text1, raw_text2]

        from sklearn.feature_extraction.text import CountVectorizer
        from sklearn.metrics.pairwise import cosine_similarity
        cv = CountVectorizer()
        try:
            count_matrix = cv.fit_transform(text)
        except ValueError:
            # CountVectorizer rejects an empty vocabulary (e.g. both boxes
            # blank); report no overlap instead of failing the whole page.
            matchpct = 0.0
        else:
            matchpct = round(cosine_similarity(count_matrix)[0][1] * 100, 2)

        if st.button('Matched Words'):
            st.write('The two documents have a', matchpct,
                     '% match. (cosine similaity)')
            st.subheader("Preprocessed Text")
            st.write(df)
            st.subheader("Preprocessed and Lemmatized Text")
            st.write(df_lem)

    # ------------------------------ Summarization ---------------------------

    elif choice == "Extractive Text Summarization":
        st.subheader("Summarize Document")
        raw_raw_text = st.text_area("Your Text", "Enter Text Here")
        raw_text = raw_raw_text
        summarizer_type = st.selectbox("Select a Summarizer", [
            "Gensim", "Sumy Lex Rank", "Sumy Luhn",
            "Sumy Latent Semantic Analysis", "Sumy Text Rank"
        ])
        if st.button('Summarize'):
            if summarizer_type == "Gensim":
                summary_result = summarize(raw_text)
                st.subheader("Keywords")
                # NOTE(review): this input is created inside the button
                # branch, so it only exists on the run right after a click —
                # confirm whether it should sit above the button.
                keyword = st.number_input(
                    "Enter the number of keywords and hit the 'Summarize' button."
                )
                kw = keywords(raw_text, words=keyword).split('\n')
                st.write(kw)
            elif summarizer_type == "Sumy Lex Rank":
                summary_result = sumy_lex_rank_summarizer(raw_text)
            elif summarizer_type == "Sumy Luhn":
                summary_result = sumy_luhn_summarizer(raw_text)
            elif summarizer_type == "Sumy Latent Semantic Analysis":
                summary_result = sumy_lsa_summarizer(raw_text)
            elif summarizer_type == "Sumy Text Rank":
                summary_result = sumy_tr_summarizer(raw_text)

            # Character lengths of the original text and of the summary.
            len_raw = len(raw_raw_text)
            len_sum = len(summary_result)

            def readingtime(docs):
                """Estimate reading time as token count / 200, in minutes."""
                nlp = spacy.load('en_core_web_sm')
                estimatedtime = len(nlp(docs)) / 200
                return '{} mins'.format(round(estimatedtime))

            rt_raw = readingtime(raw_raw_text)
            rt_sum = readingtime(summary_result)

            st.subheader("Summary")
            st.write(summary_result)

            st.subheader("Some little details.")
            st.write("The length of original document:", len_raw, "characters")
            st.write("The length of the summary:", len_sum, "characters")

            st.write("Approximate required time to read original document:",
                     rt_raw)
            st.write("Approximate required time to read the summary:", rt_sum)

            # Word cloud of the summary, drawn on the current pyplot figure.
            wc = WordCloud(background_color="white",
                           colormap="Dark2",
                           max_font_size=150,
                           random_state=42)
            wc.generate(summary_result)
            plt.imshow(wc, interpolation='bilinear')
            plt.axis("off")
            st.pyplot()