def main():
    """A Simple NLP app with Spacy-Streamlit"""
    st.title("Wikipedia Citation Needed Predictor")
    our_image = Image.open(os.path.join('wiki_logo.png'))
    st.image(our_image)
    menu = ["Home", "NER", "Classification", "Explain Prediction"]
    choice = st.sidebar.selectbox("Menu", menu)
    if choice == "Home":
        st.subheader("Tokenization")
        raw_text = st.text_area("Your Text", "Enter Text Here")
        docx = nlp(raw_text)
        if st.button("Tokenize"):
            spacy_streamlit.visualize_tokens(
                docx, attrs=['text', 'pos_', 'dep_', 'ent_type_'])
    elif choice == "NER":
        st.subheader("Named Entity Recognition")
        raw_text = st.text_area("Your Text", "Enter Text Here")
        docx = nlp(raw_text)
        spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)
    elif choice == "Classification":
        st.subheader("Citation Needed ")
        raw_text = st.text_area("Your Text", "Enter Text Here")
        docx = nlp(raw_text)
        spacy_streamlit.visualize_textcat(docx, title="Sentence Need Citation")
    elif choice == "Explain Prediction":
        st.subheader("Why this predicted")
        raw_text = st.text_area("Your Text", "Enter Text Here")
        # get number of features input
        num_features_input = st.number_input(
            label="Num of features to visualize",
            min_value=1,
            max_value=7,
            step=1)
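The example is truncated at this point, so the original explainer code is not shown. As a purely hypothetical sketch, the selected feature count would typically be handed to a text explainer such as LIME; classifier_fn below is an assumed helper that maps a list of texts to class probabilities:

from lime.lime_text import LimeTextExplainer
import streamlit.components.v1 as components

explainer = LimeTextExplainer(class_names=["no citation", "citation needed"])
if st.button("Explain"):
    # classifier_fn is hypothetical: list of texts -> array of class probabilities
    exp = explainer.explain_instance(
        raw_text, classifier_fn, num_features=int(num_features_input))
    components.html(exp.as_html(), height=600, scrolling=True)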
Example #2
def home():
    # Set the title for the Streamlit application
    st.title("Named Entity Recognition")
    
    # Default text that will be searched on Wikipedia; the NER result for it is displayed.
    DEFAULT_TEXT = """ Narendra Modi """

    # spaCy ships several models; we use en_core_web_sm for named-entity recognition.
    spacy_model = "en_core_web_sm"

    # The input query is sent to Wikipedia and the result is stored in the variable text.
    text = st.text_input("Wikipedia search", DEFAULT_TEXT)
    
    # Exception handling in case the search query fails.
    try:
        text = wikipedia.summary(text)
    except Exception:
        # Fall back to the raw query text if the lookup fails.
        pass

    # Process the text and display it using visualize_ner.
    doc = spacy_streamlit.process_text(spacy_model, text)

    spacy_streamlit.visualize_ner(
        doc,
        labels=["PERSON", "DATE", "GPE", "ORG", "NORP"],
        show_table=False,
        title="Persons, dates and locations",
    )
    st.text(f"Analyzed using spaCy model {spacy_model}")
Example #3
def single(selected_workshop, df, ner_labels):
    st.write("Selected workshop: ", selected_workshop)
    selected_description = df[df.title == selected_workshop].body.to_list()[0]

    doc = spacy_streamlit.process_text("en_core_web_md", selected_description)
    spacy_streamlit.visualize_tokens(doc, title="Token attributes")

    spacy_streamlit.visualize_ner(doc, labels=ner_labels, title="Named entities")
Example #4
def main():
    menu = ["WORD_CLOUD", "TOKENIZACAO", "RESUMO"]
    choice = st.sidebar.selectbox("Menu", menu)
    if choice == "WORD_CLOUD":
        try:
            st.title("Análise de texto - Arquivos PDF")
            uploaded_file = st.file_uploader("", type="pdf")

            if uploaded_file is not None:
                # print(uploaded_file)
                df = extract_data(uploaded_file)
                #            docx = nlp(str(df))
                #            spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)

                # Create and generate a word cloud image:
                wordcloud = WordCloud().generate(str(df))

                # Display the generated image:
                plt.imshow(wordcloud, interpolation='bilinear')
                plt.axis("off")
                plt.show()
                st.pyplot()
        except:
            pass

    elif choice == "TOKENIZACAO":
        try:
            st.title("Análise de texto - Arquivos PDF")
            uploaded_file = st.file_uploader("", type="pdf")

            if uploaded_file is not None:
                # print(uploaded_file)
                df = extract_data(uploaded_file)
                docx = nlp(str(df))
                spacy_streamlit.visualize_ner(
                    docx, labels=nlp.get_pipe('ner').labels)

        except:
            pass

    elif choice == "PDF":
        st.title("Análise de texto - Arquivos PDF")
        uploaded_file = st.file_uploader("", type="pdf")

        if uploaded_file is not None:
            # print(uploaded_file)
            df = extract_data(uploaded_file)
            docx = nlp(str(df))
            spacy_streamlit.visualize_ner(docx,
                                          labels=nlp.get_pipe('ner').labels)
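extract_data is not shown in this example. A minimal sketch of such a helper, assuming the pdfplumber library (the original may use a different PDF reader):

import pdfplumber

def extract_data(uploaded_file):
    """Hypothetical helper: concatenate the text of every page of a PDF."""
    with pdfplumber.open(uploaded_file) as pdf:
        return "\n".join(page.extract_text() or "" for page in pdf.pages)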
Example #5
def main():
    st.title('Simple NLP app for tokenizing and Named Entity Recognition')
    menu = ['Home', 'NER']
    choice = st.sidebar.selectbox('Menu', menu)
    if choice == 'Home':
        st.subheader('Tokenization')
        raw_text = st.text_area("Your text", 'Enter your text here')
        docs = nlp(raw_text)
        if st.button('Tokenize'):
            spacy_streamlit.visualize_tokens(
                docs, attrs=['text', 'pos_', 'tag_', 'ent_type_'])

    elif choice == 'NER':
        st.subheader('Named Entity Recognition')
        raw_text = st.text_area("Your text", 'Enter your text here')
        docs = nlp(raw_text)
        spacy_streamlit.visualize_ner(docs, labels=nlp.get_pipe('ner').labels)
Example #6
def main():
    """A Simple NLP app with Spacy-Streamlit"""
    st.title("Spacy-Streamlit NLP App")
    our_image = Image.open(os.path.join('SpaCy_logo.svg.png'))
    st.image(our_image)
    menu = ["Home", "NER"]
    choice = st.sidebar.selectbox("Menu", menu)
    if choice == "Home":
        st.subheader("Tokenization")
        raw_text = st.text_area("Your Text", "Enter Text Here")
        docx = nlp(raw_text)
        if st.button("Tokenize"):
            spacy_streamlit.visualize_tokens(
                docx, attrs=['text', 'pos_', 'dep_', 'ent_type_'])
    elif choice == "NER":
        st.subheader("Named Entity Recognition")
        raw_text = st.text_area("Your Text", "Enter Text Here")
        docx = nlp(raw_text)
        spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)
Example #7

def main():
    activites = ["Summary", "Named Entity Recognition", "Search", "Keywords"]
    choice = st.sidebar.selectbox("Select Activity", activites)
    if choice == "Summary":
        html_temp = """
	<div style="background-color:#16A085;"><p style="color:white;font-size:60px;">Text Summarizer</p></div>
	"""
        components.html(html_temp)
        text = st.text_area("Input Text For Summary", height=300)
        if st.button("summarize"):
            st.success(summary(text))
        text_range = st.sidebar.slider("Summarize words Range", 25, 500)

    # Named Entity Recognition
    elif choice == "Named Entity Recognition":
        html_temp1 = """
	<div style="background-color:#16A085;"><p style="color:white;font-size:60px;">Text Tokenizer</p></div>
	"""
        components.html(html_temp1)
        row_data = st.text_area("write Text For Tokenizer")
        docx = nlp(row_data)
        if st.button("Tokenizer"):
            spacy_streamlit.visualize_tokens(
                docx, attrs=['text', 'pos_', 'dep_', 'ent_type_'])
        if st.button("NER"):
            spacy_streamlit.visualize_ner(docx,
                                          labels=nlp.get_pipe('ner').labels)
        if st.button("Text Relationship"):
            spacy_streamlit.visualize_parser(docx)

    #Search Bar
    elif choice == "Search":
        html_temp4 = """
	<div style="background-color:#16A085;"><p style="color:white;font-size:60px;,text-align:center;">Search Bar</p></div>
	"""
        components.html(html_temp4)
        row_text = st.text_input("Search Anything")
        google = Google(license=None)
        if st.button("search"):
            for search_result in google.search(row_text):
                st.write(search_result.text)
                st.warning(search_result.url)
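The summary helper used above is not defined in this snippet. A plausible sketch, assuming gensim 3.x's extractive summarizer (this module was removed in gensim 4.0):

from gensim.summarization import summarize as gensim_summarize

def summary(text):
    """Hypothetical helper: extractive summary via gensim's TextRank."""
    return gensim_summarize(text)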
Example #8
def main():
    st.title('Spacy-Streamlit NLP App')

    menu = ['Home', 'NER']
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == 'Home':
        st.subheader("Tokenization")
        raw_text = st.text_area("Your Text", 'Enter Text Here')
        docx = nlp(raw_text)
        if st.button('Tokenize'):
            spacy_streamlit.visualize_tokens(
                docx, attrs=['text', 'pos_', 'dep_', 'ent_type_'])

    elif choice == 'NER':
        st.subheader('Named Entity Recognition')
        raw_text = st.text_area("Your Text", 'Enter Text Here')
        docx = nlp(raw_text)
        if st.button('Analyze'):
            spacy_streamlit.visualize_ner(docx,
                                          labels=nlp.get_pipe('ner').labels)
Example #9

def main():
    #Title of Application
    streamlit.title('NER on data from Wikipedia or URL in Streamlit App')

    #!python -m spacy download en_core_web_sm
    nlp = spacy.load("en_core_web_sm")
    menu = ['Wikipedia', 'URL']
    choice = streamlit.sidebar.selectbox('Menu', menu)
    streamlit.sidebar.write("Made by: Avdhoot Patil")
    if choice == 'Wikipedia':
        streamlit.subheader('Wikipedia')
        raw_docx = streamlit.text_input(
            'Enter the Topic',
            'wikipedia')  # User can enter topic of own choice
        try:
            page = wikipedia.page(raw_docx)  # Extract data from Wikipedia API
            article = nlp(
                page.summary)  # NER operation on summary of given topic
            spacy_streamlit.visualize_ner(article,
                                          labels=nlp.get_pipe("ner").labels)
        except Exception as e:
            streamlit.write(
                "Page id does not match any pages. Try another id!")
            streamlit.write("Please refer below suggestions")
            streamlit.write(SystemExit(e))

    else:
        streamlit.subheader('URL')
        raw_docx = streamlit.text_input(
            'Enter the URL',
            'https://www.nytimes.com/2018/08/13/us/politics/peter-strzok-fired-fbi.html?hp&action=click&pgtype=Homepage&clickSource=story-heading&module=first-column-region&region=top-news&WT.nav=top-news'
        )  # User can choose a different URL for NER
        try:
            ny_bb = url_to_string(
                raw_docx)  # Calling function to extract data from url
            article = nlp(ny_bb)
            spacy_streamlit.visualize_ner(article,
                                          labels=nlp.get_pipe("ner").labels)
        except Exception:
            streamlit.write("Enter a correct URL")
Example #10

def main():
    st.title('Text Summarization and Named Entity Recognition')
    choices = ['Summarization', 'NER', 'Summarize and NER on urls']
    menu = st.sidebar.selectbox('Action Center', choices)

    if menu == 'Summarization':
        st.subheader('Summarizing blocks')
        raw_text = st.text_area('Enter text here', 'Type here')
        if st.button('Summarize'):
            summ = summarize(raw_text)
            st.write(summ)

    if menu == 'NER':
        st.subheader('Named Entity Recognition on Textual context')
        raw_text = st.text_area('Enter text here', 'Type here')
        if st.button('Find Entity'):
            result = nam_ent(raw_text)
            result = spacy_streamlit.visualize_ner(
                result, labels=nlp.get_pipe("ner").labels, show_table=False)
            st.write(result)

    if menu == 'Summarize and NER on urls':
        st.subheader('NLP based summarization in specified URL')
        raw_text = st.text_area('Enter path', 'Type here')
        text_length = st.slider('Length of textual-Information', 50, 100)
        if st.button('Extract information'):
            if raw_text != 'Type here':
                result = url_analyses(raw_text)
                len_total = len(result)
                collective_len = round(len_total / text_length)
                st.write(result[:collective_len])
                summary = summarize(result)
                summary_result = nam_ent(summary)
                summary_result = spacy_streamlit.visualize_ner(
                    summary_result,
                    labels=nlp.get_pipe("ner").labels,
                    show_table=True)
                st.write(summary_result)
Example #11
def single():
    df = load_data("all-workshops-2021-02-04.csv")
    titles = df["title"].to_list()

    st.sidebar.subheader("Single workshop options")
    selected_workshop = st.sidebar.selectbox("Select workshop", titles)

    st.subheader(
        "Select a workshop in the sidebar to analyze token properties and named entities with spaCy."
    )

    st.write("Selected workshop: ", selected_workshop)
    selected_description = df[df.title == selected_workshop].body.to_list()[0]

    nlp = spacy.load("en_core_web_md")
    ner_labels = nlp.get_pipe("ner").labels

    doc = spacy_streamlit.process_text("en_core_web_md", selected_description)
    spacy_streamlit.visualize_tokens(doc, title="Token attributes")

    spacy_streamlit.visualize_ner(doc,
                                  labels=ner_labels,
                                  title="Named entities")
Example #12
def main():
    st.title("SpaCy Streamlit  APP")
    # st.markdown(title_temp,unsafe_allow_html=True)
    our_image = Image.open(os.path.join('SpaCy_logo.svg.png'))
    st.image(our_image)

    menu = ['Home', 'NER']
    choice = st.sidebar.selectbox('Menu', menu)

    if choice == 'Home':
        st.subheader('Home')
        raw_docx = st.text_area('Your Docs', 'Enter Text')
        docx = nlp(raw_docx)
        if st.button("Tokenize"):
            spacy_streamlit.visualize_tokens(
                docx, attrs=["text", "pos_", "dep_", "ent_type_"])

    elif choice == 'NER':
        st.subheader('Named Entity Recognizer')
        raw_docx = st.text_area('Your Text', 'Enter Text')
        docx = nlp(raw_docx)
        # if st.button('Analyze'):
        spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe("ner").labels)
Example #13
def structure_analysis(text, display=None):
    """
    Visualizes POS tag counts, entity counts & word-entity highlighted text.

    :param text: the text to perform analysis on
    :param display: whether to display the entity-highlighted text (default None)

    :returns pos_freq (dictionary), ent_freq (dictionary)
    """
    if len(text) > 100000:
        st.write('Text Size Exceeded! Truncating...')
    doc = nlp(text[:100000])
    pos_freq = pos_tag_counts(doc)
    ent_freq = entity_counts(doc)

    fig, axs = plt.subplots(1, 2, figsize=(15, 6))

    sns.barplot(list(pos_freq.keys()),
                list(pos_freq.values()),
                color='#e84118',
                ax=axs[0])
    axs[0].set_title('POS COUNTS')
    axs[0].set_xticklabels(labels=list(pos_freq.keys()), rotation=90)

    sns.barplot(list(ent_freq.keys()),
                list(ent_freq.values()),
                color='#273c75',
                ax=axs[1])
    axs[1].set_title('ENTITY COUNTS')
    axs[1].set_xticklabels(labels=list(ent_freq.keys()), rotation=90)

    st.pyplot(fig)

    if display:
        spacy_streamlit.visualize_ner(doc, labels=nlp.get_pipe('ner').labels)

    return pos_freq, ent_freq
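pos_tag_counts and entity_counts are assumed helpers; counting attributes over a spaCy Doc is straightforward, e.g.:

from collections import Counter

def pos_tag_counts(doc):
    """Hypothetical helper: frequency of coarse POS tags in a Doc."""
    return dict(Counter(token.pos_ for token in doc))

def entity_counts(doc):
    """Hypothetical helper: frequency of entity labels in a Doc."""
    return dict(Counter(ent.label_ for ent in doc.ents))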
Example #14
def main():

	st.title('Tokenization and Named Entity Recognition APP')

	menu=['Home','NER']
	choice= st.sidebar.selectbox('Selection Menu',menu)



	if choice =='Home':
		st.subheader('Tokenization')
		raw_text=st.text_area('Please enter your text below')
		docx=nlp(raw_text)
		if st.button('Tokenize'):
			spacy_streamlit.visualize_tokens(docx)



	elif choice=='NER':
		st.subheader('Named Entity Recognition')
		raw_text=st.text_area('Please enter your text here')
		docx=nlp(raw_text)
		if st.button('Show NER'):
			spacy_streamlit.visualize_ner(docx,labels=nlp.get_pipe('ner').labels)
Example #15
def main():
    st.set_page_config(page_title='BITask-AmanKumar',
                       page_icon=None,
                       layout='centered',
                       initial_sidebar_state='auto')
    st.title("Named Entity Recognition on scrapped data form wikipedia")
    st.subheader(
        "Enter the keyword you want to search on wikipedia and perform NER")

    # getting the keyword from the user
    keyword = st.text_area("Enter the keyword", " ")
    st.text("Wait.. NER is being performed (2-15 seconds)")
    st.text(
        "If you still see the Wikipedia information below, one of these may be the reason:\n1. Server timed out\n2. No Wikipedia page matched the keyword\n3. A page title unexpectedly resolves to a redirect\n "
    )

    # calling the utility function to fetch the data
    raw = wikiExtract(keyword)

    # performing NER on the text and storing the result in docx
    docx = nlp(raw)

    # visualizing the text with NER
    spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)
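wikiExtract is an external utility. A minimal sketch, assuming the wikipedia package (the original may differ):

import wikipedia

def wikiExtract(keyword):
    """Hypothetical helper: fetch the summary text for a keyword."""
    try:
        return wikipedia.summary(keyword)
    except Exception:
        # Timeouts, missing pages and redirects are surfaced by the UI above.
        return keyword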
Example #16
def run_the_nlp(state):
    st.title('spaCy cleaning')
    context = st.text_area('Paste some text to test:')
    allowed_postag = st.sidebar.multiselect(
        label='Pos_tag',
        options=[
            'PROPN', 'NOUN', 'ADJ', 'VERB', 'ADV', 'AUX', 'ADP', 'SYM', 'NUM'
        ],
        default=[
            'PROPN', 'NOUN', 'ADJ', 'VERB', 'ADV', 'AUX', 'ADP', 'SYM', 'NUM'
        ],
    )
    nlp = en_core_web_sm.load()
    if context != '':
        doc = nlp(context)
        spacy_streamlit.visualize_ner(doc,
                                      labels=nlp.get_pipe("ner").labels,
                                      title="spaCy NER",
                                      sidebar_title=None,
                                      show_table=False)

        clean_func = lambda x: clean_text_pipe(
            x, nlp, allowed_postags=allowed_postag)
        st.write(clean_func(context))
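clean_text_pipe is not defined in the snippet. Given how it is called, a plausible sketch is a POS-filtered lemmatizer:

def clean_text_pipe(text, nlp, allowed_postags):
    """Hypothetical helper: keep lemmas of tokens whose POS tag is allowed."""
    doc = nlp(text)
    return ' '.join(tok.lemma_ for tok in doc if tok.pos_ in allowed_postags)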
Example #17
def plot_ner(text, table=False, tit=''):
    doc = nlp(text)
    visualize_ner(doc, labels=nlp.get_pipe("ner").labels, show_table=table, title=tit)
    ents = [(ent.text, ent.label_) for ent in doc.ents]
    return ents
Example #18
# Tokenization

# Named Entity

if choice == "Home":
    st.subheader("Tokenization")
    raw_text = st.text_area("your text", "Enter text here")
    docx = nlp(raw_text)
if st.button("Tokenize"):
    spacy_streamlit.visualize_tokens(docx, attrs=['text', 'lemma_', 'pos_'])

elif choice == "NER":
    st.subheader("Named Entity Recognition")
    raw_text = st.text_area("your text", "Enter text here")
    docx = nlp(raw_text)
    spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)

# Sentiment Analysis
if st.checkbox("Show Sentiment Analysis"):
    st.subheader("Sentiment of Your Text")
    message = st.text_area("Enter Your Text", "Type Here")
if st.button("analyse"):
    blob = TextBlob(message)
    result_sentiment = blob.sentiment
    st.success(result_sentiment)
    st.area_chart(result_sentiment)

# Text Summarization
if st.checkbox("Show Text Summarization"):
    st.subheader("Summarize Your Text")
    message = st.text_area("Enter Your Text", "Give your paragraph")
Example #19
def main():
	#summary entity checker




	st.title('Analyze and Summarize your Text')
	image= Image.open("img2.jpg")

	st.image(image, use_column_width=True)

	#activities = ['Summarize', 'NER checker', 'NER for URL']
	# activities = ['Summarize', 'NER checker', 'Sentiment Analyzer', 'Word Cloud']
	# choice= st.sidebar.selectbox('Select Activity', activities)

	# raw_text= st.text_area('Enter text here', 'Type Here')
	# st.checkbox('Summarize')
	# st.checkbox('NER checker')
	# st.checkbox('Sentiment Analyzer')
	# st.checkbox('Word Cloud')


	if st.checkbox('Summarize'):
		st.subheader('Summarize your text using Natural Language Processing')
		raw_text= st.text_area('Enter text to summarize:', 'Paste Here')
		summary_choice = st.selectbox('Summary Choice', ['Summary model (Gensim)', 'Summary model (Sumy)'])
		if st.button('Summarize'):
			if summary_choice == 'Summary model (Gensim)':
				summary_result= summarize(raw_text)
			elif summary_choice == 'Summary model (Sumy)':
				summary_result= sumy_summarizer(raw_text)
			st.write(summary_result)

	if st.checkbox('Named Entity Recognition'):
		st.subheader('Entity Recognition')
		raw_text= st.text_area('Enter text to identify labels: ', 'Paste Here')
		if st.button('Scan'):
			docx= nlp(raw_text)
			spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)

			# docx= nlp(raw_text)
			# html= displacy.render(docx, style= 'ent')
			# html= html.replace('\n\n', '\n')
			# st.write(html, unsafe_allow_html=True)
			# st.markdown(html, unsafe_allow_html=True)



		# st.subheader('Entity Recognition with Spacy')
		# raw_text= st.text_area('Enter text here', 'Type Here')
		# docx= nlp(raw_text)
		# spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)

	# if choice == 'NER for URL':
	# 	st.subheader('Analyze text from URL')
	# 	raw_url= st.text_input('Enter URL', 'Type here')
	# 	text_length= st.slider('Length to preview', 50,100)
	# 	if st.button('Extract'):
	# 		if raw_url != 'Type here':
	# 			result = get_text(raw_url)
	# 			len_of_full_text= len(result)
	# 			len_of_short_text= round(len(result)/text_length)
	# 			st.info('Length:: Full Text::{}'.format(len_of_full_text))
	# 			st.info('Length:: Short Text::{}'.format(len_of_short_text))
	# 			st.write(result[:len_of_short_text])
	# 			summary_docx= sumy_summarizer(result)
	# 			docx= nlp(summary_docx)
	# 			html= displacy.render(docx, style= 'ent')
	# 			html= html.replace('\n\n', '\n')
	# 			st.write(html, unsafe_allow_html=True)
	# 			# st.markdown(html, unsafe_allow_html=True)

	if st.checkbox('Sentiment Analyzer'):
		st.subheader('Sentiment analysis')
		message= st.text_area('Enter text for sentiment analysis: ', 'Paste Here')
		if st.button('Analyze'):
			blob = TextBlob(message)
			result_sentiment= blob.sentiment
			st.success(result_sentiment)
			# st.bar_chart(result_sentiment)
			
	if st.checkbox('Word Cloud'):
		st.subheader('Word Cloud of your text')
		message= st.text_area('Enter text to create word cloud: ', 'Paste here')
		if st.button('Create Cloud'):
			wordcloud= WordCloud().generate(message)
			plt.imshow(wordcloud, interpolation='bilinear')
			plt.xticks([])
			plt.yticks([])
			st.pyplot()
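sumy_summarizer, used here and in a later example, is an assumed helper; a typical implementation with the sumy package looks like:

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer

def sumy_summarizer(docx, sentence_count=3):
    """Hypothetical helper: LexRank extractive summary of a text."""
    parser = PlaintextParser.from_string(docx, Tokenizer('english'))
    summarizer = LexRankSummarizer()
    sentences = summarizer(parser.document, sentence_count)
    return ' '.join(str(s) for s in sentences)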
Example #20
        """)

st.markdown(""" ## Natural Language Processing using **SPACY** """)
raw_text = st.text_area("Text here")
doc = nlp(raw_text)

if raw_text is not None:

    if choice == "Tokenization":
        if st.button("Tokenize"):
            spacy_streamlit.visualize_tokens(
                doc=doc, attrs=["text", "pos_", "dep_", "lemma_", "shape_"])

    if choice == "Name Entity Recognition":
        if st.button("Analyze"):
            spacy_streamlit.visualize_ner(doc=doc,
                                          labels=nlp.get_pipe("ner").labels)

    if choice == "Sentence Segmentation":
        l = []
        if st.button("Segmentize"):
            st.write(
                f""" There are **{len(list(doc.sents))} Sentences** in this text dataset."""
            )
            for sent in doc.sents:
                l.append(sent)
            d = {"Sentences": l}
            df = pd.DataFrame(data=d)
            st.write(df)

    if choice == "Sentiment Analysis":
        sid = SentimentIntensityAnalyzer()
Example #21

import spacy

import spacy_streamlit

nlp = spacy.blank("en")
text = "But Google is starting from behind."
doc = nlp.make_doc(text)
ent = doc.char_span(4, 10, label="ORG", kb_id="Q95")
doc.ents = [ent]

spacy_streamlit.visualize_ner(doc,
                              labels=["ORG"],
                              show_table=False,
                              title="Custom Colors NER Visualization",
                              displacy_options={
                                  "colors": {
                                      "ORG": "#EEE"
                                  },
                                  "kb_url_template":
                                  "https://www.wikidata.org/wiki/{}"
                              },
                              key="Custom Colors")

spacy_streamlit.visualize_ner(
    doc,
    labels=["ORG"],
    show_table=False,
    title="Default Colors NER Visualization",
    displacy_options={"kb_url_template": "https://www.wikidata.org/wiki/{}"},
    key="Default Colors")
Example #22

            #concordance
            #if st.button('concordance'):
            #tokens = nltk.word_tokenize(join_sent)
            #text = nltk.Text(tokens)
            #conc = text.concordance('village')
            #st.write(conc)

            st.subheader('Name Entity Recognition')
            with st.beta_expander('Learn more'):
                st.markdown("""
					**NER** finds entities, such as people, companies, or locations, within text data. 
					""")
            nlp = spacy.load('en_core_web_sm')
            doc = nlp(join_sent)
            if st.button('NER'):
                spat.visualize_ner(doc, labels=nlp.get_pipe('ner').labels)

#Geospatial analysis
st.sidebar.header('Geospatial analysis')
if st.sidebar.checkbox('Geospatial analysis'):
    st.subheader('Geospatial analysis')
    data = clean_prepare()

    year = st.sidebar.selectbox('Select year conflict:',
                                list(sorted(data.year.unique())))
    year_data = data[data.year == year]

    st.map(year_data[['latitude', 'longitude']], zoom=6)

    if st.sidebar.checkbox('Admin barh'):
Example #23
def calc_main():
    st.title("Nimbus Words")
    st.sidebar.header("Input Options")
    expander_bar = st.beta_expander("How To Use This App")
    expander_bar.markdown("""

    **Use the Dropdown Box located within the sidebar on the left to choose 1 of the 6 AI text editing features offered by Nimbus Words.** 

    1) **Summarizer:** Paste in text that will be summarized by our AI model. The first text box will do an automated summary of our program's recommended word count, and the second box beneath that will provide a summary of the exact word count you choose using the slider located within the sidebar.  

    2) **Tokenizer:** Paste in text that will be analyzed by our AI model. The **Tokenizer** button will provide a breakdown on each word within the phrase, for example 'Google' is an organization, or 'Jeff Bezos' is a proper noun. The **NER** button will display all named entities, for example 'Steve Jobs' is a person. The **Text Relationship** button will display a visual graph of the dependency each word has within a sentence or phrase. 

    3) **Synonyms:** Paste in text that will be analyzed by our AI model. The **Synonyms** button will provide you with synonyms to the inputted attribute. The **Definition** checkbox will provide definitions for the attribute. The **Example** checkbox will provide examples of the given attribute in a sentence.

    4) **Translator:** Paste in text that will be translated by our AI model. The **Translate** button will translate the inputted text into one of the many languages that we have provided, and we will automatically detect which language the inputted text is written in.

    5) **Search:** Paste in text that will be preprocessed by our AI model. The **Search** button will do a filtered search for your input.

    6) **Spell Correction:** Paste in text that will be spell-checked by our AI model. The **Correct** button will offer a correct spelling for any grammatical errors that are detected. The **Pluralize**, **Singularize**, **Comparative** and **Superlative** checkboxes do exactly as they say, and output those options for the input you provided. 

    """)

    activities = [
        "Summary", "Tokenizer", "Synonyms", "Translator", "Search",
        "Spell Correction"
    ]
    choice = st.sidebar.selectbox("Select Activity", activities)
    if choice == "Summary":
        st.title('AI Text Summarizer')
        text = st.text_area("Input Text For Summary", height=300)
        if st.button("Summarize"):
            st.success(summary(text))
        text_range = st.sidebar.slider("Summarize words Range", 25, 500)
        text = st.text_area("Input Text For Summary", height=250)
        if st.button("Summarize with Custom Word Count"):
            st.warning(summarize(text, word_count=text_range))
    # Tokenizer
    elif choice == "Tokenizer":
        st.title('Text Tokenizer')
        row_data = st.text_area("write Text For Tokenizer")
        docx = nlp(row_data)
        if st.button("Tokenizer"):
            spacy_streamlit.visualize_tokens(
                docx, attrs=['text', 'pos_', 'dep_', 'ent_type_'])
        if st.button("NER"):
            spacy_streamlit.visualize_ner(docx,
                                          labels=nlp.get_pipe('ner').labels)
        if st.button("Text Relationship"):
            spacy_streamlit.visualize_parser(docx)
    # synonyms
    elif choice == "Synonyms":
        st.title('Synonym Generator')
        text = st.text_area("Enter Text")
        if st.button("Synonyms"):
            for syn in wordnet.synsets(text):
                for i in syn.lemmas():
                    st.success(i.name())
        if st.checkbox("Definition"):
            for syn in wordnet.synsets(text):
                st.warning(syn.definition())
        if st.checkbox("Example"):
            for syn in wordnet.synsets(text):
                st.success(syn.examples())
    # Translator
    elif choice == "Translator":
        st.title('Speech Translation')
        row_text = st.text_area("Enter Your Text For Translation", height=300)
        translation_text = TextBlob(row_text)
        list1 = ["en", "ta", "pa", "gu", "hi", "ur", "kn", "bn", "te"]
        a = st.selectbox("select", list1)
        if st.button("search"):
            #input1 = TextBlob("Simple is better than complex")
            st.success(translation_text.translate(to=a))
    #Search Bar
    elif choice == "Search":
        st.title('Web Search')
        row_text = st.text_input("Search Anything")
        google = Google(license=None)
        if st.button("search"):
            for search_result in google.search(row_text):
                st.write(search_result.text)
                st.warning(search_result.url)
    elif choice == "Spell Correction":
        st.title('AI Spell Correction')
        text_data = st.text_area("Enter Text Here")
        a = TextBlob(text_data)
        if st.button("Correct"):
            st.success(a.correct())
        st.title('Pluralize & Singularize')
        text_data1 = st.text_input("Enter a word For pluralize / singularize")
        if st.checkbox("Pluralize"):
            st.warning(pluralize(text_data1))
        if st.checkbox("Singularize"):
            st.warning(singularize(text_data1))

        st.title('Comparative & Superlative')
        text2 = st.text_input("Enter Text For comparative & superlative")
        if st.checkbox("Comparative"):
            st.success(comparative(text2))
        if st.checkbox("Superlative"):
            st.success(superlative(text2))
Example #24

def NER(model, raw_text):
    doc = model(raw_text)
    with st.beta_expander("Show/Hide Result", expanded=True):
        spacy_streamlit.visualize_ner(doc, labels=model.get_pipe('ner').labels)
    return doc
Example #25
nlp.add_pipe("parser", source=source_nlp, before='ner')
legalSentencizer.add_to_pipe(nlp)
visualizer = ['ner']
#text = "Ausscheiden aus Gesellschaft - Vorsteuerbelastete Abwicklungskosten (\u00a7 15 Abs. 1 UStG) Nach Ansicht des Autors stellt die zit. Entscheidung des BFH vom 20. 7. 1988 die konsequente Fortf\u00fchrung der verfehlten Rechtsprechung zum Vorsteuerabzug von Gesellschaftern dar. Er bem\u00e4ngelt auch an diesem Urteil, da\u00df allein auf den Wortlaut des Gesetzes abgestellt und nicht gefragt werde, ob das gefundene Ergebnis auch dem Gesetzesziel entspricht. Das Gesetzesziel verlange den Vorsteuerabzug bei allen Aufwendungen, die mit der unternehmerischen Bet\u00e4tigung zusammenh\u00e4ngen. Bei wem der Vorsteuerabzug in Betracht kommt, h\u00e4nge davon ab, wer die Aufwendungen getragen hat. \u00a7 15 UStG k\u00f6nne in diesem Sinne ausgelegt werden, indem dem Gesellschafter f\u00fcr den Vorsteuerabzug insoweit die Unternehmereigenschaft der Gesellschaft zugerechnet werde. 0093121 Anmerkung Umsatzsteuer Stadie Prof. Dr. Holger 08.09.1989 19890908 UR-1989-0278 UR-1989-0279 UStG:15/1 1989 BFH Urteil X R 46/81 v. 20. 7. 1988 BFH/NV 1989 326"
#doc = nlp(text)
texts = ['Schon aus diesem Grund könne das Urteil des Bundesgerichtshofs niemals Rechts- und Billigkeitsgrundlage für den am 30. Dezember 1994 abgeschlossenen Vergleich sein.',
         'Der räumliche Zusammenhang ist unschädlich, wenn die andere Wohnung unentgeltlich einem Angehörigen überlassen wird (BFH vom 28.6.2002-BStBl 2003 II S. 119) ; wird die zweite Wohnung jedoch von einem minderjährigen Kind der Familie bewohnt, ist die Förderung für diese Wohnung ausgeschlossen, weil der elterliche Haushalt die Wohnung des minderjährigen Kindes umfasst.',
         'Sie beantragten die Erhöhung des Verlustes gemäß § 17 EStG aufgrund von Bürgschaftsinanspruchnahmen.'
         'In diesem Fall sind für die dann erforderliche Ertragsprognose die Verhältnisse ab dem Zeitpunkt der Teilübertragung maßgeblich (vgl BFH v 17.03.2010, BStBl II 2011, 622) .',
         '115 Die aufgezeigte Verzinsung kann nicht durch freiwillige Auflösung im Laufe des Wj verhindert werden (BFH v 26.10.89, IV R 83/88, BStBl II 1990, 290) .',
         'Bei gewichtigen Fällen empfiehlt sich die Einholung einer verbindlichen Auskunft nach § 89 AO.'
         'Die Möglichkeit, dass das Gebäude für Zwecke der eigenen Vermögensverwaltung hergestellt wird, scheidet dann aus (vgl. BFH-Urteil vom 12. Juli 2007 X R 4/04, BStBl II 2007, 885) .'
         'Es werden bei der Einräumung des Optionsrechts durch Abschluss des Stillhaltervertrags und bei der Übertragung des Wirtschaftsguts durch Abschluss eines Veräußerungsvertrags identische Leistungen erbracht (Philipowski, Vereinnahmte Stillhalterprämien; Gezahlter Barausgleich nicht abziehbar?',
         '6b Abs 2a S 4-6 EStG nF: § 6b Abs 2a EStG enthält ein Wahlrecht, dass die auf den nach § 6b EStG begünstigten Veräußerungsgewinn entfallende festgesetzte Steuer zinslos in fünf gleichen Jahresraten gezahlt werden kann, wenn der StPfl eine Reinvestition in BV im EU- bzw EWR-Ausland plant.',
         'Weder das vom Kläger in Bezug genommene Urteil des BFH vom 27.5.2003 (VI R 33/01) als solches noch die darin zum Ausdruck gebrachte Auffassung, Aufwendungen für die erstmalige Berufsausbildung seien unter bestimmten Voraussetzungen als-vorweggenommene-Werbungskosten zu berücksichtigen, rechtfertigen eine Änderung der bestandskräftigen Einkommensteuerbescheide für die Streitjahre nach § 173 Abs. 1 Nr. 2 oder § 175 Abs. 1 Nr. 2 AO.']

data_50 = select_data(50)
docs = list(nlp.pipe(data_50))
#spacy_streamlit.visualize(MODEL_PATH, text, visualizer)
color_mapping = {}
entities_name = ['GS', 'VS', 'RS', 'UN', 'PER', 'LIT', 'VT', 'GRT', 'INN', 'EUN',
                 'LDS', 'ORG', 'LD', 'ST', 'STR', 'VO', 'AN', 'RR', 'MRK']
hex_values = ["#e6194B", "#fabed4", "#ffd8b1", "#f58231", "#800000", "#9A6324", "#808000", "#ffe119", "#fffac8",
              "#bfef45", "#3cb44b", "#aaffc3", "#42d4f4", "#469990", "#4363d8", "#911eb4", "#dcbeff", "#f032e6"]

# Note: hex_values has 18 entries for the 19 entity names, so zip() silently
# skips the last entity ('MRK'), which keeps displaCy's default color.
for entity, hex_value in zip(entities_name, hex_values):
    color_mapping[entity] = hex_value

for i in range(len(docs)):
    visualize_ner(docs[i], labels=nlp.get_pipe("ner").labels, key=i, colors=color_mapping)
Example #26
def main():
    st.set_page_config(page_title="20 in 1 NLP tasks",layout='wide')
    #st.title('NER recognition app')
    options=['Home','Analysis','Custom text cleaning','Question and answering','Text summarization','Email extractor','Spelling correction',
             'Text generation','About']
    choice=st.sidebar.selectbox('Choose accordingly',options)


    if choice=='Home':
        image=Image.open('1_a3xerDP7jqQglKxfIxfxVw.jpeg')
        st.image(image)
        st.header('Multi **NLP** tasks in a single window')
        st.write("""
        # This web App contains different text analysis with visual representation and advance tasks like QnA and Text Generation
        """)




    elif choice=='Analysis':
        st.subheader('Upload document')

        doc_file = st.file_uploader('', type=['csv', 'pdf', 'txt', 'docx'])

        if doc_file is not None:
            file_details = doc_file.type
            if file_details == 'text/plain':
                raw_text = str(doc_file.read(), 'utf-8')
            elif file_details == 'application/pdf':
                raw_text = read_pdf(doc_file)

            else:
                raw_text = docx2txt.process(doc_file)

        elif doc_file is None:
            st.subheader('Or enter your input')
            raw_text = st.text_area(' ')

        
        if st.sidebar.checkbox('Analyze'):
            num_of_most_common=st.sidebar.number_input('Most common tokens',5,15)
            with st.beta_expander('Original text'):
                st.write(raw_text)

            with st.beta_expander('Basic Text Analysis'):
                data=text_analyzer(raw_text)
                st.dataframe(data)


            col1,col2=st.beta_columns(2)

            with col1:
                with st.beta_expander('Word Stats'):
                    st.info('Words statistics')
                    doc=nt.TextFrame(raw_text)
                    st.write(doc.word_stats())
                with st.beta_expander("Top Keywords"):
                    st.info("Top Keywords/Tokens")
                    processed_text = nfx.remove_stopwords(raw_text)
                    keywords = get_most_common_tokens(
                        processed_text, num_of_most_common
                    )
                    st.write(keywords)

                with st.beta_expander("Sentiment"):
                    sent_result = sentiment(raw_text)
                    st.write(sent_result)

            with col2:
                with st.beta_expander("Plot Word Freq"):
                    fig = plt.figure()
                    top_keywords = get_most_common_tokens(
                        processed_text, num_of_most_common
                    )
                    plt.bar(top_keywords.keys(), top_keywords.values())
                    plt.xticks(rotation=45)
                    st.pyplot(fig)

                with st.beta_expander('Plot of part of speech'):
                    fig=plt.figure()
                    sns.countplot(data['PoS'])
                    plt.xticks(rotation=45)
                    st.pyplot(fig)
                with st.beta_expander('Word Cloud Visualization'):
                    plot_wordcloud(raw_text)

        if st.sidebar.checkbox('Named Entity Recognition'):
            doc = nlp(raw_text)
            spacy_streamlit.visualize_ner(doc, labels=nlp.get_pipe('ner').labels,
                                          attrs=['text', 'label_', 'start', 'end'])




    elif choice=='Custom text cleaning':
        st.subheader('Custom text cleaning')
        doc_file = st.file_uploader('', type=['csv', 'pdf', 'txt', 'docx'])

        if doc_file is not None:
            file_details = doc_file.type
            if file_details == 'text/plain':
                raw_text = str(doc_file.read(), 'utf-8')
            elif file_details == 'application/pdf':
                raw_text = read_pdf(doc_file)

            else:
                raw_text = docx2txt.process(doc_file)

        elif doc_file is None:
            st.subheader('Or enter your input')
            raw_text = st.text_area(' ')

        normalization = st.sidebar.checkbox('Text normalization')
        clean_stopwords = st.sidebar.checkbox('Remove stopwords')
        clean_punctuation = st.sidebar.checkbox('Remove punctuation')
        clean_numeric = st.sidebar.checkbox('Remove numbers')
        clean_special = st.sidebar.checkbox('Remove special characters')
        clean_url = st.sidebar.checkbox('Clean URLs')

        if st.button('Start process'):



            col1,col2=st.beta_columns(2)
            with col1:
                with st.beta_expander('Original text'):
                    st.write('The length is :',len(raw_text))
                    st.write(raw_text)

            with col2:
                with st.beta_expander('Processed text'):
                    if normalization:
                        raw_text=raw_text.lower()
                    if clean_stopwords:
                        raw_text=nfx.remove_stopwords(raw_text)
                    if clean_url:
                        raw_text=nfx.remove_urls(raw_text)
                    if clean_special:
                        raw_text=nfx.remove_special_characters(raw_text)
                    if clean_punctuation:
                        raw_text=nfx.remove_punctuations(raw_text)
                    if clean_numeric:
                        raw_text=nfx.remove_numbers(raw_text)
                    st.write('The length is :',len(raw_text))
                    st.write(raw_text)




    elif choice=='Text summarization':
        st.subheader('Extractive text summarization')
        doc_file = st.file_uploader('Upload', type=['csv', 'pdf', 'txt', 'docx'])
        #

        if doc_file is not None:
            file_details = doc_file.type
            if file_details == 'text/plain':
                raw_text = str(doc_file.read(), 'utf-8')
            elif file_details == 'application/pdf':
                raw_text = read_pdf(doc_file)
            else:
                raw_text = docx2txt.process(doc_file)
        elif doc_file is None:
            raw_text = st.text_area('Or enter your input manually')

        if st.button("Summarize"):
            with st.beta_expander("Original Text"):
                st.write(raw_text)
            c1, c2 = st.beta_columns(2)

            with c1:
                with st.beta_expander("LexRank Summary"):
                    my_summary = sumy_summarizer(raw_text)
                    document_len = {"Original": len(raw_text),
                                    "Summary": len(my_summary)}
                    st.write(document_len)
                    st.write(my_summary)

                    st.info("Rouge Score")
                    eval_df = evaluate_summary(my_summary, raw_text)
                    #st.dataframe(eval_df.T)
                    eval_df['metrics'] = eval_df.index
                    c = alt.Chart(eval_df).mark_bar().encode(
                        x='metrics', y='rouge-1')
                    st.altair_chart(c)

            with c2:
                with st.beta_expander("Frequency based summary"):
                    summary=freq_summarization(raw_text)
                    document_len = {"Original": len(raw_text),
                                    "Summary": len(summary)}
                    st.write(document_len)
                    st.write(summary)
                    st.info("Rouge Score")
                    eval_df = evaluate_summary(summary, raw_text)
                    #st.dataframe(eval_df.T)
                    eval_df['metrics'] = eval_df.index
                    c = alt.Chart(eval_df).mark_bar().encode(
                        x='metrics', y='rouge-1')
                    st.altair_chart(c)




    # elif choice=='Document similarity':
    #     st.subheader('Document similarity check')

    #     doc_file_1 = st.file_uploader('Upload first document', type=['csv', 'pdf', 'text', 'docx'])
    #     if doc_file_1 is not None:
    #         file_details = doc_file_1.type
    #         if file_details == 'text/plain':
    #             raw_text_1 = str(doc_file_1.read(), 'utf-8')
    #         elif file_details == 'application/pdf':
    #             raw_text_1 = read_pdf(doc_file_1)
    #         else:
    #             raw_text_1 = docx2txt.process(doc_file_1)
    #     elif doc_file_1 is None:
    #         raw_text_1 = st.text_area('Upload first document manually')

    #     doc_file_2 = st.file_uploader('Upload second document', type=['csv', 'pdf', 'text', 'docx'])
    #     if doc_file_1 is not None:
    #         file_details = doc_file_2.type
    #         if file_details == 'text/plain':
    #             raw_text_2 = str(doc_file_2.read(), 'utf-8')
    #         elif file_details == 'application/pdf':
    #             raw_text_2 = read_pdf(doc_file_2)
    #         else:
    #             raw_text_2 = docx2txt.process(doc_file_2)
    #     elif doc_file_2 is None:
    #         raw_text_2 = st.text_area('Upload second document manually')

    #     a=embed_fn([raw_text_1])
    #     b=embed_fn([raw_text_2])
    #     cosine=cosine_similarity(a,b)[0][0]*100
    #     if st.button('Calculate similarity'):
    #         st.write(f'The similarity is {round(cosine,2)} %')




    elif choice=='Email extractor':
        st.subheader('Email extractor')
        doc_file = st.file_uploader('Upload', type=['csv', 'pdf', 'txt', 'docx'])
        if doc_file is not None:
            file_details = doc_file.type
            if file_details == 'text/plain':
                raw_text = str(doc_file.read(), 'utf-8')
                if st.checkbox('Display original text'):
                    st.write(raw_text)
            elif file_details == 'application/pdf':
                raw_text = read_pdf(doc_file)
                if st.checkbox('Display original text'):
                    st.write(raw_text)
            else:
                raw_text = docx2txt.process(doc_file)
                if st.checkbox('Display original text'):
                    st.write(raw_text)
        elif doc_file is None:
            raw_text = st.text_area('Enter your input')


        tasks_list = ["Emails"]
        task_option = st.sidebar.multiselect("Task", tasks_list, default="Emails")
        task_mapper = {"Emails": nfx.extract_emails(raw_text)}

        all_results = []
        for task in task_option:
            result = task_mapper[task]
            # st.write(result)
            all_results.append(result)
        st.write(all_results)

        with st.beta_expander("Results As DataFrame"):
            result_df = pd.DataFrame(all_results).T
            result_df.columns = task_option
            st.dataframe(result_df)
            #make_downloadable_df(result_df)

    elif choice=='Spelling correction':
        st.subheader('Spell checker and corrector')
        doc_file = st.file_uploader('Upload', type=['csv', 'pdf', 'txt', 'docx'])
        if doc_file is not None:
            file_details = doc_file.type
            if file_details == 'text/plain':
                raw_text = str(doc_file.read(), 'utf-8')
                if st.checkbox('Display original text'):
                    st.write(raw_text)
            elif file_details == 'application/pdf':
                raw_text = read_pdf(doc_file)
                if st.checkbox('Display original text'):
                    st.write(raw_text)
            else:
                raw_text = docx2txt.process(doc_file)
                if st.checkbox('Display original text'):
                    st.write(raw_text)
        elif doc_file is None:
            raw_text = st.text_area('Enter your input')

        spell = SpellChecker()
        misspelled_word_list = raw_text.split()
        misspelled_word = spell.unknown(misspelled_word_list)
        # correction() expects a single word, so correct the text token by token
        b = ' '.join(spell.correction(w) or w for w in misspelled_word_list)
        if st.button('Get corrected output'):
            st.write(b)
        if st.button('Analyze'):
            for word in misspelled_word:
                if word != spell.correction(word):
                    st.write('Original word:', word)
                    st.write('correct word:', spell.correction(word))
                    st.write('Suggested words:', spell.candidates(word))
                    #st.write('\n')





    elif choice=='Question and answering':
        st.subheader('Question and Answering system')

        doc_file=st.file_uploader('Upload',type=['csv','pdf','txt','docx'])
        #


        if doc_file is not None:
            file_details=doc_file.type
            if file_details=='text/plain':
                raw_text=str(doc_file.read(),'utf-8')
                if st.checkbox('Display original text'):
                    st.write(raw_text)
            elif file_details=='application/pdf':
                raw_text=read_pdf(doc_file)
                if st.checkbox('Display original text'):
                    st.write(raw_text)
            else:
                raw_text=docx2txt.process(doc_file)
                if st.checkbox('Display original text'):
                    st.write(raw_text)
        elif doc_file is None:
            raw_text = st.text_area('Enter your input')

        st.subheader('Enter your question')
        question=st.text_area('What"s in your mind?')


        # if st.button('Generate answer'):
        #
        #     qna=QnA(question,raw_text)
        #     st.write(qna)

    elif choice=='Text generation':
        pass

    else:
        st.header('About')
        st.write('''
        # This web application is built by *Arindam Mondal*, a student of a Masters in Data Analytics.''')
Example #27

def geo():
	"""
	Geospatial analysis
	"""

	data = load_data()
	cols = attribute_name()
	need_cols = ["ACTOR1", "ACTOR2", "REGION", "COUNTRY", "ADMIN1", "ADMIN2",
	"LOCATION", "EVENT_TYPE", "SUB_EVENT_TYPE", "FATALITIES", "YEAR", "LONGITUDE","LATITUDE"]

	nlp = spacy.load('en_core_web_sm')

	ext_data = data[need_cols]

	def cloud_plot(df):
		fig, ax = plt.subplots(figsize=(5,5))
		cloud = WordCloud().generate(df)
		ax.imshow(cloud, interpolation="bilinear")
		ax.axis('off')
		st.pyplot(fig)

	st.sidebar.title('Geospatial')
	page = st.sidebar.selectbox('select page:', ('Global views', 'Region views'))
	st.write('## Geospatial analysis.')

	if page == 'Global views':

		with  st.beta_container():
			st.write('### Conflict by year.')

			year = st.selectbox('Select year:', range(1997, 2021))
			year_data = ext_data[ext_data['YEAR'] == year]


			st.write(f"#### Conflict in Africa for year: {year}.")
			viewport = pdk.ViewState(latitude=year_data.LATITUDE.mean(), longitude=year_data.LONGITUDE.mean(), zoom=2, bearing=0, pitch=0)
			# Define a layer to display on a map
			layer = pdk.Layer(
		    "ScatterplotLayer",
		    year_data,
		    pickable=True,
		    opacity=0.8,
		    stroked=True,
		    filled=True,
		    radius_scale=5,
		    radius_min_pixels=1,
		    radius_max_pixels=1000,
		    line_width_min_pixels=1,
		    get_position=["LONGITUDE", 'LATITUDE'],
		    get_radius="FATALITIES",
		    get_fill_color=[255, 140, 0],
		    get_line_color=[0, 0, 0])

			r = pdk.Deck(layers=[layer], initial_view_state=viewport, map_style="road")
			st.pydeck_chart(r)


		if st.checkbox('Information'):
			st.write('### Conflict notes.')

			ctry = st.sidebar.selectbox('Select country:', list(data['COUNTRY'].unique()), key=0)
			year = st.selectbox('Select year:', range(1997, 2021), key=1)

			year_country = data[(data['YEAR'] == year) & (data['COUNTRY'] == ctry)]
			source = ' '.join(year_country['SOURCE'])
			event = ' '.join(year_country['EVENT_TYPE'])
			actors = ' '.join(year_country['ACTOR1'] + ', ' + year_country['ACTOR2'])
			fatalities = year_country['FATALITIES'].sum()
			fatalities = f"{str(fatalities)}"

			text1 = ' '.join(year_country['NOTES'])
			text = word_tokenizer(text1)
			doc = nlp(text1)

			if st.button('Read'):
				spat.visualize_ner(doc, labels=nlp.get_pipe('ner').labels)

			
			with  st.beta_container():
				if st.checkbox('Source'):
					st.write("#### SOURCE")
					cloud_plot(source)

				if st.checkbox('Event'):
					st.write("#### EVENT_TYPE")
					cloud_plot(event)

				if st.checkbox("Actors"):
					st.write("#### ACTORS")
					cloud_plot(actors)

				if st.checkbox("Fatalities"):
					st.write('#### Yearly fatalities.')
					fig, ax = plt.subplots()
					ax.text(0.5, 0.5, fatalities, size=50, ha="center", va='center', bbox=dict(boxstyle="round", 
						ec= (1, 0.8, 0.5),
						fc = (0.3, 0.2, 0.5)))
					ax.axis("off")
					st.pyplot(fig)

			st.write('### Conflict notes mining.')
			if st.checkbox('Text statistics'):
				fdist = nltk.FreqDist(text)
				popular_word = list(set(fdist.keys()) - set(fdist.hapaxes()))
				pop_word = pd.Series({w:fdist[w] for w in sorted(popular_word)})

				with  st.beta_container():
					st.write(f'Length of the text: {len(text)} words.')
					st.write(f'Lexical richness of the text: {len(text)/len(set(np.unique(text)))}.')
					with  st.beta_expander('Learn more.'):
						nb = """
						**Lexical richness** gives us the number of times on average each word in the text is used.

						**Frequency distribution** records the number of times each outcomes of an 
						experiment has occured. It helps also to identify the words of a text that are most
						informative about topic and genre of the text. 
						"""
						st.markdown(nb, unsafe_allow_html=False)
					fig, ax = plt.subplots(figsize=(10,15), dpi=100)
					mosaic(pop_word.sort_values()[:30], ax=ax, horizontal=False,
					 title='Frequency distribution: 30 most commons words in the note.', axes_label=False)
					st.pyplot(fig)


			if st.checkbox('Collocation'):
				bigrams = nltk.bigrams(text)
				cfd_b = nltk.FreqDist(bigrams)
				bfd = {x[0][0]+' '+x[0][1]: x[1] for x in cfd_b.most_common(n=30)}

				trigrams = nltk.trigrams(text)
				cfd_t = nltk.FreqDist(trigrams)
				cdic={x[0][0] + ' ' + x[0][1]+' '+x[0][2]: x[1] for x in cfd_t.most_common(n=30)}

				with st.beta_expander('Learn more'):
					nb = """
						Collocations is a sequence of words that occurs together unusually often.
					"""
					st.markdown(nb)

				with st.beta_container():
					fig1, ax1 = plt.subplots(figsize=(5, 8))
					mosaic(bfd, ax=ax1,  axes_label=False,
					       horizontal=False, title='30 most commons bigrams.')
					st.pyplot(fig1)

					fig2, ax2 = plt.subplots(figsize=(5, 8))
					mosaic(cdic, ax=ax2,  axes_label=False,
					       horizontal=False, title='30 most commons trigrams.')
					st.pyplot(fig2)


			if st.checkbox('Concordance'):
				with st.beta_expander('Learn more'):
					nb = """
					**Concordance**: shows occurence of a given word in the some context. 
					"""
					st.markdown(nb)

				word = st.text_input("Give words")
				matcher = PhraseMatcher(nlp.vocab)
				terms = [str(word)]
				patterns = [nlp.make_doc(t) for t in terms]
				matcher.add("TerminologyList", None, *patterns)
				matches = matcher(doc)
				for match_id, start, end in matches:
				    span = doc[max(start - 20, 0):end + 15]
				    st.write(span.text)


	if page == 'Region views':

		st.write('### Conflict by region')

		region = st.sidebar.selectbox('Select region:', list(data['REGION'].unique()))
		year = st.selectbox('Select year:', range(1997, 2021))

		sub_region = data[(data['YEAR'] == year) & (data['REGION'] == region)]
		fatalities_per_country = sub_region.groupby('COUNTRY')["FATALITIES"].agg('sum')
		cal_fatalities = sub_region.groupby('EVENT_DATE')['FATALITIES'].agg('sum')
		weekly_fatalities = cal_fatalities.resample('W').sum()
		monthly_fatalities = cal_fatalities.resample('M', convention='end').sum()
		Q_fatalities = cal_fatalities.resample('Q', convention='start').sum()
		sub_region['MONTH'] = sub_region.EVENT_DATE.dt.month
		sub_region['WEEKDAY'] = sub_region.EVENT_DATE.dt.weekday

		ctry_month = pd.pivot_table(sub_region, aggfunc=np.sum, columns='COUNTRY', index='MONTH', values='FATALITIES')
		ctry_weeks = pd.pivot_table(sub_region, aggfunc=np.sum, columns='COUNTRY', index='WEEKDAY', values='FATALITIES')

		
		if st.checkbox('Major viz'):
			with st.beta_container():
				st.write('#### Majors Actors')
				fig, ax = plt.subplots()
				sns.countplot(x='INTER1', data=sub_region)
				st.pyplot(fig)

				fig1, ax1 = plt.subplots(figsize=(10,5))
				sub_region.ACTOR1.value_counts()[:10].plot(kind='bar', ax=ax1, title=f'10 forces most present in the {region} region.')
				st.pyplot(fig1)

				st.write('#### Event type and countries.')
				fig2, ax2 = plt.subplots(figsize=(10,5))
				sub_region.COUNTRY.value_counts().plot(kind='bar', ax=ax2, title=f'Countries of the {region} region.')
				st.pyplot(fig2)

				fig3, ax3 = plt.subplots(figsize=(10,5))
				sub_region.EVENT_TYPE.value_counts().plot(kind='bar', ax=ax3, title='Event type.')
				st.pyplot(fig3)

				st.write('#### Fatalities by country.')
				fig4, ax4 = plt.subplots(figsize=(10,5))
				fatalities_per_country.plot(kind='bar', ax=ax4, title='Total fatalities by country ')
				ax4.set_ylabel('FATALITIES')
				st.pyplot(fig4)

		if st.checkbox('Time series'):
			if st.checkbox('Calendar'):
				st.write('#### Fatalities calendar for each country.')
				with st.beta_container():
					fig, ax = plt.subplots(figsize=(15,8))
					sns.heatmap(ctry_month, center=0, annot=True, ax=ax, fmt='.2f')
					st.pyplot(fig)

					fig1, ax1 = plt.subplots(figsize=(15,8))
					sns.heatmap(ctry_weeks, center=0, annot=True, ax=ax1, fmt='.3f')
					st.pyplot(fig1)

			if st.checkbox('Days, Weeks.'):
				with st.beta_container():
					fig1, ax1 = plt.subplots(figsize=(15,5))
					cal_fatalities.plot(ax=ax1)
					ax1.set_title('Daily fatalities.')
					st.pyplot(fig1)

					fig2, ax2 = plt.subplots(figsize=(15,5))
					weekly_fatalities.plot(ax=ax2)
					ax2.set_title('Weekly fatalities.')
					st.pyplot(fig2)

			if st.checkbox( 'Months, Quarter'):
				with st.beta_container():
					fig3, ax3 = plt.subplots(figsize=(15,5))
					monthly_fatalities.plot(ax=ax3)
					ax3.set_title('Monthly fatalities.')
					st.pyplot(fig3)

					fig4, ax4 = plt.subplots(figsize=(15,5))
					Q_fatalities.plot(kind='bar', ax=ax4)
					ax4.set_title('Quarterly fatalities.')
					st.pyplot(fig4)
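word_tokenizer, used in the conflict-notes section above, is an assumed helper; with NLTK it could be sketched as:

import nltk

def word_tokenizer(text):
    """Hypothetical helper: alphabetic, lower-cased tokens for the text stats above."""
    return [w.lower() for w in nltk.word_tokenize(text) if w.isalpha()]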
Example #28
st.markdown('# Hotel Review Analysis')

tab = st.sidebar.selectbox(
    'Select Task',
    ('Review Analysis', 'Aggregated Stats', 'Aspect Base Sentiment Analysis'))

if tab == 'Review Analysis':
    st.markdown('## Review analysis')
    review = st.text_input('Please type your review')

    if review:
        # st.markdown('## Name Entity Recognition (NER)')
        docx = nlp(review)
        spacy_streamlit.visualize_ner(docx,
                                      show_table=False,
                                      labels=nlp.get_pipe('ner').labels)

        col1, _, col2, _, col3 = st.beta_columns([8, 1, 6, 1, 6])
        col1.markdown('### Processed text')
        review_p = process_review(review)
        col1.markdown(review_p)

        ## Emotional Sentiment
        col2.markdown('### Emotions')
        col2.table(pd.Series(NRCLex(review).raw_emotion_scores))

        col3.markdown('### Sentiment')
        blob = TextBlob(review)
        col3.markdown('**Note:** polarity ranges from -1 (negative) to +1 (positive)')
        col3.markdown(f'Polarity : {blob.sentiment.polarity:0.2f}')
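process_review is not shown; a minimal sketch using the already-loaded spaCy pipeline (an assumption, since the original cleaning steps are unknown):

def process_review(review):
    """Hypothetical helper: lower-case, lemmatize and drop stopwords/punctuation."""
    doc = nlp(review.lower())
    return ' '.join(tok.lemma_ for tok in doc if not tok.is_stop and not tok.is_punct)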
Example #29
"""
Example using the components provided by spacy-streamlit in an existing app.

Prerequisites:
python -m spacy download en_core_web_sm
"""
import spacy_streamlit
import streamlit as st

DEFAULT_TEXT = """Google was founded in September 1998 by Larry Page and Sergey Brin while they were Ph.D. students at Stanford University in California. Together they own about 14 percent of its shares and control 56 percent of the stockholder voting power through supervoting stock. They incorporated Google as a California privately held company on September 4, 1998, in California. Google was then reincorporated in Delaware on October 22, 2002."""

spacy_model = "en_core_web_sm"

st.title("My cool app")
text = st.text_area("Text to analyze", DEFAULT_TEXT, height=200)
doc = spacy_streamlit.process_text(spacy_model, text)

spacy_streamlit.visualize_ner(
    doc,
    labels=["PERSON", "DATE", "GPE"],
    show_table=False,
    title="Persons, dates and locations",
)
st.text(f"Analyzed using spaCy model {spacy_model}")
Example #30
if 'genes' in visualizers:
    st.header("Genes")
    kind = ["GENE_OR_GENE_PRODUCT"]
    u1 = st.checkbox("Filter to unique tokens?", value=False, key=1)
    df, attrs = get_special_entities(doc)
    df = pd.DataFrame(df, columns=attrs)
    df = df[df['label_'].isin(kind)]
    df = df.drop('label_', axis=1)
    if u1:
        df = df.drop_duplicates('text')
    st.write(df)

if 'special' in visualizers:
    st.header("Special Entities")
    st.markdown("See **ner** for nice tagging.")
    entity_kinds = st.multiselect("Select entity kinds",
                                  nlp.get_pipe('ner').labels)

    u2 = st.checkbox("Filter to unique tokens?", value=False, key=2)
    df, attrs = get_special_entities(doc)
    df = pd.DataFrame(df, columns=attrs)
    df = df[df['label_'].isin(entity_kinds)]
    if u2:
        df = df.drop_duplicates('text')
    st.write(df)

if 'ner' in visualizers:
    st.header("Named Entity Recognition")
    visualize_ner(doc, labels=nlp.get_pipe("ner").labels, title=None)