Example #1
import nltk
from nltk.chunk import tree2conlltags
import wikipedia
import en_core_web_sm
import spacy_streamlit


def predict_note_authentication(data2):
    """Use NLTK, spaCy and Streamlit to perform named entity
    recognition on scraped data and extract entities such as
    city, person, organisation, date, geographical entity,
    product, etc.

    data2: name of the Wikipedia page to extract data from
    """

    nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')
    data = wikipedia.summary(data2)

    # Apply word tokenization and part-of-speech tagging to the text.
    def preprocess(sentence):
        sentence = nltk.word_tokenize(sentence)
        sentence = nltk.pos_tag(sentence)
        return sentence

    sentence = preprocess(data)

    # Chunk rule for noun phrases: form an NP whenever the chunker finds
    # an optional determiner (DT), followed by any number of adjectives
    # (JJ), and then a noun (NN).
    pattern = 'NP: {<DT>?<JJ>*<NN>}'

    # Create a chunk parser and run it on the tagged sentence.
    cp = nltk.RegexpParser(pattern)
    cs = cp.parse(sentence)

    # IOB tags are the standard way to represent chunk structures in
    # files, and we use the same format here.
    iob_tagged = tree2conlltags(cs)

    # spaCy: run the pretrained English pipeline over the same text.
    nlp = en_core_web_sm.load()
    article = nlp(data)

    labels = [x.label_ for x in article.ents]
    items = [x.text for x in article.ents]
    sentences = [x for x in article.sents]

    # spacy_streamlit provides building blocks that call into Streamlit
    # and set up all the required elements for you.
    default_text = data
    models = ["en_core_web_sm"]
    spacy_streamlit.visualize(models, default_text)
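To see what the chunk rule above actually produces, here is a small standalone check; the expected output is shown as a comment, though the exact POS tags depend on the tagger version:

import nltk
from nltk.chunk import tree2conlltags

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

tagged = nltk.pos_tag(nltk.word_tokenize("The little dog barked"))
tree = nltk.RegexpParser('NP: {<DT>?<JJ>*<NN>}').parse(tagged)
print(tree2conlltags(tree))
# [('The', 'DT', 'B-NP'), ('little', 'JJ', 'I-NP'),
#  ('dog', 'NN', 'I-NP'), ('barked', 'VBD', 'O')]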
Example #2
def show(self, text, meta=False):
    lit.visualize(
        models=self.models,
        default_text=text,
        default_model=self.models[0],
        visualizers=self.views,
        similarity_texts=('datamaskin', 'laptop'),
        show_json_doc=meta,
        show_meta=meta,
        show_config=meta,
        show_visualizer_select=True,
        sidebar_title='NLP spaCy intro',
        show_logo=False,
        # color='#EEF5DB',
    )
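For context, a minimal wrapper this method could live in; the class name and the contents of self.models and self.views are assumptions, and "lit" presumably stands for spacy_streamlit imported under an alias:

import spacy_streamlit as lit

class SpacyDemo:
    def __init__(self):
        # Assumed setup: the original only shows that self.models is a
        # list of pipeline names and self.views a list of visualizer names.
        self.models = ["en_core_web_sm"]
        self.views = ["ner", "parser", "tokens"]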
Example #3
import spacy_streamlit

models = ["en_core_web_sm", "en_core_web_md"]
default_text = "Sundar Pichai is the CEO of Google."
spacy_streamlit.visualize(models, default_text)
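To try this locally, install the package and models first, then save the script (e.g. as app.py) and launch it with Streamlit:

pip install spacy-streamlit
python -m spacy download en_core_web_sm
python -m spacy download en_core_web_md
streamlit run app.py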
Example #4
if choice == 'Sentiment Analysis':
    st.subheader("Sentiment of your text")
    message = st.text_area("Enter your text", "Type Here")

    message = pd.Series(message)
    df_counts_pos = tc.transform(message)
    df_clean_pos = clean_text.transform(message)
    df_model_pos = df_counts_pos
    df_model_pos['clean_text'] = df_clean_pos

    if st.button("Predict"):
        prediction = model.predict(df_model_pos)
        st.write(f'Sentiment prediction: {prediction[0]}')

if choice == 'NER Checker':
    st.subheader("Entity recognition of your text")
    message = st.text_area("Enter your text", "Type Here")
    message = pd.Series(message)
    message = clean_text.fit_transform(message)

    if st.button("Analyze"):
        spacy_streamlit.visualize(models, message)

if choice == 'word cloud':
    st.subheader("Word cloud display")

    message = st.text_area("Enter your text", "Type Here")
    message = pd.Series(message)
    make_wordcloud(' '.join(message))
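Here tc, clean_text and model are a fitted feature transformer, a text-cleaning transformer and a trained classifier defined elsewhere in this app, and make_wordcloud is likewise not shown. As an illustration only, make_wordcloud might look like this, assuming the wordcloud package and matplotlib:

import matplotlib.pyplot as plt
import streamlit as st
from wordcloud import WordCloud

def make_wordcloud(text):
    # Hypothetical helper matching the call above: render the text as a
    # word cloud and hand the matplotlib figure to Streamlit.
    wc = WordCloud(width=800, height=400, background_color="white").generate(text)
    fig, ax = plt.subplots()
    ax.imshow(wc, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)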
Example #5
import os
import urllib.parse

from dotenv import load_dotenv
import requests
import spacy_streamlit
import streamlit as st

load_dotenv()

token_headers = {"Authorization": f"Bearer {os.environ['ACCESS_TOKEN']}"}
api_url = urllib.parse.urljoin(os.environ["DBAPI_URL"],
                               os.environ["DBAPI_STAGE"])

models = ["en_core_web_sm", "en_core_web_md"]

search_query = st.text_input("Input your search query here:", "query")
resp = requests.get(
    f"{api_url}/documents/search_summary?query={search_query}",
    headers=token_headers,
).json()
id_to_text = {}
for d in resp:
    id_to_text[d["id"]] = (d["title"].strip() + "\n" +
                           d["parsed_text"]).strip()

id_ = st.selectbox("Select document", sorted(id_to_text.keys()))

default_text = id_to_text[id_]
st.text(default_text)
spacy_streamlit.visualize(models, default_text, visualizers=["ner"])
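The snippet reads ACCESS_TOKEN, DBAPI_URL and DBAPI_STAGE from the environment, populated by load_dotenv from a .env file along these lines (placeholder values):

# .env (placeholder values)
ACCESS_TOKEN=your-bearer-token
DBAPI_URL=https://api.example.com/
DBAPI_STAGE=prod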
Example #6
import os
import importlib
import importlib.util

import spacy_streamlit

models = ["ja_core_news_lg", "ja_core_news_md", "ja_core_news_sm"]
# models = ["ja_core_news_sm"]

# Download any model packages that are not installed yet.
for model in models:
    if importlib.util.find_spec(model) is None:
        os.system("python -m spacy download {}".format(model))
        # Make the freshly installed package importable in this process.
        importlib.invalidate_caches()

spacy_streamlit.visualize(models, "")
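Alternatively, spaCy ships helpers for exactly this pattern; a sketch using spacy.util.is_package and spacy.cli.download:

import spacy
from spacy.cli import download

for model in models:
    # is_package() reports whether the pipeline is installed as a package.
    if not spacy.util.is_package(model):
        download(model)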
Example #7
import spacy_streamlit

def main(models: str, default_text: str):
    models = [name.strip() for name in models.split(",")]
    spacy_streamlit.visualize(models, default_text, visualizers=["textcat"])
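Taking the model names as a single comma-separated string suggests this main is driven from the command line. A sketch assuming a typer wrapper (note that the textcat visualizer only shows output if the selected pipeline actually contains a text classifier):

import typer

if __name__ == "__main__":
    try:
        typer.run(main)
    except SystemExit:
        # typer calls sys.exit() when done; swallow it so the script
        # keeps running under "streamlit run".
        pass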
Example #8
import spacy_streamlit
from pathlib import Path
import srsly
import importlib

MODELS = srsly.read_json(Path(__file__).parent / "models.json")
DEFAULT_MODEL = "en_core_web_sm"
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
DESCRIPTION = """**Explore trained [spaCy v3.0](https://nightly.spacy.io) pipelines**"""


def get_default_text(nlp):
    # Check if spaCy has built-in example texts for the language
    try:
        examples = importlib.import_module(f".lang.{nlp.lang}.examples",
                                           "spacy")
        return examples.sentences[0]
    except (ModuleNotFoundError, ImportError):
        return ""


spacy_streamlit.visualize(
    MODELS,
    default_model=DEFAULT_MODEL,
    visualizers=["parser", "ner", "similarity", "tokens"],
    show_visualizer_select=True,
    sidebar_description=DESCRIPTION,
    get_default_text=get_default_text)
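models.json is not shown here. Since visualize accepts a plain list of loadable pipeline names, the file could be as simple as this assumed example:

["en_core_web_sm", "en_core_web_md"]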
Example #9
import spacy_streamlit
from pathlib import Path
import srsly

MODELS = srsly.read_json(Path(__file__).parent / "models.json")
DEFAULT_MODEL = "en_core_web_sm"
DEFAULT_TEXT = "Sundar Pichai is the CEO of Google."
DESCRIPTION = """**Explore trained [spaCy v3.0](https://nightly.spacy.io) pipelines**"""

spacy_streamlit.visualize(
    MODELS,
    DEFAULT_TEXT,
    default_model=DEFAULT_MODEL,
    visualizers=["parser", "ner", "similarity", "tokens"],
    show_visualizer_select=True,
    # TODO: enable when morph issue is fixed in next nightly
    # show_json_doc=False,
    sidebar_description=DESCRIPTION)
Example #10
# pip install spacy-streamlit
# python -m spacy download en_core_web_sm

import spacy_streamlit

models = ['en_core_web_sm']
text = "Today is a beautiful day"
spacy_streamlit.visualize(models, text)
"""On your terminal, type:
streamlit run streamlit_app.py 
"""
Example #11
st.title("Fake News Detector")
image = Image.open('Photos/fofweb.jpg')
st.image(image)
st.markdown(desc)
st.subheader("Enter the URL/text of a news article written in English")
select_input = st.radio("Select Input:", ["URL", "Text"])

if select_input == "URL":
    url = st.text_input("URL")
    if st.button("Run"):
        text = get_page_text(url)
        generate_output(text)
        cloud(text)
        spacy_streamlit.visualize(models,
                                  text,
                                  visualizers=visualizers,
                                  show_visualizer_select=True)

else:
    text = st.text_area("Text", height=300)
    if st.button("Run"):
        if len(text) > 100:
            generate_output(text)
            cloud(text)
            spacy_streamlit.visualize(models,
                                      text,
                                      visualizers=visualizers,
                                      show_visualizer_select=True)
        else:
            st.markdown('Please enter more than 100 characters.')
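Here get_page_text, generate_output, cloud, desc, models and visualizers are defined elsewhere in this app. As an illustration only, get_page_text might be implemented with requests and BeautifulSoup (a hypothetical sketch, not the author's code):

import requests
from bs4 import BeautifulSoup

def get_page_text(url):
    # Hypothetical helper matching the call above: fetch the article page
    # and return its paragraph text as one string.
    html = requests.get(url, timeout=10).text
    soup = BeautifulSoup(html, "html.parser")
    return " ".join(p.get_text() for p in soup.find_all("p"))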