def predict_note_authentication(data2):
    """Run Named Entity Recognition on the summary of a Wikipedia page.

    Fetches the summary of the Wikipedia page named *data2*, performs an
    NLTK tokenise/POS-tag/noun-phrase-chunking pass, then renders the text
    through the spacy_streamlit NER UI.

    Parameters
    ----------
    data2 : str
        Name of the Wikipedia page whose summary is analysed.
    """
    # NLTK resources required for word_tokenize / pos_tag below.
    nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')

    data = wikipedia.summary(data2)

    # Word-tokenise and POS-tag the raw summary text.
    def preprocess(sentence):
        sentence = nltk.word_tokenize(sentence)
        sentence = nltk.pos_tag(sentence)
        return sentence

    sentence = preprocess(data)

    # Chunk grammar: a noun phrase (NP) is an optional determiner (DT),
    # any number of adjectives (JJ), then a noun (NN).
    pattern = 'NP: {<DT>?<JJ>*<NN>}'
    cp = nltk.RegexpParser(pattern)
    cs = cp.parse(sentence)

    # IOB-tagged view of the chunk tree, kept for inspection/debugging.
    iob_tagged = tree2conlltags(cs)

    # BUG FIX: the original call passed the POS-tagged token list as the
    # third positional argument, which spacy_streamlit interprets as the
    # `visualizers` list — only the models and the default text belong
    # here (spacy_streamlit loads the model and runs the pipeline itself).
    # Dead intermediates (ny_bb/article/labels/items/sentences) removed.
    models = ["en_core_web_sm"]
    spacy_streamlit.visualize(models, data)
def show(self, text, meta=False):
    """Render the spacy_streamlit UI for *text*.

    When *meta* is truthy, the JSON-doc, meta and config debug panels
    are shown as well.
    """
    # The three debug panels are all driven by the same flag.
    debug_panels = dict(
        show_json_doc=meta,
        show_meta=meta,
        show_config=meta,
    )
    lit.visualize(
        models=self.models,
        default_text=text,
        default_model=self.models[0],
        visualizers=self.views,
        similarity_texts=('datamaskin', 'laptop'),
        show_visualizer_select=True,
        sidebar_title='NLP spaCy intro',
        show_logo=False,
        # color='#EEF5DB',
        **debug_panels,
    )
# Minimal spacy_streamlit demo: compare two English pipelines on one sentence.
import spacy_streamlit

default_text = "Sundar Pichai is the CEO of Google."
models = ["en_core_web_sm", "en_core_web_md"]

spacy_streamlit.visualize(models, default_text)
# --- Sentiment Analysis ------------------------------------------------
if choice == 'Sentiment Analysis':
    st.subheader("Sentiment of your text")
    message = st.text_area("Enter your text", "Type Here")
    # Wrap the single input string in a Series so the fitted
    # transformers (tc / clean_text) receive the shape they expect.
    message = pd.Series(message)
    df_counts_pos = tc.transform(message)
    df_clean_pos = clean_text.transform(message)
    # NOTE(review): this aliases df_counts_pos — the 'clean_text' column
    # below is added to the same underlying object, not a copy.
    df_model_pos = df_counts_pos
    df_model_pos['clean_text'] = df_clean_pos
    if st.button("Predict"):
        prediction = model.predict(df_model_pos)
        st.write(f'sentiment prediction is {prediction[0]}')

# --- Named Entity Recognition ------------------------------------------
if choice == 'NER Checker':
    st.subheader("Entity recognition of your text")
    message = st.text_area("Enter your text", "Type Here")
    message = pd.Series(message)
    # NOTE(review): fit_transform re-fits the cleaner on this single
    # sample each run — confirm plain transform() wasn't intended, as in
    # the sentiment branch above.
    message = clean_text.fit_transform(message)
    if st.button("Analyze"):
        spacy_streamlit.visualize(models, message)

# --- Word cloud --------------------------------------------------------
if choice == 'word cloud':
    st.subheader("Word cloud display")
    message = st.text_area("Enter your text", "Type Here")
    message = pd.Series(message)
    make_wordcloud(' '.join(message))
# Streamlit app: search a document API and run spaCy NER on the result.
import os
import urllib.parse

import requests
import spacy_streamlit
import streamlit as st
from dotenv import load_dotenv

# Load credentials / endpoint configuration from a local .env file.
load_dotenv()

# BUG FIX: `os` was used below (os.environ) but never imported,
# which raised a NameError on startup.
token_headers = {"Authorization": f"Bearer {os.environ['ACCESS_TOKEN']}"}
api_url = urllib.parse.urljoin(os.environ["DBAPI_URL"], os.environ["DBAPI_STAGE"])

models = ["en_core_web_sm", "en_core_web_md"]

search_query = st.text_input("Input your search query here:", "query")
resp = requests.get(
    f"{api_url}/documents/search_summary?query={search_query}",
    headers=token_headers,
).json()

# Map document id -> "title\nbody" for the select box below.
id_to_text = {}
for d in resp:
    id_to_text[d["id"]] = (d["title"].strip() + "\n" + d["parsed_text"]).strip()

id_ = st.selectbox("Select document", sorted(id_to_text.keys()))
default_text = id_to_text[id_]
st.text(default_text)

spacy_streamlit.visualize(models, default_text, visualizers=["ner"])
# Ensure the Japanese spaCy models are installed, then launch the UI.
import importlib
import importlib.util
import os

import pkg_resources
import spacy_streamlit

models = ["ja_core_news_lg", "ja_core_news_md", "ja_core_news_sm"]
# models = ["ja_core_news_sm"]

# Download any model package that is not importable yet.
# BUG FIX: the deprecated `imp` module (removed in Python 3.12) has been
# replaced with importlib equivalents (find_spec / reload).
for model in models:
    if importlib.util.find_spec(model) is None:
        os.system("python -m spacy download {}".format(model))
        # Refresh pkg_resources so the freshly installed package is visible
        # without restarting the process.
        importlib.reload(pkg_resources)

spacy_streamlit.visualize(models, "")
def main(models: str, default_text: str):
    """Visualise text-classification output for comma-separated model names.

    *models* is a comma-separated string of spaCy pipeline names;
    *default_text* pre-fills the text box in the UI.
    """
    model_names = [part.strip() for part in models.split(",")]
    spacy_streamlit.visualize(model_names, default_text, visualizers=["textcat"])
"""Streamlit demo: explore trained spaCy v3.0 pipelines (nightly)."""
import importlib
from pathlib import Path

import spacy_streamlit
import srsly

# Model registry lives next to this script.
MODELS = srsly.read_json(Path(__file__).parent / "models.json")
DEFAULT_MODEL = "en_core_web_sm"
# NOTE(review): DEFAULT_TEXT is currently unused — default text comes from
# get_default_text() below. Kept for compatibility with sibling scripts.
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
DESCRIPTION = """**Explore trained [spaCy v3.0](https://nightly.spacy.io) pipelines**"""


def get_default_text(nlp):
    """Return spaCy's built-in example sentence for the pipeline's language,
    or an empty string when the language has no examples module."""
    try:
        examples = importlib.import_module(f".lang.{nlp.lang}.examples", "spacy")
        return examples.sentences[0]
    # FIX: ModuleNotFoundError subclasses ImportError, so catching the
    # parent alone is sufficient.
    except ImportError:
        return ""


spacy_streamlit.visualize(
    MODELS,
    default_model=DEFAULT_MODEL,
    visualizers=["parser", "ner", "similarity", "tokens"],
    show_visualizer_select=True,
    sidebar_description=DESCRIPTION,
    get_default_text=get_default_text,
)
"""Streamlit demo: explore trained spaCy v3.0 pipelines (nightly)."""
from pathlib import Path

import spacy_streamlit
import srsly

# Model registry lives next to this script.
MODELS = srsly.read_json(Path(__file__).parent / "models.json")
DEFAULT_MODEL = "en_core_web_sm"
DEFAULT_TEXT = "Sundar Pichai is the CEO of Google."
DESCRIPTION = """**Explore trained [spaCy v3.0](https://nightly.spacy.io) pipelines**"""

spacy_streamlit.visualize(
    MODELS,
    DEFAULT_TEXT,
    default_model=DEFAULT_MODEL,
    visualizers=["parser", "ner", "similarity", "tokens"],
    show_visualizer_select=True,
    # TODO: enable when morph issue is fixed in next nightly
    # show_json_doc=False,
    sidebar_description=DESCRIPTION,
)
"""Tiny spacy_streamlit demo.

Setup:
    pip install spacy-streamlit
    python -m spacy download en_core_web_sm

Run from your terminal with:
    streamlit run streamlit_app.py
"""
import spacy_streamlit

text = "Today is a beautiful day"
models = ['en_core_web_sm']

spacy_streamlit.visualize(models, text)
# Fake-news detector UI: accept a URL or pasted text, then run the
# classifier, word cloud and spaCy visualisers on the article body.
st.title("Fake News Detector")
image = Image.open('Photos/fofweb.jpg')
st.image(image)
st.markdown(desc)

st.subheader("Enter the URL/text of a news article written in English")
select_input = st.radio("Select Input:", ["URL", "Text"])

if select_input == "URL":
    url = st.text_input("URL")
    if st.button("Run"):
        text = get_page_text(url)
        generate_output(text)
        cloud(text)
        spacy_streamlit.visualize(models, text, visualizers=visualizers,
                                  show_visualizer_select=True)
else:
    text = st.text_area("Text", height=300)
    # BUG FIX: the original `if st.button(...) and len(text) > 100 ... else`
    # displayed the length warning whenever the button was simply not
    # pressed yet. The length check now runs only after a click.
    if st.button("Run"):
        if len(text) > 100:
            generate_output(text)
            cloud(text)
            spacy_streamlit.visualize(models, text, visualizers=visualizers,
                                      show_visualizer_select=True)
        else:
            st.markdown('Please enter greater than 100 characters')