import torch import spacy import neuralcoref from pytorch_transformers import * from gensim.parsing.porter import PorterStemmer import question_generation.datasets as data nlp = spacy.load("en_core_web_lg") USE_COREF = False if (USE_COREF): neuralcoref.add_to_pipe(nlp) class nlp_engine: def __init__(self, ): # self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased") # self.model = BertModel.from_pretrained("bert-base-uncased") self.use_coref = False self.vectorizer = data.vectorize('fast') self.stemmer = PorterStemmer() def make_multiple_choice(self, word, sentence, ai=False): if (len(word.split(' ')) == 1): if (word in sentence.split(' ')): most = self.vectorizer.most_similar(word.lower(), topn=20) choices = [x[0].lower() for x in most] tmp = list() stems = list() tmp.append(word.lower())
def __init__(self):
    """Build the spaCy pipeline used by this object.

    Loads the model registered under the ``'en'`` name, attaches the
    NeuralCoref component with its default settings, and stores the
    resulting pipeline on ``self.nlp``.
    """
    pipeline = spacy.load('en')
    self.nlp = pipeline
    neuralcoref.add_to_pipe(pipeline)
def __init__(self, model_name):
    """Set up the coreference pipeline and the text cleaner.

    Args:
        model_name: Name of the spaCy model passed to ``spacy.load``.
    """
    pipeline = spacy.load(model_name)
    self.nlp = pipeline
    # NeuralCoref is attached with an explicit greedyness of 0.45.
    neuralcoref.add_to_pipe(pipeline, greedyness=0.45)
    self.textclean = TextClean()
def get_doc_example(text):
    """Parse *text* with a freshly built coreference pipeline.

    A new ``en_core_web_sm`` pipeline is loaded on every call and the
    NeuralCoref component is added to it before parsing.

    Args:
        text: The raw text to process.

    Returns:
        The object produced by calling the pipeline on *text*.
    """
    pipeline = spacy.load("en_core_web_sm")
    neuralcoref.add_to_pipe(pipeline)
    return pipeline(text)
'Dudley': ['man', 'boy'], 'Ron': ['man', 'boy'], 'Hagrid': ['man'], 'Albus Dumbledore': ['Albus', 'headmaster', 'him', 'he'], 'Draco': ['man', 'boy', 'Malfoy'], 'McGongall': ['woman', 'McGongall', 'professor'], ' Snape': ['man', 'Snape', 'him', 'he', 'professor', 'Severus'], 'Professor Quirrell': ['man', 'Quirrell'], 'Harry': ['man', 'boy', 'Harry Potter', 'Potter', 'his'], 'Hermione': ['she', 'woman', 'girl', 'Hermione granger', 'her'], 'Ginny': ['she', 'her', 'woman', 'girl'], "Argus Flich": ['he', 'man'], 'Voldemort': ['man', 'the Dark Lord', 'his', 'Lord Voldemort'] } neuralcoref.add_to_pipe(nlp, max_dist_match=1000, greedyness=.40) hp_num = input("Enter harry potter book #: ") nlp.get_pipe('neuralcoref').set_conv_dict(rare_words) source_file = f"harrypotter{hp_num}.txt" temp_file = f"harrypotter{hp_num}_clean.txt" f = open(source_file) f1 = open(temp_file, 'w') """replace punctuation with special char inside quotes so conversations are not treated as individual sentences""" def removePunctFromQuotes(text): returnText = ""
import spacy
import neuralcoref

# Demo script: run NeuralCoref over a sample paragraph and print the
# detected clusters, the mentions of the first cluster, the input text and
# the coreference-resolved text.
nlp = spacy.load('en')
neuralcoref.add_to_pipe(nlp, greedyness=0.55)  #0.55 is best value I've seen
# NOTE(review): the "\ " inside the literal below looks like a line
# continuation whose newline was lost — confirm against the original file.
str1 = u"Norway has a lot of electric cars, so many that it can make anyone driving a new vehicle with an internal combustion engine look like a Luddite. Mercedes-Benz brought us there to experience the EQC, and possibly to normalize it in a sea of EVs that makes California look like a land of late adopters. Outside Oslo, where cars were larger and more upscale than in other parts of Europe, and Tesla vehicles (S and X) are a more common sight than around Los Angeles or the Bay Area, the EQC fit right in. DON'T MISS: Mercedes-Benz EQC Edition 1886 electric SUV kicks off a new era \ After a couple of rain-soaked days driving the EQC there last week, we can say that it will be a great addition in the U.S. when it arrives sometime in 2020. At about 187 inches long, the EQC400 4Matic crossover splices into the American mid-sizers."
doc = nlp(str1)
# doc = nlp(u'John went to Norway and it was beautiful.')
print(doc._.coref_clusters)
# Indexes the first cluster unconditionally; presumably this fails when no
# cluster is found — TODO confirm and guard if the input text changes.
print(doc._.coref_clusters[0].mentions)
print(str1)
print(doc._.coref_resolved)
def get_coref_parser():
    """Create the pipeline used for coreference parsing.

    Loads ``en_core_web_sm``, adds the NeuralCoref component, and then
    replaces the pipeline's tokenizer with a ``NoTokenizer`` built from the
    pipeline's vocabulary.

    Returns:
        The configured pipeline object.
    """
    parser = spacy.load('en_core_web_sm')
    neuralcoref.add_to_pipe(parser)
    custom_tokenizer = NoTokenizer(parser.vocab)
    parser.tokenizer = custom_tokenizer
    return parser
def __init__(self):
    """Load the coreference pipeline and reset the stored response.

    The ``'en'`` model is loaded, NeuralCoref is added with default
    settings, and a "Server loaded" message is printed once that is done.
    ``self.response`` starts out as ``None``.
    """
    pipeline = spacy.load('en')
    self.nlp = pipeline
    neuralcoref.add_to_pipe(pipeline)
    print("Server loaded")
    self.response = None
def __init__(self,
             spacy_model: str = 'zh_core_web_lg',
             greedyness: float = 0.45):
    """Build a spaCy pipeline with NeuralCoref attached.

    Args:
        spacy_model: Name of the spaCy model passed to ``spacy.load``.
        greedyness: Value forwarded to ``neuralcoref.add_to_pipe`` as its
            ``greedyness`` keyword.
    """
    pipeline = spacy.load(spacy_model)
    coref_options = {'greedyness': greedyness}
    self.nlp = pipeline
    neuralcoref.add_to_pipe(pipeline, **coref_options)
def __init__(self, language = "en_core_web_sm", greedyness: float = 0.65):
    """Initialise the parent class and build the coreference pipeline.

    Args:
        language: Name of the spaCy model passed to ``spacy.load``.
        greedyness: Value forwarded to ``neuralcoref.add_to_pipe`` as its
            ``greedyness`` keyword.
    """
    super().__init__()
    import spacy
    self.nlp = spacy.load(language)
    # The greedyness argument used to be accepted but silently dropped, so
    # callers could not influence the resolver; forward it explicitly.
    neuralcoref.add_to_pipe(self.nlp, greedyness=greedyness)
def replace_pronouns(orig_text,
                     name,
                     pronoun_replacement,
                     nlp,
                     correcting_they_pronouns=False):
    """Split *orig_text* into segments with the pronouns for *name* replaced.

    The text is parsed with ``nlp`` and the coreference cluster for *name*
    is looked up with ``find_cluster``. If no cluster is found, the
    ``neuralcoref`` pipe is rebuilt with ``greedyness=0.6`` and the text is
    parsed once more. When ``correcting_they_pronouns`` is true that rebuild
    happens up front, so the fallback is not repeated.

    Args:
        orig_text: The text to transform.
        name: Passed to ``find_cluster`` to select the cluster to rewrite.
        pronoun_replacement: Passed through to ``list_pronouns`` and
            ``list_present_tense_heads``.
        nlp: Pipeline object; it is called on the text and its
            ``neuralcoref`` pipe may be removed and re-added (mutated in
            place).
        correcting_they_pronouns: When true, rebuild the ``neuralcoref``
            pipe with ``greedyness=0.6`` before the first parse.

    Returns:
        A list of dicts with non-empty ``'text'``. Each has ``'text'`` and
        ``'is_pronoun'``; segments produced while walking the altered tokens
        also carry ``'index'`` and ``'orig_text'``. When no cluster is found
        the result is ``[{'text': 'No Match', 'is_pronoun': False}]``.
    """
    #for singular they transformation
    #text, indexes to revert.
    #cache?
    fallback_applied = False
    if correcting_they_pronouns:
        print('here')
        # This removes the current neuralcoref instance from the SpaCy pipe
        nlp.remove_pipe("neuralcoref")
        neuralcoref.add_to_pipe(nlp, greedyness=0.6)
        fallback_applied = True

    doc = nlp(orig_text)
    name_cluster = find_cluster(name, doc)

    # Retry at greedyness 0.6 only if that configuration has not already
    # been used; repeating the identical parse cannot change the outcome.
    if name_cluster is None and not fallback_applied:
        nlp.remove_pipe("neuralcoref")
        neuralcoref.add_to_pipe(nlp, greedyness=0.6)
        doc = nlp(orig_text)
        name_cluster = find_cluster(name, doc)

    if name_cluster is None:
        return [{'text': 'No Match', 'is_pronoun': False}]

    #need to check if mention is pronoun
    print(name_cluster.mentions)
    pronouns = list_pronouns(name_cluster.mentions, pronoun_replacement)
    present_tense_heads = list_present_tense_heads(pronouns,
                                                   pronoun_replacement)
    print(present_tense_heads)

    altered_tokens = pronouns + present_tense_heads
    altered_tokens.sort(key=lambda altered_token: altered_token['token'].i)
    print('altered_tokens', altered_tokens)

    text = []
    buffer_start = 0
    for altered_token in altered_tokens:
        token = altered_token['token']
        position = token.i
        if position > buffer_start:
            # If we've skipped over some tokens, let's add those in
            # (with trailing whitespace if available)
            text.append({
                'text': doc[buffer_start:position].text +
                doc[position - 1].whitespace_,
                'is_pronoun': False,
                'index': position,
                'orig_text': token.text
            })
        # Replace token, with trailing whitespace if available
        text.append({
            'text': altered_token['replacement_text'],
            'is_pronoun': True,
            'index': position,
            'orig_text': token.text
        })
        text.append({
            'text': doc[position].whitespace_,
            'is_pronoun': False,
            'index': position,
            'orig_text': token.text
        })
        buffer_start = position + 1

    # Whatever follows the last altered token is emitted unchanged.
    text.append({'text': doc[buffer_start:].text, 'is_pronoun': False})
    return [segment for segment in text if segment["text"] != '']
# -*- coding: utf-8 -*- """ Created on Thu Jun 27 19:55:37 2019 @author: sudhe """ import spacy import neuralcoref import numpy as np import networkx as nx from sklearn.metrics.pairwise import cosine_similarity coref_nlp = spacy.load('en') neuralcoref.add_to_pipe(coref_nlp) nlp = spacy.load('en_core_web_sm') # Extract word vectors def My_Page_Rank(sentences): sentence_vectors = [] lexArticle = dict() for sentence in sentences: if len(sentence) != 0: v = sum([ word_embeddings.get(w, np.zeros((300, ))) for w in sentence.split() ]) / (len(sentence.split()) + 0.001) else: v = np.zeros((300, )) sentence_vectors.append(v)