def test_issue3882(en_vocab):
    """Regression test: displaCy must not serialize ``Doc.user_data`` when it
    makes its internal copy of the Doc.
    """
    sample = Doc(en_vocab, words=["Hello", "world"], deps=["dep", "dep"])
    # A set is not JSON-serializable; parse_deps would crash if user_data
    # were serialized along with the copied Doc.
    sample.user_data["test"] = set()
    displacy.parse_deps(sample)
def visualize_parser(docs: List[spacy.tokens.Doc], *, title: Optional[str] = None, key: Optional[str] = None) -> None:
    """Render dependency parses for the first N sentences in a Streamlit app.

    Draws a slider choosing how many sentences to show and a radio toggle
    between the full dependency parse and the collocation-candidate parse.
    """
    st.header(title)
    cols = st.beta_columns(2)
    num_parses = cols[1].select_slider(
        'Number of Sentences to Visualise:', options=[0, 1, 2, 3, 4], value=1
    )
    vismode = cols[0].radio(
        'Which Dependencies to Show', ('All', 'Collocation Candidates')
    )
    if num_parses >= 1:
        # Slicing keeps the same indices as the original enumerate + bound check.
        for idx, sent in enumerate(docs[:num_parses]):
            full_parse = displacy.parse_deps(sent)
            colloc_parse = my_parser(sent, idx)
            chosen = full_parse if vismode == 'All' else colloc_parse
            html = displacy.render(chosen, style="dep", manual=True)
            # Double newlines seem to mess with the rendering
            html = html.replace("\n\n", "\n")
            if len(docs) > 1:
                st.markdown(f"> {sent.text}")
            st.write(get_svg(html), unsafe_allow_html=True)
def recognize_named_tag(event, context):
    """AWS Lambda handler: parse the POSTed text with the spaCy MODEL and
    return displaCy's dependency-parse dict as a JSON HTTP response.

    Expects ``event['body']`` to be a JSON string containing a ``text`` field.
    Returns a 200 response whose body is the parse dict (plus the render
    settings the displaCy front end expects), or a 400 response when ``text``
    is null.
    """
    request_body = event['body']
    text = json.loads(request_body)['text']
    print('received test from http post: ', text)
    headers = {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*'
    }
    if text is None:
        # Previously this path fell through and implicitly returned None,
        # which the API gateway reports as an internal error. Surface a
        # client error instead.
        return {
            "statusCode": 400,
            "body": json.dumps({"error": "missing 'text' in request body"}),
            "headers": headers
        }
    doc = MODEL(text)
    parse = displacy.parse_deps(doc)
    # Render settings the displaCy front end reads alongside the parse.
    setting = {}
    setting['lang'] = 'en'
    setting['direction'] = 'ltr'
    parse['setting'] = setting
    print('parse after create: ', parse)
    body = parse
    response = {
        "statusCode": 200,
        "body": json.dumps(body),
        "headers": headers
    }
    return response
def test_displacy_parse_deps(en_vocab):
    """Test that deps and tags on a Doc are converted into displaCy's format."""
    words = ["This", "is", "a", "sentence"]
    heads = [1, 1, 3, 1]
    pos = ["DET", "VERB", "DET", "NOUN"]
    tags = ["DT", "VBZ", "DT", "NN"]
    deps = ["nsubj", "ROOT", "det", "attr"]
    doc = Doc(en_vocab, words=words, heads=heads, pos=pos, tags=tags, deps=deps)
    parsed = displacy.parse_deps(doc)
    assert isinstance(parsed, dict)
    # The expected "tag" values here are the coarse POS labels, per the
    # original assertions; lemma is always None for this Doc.
    assert parsed["words"] == [
        {"lemma": None, "text": word, "tag": coarse}
        for word, coarse in zip(words, pos)
    ]
    assert parsed["arcs"] == [
        {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
        {"start": 2, "end": 3, "label": "det", "dir": "left"},
        {"start": 1, "end": 3, "label": "attr", "dir": "right"},
    ]
def test_displacy_parse_deps(en_vocab):
    """Test that deps and tags on a Doc are converted into displaCy's format."""
    words = ["This", "is", "a", "sentence"]
    heads = [1, 0, 1, -2]
    pos = ["DET", "VERB", "DET", "NOUN"]
    tags = ["DT", "VBZ", "DT", "NN"]
    deps = ["nsubj", "ROOT", "det", "attr"]
    doc = get_doc(en_vocab, words=words, heads=heads, pos=pos, tags=tags, deps=deps)
    result = displacy.parse_deps(doc)
    assert isinstance(result, dict)
    # Word entries pair each token's text with its coarse POS label.
    expected_words = [{"text": w, "tag": p} for w, p in zip(words, pos)]
    assert result["words"] == expected_words
    expected_arcs = [
        {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
        {"start": 2, "end": 3, "label": "det", "dir": "left"},
        {"start": 1, "end": 3, "label": "attr", "dir": "right"},
    ]
    assert result["arcs"] == expected_arcs
def get_spacy_parse(contexts):
    """Re-parse each context's sentences with spaCy and copy the stored
    per-token annotations (extract/weight/citation distance) onto the new
    tokens via custom extension attributes.

    Returns a list of spaCy parses, one per sentence of every context.
    """
    all_parses = []
    for context in contexts:
        tokens = context["tokens"]
        # Fold the parallel mask arrays into each token dict so the sentence
        # splitter below sees a single annotated token list.
        for index in range(len(tokens)):
            tokens[index]["extract"] = context["extract_mask"][index]
            tokens[index]["weight"] = context["weight_mask"][index]
        sentences = get_sentence_tokens_from_token_list(tokens)
        for sentence_tokens in sentences:
            sentence_text = get_sentence_text_from_tokens(sentence_tokens)
            parse = en_nlp(sentence_text)
            for index, token in enumerate(parse):
                # assuming same token index, as we'd already parsed it with the same spacy model before
                if token.text.strip().lower(
                ) != sentence_tokens[index]["text"].strip().lower():
                    # Tokenization drifted from the stored tokens: dump a
                    # +/-7-token window of both sides for debugging, then give
                    # up on annotating the rest of this sentence.
                    print(token.text.strip().lower(), "!=",
                          sentence_tokens[index]["text"].strip().lower())
                    print(parse[max(0, index - 7):min(len(parse) - 1, index + 7)])
                    print(" ".join([
                        x["text"]
                        for x in sentence_tokens[max(0, index -
                                                     7):min(len(parse) -
                                                            1, index + 7)]
                    ]))
                    break
                token._.extract = sentence_tokens[index]["extract"]
                token._.weight = sentence_tokens[index]["weight"]
                token._.dist_cit = sentence_tokens[index].get("dist_cit", 0)
                # token._.dist_cit_norm = sentence_tokens[index][0].get("dist_cit_norm", 0)
            # NOTE(review): result is unused — presumably kept for debugging
            # with the commented-out consistency check below; confirm before
            # removing.
            display_dict = displacy.parse_deps(parse)
            # for index, word in enumerate(display_dict["words"]):
            #     if word["text"] != parse[index].text:
            #         print("ARGh")
            all_parses.append(parse)
    return all_parses
def create_dep(text):
    """Parse ``text`` with MODEL and return displaCy's dependency-parse dict."""
    return displacy.parse_deps(MODEL(text))
import spacy
import os
from spacy import displacy
import json

# Read the uploaded text file, parse it with the small English model, and
# print the displaCy dependency parse of the whole document as JSON
# (consumed by the process that invokes this script).
nlp = spacy.load("en_core_web_sm")
# os.path.join instead of string "+" concatenation for portable path building.
path = os.path.join(os.getcwd(), "py", "uploads", "display-upload.txt")

# Explicit encoding so the read does not depend on the platform default.
with open(path, 'r', encoding='utf-8') as f:
    text = f.read()

doc = nlp(text)
parse_dict = displacy.parse_deps(doc)
# Dropped the unused `sentence_spans = list(doc.sents)` local and the
# dead commented-out debug print.
parsed_json = json.dumps(parse_dict)
print(parsed_json)
def visualize_text(text):
    """Detect the language of ``text``, parse it with the matching model,
    and return displaCy's dependency-parse dict."""
    detected = settings.LANG_ID.classify(text)[0]
    nlp_model = settings.LANGUAGE_MODELS[detected]
    return displacy.parse_deps(nlp_model(text))
def tag_and_parse(text):
    """Run MODEL over ``text`` and return displaCy's dependency-parse dict."""
    return displacy.parse_deps(MODEL(text))
def displacy_service(text):
    """Parse ``text`` with the loaded ``nlp`` pipeline and return displaCy's
    dependency-parse dict."""
    return displacy.parse_deps(nlp(text))
def displacy_service(text):
    """Deploys a displaCy server in localhost: returns the dependency-parse
    dict for ``text`` produced by the ``nlp`` pipeline."""
    document = nlp(text)
    return displacy.parse_deps(document)
def displacyService(self, text):
    """Return the displaCy dependency-parse graph object for ``text``."""
    parsed = self.nlp(text)
    return displacy.parse_deps(parsed)