def test_displacy_parse_ents(en_vocab):
    """Named entities on a Doc are converted into displaCy's dict format."""
    words = ["But", "Google", "is", "starting", "from", "behind"]
    doc = Doc(en_vocab, words=words)
    org_label = doc.vocab.strings["ORG"]

    # Entity without a knowledge-base ID: kb_id is empty, URL is the default "#".
    doc.ents = [Span(doc, 1, 2, label=org_label)]
    parsed = displacy.parse_ents(doc)
    assert isinstance(parsed, dict)
    assert parsed["text"] == "But Google is starting from behind "
    expected = {"start": 4, "end": 10, "label": "ORG", "kb_id": "", "kb_url": "#"}
    assert parsed["ents"] == [expected]

    # Entity carrying a kb_id: without a URL template the kb_url stays "#".
    doc.ents = [Span(doc, 1, 2, label=org_label, kb_id="Q95")]
    parsed = displacy.parse_ents(doc)
    assert isinstance(parsed, dict)
    assert parsed["text"] == "But Google is starting from behind "
    expected = {"start": 4, "end": 10, "label": "ORG", "kb_id": "Q95", "kb_url": "#"}
    assert parsed["ents"] == [expected]
def test_displacy_parse_ents(en_vocab):
    """Named entities on a Doc are converted into displaCy's dict format."""
    words = ["But", "Google", "is", "starting", "from", "behind"]
    doc = get_doc(en_vocab, words=words)
    doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]
    parsed = displacy.parse_ents(doc)
    assert isinstance(parsed, dict)
    assert parsed["text"] == "But Google is starting from behind "
    # "Google" spans characters 4-10 of the rendered text.
    assert parsed["ents"] == [{"start": 4, "end": 10, "label": "ORG"}]
def test_displacy_parse_ents_with_kb_id_options(en_vocab):
    """Entities with a kb_id render a kb_url built from the URL template option."""
    words = ["But", "Google", "is", "starting", "from", "behind"]
    doc = Doc(en_vocab, words=words)
    doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"], kb_id="Q95")]
    options = {"kb_url_template": "https://www.wikidata.org/wiki/{}"}
    parsed = displacy.parse_ents(doc, options)
    assert isinstance(parsed, dict)
    assert parsed["text"] == "But Google is starting from behind "
    expected = {
        "start": 4,
        "end": 10,
        "label": "ORG",
        "kb_id": "Q95",
        "kb_url": "https://www.wikidata.org/wiki/Q95",
    }
    assert parsed["ents"] == [expected]
def render_ner(text):
    """Run the shared NLP pipeline on *text* and return displaCy's entity dict."""
    doc = NLP(text)
    return displacy.parse_ents(doc)
# Input text: sample medicinal-product descriptions to run NER over.
text = """Fycompa 4 mg film-coated tablets crocine 12 mg film-coated tablets Quadrameterer 1.3 GBq/mL solution for injection Topotecan123 Actavis 1mg powder for concentrate for solution 4mg –packs of 7, 28, 84 and 98 Topotecan2344 Actavis 2mg powder for concentrate for solution Topotecan45 Actavis 1mg powder for concentrate for solution """

# Load the trained model.
nlp = spacy.load("cogna")
doc = nlp(text)

# Restrict and colour the entity types that displaCy reports.
options = {
    "ents": ["PACKAGE_ITEM_QTY", "Tablet", "injection"],
    "colors": {"PACKAGE_ITEM_QTY": "Red", "Tablet": "Yellow"},
}
#print(displacy.render(doc, style="dep", page=False, minify=False, jupyter=None, options=options, manual=False))

# Entity spans in displaCy's dict format, dumped as JSON.
results = displacy.parse_ents(doc, options=options)
import json
print(json.dumps(results))

# NER output as a DataFrame (tabular format): one row per (entity text, label).
param = [[ent.text, ent.label_] for ent in doc.ents]
df = pd.DataFrame(param, columns=["Entity", "Category"])

# FIX: the original round-tripped the frame through its printed repr
# (pd.read_table(StringIO(str(df)), sep="\s+")), which splits multi-word
# entity text like "film-coated tablets" into extra columns and breaks on
# pandas display truncation. Tabulate the DataFrame directly instead.
print(tabulate(df, headers="keys", tablefmt="psql"))
# output data visualization in spacy
def apply_spacy_model(source_generator, spacy_nlp_model):
    """Map every item of *source_generator* through a spaCy pipeline.

    Each item is coerced to ``str``, parsed with *spacy_nlp_model*, and the
    resulting Doc converted into displaCy's entity dict via
    ``displacy.parse_ents``. Returns whatever ``Generators.generator_modifier``
    produces (presumably a lazily-modified generator — confirm against its
    implementation).

    FIX: the original bound a lambda to a name (PEP 8 E731); a named inner
    function is equivalent and gives a useful traceback name.
    """
    def _parse_item(text_string):
        # str() coercion lets non-string items (numbers, NaN, etc.) still parse.
        return displacy.parse_ents(spacy_nlp_model(str(text_string)))

    return Generators.generator_modifier(source_generator, _parse_item)
# Read the document text and run it through the pipeline.
with open(path, 'r') as f:
    text = f.read()
doc = nlp(text)

# Look up each rule match's string ID and matched span.
matches = matcher(doc)
for match_id, start, end in matches:
    string_id = nlp.vocab.strings[match_id]  # string representation of the rule ID
    span = doc[start:end]                    # the matched span
    #print("Match: ", match_id, string_id, start, end, span.text)

#sentence_spans = list(doc.sents)
parse_dict = displacy.parse_ents(doc)
#parse_dict = displacy.render(doc, style="ent")
ents_arr = parse_dict['ents']

# Rename each entity's 'label' key to 'type' IN PLACE and collect the spans —
# the displacy-ent.js format (now deprecated?).
spans = []
for ent in ents_arr:
    ent['type'] = ent.pop('label')
    spans.append(ent)

ent_dict = {}
#ent_dict['text'] = ''
#ent_dict['spans'] = [ { 'end': 20, 'start': 5, 'type': "PERSON" }, { 'end': 67, 'start': 61, 'type': "ORG" }, { 'end': 75, 'start': 71, 'type': "DATE" } ]
#ent_dict['ents'] = ['']
ent_dict['spans'] = spans