def test_name(self):
    """Compare the entities ``ner.name`` reports for two sample paragraphs.

    Each paragraph is processed with the ``en_core_web_sm`` model, the
    resulting ``(text, label_)`` pairs are printed, and the pairs are
    asserted against a fixed expected list.
    """
    # Sample 1: a short description of GitHub.
    github_entities = ner.name(
        """GitHub launched April 10, 2008, a subsidiary of Microsoft, is an American web-based hosting service for version control using Git. It is mostly used for computer code. It offers all of the distributed version control and source code management (SCM) functionality of Git as well as adding its own features.""",
        language='en_core_web_sm')
    github_pairs = [(ent.text, ent.label_) for ent in github_entities]
    print(github_pairs)
    expected_github = [
        ('April 10, 2008', 'DATE'),
        ('Microsoft', 'ORG'),
        ('American', 'NORP'),
        ('Git', 'PERSON'),
        ('Git', 'GPE'),
    ]
    self.assertEqual(github_pairs, expected_github)

    # Sample 2: a short biography of Michael Jordan.
    jordan_entities = ner.name(
        """Michael Jeffrey Jordan born February 17, 1963 in Brooklyn, New York, United States of America. Known by his initials, MJ,[5] is an American former professional basketball player who is the principal owner and chairman of the Charlotte Hornets of the National Basketball Association """,
        language='en_core_web_sm')
    jordan_pairs = [(ent.text, ent.label_) for ent in jordan_entities]
    print(jordan_pairs)
    expected_jordan = [
        ('Michael Jeffrey Jordan', 'PERSON'),
        ('February 17, 1963', 'DATE'),
        ('Brooklyn', 'GPE'),
        ('New York', 'GPE'),
        ('United States of America', 'GPE'),
        ('American', 'NORP'),
        ('the Charlotte Hornets of the National Basketball Association', 'ORG'),
    ]
    self.assertEqual(jordan_pairs, expected_jordan)
def test_name(self):
    """Assert the ``(text, label_)`` pairs ``ner.name`` yields for two samples.

    Every sample is run through ``ner.name`` with the ``en_core_web_sm``
    model and the extracted pairs must equal the expected list paired with
    that sample.
    """
    # Each case is (input paragraph, expected entity pairs); cases run in order.
    cases = [
        (
            """GitHub launched April 10, 2008, a subsidiary of Microsoft, is an American web-based hosting service for version control using Git. It is mostly used for computer code. It offers all of the distributed version control and source code management (SCM) functionality of Git as well as adding its own features.""",
            [
                ("GitHub", "ORG"),
                ("April 10, 2008", "DATE"),
                ("Microsoft", "ORG"),
                ("American", "NORP"),
                ("Git", "PERSON"),
                ("SCM", "ORG"),
                ("Git", "PERSON"),
            ],
        ),
        (
            """Michael Jeffrey Jordan born February 17, 1963 in Brooklyn, New York, United States of America. Known by his initials, MJ,[5] is an American former professional basketball player who is the principal owner and chairman of the Charlotte Hornets of the National Basketball Association """,
            [
                ("Michael Jeffrey Jordan", "PERSON"),
                ("February 17, 1963", "DATE"),
                ("Brooklyn", "GPE"),
                ("New York", "GPE"),
                ("United States of America", "GPE"),
                ("American", "NORP"),
                ("the Charlotte Hornets of the National Basketball Association", "ORG"),
            ],
        ),
    ]
    for sample, expected in cases:
        entities = ner.name(sample, language="en_core_web_sm")
        found = [(ent.text, ent.label_) for ent in entities]
        self.assertEqual(found, expected)
def get_name(document):
    """Return the first PERSON entity found in a block of text.

    Newlines in ``document`` are replaced by spaces before the text is
    handed to ``ner.name`` with the ``en_core_web_sm`` model.  The first
    entity whose ``label_`` is ``PERSON`` is returned, formatted as a
    string preceded by a single space; an empty string is returned when
    no such entity exists.
    """
    flattened = document.replace("\n", " ")
    for entity in ner.name(flattened, language='en_core_web_sm'):
        if entity.label_ == 'PERSON':
            # The match is returned with one leading space.
            return f' {entity}'
    return ""
from nerd import ner

# Sample passage fed to the entity recogniser.
sample_text = ''' Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO, therefore very close to the Manhattan Bridge which is visible from the window. '''

# Run the 'en_core_web_sm' model over the passage, then show each entity's
# text alongside its label.
doc = ner.name(sample_text, language='en_core_web_sm')
text_label = [(entity.text, entity.label_) for entity in doc]
print(text_label)
def nerd_ner(document):
    """Return ``(text, label_)`` pairs for the entities found in ``document``.

    The text is processed by ``ner.name`` using the ``en_core_web_lg`` model.
    """
    return [
        (entity.text, entity.label_)
        for entity in ner.name(document, language='en_core_web_lg')
    ]
help='Language model name to download.') parser.add_argument('-l', '--load', type=str, help='Load downloaded language model.') parser.add_argument('-n', '--name', type=str, help='Find entities from given text.') args = parser.parse_args() if len(sys.argv) < 2: print('Specify a key to use') sys.exit(1) try: import argcomplete argcomplete.autocomplete(parser) except ImportError: pass args = parser.parse_args() if args.download != None: ner.download_model(args.download) elif args.load != None: ner.load_model(args.load) else: doc = ner.name(args.name) text_label = [(X.text, X.label_) for X in doc] print(text_label)
def get_doc(texts, language):
    """Return ``(text, label_)`` pairs for the entities ``ner.name`` finds.

    ``texts`` is passed to ``ner.name`` unchanged and ``language`` is
    forwarded as its ``language`` keyword argument.
    """
    return [
        (entity.text, entity.label_)
        for entity in ner.name(texts, language=language)
    ]