Esempio n. 1
0
 def test_name(self):
     # Each case pairs an input passage with the (text, label) tuples that
     # ner.name is expected to produce for it, in order. The passages are
     # multi-line literals, so their embedded indentation is part of the input.
     cases = [
         (
             """GitHub launched April 10, 2008, a subsidiary of Microsoft, is an American web-based hosting service for version control using Git.
                    It is mostly used for computer code. It offers all of the distributed version control and source code management (SCM) functionality
                    of Git as well as adding its own features.""",
             [
                 ('April 10, 2008', 'DATE'),
                 ('Microsoft', 'ORG'),
                 ('American', 'NORP'),
                 ('Git', 'PERSON'),
                 ('Git', 'GPE'),
             ],
         ),
         (
             """Michael Jeffrey Jordan born February 17, 1963 in Brooklyn, New York, United States of America. Known by his initials, MJ,[5] is an American former professional
                    basketball player who is the principal owner and chairman of the Charlotte Hornets of the National Basketball Association
                    """,
             [
                 ('Michael Jeffrey Jordan', 'PERSON'),
                 ('February 17, 1963', 'DATE'),
                 ('Brooklyn', 'GPE'),
                 ('New York', 'GPE'),
                 ('United States of America', 'GPE'),
                 ('American', 'NORP'),
                 ('the Charlotte Hornets of the National Basketball Association',
                  'ORG'),
             ],
         ),
     ]
     for passage, expected in cases:
         entities = ner.name(passage, language='en_core_web_sm')
         found = [(entity.text, entity.label_) for entity in entities]
         # Echo the observed pairs so a mismatch is easy to inspect in the log.
         print(found)
         self.assertEqual(found, expected)
Esempio n. 2
0
 def test_name(self):
     # Verifies the ordered (text, label) pairs returned for two passages
     # when ner.name is called with language="en_core_web_sm".

     def tag(passage):
         # Pair every entity's text with its label, keeping ner.name's order.
         entities = ner.name(passage, language="en_core_web_sm")
         return [(entity.text, entity.label_) for entity in entities]

     github_found = tag(
         """GitHub launched April 10, 2008, a subsidiary of Microsoft, is an American web-based hosting service for version control using Git.
                    It is mostly used for computer code. It offers all of the distributed version control and source code management (SCM) functionality
                    of Git as well as adding its own features."""
     )
     github_expected = [
         ("GitHub", "ORG"),
         ("April 10, 2008", "DATE"),
         ("Microsoft", "ORG"),
         ("American", "NORP"),
         ("Git", "PERSON"),
         ("SCM", "ORG"),
         ("Git", "PERSON"),
     ]
     self.assertEqual(github_found, github_expected)

     jordan_found = tag(
         """Michael Jeffrey Jordan born February 17, 1963 in Brooklyn, New York, United States of America. Known by his initials, MJ,[5] is an American former professional
                    basketball player who is the principal owner and chairman of the Charlotte Hornets of the National Basketball Association
                    """
     )
     jordan_expected = [
         ("Michael Jeffrey Jordan", "PERSON"),
         ("February 17, 1963", "DATE"),
         ("Brooklyn", "GPE"),
         ("New York", "GPE"),
         ("United States of America", "GPE"),
         ("American", "NORP"),
         ("the Charlotte Hornets of the National Basketball Association",
          "ORG"),
     ]
     self.assertEqual(jordan_found, jordan_expected)
def get_name(document):
    """Return the first PERSON entity found in a block of text.

    Newlines in ``document`` are replaced with spaces, and the result is
    passed to ``ner.name`` with ``language='en_core_web_sm'``.

    Args:
        document: Text to search for a person's name.

    Returns:
        The first entity whose ``label_`` is ``'PERSON'``, formatted as a
        string, or an empty string when no such entity is found.
    """
    flattened = document.replace("\n", " ")
    entities = ner.name(flattened, language='en_core_web_sm')

    # Only the first PERSON hit is wanted, so stop at the first match.
    first_person = next(
        (entity for entity in entities if entity.label_ == 'PERSON'),
        None,
    )
    if first_person is None:
        return ""

    # Format the entity on its own: prefixing it with the (empty) accumulator
    # and a space would put a stray leading blank on the returned name.
    return f'{first_person}'
from nerd import ner

# Run ner.name over a short sample passage with language='en_core_web_sm'
# and print the (text, label) pair of every entity it returns.
doc = ner.name(
    '''
        Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO, therefore very
        close to the Manhattan Bridge which is visible from the window.
        ''',
    language='en_core_web_sm',
)
text_label = []
for entity in doc:
    text_label.append((entity.text, entity.label_))
print(text_label)
Esempio n. 5
0
def nerd_ner(document):
    """Return ``(text, label)`` tuples for the entities found in ``document``.

    The text is passed to ``ner.name`` with ``language='en_core_web_lg'``;
    the tuples keep the order in which the entities are yielded.
    """
    pairs = []
    for entity in ner.name(document, language='en_core_web_lg'):
        pairs.append((entity.text, entity.label_))
    return pairs
Esempio n. 6
0
                        help='Language model name to download.')
    parser.add_argument('-l',
                        '--load',
                        type=str,
                        help='Load downloaded language model.')
    parser.add_argument('-n',
                        '--name',
                        type=str,
                        help='Find entities from given text.')
    args = parser.parse_args()

    if len(sys.argv) < 2:
        print('Specify a key to use')
        sys.exit(1)

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
    except ImportError:
        pass

    args = parser.parse_args()
    if args.download != None:
        ner.download_model(args.download)
    elif args.load != None:
        ner.load_model(args.load)
    else:
        doc = ner.name(args.name)
        text_label = [(X.text, X.label_) for X in doc]
        print(text_label)
Esempio n. 7
0
def get_doc(texts, language):
    """Return ``(text, label)`` tuples for the entities ``ner.name`` finds.

    Args:
        texts: Input forwarded unchanged to ``ner.name``.
        language: Value forwarded as the ``language`` keyword of ``ner.name``.

    Returns:
        A list with one ``(text, label_)`` tuple per returned entity, in order.
    """
    entities = ner.name(texts, language=language)
    return [(entity.text, entity.label_) for entity in entities]