def test_stanford_method():
    """
    Check that get_tokens_list() raises RuntimeError while Stanford is disabled.

    If Stanford was enabled when the test started, it is enabled again on exit,
    whether or not the assertion passed.

    :return:
    """
    stanford_initially_on = is_stanford_enabled()
    try:
        disable_stanford()
        # The import is performed only after Stanford has been switched off.
        from lexnlp.nlp.en.stanford import get_tokens_list
        with pytest.raises(RuntimeError):
            get_tokens_list("This should throw an exception.")
    finally:
        # Only undo the disable if there was something to undo.
        if stanford_initially_on:
            enable_stanford()
def get_locations(text, strict=False, return_source=False, window=2) -> Generator:
    """
    Get locations from text using Stanford libraries.

    The text is split with get_sentence_list(); each sentence is tokenized and
    tagged on its own. Tokens labelled ``LOCATION`` are collected, and tokens
    that follow closely are merged into the previous location unless ``strict``
    is set. Each result is stripped of surrounding punctuation and whitespace
    before it is yielded.

    :param text: text to search for locations
    :param strict: if True, never merge; every LOCATION token is a separate result
    :param return_source: if True, yield ``(location, sentence)`` tuples instead
        of bare location strings
    :param window: a LOCATION token (or a "." / "," token) is merged into the
        previous location when it lies fewer than ``window`` positions after
        the last token that was added to it
    :return: generator of location strings, or of (location, sentence) tuples
    """
    # Iterate through sentences
    for sentence in get_sentence_list(text):
        # Tag the tokens of THIS sentence only. Tagging the whole text on every
        # iteration would yield each location once per sentence and pair it
        # with sentences it does not occur in.
        sentence_pos = STANFORD_NER_TAGGER.tag(get_tokens_list(sentence))

        # Iterate through tagged tokens
        locations = []
        last_loc_pos = None
        for i, token in enumerate(sentence_pos):
            word, label = token[0], token[1]
            if label == 'LOCATION':
                if not strict and last_loc_pos is not None and (i - last_loc_pos) < window:
                    # Continue the current location; tokens starting with an
                    # apostrophe are glued on without a separating space.
                    locations[-1] += ("" if word.startswith("'") else " ") + word
                else:
                    locations.append(word)
                last_loc_pos = i
            elif word in (".", ","):
                if not strict and last_loc_pos is not None and (i - last_loc_pos) < window:
                    # "." and "," are attached without a space and extend the
                    # merge window so the next LOCATION token can still join.
                    locations[-1] += word
                    last_loc_pos = i

        # Cleanup and yield
        for location in locations:
            location = strip_unicode_punctuation(location) \
                .strip(string.punctuation) \
                .strip(string.whitespace)
            if return_source:
                yield location, sentence
            else:
                yield location
def get_persons(text, strict=False, return_source=False, window=2) -> Generator:
    """
    Get persons from text using Stanford libraries.

    The text is split with get_sentence_list(); each sentence is tokenized and
    tagged on its own. Tokens labelled ``PERSON`` are collected, and tokens
    that follow closely are merged into the previous name unless ``strict`` is
    set. Each result is stripped of surrounding punctuation and whitespace
    before it is yielded.

    :param text: text to search for person names
    :param strict: if True, never merge; every PERSON token is a separate result
    :param return_source: if True, yield ``(name, sentence)`` tuples instead of
        bare name strings
    :param window: a PERSON token (or a "." / "," token) is merged into the
        previous name when it lies fewer than ``window`` positions after the
        last token that was added to it
    :return: generator of name strings, or of (name, sentence) tuples
    """
    # Iterate through sentences
    for sentence in get_sentence_list(text):
        # Tag the tokens of THIS sentence only. Tagging the whole text on every
        # iteration would yield each name once per sentence and pair it with
        # sentences it does not occur in.
        sentence_pos = STANFORD_NER_TAGGER.tag(get_tokens_list(sentence))

        # Iterate through tagged tokens
        names = []
        last_person_pos = None
        for i, token in enumerate(sentence_pos):
            word, label = token[0], token[1]
            if label == 'PERSON':
                if not strict and last_person_pos is not None and (i - last_person_pos) < window:
                    # Continue the current name with a separating space.
                    names[-1] += " " + word
                else:
                    names.append(word)
                last_person_pos = i
            elif word in (".", ","):
                if not strict and last_person_pos is not None and (i - last_person_pos) < window:
                    # "." and "," are attached without a space (e.g. initials)
                    # and extend the merge window for the next PERSON token.
                    names[-1] += word
                    last_person_pos = i

        # Cleanup and yield
        for name in names:
            name = strip_unicode_punctuation(name) \
                .strip(string.punctuation) \
                .strip(string.whitespace)
            if return_source:
                yield name, sentence
            else:
                yield name