def test_parse_trivial(self):
     # Smoke test: load the trained detector and check that it reports at
     # least one annotation whose span covers a non-empty slice of the text.
     detector = LayeredDefinitionDetector()
     detector.load_compressed(TRAINED_MODEL_PATH)
     # Sample sentence that introduces a quoted term ("Redemption Fund").
     text = """
             The Trustee shall establish, 
             maintain and hold in trust a separate fund designated as the "Redemption Fund", shall establish and 
             maintain within the Redemption Fund a separate Optional Redemption Account and a separate Special 
             Redemption Account and shall accept moneys deposited for redemption and shall deposit such moneys 
             into said Accounts, as applicable.
             """
     annotations = detector.get_annotations(text)
     found_count = len(annotations)
     self.assertGreater(found_count, 0)
     # The first two items of coords are used as slice bounds into the text.
     span_start = annotations[0].coords[0]
     span_end = annotations[0].coords[1]
     covered_text = text[span_start:span_end]
     self.assertGreater(len(covered_text), 0)
 def non_test_train(self):
     """
     Train a LayeredDefinitionDetector on the doccano JSONL training data.

     Intended to be run by a user. TRAINED_MODEL_PATH is passed as the first
     argument to train_on_doccano_jsonl (presumably the location the trained
     model is written to - confirm against that method).
     """
     detector = LayeredDefinitionDetector()
     data_dir = f'{lexnlp_test_path}/lexnlp/ml/en'
     data_file_name = 'layered_definitions_train_data.jsonl'
     dataset_path = os.path.join(data_dir, data_file_name)
     detector.train_on_doccano_jsonl(TRAINED_MODEL_PATH, dataset_path)
예제 #3
0

def get_definition_objects_list(text, decode_unicode=True) -> List[DefinitionCaught]:
    """
    Gather the definitions found in every sentence of the given text.

    The text is split with get_sentence_span, each resulting item is handed
    to get_definition_list_in_sentence, and the combined results are passed
    through filter_definitions_for_self_repeating before being returned.

    :param text: text to search for definitions
    :param decode_unicode: forwarded unchanged to get_definition_list_in_sentence
    :return: a list of found definitions - objects of class DefinitionCaught
    """
    # Each item produced by get_sentence_span is typed as Tuple[int, int, str].
    collected = [
        caught
        for sentence_span in get_sentence_span(text)
        for caught in get_definition_list_in_sentence(sentence_span, decode_unicode)
    ]
    return filter_definitions_for_self_repeating(collected)


# Module-level detector instance used by get_definition_annotations when the
# locator type is AnnotationLocatorType.MlWordVectorBased. It is created here
# without loading a model; that code path raises an Exception unless the
# instance reports itself as initialized (its message says to call
# load_compressed first).
parser_ml_classifier = LayeredDefinitionDetector()


def get_definition_annotations(text: str,
                               decode_unicode=True,
                               locator_type: AnnotationLocatorType = AnnotationLocatorType.RegexpBased) \
        -> Generator[DefinitionAnnotation, None, None]:

    if locator_type == AnnotationLocatorType.MlWordVectorBased:
        if not parser_ml_classifier.initialized:
            raise Exception('"parser_ml_classifier" object should be initialized (call load_compressed method)')
        ants = parser_ml_classifier.get_annotations(text)
        for ant in ants:
            yield ant
        return