def test_parse_trivial(self):
    """
    Smoke-test the layered definition detector on a single sentence.

    Loads the model stored at ``TRAINED_MODEL_PATH``, requests annotations
    for a sample sentence and checks that at least one annotation is
    returned and that the first annotation's coords select a non-empty
    slice of the sample text.
    """
    detector = LayeredDefinitionDetector()
    detector.load_compressed(TRAINED_MODEL_PATH)

    sample_text = """ The Trustee shall establish, maintain and hold in trust a separate fund designated as the "Redemption Fund", shall establish and maintain within the Redemption Fund a separate Optional Redemption Account and a separate Special Redemption Account and shall accept moneys deposited for redemption and shall deposit such moneys into said Accounts, as applicable. """

    annotations = detector.get_annotations(sample_text)
    self.assertGreater(len(annotations), 0)

    # coords[0] / coords[1] are used as the start / end offsets of the slice.
    first_annotation = annotations[0]
    span_start = first_annotation.coords[0]
    span_end = first_annotation.coords[1]
    definition_text = sample_text[span_start:span_end]
    self.assertGreater(len(definition_text), 0)
def non_test_train(self):
    """
    Train a ``LayeredDefinitionDetector`` on the doccano JSONL training data.

    Intended to be run manually by the user. The training file is looked up
    under ``lexnlp_test_path``; ``TRAINED_MODEL_PATH`` is passed as the first
    argument of ``train_on_doccano_jsonl`` (presumably the location where the
    trained model is stored -- confirm against the detector implementation).
    """
    training_dir = f'{lexnlp_test_path}/lexnlp/ml/en'
    training_data_path = os.path.join(training_dir, 'layered_definitions_train_data.jsonl')

    detector = LayeredDefinitionDetector()
    detector.train_on_doccano_jsonl(TRAINED_MODEL_PATH, training_data_path)
def get_definition_objects_list(text, decode_unicode=True) -> List[DefinitionCaught]:
    """
    Collect the definitions found in every sentence of the text.

    :param text: text to search for definitions
    :param decode_unicode: forwarded unchanged to get_definition_list_in_sentence
    :return: a list of found definitions - objects of class DefinitionCaught,
             after filter_definitions_for_self_repeating has been applied
    """
    # Each item produced by get_sentence_span is annotated in the original
    # code as Tuple[int, int, str]; the per-sentence results are flattened
    # into one list in sentence order.
    found = [
        definition
        for sentence in get_sentence_span(text)
        for definition in get_definition_list_in_sentence(sentence, decode_unicode)
    ]
    return filter_definitions_for_self_repeating(found)


# Module-level detector used by the ML-based branch of
# get_definition_annotations; that branch raises unless it is initialized.
parser_ml_classifier = LayeredDefinitionDetector()


def get_definition_annotations(text: str,
                               decode_unicode=True,
                               locator_type: AnnotationLocatorType = AnnotationLocatorType.RegexpBased) \
        -> Generator[DefinitionAnnotation, None, None]:
    """
    Yield definition annotations found in the text.

    When ``locator_type`` is ``AnnotationLocatorType.MlWordVectorBased`` the
    annotations come from the module-level ``parser_ml_classifier``.

    :param text: text to search for definitions
    :param decode_unicode: not used by the ML-based branch
    :param locator_type: selects the locator implementation
    :raises Exception: if the ML-based locator is requested while
                       ``parser_ml_classifier`` is not initialized
    """
    if locator_type == AnnotationLocatorType.MlWordVectorBased:
        if not parser_ml_classifier.initialized:
            raise Exception('"parser_ml_classifier" object should be initialized (call load_compressed method)')
        ants = parser_ml_classifier.get_annotations(text)
        for ant in ants:
            yield ant
        return