def test_parse_large_text(self):
    """Parse the sample regulations resource and save an annotated HTML copy.

    Loads 'sample_es_regulations.txt' as UTF-8, runs it through
    ``parser.parse`` and asserts that more than 100 items come back.
    The parsed items are then rendered over the text with
    ``annotate_text`` and written out via ``save_test_document``.
    """
    source = load_resource_document(
        'lexnlp/extract/es/sample_es_regulations.txt', 'utf-8')
    found = parser.parse(source)

    self.assertGreater(len(found), 100)

    # The markup is produced only after the count check has passed.
    save_test_document(
        'sample_es_regulations.html', annotate_text(source, found))
def annotate_document(self, text: str, definitions: List[dict], output_path: str) -> None:
    """Render definitions over ``text`` as HTML and save the result.

    Each entry of ``definitions`` is read as a mapping with
    ``["attrs"]["start"]`` / ``["attrs"]["end"]`` coordinates and a
    ``["tags"]["Extracted Entity Definition Name"]`` label. One
    ``TextAnnotation`` is built per entry, with its 1-based position in
    ``definitions`` as the third argument.

    :param text: the text the definitions were found in
    :param definitions: definition dictionaries, in display order
    :param output_path: passed to ``save_test_document`` as the target
    """
    annotations = [
        TextAnnotation(
            (item["attrs"]["start"], item["attrs"]["end"]),
            item["tags"]["Extracted Entity Definition Name"],
            position)
        for position, item in enumerate(definitions, start=1)
    ]
    save_test_document(output_path, annotate_text(text, annotations))
def annotate_document(self, text: str, definitions: List[dict], output_path: str) -> None:
    """Render definitions over ``text`` as HTML and save the result.

    Each entry of ``definitions`` is read as a mapping providing
    ``["tags"]["Extracted Entity Text"]``,
    ``["tags"]["Extracted Entity Definition Name"]`` and
    ``["attrs"]["start"]`` / ``["attrs"]["end"]``. One
    ``DefinitionAnnotation`` is built per entry, always with locale "en".

    :param text: the text the definitions were found in
    :param definitions: definition dictionaries, in display order
    :param output_path: passed to ``save_test_document`` as the target
    """
    # A running counter used to be maintained here but was never read;
    # DefinitionAnnotation is built without it, so it has been removed.
    annotations = [
        DefinitionAnnotation(
            text=df["tags"]["Extracted Entity Text"],
            name=df["tags"]["Extracted Entity Definition Name"],
            coords=(df["attrs"]["start"], df["attrs"]["end"]),
            locale="en")
        for df in definitions
    ]
    html = annotate_text(text, annotations)
    save_test_document(output_path, html)
def annotate_definitions_text(text: str, definitions: List[dict], save_path: str) -> None:
    """Annotate defined terms inside ``text`` and save the markup.

    For every definition dictionary the term name is searched for within
    the definition's own coordinates. When it is found, the annotation is
    narrowed to cover just the term; otherwise the whole definition span
    is annotated. Each annotation carries the definition's
    "Extracted Entity Text" value as its second argument.

    :param text: the text the definitions were found in
    :param definitions: mappings with "tags" and "attrs" sub-mappings
    :param save_path: passed to ``save_test_document`` as the target
    """
    annotations = []
    for entry in definitions:
        tags = entry["tags"]
        label = tags["Extracted Entity Text"]
        term = tags["Extracted Entity Definition Name"]
        attrs = entry["attrs"]
        span_start, span_end = attrs["start"], attrs["end"]

        # The search window extends one character past the recorded end.
        found_at = text.find(term, span_start, span_end + 1)
        if found_at != -1:
            span_start, span_end = found_at, found_at + len(term)

        annotations.append(TextAnnotation((span_start, span_end), label))

    save_test_document(save_path, annotate_text(text, annotations))
def annotate_definitions_text(text: str,
                              definitions: List[DefinitionAnnotation],
                              save_path: str) -> None:
    """Render ``definitions`` over ``text`` and save the resulting markup.

    :param text: the text the annotations refer to
    :param definitions: annotations handed to ``annotate_text`` unchanged
    :param save_path: passed to ``save_test_document`` as the target
    """
    save_test_document(save_path, annotate_text(text, definitions))