def test_catastrophic_repetative_text(self):
    # Feed the extractor a 500-character run of one repeated letter and
    # require that it finishes in under two seconds and reports nothing.
    # NOTE(review): judging by the test name this presumably guards against
    # pathological slowdown on highly repetitive input - confirm.
    repeated = 'X' * 500

    started_at = datetime.datetime.now()
    found = get_definition_list_in_sentence(
        (0, len(repeated), repeated), False)
    time_spent = datetime.datetime.now() - started_at

    # Duration is checked first, then the (expected empty) result.
    self.assertLess(time_spent.total_seconds(), 2)
    self.assertEqual(0, len(found))
def get_definitions_in_sentence(sentence: str,
                                return_sources=False,
                                decode_unicode=True) -> Generator:
    """
    Lazily yield the definitions found in a single sentence.

    The sentence is wrapped into a ``(0, len(sentence), sentence)`` tuple and
    handed to ``get_definition_list_in_sentence`` together with
    ``decode_unicode``.

    :param sentence: sentence text to search for definitions
    :param return_sources: when truthy, yield ``(name, text)`` tuples taken
        from each found definition; otherwise yield only its ``name``
    :param decode_unicode: forwarded unchanged to
        ``get_definition_list_in_sentence``
    :return: generator over names or ``(name, text)`` tuples
    """
    whole_span = (0, len(sentence), sentence)
    for found in get_definition_list_in_sentence(whole_span, decode_unicode):
        yield (found.name, found.text) if return_sources else found.name
def get_definition_objects_list(text, decode_unicode=True) -> List[DefinitionCaught]:
    """
    Gather the definitions found in every sentence of a text.

    The text is split with ``get_sentence_span``; each resulting item is
    passed to ``get_definition_list_in_sentence`` and the combined results
    are run through ``filter_definitions_for_self_repeating``.

    :param text: text to search for definitions
    :param decode_unicode: forwarded unchanged to
        ``get_definition_list_in_sentence``
    :return: a list of found definitions - objects of class DefinitionCaught,
        as returned by ``filter_definitions_for_self_repeating``
    """
    collected = []
    for span in get_sentence_span(text):  # type: Tuple[int, int, str]
        collected.extend(get_definition_list_in_sentence(span, decode_unicode))
    return filter_definitions_for_self_repeating(collected)