def parse(self, log: ProcessLogger, text, text_unit_id, _text_unit_lang, **kwargs) -> ParseResults:
    """Build ``CopyrightUsage`` records for the copyright annotations in *text*.

    Equal annotations are collapsed into a single record whose ``count`` is
    the number of times that annotation was yielded. Annotations whose name
    is 100 characters or longer are skipped.

    Args:
        log: Logger supplied by the caller; not referenced in this method.
        text: Text passed to ``copyright.get_copyright_annotations``.
        text_unit_id: Identifier stored on every produced ``CopyrightUsage``.
        _text_unit_lang: Not referenced in this method.
        **kwargs: Accepted and ignored.

    Returns:
        A ``ParseResults`` mapping ``CopyrightUsage`` to the list of records,
        or ``None`` when no annotation was found.
    """
    # Function-scope import keeps this method self-contained.
    from collections import Counter

    # One pass over the annotations yields both the distinct items and their
    # occurrence counts; calling list.count() per distinct item would rescan
    # the whole list every time. Annotations must be hashable (the previous
    # set()-based de-duplication required that as well).
    occurrences = Counter(
        copyright.get_copyright_annotations(text, return_sources=True)
    )
    if not occurrences:
        # Nothing found: keep the historical "no result" value.
        return None

    # TODO: what's the logic behind [:200] ... < 100 ?
    # NOTE(review): names that pass the "< 100" filter are never shortened by
    # the [:200] slice, so for ``name`` the slice is only a safety net.
    usages = [
        CopyrightUsage(
            text_unit_id=text_unit_id,
            year=annotation.date,
            name=annotation.name[:200],
            copyright_str=annotation.text[:200],
            count=times_seen,
        )
        for annotation, times_seen in occurrences.items()
        if len(annotation.name) < 100
    ]
    return ParseResults({CopyrightUsage: usages})
def parse(self, text, text_unit_id, _text_unit_lang, **kwargs) -> ParseResults:
    """Build ``CopyrightUsage`` records for the copyright matches in *text*.

    Each match yielded by ``copyright.get_copyright`` is indexed positionally:
    element 1 is stored as ``year``, element 2 as ``name`` and element 3 as
    ``copyright_str``. Equal matches are collapsed into a single record whose
    ``count`` is the number of times that match was yielded. Matches whose
    name (element 2) is 100 characters or longer are skipped.

    Args:
        text: Text passed to ``copyright.get_copyright``.
        text_unit_id: Identifier stored on every produced ``CopyrightUsage``.
        _text_unit_lang: Not referenced in this method.
        **kwargs: Accepted and ignored.

    Returns:
        A ``ParseResults`` mapping ``CopyrightUsage`` to the list of records,
        or ``None`` when no match was found.
    """
    # Function-scope import keeps this method self-contained.
    from collections import Counter

    # One pass over the matches yields both the distinct items and their
    # occurrence counts; calling list.count() per distinct item would rescan
    # the whole list every time. Matches must be hashable (the previous
    # set()-based de-duplication required that as well).
    occurrences = Counter(copyright.get_copyright(text, return_sources=True))
    if not occurrences:
        # Nothing found: keep the historical "no result" value.
        return None

    # NOTE(review): names that pass the "< 100" filter are never shortened by
    # the [:200] slice, so for ``name`` the slice is only a safety net.
    usages = [
        CopyrightUsage(
            text_unit_id=text_unit_id,
            year=match[1],
            name=match[2][:200],
            copyright_str=match[3][:200],
            count=times_seen,
        )
        for match, times_seen in occurrences.items()
        if len(match[2]) < 100
    ]
    return ParseResults({CopyrightUsage: usages})