def derive_company_name(cls, ant: CopyrightAnnotation, phrase: str) -> None:
    """Fill in ``ant.company`` in place, keeping or re-deriving the company name.

    If the annotation already carries a company, it is trimmed of surrounding
    punctuation and kept when ``cls.reg_valid_company_name`` matches it;
    otherwise it is reset to an empty string and re-derived.

    Re-derivation collects every ``cls.reg_company_name`` match from
    ``ant.name`` and, only when that yields nothing, from ``phrase``. The
    winner is chosen by ``cls.take_best_company_name`` and trimmed the same
    way. When no candidate is found, ``ant.company`` is left untouched.

    :param ant: annotation whose ``company`` attribute is updated in place
    :param phrase: fallback text searched when ``ant.name`` has no candidates
    """
    # Punctuation / whitespace stripped from both ends of a company name.
    trim_chars = ' ,;-(:'

    if ant.company:
        cleaned = ant.company.strip(trim_chars)
        ant.company = cleaned
        if cls.reg_valid_company_name.search(cleaned):
            return
        # The pre-filled value is not a plausible company name: discard it.
        ant.company = ''

    # Prefer candidates from the annotation name; fall back to the phrase.
    candidates = []
    for source_text in (ant.name, phrase):
        candidates = [found.group(0)
                      for found in cls.reg_company_name.finditer(source_text)]
        if candidates:
            break

    if not candidates:
        return

    ant.company = cls.take_best_company_name(candidates)
    ant.company = ant.company.strip(trim_chars)
def make_annotation_from_pattrn(self, locale: str, ptrn: PatternFound, phrase: LineOrPhrase) -> TextAnnotation:
    """Build a ``CopyrightAnnotation`` for a pattern match inside ``phrase``.

    The annotation takes its name and coordinates from ``ptrn``, its text
    from the ``[ptrn.start:ptrn.end]`` slice of ``phrase.text``, and then
    copies the company and the start/end years over from the pattern.

    :param locale: locale stored on the resulting annotation
    :param ptrn: the pattern match; see the note in the body about its type
    :param phrase: object whose ``text`` attribute contains the match
    :return: the populated annotation
    """
    span = (ptrn.start, ptrn.end)
    matched_text = phrase.text[span[0]:span[1]]
    annotation = CopyrightAnnotation(name=ptrn.name,
                                     coords=span,
                                     text=matched_text,
                                     locale=locale)
    # The pattern is in fact a CopyrightPatternFound, hence the
    # copyright-specific attributes read below.
    annotation.company = ptrn.company
    annotation.year_start = ptrn.start_year
    annotation.year_end = ptrn.end_year
    return annotation
def test_format_copyright_annotation(self):
    """Check that ``get_cite()`` mentions the annotation's key fields.

    The citation string is only checked for the presence of each expected
    fragment, not for its exact layout.
    """
    cp = CopyrightAnnotation(name='Siemens',
                             coords=(0, 100),
                             text='text text',
                             locale='locale')
    cp.company = 'Siemens'
    cp.year_start = 1996

    # Only the start year is set; e.g. '/copyright/Siemens/1996'
    cite = cp.get_cite()
    for fragment in ('copyright', 'Siemens', '1996'):
        # assertIn reports the actual string on failure, unlike find() > -1.
        self.assertIn(fragment, cite)

    cp.year_end = 2019
    cp.locale = 'en'

    # With an end year and locale; e.g. '/en/copyright/Siemens/1996/2019'
    cite = cp.get_cite()
    for fragment in ('copyright', 'Siemens', '1996', '2019'):
        self.assertIn(fragment, cite)