def get_citation_annotations(cls, text: str) -> \
        Generator[CitationAnnotation, None, None]:
    """
    Get citations containing "BGBl"

    Runs the three class-level citation patterns over ``text`` one after
    another and yields one annotation per regex match.

    :param text: text to search for citations
    :return: yields CitationAnnotation objects with locale 'de'
    """
    patterns = (
        cls.CITATION_PTN_RE,
        cls.SECOND_CITATION_PTN_RE,
        cls.CITATION_RANGE_PTN_RE,
    )
    for ptn in patterns:
        for match in ptn.finditer(text):
            capture = match.capturesdict()

            def joined(group: str, sep: str = '') -> str:
                # Concatenate all captures of a group; '' if the group
                # is not present in this pattern.
                return sep.join(capture.get(group, ''))

            date = joined('date')
            if date:
                try:
                    date = str(list(get_dates(date, 'de'))[0]['value'])
                except Exception:
                    # Best effort: when the date cannot be parsed, keep the
                    # raw matched text. Only Exception is caught so that
                    # KeyboardInterrupt / SystemExit are not swallowed.
                    pass

            ant = CitationAnnotation(
                coords=match.span(),
                text=capture['text'][0],
                paragraph=joined('paragraph'),
                subparagraph=joined('subparagraph'),
                letter=joined('letter'),
                date=date,
                part=capture['part'][0],
                locale='de')

            number_str = joined('number')
            ant.article = TextAnnotation.get_int_value(joined('article'))
            ant.number = TextAnnotation.get_int_value(number_str)
            ant.sentence = TextAnnotation.get_int_value(joined('sentence'))

            # A page that converts to a truthy int is stored as ``page``;
            # anything else is kept as text in ``page_range``.
            page_range = ', '.join(capture['page'])
            page = TextAnnotation.get_int_value(page_range)
            if page:
                ant.page = page
            else:
                ant.page_range = page_range

            # NOTE(review): the volume is read from the 'number' group, the
            # same group used for ``ant.number`` - confirm this is intended.
            volume = TextAnnotation.get_int_value(number_str)
            if volume:
                ant.volume = volume
            else:
                ant.volume_str = number_str

            year_str = joined('year', ', ')
            year = TextAnnotation.get_int_value(year_str)
            if year:
                ant.year = year
            else:
                ant.year_str = year_str

            yield ant
def test_citation_annotation(self):
    """
    Build a CitationAnnotation from explicit field values and check its
    locale, that its repr is non-empty, and the path from get_cite().
    """
    fields = dict(
        coords=(2, 12),
        volume=1,
        year=1998,
        reporter='A. Husseini',
        reporter_full_name='Amin al-Husseini',
        page=14,
        page_range='14-15',
        source='Quran',
        court='sharia',
        locale='pg',
    )
    annotation = CitationAnnotation(**fields)
    self.assertEqual('pg', annotation.locale)

    representation = annotation.__repr__()
    self.assertGreater(len(representation), 0)

    actual_cite = annotation.get_cite()
    self.assertEqual(
        '/pg/citation/Quran/1/1998/14-15/sharia/A. Husseini',
        actual_cite)
def get_citation_annotations(text: str) -> \
        Generator[CitationAnnotation, None, None]:
    """
    Get citations.

    Every match of ``CITATION_PTN_RE`` is unpacked into source text, volume,
    reporter, page, second page, court and year. The reporter abbreviation
    is resolved through ``EDITIONS`` and ``REPORTERS``; a match whose
    reporter (or reporter metadata) cannot be looked up is skipped.

    :param text: text to search for citations
    :return: yields CitationAnnotation objects with locale 'en'
    """
    for match in CITATION_PTN_RE.finditer(text):
        source_text, volume, reporter, \
            page, page2, court, year = match.groups()

        # Parse the year once; a missing or non-numeric year becomes None so
        # the edition lookup below cannot fail on int().
        year_value = int(year) if year and year.isdigit() else None

        try:
            reporter_data = REPORTERS[EDITIONS[reporter]]
            reporter_full_name = ''
            if len(reporter_data) == 1:
                reporter_full_name = reporter_data[0]['name']
            elif year_value is not None:
                for period_data in reporter_data:
                    if reporter not in period_data['editions']:
                        continue
                    edition = period_data['editions'][reporter]
                    start_year = edition['start'].year
                    end = edition['end']
                    # The year must not precede the edition start and, when
                    # the edition has an end date, must not exceed it.
                    if start_year <= year_value and \
                            (not end or year_value <= end.year):
                        reporter_full_name = period_data['name']
        except KeyError:
            # Unknown reporter or incomplete reporter metadata: skip match.
            continue

        yield CitationAnnotation(
            coords=match.span(),
            volume=int(volume) if volume else None,
            year=year_value,
            reporter=reporter,
            reporter_full_name=reporter_full_name,
            page=int(page) if page else None,
            page_range=page2,
            source=source_text.strip(),
            court=court.strip(', ') if court else None,
            locale='en')