Beispiel #1
0
 def __init__(
         self,
         coords: Tuple[int, int],
         locale: str = 'en',
         name: str = '',
         sign: str = '',
         company: str = '',
         text: str = '',
         date: str = '',
         year_start: Union[int, str] = '',
         year_end: Union[str, int] = '',
 ):
     """
     Initialise the annotation and store its fields.

     ``name``, ``coords`` and ``locale`` are forwarded to the parent
     constructor; the remaining arguments are stored on the instance.

     :param coords: pair of ints handed to the parent constructor
     :param locale: locale code handed to the parent constructor
     :param name: name handed to the parent constructor
     :param sign: stored as ``self.sign``
     :param company: stored as ``self.company``
     :param text: stored as ``self.text``
     :param date: stored as ``self.date``
     :param year_start: int or str, stored after being passed through
         ``TextAnnotation.get_int_value``
     :param year_end: int or str, stored after being passed through
         ``TextAnnotation.get_int_value``
     """
     super().__init__(name=name, coords=coords, locale=locale)

     # Plain fields are stored exactly as given.
     self.sign, self.company = sign, company
     self.text, self.date = text, date

     # Years may arrive as int or str; both go through the same converter.
     as_int = TextAnnotation.get_int_value
     self.year_start = as_int(year_start)
     self.year_end = as_int(year_end)
    def get_citation_annotations(cls, text: str) -> \
            Generator[CitationAnnotation, None, None]:
        """
        Get citations containing "BGBl"

        The text is scanned with each of ``cls.CITATION_PTN_RE``,
        ``cls.SECOND_CITATION_PTN_RE`` and ``cls.CITATION_RANGE_PTN_RE`` in
        turn, so results are grouped by pattern rather than ordered by
        position in the text.

        :param text: str, text to search for citations
        :return: yields one CitationAnnotation (locale 'de') per match
        """

        for ptn in [
                cls.CITATION_PTN_RE, cls.SECOND_CITATION_PTN_RE,
                cls.CITATION_RANGE_PTN_RE
        ]:
            for match in ptn.finditer(text):
                capture = match.capturesdict()
                date = ''.join(capture.get('date', ''))
                if date:
                    try:
                        date = str(list(get_dates(date, 'de'))[0]['value'])
                    except Exception:
                        # Best effort: if the captured text cannot be turned
                        # into a date, keep the raw captured string. Only
                        # ``Exception`` is caught so that KeyboardInterrupt
                        # and SystemExit still propagate.
                        pass

                ant = CitationAnnotation(
                    coords=match.span(),
                    text=capture['text'][0],
                    paragraph=''.join(capture.get('paragraph', '')),
                    subparagraph=''.join(capture.get('subparagraph', '')),
                    letter=''.join(capture.get('letter', '')),
                    date=date,
                    part=capture['part'][0],
                    locale='de')
                ant.article = TextAnnotation.get_int_value(''.join(
                    capture.get('article', '')))
                ant.number = TextAnnotation.get_int_value(''.join(
                    capture.get('number', '')))
                ant.sentence = TextAnnotation.get_int_value(''.join(
                    capture.get('sentence', '')))

                # For page, volume and year: store the integer when the
                # conversion gives a truthy value, otherwise keep the raw
                # captured string in the companion "*_range" / "*_str" field.
                page_range = ', '.join(capture['page'])
                page = TextAnnotation.get_int_value(page_range)
                if page:
                    ant.page = page
                else:
                    ant.page_range = page_range

                # NOTE(review): the volume is read from the 'number' group,
                # the same group used for ``ant.number`` above - confirm
                # against the patterns that this is intended.
                volume_str = ''.join(capture.get('number', ''))
                volume = TextAnnotation.get_int_value(volume_str)
                if volume:
                    ant.volume = volume
                else:
                    ant.volume_str = volume_str

                year_str = ', '.join(capture.get('year', ''))
                year = TextAnnotation.get_int_value(year_str)
                if year:
                    ant.year = year
                else:
                    ant.year_str = year_str

                yield ant
Beispiel #3
0
def get_acts_annotations(text: str) -> Generator[ActAnnotation, None, None]:
    """
    Yield an ActAnnotation for every ``ACT_PARTS_RE`` match in ``text``.

    The 'act_name', 'section', 'year' and 'text' capture groups are each
    concatenated into a single string (empty when the group is missing or
    captured nothing). The year string is converted with
    ``TextAnnotation.safe_cast(..., int)``. An annotation is flagged as
    ambiguous when its act name is exactly 'Act'.

    :param text: str, text to search
    :return: yields ActAnnotation objects with locale 'en'
    """

    def joined(groups: dict, key: str) -> str:
        # Concatenate all captures of one named group; '' when absent/empty.
        return ''.join(groups.get(key) or [])

    for found in ACT_PARTS_RE.finditer(text):
        groups = found.capturesdict()
        name = joined(groups, 'act_name')
        yield ActAnnotation(
            act_name=name,
            coords=found.span(),
            section=joined(groups, 'section'),
            year=TextAnnotation.safe_cast(joined(groups, 'year'), int),
            ambiguous=(name == 'Act'),
            text=joined(groups, 'text'),
            locale='en')
def get_geoentity_annotations(
    text: str,
    geo_config_list: List[DictionaryEntry],
    priority: bool = False,
    priority_by_id: bool = False,
    text_languages: List[str] = None,
    min_alias_len: int = geoentities_config.MIN_ALIAS_LEN,
    prepared_alias_ban_list: Union[None, Dict[str, Tuple[
        List[str], List[str]]]] = _ALIAS_BLACK_LIST_PREPARED,
    simplified_normalization: bool = False
) -> Generator[GeoAnnotation, None, None]:
    """
    Yield a GeoAnnotation for each entity ``find_dict_entities`` reports.

    See get_geoentities. All arguments except ``priority`` and
    ``priority_by_id`` are forwarded unchanged to ``find_dict_entities``.

    :param text: text to search
    :param geo_config_list: dictionary entries to look for
    :param priority: resolve conflicts with ``conflicts_top_by_priority``;
        takes precedence over ``priority_by_id`` when both are set
    :param priority_by_id: resolve conflicts with
        ``conflicts_take_first_by_id``
    :param text_languages: forwarded to ``find_dict_entities``
    :param min_alias_len: forwarded to ``find_dict_entities``
    :param prepared_alias_ban_list: forwarded to ``find_dict_entities``
    :param simplified_normalization: forwarded to ``find_dict_entities``
    :return: yields GeoAnnotation objects
    """
    # Choose the conflict resolver; ``priority`` wins if both flags are set.
    if priority:
        resolver = conflicts_top_by_priority
    elif priority_by_id:
        resolver = conflicts_take_first_by_id
    else:
        resolver = None

    found_entities = find_dict_entities(
        text,
        geo_config_list,
        conflict_resolving_func=resolver,
        text_languages=text_languages,
        min_alias_len=min_alias_len,
        prepared_alias_ban_list=prepared_alias_ban_list,
        simplified_normalization=simplified_normalization)

    for found in found_entities:
        annotation = GeoAnnotation(coords=found.coords)
        entry = found.entity[0]  # type: DictionaryEntry
        if entry:
            # NOTE(review): the annotation's ``year`` is derived from the
            # entry's ``id`` - confirm this is intended.
            id_as_int = TextAnnotation.get_int_value(entry.id)
            if id_as_int:
                annotation.year = id_as_int
            annotation.name = entry.name
        yield annotation