コード例 #1
0
ファイル: eu_regulations.py プロジェクト: Veroh72/dnb-nlp
def get_regulation_annotations(text: str) -> \
        Generator[RegulationAnnotation, None, None]:
    """
    Find references to EU legislation in the text.

    The text is scanned with ARTICLE_PTN_RE, DIRECTIVE_PTN_RE and
    REGULATION_PTN_RE, in that order. All matches of one pattern are yielded
    before the next pattern is tried, so the results are grouped by pattern
    rather than sorted by position in the text.

    :param text: text to search
    :return: generator of RegulationAnnotation objects, one per match;
        ``coords`` is the span of the whole match, ``name`` is the first
        capturing group as matched and ``text`` is the same group with
        surrounding whitespace stripped
    """
    for pattern in (ARTICLE_PTN_RE, DIRECTIVE_PTN_RE, REGULATION_PTN_RE):
        for match in pattern.finditer(text):
            # The annotation is built from the first capturing group only;
            # presumably every pattern wraps the full reference in it.
            matched = match.group(1)
            yield RegulationAnnotation(coords=match.span(),
                                       source="Legislation (European Union)",
                                       name=matched,
                                       text=matched.strip())
コード例 #2
0
def get_regulation_annotations(text: str) -> \
        Generator[RegulationAnnotation, None, None]:
    """
    Find regulation and public-law references in the text.

    All REGULATION_PTN_RE matches are yielded first, followed by all
    PUBLIC_LAW_PTN_RE matches, so the results are grouped by pattern rather
    than sorted by position in the text.

    :param text: text to search
    :return: generator of RegulationAnnotation objects, one per match;
        ``coords`` is the span of the whole match and ``text`` is the first
        capturing group with surrounding whitespace stripped
    """
    for match in REGULATION_PTN_RE.finditer(text):
        source_text, num1, regulation_type, sec, num2 = match.groups()
        # The code is looked up and rendered without dots.
        fixed_regulation_type = regulation_type.replace('.', '')
        # Any marker containing "sec" is normalized to "Section"; other
        # non-empty markers are kept exactly as matched.
        if sec and 'sec' in sec.lower():
            sec = 'Section'
        regulation_parts = [num1, fixed_regulation_type]
        if sec:
            regulation_parts.append(sec)
        regulation_parts.append(num2)

        yield RegulationAnnotation(
            coords=match.span(),
            # Fall back to the code as written when the map has no entry.
            source=REGULATION_CODES_MAP.get(fixed_regulation_type,
                                            regulation_type),
            name=' '.join(regulation_parts),
            text=source_text.strip())

    for match in PUBLIC_LAW_PTN_RE.finditer(text):
        # Only the first capturing group is used.
        source_text = match.group(1)
        if source_text and 'pub' in source_text.lower():
            fixed_code = PUBLIC_LAW_SUB_RE.sub(r'Public Law No. \1',
                                               source_text)
        else:
            # Title-case the reference and collapse runs of whitespace.
            fixed_code = ' '.join(source_text.lower().title().split())

        yield RegulationAnnotation(coords=match.span(),
                                   source='Public Law',
                                   name=fixed_code,
                                   text=source_text.strip())
コード例 #3
0
 def match_start_trigger(self, phrase: str) -> None:
     """
     Record an annotation for every start-trigger match found in the phrase.

     Each pattern in ``self.reg_start_triggers`` is run over the phrase in
     turn. For every match a RegulationAnnotation is appended to
     ``self.annotations``, with the matched text used as both ``name`` and
     ``text``, the match span as ``coords`` and ``self.locale`` as locale.

     :param phrase: text to scan, e.g. a Spanish sentence that mentions
         "Registro Nacional de Valores"
     :return: None; results are accumulated in ``self.annotations``
     """
     hits = (hit
             for trigger in self.reg_start_triggers
             for hit in trigger.finditer(phrase))
     for hit in hits:
         matched = hit.group()
         annotation = RegulationAnnotation(name=matched,
                                           coords=hit.span(),
                                           text=matched,
                                           locale=self.locale)
         self.annotations.append(annotation)