Ejemplo n.º 1
0
 def convert(
     cls, tree_element: model.ArticleTitleAmendment
 ) -> Iterable[OutgoingReference]:
     """Yield the outgoing references of an article title amendment.

     Two references are produced, in this order:

     1. one pointing at the act, positioned where
        ``ActReferenceConversionHelper`` locates the act id;
     2. one pointing at the article inside that act, positioned over the
        article subtree.

     Both ``tree_element.article`` and ``tree_element.act_reference`` must
     be present (asserted).
     """
     article_node = tree_element.article
     act_node = tree_element.act_reference
     assert article_node is not None
     assert act_node is not None

     helper = ActReferenceConversionHelper
     act_id = helper.get_act_id_from_parse_result(act_node)
     # The article id is stored as a sequence of string parts.
     article_id = "".join(article_node.id)

     start, end = helper.get_act_id_pos_from_parse_result(act_node)
     yield OutgoingReference(start, end, Reference(act=act_id))

     start, end = get_subtree_start_and_end_pos(article_node)
     yield OutgoingReference(
         start, end, Reference(act=act_id, article=article_id))
Ejemplo n.º 2
0
    def convert_parsed_references(
        cls,
        parsed_references: Iterable[OutgoingReference],
        prefixlen: int,
        textlen: int,
        abbreviations_map: Mapping[str, str],
    ) -> Tuple[OutgoingReference, ...]:
        """Re-base parsed references onto the text that follows a prefix.

        A reference is kept only when its end position lies in the
        half-open range ``(prefixlen, textlen]``. Kept references have
        ``prefixlen`` subtracted from both positions (the start is clamped
        at 0) and their abbreviations resolved through
        ``abbreviations_map``.

        Returns:
            The converted references as a sorted tuple.
        """
        # TODO: pylint is right here, should refactor.
        # pylint: disable=too-many-arguments
        rebased = (
            OutgoingReference(
                # A reference may begin inside the prefix; clamp it to 0.
                start_pos=max(parsed.start_pos - prefixlen, 0),
                end_pos=parsed.end_pos - prefixlen,
                reference=parsed.reference.resolve_abbreviations(
                    abbreviations_map),
            )
            for parsed in parsed_references
            # Only the end is tested, not the beginning: partial references
            # that start in the prefix but end in the target text must
            # still be accepted.
            if prefixlen < parsed.end_pos <= textlen
        )
        return tuple(sorted(rebased))
Ejemplo n.º 3
0
    def convert(
            cls,
            tree_element: model.BlockAmendment) -> Iterable[OutgoingReference]:
        """Yield the outgoing references of a block amendment.

        The act reference is yielded first. The amended and inserted
        references follow, each only if it was converted to something
        truthy. When both exist, the inserted reference is rewritten to be
        relative to the amended one before being yielded.
        """
        act_node = tree_element.act_reference
        assert isinstance(act_node, model.ActReference)
        act_id = ActReferenceConversionHelper.get_act_id_from_parse_result(
            act_node)
        start, end = ActReferenceConversionHelper.get_act_id_pos_from_parse_result(
            act_node)
        yield OutgoingReference(start, end, Reference(act=act_id))

        to_outgoing = ReferenceConversionHelper.convert_potential_in_text_reference
        amended = to_outgoing(act_id, tree_element.amended_reference)
        inserted = to_outgoing(act_id, tree_element.inserted_reference)

        if not (amended is None or inserted is None):
            # Act has to be cut off first, because otherwise relative_to does not do anything.
            without_act = attr.evolve(inserted.reference, act=None)
            inserted = attr.evolve(
                inserted,
                reference=without_act.relative_to(amended.reference))

        for candidate in (amended, inserted):
            if candidate:
                yield candidate
Ejemplo n.º 4
0
 def convert(cls,
             tree_element: model.Repeal) -> Iterable[OutgoingReference]:
     """Yield the outgoing references of a repeal.

     The reference to the act itself comes first, followed by everything
     ``ReferenceConversionHelper.convert_multiple_in_text_references``
     produces for ``tree_element.references`` (which must not be None).
     """
     assert tree_element.references is not None
     act_node = tree_element.act_reference
     helper = ActReferenceConversionHelper
     act_id = helper.get_act_id_from_parse_result(act_node)
     start, end = helper.get_act_id_pos_from_parse_result(act_node)
     yield OutgoingReference(start, end, Reference(act=act_id))
     yield from ReferenceConversionHelper.convert_multiple_in_text_references(
         act_id, tree_element.references)
Ejemplo n.º 5
0
 def iter(self, start_override: Optional[int],
          end_override: int) -> Iterable[OutgoingReference]:
     """Yield an OutgoingReference for each reference collected on this object.

     The levels "articles", "paragraphs", "points" and "subpoints" are
     walked in that order. Each is a list attribute on ``self`` whose
     entries are indexed as ``(value, start, end)``.

     Args:
         start_override: If not None, used as the start position of the
             first reference yielded, instead of that entry's own start.
         end_override: End position of the final reference yielded.

     Yields:
         For every level holding more than one entry, one reference per
         entry except the last; then one final reference built from the
         last (or only) value of every level.
     """
     # Called before the level lists are read; presumably so that a pending
     # item is included in them (confirm against commit_deferred_item).
     self.commit_deferred_item()
     # Positional arguments for Reference: the act, then one slot per level.
     ref_args = [self.act, None, None, None, None]
     levels = ("articles", "paragraphs", "points", "subpoints")
     # arg_pos starts at 1 because slot 0 of ref_args holds the act.
     for arg_pos, level in enumerate(levels, start=1):
         level_vals = getattr(self, level)
         if len(level_vals) == 1:
             # Single entry: it only fills its slot, nothing is yielded yet.
             ref_args[arg_pos] = level_vals[0][0]
         else:
             # NOTE(review): an empty list also lands here and would raise
             # IndexError at level_vals[-1] below; presumably every level
             # always holds at least one entry (confirm in the initializer).
             # Sorts the instance's list in place by start position.
             level_vals.sort(key=lambda x: x[1])
             for level_val, start, end in level_vals[:-1]:
                 # The override is consumed by the first yielded reference.
                 if start_override is not None:
                     start = start_override
                     start_override = None
                 ref_args[arg_pos] = level_val
                 yield OutgoingReference(start, end, Reference(*ref_args))
             # The last entry stays in its slot for the deeper levels.
             ref_args[arg_pos] = level_vals[-1][0]
         # Without an override (never given, or already consumed above), the
         # start of this level's last entry becomes the start for later use.
         if start_override is None:
             start_override = level_vals[-1][1]
     assert start_override is not None
     yield OutgoingReference(start_override, end_override,
                             Reference(*ref_args))
Ejemplo n.º 6
0
    def convert(
            cls, tree_element: model.CompoundReference
    ) -> Iterable[OutgoingReference]:
        """Yield the outgoing references of a compound reference.

        If an act reference is present, a reference to the act is yielded
        first and its id is used for the references that follow. Otherwise
        the act id passed on is None. Each entry of
        ``tree_element.references`` is then converted with
        ``ReferenceConversionHelper.convert_single_in_text_reference``.
        """
        act_node = tree_element.act_reference
        act_id = None
        if act_node is not None:
            act_id = ActReferenceConversionHelper.get_act_id_from_parse_result(
                act_node)
            act_start, act_end = ActReferenceConversionHelper.get_act_id_pos_from_parse_result(
                act_node)
            yield OutgoingReference(act_start, act_end, Reference(act=act_id))

        # A missing or empty reference list contributes nothing.
        for single_reference in tree_element.references or ():
            assert isinstance(single_reference, model.Reference)
            yield from ReferenceConversionHelper.convert_single_in_text_reference(
                act_id, single_reference)
Ejemplo n.º 7
0
def test_semantic_reparse_abbrevs() -> None:
    """Check semantic re-parsing when an act id abbreviation changes.

    The act is parsed once, then the paragraph that defines "Ytv." is
    edited to name a different act and parsed again. References that use
    "Ytv." must resolve to the new act, while article 1 and the block
    amendment content must be the very same objects as before.
    """
    # Articles 1 and 2 define the abbreviations Xtv., Ytv. and Ztv.;
    # articles 2, 3 and 4 use them.
    TEST_ACT = Act(
        identifier="2050. évi XD. törvény",
        publication_date=Date(2050, 3, 4),
        subject="A nyelvtani tesztelésről",
        preamble='',
        children=(
            Article(
                identifier="1",
                children=(Paragraph(
                    text=
                    "Fontos lesz később a tesztelés X tulajdonságiról szóló 2040. évi DX. törvény (a továbbiakban Xtv.) rövidítésének feloldása.",
                ), )),
            Article(
                identifier="2",
                children=(
                    Paragraph(
                        identifier="1",
                        text=
                        "Bekeverünk a tesztelés Y tulajdonságiról szóló 2041. évi X. törvény (a továbbiakban Ytv.) dolgaival",
                    ),
                    Paragraph(
                        identifier="2",
                        text=
                        "Itt megemlítendő az Ytv. 10. §-a és a tesztelés Z tulajdonságiról szóló 2041. évi XXX. törvény (a továbbiakban Ztv.) 1. §-a közötti különbség",
                    ),
                    Paragraph(
                        identifier="3",
                        intro="Mert később használatban van",
                        children=(
                            AlphabeticPoint(identifier="a",
                                            text="az Xtv. 1. § c) pontja, és"),
                            AlphabeticPoint(identifier="b",
                                            text="az Ytv. 2. §-a."),
                        ),
                    ),
                ),
            ),
            Article(
                identifier="3",
                children=(Paragraph(
                    text=
                    "Mégegyszer megemlítendő, hogy fontos az Xtv. 1. §-a, Ytv. 1. §-a, és Ztv. 1337. §-a.",
                ), )),
            Article(
                identifier="4",
                children=(Paragraph(
                    intro=
                    "Az Ytv. 12. § (8) bekezdése helyébe a következő rendelkezés lép:",
                    children=(BlockAmendmentContainer(children=(Paragraph(
                        identifier='8',
                        text="Beillesztett referencia: 12. §, vajon lesz baj?"
                    ), ), ), ),
                ), ),
            ),
        ),
    )

    # First pass: every abbreviation is collected and resolved.
    with_semantics_1 = ActSemanticsParser.add_semantics_to_act(TEST_ACT)
    assert with_semantics_1.is_semantic_parsed

    assert with_semantics_1.article('1').paragraph().act_id_abbreviations == (
        ActIdAbbreviation('Xtv.', '2040. évi DX. törvény'), )
    assert with_semantics_1.article('2').paragraph(
        '1').act_id_abbreviations == (ActIdAbbreviation(
            'Ytv.', '2041. évi X. törvény'), )
    assert with_semantics_1.article('2').paragraph(
        '2').act_id_abbreviations == (ActIdAbbreviation(
            'Ztv.', '2041. évi XXX. törvény'), )
    # Article 3 uses all three abbreviations; each one yields a reference
    # to the act and a reference to the article within it.
    assert with_semantics_1.article('3').paragraph().outgoing_references == (
        OutgoingReference(start_pos=40,
                          end_pos=44,
                          reference=Reference(act='2040. évi DX. törvény')),
        OutgoingReference(start_pos=45,
                          end_pos=51,
                          reference=Reference(act='2040. évi DX. törvény',
                                              article='1')),
        OutgoingReference(start_pos=53,
                          end_pos=57,
                          reference=Reference(act='2041. évi X. törvény')),
        OutgoingReference(start_pos=58,
                          end_pos=64,
                          reference=Reference(act='2041. évi X. törvény',
                                              article='1')),
        OutgoingReference(start_pos=69,
                          end_pos=73,
                          reference=Reference(act='2041. évi XXX. törvény')),
        OutgoingReference(start_pos=74,
                          end_pos=83,
                          reference=Reference(act='2041. évi XXX. törvény',
                                              article='1337')),
    )

    # Second pass over an already parsed act: only equality is asserted
    # here, not identity (see the TODO).
    with_semantics_2 = ActSemanticsParser.add_semantics_to_act(
        with_semantics_1)
    # TODO: with_semantics_2 is with_semantics_1
    assert with_semantics_2 == with_semantics_1

    # Point "Ytv." at a different act (2041 -> 2057) and clear the semantic
    # fields of the edited paragraph.
    modified_paragraph = attr.evolve(
        with_semantics_1.article("2").paragraph("1"),
        text=
        "Bekeverünk a tesztelés Y új tulajdonságiról szóló 2057. évi X. törvény (a továbbiakban Ytv.) dolgaival",
        semantic_data=None,
        outgoing_references=None,
        act_id_abbreviations=None,
    )
    modified_article = attr.evolve(
        with_semantics_1.article("2"),
        children=(modified_paragraph, ) +
        with_semantics_1.article("2").children[1:],
    )
    modified_act = attr.evolve(
        with_semantics_1,
        children=(with_semantics_1.children[0], modified_article,
                  with_semantics_1.children[2], with_semantics_1.children[3]),
    )

    assert not modified_act.is_semantic_parsed

    modified_with_semantics = ActSemanticsParser.add_semantics_to_act(
        modified_act)
    assert modified_with_semantics.article('2').paragraph(
        '1').act_id_abbreviations == (ActIdAbbreviation(
            'Ytv.', '2057. évi X. törvény'), )
    # Same positions as in the first pass; only the act behind "Ytv."
    # differs.
    assert modified_with_semantics.article('3').paragraph(
    ).outgoing_references == (
        OutgoingReference(start_pos=40,
                          end_pos=44,
                          reference=Reference(act='2040. évi DX. törvény')),
        OutgoingReference(start_pos=45,
                          end_pos=51,
                          reference=Reference(act='2040. évi DX. törvény',
                                              article='1')),
        OutgoingReference(start_pos=53,
                          end_pos=57,
                          reference=Reference(act='2057. évi X. törvény')),
        OutgoingReference(start_pos=58,
                          end_pos=64,
                          reference=Reference(act='2057. évi X. törvény',
                                              article='1')),
        OutgoingReference(start_pos=69,
                          end_pos=73,
                          reference=Reference(act='2041. évi XXX. törvény')),
        OutgoingReference(start_pos=74,
                          end_pos=83,
                          reference=Reference(act='2041. évi XXX. törvény',
                                              article='1337')),
    )
    assert modified_with_semantics.article('4').paragraph().semantic_data == (
        BlockAmendment(position=Reference(act='2057. évi X. törvény',
                                          article='12',
                                          paragraph='8'), ), )

    # Article 1 precedes the change, so the very same object must be reused.
    assert with_semantics_1.article('1') is modified_with_semantics.article(
        '1')
    # Note that because of the abbreviation change, everything else may be reparsed,
    # so no asserts for e.g. article('3')

    # No need to reparse BlockAmendments though
    a4_children = with_semantics_1.article('4').paragraph().children
    modified_a4_children = modified_with_semantics.article(
        '4').paragraph().children
    assert a4_children is not None
    assert modified_a4_children is not None
    assert a4_children[0] is modified_a4_children[0]
Ejemplo n.º 8
0
def test_semantic_reparse_simple() -> None:
    """Check that semantic parsing reuses everything that did not change.

    The act is parsed once and the extracted abbreviations, references and
    semantic data are verified. Parsing the result again must return the
    same object. After one paragraph is edited, a further parse must
    produce new references for that paragraph only; every other article
    and paragraph must be the very same object as before.
    """
    TEST_ACT = Act(
        identifier="2050. évi XD. törvény",
        publication_date=Date(2050, 3, 4),
        subject="A nyelvtani tesztelésről",
        preamble='',
        children=(
            Article(
                identifier="1",
                children=(Paragraph(
                    text=
                    "Fontos lesz később a tesztelés X tulajdonságiról szóló 2040. évi DX. törvény (a továbbiakban Xtv.) rövidítésének feloldása.",
                ), )),
            Article(
                identifier="2",
                children=(
                    Paragraph(
                        identifier="1",
                        text=
                        "Bekeverünk a tesztelés Y tulajdonságiról szóló 2041. évi X. törvény dolgaival.",
                    ),
                    Paragraph(
                        identifier="2",
                        text=
                        "Itt megemlítendő a 1. § és a tesztelés Z tulajdonságiról szóló 2041. évi XXX. törvény (a továbbiakban Ztv.)  1. §-a közötti különbség",
                    ),
                    Paragraph(
                        identifier="3",
                        intro="Az Xtv.",
                        wrap_up="szöveg lép.",
                        children=(
                            AlphabeticPoint(
                                identifier="a",
                                text=
                                "12. § (7) bekezdésében a „fontatlan” szövegrész helyébe a „nem fontos”,",
                            ),
                            AlphabeticPoint(
                                identifier="b",
                                text=
                                "11. §-ben a „nemigazán” szövegrész helyébe a „nem”",
                            ),
                        ),
                    ),
                ),
            ),
            Article(
                identifier="3",
                children=(Paragraph(
                    text=
                    "Ez a törvény a kihirdetését követő napon lép hatályba."),
                          )),
            Article(
                identifier="4",
                children=(Paragraph(
                    intro=
                    "A Ztv. 12. § (8) bekezdése helyébe a következő rendelkezés lép:",
                    children=(BlockAmendmentContainer(children=(Paragraph(
                        identifier='8',
                        text="Beillesztett referencia: 11. §, vajon lesz baj?"
                    ), ), ), ),
                ), ),
            ),
        ),
    )

    # First pass: verify abbreviations, references and semantic data.
    with_semantics_1 = ActSemanticsParser.add_semantics_to_act(TEST_ACT)
    assert with_semantics_1.is_semantic_parsed

    assert with_semantics_1.article('1').paragraph().act_id_abbreviations == (
        ActIdAbbreviation('Xtv.', '2040. évi DX. törvény'), )
    assert with_semantics_1.article('2').paragraph(
        '2').act_id_abbreviations == (ActIdAbbreviation(
            'Ztv.', '2041. évi XXX. törvény'), )
    assert with_semantics_1.article('1').paragraph().outgoing_references == (
        OutgoingReference(start_pos=55,
                          end_pos=76,
                          reference=Reference(act='2040. évi DX. törvény')), )
    assert with_semantics_1.article('2').paragraph(
        '1').outgoing_references == (OutgoingReference(
            start_pos=47,
            end_pos=67,
            reference=Reference(act='2041. évi X. törvény')), )
    assert with_semantics_1.article('2').paragraph('3').point(
        'a').semantic_data == (TextAmendment(position=Reference(
            act='2040. évi DX. törvény', article='12', paragraph='7'),
                                             original_text='fontatlan',
                                             replacement_text='nem fontos'), )
    assert with_semantics_1.article('3').paragraph().semantic_data == (
        EnforcementDate(position=None, date=DaysAfterPublication(days=1)), )
    assert with_semantics_1.article('4').paragraph().semantic_data == (
        BlockAmendment(position=Reference(act='2041. évi XXX. törvény',
                                          article='12',
                                          paragraph='8'), ), )

    # The block amendment container itself must carry no semantic fields.
    a4_children = with_semantics_1.article('4').paragraph().children
    assert a4_children is not None
    the_block_amendment = a4_children[0]
    assert isinstance(the_block_amendment, BlockAmendmentContainer)

    assert the_block_amendment.semantic_data is None
    assert the_block_amendment.outgoing_references is None
    assert the_block_amendment.act_id_abbreviations is None

    # Parsing an already parsed act must return the same object.
    with_semantics_2 = ActSemanticsParser.add_semantics_to_act(
        with_semantics_1)

    assert with_semantics_2 is with_semantics_1

    # Replace the text of article 2, paragraph 1 and clear its semantic
    # fields; everything else in the act is left as parsed.
    modified_paragraph = attr.evolve(
        with_semantics_1.article("2").paragraph("1"),
        text="Az 1. § és 3. § egészen fontos.",
        semantic_data=None,
        outgoing_references=None,
        act_id_abbreviations=None,
    )
    modified_article = attr.evolve(
        with_semantics_1.article("2"),
        children=(modified_paragraph, ) +
        with_semantics_1.article("2").children[1:],
    )
    modified_act = attr.evolve(
        with_semantics_1,
        children=(with_semantics_1.children[0], modified_article,
                  with_semantics_1.children[2], with_semantics_1.children[3]),
    )

    # Only article 2, and therefore the act as a whole, counts as unparsed.
    assert modified_act.article('1').is_semantic_parsed
    assert not modified_act.article('2').is_semantic_parsed
    assert modified_act.article('3').is_semantic_parsed
    assert modified_act.article('4').is_semantic_parsed
    assert not modified_act.is_semantic_parsed

    modified_with_semantics = ActSemanticsParser.add_semantics_to_act(
        modified_act)
    assert modified_with_semantics.is_semantic_parsed
    # The new text has two references with no act (act=None).
    assert modified_with_semantics.article('2').paragraph(
        '1').outgoing_references == (
            OutgoingReference(start_pos=3,
                              end_pos=7,
                              reference=Reference(act=None, article='1')),
            OutgoingReference(start_pos=11,
                              end_pos=15,
                              reference=Reference(act=None, article='3')),
        )

    # Check if nothing else was touched but the modified part.
    assert with_semantics_1.article('1') is modified_with_semantics.article(
        '1')
    assert with_semantics_1.article('2').paragraph(
        '2') is modified_with_semantics.article('2').paragraph('2')
    assert with_semantics_1.article('2').paragraph(
        '3') is modified_with_semantics.article('2').paragraph('3')
    assert with_semantics_1.article('3') is modified_with_semantics.article(
        '3')
    assert with_semantics_1.article('4') is modified_with_semantics.article(
        '4')
Ejemplo n.º 9
0
    ]

CASES_WITH_POSITIONS: List[Tuple[str, Tuple[Tuple[
    Reference, OutgoingReference], ...]]] = [
        ("""
            1. §    A tesztelésről szóló 2345. évi I. törvény
                    a) 8. §
                        aa) (5) bekezdése,
                        ab) (6) bekezdés a) pontja,
                    b) 10. §-a, és
                    c) egy totál másik dolog 1. § c) pontja, még utána ezzel szöveggel
                    jól van feldolgozva.
            """, (
            (
                ref(None, "1"),
                OutgoingReference(21, 41, ref("2345. évi I. törvény")),
            ),
            (
                ref(None, "1", None, "a"),
                OutgoingReference(0, 4, ref("2345. évi I. törvény", "8")),
            ),
            (
                ref(None, "1", None, "a", "aa"),
                OutgoingReference(0, 13, ref("2345. évi I. törvény", "8",
                                             "5")),
            ),
            (
                ref(None, "1", None, "a", "ab"),
                OutgoingReference(0, 22,
                                  ref("2345. évi I. törvény", "8", "6", "a")),
            ),