def convert_amendment_and_insertion(
        cls,
        amended_reference: Reference,
        inserted_reference: Reference) -> BlockAmendment:
    """Build a single BlockAmendment covering an amended and an inserted reference.

    The inserted reference is first made relative to the amended one. If the
    amended reference already contains the first element of the inserted
    range, the amended reference is used as the position unchanged; otherwise
    the position is the range from the first amended element to the last
    inserted element.
    """
    # relative_to() has no effect while an act is set, so the act is dropped first.
    actless_insertion = attr.evolve(inserted_reference, act=None)
    inserted_reference = actless_insertion.relative_to(amended_reference)

    if amended_reference.contains(inserted_reference.first_in_range()):
        # Odd case: e.g. 1-2 is amended and 1a is inserted in between.
        return BlockAmendment(position=amended_reference)

    combined_range = Reference.make_range(
        amended_reference.first_in_range(),
        inserted_reference.last_in_range(),
    )
    return BlockAmendment(position=combined_range)
def generate_html_nodes_for_block_amendment(
        act: Act, e: BlockAmendmentContainer) -> Iterable[ET.Element]:
    """Yield the HTML ``div`` elements that render a block amendment.

    Elements are produced in this order: the intro in parentheses (only when
    ``e.intro`` is truthy), an opening quote mark, one container element
    holding the rendered children, a closing quote mark, and the wrap-up in
    parentheses (only when ``e.wrap_up`` is truthy).
    """

    def make_div(css_class: str, text: str) -> ET.Element:
        # Small factory for a <div class=...> with the given text content.
        div = ET.Element('div', {'class': css_class})
        div.text = text
        return div

    # Quick hack to signify that IDs are not needed further on
    external_ref = Reference("EXTERNAL")

    if e.intro:
        yield make_div('blockamendment_text', "(" + e.intro + ")")

    yield make_div('blockamendment_quote', '„')

    container = ET.Element('div', {'class': 'blockamendment_container'})
    container.extend(generate_html_nodes_for_children(act, e, external_ref))
    yield container

    yield make_div('blockamendment_quote', '”')

    if e.wrap_up:
        yield make_div('blockamendment_text', "(" + e.wrap_up + ")")
def convert(
        cls,
        tree_element: model.ArticleTitleAmendment) -> Iterable[OutgoingReference]:
    """Yield the outgoing references found in an article title amendment.

    Two references are produced: one for the act itself, positioned at the
    act id, and one for the amended article within that act, positioned at
    the span reported by get_subtree_start_and_end_pos for the article node.
    """
    article_node = tree_element.article
    assert article_node is not None
    act_node = tree_element.act_reference
    assert act_node is not None

    act_helper = ActReferenceConversionHelper
    act_id = act_helper.get_act_id_from_parse_result(act_node)
    # The article id arrives in pieces; join them into one string.
    article_id = "".join(article_node.id)

    act_from, act_to = act_helper.get_act_id_pos_from_parse_result(act_node)
    yield OutgoingReference(act_from, act_to, Reference(act=act_id))

    article_from, article_to = get_subtree_start_and_end_pos(article_node)
    article_reference = Reference(act=act_id, article=article_id)
    yield OutgoingReference(article_from, article_to, article_reference)
def convert(
        cls, tree_element: model.BlockAmendment) -> Iterable[OutgoingReference]:
    """Yield the outgoing references found in a block amendment sentence.

    The act reference is always yielded first. The amended and the inserted
    references follow, each only if it is truthy after conversion. When both
    are present, the inserted reference is rewritten relative to the amended
    one before being yielded.
    """
    act_node = tree_element.act_reference
    assert isinstance(act_node, model.ActReference)

    act_helper = ActReferenceConversionHelper
    act_id = act_helper.get_act_id_from_parse_result(act_node)
    act_from, act_to = act_helper.get_act_id_pos_from_parse_result(act_node)
    yield OutgoingReference(act_from, act_to, Reference(act=act_id))

    to_outgoing = ReferenceConversionHelper.convert_potential_in_text_reference
    amended = to_outgoing(act_id, tree_element.amended_reference)
    inserted = to_outgoing(act_id, tree_element.inserted_reference)

    if amended is not None and inserted is not None:
        # relative_to() has no effect while an act is set, so the act is dropped first.
        actless = attr.evolve(inserted.reference, act=None)
        inserted = attr.evolve(
            inserted, reference=actless.relative_to(amended.reference))

    for candidate in (amended, inserted):
        if candidate:
            yield candidate
def convert(cls, tree_element: model.Repeal) -> Iterable[OutgoingReference]:
    """Yield the outgoing references found in a repeal sentence.

    The act reference comes first, followed by everything that
    convert_multiple_in_text_references produces for the repealed references.
    """
    assert tree_element.references is not None

    act_node = tree_element.act_reference
    act_helper = ActReferenceConversionHelper
    act_id = act_helper.get_act_id_from_parse_result(act_node)
    act_from, act_to = act_helper.get_act_id_pos_from_parse_result(act_node)
    yield OutgoingReference(act_from, act_to, Reference(act=act_id))

    yield from ReferenceConversionHelper.convert_multiple_in_text_references(
        act_id,
        tree_element.references,
    )
def convert(
        cls,
        tree_element: model.ArticleTitleAmendment) -> Iterable[ArticleTitleAmendment]:
    """Yield the single ArticleTitleAmendment described by the parse tree node.

    The position points at the amended article of the referenced act; the
    original and replacement texts are taken from the quoted parts of the
    sentence via convert_quote_to_string.
    """
    article_node = tree_element.article
    assert article_node is not None
    act_node = tree_element.act_reference
    assert act_node is not None

    act_id = ActReferenceConversionHelper.get_act_id_from_parse_result(act_node)
    # The article id arrives in pieces; join them into one string.
    article_id = "".join(article_node.id)

    position = Reference(act_id, article_id)
    original_text = convert_quote_to_string(tree_element.original_text)
    replacement_text = convert_quote_to_string(tree_element.replacement_text)
    yield ArticleTitleAmendment(
        position=position,
        original_text=original_text,
        replacement_text=replacement_text,
    )
def iter(self, start_override: Optional[int], end_override: int) -> Iterable[OutgoingReference]:
    """Yield an OutgoingReference for every reference collected on this object.

    The four per-level lists (``articles``, ``paragraphs``, ``points``,
    ``subpoints``) are walked from the outermost level inwards. Entries are
    unpacked as ``(value, start, end)`` triples. A level with exactly one
    entry only contributes its value to the reference being built. A level
    with several entries is sorted in place by start position, and every
    entry but the last yields its own reference; the last one is carried on
    to the deeper levels.

    Args:
        start_override: If not None, used instead of the start position of
            the first yielded reference (it is consumed on first use).
        end_override: End position of the final yielded reference.

    Yields:
        OutgoingReference objects; the last one always ends at
        ``end_override``.
    """
    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-collapsed source. The trailing "if start_override is None"
    # check is assumed to run once per level, not only in the multi-entry
    # branch -- confirm against the original file.
    self.commit_deferred_item()
    # Positional arguments for Reference(): the act, then one slot per level.
    ref_args = [self.act, None, None, None, None]
    levels = ("articles", "paragraphs", "points", "subpoints")
    # start=1 because slot 0 of ref_args holds the act.
    for arg_pos, level in enumerate(levels, start=1):
        level_vals = getattr(self, level)
        if len(level_vals) == 1:
            ref_args[arg_pos] = level_vals[0][0]
        else:
            # Sorts the list stored on self in place, by start position.
            level_vals.sort(key=lambda x: x[1])
            for level_val, start, end in level_vals[:-1]:
                if start_override is not None:
                    # The override applies to the first yielded reference only.
                    start = start_override
                    start_override = None
                ref_args[arg_pos] = level_val
                yield OutgoingReference(start, end, Reference(*ref_args))
            # The last entry stays in ref_args for the deeper levels.
            ref_args[arg_pos] = level_vals[-1][0]
        if start_override is None:
            # No start pending: the next reference starts at this level's last entry.
            start_override = level_vals[-1][1]
    assert start_override is not None
    yield OutgoingReference(start_override, end_override, Reference(*ref_args))
def convert(
        cls,
        tree_element: model.CompoundReference) -> Iterable[OutgoingReference]:
    """Yield the outgoing references found in a compound reference.

    When the node carries an act reference, a reference to the act itself is
    yielded first and its id is used for the references that follow. Without
    one, the act id passed on to the single-reference conversion is None.
    """
    act_node = tree_element.act_reference
    act_id = None
    if act_node is not None:
        act_helper = ActReferenceConversionHelper
        act_id = act_helper.get_act_id_from_parse_result(act_node)
        act_from, act_to = act_helper.get_act_id_pos_from_parse_result(act_node)
        yield OutgoingReference(act_from, act_to, Reference(act=act_id))

    # A missing or empty reference list simply produces nothing more.
    for single_reference in tree_element.references or ():
        assert isinstance(single_reference, model.Reference)
        yield from ReferenceConversionHelper.convert_single_in_text_reference(
            act_id, single_reference)
def generate_html_body_for_act(act: Act, indent: bool = True) -> ET.Element:
    """Render an act into a single ``div`` element tree.

    The container holds a title block (identifier, line break, subject), an
    optional preamble block, and the elements generated for each child of
    the act.

    Args:
        act: The act to render.
        indent: When true, the finished tree is passed through
            indent_etree_element_in_place before being returned.

    Returns:
        The root ``div`` element with class ``act_container``.
    """
    body = ET.Element('div', {'class': 'act_container'})

    title_div = ET.SubElement(body, 'div', {'class': 'act_title'})
    title_div.text = act.identifier
    # The subject goes after the <br>, hence it is the tail of that element.
    line_break = ET.SubElement(title_div, 'br')
    line_break.tail = act.subject

    if act.preamble:
        ET.SubElement(body, 'div', {'class': 'preamble'}).text = act.preamble

    for child in act.children:
        child_nodes = (
            generate_html_node_for_article(act, child, Reference())
            if isinstance(child, Article)
            else generate_html_node_for_structural_element(child)
        )
        body.extend(child_nodes)

    if indent:
        indent_etree_element_in_place(body)
    return body
def convert_multiple_in_text_references(
        cls,
        act_id: Optional[str],
        references: Iterable[model.Reference]) -> Iterable[OutgoingReference]:
    """Convert a list of in-text references, resolving each against the previous one.

    Every converted reference is made relative to the reference yielded just
    before it; the very first one is made relative to a bare reference to
    ``act_id``.
    """
    # Some references are relative to the ones before them. One could call
    # this a shortcoming of how reference lists are parsed, but the
    # 'relativization' is only possible with the whole sentence in view.
    # Take for example:
    #   "... az 1. § (2) bekezdése, és az (5) bekezdés ..."
    # which may be part of:
    #   "Az 1. § (2) bekezdése, és az (5) bekezdése a kihirdetést követő napon lép hatályba"
    # or of:
    #   "Fontos kivétel 1. § (2) bekezdése, és az (5) bekezdés szerinti definíció"
    previous = Reference(act=act_id)
    convert_one = ReferenceConversionHelper.convert_single_in_text_reference
    for raw_reference in references:
        for outgoing in convert_one(None, raw_reference):
            previous = outgoing.reference.relative_to(previous)
            yield attr.evolve(outgoing, reference=previous)
def ref(
        act: Optional[str] = None,
        article: ReferencePartType = None,
        paragraph: ReferencePartType = None,
        point: ReferencePartType = None,
        subpoint: ReferencePartType = None,
) -> Reference:
    """Shorthand for constructing a Reference; every part defaults to None."""
    parts = (act, article, paragraph, point, subpoint)
    return Reference(*parts)
def test_semantic_reparse_abbrevs() -> None:
    """Check semantic re-parsing when an act id abbreviation changes.

    The act is parsed once, then the paragraph introducing the 'Ytv.'
    abbreviation is edited so that it names a different act. After the second
    parse, the references using 'Ytv.' must resolve to the new act id, while
    article 1 and the block amendment container in article 4 must be the
    very same objects as before.
    """
    # Fixture: articles 1 and 2 introduce the abbreviations Xtv., Ytv. and Ztv.;
    # articles 2, 3 and 4 then use them.
    TEST_ACT = Act(
        identifier="2050. évi XD. törvény",
        publication_date=Date(2050, 3, 4),
        subject="A nyelvtani tesztelésről",
        preamble='',
        children=(
            Article(
                identifier="1",
                children=(Paragraph(
                    text=
                    "Fontos lesz később a tesztelés X tulajdonságiról szóló 2040. évi DX. törvény (a továbbiakban Xtv.) rövidítésének feloldása.",
                ), )),
            Article(
                identifier="2",
                children=(
                    Paragraph(
                        identifier="1",
                        text=
                        "Bekeverünk a tesztelés Y tulajdonságiról szóló 2041. évi X. törvény (a továbbiakban Ytv.) dolgaival",
                    ),
                    Paragraph(
                        identifier="2",
                        text=
                        "Itt megemlítendő az Ytv. 10. §-a és a tesztelés Z tulajdonságiról szóló 2041. évi XXX. törvény (a továbbiakban Ztv.) 1. §-a közötti különbség",
                    ),
                    Paragraph(
                        identifier="3",
                        intro="Mert később használatban van",
                        children=(
                            AlphabeticPoint(identifier="a", text="az Xtv. 1. § c) pontja, és"),
                            AlphabeticPoint(identifier="b", text="az Ytv. 2. §-a."),
                        ),
                    ),
                ),
            ),
            Article(
                identifier="3",
                children=(Paragraph(
                    text=
                    "Mégegyszer megemlítendő, hogy fontos az Xtv. 1. §-a, Ytv. 1. §-a, és Ztv. 1337. §-a.",
                ), )),
            Article(
                identifier="4",
                children=(Paragraph(
                    intro=
                    "Az Ytv. 12. § (8) bekezdése helyébe a következő rendelkezés lép:",
                    children=(BlockAmendmentContainer(children=(Paragraph(
                        identifier='8',
                        text="Beillesztett referencia: 12. §, vajon lesz baj?"
                    ), ), ), ),
                ), ),
            ),
        ),
    )

    # First parse: abbreviations are recorded on the paragraphs that define them.
    with_semantics_1 = ActSemanticsParser.add_semantics_to_act(TEST_ACT)
    assert with_semantics_1.is_semantic_parsed

    assert with_semantics_1.article('1').paragraph().act_id_abbreviations == (
        ActIdAbbreviation('Xtv.', '2040. évi DX. törvény'), )
    assert with_semantics_1.article('2').paragraph(
        '1').act_id_abbreviations == (ActIdAbbreviation(
            'Ytv.', '2041. évi X. törvény'), )
    assert with_semantics_1.article('2').paragraph(
        '2').act_id_abbreviations == (ActIdAbbreviation(
            'Ztv.', '2041. évi XXX. törvény'), )
    # Article 3 uses all three abbreviations; each resolves to the full act id.
    assert with_semantics_1.article('3').paragraph().outgoing_references == (
        OutgoingReference(start_pos=40, end_pos=44, reference=Reference(act='2040. évi DX. törvény')),
        OutgoingReference(start_pos=45, end_pos=51, reference=Reference(act='2040. évi DX. törvény', article='1')),
        OutgoingReference(start_pos=53, end_pos=57, reference=Reference(act='2041. évi X. törvény')),
        OutgoingReference(start_pos=58, end_pos=64, reference=Reference(act='2041. évi X. törvény', article='1')),
        OutgoingReference(start_pos=69, end_pos=73, reference=Reference(act='2041. évi XXX. törvény')),
        OutgoingReference(start_pos=74, end_pos=83, reference=Reference(act='2041. évi XXX. törvény', article='1337')),
    )

    # Parsing an already parsed act again must give an equal result.
    with_semantics_2 = ActSemanticsParser.add_semantics_to_act(
        with_semantics_1)
    # TODO: with_semantics_2 is with_semantics_1
    assert with_semantics_2 == with_semantics_1

    # Edit the paragraph defining 'Ytv.' so it names another act, and clear
    # its semantic fields so that it counts as unparsed.
    modified_paragraph = attr.evolve(
        with_semantics_1.article("2").paragraph("1"),
        text=
        "Bekeverünk a tesztelés Y új tulajdonságiról szóló 2057. évi X. törvény (a továbbiakban Ytv.) dolgaival",
        semantic_data=None,
        outgoing_references=None,
        act_id_abbreviations=None,
    )
    modified_article = attr.evolve(
        with_semantics_1.article("2"),
        children=(modified_paragraph, ) +
        with_semantics_1.article("2").children[1:],
    )
    modified_act = attr.evolve(
        with_semantics_1,
        children=(with_semantics_1.children[0], modified_article,
                  with_semantics_1.children[2], with_semantics_1.children[3]),
    )
    assert not modified_act.is_semantic_parsed

    # Second parse: everything using 'Ytv.' must now point at the new act id.
    modified_with_semantics = ActSemanticsParser.add_semantics_to_act(
        modified_act)
    assert modified_with_semantics.article('2').paragraph(
        '1').act_id_abbreviations == (ActIdAbbreviation(
            'Ytv.', '2057. évi X. törvény'), )
    assert modified_with_semantics.article('3').paragraph(
    ).outgoing_references == (
        OutgoingReference(start_pos=40, end_pos=44, reference=Reference(act='2040. évi DX. törvény')),
        OutgoingReference(start_pos=45, end_pos=51, reference=Reference(act='2040. évi DX. törvény', article='1')),
        OutgoingReference(start_pos=53, end_pos=57, reference=Reference(act='2057. évi X. törvény')),
        OutgoingReference(start_pos=58, end_pos=64, reference=Reference(act='2057. évi X. törvény', article='1')),
        OutgoingReference(start_pos=69, end_pos=73, reference=Reference(act='2041. évi XXX. törvény')),
        OutgoingReference(start_pos=74, end_pos=83, reference=Reference(act='2041. évi XXX. törvény', article='1337')),
    )
    assert modified_with_semantics.article('4').paragraph().semantic_data == (
        BlockAmendment(position=Reference(act='2057. évi X. törvény', article='12', paragraph='8'), ), )

    # Article 1 precedes the change, so the same object must be reused.
    assert with_semantics_1.article('1') is modified_with_semantics.article(
        '1')
    # Note that because of the abbreviation change, everything else may be reparsed,
    # so no asserts for e.g. article('3')

    # No need to reparse BlockAmendments though
    a4_children = with_semantics_1.article('4').paragraph().children
    modified_a4_children = modified_with_semantics.article(
        '4').paragraph().children
    assert a4_children is not None
    assert modified_a4_children is not None
    assert a4_children[0] is modified_a4_children[0]
def test_semantic_reparse_simple() -> None:
    """Check that semantic re-parsing only touches the modified part of an act.

    The act is parsed once and the results are spot-checked. Parsing the
    parsed act again must return the very same object. Then one paragraph is
    edited and marked unparsed; after the next parse that paragraph has new
    outgoing references, while every other article and paragraph is the same
    object as before.
    """
    # Fixture: abbreviation definitions, a text amendment list, an enforcement
    # date sentence and a block amendment.
    TEST_ACT = Act(
        identifier="2050. évi XD. törvény",
        publication_date=Date(2050, 3, 4),
        subject="A nyelvtani tesztelésről",
        preamble='',
        children=(
            Article(
                identifier="1",
                children=(Paragraph(
                    text=
                    "Fontos lesz később a tesztelés X tulajdonságiról szóló 2040. évi DX. törvény (a továbbiakban Xtv.) rövidítésének feloldása.",
                ), )),
            Article(
                identifier="2",
                children=(
                    Paragraph(
                        identifier="1",
                        text=
                        "Bekeverünk a tesztelés Y tulajdonságiról szóló 2041. évi X. törvény dolgaival.",
                    ),
                    Paragraph(
                        identifier="2",
                        text=
                        "Itt megemlítendő a 1. § és a tesztelés Z tulajdonságiról szóló 2041. évi XXX. törvény (a továbbiakban Ztv.) 1. §-a közötti különbség",
                    ),
                    Paragraph(
                        identifier="3",
                        intro="Az Xtv.",
                        wrap_up="szöveg lép.",
                        children=(
                            AlphabeticPoint(
                                identifier="a",
                                text=
                                "12. § (7) bekezdésében a „fontatlan” szövegrész helyébe a „nem fontos”,",
                            ),
                            AlphabeticPoint(
                                identifier="b",
                                text=
                                "11. §-ben a „nemigazán” szövegrész helyébe a „nem”",
                            ),
                        ),
                    ),
                ),
            ),
            Article(
                identifier="3",
                children=(Paragraph(
                    text=
                    "Ez a törvény a kihirdetését követő napon lép hatályba."), )),
            Article(
                identifier="4",
                children=(Paragraph(
                    intro=
                    "A Ztv. 12. § (8) bekezdése helyébe a következő rendelkezés lép:",
                    children=(BlockAmendmentContainer(children=(Paragraph(
                        identifier='8',
                        text="Beillesztett referencia: 11. §, vajon lesz baj?"
                    ), ), ), ),
                ), ),
            ),
        ),
    )

    # First parse and spot checks of the produced semantic data.
    with_semantics_1 = ActSemanticsParser.add_semantics_to_act(TEST_ACT)
    assert with_semantics_1.is_semantic_parsed

    assert with_semantics_1.article('1').paragraph().act_id_abbreviations == (
        ActIdAbbreviation('Xtv.', '2040. évi DX. törvény'), )
    assert with_semantics_1.article('2').paragraph(
        '2').act_id_abbreviations == (ActIdAbbreviation(
            'Ztv.', '2041. évi XXX. törvény'), )
    assert with_semantics_1.article('1').paragraph().outgoing_references == (
        OutgoingReference(start_pos=55, end_pos=76, reference=Reference(act='2040. évi DX. törvény')), )
    assert with_semantics_1.article('2').paragraph(
        '1').outgoing_references == (OutgoingReference(
            start_pos=47, end_pos=67, reference=Reference(act='2041. évi X. törvény')), )
    assert with_semantics_1.article('2').paragraph('3').point(
        'a').semantic_data == (TextAmendment(position=Reference(
            act='2040. évi DX. törvény', article='12', paragraph='7'),
            original_text='fontatlan', replacement_text='nem fontos'), )
    assert with_semantics_1.article('3').paragraph().semantic_data == (
        EnforcementDate(position=None, date=DaysAfterPublication(days=1)), )
    assert with_semantics_1.article('4').paragraph().semantic_data == (
        BlockAmendment(position=Reference(act='2041. évi XXX. törvény', article='12', paragraph='8'), ), )

    # The block amendment container itself carries no semantic fields.
    a4_children = with_semantics_1.article('4').paragraph().children
    assert a4_children is not None
    the_block_amendment = a4_children[0]
    assert isinstance(the_block_amendment, BlockAmendmentContainer)
    assert the_block_amendment.semantic_data is None
    assert the_block_amendment.outgoing_references is None
    assert the_block_amendment.act_id_abbreviations is None

    # Parsing a fully parsed act again must return the same object.
    with_semantics_2 = ActSemanticsParser.add_semantics_to_act(
        with_semantics_1)
    assert with_semantics_2 is with_semantics_1

    # Replace the text of article 2, paragraph 1 and clear its semantic fields
    # so that only this paragraph counts as unparsed.
    modified_paragraph = attr.evolve(
        with_semantics_1.article("2").paragraph("1"),
        text="Az 1. § és 3. § egészen fontos.",
        semantic_data=None,
        outgoing_references=None,
        act_id_abbreviations=None,
    )
    modified_article = attr.evolve(
        with_semantics_1.article("2"),
        children=(modified_paragraph, ) +
        with_semantics_1.article("2").children[1:],
    )
    modified_act = attr.evolve(
        with_semantics_1,
        children=(with_semantics_1.children[0], modified_article,
                  with_semantics_1.children[2], with_semantics_1.children[3]),
    )

    # Only article 2, and therefore the act as a whole, is unparsed now.
    assert modified_act.article('1').is_semantic_parsed
    assert not modified_act.article('2').is_semantic_parsed
    assert modified_act.article('3').is_semantic_parsed
    assert modified_act.article('4').is_semantic_parsed
    assert not modified_act.is_semantic_parsed

    modified_with_semantics = ActSemanticsParser.add_semantics_to_act(
        modified_act)
    assert modified_with_semantics.is_semantic_parsed

    # The new text holds two article references without an act.
    assert modified_with_semantics.article('2').paragraph(
        '1').outgoing_references == (
            OutgoingReference(start_pos=3, end_pos=7, reference=Reference(act=None, article='1')),
            OutgoingReference(start_pos=11, end_pos=15, reference=Reference(act=None, article='3')),
        )

    # Check if nothing else was touched but the modified part.
    assert with_semantics_1.article('1') is modified_with_semantics.article(
        '1')
    assert with_semantics_1.article('2').paragraph(
        '2') is modified_with_semantics.article('2').paragraph('2')
    assert with_semantics_1.article('2').paragraph(
        '3') is modified_with_semantics.article('2').paragraph('3')
    assert with_semantics_1.article('3') is modified_with_semantics.article(
        '3')
    assert with_semantics_1.article('4') is modified_with_semantics.article(
        '4')