def test_preamble_and_friends_in_table_of_contents(self):
    """Check the TOC entries built for coverpage, preface, preamble and conclusions.

    The fixture also contains a ``body`` element; the expected list has no
    entry for it.
    """
    fixture_xml = """ <coverpage> <content><p>hi</p></content> </coverpage> <preface> <content><p>hi</p></content> </preface> <preamble> <content><p>hi</p></content> </preamble> <body> <content><p>hi</p></content> </body> <conclusions> <content><p>hi></p></content> </conclusions> """

    d = Document()
    d.content = document_fixture(xml=fixture_xml)

    entries = [item.as_dict() for item in d.table_of_contents()]

    # one flat entry per container; the title is the capitalised element name
    containers = ('coverpage', 'preface', 'preamble', 'conclusions')
    wanted = [
        {
            'type': name,
            'component': 'main',
            'subcomponent': name,
            'title': name.capitalize(),
        }
        for name in containers
    ]

    self.maxDiff = None
    self.assertEqual(entries, wanted)
def test_find_simple(self):
    """GN and SI citations in paragraph text should be wrapped in ``ref`` elements."""
    plain_xml = """ <section eId="sec_1"> <num>1.</num> <heading>Tester</heading> <paragraph eId="sec_1.paragraph-0"> <content> <p>Something to do with GN no 102 of 2012.</p> <p>And another thing about SI 4 of 1998.</p> </content> </paragraph> </section>"""
    linked_xml = """ <section eId="sec_1"> <num>1.</num> <heading>Tester</heading> <paragraph eId="sec_1.paragraph-0"> <content> <p>Something to do with <ref href="/akn/za/act/gn/2012/102">GN no 102 of 2012</ref>.</p> <p>And another thing about <ref href="/akn/za/act/si/1998/4">SI 4 of 1998</ref>.</p> </content> </paragraph> </section>"""

    document = Document(
        work=self.work,
        document_xml=document_fixture(xml=plain_xml),
        language=self.eng,
    )
    expected = Document(
        work=self.work,
        document_xml=document_fixture(xml=linked_xml),
        language=self.eng,
    )

    self.finder.find_references_in_document(document)

    # round-trip the expected XML through lxml before comparing
    expected.content = etree.tostring(
        etree.fromstring(expected.content), encoding='utf-8',
    ).decode('utf-8')
    self.assertEqual(expected.content, document.content)
def test_italics_markup(self):
    """Terms passed to the italics finder should end up wrapped in ``i`` elements."""
    plain_xml = """ <section id="section-1"> <num>1.</num> <heading>Application of Act</heading> <content> <p>In the Gazette it says that habeus corpus is XYZ. As per the evidence of person X, Y.</p> </content> </section> """
    italic_xml = """ <section id="section-1"> <num>1.</num> <heading>Application of Act</heading> <content> <p>In the <i>Gazette</i> it says that <i>habeus corpus</i> is XYZ. As <i>per</i> the evidence of person X, Y.</p> </content> </section> """

    document = Document(work=self.work, document_xml=document_fixture(xml=plain_xml))
    expected = Document(work=self.work, document_xml=document_fixture(xml=italic_xml))

    self.italics_terms_finder.mark_up_italics_in_document(document, self.italics_terms)

    # round-trip the expected XML through lxml before comparing
    expected.content = etree.tostring(
        etree.fromstring(expected.content), encoding='utf-8',
    ).decode('utf-8')
    self.assertEqual(expected.content, document.content)
def test_find_simple(self):
    """Act number/year citations should be wrapped in ``ref`` elements."""
    plain_xml = """ <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something to do with Act no 22 of 2012.</p> <p>And another thing about Act 4 of 1998.</p> </content> </paragraph> </section>"""
    linked_xml = """ <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something to do with Act <ref href="/za/act/2012/22">no 22 of 2012</ref>.</p> <p>And another thing about Act <ref href="/za/act/1998/4">4 of 1998</ref>.</p> </content> </paragraph> </section>"""

    document = Document(document_xml=document_fixture(xml=plain_xml), language=self.eng)
    expected = Document(document_xml=document_fixture(xml=linked_xml), language=self.eng)

    self.finder.find_references_in_document(document)

    # round-trip the expected XML through lxml before comparing
    expected.content = etree.tostring(
        etree.fromstring(expected.content), encoding='utf-8',
    ).decode('utf-8')
    self.assertEqual(expected.content, document.content)
def test_set_content(self):
    """Assigning content should expose the FRBR URI, country and publication date."""
    doc = Document()
    doc.content = document_fixture(u'γνωρίζω')

    uri = doc.frbr_uri
    assert_equal(uri, '/za/act/1900/1')

    country = doc.country
    assert_equal(country, 'za')

    published = doc.doc.publication_date
    assert_equal(published, date(2005, 7, 24))
def test_section_of_this(self):
    """'section 26 of this Act/Proclamation' should link to the section with eId sec_26.

    'section 26 of Act 5 of 2012' is expected to stay unlinked.
    """
    plain_xml = """ <section eId="sec_7"> <num>7.</num> <heading>Active ref heading</heading> <content> <p>As given in section 26 of this Act, blah.</p> <p>As given in section 26 (1) of this Act, blah.</p> <p>As given in section 26 (1) of this Proclamation, blah.</p> <p>As given in section 26(1)(b)(iii)(dd)(A) of this Act, blah.</p> <p>In section 26 of Act 5 of 2012 it says one thing and in section 26 of this Act it says another.</p> <p>As <i>given</i> in (we're now in a tail) section 26 of this Act, blah.</p> </content> </section> <section eId="sec_26"> <num>26.</num> <heading>Important heading</heading> <content> <p>An important provision.</p> </content> </section> """
    linked_xml = """ <section eId="sec_7"> <num>7.</num> <heading>Active ref heading</heading> <content> <p>As given in <ref href="#sec_26">section 26</ref> of this Act, blah.</p> <p>As given in <ref href="#sec_26">section 26</ref> (1) of this Act, blah.</p> <p>As given in <ref href="#sec_26">section 26</ref> (1) of this Proclamation, blah.</p> <p>As given in <ref href="#sec_26">section 26</ref>(1)(b)(iii)(dd)(A) of this Act, blah.</p> <p>In section 26 of Act 5 of 2012 it says one thing and in <ref href="#sec_26">section 26</ref> of this Act it says another.</p> <p>As <i>given</i> in (we're now in a tail) <ref href="#sec_26">section 26</ref> of this Act, blah.</p> </content> </section> <section eId="sec_26"> <num>26.</num> <heading>Important heading</heading> <content> <p>An important provision.</p> </content> </section> """

    document = Document(
        work=self.work,
        document_xml=document_fixture(xml=plain_xml),
        language=self.eng,
    )
    expected = Document(
        work=self.work,
        document_xml=document_fixture(xml=linked_xml),
        language=self.eng,
    )

    self.section_refs_finder.find_references_in_document(document)

    # round-trip the expected XML through lxml before comparing
    expected.content = etree.tostring(
        etree.fromstring(expected.content), encoding='utf-8',
    ).decode('utf-8')
    self.assertEqual(expected.content, document.content)
def test_empty_expression_date(self):
    """An empty string and None should both round-trip through expression_date."""
    doc = Document()
    doc.content = document_fixture('test')

    # empty string is read back unchanged
    doc.expression_date = ''
    after_blank = doc.expression_date
    assert_equal(after_blank, '')

    # None is read back unchanged
    doc.expression_date = None
    after_none = doc.expression_date
    assert_equal(after_none, None)
def test_inherit_from_work(self):
    """A saved document should report its work's FRBR URI and title."""
    work = Work.objects.create(frbr_uri='/za/act/2009/test', title='Test document')

    saved = Document(work=work, expression_date='2011-02-01')
    saved.save()

    # re-read from the database so we check persisted values
    reloaded = Document.objects.get(pk=saved.id)
    assert_equal(work.frbr_uri, reloaded.frbr_uri)
    assert_equal(work.title, reloaded.title)
def test_inherit_from_work(self):
    """A saved document should report its work's FRBR URI and title."""
    user = User.objects.get(pk=1)
    work = Work.objects.create(
        frbr_uri='/za/act/2009/test',
        title='Test document',
        country=Country.for_code('za'),
        created_by_user=user,
    )

    saved = Document(
        work=work,
        expression_date='2011-02-01',
        language=self.eng,
        created_by_user=user,
    )
    saved.save()

    # re-read from the database so we check persisted values
    reloaded = Document.objects.get(pk=saved.id)
    assert_equal(work.frbr_uri, reloaded.frbr_uri)
    assert_equal(work.title, reloaded.title)
def test_section_valid_and_invalid(self):
    """Only section numbers that exist in the document (26) should be linked.

    References to sections 35 and 200, and to 'section 26 of Act 5 of 2012',
    are expected to stay as plain text.
    """
    plain_xml = """ <section eId="sec_7"> <num>7.</num> <heading>Active ref heading</heading> <content> <p>As given in sections 26 and 35, one of which isn't in this document, blah.</p> <p>As given in sections 35 and 26, one of which isn't in this document, blah.</p> <p>In section 200 it says one thing and in section 26 it says another.</p> <p>As <i>given</i> in (we're now in a tail) section 200, blah, but section 26 says something else.</p> <p>As <i>given</i> in (we're now in a tail) section 26 of Act 5 of 2012, blah, but section 26 of this Act says something else.</p> </content> </section> <section eId="sec_26"> <num>26.</num> <heading>The section we want</heading> <content> <p>The provision you're looking for.</p> </content> </section> """
    linked_xml = """ <section eId="sec_7"> <num>7.</num> <heading>Active ref heading</heading> <content> <p>As given in sections <ref href="#sec_26">26</ref> and 35, one of which isn't in this document, blah.</p> <p>As given in sections 35 and <ref href="#sec_26">26</ref>, one of which isn't in this document, blah.</p> <p>In section 200 it says one thing and in <ref href="#sec_26">section 26</ref> it says another.</p> <p>As <i>given</i> in (we're now in a tail) section 200, blah, but <ref href="#sec_26">section 26</ref> says something else.</p> <p>As <i>given</i> in (we're now in a tail) section 26 of Act 5 of 2012, blah, but <ref href="#sec_26">section 26</ref> of this Act says something else.</p> </content> </section> <section eId="sec_26"> <num>26.</num> <heading>The section we want</heading> <content> <p>The provision you're looking for.</p> </content> </section> """

    document = Document(
        work=self.work,
        document_xml=document_fixture(xml=plain_xml),
        language=self.eng,
    )
    expected = Document(
        work=self.work,
        document_xml=document_fixture(xml=linked_xml),
        language=self.eng,
    )

    self.section_refs_finder.find_references_in_document(document)

    # round-trip the expected XML through lxml before comparing
    reparsed = etree.fromstring(expected.content)
    expected_content = etree.tostring(reparsed, encoding='utf-8').decode('utf-8')
    self.assertEqual(expected_content, document.content)
def test_find_simple(self):
    """'Cap. 12' should link to the work whose ``cap`` property is '12'.

    The test installs a ``WORK_PROPERTIES`` entry in ``settings.INDIGO`` and
    registers a cleanup that puts the previous value back, so the override
    does not leak into other tests.
    """
    za = Country.objects.get(pk=1)
    user1 = User.objects.get(pk=1)

    # Remember the current setting (or its absence) and restore it afterwards.
    had_setting = 'WORK_PROPERTIES' in settings.INDIGO
    previous = settings.INDIGO['WORK_PROPERTIES'] if had_setting else None

    def restore_work_properties():
        if had_setting:
            settings.INDIGO['WORK_PROPERTIES'] = previous
        elif 'WORK_PROPERTIES' in settings.INDIGO:
            del settings.INDIGO['WORK_PROPERTIES']

    self.addCleanup(restore_work_properties)

    settings.INDIGO['WORK_PROPERTIES'] = {
        'za': {
            'cap': 'Chapter (cap)',
        }
    }

    work = Work(
        frbr_uri='/akn/za/act/2002/5',
        title='Act 5 of 2002',
        country=za,
        created_by_user=user1,
    )
    work.properties['cap'] = '12'
    work.updated_by_user = user1
    work.save()

    document = Document(
        document_xml=document_fixture(
            xml=""" <section eId="sec_1"> <num>1.</num> <heading>Tester</heading> <paragraph eId="sec_1.paragraph-0"> <content> <p>Something to do with Cap. 12.</p> </content> </paragraph> </section>"""
        ),
        language=self.eng,
        work=work)
    expected = Document(
        document_xml=document_fixture(
            xml=""" <section eId="sec_1"> <num>1.</num> <heading>Tester</heading> <paragraph eId="sec_1.paragraph-0"> <content> <p>Something to do with <ref href="/akn/za/act/2002/5">Cap. 12</ref>.</p> </content> </paragraph> </section>"""
        ),
        language=self.eng,
        work=work)

    self.finder.find_references_in_document(document)

    # round-trip the expected XML through lxml before comparing
    root = etree.fromstring(expected.content)
    expected.content = etree.tostring(root, encoding='utf-8').decode('utf-8')
    self.assertEqual(expected.content, document.content)
def test_find_multiple_in_tail(self):
    """Several Act citations around a ``term`` element should each get their own ``ref``."""
    source_xml = """ <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something Act 4 of 2000 and "<term>City</term>" means the (Act No. 117 of 1998) and also Act 5 of 2020;</p> </content> </paragraph> </section>"""
    wanted = '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0"> <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something <ref href="/akn/za/act/2000/4">Act 4 of 2000</ref> and "<term>City</term>" means the (<ref href="/akn/za/act/1998/117">Act No. 117 of 1998</ref>) and also <ref href="/akn/za/act/2020/5">Act 5 of 2020</ref>;</p> </content> </paragraph> </section> </body> '''

    doc = Document(work=self.work, content=document_fixture(xml=source_xml))
    self.finder.find_references_in_document(doc)

    serialised = etree.tostring(doc.doc.body, encoding='utf-8', pretty_print=True)
    self.assertMultiLineEqual(wanted, serialised.decode('utf-8'))
def test_constitution(self):
    """Afrikaans 'Grondwet' citations should link to the constitution URI.

    'enige grondwet' is listed in the fixture as text that must not match.
    """
    source_xml = """ <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>die Grondwet</p> <p>die Grondwet, 1996</p> <p>die Grondwet van Suid Afrika</p> <p>die Grondwet van die Republiek van Suid-Afrika</p> <p>die Grondwet van die Republiek van Suid-Afrika, 1996</p> <p>die Grondwet van die Republiek van Suid-Afrika 1996</p> <p>die Grondwet van die Republiek van Suid-Afrika Wet, 1996</p> <p>die Grondwet van die Republiek van Suid-Afrika, 1996 ( Wet 108 van 1996 )</p> <p>die Grondwet van die Republiek van Suid-Afrika, 1996 ( Wet No 108 van 1996 )</p> <p>die Grondwet van die Republiek van Suid-Afrika, 1996 ( Wet No. 108 van 1996 )</p> <p>die Grondwet van die Republiek van Suid-Afrika Wet, 1996 ( Wet No. 108 van 1996 )</p> <p>die Grondwet van die Republiek van Suid-Afrika Wet 108 van 1996</p> <p>die Grondwet van die Republiek van Suid-Afrika (Wet 108 van 1996)</p> <p>the below shouldn't match</p> <p>enige grondwet</p> </content> </paragraph> </section>"""
    wanted = '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0"> <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet</ref></p> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet, 1996</ref></p> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van Suid Afrika</ref></p> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika</ref></p> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika, 1996</ref></p> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika 1996</ref></p> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van 
Suid-Afrika Wet, 1996</ref></p> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Wet 108 van 1996</ref> )</p> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Wet No 108 van 1996</ref> )</p> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Wet No. 108 van 1996</ref> )</p> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika Wet, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Wet No. 108 van 1996</ref> )</p> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika</ref> <ref href="/akn/za/act/1996/constitution">Wet 108 van 1996</ref></p> <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika</ref> (<ref href="/akn/za/act/1996/constitution">Wet 108 van 1996</ref>)</p> <p>the below shouldn't match</p> <p>enige grondwet</p> </content> </paragraph> </section> </body> '''

    doc = Document(work=self.work, content=document_fixture(xml=source_xml))
    self.finder.find_references_in_document(doc)

    self.maxDiff = None
    serialised = etree.tostring(doc.doc.body, encoding='utf-8', pretty_print=True)
    self.assertMultiLineEqual(wanted, serialised.decode('utf-8'))
def test_find_without_act_in_parens(self):
    """A parenthesised 'No 58 of 1962' after an Act title should be linked on its own."""
    source_xml = """ <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something to do with Income Tax Act, 1962 (No 58 of 1962).</p> </content> </paragraph> </section>"""
    wanted = '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0"> <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something to do with Income Tax Act, 1962 (<ref href="/akn/za/act/1962/58">No 58 of 1962</ref>).</p> </content> </paragraph> </section> </body> '''

    doc = Document(work=self.work, content=document_fixture(xml=source_xml))
    self.finder.find_references_in_document(doc)

    self.maxDiff = None
    serialised = etree.tostring(doc.doc.body, encoding='utf-8', pretty_print=True)
    self.assertMultiLineEqual(wanted, serialised.decode('utf-8'))
def test_toc_with_schedule_no_heading(self):
    """With the attachment heading removed, the TOC title should be 'Schedule alias'."""
    doc = Document(work=self.work, document_xml=component_fixture(text="hi"), language=self.eng)

    # strip the heading element, the builder will use the FRBRalias instead
    ns = {'a': doc.doc.namespace}
    for heading in doc.doc.root.xpath('//a:attachment/a:heading', namespaces=ns):
        heading.getparent().remove(heading)

    toc = self.builder.table_of_contents_for_document(doc)

    section_entry = {
        'component': 'schedule1',
        'title': 'Section',
        'type': 'section',
        'id': 'sec_1',
        'subcomponent': 'section',
    }
    attachment_entry = {
        'type': 'attachment',
        'component': 'schedule1',
        'subcomponent': None,
        'title': 'Schedule alias',
        'heading': 'Schedule alias',
        'id': 'att_1',
        'children': [section_entry],
    }

    self.assertEqual([attachment_entry], [entry.as_dict() for entry in toc])
    self.assertEqual("att_1/sec_1", toc[0].children[0].qualified_id)
def test_find_with_empty_string(self):
    """An empty quoted term ("") should leave the body unchanged."""
    source_xml = """ <section eId="sec_1"> <num>1.</num> <heading/> <hcontainer eId="sec_1__hcontainer_1"> <content> <p>In this By-law, unless the context indicates otherwise-</p> <p>"" means the National Road Traffic Act, 1996 (<ref href="/akn/za/act/1996/93">Act No. 93 of 1996</ref>);</p> </content> </hcontainer> </section>"""
    wanted = '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0"> <section eId="sec_1"> <num>1.</num> <heading/> <hcontainer eId="sec_1__hcontainer_1"> <content> <p>In this By-law, unless the context indicates otherwise-</p> <p>"" means the National Road Traffic Act, 1996 (<ref href="/akn/za/act/1996/93">Act No. 93 of 1996</ref>);</p> </content> </hcontainer> </section> </body> '''

    doc = Document(work=self.work, content=document_fixture(xml=source_xml))

    self.maxDiff = None
    self.finder.find_terms_in_document(doc)

    serialised = etree.tostring(doc.doc.body, pretty_print=True)
    self.assertMultiLineEqual(wanted, serialised.decode('utf-8'))
def test_fancy_quotes(self):
    """A term wrapped in curly quotes should be marked up as a definition."""
    source_xml = """ <section eId="sec_1"> <num>1.</num> <heading>Definitions</heading> <hcontainer eId="sec_1__hcontainer_1"> <content> <p>“Act“ means the National Road Traffic Act, 1996 (<ref href="/akn/za/act/1996/93">Act No. 93 of 1996</ref>);</p> </content> </hcontainer> </section>"""
    wanted = '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0"> <section eId="sec_1"> <num>1.</num> <heading>Definitions</heading> <hcontainer eId="sec_1__hcontainer_1"> <content> <p refersTo="#term-Act">“<def refersTo="#term-Act">Act</def>“ means the National Road Traffic Act, 1996 (<ref href="/akn/za/act/1996/93">Act No. 93 of 1996</ref>);</p> </content> </hcontainer> </section> </body> '''

    doc = Document(work=self.work, content=document_fixture(xml=source_xml))

    self.maxDiff = None
    self.finder.find_terms_in_document(doc)

    serialised = etree.tostring(doc.doc.body, pretty_print=True, encoding='UTF-8')
    self.assertMultiLineEqual(wanted, serialised.decode('utf-8'))
def import_from_file(self, fname, frbr_uri):
    """Parse a file with the ``slaw`` command line tool and wrap the output.

    Builds a ``bundle exec slaw parse`` command from this importer's options,
    runs it through ``self.shell`` and turns the text written to stdout into
    either a ``Fragment`` (when ``self.fragment`` is set) or a ``Document``.

    :param fname: path of the file handed to slaw.
    :param frbr_uri: FRBR URI used to create and label the new document.
    :returns: a ``Fragment`` or a ``Document`` holding the parsed output.
    :raises ValueError: if the command exits with a non-zero code (the message
        is its stderr), or if it produced no output.
    """
    cmd = ['bundle', 'exec', 'slaw', 'parse']

    if self.fragment:
        cmd.extend(['--fragment', self.fragment])
        if self.fragment_id_prefix:
            cmd.extend(['--id-prefix', self.fragment_id_prefix])

    if self.section_number_position:
        cmd.extend(['--section-number-position', self.section_number_position])

    cmd.extend(['--grammar', self.slaw_grammar])
    cmd.append(fname)

    code, stdout, stderr = self.shell(cmd)

    # Any non-zero code is a failure; negative codes (e.g. a process ended by
    # a signal) must not fall through with partial output.
    if code != 0:
        if isinstance(stderr, bytes):
            stderr = stderr.decode('utf-8', 'replace')
        raise ValueError(stderr)

    if not stdout:
        raise ValueError("We couldn't get any useful text out of the file")

    text = stdout.decode('utf-8')

    if self.fragment:
        doc = Fragment(text)
    else:
        doc = Document.randomized(frbr_uri)
        doc.content = text
        doc.frbr_uri = frbr_uri  # reset it
        doc.title = None
        doc.copy_attributes()

    self.log.info("Successfully imported from %s", fname)
    return doc
def test_toc_with_schedule(self):
    """The TOC for the component fixture should be one attachment with one section child."""
    doc = Document(work=self.work, document_xml=component_fixture(text="hi"), language=self.eng)
    toc = self.builder.table_of_contents_for_document(doc)

    section_entry = {
        'component': 'schedule1',
        'title': 'Section',
        'type': 'section',
        'id': 'sec_1',
        'subcomponent': 'section',
    }
    attachment_entry = {
        'type': 'attachment',
        'component': 'schedule1',
        'subcomponent': None,
        'title': 'Schedule 1',
        'heading': 'Schedule 1',
        'id': 'att_1',
        'children': [section_entry],
    }

    self.assertEqual([attachment_entry], [entry.as_dict() for entry in toc])
    self.assertEqual("att_1/sec_1", toc[0].children[0].qualified_id)
def test_ignore_existing(self):
    """Text already inside a ``ref`` element should be left as it is."""
    source_xml = """ <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something to do with <ref href="/akn/za/act/2012/22">Act no 22 of 2012</ref>.</p> <p>And another thing about <ref href="/akn/za/act/1998/4"><b>Act 4 of 1998</b></ref>.</p> </content> </paragraph> </section>"""
    wanted = '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0"> <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something to do with <ref href="/akn/za/act/2012/22">Act no 22 of 2012</ref>.</p> <p>And another thing about <ref href="/akn/za/act/1998/4"><b>Act 4 of 1998</b></ref>.</p> </content> </paragraph> </section> </body> '''

    doc = Document(work=self.work, content=document_fixture(xml=source_xml))
    self.finder.find_references_in_document(doc)

    serialised = etree.tostring(doc.doc.body, encoding='utf-8', pretty_print=True)
    self.assertMultiLineEqual(wanted, serialised.decode('utf-8'))
def import_from_upload(self, upload, frbr_uri, request):
    """ Build a new Document from a
    :class:`django.core.files.uploadedfile.UploadedFile` instance, choosing
    the import route from the upload's content type.

    XML uploads are returned directly with their decoded content; every other
    type is passed to ``analyse_after_import`` before being returned.
    """
    self.reformat = True
    mime = upload.content_type

    if mime in ['text/xml', 'application/xml']:
        # just assume it's valid AKN xml
        imported = Document.randomized(frbr_uri)
        imported.content = upload.read().decode('utf-8')
        return imported

    if mime == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
        # pre-process docx to HTML and then import html
        imported = self.import_from_text(self.docx_to_html(upload), frbr_uri, '.html')
    elif mime == 'application/pdf':
        imported = self.import_from_pdf(upload, frbr_uri)
    else:
        # slaw will do its best
        with self.tempfile_for_upload(upload) as tmp:
            imported = self.import_from_file(tmp.name, frbr_uri)

    self.analyse_after_import(imported)
    return imported
def test_ignore_existing(self):
    """Text already inside a ``ref`` element should be left as it is."""
    source_xml = u""" <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something to do with <ref href="/za/act/2012/22">Act no 22 of 2012</ref>.</p> <p>And another thing about <ref href="/za/act/1998/4"><b>Act 4 of 1998</b></ref>.</p> </content> </paragraph> </section>"""
    wanted = '''<body xmlns="http://www.akomantoso.org/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something to do with <ref href="/za/act/2012/22">Act no 22 of 2012</ref>.</p> <p>And another thing about <ref href="/za/act/1998/4"><b>Act 4 of 1998</b></ref>.</p> </content> </paragraph> </section> </body> '''

    doc = Document(content=document_fixture(xml=source_xml))
    self.finder.find_references_in_document(doc)

    serialised = etree.tostring(doc.doc.body, pretty_print=True)
    self.assertMultiLineEqual(wanted, serialised)
def test_find_multiple_in_tail(self):
    """Several Act citations around a ``term`` element should each get their own ``ref``."""
    source_xml = u""" <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something Act 4 of 2000 and "<term>City</term>" means the (Act No. 117 of 1998) and also Act 5 of 2020;</p> </content> </paragraph> </section>"""
    wanted = '''<body xmlns="http://www.akomantoso.org/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something <ref href="/za/act/2000/4">Act 4 of 2000</ref> and "<term>City</term>" means the (<ref href="/za/act/1998/117">Act No. 117 of 1998</ref>) and also <ref href="/za/act/2020/5">Act 5 of 2020</ref>;</p> </content> </paragraph> </section> </body> '''

    doc = Document(content=document_fixture(xml=source_xml))
    self.finder.find_references_in_document(doc)

    serialised = etree.tostring(doc.doc.body, pretty_print=True)
    self.assertMultiLineEqual(wanted, serialised)
def test_find_without_act_in_parens(self):
    """A parenthesised 'No 58 of 1962' after an Act title should be linked on its own."""
    source_xml = u""" <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something to do with Income Tax Act, 1962 (No 58 of 1962).</p> </content> </paragraph> </section>"""
    wanted = '''<body xmlns="http://www.akomantoso.org/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>Something to do with Income Tax Act, 1962 (<ref href="/za/act/1962/58">No 58 of 1962</ref>).</p> </content> </paragraph> </section> </body> '''

    doc = Document(content=document_fixture(xml=source_xml))
    self.finder.find_references_in_document(doc)

    self.maxDiff = None
    serialised = etree.tostring(doc.doc.body, pretty_print=True)
    self.assertMultiLineEqual(wanted, serialised)
def test_unicode(self):
    """A quoted term containing a non-ASCII character should be marked up as a definition."""
    source_xml = u""" <section id="section-1"> <num>1.</num> <heading>Definitions</heading> <paragraph id="section-1.paragraph-0"> <content> <p>"Actë" means the National Road Traffic Act, 1996 (<ref href="/za/act/1996/93">Act No. 93 of 1996</ref>);</p> </content> </paragraph> </section> """
    wanted = '''<body xmlns="http://www.akomantoso.org/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <section id="section-1"> <num>1.</num> <heading>Definitions</heading> <paragraph id="section-1.paragraph-0"> <content> <p refersTo="#term-Actë">"<def refersTo="#term-Actë">Actë</def>" means the National Road Traffic Act, 1996 (<ref href="/za/act/1996/93">Act No. 93 of 1996</ref>);</p> </content> </paragraph> </section> </body> '''

    doc = Document(content=document_fixture(xml=source_xml))

    self.maxDiff = None
    self.finder.find_terms_in_document(doc)

    serialised = etree.tostring(doc.doc.body, pretty_print=True, encoding='UTF-8')
    self.assertMultiLineEqual(wanted, serialised)
def test_constitution(self):
    """English 'Constitution' citations should link to the constitution URI.

    The last three paragraphs of the fixture are listed as text that must
    not match.
    """
    source_xml = """ <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>the Constitution</p> <p>the Constitution, 1996</p> <p>the Constitution of South Africa</p> <p>the Constitution of the Republic of South Africa</p> <p>the Constitution of the Republic of South Africa, 1996</p> <p>the Constitution of the Republic of South Africa 1996</p> <p>the Constitution of the Republic of South Africa Act, 1996</p> <p>the Constitution of the Republic of South Africa, 1996 ( Act 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa, 1996 ( Act No 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa, 1996 ( Act No. 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa Act, 1996 ( Act No. 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa Act 108 of 1996</p> <p>the Constitution of the Republic of South Africa (Act 108 of 1996)</p> <p>the below shouldn't match</p> <p>Constitutionally unsound</p> <p>is unconstitutional</p> <p>their constitution is poor</p> </content> </paragraph> </section>"""
    wanted = '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0"> <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>the <ref href="/akn/za/act/1996/constitution">Constitution</ref></p> <p>the <ref href="/akn/za/act/1996/constitution">Constitution, 1996</ref></p> <p>the <ref href="/akn/za/act/1996/constitution">Constitution of South Africa</ref></p> <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa</ref></p> <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref></p> <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the 
Republic of South Africa 1996</ref></p> <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa Act, 1996</ref></p> <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Act 108 of 1996</ref> )</p> <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Act No 108 of 1996</ref> )</p> <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Act No. 108 of 1996</ref> )</p> <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa Act, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Act No. 108 of 1996</ref> )</p> <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa</ref> <ref href="/akn/za/act/1996/constitution">Act 108 of 1996</ref></p> <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa</ref> (<ref href="/akn/za/act/1996/constitution">Act 108 of 1996</ref>)</p> <p>the below shouldn't match</p> <p>Constitutionally unsound</p> <p>is unconstitutional</p> <p>their constitution is poor</p> </content> </paragraph> </section> </body> '''

    doc = Document(work=self.work, content=document_fixture(xml=source_xml))
    self.finder.find_references_in_document(doc)

    self.maxDiff = None
    serialised = etree.tostring(doc.doc.body, encoding='utf-8', pretty_print=True)
    self.assertMultiLineEqual(wanted, serialised.decode('utf-8'))
def test_find_with_empty_string(self):
    """An empty quoted term ("") should leave the body unchanged."""
    source_xml = u""" <section id="section-1"> <num>1.</num> <heading/> <paragraph id="section-1.paragraph-0"> <content> <p>In this By-law, unless the context indicates otherwise-</p> <p>"" means the National Road Traffic Act, 1996 (<ref href="/za/act/1996/93">Act No. 93 of 1996</ref>);</p> </content> </paragraph> </section> """
    wanted = '''<body xmlns="http://www.akomantoso.org/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <section id="section-1"> <num>1.</num> <heading/> <paragraph id="section-1.paragraph-0"> <content> <p>In this By-law, unless the context indicates otherwise-</p> <p>"" means the National Road Traffic Act, 1996 (<ref href="/za/act/1996/93">Act No. 93 of 1996</ref>);</p> </content> </paragraph> </section> </body> '''

    doc = Document(content=document_fixture(xml=source_xml))

    self.maxDiff = None
    self.finder.find_terms_in_document(doc)

    serialised = etree.tostring(doc.doc.body, pretty_print=True)
    self.assertMultiLineEqual(wanted, serialised)
def test_dont_link_constitution_in_constitution(self):
    """When the document is the constitution itself, its XML should be unchanged by the finder."""
    source_xml = """ <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>the Constitution</p> <p>the Constitution, 1996</p> <p>the Constitution of South Africa</p> <p>the Constitution of the Republic of South Africa</p> <p>the Constitution of the Republic of South Africa, 1996</p> <p>the Constitution of the Republic of South Africa 1996</p> <p>the Constitution of the Republic of South Africa Act, 1996</p> <p>the Constitution of the Republic of South Africa, 1996 ( Act 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa, 1996 ( Act No 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa, 1996 ( Act No. 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa Act, 1996 ( Act No. 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa Act 108 of 1996</p> <p>the Constitution of the Republic of South Africa (Act 108 of 1996)</p> </content> </paragraph> </section>"""

    constitution = Work(frbr_uri='/akn/za/act/1996/constitution')
    doc = Document(work=constitution, content=document_fixture(xml=source_xml))

    # snapshot the XML before changing the URI and running the finder
    before = doc.document_xml
    doc.doc.frbr_uri = FrbrUri.parse(constitution.frbr_uri)

    self.finder.find_references_in_document(doc)

    after = doc.document_xml
    self.assertMultiLineEqual(before, after)
def test_constitution(self):
    """English 'Constitution' citations should link to the constitution URI.

    The last three paragraphs of the fixture are listed as text that must
    not match.
    """
    source_xml = u""" <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>the Constitution</p> <p>the Constitution, 1996</p> <p>the Constitution of South Africa</p> <p>the Constitution of the Republic of South Africa</p> <p>the Constitution of the Republic of South Africa, 1996</p> <p>the Constitution of the Republic of South Africa 1996</p> <p>the Constitution of the Republic of South Africa Act, 1996</p> <p>the Constitution of the Republic of South Africa, 1996 ( Act 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa, 1996 ( Act No 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa, 1996 ( Act No. 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa Act, 1996 ( Act No. 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa Act 108 of 1996</p> <p>the Constitution of the Republic of South Africa (Act 108 of 1996)</p> <p>the below shouldn't match</p> <p>Constitutionally unsound</p> <p>is unconstitutional</p> <p>their constitution is poor</p> </content> </paragraph> </section> """
    wanted = '''<body xmlns="http://www.akomantoso.org/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>the <ref href="/za/act/1996/constitution">Constitution</ref></p> <p>the <ref href="/za/act/1996/constitution">Constitution, 1996</ref></p> <p>the <ref href="/za/act/1996/constitution">Constitution of South Africa</ref></p> <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa</ref></p> <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref></p> <p>the <ref href="/za/act/1996/constitution">Constitution of the 
Republic of South Africa 1996</ref></p> <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa Act, 1996</ref></p> <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref> ( <ref href="/za/act/1996/constitution">Act 108 of 1996</ref> )</p> <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref> ( <ref href="/za/act/1996/constitution">Act No 108 of 1996</ref> )</p> <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref> ( <ref href="/za/act/1996/constitution">Act No. 108 of 1996</ref> )</p> <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa Act, 1996</ref> ( <ref href="/za/act/1996/constitution">Act No. 108 of 1996</ref> )</p> <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa</ref> <ref href="/za/act/1996/constitution">Act 108 of 1996</ref></p> <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa</ref> (<ref href="/za/act/1996/constitution">Act 108 of 1996</ref>)</p> <p>the below shouldn't match</p> <p>Constitutionally unsound</p> <p>is unconstitutional</p> <p>their constitution is poor</p> </content> </paragraph> </section> </body> '''

    doc = Document(content=document_fixture(xml=source_xml))
    self.finder.find_references_in_document(doc)

    self.maxDiff = None
    serialised = etree.tostring(doc.doc.body, pretty_print=True)
    self.assertMultiLineEqual(wanted, serialised)
def test_table_of_contents_afr(self):
    """With language 'afr', chapter and part titles should use 'Hoofstuk' and 'Deel'."""
    fixture_xml = """ <body xmlns="http://www.akomantoso.org/2.0"> <section id="section-1"> <num>1.</num> <heading>Foo</heading> <content> <p>hello</p> </content> </section> <chapter id="chapter-1"> <num>1.</num> <heading>The Chapter</heading> <part id="part-A"> <num>A</num> <heading>The Part</heading> <section id="section-2"> <num>2.</num> <heading>Other</heading> <content> <p>hi</p> </content> </section> </part> </chapter> </body> """

    d = Document()
    d.content = document_fixture(xml=fixture_xml)
    d.language = 'afr'

    entries = [item.as_dict() for item in d.table_of_contents()]

    # expected tree, built from the innermost entry outwards
    section_2 = {
        'id': 'section-2', 'num': '2.', 'type': 'section', 'heading': 'Other',
        'component': 'main', 'subcomponent': 'section/2', 'title': '2. Other',
    }
    part_a = {
        'id': 'part-A', 'num': 'A', 'type': 'part', 'heading': 'The Part',
        'component': 'main', 'subcomponent': 'chapter/1/part/A',
        'title': 'Deel A - The Part',
        'children': [section_2],
    }
    chapter_1 = {
        'id': 'chapter-1', 'num': '1.', 'type': 'chapter', 'heading': 'The Chapter',
        'component': 'main', 'subcomponent': 'chapter/1',
        'title': 'Hoofstuk 1. - The Chapter',
        'children': [part_a],
    }
    section_1 = {
        'id': 'section-1', 'num': '1.', 'type': 'section', 'heading': 'Foo',
        'component': 'main', 'subcomponent': 'section/1', 'title': '1. Foo',
    }

    self.maxDiff = None
    self.assertEqual(entries, [section_1, chapter_1])