Esempio n. 1
0
    def test_preamble_and_friends_in_table_of_contents(self):
        """ The coverpage, preface, preamble and conclusions are each listed in
        the table of contents; the expected list has no entry for the body.
        """
        # show the full diff if the lists differ
        self.maxDiff = None

        doc = Document()
        doc.content = document_fixture(xml="""
        <coverpage>
            <content><p>hi</p></content>
        </coverpage>
        <preface>
            <content><p>hi</p></content>
        </preface>
        <preamble>
            <content><p>hi</p></content>
        </preamble>
        <body>
            <content><p>hi</p></content>
        </body>
        <conclusions>
            <content><p>hi></p></content>
        </conclusions>
        """)

        entries = [item.as_dict() for item in doc.table_of_contents()]
        wanted = [
            dict(type='coverpage', component='main', subcomponent='coverpage', title='Coverpage'),
            dict(type='preface', component='main', subcomponent='preface', title='Preface'),
            dict(type='preamble', component='main', subcomponent='preamble', title='Preamble'),
            dict(type='conclusions', component='main', subcomponent='conclusions', title='Conclusions'),
        ]
        self.assertEqual(entries, wanted)
Esempio n. 2
0
    def test_find_simple(self):
        """ "GN no 102 of 2012" and "SI 4 of 1998" in plain text are wrapped in
        <ref> elements by the references finder.
        """
        plain_xml = document_fixture(xml="""
        <section eId="sec_1">
          <num>1.</num>
          <heading>Tester</heading>
          <paragraph eId="sec_1.paragraph-0">
            <content>
              <p>Something to do with GN no 102 of 2012.</p>
              <p>And another thing about SI 4 of 1998.</p>
            </content>
          </paragraph>
        </section>""")
        document = Document(work=self.work, document_xml=plain_xml, language=self.eng)

        linked_xml = document_fixture(xml="""
        <section eId="sec_1">
          <num>1.</num>
          <heading>Tester</heading>
          <paragraph eId="sec_1.paragraph-0">
            <content>
              <p>Something to do with <ref href="/akn/za/act/gn/2012/102">GN no 102 of 2012</ref>.</p>
              <p>And another thing about <ref href="/akn/za/act/si/1998/4">SI 4 of 1998</ref>.</p>
            </content>
          </paragraph>
        </section>""")
        expected = Document(work=self.work, document_xml=linked_xml, language=self.eng)

        self.finder.find_references_in_document(document)

        # re-serialise the expected XML through lxml before comparing
        expected_root = etree.fromstring(expected.content)
        expected.content = etree.tostring(expected_root, encoding='utf-8').decode('utf-8')
        self.assertEqual(expected.content, document.content)
Esempio n. 3
0
    def test_italics_markup(self):
        """ Marking up italics wraps "Gazette", "habeus corpus" and "per" in <i>
        elements, using the terms in self.italics_terms.
        """
        plain_xml = document_fixture(xml="""
        <section id="section-1">
          <num>1.</num>
          <heading>Application of Act</heading>
          <content>
            <p>In the Gazette it says that habeus corpus is XYZ. As per the evidence of person X, Y.</p>
          </content>
        </section>
                """)
        document = Document(work=self.work, document_xml=plain_xml)

        italicised_xml = document_fixture(xml="""
        <section id="section-1">
          <num>1.</num>
          <heading>Application of Act</heading>
          <content>
            <p>In the <i>Gazette</i> it says that <i>habeus corpus</i> is XYZ. As <i>per</i> the evidence of person X, Y.</p>
          </content>
        </section>
                """)
        expected = Document(work=self.work, document_xml=italicised_xml)

        self.italics_terms_finder.mark_up_italics_in_document(document, self.italics_terms)

        # re-serialise the expected XML through lxml before comparing
        expected_root = etree.fromstring(expected.content)
        expected.content = etree.tostring(expected_root, encoding='utf-8').decode('utf-8')
        self.assertEqual(expected.content, document.content)
Esempio n. 4
0
    def test_find_simple(self):
        """ "no 22 of 2012" and "4 of 1998" following the word "Act" are wrapped
        in <ref> elements by the references finder.
        """
        plain_xml = document_fixture(xml="""
        <section id="section-1">
          <num>1.</num>
          <heading>Tester</heading>
          <paragraph id="section-1.paragraph-0">
            <content>
              <p>Something to do with Act no 22 of 2012.</p>
              <p>And another thing about Act 4 of 1998.</p>
            </content>
          </paragraph>
        </section>""")
        document = Document(document_xml=plain_xml, language=self.eng)

        linked_xml = document_fixture(xml="""
        <section id="section-1">
          <num>1.</num>
          <heading>Tester</heading>
          <paragraph id="section-1.paragraph-0">
            <content>
              <p>Something to do with Act <ref href="/za/act/2012/22">no 22 of 2012</ref>.</p>
              <p>And another thing about Act <ref href="/za/act/1998/4">4 of 1998</ref>.</p>
            </content>
          </paragraph>
        </section>""")
        expected = Document(document_xml=linked_xml, language=self.eng)

        self.finder.find_references_in_document(document)

        # re-serialise the expected XML through lxml before comparing
        expected_root = etree.fromstring(expected.content)
        expected.content = etree.tostring(expected_root, encoding='utf-8').decode('utf-8')
        self.assertEqual(expected.content, document.content)
Esempio n. 5
0
    def test_set_content(self):
        """ After assigning fixture content built from non-ASCII text, the FRBR
        URI, country and publication date can be read from the document.
        """
        doc = Document()
        doc.content = document_fixture(u'γνωρίζω')

        assert_equal(doc.frbr_uri, '/za/act/1900/1')
        assert_equal(doc.country, 'za')

        published = doc.doc.publication_date
        assert_equal(published, date(2005, 7, 24))
Esempio n. 6
0
    def test_section_of_this(self):
        """ "section 26 ... of this Act/Proclamation" is linked to #sec_26, while
        "section 26 of Act 5 of 2012" is left as plain text.
        """
        plain_xml = document_fixture(
            xml="""
      <section eId="sec_7">
        <num>7.</num>
        <heading>Active ref heading</heading>
        <content>
          <p>As given in section 26 of this Act, blah.</p>
          <p>As given in section 26 (1) of this Act, blah.</p>
          <p>As given in section 26 (1) of this Proclamation, blah.</p>
          <p>As given in section 26(1)(b)(iii)(dd)(A) of this Act, blah.</p>
          <p>In section 26 of Act 5 of 2012 it says one thing and in section 26 of this Act it says another.</p>
          <p>As <i>given</i> in (we're now in a tail) section 26 of this Act, blah.</p>
        </content>
      </section>
      <section eId="sec_26">
        <num>26.</num>
        <heading>Important heading</heading>
        <content>
          <p>An important provision.</p>
        </content>
      </section>
        """
        )
        document = Document(work=self.work, document_xml=plain_xml, language=self.eng)

        linked_xml = document_fixture(
            xml="""
      <section eId="sec_7">
        <num>7.</num>
        <heading>Active ref heading</heading>
        <content>
          <p>As given in <ref href="#sec_26">section 26</ref> of this Act, blah.</p>
          <p>As given in <ref href="#sec_26">section 26</ref> (1) of this Act, blah.</p>
          <p>As given in <ref href="#sec_26">section 26</ref> (1) of this Proclamation, blah.</p>
          <p>As given in <ref href="#sec_26">section 26</ref>(1)(b)(iii)(dd)(A) of this Act, blah.</p>
          <p>In section 26 of Act 5 of 2012 it says one thing and in <ref href="#sec_26">section 26</ref> of this Act it says another.</p>
          <p>As <i>given</i> in (we're now in a tail) <ref href="#sec_26">section 26</ref> of this Act, blah.</p>
        </content>
      </section>
      <section eId="sec_26">
        <num>26.</num>
        <heading>Important heading</heading>
        <content>
          <p>An important provision.</p>
        </content>
      </section>
        """
        )
        expected = Document(work=self.work, document_xml=linked_xml, language=self.eng)

        self.section_refs_finder.find_references_in_document(document)

        # re-serialise the expected XML through lxml before comparing
        expected_root = etree.fromstring(expected.content)
        expected.content = etree.tostring(expected_root, encoding='utf-8').decode('utf-8')
        self.assertEqual(expected.content, document.content)
Esempio n. 7
0
    def test_empty_expression_date(self):
        """ An expression date of '' or None is read back exactly as it was set. """
        doc = Document()
        doc.content = document_fixture('test')

        # empty string
        doc.expression_date = ''
        assert_equal(doc.expression_date, '')

        # no date at all
        doc.expression_date = None
        assert_equal(doc.expression_date, None)
Esempio n. 8
0
    def test_inherit_from_work(self):
        """ A saved document, once reloaded, has the FRBR URI and title of its work. """
        work = Work.objects.create(
            frbr_uri='/za/act/2009/test',
            title='Test document',
        )
        doc = Document(work=work, expression_date='2011-02-01')
        doc.save()

        reloaded = Document.objects.get(pk=doc.id)
        assert_equal(work.frbr_uri, reloaded.frbr_uri)
        assert_equal(work.title, reloaded.title)
Esempio n. 9
0
    def test_inherit_from_work(self):
        """ A saved document, once reloaded, has the FRBR URI and title of its work. """
        user = User.objects.get(pk=1)
        work = Work.objects.create(
            frbr_uri='/za/act/2009/test',
            title='Test document',
            country=Country.for_code('za'),
            created_by_user=user,
        )
        doc = Document(
            work=work,
            expression_date='2011-02-01',
            language=self.eng,
            created_by_user=user,
        )
        doc.save()

        reloaded = Document.objects.get(pk=doc.id)
        assert_equal(work.frbr_uri, reloaded.frbr_uri)
        assert_equal(work.title, reloaded.title)
Esempio n. 10
0
    def test_section_valid_and_invalid(self):
        """ References to section 26 (present in the fixture) are linked, while
        references to sections 35 and 200 (absent) and to section 26 of
        another Act stay as plain text.
        """
        plain_xml = document_fixture(
            xml="""
      <section eId="sec_7">
        <num>7.</num>
        <heading>Active ref heading</heading>
        <content>
          <p>As given in sections 26 and 35, one of which isn't in this document, blah.</p>
          <p>As given in sections 35 and 26, one of which isn't in this document, blah.</p>
          <p>In section 200 it says one thing and in section 26 it says another.</p>
          <p>As <i>given</i> in (we're now in a tail) section 200, blah, but section 26 says something else.</p>
          <p>As <i>given</i> in (we're now in a tail) section 26 of Act 5 of 2012, blah, but section 26 of this Act says something else.</p>
        </content>
      </section>
      <section eId="sec_26">
        <num>26.</num>
        <heading>The section we want</heading>
        <content>
          <p>The provision you're looking for.</p>
        </content>
      </section>
        """
        )
        document = Document(work=self.work, document_xml=plain_xml, language=self.eng)

        linked_xml = document_fixture(
            xml="""
      <section eId="sec_7">
        <num>7.</num>
        <heading>Active ref heading</heading>
        <content>
          <p>As given in sections <ref href="#sec_26">26</ref> and 35, one of which isn't in this document, blah.</p>
          <p>As given in sections 35 and <ref href="#sec_26">26</ref>, one of which isn't in this document, blah.</p>
          <p>In section 200 it says one thing and in <ref href="#sec_26">section 26</ref> it says another.</p>
          <p>As <i>given</i> in (we're now in a tail) section 200, blah, but <ref href="#sec_26">section 26</ref> says something else.</p>
          <p>As <i>given</i> in (we're now in a tail) section 26 of Act 5 of 2012, blah, but <ref href="#sec_26">section 26</ref> of this Act says something else.</p>
        </content>
      </section>
      <section eId="sec_26">
        <num>26.</num>
        <heading>The section we want</heading>
        <content>
          <p>The provision you're looking for.</p>
        </content>
      </section>
        """
        )
        expected = Document(work=self.work, document_xml=linked_xml, language=self.eng)

        self.section_refs_finder.find_references_in_document(document)

        # re-serialise the expected XML through lxml before comparing
        expected_root = etree.fromstring(expected.content)
        reserialised = etree.tostring(expected_root, encoding='utf-8').decode('utf-8')
        self.assertEqual(reserialised, document.content)
Esempio n. 11
0
    def test_find_simple(self):
        """ "Cap. 12" is linked to the work whose `cap` property is '12'.

        The test configures a `cap` work property for `za` in
        settings.INDIGO['WORK_PROPERTIES'], saves a work with cap 12 and expects
        the text "Cap. 12" to be wrapped in a <ref> to that work's FRBR URI.
        """
        za = Country.objects.get(pk=1)
        user1 = User.objects.get(pk=1)

        # settings.INDIGO is shared between tests, so register a cleanup that
        # puts back whatever was configured before this test changed it.
        # NOTE(review): assumes settings.INDIGO is a dict (get/pop) - confirm
        not_set = object()
        previous_properties = settings.INDIGO.get('WORK_PROPERTIES', not_set)

        def restore_work_properties():
            if previous_properties is not_set:
                settings.INDIGO.pop('WORK_PROPERTIES', None)
            else:
                settings.INDIGO['WORK_PROPERTIES'] = previous_properties

        self.addCleanup(restore_work_properties)

        settings.INDIGO['WORK_PROPERTIES'] = {
            'za': {
                'cap': 'Chapter (cap)',
            }
        }

        work = Work(
            frbr_uri='/akn/za/act/2002/5',
            title='Act 5 of 2002',
            country=za,
            created_by_user=user1,
        )
        work.properties['cap'] = '12'
        work.updated_by_user = user1
        work.save()

        document = Document(
            document_xml=document_fixture(
                xml="""
        <section eId="sec_1">
          <num>1.</num>
          <heading>Tester</heading>
          <paragraph eId="sec_1.paragraph-0">
            <content>
              <p>Something to do with Cap. 12.</p>
            </content>
          </paragraph>
        </section>"""
            ),
            language=self.eng,
            work=work)

        expected = Document(
            document_xml=document_fixture(
                xml="""
        <section eId="sec_1">
          <num>1.</num>
          <heading>Tester</heading>
          <paragraph eId="sec_1.paragraph-0">
            <content>
              <p>Something to do with <ref href="/akn/za/act/2002/5">Cap. 12</ref>.</p>
            </content>
          </paragraph>
        </section>"""
            ),
            language=self.eng,
            work=work)

        self.finder.find_references_in_document(document)

        # re-serialise the expected XML through lxml before comparing
        root = etree.fromstring(expected.content)
        expected.content = etree.tostring(root, encoding='utf-8').decode('utf-8')
        self.assertEqual(expected.content, document.content)
Esempio n. 12
0
    def test_find_multiple_in_tail(self):
        doc = Document(work=self.work,
                       content=document_fixture(xml="""
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>Something Act 4 of 2000 and "<term>City</term>" means the (Act No. 117 of 1998) and also Act 5 of 2020;</p>
    </content>
  </paragraph>
</section>"""))

        self.finder.find_references_in_document(doc)
        self.assertMultiLineEqual(
            '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0">
      
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>Something <ref href="/akn/za/act/2000/4">Act 4 of 2000</ref> and "<term>City</term>" means the (<ref href="/akn/za/act/1998/117">Act No. 117 of 1998</ref>) and also <ref href="/akn/za/act/2020/5">Act 5 of 2020</ref>;</p>
    </content>
  </paragraph>
</section>
    </body>
  
''',
            etree.tostring(doc.doc.body, encoding='utf-8',
                           pretty_print=True).decode('utf-8'))
Esempio n. 13
0
    def test_constitution(self):
        doc = Document(work=self.work,
                       content=document_fixture(xml="""
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>die Grondwet</p>
      <p>die Grondwet, 1996</p>
      <p>die Grondwet van Suid Afrika</p>
      <p>die Grondwet van die Republiek van Suid-Afrika</p>
      <p>die Grondwet van die Republiek van Suid-Afrika, 1996</p>
      <p>die Grondwet van die Republiek van Suid-Afrika 1996</p>
      <p>die Grondwet van die Republiek van Suid-Afrika Wet, 1996</p>
      <p>die Grondwet van die Republiek van Suid-Afrika, 1996 ( Wet 108 van 1996 )</p>
      <p>die Grondwet van die Republiek van Suid-Afrika, 1996 ( Wet No 108 van 1996 )</p>
      <p>die Grondwet van die Republiek van Suid-Afrika, 1996 ( Wet No. 108 van 1996 )</p>
      <p>die Grondwet van die Republiek van Suid-Afrika Wet, 1996 ( Wet No. 108 van 1996 )</p>
      <p>die Grondwet van die Republiek van Suid-Afrika  Wet 108 van 1996</p>
      <p>die Grondwet van die Republiek van Suid-Afrika (Wet 108 van 1996)</p>
      <p>the below shouldn't match</p>
      <p>enige grondwet</p>
    </content>
  </paragraph>
</section>"""))

        self.finder.find_references_in_document(doc)
        self.maxDiff = None
        self.assertMultiLineEqual(
            '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0">
      
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet</ref></p>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet, 1996</ref></p>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van Suid Afrika</ref></p>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika</ref></p>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika, 1996</ref></p>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika 1996</ref></p>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika Wet, 1996</ref></p>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Wet 108 van 1996</ref> )</p>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Wet No 108 van 1996</ref> )</p>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Wet No. 108 van 1996</ref> )</p>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika Wet, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Wet No. 108 van 1996</ref> )</p>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika</ref>  <ref href="/akn/za/act/1996/constitution">Wet 108 van 1996</ref></p>
      <p>die <ref href="/akn/za/act/1996/constitution">Grondwet van die Republiek van Suid-Afrika</ref> (<ref href="/akn/za/act/1996/constitution">Wet 108 van 1996</ref>)</p>
      <p>the below shouldn't match</p>
      <p>enige grondwet</p>
    </content>
  </paragraph>
</section>
    </body>
  
''',
            etree.tostring(doc.doc.body, encoding='utf-8',
                           pretty_print=True).decode('utf-8'))
Esempio n. 14
0
    def test_find_without_act_in_parens(self):
        doc = Document(work=self.work,
                       content=document_fixture(xml="""
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>Something to do with Income Tax Act, 1962 (No 58 of 1962).</p>
    </content>
  </paragraph>
</section>"""))

        self.finder.find_references_in_document(doc)
        self.maxDiff = None
        self.assertMultiLineEqual(
            '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0">
      
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>Something to do with Income Tax Act, 1962 (<ref href="/akn/za/act/1962/58">No 58 of 1962</ref>).</p>
    </content>
  </paragraph>
</section>
    </body>
  
''',
            etree.tostring(doc.doc.body, encoding='utf-8',
                           pretty_print=True).decode('utf-8'))
Esempio n. 15
0
    def test_toc_with_schedule_no_heading(self):
        """ With the attachment's heading removed, the TOC entry for the schedule
        is expected to carry 'Schedule alias' as both title and heading.
        """
        doc = Document(work=self.work,
                       document_xml=component_fixture(text="hi"),
                       language=self.eng)

        # strip the heading element, the builder will use the FRBRalias instead
        headings = doc.doc.root.xpath('//a:attachment/a:heading',
                                      namespaces={'a': doc.doc.namespace})
        for heading in headings:
            heading.getparent().remove(heading)

        toc = self.builder.table_of_contents_for_document(doc)

        section_entry = {
            'component': 'schedule1',
            'title': 'Section',
            'type': 'section',
            'id': 'sec_1',
            'subcomponent': 'section',
        }
        attachment_entry = {
            'type': 'attachment',
            'component': 'schedule1',
            'subcomponent': None,
            'title': 'Schedule alias',
            'heading': 'Schedule alias',
            'id': 'att_1',
            'children': [section_entry],
        }
        self.assertEqual([attachment_entry], [entry.as_dict() for entry in toc])

        # the nested section's id is qualified by the attachment's id
        self.assertEqual("att_1/sec_1", toc[0].children[0].qualified_id)
Esempio n. 16
0
    def test_find_with_empty_string(self):
        doc = Document(work=self.work,
                       content=document_fixture(xml="""
<section eId="sec_1">
  <num>1.</num>
  <heading/>
  <hcontainer eId="sec_1__hcontainer_1">
    <content>
      <p>In this By-law, unless the context indicates otherwise-</p>
      <p>"" means the National Road Traffic Act, 1996 (<ref href="/akn/za/act/1996/93">Act No. 93 of 1996</ref>);</p>
    </content>
  </hcontainer>
</section>"""))

        self.maxDiff = None
        self.finder.find_terms_in_document(doc)
        self.assertMultiLineEqual(
            '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0">
      
<section eId="sec_1">
  <num>1.</num>
  <heading/>
  <hcontainer eId="sec_1__hcontainer_1">
    <content>
      <p>In this By-law, unless the context indicates otherwise-</p>
      <p>"" means the National Road Traffic Act, 1996 (<ref href="/akn/za/act/1996/93">Act No. 93 of 1996</ref>);</p>
    </content>
  </hcontainer>
</section>
    </body>
  
''',
            etree.tostring(doc.doc.body, pretty_print=True).decode('utf-8'))
Esempio n. 17
0
    def test_fancy_quotes(self):
        doc = Document(work=self.work,
                       content=document_fixture(xml="""
<section eId="sec_1">
  <num>1.</num>
  <heading>Definitions</heading>
  <hcontainer eId="sec_1__hcontainer_1">
    <content>
      <p>“Act“ means the National Road Traffic Act, 1996 (<ref href="/akn/za/act/1996/93">Act No. 93 of 1996</ref>);</p>
    </content>
  </hcontainer>
</section>"""))

        self.maxDiff = None
        self.finder.find_terms_in_document(doc)
        self.assertMultiLineEqual(
            '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0">
      
<section eId="sec_1">
  <num>1.</num>
  <heading>Definitions</heading>
  <hcontainer eId="sec_1__hcontainer_1">
    <content>
      <p refersTo="#term-Act">“<def refersTo="#term-Act">Act</def>“ means the National Road Traffic Act, 1996 (<ref href="/akn/za/act/1996/93">Act No. 93 of 1996</ref>);</p>
    </content>
  </hcontainer>
</section>
    </body>
  
''',
            etree.tostring(doc.doc.body, pretty_print=True,
                           encoding='UTF-8').decode('utf-8'))
Esempio n. 18
0
    def import_from_file(self, fname, frbr_uri):
        """ Parse the file `fname` by running `bundle exec slaw parse` and wrap
        the output in a Fragment or a Document.

        The command line is built from this object's settings: `fragment` (and
        `fragment_id_prefix`), `section_number_position` and `slaw_grammar`.

        :param fname: path of the file handed to slaw
        :param frbr_uri: FRBR URI given to the new Document (unused when
            `self.fragment` is set)
        :return: a Fragment when `self.fragment` is set, otherwise a Document
        :raises ValueError: if the command exits with a non-zero code (the
            error carries its stderr) or produces no output
        """
        cmd = ['bundle', 'exec', 'slaw', 'parse']

        if self.fragment:
            cmd.extend(['--fragment', self.fragment])
            if self.fragment_id_prefix:
                cmd.extend(['--id-prefix', self.fragment_id_prefix])

        if self.section_number_position:
            cmd.extend(
                ['--section-number-position', self.section_number_position])

        cmd.extend(['--grammar', self.slaw_grammar])
        cmd.append(fname)

        code, stdout, stderr = self.shell(cmd)

        # Any non-zero code is a failure. Python's subprocess reports a process
        # killed by a signal as a negative return code, which `code > 0` let
        # through to be parsed as if it had succeeded.
        # NOTE(review): presumably self.shell returns the subprocess return
        # code unchanged - confirm.
        if code != 0:
            raise ValueError(stderr)

        if not stdout:
            raise ValueError("We couldn't get any useful text out of the file")

        if self.fragment:
            doc = Fragment(stdout.decode('utf-8'))
        else:
            doc = Document.randomized(frbr_uri)
            doc.content = stdout.decode('utf-8')
            doc.frbr_uri = frbr_uri  # reset it
            doc.title = None
            doc.copy_attributes()

        self.log.info("Successfully imported from %s" % fname)
        return doc
Esempio n. 19
0
    def test_toc_with_schedule(self):
        """ The TOC for the component fixture has one attachment entry titled
        'Schedule 1' with a single section child.
        """
        doc = Document(work=self.work,
                       document_xml=component_fixture(text="hi"),
                       language=self.eng)

        toc = self.builder.table_of_contents_for_document(doc)

        section_entry = {
            'component': 'schedule1',
            'title': 'Section',
            'type': 'section',
            'id': 'sec_1',
            'subcomponent': 'section',
        }
        attachment_entry = {
            'type': 'attachment',
            'component': 'schedule1',
            'subcomponent': None,
            'title': 'Schedule 1',
            'heading': 'Schedule 1',
            'id': 'att_1',
            'children': [section_entry],
        }
        self.assertEqual([attachment_entry], [entry.as_dict() for entry in toc])

        # the nested section's id is qualified by the attachment's id
        self.assertEqual("att_1/sec_1", toc[0].children[0].qualified_id)
Esempio n. 20
0
    def test_ignore_existing(self):
        doc = Document(work=self.work,
                       content=document_fixture(xml="""
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>Something to do with <ref href="/akn/za/act/2012/22">Act no 22 of 2012</ref>.</p>
      <p>And another thing about <ref href="/akn/za/act/1998/4"><b>Act 4 of 1998</b></ref>.</p>
    </content>
  </paragraph>
</section>"""))

        self.finder.find_references_in_document(doc)
        self.assertMultiLineEqual(
            '''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0">
      
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>Something to do with <ref href="/akn/za/act/2012/22">Act no 22 of 2012</ref>.</p>
      <p>And another thing about <ref href="/akn/za/act/1998/4"><b>Act 4 of 1998</b></ref>.</p>
    </content>
  </paragraph>
</section>
    </body>
  
''',
            etree.tostring(doc.doc.body, encoding='utf-8',
                           pretty_print=True).decode('utf-8'))
Esempio n. 21
0
    def import_from_upload(self, upload, frbr_uri, request):
        """ Build a new Document from an uploaded file (a
        :class:`django.core.files.uploadedfile.UploadedFile` instance).

        XML uploads are used as the document content as-is and returned
        straight away. Word (docx) uploads are converted to HTML and imported
        as text, PDFs go through `import_from_pdf`, and anything else is
        written to a temporary file and handed to `import_from_file`. These
        non-XML imports are passed to `analyse_after_import` before being
        returned.
        """
        self.reformat = True
        content_type = upload.content_type

        if content_type in ('text/xml', 'application/xml'):
            # just assume it's valid AKN xml
            xml_doc = Document.randomized(frbr_uri)
            xml_doc.content = upload.read().decode('utf-8')
            return xml_doc

        if content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
            # pre-process docx to HTML and then import html
            imported = self.import_from_text(self.docx_to_html(upload), frbr_uri, '.html')
        elif content_type == 'application/pdf':
            imported = self.import_from_pdf(upload, frbr_uri)
        else:
            # slaw will do its best
            with self.tempfile_for_upload(upload) as tmp:
                imported = self.import_from_file(tmp.name, frbr_uri)

        self.analyse_after_import(imported)
        return imported
Esempio n. 22
0
    def test_ignore_existing(self):
        doc = Document(content=document_fixture(xml=u"""
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>Something to do with <ref href="/za/act/2012/22">Act no 22 of 2012</ref>.</p>
      <p>And another thing about <ref href="/za/act/1998/4"><b>Act 4 of 1998</b></ref>.</p>
    </content>
  </paragraph>
</section>"""))

        self.finder.find_references_in_document(doc)
        self.assertMultiLineEqual(
            '''<body xmlns="http://www.akomantoso.org/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
      
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>Something to do with <ref href="/za/act/2012/22">Act no 22 of 2012</ref>.</p>
      <p>And another thing about <ref href="/za/act/1998/4"><b>Act 4 of 1998</b></ref>.</p>
    </content>
  </paragraph>
</section>
    </body>
  
''', etree.tostring(doc.doc.body, pretty_print=True))
Esempio n. 23
0
    def test_find_multiple_in_tail(self):
        doc = Document(content=document_fixture(xml=u"""
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>Something Act 4 of 2000 and "<term>City</term>" means the (Act No. 117 of 1998) and also Act 5 of 2020;</p>
    </content>
  </paragraph>
</section>"""))

        self.finder.find_references_in_document(doc)
        self.assertMultiLineEqual(
            '''<body xmlns="http://www.akomantoso.org/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
      
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>Something <ref href="/za/act/2000/4">Act 4 of 2000</ref> and "<term>City</term>" means the (<ref href="/za/act/1998/117">Act No. 117 of 1998</ref>) and also <ref href="/za/act/2020/5">Act 5 of 2020</ref>;</p>
    </content>
  </paragraph>
</section>
    </body>
  
''', etree.tostring(doc.doc.body, pretty_print=True))
Esempio n. 24
0
    def test_find_without_act_in_parens(self):
        doc = Document(content=document_fixture(xml=u"""
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>Something to do with Income Tax Act, 1962 (No 58 of 1962).</p>
    </content>
  </paragraph>
</section>"""))

        self.finder.find_references_in_document(doc)
        self.maxDiff = None
        self.assertMultiLineEqual(
            '''<body xmlns="http://www.akomantoso.org/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
      
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>Something to do with Income Tax Act, 1962 (<ref href="/za/act/1962/58">No 58 of 1962</ref>).</p>
    </content>
  </paragraph>
</section>
    </body>
  
''', etree.tostring(doc.doc.body, pretty_print=True))
Esempio n. 25
0
    def test_unicode(self):
        doc = Document(content=document_fixture(xml=u"""
<section id="section-1">
  <num>1.</num>
  <heading>Definitions</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>"Actë" means the National Road Traffic Act, 1996 (<ref href="/za/act/1996/93">Act No. 93 of 1996</ref>);</p>
    </content>
  </paragraph>
</section>
        """))

        self.maxDiff = None
        self.finder.find_terms_in_document(doc)
        self.assertMultiLineEqual(
            '''<body xmlns="http://www.akomantoso.org/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <section id="section-1">
    <num>1.</num>
    <heading>Definitions</heading>
    <paragraph id="section-1.paragraph-0">
      <content>
        <p refersTo="#term-Actë">"<def refersTo="#term-Actë">Actë</def>" means the National Road Traffic Act, 1996 (<ref href="/za/act/1996/93">Act No. 93 of 1996</ref>);</p>
      </content>
    </paragraph>
  </section>
</body>
''', etree.tostring(doc.doc.body, pretty_print=True, encoding='UTF-8'))
Esempio n. 26
0
    def test_constitution(self):
        doc = Document(work=self.work, content=document_fixture(xml="""
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>the Constitution</p>
      <p>the Constitution, 1996</p>
      <p>the Constitution of South Africa</p>
      <p>the Constitution of the Republic of South Africa</p>
      <p>the Constitution of the Republic of South Africa, 1996</p>
      <p>the Constitution of the Republic of South Africa 1996</p>
      <p>the Constitution of the Republic of South Africa Act, 1996</p>
      <p>the Constitution of the Republic of South Africa, 1996 ( Act 108 of 1996 )</p>
      <p>the Constitution of the Republic of South Africa, 1996 ( Act No 108 of 1996 )</p>
      <p>the Constitution of the Republic of South Africa, 1996 ( Act No. 108 of 1996 )</p>
      <p>the Constitution of the Republic of South Africa Act, 1996 ( Act No. 108 of 1996 )</p>
      <p>the Constitution of the Republic of South Africa  Act 108 of 1996</p>
      <p>the Constitution of the Republic of South Africa (Act 108 of 1996)</p>
      <p>the below shouldn't match</p>
      <p>Constitutionally unsound</p>
      <p>is unconstitutional</p>
      <p>their constitution is poor</p>
    </content>
  </paragraph>
</section>"""))

        self.finder.find_references_in_document(doc)
        self.maxDiff = None
        self.assertMultiLineEqual('''<body xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0">
      
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution</ref></p>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution, 1996</ref></p>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution of South Africa</ref></p>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa</ref></p>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref></p>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa 1996</ref></p>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa Act, 1996</ref></p>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Act 108 of 1996</ref> )</p>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Act No 108 of 1996</ref> )</p>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Act No. 108 of 1996</ref> )</p>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa Act, 1996</ref> ( <ref href="/akn/za/act/1996/constitution">Act No. 108 of 1996</ref> )</p>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa</ref>  <ref href="/akn/za/act/1996/constitution">Act 108 of 1996</ref></p>
      <p>the <ref href="/akn/za/act/1996/constitution">Constitution of the Republic of South Africa</ref> (<ref href="/akn/za/act/1996/constitution">Act 108 of 1996</ref>)</p>
      <p>the below shouldn't match</p>
      <p>Constitutionally unsound</p>
      <p>is unconstitutional</p>
      <p>their constitution is poor</p>
    </content>
  </paragraph>
</section>
    </body>
  
''', etree.tostring(doc.doc.body, encoding='utf-8', pretty_print=True).decode('utf-8'))
Esempio n. 27
0
    def test_find_with_empty_string(self):
        doc = Document(content=document_fixture(xml=u"""
<section id="section-1">
  <num>1.</num>
  <heading/>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>In this By-law, unless the context indicates otherwise-</p>
      <p>"" means the National Road Traffic Act, 1996 (<ref href="/za/act/1996/93">Act No. 93 of 1996</ref>);</p>
    </content>
  </paragraph>
</section>
        """))

        self.maxDiff = None
        self.finder.find_terms_in_document(doc)
        self.assertMultiLineEqual(
            '''<body xmlns="http://www.akomantoso.org/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <section id="section-1">
    <num>1.</num>
    <heading/>
    <paragraph id="section-1.paragraph-0">
      <content>
        <p>In this By-law, unless the context indicates otherwise-</p>
        <p>"" means the National Road Traffic Act, 1996 (<ref href="/za/act/1996/93">Act No. 93 of 1996</ref>);</p>
      </content>
    </paragraph>
  </section>
</body>
''', etree.tostring(doc.doc.body, pretty_print=True))
Esempio n. 28
0
    def test_dont_link_constitution_in_constitution(self):
        """The finder must not add references when the document is the Constitution.

        The document is attached to a work whose FRBR URI is the
        Constitution's own, so running the reference finder over text that
        mentions the Constitution should leave the XML exactly as it was.
        """
        fixture_xml = """
        <section id="section-1">
          <num>1.</num>
          <heading>Tester</heading>
          <paragraph id="section-1.paragraph-0">
            <content>
              <p>the Constitution</p>
              <p>the Constitution, 1996</p>
              <p>the Constitution of South Africa</p>
              <p>the Constitution of the Republic of South Africa</p>
              <p>the Constitution of the Republic of South Africa, 1996</p>
              <p>the Constitution of the Republic of South Africa 1996</p>
              <p>the Constitution of the Republic of South Africa Act, 1996</p>
              <p>the Constitution of the Republic of South Africa, 1996 ( Act 108 of 1996 )</p>
              <p>the Constitution of the Republic of South Africa, 1996 ( Act No 108 of 1996 )</p>
              <p>the Constitution of the Republic of South Africa, 1996 ( Act No. 108 of 1996 )</p>
              <p>the Constitution of the Republic of South Africa Act, 1996 ( Act No. 108 of 1996 )</p>
              <p>the Constitution of the Republic of South Africa  Act 108 of 1996</p>
              <p>the Constitution of the Republic of South Africa (Act 108 of 1996)</p>
            </content>
          </paragraph>
        </section>"""

        constitution = Work(frbr_uri='/akn/za/act/1996/constitution')
        doc = Document(
            work=constitution,
            content=document_fixture(xml=fixture_xml),
        )

        # Snapshot the XML first, then give the parsed document the
        # Constitution's own FRBR URI before the finder runs.
        xml_before = doc.document_xml
        own_uri = FrbrUri.parse(constitution.frbr_uri)
        doc.doc.frbr_uri = own_uri

        self.finder.find_references_in_document(doc)

        xml_after = doc.document_xml
        self.assertMultiLineEqual(xml_before, xml_after)
Esempio n. 29
0
    def test_constitution(self):
        doc = Document(content=document_fixture(xml=u"""
<section id="section-1">
  <num>1.</num>
  <heading>Tester</heading>
  <paragraph id="section-1.paragraph-0">
    <content>
      <p>the Constitution</p>
      <p>the Constitution, 1996</p>
      <p>the Constitution of South Africa</p>
      <p>the Constitution of the Republic of South Africa</p>
      <p>the Constitution of the Republic of South Africa, 1996</p>
      <p>the Constitution of the Republic of South Africa 1996</p>
      <p>the Constitution of the Republic of South Africa Act, 1996</p>
      <p>the Constitution of the Republic of South Africa, 1996 ( Act 108 of 1996 )</p>
      <p>the Constitution of the Republic of South Africa, 1996 ( Act No 108 of 1996 )</p>
      <p>the Constitution of the Republic of South Africa, 1996 ( Act No. 108 of 1996 )</p>
      <p>the Constitution of the Republic of South Africa Act, 1996 ( Act No. 108 of 1996 )</p>
      <p>the Constitution of the Republic of South Africa  Act 108 of 1996</p>
      <p>the Constitution of the Republic of South Africa (Act 108 of 1996)</p>
      <p>the below shouldn't match</p>
      <p>Constitutionally unsound</p>
      <p>is unconstitutional</p>
      <p>their constitution is poor</p>
    </content>
  </paragraph>
</section>
        """))

        self.finder.find_references_in_document(doc)
        self.maxDiff = None
        self.assertMultiLineEqual(
            '''<body xmlns="http://www.akomantoso.org/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <section id="section-1">
    <num>1.</num>
    <heading>Tester</heading>
    <paragraph id="section-1.paragraph-0">
      <content>
        <p>the <ref href="/za/act/1996/constitution">Constitution</ref></p>
        <p>the <ref href="/za/act/1996/constitution">Constitution, 1996</ref></p>
        <p>the <ref href="/za/act/1996/constitution">Constitution of South Africa</ref></p>
        <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa</ref></p>
        <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref></p>
        <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa 1996</ref></p>
        <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa Act, 1996</ref></p>
        <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref> ( <ref href="/za/act/1996/constitution">Act 108 of 1996</ref> )</p>
        <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref> ( <ref href="/za/act/1996/constitution">Act No 108 of 1996</ref> )</p>
        <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa, 1996</ref> ( <ref href="/za/act/1996/constitution">Act No. 108 of 1996</ref> )</p>
        <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa Act, 1996</ref> ( <ref href="/za/act/1996/constitution">Act No. 108 of 1996</ref> )</p>
        <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa</ref>  <ref href="/za/act/1996/constitution">Act 108 of 1996</ref></p>
        <p>the <ref href="/za/act/1996/constitution">Constitution of the Republic of South Africa</ref> (<ref href="/za/act/1996/constitution">Act 108 of 1996</ref>)</p>
        <p>the below shouldn't match</p>
        <p>Constitutionally unsound</p>
        <p>is unconstitutional</p>
        <p>their constitution is poor</p>
      </content>
    </paragraph>
  </section>
</body>
''', etree.tostring(doc.doc.body, pretty_print=True))
Esempio n. 30
0
    def test_table_of_contents_afr(self):
        """The table of contents uses Afrikaans titles when language is 'afr'.

        Builds a document with a top-level section plus a chapter containing
        a part containing a section, sets the language to ``'afr'``, and
        checks that the chapter and part titles come out as "Hoofstuk ..."
        and "Deel ..." while section titles are just number and heading.
        """
        # Show the complete diff if the comparison below fails.
        self.maxDiff = None

        body_xml = """
        <body xmlns="http://www.akomantoso.org/2.0">
          <section id="section-1">
            <num>1.</num>
            <heading>Foo</heading>
            <content>
              <p>hello</p>
            </content>
          </section>
          <chapter id="chapter-1">
            <num>1.</num>
            <heading>The Chapter</heading>
            <part id="part-A">
              <num>A</num>
              <heading>The Part</heading>
              <section id="section-2">
                <num>2.</num>
                <heading>Other</heading>
                <content>
                  <p>hi</p>
                </content>
              </section>
            </part>
          </chapter>
        </body>
        """

        d = Document()
        d.content = document_fixture(xml=body_xml)
        d.language = 'afr'

        entries = [item.as_dict() for item in d.table_of_contents()]

        # Expected entries, assembled from the innermost element outwards.
        nested_section = {
            'id': 'section-2',
            'num': '2.',
            'type': 'section',
            'heading': 'Other',
            'component': 'main',
            'subcomponent': 'section/2',
            'title': '2. Other',
        }
        part = {
            'id': 'part-A',
            'num': 'A',
            'type': 'part',
            'heading': 'The Part',
            'component': 'main',
            'subcomponent': 'chapter/1/part/A',
            'title': 'Deel A - The Part',
            'children': [nested_section],
        }
        chapter = {
            'id': 'chapter-1',
            'num': '1.',
            'type': 'chapter',
            'heading': 'The Chapter',
            'component': 'main',
            'subcomponent': 'chapter/1',
            'title': 'Hoofstuk 1. - The Chapter',
            'children': [part],
        }
        top_section = {
            'id': 'section-1',
            'num': '1.',
            'type': 'section',
            'heading': 'Foo',
            'component': 'main',
            'subcomponent': 'section/1',
            'title': '1. Foo',
        }

        self.assertEqual(entries, [top_section, chapter])