Beispiel #1
0
 def test_process_fenced(self):
     """A fenced block following plain text yields a single entry.

     The entry's text is expected to equal the node text from offset 16
     onward (i.e. everything after the first line), and its
     ``fence_data`` should hold the fence type and the enclosed lines.
     """
     node = Node("Content content\n```abc def\nLine 1\nLine 2\n```")
     layers = formatting.Formatting(None).process(node)
     self.assertEqual(1, len(layers))

     fence = layers[0]
     expected_data = {'type': 'abc def', 'lines': ['Line 1', 'Line 2']}
     self.assertEqual(fence['text'], node.text[16:])
     self.assertEqual(fence['fence_data'], expected_data)
Beispiel #2
0
 def test_process_subscript(self):
     """Repeated ``a_{subscript}`` markup is reported as one entry.

     That entry is expected to list both occurrences in ``locations``
     and to split the markup into its variable and subscript parts.
     """
     node = Node("This is a_{subscript}. And then a_{subscript} again")
     layers = formatting.Formatting(None).process(node)
     self.assertEqual(1, len(layers))

     subscript = layers[0]
     self.assertEqual(subscript['text'], "a_{subscript}")
     self.assertEqual(subscript['locations'], [0, 1])

     expected_data = {'variable': 'a', 'subscript': 'subscript'}
     self.assertEqual(subscript['subscript_data'], expected_data)
Beispiel #3
0
    def test_process_dashes(self):
        """Text ending in a run of underscores yields one dash entry.

        The entry is expected to cover the whole node text, while its
        ``dash_data`` holds the text with the trailing underscores removed.
        """
        text = "This is an fp-dash_____"
        layers = formatting.Formatting(None).process(Node(text))
        self.assertEqual(1, len(layers))

        dash = layers[0]
        self.assertEqual(dash['text'], text)
        self.assertEqual(dash['locations'], [0])
        expected_data = {'text': 'This is an fp-dash'}
        self.assertEqual(dash['dash_data'], expected_data)
    def test_process_table(self):
        """A GPOTABLE with a three-level header and three rows is processed.

        Verifies that:
        * nested markup inside CHED/ENT cells is flattened into plain text
          ("3-3 Content Here", "33 More") in both the markdown and the
          resulting table data;
        * each header cell gets the expected colspan/rowspan;
        * an empty ENT becomes an empty string in its row.
        """
        with XMLBuilder("GPOTABLE") as ctx:
            with ctx.BOXHD():
                ctx.CHED("1-1", H=1)
                ctx.CHED("1-2", H=1)
                ctx.CHED("2-1", H=2)
                ctx.CHED("3-1", H=3)
                ctx.CHED("2-2", H=2)
                ctx.CHED("3-2", H=3)
                # Header cell with mixed content (text + child element + tail)
                ctx.child_from_string(
                    '<CHED H="3">3-3<LI>Content</LI>Here</CHED>')
            with ctx.ROW():
                ctx.ENT("11")
                ctx.ENT("12")
                ctx.ENT("13")
                ctx.ENT("14")
            with ctx.ROW():
                ctx.ENT("21")
                ctx.ENT("22")
                ctx.ENT("23")
            with ctx.ROW():
                ctx.ENT()
                ctx.ENT("32")
                # Body cell with mixed content
                ctx.child_from_string('<ENT>33<E>More</E></ENT>')
                ctx.ENT("34")
        markdown = formatting.table_xml_to_plaintext(ctx.xml)
        # assertIn reports both operands on failure, unlike assertTrue
        self.assertIn("3-3 Content Here", markdown)
        self.assertIn("33 More", markdown)
        node = Node(markdown, source_xml=ctx.xml)
        result = formatting.Formatting(None).process(node)
        self.assertEqual(1, len(result))
        result = result[0]

        self.assertEqual(markdown, result['text'])
        self.assertEqual([0], result['locations'])

        def mkhd(t, c, r):
            """Build the expected dict for one header cell."""
            return {'text': t, 'colspan': c, 'rowspan': r}

        data = result['table_data']
        self.assertEqual(
            data['header'],
            [[mkhd('1-1', 1, 3), mkhd('1-2', 3, 1)],
             [mkhd('2-1', 1, 1), mkhd('2-2', 2, 1)],
             [
                 mkhd('3-1', 1, 1),
                 mkhd('3-2', 1, 1),
                 mkhd('3-3 Content Here', 1, 1)
             ]])
        self.assertEqual(data['rows'],
                         [['11', '12', '13', '14'], ['21', '22', '23'],
                          ['', '32', '33 More', '34']])
    def test_awkward_table(self):
        """Header cells of uneven depth get the expected row/column spans.

        The header under test should be laid out like this:

        |R1C1     |R1C2               |
        |R2C1|R2C2|R2C3     |R2C4     |
        |    |    |R3C1|R3C2|R3C3|R3C4|
        """
        with XMLBuilder("GPOTABLE", COLS=6) as ctx:
            with ctx.BOXHD():
                ctx.CHED(u"R1C1", H=1)
                ctx.CHED(u"R2C1", H=2)
                ctx.CHED(u"R2C2", H=2)
                ctx.CHED(u"R1C2", H=1)
                ctx.CHED(u"R2C3", H=2)
                ctx.CHED(u"R3C1", H=3)
                ctx.CHED(u"R3C2", H=3)
                ctx.CHED(u"R2C4", H=2)
                ctx.CHED(u"R3C3", H=3)
                ctx.CHED(u"R3C4", H=3)

        markdown = formatting.table_xml_to_plaintext(ctx.xml)
        # assertIn reports both operands on failure, unlike assertTrue
        self.assertIn("R1C1", markdown)
        self.assertIn("R2C2", markdown)

        node = Node(markdown, source_xml=ctx.xml)
        result = formatting.Formatting(None).process(node)
        self.assertEqual(1, len(result))
        result = result[0]

        self.assertEqual(markdown, result['text'])
        self.assertEqual([0], result['locations'])
        data = result['table_data']
        self.assertIn("header", data)

        # Verify header matches:
        def mkhd(t, c, r):
            """Build the expected dict for one header cell."""
            return {'text': t, 'colspan': c, 'rowspan': r}

        self.assertEqual(
            data["header"],
            [[mkhd("R1C1", 2, 1), mkhd("R1C2", 4, 1)],
             [
                 mkhd("R2C1", 1, 2),
                 mkhd("R2C2", 1, 2),
                 mkhd("R2C3", 2, 1),
                 mkhd("R2C4", 2, 1)
             ],
             [
                 mkhd("R3C1", 1, 1),
                 mkhd("R3C2", 1, 1),
                 mkhd("R3C3", 1, 1),
                 mkhd("R3C4", 1, 1)
             ]])
    def test_table_with_caption_with_footnote_as_caption(self):
        """A TTITLE containing a footnote reference becomes the caption.

        Caption[^1](No work of any kind shall be conducted)
         Caption[^1]
        |R1C1       |
        |R2C1 |R2C2 |

        This is testing the implementation of the TTITLE as a caption element.
        Processing is expected to yield two entries: the table and the
        footnote referenced from its caption.
        """
        with XMLBuilder("GPOTABLE", COLS=6) as ctx:
            ctx.child_from_string("<TTITLE>Caption<SU>1</SU></TTITLE>")
            with ctx.BOXHD():
                ctx.CHED(u"R1C1", H=1)
                ctx.CHED(u"R2C1", H=2)
                ctx.CHED(u"R2C2", H=2)
            ctx.child_from_string(
                "<TNOTE><SU>1</SU> No work of any kind shall be conducted"
                "</TNOTE>")

        # Run the footnote preprocessor over the XML before conversion;
        # presumably this is what rewrites the <SU> reference into the
        # "[^1](...)" form asserted below -- confirm against preprocessors.
        preprocessor = preprocessors.Footnotes()
        preprocessor.transform(ctx.xml)
        markdown = formatting.table_xml_to_plaintext(ctx.xml)
        # assertIn reports both operands on failure, unlike assertTrue
        self.assertIn("R1C1", markdown)
        self.assertIn("R2C2", markdown)

        node = Node(markdown, source_xml=ctx.xml)
        result = formatting.Formatting(None).process(node)
        self.assertEqual(2, len(result))
        table, footnote = result

        self.assertEqual(markdown, table['text'])
        self.assertEqual([0], table['locations'])
        data = table['table_data']
        self.assertIn("header", data)

        # Verify header matches:
        def mkhd(t, c, r):
            """Build the expected dict for one header cell."""
            return {'text': t, 'colspan': c, 'rowspan': r}

        self.assertEqual(
            [[mkhd("R1C1", 2, 1)], [mkhd("R2C1", 1, 1),
                                    mkhd("R2C2", 1, 1)]], data["header"])
        self.assertIn("caption", data)
        self.assertEqual("Caption[^1](No work of any kind shall be conducted)",
                         data["caption"])
        self.assertEqual(u'[^1](No work of any kind shall be conducted)',
                         footnote['text'])
        self.assertEqual(u'1', footnote['footnote_data']['ref'])
        self.assertEqual(u'No work of any kind shall be conducted',
                         footnote['footnote_data']['note'])
        self.assertEqual([0], footnote['locations'])
Beispiel #7
0
    def test_process(self):
        """A GPOTABLE with a three-level header and three rows is processed.

        Verifies that:
        * nested markup inside CHED/ENT cells is flattened into plain text
          ("3-3 Content Here", "33 More") in both the markdown and the
          resulting table data;
        * each header cell gets the expected colspan/rowspan;
        * an empty ENT becomes an empty string in its row.
        """
        xml = etree.fromstring("""
            <GPOTABLE>
                <BOXHD>
                    <CHED H="1">1-1</CHED>
                    <CHED H="1">1-2</CHED>
                    <CHED H="2">2-1</CHED>
                    <CHED H="3">3-1</CHED>
                    <CHED H="2">2-2</CHED>
                    <CHED H="3">3-2</CHED>
                    <CHED H="3">3-3<LI>Content</LI>Here</CHED>
                </BOXHD>
                <ROW><ENT>11</ENT><ENT>12</ENT><ENT>13</ENT><ENT>14</ENT></ROW>
                <ROW><ENT>21</ENT><ENT>22</ENT><ENT>23</ENT></ROW>
                <ROW>
                    <ENT /><ENT>32</ENT><ENT>33<E>More</E></ENT><ENT>34</ENT>
                </ROW>
            </GPOTABLE>""")
        markdown = formatting.table_xml_to_plaintext(xml)
        # assertIn reports both operands on failure, unlike assertTrue
        self.assertIn("3-3 Content Here", markdown)
        self.assertIn("33 More", markdown)
        node = Node(markdown, source_xml=xml)
        result = formatting.Formatting(None).process(node)
        self.assertEqual(1, len(result))
        result = result[0]

        self.assertEqual(markdown, result['text'])
        self.assertEqual([0], result['locations'])

        # A nested def (rather than a lambda bound to a name) keeps a useful
        # __name__ in tracebacks and follows PEP 8.
        def mkhd(t, c, r):
            """Build the expected dict for one header cell."""
            return {'text': t, 'colspan': c, 'rowspan': r}

        data = result['table_data']
        self.assertEqual(
            data['header'],
            [[mkhd('1-1', 1, 3), mkhd('1-2', 3, 1)],
             [mkhd('2-1', 1, 1), mkhd('2-2', 2, 1)],
             [
                 mkhd('3-1', 1, 1),
                 mkhd('3-2', 1, 1),
                 mkhd('3-3 Content Here', 1, 1)
             ]])
        self.assertEqual(data['rows'],
                         [['11', '12', '13', '14'], ['21', '22', '23'],
                          ['', '32', '33 More', '34']])
    def test_table_with_caption_as_caption(self):
        """A plain-text TTITLE is exposed as the table's caption.

         Caption
        |R1C1     |
        |R2C1|R2C2|
        """
        with XMLBuilder("GPOTABLE", COLS=6) as ctx:
            ctx.TTITLE("Caption")
            with ctx.BOXHD():
                ctx.CHED(u"R1C1", H=1)
                ctx.CHED(u"R2C1", H=2)
                ctx.CHED(u"R2C2", H=2)

        markdown = formatting.table_xml_to_plaintext(ctx.xml)
        # assertIn reports both operands on failure, unlike assertTrue
        self.assertIn("R1C1", markdown)
        self.assertIn("R2C2", markdown)

        node = Node(markdown, source_xml=ctx.xml)
        result = formatting.Formatting(None).process(node)
        self.assertEqual(1, len(result))
        result = result[0]

        self.assertEqual(markdown, result['text'])
        self.assertEqual([0], result['locations'])
        data = result['table_data']
        self.assertIn("header", data)

        # Verify header matches:
        def mkhd(t, c, r):
            """Build the expected dict for one header cell."""
            return {'text': t, 'colspan': c, 'rowspan': r}

        self.assertEqual(
            [[mkhd("R1C1", 2, 1)], [mkhd("R2C1", 1, 1),
                                    mkhd("R2C2", 1, 1)]], data["header"])
        self.assertIn("caption", data)
        self.assertEqual("Caption", data["caption"])
    def test_atf_555_218_table(self):
        """A three-level, ten-column header is parsed with correct spans.

        Inspired by the more complicated table headers from ATF 27 555.

        This is a difficult table, 555.218; it should look something like this
        (abbreviated sketch -- the XML below is the authoritative layout):

        |Q of expl  |Distances in feet                                        |
        |lbs >|lbs <|Inhb bldgs|hwys <3000 veh|hwys >3000 veh|sep magazines   |
        |     |     |Barr|Unbar|Barr  |Unbar  |Barr  |Unbar  |Barr   |Unbarr  |
        |-----|-----|----|-----|------|-------|------|-------|-------|--------|
        |1    |2    |3   |4    |5     |6      |7     |8      |9      |10      |

        """
        xml = etree.fromstring("""
            <GPOTABLE CDEF="7,7,5,5,5,5,6,6,5,5" COLS="10" OPTS="L2">
              <BOXHD>
                <CHED H="1">Quantity of explosives</CHED>
                <CHED H="2">Pounds over</CHED>
                <CHED H="2">Pounds not over</CHED>
                <CHED H="1">Distances in feet</CHED>
                <CHED H="2">Inhabited buildings</CHED>
                <CHED H="3">Barri-caded</CHED>
                <CHED H="3">Unbarri-caded</CHED>
                <CHED H="2">Public highways with traffic volume of 3000</CHED>
                <CHED H="3">Barri-caded</CHED>
                <CHED H="3">Unbarri-caded</CHED>
                <CHED H="2">Passenger railways—public highways</CHED>
                <CHED H="3">Barri-caded</CHED>
                <CHED H="3">Unbarri-caded</CHED>
                <CHED H="2">Separation of magazines</CHED>
                <CHED H="3">Barri-caded</CHED>
                <CHED H="3">Unbarri-caded</CHED>
              </BOXHD>
              <ROW>
                <ENT I="01">0</ENT>
                <ENT>5</ENT>
                <ENT>70</ENT>
                <ENT>140</ENT>
                <ENT>30</ENT>
                <ENT>60</ENT>
                <ENT>51</ENT>
                <ENT>102</ENT>
                <ENT>6</ENT>
                <ENT>12</ENT>
              </ROW>
            </GPOTABLE>""")
        markdown = formatting.table_xml_to_plaintext(xml)
        # assertIn reports both operands on failure, unlike assertTrue
        self.assertIn("Quantity of explosives", markdown)
        self.assertIn("public highways", markdown)

        node = Node(markdown, source_xml=xml)
        result = formatting.Formatting(None).process(node)
        self.assertEqual(1, len(result))
        result = result[0]

        self.assertEqual(markdown, result['text'])
        self.assertEqual([0], result['locations'])
        data = result['table_data']
        self.assertIn("header", data)

        # Verify header matches:
        def mkhd(t, c, r):
            """Build the expected dict for one header cell."""
            return {'text': t, 'colspan': c, 'rowspan': r}

        # The barricaded/unbarricaded pair repeats under each of the four
        # second-level "distance" headers, so the expected cells are reused.
        hwys_header = mkhd("Public highways with traffic volume of 3000", 2, 1)
        rail_header = mkhd(u"Passenger railways—public highways", 2, 1)
        barr_header = mkhd(u"Barri-caded", 1, 1)
        unbr_header = mkhd(u"Unbarri-caded", 1, 1)
        self.assertEqual(
            data["header"],
            [[
                mkhd("Quantity of explosives", 2, 1),
                mkhd("Distances in feet", 8, 1)
            ],
             [
                 mkhd("Pounds over", 1, 2),
                 mkhd("Pounds not over", 1, 2),
                 mkhd("Inhabited buildings", 2, 1), hwys_header, rail_header,
                 mkhd("Separation of magazines", 2, 1)
             ],
             [
                 barr_header, unbr_header, barr_header, unbr_header,
                 barr_header, unbr_header, barr_header, unbr_header
             ]])