Beispiel #1
0
    def test_directive(self):
        """Check parsing of complete, unterminated, and empty directives."""

        def expect_plain_text(markup):
            # The first object should be text whose source is the whole input.
            parsed = S.markup_parser(markup)
            self.assertEqual(len(parsed), 2)
            self.assertEqual(parsed[0].type, 'text')
            self.assertEqual(parsed[0].source, parsed.input)

        # Terminated directives: (markup, expected name, expected innersource).
        complete = (
            ('<!DOCTYPE html public>', 'DOCTYPE', 'DOCTYPE html public'),
            ('<?xml ... ?>', 'xml', 'xml ... '),
        )
        for markup, name, inner in complete:
            parsed = S.markup_parser(markup)
            self.assertEqual(len(parsed), 2)
            self.assertEqual(parsed[0].type, 'dir')
            self.assertEqual(parsed[0].name, name)
            self.assertEqual(parsed[0].innersource, inner)

        for opener in ('<!', '<?'):
            # An opener followed by a name but no terminator has type 'udir'.
            parsed = S.markup_parser(opener + 'incomplete')
            self.assertEqual(len(parsed), 2)
            self.assertEqual(parsed[0].type, 'udir')
            self.assertEqual(parsed[0].name, 'incomplete')
            self.assertEqual(parsed[0].innersource, 'incomplete')

            # The opener on its own is expected to come back as text.
            expect_plain_text(opener)

        # Delimiters with nothing between them are expected to be text too.
        for markup in ('<!>', '<??>'):
            expect_plain_text(markup)
Beispiel #2
0
    def test_line_numbers(self):
        """Exercise line counting, line retrieval, and offset lookup."""
        # Corner cases: the empty string, a single line, and a single line
        # ending in a newline are each expected to count as one line.
        for text in ('', 'a', 'a\n'):
            self.assertEqual(S.markup_parser(text).get_linecount(), 1)

        doc = S.markup_parser('<line num=1>\nline 2\nline 3')
        self.assertEqual(doc.get_linecount(), 3)

        # Lines are requested starting from 1; asking past the end raises.
        expected_lines = ('<line num=1>\n', 'line 2\n', 'line 3')
        for number, expected in enumerate(expected_lines, 1):
            self.assertEqual(doc.get_line(number), expected)
        with self.assertRaises(IndexError):
            doc.get_line(4)

        # The text object begins right after the 12-character tag on line 1.
        text_obj = doc.first(type='text')
        self.assertEqual(text_obj.source, '\nline 2\nline 3')
        self.assertEqual(text_obj.linepos, (1, 12))
        self.assertEqual(doc.locate(text_obj.start + 1), text_obj)

        # The offset one past the last character locates the eof object.
        self.assertEqual(doc.locate(len(doc.input)).type, 'eof')

        # Offset 7 lies inside the tag; the located object has name 'num'.
        located = doc.locate(7)
        self.assertEqual(located.type, 'open')
        self.assertEqual(located.name, 'num')
        self.assertEqual(located.value, '1')
Beispiel #3
0
    def test_query_candidates(self):
        """Check queries whose candidate set is explicitly restricted."""
        # index:               0  1             2   3           4
        doc = S.markup_parser('<T>a span of text<U/>another span</T>')

        # With no restriction the whole parse is searched.
        self.assertEqual(doc.first(type='text').obj_id, 1)

        # A query issued on an object looks at that object's contents.
        self.assertEqual(doc[0].first(type='text').obj_id, 1)

        # An explicit lower bound narrows the searched span.
        later_text = doc.first(type='text', search_after=doc[2])
        self.assertEqual(later_text.obj_id, 3)

        # Several restrictions may be combined in one query.
        between = doc.first(search_after=doc[1], search_before=doc[3])
        self.assertEqual(between.obj_id, 2)
        inside = doc.first(search_after=doc[2], search_inside=doc[0])
        self.assertEqual(inside.obj_id, 3)
        after_u = doc[0].first(search_after=doc[2], type='text')
        self.assertEqual(after_u.obj_id, 3)
        before_u = doc[0].first(search_before=doc[2], type='text')
        self.assertEqual(before_u.obj_id, 1)

        # Regression: a search inside a text object must not fall back to
        # searching everything; it is expected to find nothing and raise.
        with self.assertRaises(KeyError):
            doc.first(search_inside=doc[1])
        with self.assertRaises(KeyError):
            doc[1].first()
Beispiel #4
0
    def test_simple_queries(self):
        """Exercise basic .find(), .first(), and .last() lookups."""
        doc = S.markup_parser(
            '<doc><!-- comment -->text<?dir?>'
            '<atag x=y><tag><self/>more&amp;</tag>')

        # Non-matching queries: find() yields nothing and first() raises.
        self.assertEqual(list(doc.find(name='none')), [])
        with self.assertRaises(KeyError):
            doc.first(name='none')
        with self.assertRaises(KeyError):
            doc.first(name='DOC', case_matters=True)

        # Matching queries, selected by type, name, and value.
        self.assertEqual(doc.first(type='com').source, '<!-- comment -->')
        self.assertEqual(doc.last(name='tag').partner.source, '<tag>')
        text_sources = [obj.source for obj in doc.find(type='text')]
        self.assertEqual(text_sources, ['text', 'more&amp;'])
        self.assertEqual(doc.first(value='more&').obj_id, 7)
        self.assertEqual(doc.first(type='self').source, '<self/>')
        self.assertEqual(doc.first(type='dir').source, '<?dir?>')

        # Attribute matching: attr takes a (name, flag) pair; anything that
        # is not such a pair is expected to raise TypeError.
        with self.assertRaises(KeyError):
            doc.first(name='atag', attr=('x', False))
        with self.assertRaises(TypeError):
            doc.first(attr=False)
        self.assertEqual(doc.first(name='atag').keys(), ['x'])
        self.assertEqual(doc.first(attr=('x', True))['x'].value, 'y')
Beispiel #5
0
    def test_basic_nesting(self):
        """Verify parent links, partner links, and paths for nested tags."""
        # index:                0  1  2  3  4  5  6   7   8  9  10
        doc = S.markup_parser('<A> w <B> x <C> y <D></B></C> z </A>')

        # (object index, parent index); a parent of None means top level.
        expected_parents = (
            (0, None),   # <A>  is at the top level
            (1, 0),      # w    is a child of <A>
            (2, 0),      # <B>  is a child of <A>
            (3, 2),      # x    is a child of <B>
            (4, 0),      # <C>  is a child of <A>
            (5, 2),      # y    is a child of <B>
            (6, 2),      # <D>  is a child of <B>
            (7, 0),      # </B> is a child of <A>
            (8, 0),      # </C> is a child of <A>
            (9, 0),      # z    is a child of <A>
            (10, None),  # </A> is at the top level
        )
        for child, parent in expected_parents:
            node = doc[child]
            self.assertChildOf(node, None if parent is None else doc[parent])

        # Each open tag is paired with its close tag.
        for opener, closer in ((0, 10), (2, 7), (4, 8)):
            self.assertPartners(doc[opener], doc[closer])

        # The path lists the ancestors in order, ending with the object.
        expected_paths = (
            (6, (0, 2, 6)),  # <A> ==> <B> ==> <D>
            (5, (0, 2, 5)),  # <A> ==> <B> ==> y
            (4, (0, 4)),     # <A> ==> <C>
        )
        for leaf, chain in expected_paths:
            self.assertEqual(doc[leaf].path, [doc[i] for i in chain])
Beispiel #6
0
 def test_white_filter(self):
     """Check parsing with skip_white_text enabled.

     The whitespace-only runs around the tags are expected to produce no
     objects, while text containing non-blank characters keeps its
     surrounding whitespace.
     """
     markup = '  <t0>  <t1/> t2 <t3 num=3> '
     parsed = S.markup_parser(markup, skip_white_text=True)

     # Three tags, one text object, and the eof object.
     self.assertEqual(len(parsed), 5)
     self.assertTagShape(parsed[0], 'open', 't0')
     self.assertTagShape(parsed[1], 'self', 't1')
     self.assertObjectShape(parsed[2], 'text', source=' t2 ')
     self.assertTagShape(parsed[3], 'open', 't3', num='3')
     self.assertObjectShape(parsed[4], 'eof')
Beispiel #7
0
    def test_query_mapping(self):
        """Check the map= option, which transforms query results."""

        def column(obj):
            # Second component of the object's (line, position) pair.
            return obj.linepos[1]

        doc = S.markup_parser('<a><b/>foobar<c></a></c>')

        # A string selects a single attribute of the matched object.
        self.assertEqual(doc.first(type='text', map='source'), 'foobar')
        self.assertEqual(doc.first(type='self', map='start'), 3)

        # A tuple of names produces a tuple of the corresponding values.
        name_and_id = doc.first(type='close', map=('name', 'obj_id'))
        self.assertEqual(name_and_id, ('a', 4))

        # A callable is applied to every result; the final entry in each
        # expected list belongs to the eof marker.
        lengths = list(doc.find(map=len))
        self.assertEqual(lengths, [3, 4, 6, 3, 4, 4, 0])
        columns = list(doc.find(map=column))
        self.assertEqual(columns, [0, 3, 7, 13, 16, 20, 24])
Beispiel #8
0
    def test_tags(self):
        """Check open, self-closing, and close tags and their attributes."""
        # Open tags: bare, unquoted, and double-quoted attribute forms.
        parsed = S.markup_parser('<open1><open2 bare name1=val name2="foo">')
        self.assertEqual(len(parsed), 3)
        self.assertTagShape(parsed[0], 'open', 'open1')
        open2_attrs = {'bare': '', 'name1': 'val', 'name2': 'foo'}
        self.assertTagShape(parsed[1], 'open', 'open2', **open2_attrs)

        # A self-closing tag with a single-quoted value, then a close tag.
        parsed = S.markup_parser(
            "<self foo=bar baz='quux crunch' zot/></endtag>")
        self.assertEqual(len(parsed), 3)
        self_attrs = {'foo': 'bar', 'baz': 'quux crunch', 'zot': ''}
        self.assertTagShape(parsed[0], 'self', 'self', **self_attrs)
        self.assertTagShape(parsed[1], 'close', 'endtag')
Beispiel #9
0
    def test_entities(self):
        """Test entity substitution.

        Covers the default named entities, a custom named entity added to
        ENTITY_NAME_MAP, an unknown named entity (expected to be left as
        written), and numeric entities.
        """
        s = S.markup_parser('a&lt;b&gt;c&quot;<x>'  # 0 1
                            '--&testName;--<x>'  # 2 3
                            '--&NoneSuch;--<x>'  # 4 5
                            '&#32;&&&#9;')  # 6 <eof>
        self.assertEqual(len(s), 8)

        # Check default entities
        self.assertObjectShape(s[0], 'text', value='a<b>c"')

        # Check custom entities.  The map is modified in place, so register
        # a cleanup that puts it back the way it was; otherwise the extra
        # entry could outlive this test.
        # NOTE(review): the upper-case name suggests ENTITY_NAME_MAP is
        # shared between parser instances -- confirm against the parser.
        entity_map = s.ENTITY_NAME_MAP
        if 'testName' in entity_map:
            self.addCleanup(entity_map.__setitem__, 'testName',
                            entity_map['testName'])
        else:
            self.addCleanup(entity_map.pop, 'testName', None)
        entity_map['testName'] = '[ok]'
        self.assertObjectShape(s[2], 'text', value='--[ok]--')
        self.assertObjectShape(s[4], 'text', value='--&NoneSuch;--')

        # Check numeric entities; the bare ampersands stay as they are.
        self.assertObjectShape(s[6], 'text', value=' &&\t')
Beispiel #10
0
    def test_positions(self):
        """Check line/offset and line/column reporting and their inverses."""
        doc = S.markup_parser('<a>\n\t<b/>\t<c>d\n</c>\t</a>')
        doc.LINE_NUM_BASE = 1
        doc.COLUMN_NUM_BASE = 0
        doc.TAB_WIDTH = 8

        # (object index, expected linepos, expected linecol).  The first
        # entry is the edge case of first line, first column; the second
        # sits after a tab, so its column differs from its offset in the line.
        reported = (
            (0, (1, 0), (1, 0)),
            (2, (2, 1), (2, 8)),
        )
        for index, linepos, linecol in reported:
            self.assertEqual(doc[index].linepos, linepos)
            self.assertEqual(doc[index].linecol, linecol)

        # Line/offset pairs converted back into input offsets.
        by_offset = (
            ((1, 0), 0),    # beginning
            ((2, 9), 13),   # at d
            ((2, 0), 4),    # start of line 2 (the tab before <b/>)
        )
        for (line, pos), offset in by_offset:
            self.assertEqual(doc.get_offset(line, pos), offset)

        # Line/column pairs converted back into input offsets.
        by_column = (
            ((1, 0), 0),    # beginning
            ((2, 19), 13),  # at d
            ((2, 8), 5),    # start of <b/>
        )
        for (line, col), offset in by_column:
            self.assertEqual(doc.get_column_offset(line, col), offset)