예제 #1
0
    def test_migrate_html(self):
        """Check the chunk words produced by ``_migrate_html`` for two elements.

        Each scenario starts from a fresh queue obtained via
        ``self.reset_queue()`` (per the layout comments below it presumably
        holds the chunks ``foo``, ``bar``, ``baz`` -- confirm against the
        fixture) and migrates a single ``<a>`` element into it.
        """
        scenarios = (
            # chunks:  foo bar baz
            # element: ___ ba_ ___
            ('ba', '<a href="#">ba</a>',
             ['foo', '<a href="#">ba</a>r', 'baz']),
            # chunks:  foo bar baz
            # element: ___ bar b__
            ('barb', '<a href="#">barb</a>',
             ['foo', '<a href="#">barb</a>az']),
        )
        for text, markup, expected_words in scenarios:
            # Start every scenario from a fresh queue before building input.
            fresh_queue = self.reset_queue()
            anchors = [budou.Element(text, 'a', markup, 3)]
            migrated = self.parser._migrate_html(fresh_queue, anchors)
            self.assertEqual(
                expected_words, [piece.word for piece in migrated.chunks])
예제 #2
0
 def test_get_elements_list(self):
     """Check ``_get_elements_list`` on a fragment that starts with a link.

     The Japanese fragment is parsed under a ``body`` parent; the result is
     expected to be a single ``budou.Element`` for the leading ``<a>`` tag,
     whose last field is 0.
     """
     fragment = html.fragment_fromstring(
         u'<a>こちら</a>をクリック', create_parent='body')
     wanted = [budou.Element(u'こちら', 'a', u'<a>こちら</a>', 0)]
     actual = self.parser._get_elements_list(fragment)
     failure_note = 'The input DOM should be processed to an element list.'
     self.assertEqual(actual, wanted, failure_note)
예제 #3
0
 def test_get_elements_list(self):
     """Check ``_get_elements_list`` on a fragment with leading plain text.

     ``'click <a>this</a>'`` is parsed under a ``body`` parent; the result
     is expected to be a single ``budou.Element`` for the ``<a>`` tag whose
     last field is 6 (this equals the length of the leading text
     ``'click '`` -- presumably a character offset; confirm against
     ``budou.Element``).
     """
     fragment = html.fragment_fromstring(
         'click <a>this</a>', create_parent='body')
     wanted = [budou.Element('this', 'a', '<a>this</a>', 6)]
     actual = self.parser._get_elements_list(fragment)
     failure_note = 'The input DOM should be processed to an element list.'
     self.assertEqual(actual, wanted, failure_note)