def test_migrate_html(self):
    """Check that ``_migrate_html`` folds element markup into the chunk words.

    Every scenario starts from a fresh ``self.reset_queue()`` and passes a
    single ``budou.Element`` whose last argument is 3. The diagrams in the
    comments below describe the queue as the chunks ``foo bar baz``
    (NOTE(review): that content comes from ``reset_queue``, which is not
    visible here -- confirm against the fixture).
    """
    # Each scenario: (element word, element markup, expected chunk words).
    scenarios = [
        # chunks:  foo bar baz
        # element: ___ ba_ ___
        ('ba', '<a href="#">ba</a>',
         ['foo', '<a href="#">ba</a>r', 'baz']),
        # chunks:  foo bar baz
        # element: ___ bar b__
        ('barb', '<a href="#">barb</a>',
         ['foo', '<a href="#">barb</a>az']),
    ]
    for word, markup, expected_words in scenarios:
        fresh_queue = self.reset_queue()
        anchors = [budou.Element(word, 'a', markup, 3)]
        migrated = self.parser._migrate_html(fresh_queue, anchors)
        actual_words = [chunk.word for chunk in migrated.chunks]
        self.assertEqual(expected_words, actual_words)
def test_get_elements_list(self):
    """Check that ``_get_elements_list`` turns a DOM into an element list.

    Two fragments are exercised: one with multi-byte text where the ``<a>``
    element opens the fragment, and one ASCII fragment where the element
    follows leading text. Both cases live in this single method on purpose:
    two methods with the same name in one class body leave only the later
    definition bound, so the earlier case would silently never run.
    """
    # Each case: (HTML fragment, expected list of budou.Element).
    cases = [
        # Multi-byte text; the element is the first thing in the fragment
        # and the expected last field is 0.
        (u'<a>こちら</a>をクリック',
         [budou.Element(u'こちら', 'a', u'<a>こちら</a>', 0)]),
        # ASCII text; the expected last field, 6, equals len('click ')
        # (presumably a character offset -- confirm against budou.Element).
        ('click <a>this</a>',
         [budou.Element('this', 'a', '<a>this</a>', 6)]),
    ]
    for source, expected in cases:
        # Wrap the fragment in a <body> parent before handing it to the parser.
        dom = html.fragment_fromstring(source, create_parent='body')
        result = self.parser._get_elements_list(dom)
        self.assertEqual(
            result, expected,
            'The input DOM should be processed to an element list.')