def test_select_multiple(self):
    '''
    A component built from a list of selectors extracts every match.

    CssPath and XPath are each given the same two selectors ('nav' and
    'section').  The expected markup is a single wrapper div (class
    "mwu-elem", id "foo") holding the nav and the section, with the
    layout table that sits between them in the source left out.
    '''
    from mobilize.components import CssPath, XPath
    selectors = [
        'nav',
        'section',
    ]
    src_html = '''<div> <nav> <a href="/A">A</a> <a href="/B">B</a> </nav> <table><tr><td> </td><td>I'm using tables for layout!!! DUR</td></tr></table> <section> <p>Hello.</p> </section> </div> '''
    expected_html = '''<div class="mwu-elem" id="foo"> <nav> <a href="/A">A</a> <a href="/B">B</a> </nav> <section> <p>Hello.</p> </section> </div>'''

    # The identical scenario is run against each component class in turn:
    # CssPath first, then XPath.
    for component_class in (CssPath, XPath):
        component = component_class(selectors, idname='foo')
        component.extract(html.fromstring(src_html))
        wrapper = component.process()
        rendered = html.tostring(wrapper)
        self.assertSequenceEqual(normxml(expected_html), normxml(rendered))
def test_collapse(self):
    '''
    Test for collapsing filter application mode

    The same post-filter is attached to two XPath('//a') components that
    differ only in filtermode:

      * FILT_EACHELEM  - the expected markup has class="foo" on every
        anchor, i.e. the filter's "element is an anchor" branch fired.
      * FILT_COLLAPSED - the expected markup has id="child-N" on every
        anchor and no class, i.e. the filter's "anchor children" branch
        fired instead.
    '''
    from mobilize.components import (
        XPath,
        FILT_EACHELEM,
        FILT_COLLAPSED,
    )

    def testfilter(elem):
        # Tag the element itself when it is an anchor ...
        if elem.tag == 'a':
            elem.attrib['class'] = 'foo'
        # ... and number every direct child that is an anchor by its
        # position among the element's children.
        for ii, child in enumerate(elem):
            if 'a' == child.tag:
                child.attrib['id'] = 'child-%d' % ii

    htmlstr1 = '''<a href="/">a</a> <a href="/">b</a> <a href="/">c</a> '''

    # Per-element mode.
    nocollapse = XPath('//a', postfilters=[testfilter], filtermode=FILT_EACHELEM)
    nocollapse.extract(html.fromstring(htmlstr1))
    actual = nocollapse.process('idname')
    actual_str = html.tostring(actual)
    expected_str = '''<div class="mwu-elem" id="idname"> <a href="/" class="foo">a</a> <a href="/" class="foo">b</a> <a href="/" class="foo">c</a> </div> '''
    self.assertSequenceEqual(normxml(expected_str), normxml(actual_str))

    # Collapsed mode.  (A stale expected_str that was assigned here and
    # overwritten before being asserted on has been dropped.)
    collapse = XPath('//a', postfilters=[testfilter], filtermode=FILT_COLLAPSED)
    collapse.extract(html.fromstring(htmlstr1))
    actual = collapse.process('idname')
    actual_str = html.tostring(actual)
    expected_str = '''<div class="mwu-elem" id="idname"> <a href="/" id="child-0">a</a> <a href="/" id="child-1">b</a> <a href="/" id="child-2">c</a> </div> '''
    self.assertSequenceEqual(normxml(expected_str), normxml(actual_str))
def test_innerhtml(self):
    '''
    Check how the innerhtml flag shapes the extracted markup.

    Three scenarios are asserted, all selecting '//td' with id "foo":

      * innerhtml=False, one match  - the td itself sits in the wrapper.
      * innerhtml=True, one match   - only the td's content ("Hello")
        sits in the wrapper.
      * innerhtml=True, two matches - both td elements are kept, so the
        flag has no effect.
    '''
    from mobilize.components import XPath

    def extract_cells(markup, innerhtml):
        # Build the component, feed it the parsed markup, and hand back
        # the serialized result of process().
        component = XPath('//td', idname='foo', innerhtml=innerhtml)
        component.extract(html.fromstring(markup))
        return html.tostring(component.process())

    html_str = '''<table><tr><td>Hello</td></tr></table>'''

    # innerhtml disabled: the matched element is preserved as-is.
    without_inner = extract_cells(html_str, False)
    self.assertSequenceEqual(
        normxml('<div class="mwu-elem" id="foo"><td>Hello</td></div>'),
        normxml(without_inner),
    )

    # innerhtml enabled with a single match: the element is unwrapped.
    with_inner = extract_cells(html_str, True)
    self.assertSequenceEqual(
        normxml('<div class="mwu-elem" id="foo">Hello</div>'),
        normxml(with_inner),
    )

    # innerhtml enabled but several elements match: nothing is unwrapped.
    several = extract_cells(''' <table><tr> <td>Hello</td> <td>Goodbye</td> </tr></table> ''', True)
    self.assertSequenceEqual(
        normxml('<div class="mwu-elem" id="foo"><td>Hello</td><td>Goodbye</td></div>'),
        normxml(several),
    )