Example #1
0
    def test_select_multiple(self):
        '''
        Test that extracted components can accept multiple selectors
        '''
        from mobilize.components import CssPath, XPath
        selectors = [
            'nav',
            'section',
            ]
        src_html = '''<div>
<nav>
  <a href="/A">A</a>
  <a href="/B">B</a>
</nav>
<table><tr><td>&nbsp;</td><td>I'm using tables for layout!!! DUR</td></tr></table>
<section>
<p>Hello.</p>
</section>
</div>
'''
        expected_html = '''<div class="mwu-elem" id="foo">
<nav>
  <a href="/A">A</a>
  <a href="/B">B</a>
</nav>
<section>
<p>Hello.</p>
</section>
</div>'''
        # test for CssPath
        css_component = CssPath(selectors, idname='foo')
        css_component.extract(html.fromstring(src_html))
        extracted = css_component.process()
        extracted_str = html.tostring(extracted)
        self.assertSequenceEqual(normxml(expected_html), normxml(extracted_str))

        # test for XPath
        x_component = XPath(selectors, idname='foo')
        x_component.extract(html.fromstring(src_html))
        extracted = x_component.process()
        extracted_str = html.tostring(extracted)
        self.assertSequenceEqual(normxml(expected_html), normxml(extracted_str))
Example #2
0
    def test_collapse(self):
        '''
        Test for collapsing filter application mode
        '''
        from mobilize.components import (
            XPath,
            FILT_EACHELEM,
            FILT_COLLAPSED,
            )
        def testfilter(elem):
            if elem.tag == 'a':
                elem.attrib['class'] = 'foo'
            for ii, child in enumerate(elem):
                if 'a' == child.tag:
                    child.attrib['id'] = 'child-%d' % ii
            
        htmlstr1 = '''<a href="/">a</a>
<a href="/">b</a>
<a href="/">c</a>
'''
        nocollapse = XPath('//a', postfilters=[testfilter], filtermode=FILT_EACHELEM)
        nocollapse.extract(html.fromstring(htmlstr1))
        actual = nocollapse.process('idname')
        actual_str = html.tostring(actual)
        expected_str = '''<div class="mwu-elem" id="idname">
<a href="/" class="foo">a</a>
<a href="/" class="foo">b</a>
<a href="/" class="foo">c</a>
</div>
'''
        self.assertSequenceEqual(normxml(expected_str), normxml(actual_str))
        
        expected_str = '''<div id="idname">
<a href="/">a</a>
<a href="/">b</a>
<a href="/">c</a>
</div>
'''
        collapse = XPath('//a', postfilters=[testfilter], filtermode=FILT_COLLAPSED)
        collapse.extract(html.fromstring(htmlstr1))
        actual = collapse.process('idname')
        actual_str = html.tostring(actual)
        expected_str = '''<div class="mwu-elem" id="idname">
<a href="/" id="child-0">a</a>
<a href="/" id="child-1">b</a>
<a href="/" id="child-2">c</a>
</div>
'''
        self.assertSequenceEqual(normxml(expected_str), normxml(actual_str))
Example #3
0
    def test_innerhtml(self):
        from mobilize.components import XPath
        html_str = '''<table><tr><td>Hello</td></tr></table>'''
        # test for innerhtml=False
        component_f = XPath('//td', idname='foo', innerhtml=False)
        component_f.extract(html.fromstring(html_str))
        extracted = component_f.process()
        extracted_str = html.tostring(extracted)
        expected = '<div class="mwu-elem" id="foo"><td>Hello</td></div>'
        e = normxml(expected)
        a = normxml(extracted_str)
        self.assertSequenceEqual(e, a)
        
        # test for innerhtml=True
        component_t = XPath('//td', idname='foo', innerhtml=True)
        component_t.extract(html.fromstring(html_str))
        extracted = component_t.process()
        extracted_str = html.tostring(extracted)
        expected = '<div class="mwu-elem" id="foo">Hello</div>'
        self.assertSequenceEqual(normxml(expected), normxml(extracted_str))
        
        # test for ineffectiveness of innerhtml=True with multiple matching elements
        component_t = XPath('//td', idname='foo', innerhtml=True)
        component_t.extract(html.fromstring('''
<table><tr>
<td>Hello</td>
<td>Goodbye</td>
</tr></table>
'''))
        extracted = component_t.process()
        extracted_str = html.tostring(extracted)
        expected = '<div class="mwu-elem" id="foo"><td>Hello</td><td>Goodbye</td></div>'
        self.assertSequenceEqual(normxml(expected), normxml(extracted_str))