def test_multipleRoot(self):
    """
        test_multipleRoot - Parse a document made of two sibling root-level divs.

        Builds two standalone div tags (id="div1" and id="div2"), feeds their
        concatenated outerHTML to a fresh parser, and checks that:

          * the parser reports exactly two root nodes,
          * each div can be looked up by its id,
          * the outerHTML of the two found elements, joined together, equals
            the parser's getHTML() output (compared with newlines removed and
            surrounding whitespace stripped).
    """
    parser = AdvancedHTMLParser()

    root1 = AdvancedTag('div')
    root1.setAttribute('id', 'div1')

    root2 = AdvancedTag('div')
    root2.setAttribute('id', 'div2')

    parser.parseStr(root1.outerHTML + root2.outerHTML)

    assert len(parser.getRootNodes()) == 2, 'Expected two root nodes on tree'

    foundRoot1 = parser.getElementById('div1')
    assert foundRoot1, 'Expected to find id=div1 in multi-root tree'

    foundRoot2 = parser.getElementById('div2')
    assert foundRoot2, 'Expected to find id=div2 in multi-root tree'

    # Newlines are dropped on both sides so the comparison does not depend on
    # how either side breaks its output across lines.
    combinedHTML = (foundRoot1.outerHTML + foundRoot2.outerHTML).replace('\n', '').strip()
    parsedHTML = parser.getHTML().replace('\n', '').strip()

    assert combinedHTML == parsedHTML, 'Expected single element outerHTMLs to match parser HTML. """\n%s\n""" != """\n%s\n"""' % (combinedHTML, parsedHTML)
def test_HandleMultipleRoot(self):
    """
        test_HandleMultipleRoot - Ensure the parser tolerates HTML with more than one root node.

        Parses the module-level MULTIPLE_ROOT fixture, requiring that parsing
        does not raise, that the element with id="one" can be found, and that
        the parser ends up with exactly two root nodes.

        Any exception raised while parsing is reported as a test failure
        (AssertionError) rather than as an error.
    """
    parser = AdvancedHTMLParser()

    try:
        parser.parseStr(MULTIPLE_ROOT)
    except Exception as e:
        # Keep the underlying error in the failure text; without it the cause
        # of the parse failure is lost.
        raise AssertionError('Failed to properly parse invalid HTML with multiple root nodes: %r' % (e,))

    oneEm = parser.getElementById('one')
    assert oneEm, 'Failed to find first element'

    rootNodes = parser.getRootNodes()
    assert len(rootNodes) == 2, 'Expected two root nodes on tree, got %d' % (len(rootNodes),)
def test_multipleRoot(self):
    """
        test_multipleRoot - Verify a tree with two root nodes round-trips through the parser.

        Two div tags are created (ids "div1" and "div2") and their outerHTML
        strings are concatenated and parsed. The test then asserts that two
        root nodes exist, that both ids resolve via getElementById, and that
        the found elements' combined outerHTML matches parser.getHTML() once
        newlines are removed and both strings are stripped.
    """
    parser = AdvancedHTMLParser()

    # Build the two root-level elements that make up the input document.
    root1 = AdvancedTag('div')
    root1.setAttribute('id', 'div1')
    root2 = AdvancedTag('div')
    root2.setAttribute('id', 'div2')

    parser.parseStr(root1.outerHTML + root2.outerHTML)

    rootCount = len(parser.getRootNodes())
    assert rootCount == 2, 'Expected two root nodes on tree'

    foundRoot1 = parser.getElementById('div1')
    assert foundRoot1, 'Expected to find id=div1 in multi-root tree'

    foundRoot2 = parser.getElementById('div2')
    assert foundRoot2, 'Expected to find id=div2 in multi-root tree'

    # Compare with newlines removed so line-break placement is ignored.
    combinedHTML = foundRoot1.outerHTML + foundRoot2.outerHTML
    combinedHTML = combinedHTML.replace('\n', '').strip()
    parsedHTML = parser.getHTML().replace('\n', '').strip()

    assert combinedHTML == parsedHTML, 'Expected single element outerHTMLs to match parser HTML. """\n%s\n""" != """\n%s\n"""' % (combinedHTML, parsedHTML)