Esempi in Python per AdvancedHTMLParser.AdvancedHTMLParser, esempi in Python per AdvancedHTMLParser.Parser.AdvancedHTMLParser.AdvancedHTMLParser

Esempio n. 1

0

Mostra file

    def test_createElement(self):
        '''
            test_createElement - Check that createElement on a parser hands back
                an AdvancedTag carrying the requested tag name
        '''
        document = AdvancedHTMLParser()
        createdTag = document.createElement('div')

        assert isinstance(createdTag, AdvancedTag), 'Expected createElement to create an AdvancedTag element.'
        assert createdTag.tagName == 'div', 'Expected createElement to set tag name properly'

Esempio n. 2

0

Mostra file

    def test_getElementsByAttr(self):
        html = """<html> <head> <title> Hello </title> </head>
<body>
    <div cheese="cheddar" id="cheddar1" >
        <span> Hello </span>
    </div>
    <div cheese="bologna" id="not_really_cheese">
        <span cheese="cheddar" id="cheddar2" > Goodbye </span>
    </div>
</body>
</html>"""
        parser = AdvancedHTMLParser()
        parser.parseStr(html)

        elements = parser.getElementsByAttr('cheese', 'cheddar')
        assert len(elements) == 2

        foundCheese1 = foundCheese2 = False
        for element in elements:
            myID = element.getAttribute('id')
            if myID == 'cheddar1':
                foundCheese1 = True
            elif myID == 'cheddar2':
                foundCheese2 = True

        assert foundCheese1
        assert foundCheese2

Esempio n. 3

0

Mostra file

File: test_Building.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_ownerDocument(self):
        '''
            test_ownerDocument - Check that ownerDocument points at the parser for parsed
                elements, and is None on a clone and on a removed subtree
        '''
        document = AdvancedHTMLParser()
        document.parseStr("""<div id='outer'> <div id='items'> <div name="item" id="item1" >item1 <span id="subItem1">Sub item</span></div> <div name="item" id="item2" >item2</div> </div> </div>""")

        outerEm = document.getElementById('outer')
        assert outerEm.ownerDocument == document, 'Expected the ownerDocument to be set to parser'

        # Every node reachable from the outer element should reference the parser
        for node in outerEm.getAllNodes():
            assert node.ownerDocument == document, 'Expected ownerDocument to be set on every element. Was not set on: %s' % (node.getStartTag(), )

        # A clone is detached: no parent, no document, no children
        cloneOfOuter = outerEm.cloneNode()
        assert cloneOfOuter.parentNode is None, 'Expected cloned child to have no parent'
        assert cloneOfOuter.ownerDocument is None, 'Expected cloned child to have no owner document'
        assert len(cloneOfOuter.children) == 0, 'Expected cloned element to have no children'

        # Removing a child must detach it, and everything under it, from the document
        firstChildOfOuter = outerEm.children[0]
        removedEm = outerEm.removeChild(firstChildOfOuter)

        assert removedEm, 'Expected removeChild to return removed element'
        assert removedEm.id == 'items', 'Got wrong element, expected to remove "items", got: %s' % (removedEm.getStartTag(), )
        assert removedEm.ownerDocument is None, 'Expected owner document to be set to None after element was removed.'

        for descendant in removedEm.getAllChildNodes():
            assert descendant.ownerDocument is None, 'Expected owner document to be cleared on all children after removal from document'

Esempio n. 4

0

Mostra file

File: test_Building.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_appending(self):
        '''
            test_appending - Test that appendNode and insertAfter place new tags at the
                expected position, and that the parser can find the new tags afterwards
        '''
        parser = AdvancedHTMLParser()

        parser.parseStr(
            """<div id='outer'> <div id='items'> <div name="item" id="item1" >item1</div> <div name="item" id="item2" >item2</div> </div> </div>"""
        )

        itemsEm = parser.getElementById('items')
        # The element looked up here is id="items", so the message names that id
        assert itemsEm, 'Expected to get <div id="items">'

        assert len(itemsEm.children) == 2, 'Expected two children'

        assert itemsEm.childElementCount == 2, 'Expected childElementCount to equal 2'

        # Append a third item at the end
        newItem = AdvancedTag('div')
        newItem.setAttributes({'name': 'item', 'id': 'item3'})

        itemsEm.appendNode(newItem)

        assert parser.getElementById('item3'), 'Expected to get item3 after append'
        assert len(parser.getElementsByName('item')) == 3, 'Expected after append that 3 nodes are  set'
        assert itemsEm.children[2].getAttribute('id') == 'item3', 'Expected item3 to be the third child'

        # Insert another item between item2 and item3
        newItem = AdvancedTag('div')
        newItem.setAttributes({'name': 'item', 'id': 'item2point5'})

        itemsEm.insertAfter(newItem, itemsEm.children[1])
        childIds = [x.id for x in itemsEm.getElementsByName('item')]

        assert childIds == [
            'item1', 'item2', 'item2point5', 'item3'
        ], 'Expected items to be ordered. Got: %s' % (str(childIds), )

Esempio n. 5

0

Mostra file

File: test_Building.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_multipleRoot(self):
        '''
            test_multipleRoot - Test that parsing two sibling root tags keeps both as root nodes,
                that each can be found by id, and that their combined outerHTML matches the parser's HTML
        '''
        parser = AdvancedHTMLParser()

        root1 = AdvancedTag('div')
        root1.setAttribute('id', 'div1')

        root2 = AdvancedTag('div')
        root2.setAttribute('id', 'div2')

        parser.parseStr(root1.outerHTML + root2.outerHTML)

        assert len(
            parser.getRootNodes()) == 2, 'Expected two root nodes on tree'

        foundRoot1 = parser.getElementById('div1')
        assert foundRoot1, 'Expected to find id=div1 in multi-root tree'

        foundRoot2 = parser.getElementById('div2')
        # This lookup is for div2, so the message names div2
        assert foundRoot2, 'Expected to find id=div2 in multi-root tree'

        # Compare with newlines removed, so only the markup itself is checked
        combinedHTML = (foundRoot1.outerHTML + foundRoot2.outerHTML).replace(
            '\n', '').strip()
        parsedHTML = parser.getHTML().replace('\n', '').strip()

        assert combinedHTML == parsedHTML, 'Expected single element outerHTMLs to match parser HTML. """\n%s\n""" != """\n%s\n"""' % (
            combinedHTML, parsedHTML)

Esempio n. 6

0

Mostra file

    def test_attributeDefault(self):
        '''
            test_attributeDefault - Check that the second argument of getAttribute is only
                returned when the attribute is absent from the tag
        '''
        document = AdvancedHTMLParser()
        document.parseStr('<input id="item" type="text" value="hello" />')

        inputEm = document.getElementById('item')

        # Present attribute: the real value is returned and the default is ignored
        presentValue = inputEm.getAttribute('type', 'bloogity')
        assert presentValue == 'text'

        # Absent attribute: the default is returned
        missingValue = inputEm.getAttribute('woogity', 'snoogity')
        assert missingValue == 'snoogity'

Esempio n. 7

0

Mostra file

    def testNextSibling(self):
        '''
            testNextSibling - Test nextSibling and nextSiblingElement on tags
                which are followed by a tag, by text, and by nothing
        '''
        parser = AdvancedHTMLParser()
        parser.parseStr(
            '<div>Head Text<div id="one">An item</div><div id="two">Another item</div>More Text<div id="three">Last  item</div></div>'
        )

        root = parser.getRoot()

        oneEm = root.getElementById('one')
        twoEm = root.getElementById('two')
        threeEm = root.getElementById('three')

        # id="one" is directly followed by the id="two" tag
        assert oneEm.nextSibling.id == 'two', 'Expected to get element with id "two"'
        assert oneEm.nextSiblingElement.id == 'two', 'Expected to get element with id "two"'

        # id="two" is followed by the text "More Text" in the parsed markup ("Another item"
        #  is the inner text of id="two" itself), and the next tag is id="three"
        assert twoEm.nextSibling == 'More Text', 'Expected to get text "More Text" after item id=two'
        assert twoEm.nextSiblingElement.id == 'three', 'Expected to get element with id "three"'

        # id="three" is the last block in its parent
        assert threeEm.nextSibling is None, 'Expected to get no element after id="three"'
        assert threeEm.nextSiblingElement is None, 'Expected to get no element after id="three"'

Esempio n. 8

0

Mostra file

    def test_formAttribute(self):
        '''
            test_formAttribute - Check the "form" attribute: an input nested in a form
                yields that form, an input outside of any form yields None
        '''
        parser = AdvancedHTMLParser()
        parser.parseStr('''<html><head></head><body><div id="main"> <form id="myForm"> <div> <input type="text" id="inputWithinForm" /> </div> </form> </div> <input type="text" id="inputOutsideForm" /> </body></html>''')

        formEm = parser.getElementById('myForm')
        assert formEm, 'Failed to get element by id="myForm"'

        # Input nested (two levels deep) inside the form
        innerInputEm = parser.getElementById('inputWithinForm')
        assert innerInputEm, 'Failed to get element with id="inputWithinForm"'

        resolvedForm = innerInputEm.form
        assert resolvedForm, 'Expected inputWithinFormEm.form to return parent form. Got nada.'
        assert resolvedForm is formEm, 'Expected to get parent form via .form, got: ' + str(resolvedForm.getStartTag())

        # Input that sits outside of the form
        outerInputEm = parser.getElementById('inputOutsideForm')
        assert outerInputEm, 'Failed to get element with id="inputOutsideForm"'

        resolvedForm = outerInputEm.form
        assert resolvedForm is None, 'Expected .form to return None on an input outside of form. Got: ' + str(resolvedForm.getStartTag())

Esempio n. 9

0

Mostra file

    def test_noValueAttributes(self):
        '''
            test_noValueAttributes - Check that an attribute given without a value ("checked")
                is reported by hasAttribute and shows up in outerHTML
        '''
        document = AdvancedHTMLParser()
        document.parseStr('<input id="thebox" type="checkbox" checked />')

        checkboxEm = document.getElementById('thebox')

        assert checkboxEm.hasAttribute('checked')

        renderedHTML = checkboxEm.outerHTML
        assert 'checked' in renderedHTML

Esempio n. 10

0

Mostra file

    def test_getAttributesDict(self):
        '''
            test_getAttributesDict - Check that getAttributesDict returns all four attributes of the
                element with the expected values, and that modifying what was returned
                does not alter the element itself
        '''
        document = AdvancedHTMLParser()
        document.parseStr('<div id="hello" style="display: none; width: 500px; padding-left: 15px;" class="One Two" data="Yes">Hello</div>')

        helloEm = document.getElementById('hello')
        assert helloEm.getAttribute('id', '') == 'hello', 'Got unxpected element'

        attrsCopy = helloEm.getAttributesDict()

        # All four attributes must be present, and nothing else
        assert 'id' in attrsCopy, 'Did not find "id" in the attributes dict copy'
        assert 'style' in attrsCopy, 'Did not find "style" in the attributes dict copy'
        assert 'class' in attrsCopy, 'Did not find "class" in the attributes dict copy'
        assert 'data' in attrsCopy, 'Did not find "data" in the attributes dict copy'

        numKeys = len(attrsCopy.keys())
        assert numKeys == 4, 'Got unexpected keys in attributesDict. Only expected "id" "style" "class" and "data", got: "%s"' % (repr(attrsCopy), )

        assert attrsCopy['id'] == 'hello', 'Attribute "id" did not have expected value "hello", got "%s"' % (attrsCopy['id'], )

        # Wrap the returned style value so individual properties can be inspected
        styleCopy = StyleAttribute(attrsCopy['style'])
        assert styleCopy.display == 'none', 'Got unexpected value for display in style copy. Expected "none", got "%s"' % (styleCopy.display, )
        assert styleCopy.width == '500px', 'Got unexpected value for width in style copy. Expected "500px", got "%s"' % (styleCopy.width, )
        assert styleCopy.paddingLeft == '15px', 'Got unexpected value for padding-left. Expected "15px", got "%s"' % (styleCopy.paddingLeft, )

        assert attrsCopy['class'] == 'One Two', 'Got unexpected value for "class" in dict copy. Expected "One Two", Got: "%s"' % (attrsCopy['class'], )

        assert attrsCopy['data'] == 'Yes', 'Got unexpected value for "data" in dict copy, Expected "Yes", Got: "%s"' % (attrsCopy['data'], )

        # From here on: mutate the copies, and make sure the element did not change
        styleCopy.paddingTop = '13em'
        assert helloEm.style.paddingTop != '13em', 'Expected getAttributesDict to return copies, but modified original element on "style"'

        attrsCopy['class'] += ' Three'
        assert 'Three' not in helloEm.getAttribute('class'), 'Expected getAttributesDict to return copies, but modified original element on "class"'

        attrsCopy['id'] = 'zzz'
        assert helloEm.getAttribute('id') != 'zzz', 'Expected getAttributesDict to return copies, but modified original element on "id"'

Esempio n. 11

0

Mostra file

File: test_untaggedText.py Progetto: yexihu/AdvancedHTMLParser

    def test_multipleRootsWithExternalTextSameReturn(self):
        '''
            test_multipleRootsWithExternalTextSameReturn - Check that two root tags with text between
                and after them come back unchanged from getHTML (newlines and spaces ignored)
        '''
        html = """<span>Hello</span>Outside<span>World</span>End"""

        document = AdvancedHTMLParser()
        document.parseStr(html)

        # Drop newlines first, then spaces, before comparing against the input
        outputHTML = document.getHTML()
        strippedHTML = outputHTML.replace('\n', '').replace(' ', '')

        assert strippedHTML == html, "Expected multiple root nodes with text between the nodes to retain, '%s' == '%s'" % (html, strippedHTML)

Esempio n. 12

0

Mostra file

    def getItemsParser(self):
        '''
            getItemsParser - Build a parser loaded with the "outer" / "items" / item1 / item2 fixture markup

                @return <AdvancedHTMLParser> - A new parser which has parsed the fixture
        '''
        itemsHTML = """<div id='outer'> <div id='items'> <div name="item" id="item1" >item1</div> <div name="item" id="item2" >item2</div> </div> </div>"""

        itemsParser = AdvancedHTMLParser()
        itemsParser.parseStr(itemsHTML)

        return itemsParser

Esempio n. 13

0

Mostra file

File: test_Insertions.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_firstLastChild(self):
        '''
            test_firstLastChild - Check the first/last child accessors on a tag with mixed
                text and tag children, and on a tag with no children at all:

                AdvancedTag.firstChild and AdvancedTag.firstElementChild
                AdvancedTag.lastChild and AdvancedTag.lastElementChild
        '''
        parser = AdvancedHTMLParser()
        parser.parseStr('<div id="main">Hello<div id="two">Blah</div><div id="emptyDiv"></div><div id="three">Three</div>End Text</div>')

        mainEm = parser.getElementById('main')
        assert mainEm, "Failed to get element by id='main'"
        assert mainEm.id == 'main', 'Got wrong element for id="main"'

        # First child of any kind is the leading text; first element child is div id="two"
        leadingBlock = mainEm.firstChild
        assert leadingBlock == 'Hello', 'Expected .firstChild to return the first block child, str("Hello") but got: %s(%s)' %( leadingBlock.__class__.__name__, repr(leadingBlock))

        leadingEm = mainEm.firstElementChild
        assert issubclass(leadingEm.__class__, AdvancedTag), 'Expected firstElementChild to return an AdvancedTag object. Got: ' + leadingEm.__class__.__name__
        assert leadingEm.tagName == 'div' and leadingEm.id == 'two', 'Expected to get div id="two" as firstElementChild. Got: %s(%s)' %( leadingEm.__class__.__name__, repr(leadingEm))

        # Last child of any kind is the trailing text; last element child is div id="three"
        trailingBlock = mainEm.lastChild
        assert trailingBlock == "End Text", 'Expected .lastChild to return the last block child, str("End Text") but got: %s(%s)' %( trailingBlock.__class__.__name__, repr(trailingBlock))

        trailingEm = mainEm.lastElementChild
        assert issubclass(trailingEm.__class__, AdvancedTag), 'Expected lastElementChild to return an AdvancedTag object. Got: ' + trailingEm.__class__.__name__
        assert trailingEm.tagName == 'div' and trailingEm.id == 'three', 'Expected to get div id="three" as lastElementChild. Got: %s(%s)' %( trailingEm.__class__.__name__, repr(trailingEm))

        # A tag without children must give None from all four accessors
        emptyDivEm = parser.getElementById('emptyDiv')
        assert emptyDivEm, 'Failed to get element by id="emptyDiv"'
        assert emptyDivEm.id == 'emptyDiv', 'Got wrong element for id="emptyDiv"'

        emptyFirst = emptyDivEm.firstChild
        assert emptyFirst is None, 'Expected empty div .firstChild to be None (null). Got: ' + repr(emptyFirst)

        emptyFirstEm = emptyDivEm.firstElementChild
        assert emptyFirstEm is None, 'Expected empty div .firstElementChild to be None (null). Got: ' + repr(emptyFirstEm)

        emptyLast = emptyDivEm.lastChild
        assert emptyLast is None, 'Expected empty div .lastChild to be None (null). Got: ' + repr(emptyLast)

        emptyLastEm = emptyDivEm.lastElementChild
        assert emptyLastEm is None, 'Expected empty div .lastElementChild to be None (null). Got: ' + repr(emptyLastEm)

Esempio n. 14

0

Mostra file

File: test_untaggedText.py Progetto: yexihu/AdvancedHTMLParser

    def test_multipleRootsSameReturn(self):
        '''
            test_multipleRootsSameReturn - Check that two adjacent root tags come back
                unchanged from getHTML (newlines and spaces ignored)
        '''
        html = """<span>Hello</span><span>World</span>"""

        document = AdvancedHTMLParser()
        document.parseStr(html)

        # Drop newlines first, then spaces, before comparing against the input
        outputHTML = document.getHTML()
        strippedHTML = outputHTML.replace('\n', '').replace(' ', '')

        assert strippedHTML == html, "Expected multiple root nodes to retain, '%s' == '%s'" % (html, strippedHTML)

Esempio n. 15

0

Mostra file

    def test_ParseStr(self):
        '''
            test_ParseStr - Parse TEST_HTML from a string and check the id="farm" element
                has two children, the first of which contains "Moo"
        '''
        document = AdvancedHTMLParser()
        document.parseStr(TEST_HTML)

        farmEm = document.getElementById('farm')
        assert farmEm, 'Failed to extract data'

        farmChildren = farmEm.children
        assert len(farmChildren) == 2, 'Invalid data from file parsing'

        firstChildText = farmEm.children[0].innerHTML.strip()
        assert firstChildText == 'Moo', 'Invalid data from file parsing'

Esempio n. 16

0

Mostra file

File: test_untaggedText.py Progetto: yexihu/AdvancedHTMLParser

    def test_untaggedText(self):
        '''
            test_untaggedText - Check that text sitting outside of any tag
                (here "\\n29\\n") is still present in the HTML the parser returns
        '''
        sourceHTML = """    <span class="WebRupee">Rs.</span>\n29\n<br/><font style="font-size:smaller;font-weight:normal">\n3 days\n</font></td>, <td class="pricecell"><span class="WebRupee">Rs.</span>\n59\n<br/><font style="font-size:smaller;font-weight:normal">\n7 days\n</font></td>, <td class="pricecell"><span class="WebRupee">Rs.</span>\n99\n<br/><font style="font-size:smaller;font-weight:normal">\n12 days\n</font></td>"""

        document = AdvancedHTMLParser()
        document.parseStr(sourceHTML)

        returnedHTML = document.getHTML()

        assert '\n29\n' in returnedHTML, 'Expected to find item outside tags: \\n29\\n in ' + str(returnedHTML)

Esempio n. 17

0

Mostra file

File: test_InvalidHtml.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_HandleMultipleRoot(self):
        '''
            test_HandleMultipleRoot - Test that parsing MULTIPLE_ROOT does not raise,
                that id="one" can be found, and that two root nodes are present
        '''
        parser = AdvancedHTMLParser()
        try:
            parser.parseStr(MULTIPLE_ROOT)
        except Exception as e:
            # Include the underlying error, so a failure here can be diagnosed from the report
            raise AssertionError('Failed to properly parse invalid HTML with multiple root nodes, exception was: %s' %(str(e),))

        oneEm = parser.getElementById('one')
        assert oneEm , 'Failed to find first element'
        assert len(parser.getRootNodes()) == 2

Esempio n. 18

0

Mostra file

File: test_InvalidHtml.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_HandleMissClose(self):
        '''
            test_HandleMissClose - Test that parsing MISS_CLOSE does not raise, that id="one"
                can be found, and that its first child contains "Hello"
        '''
        parser = AdvancedHTMLParser()
        try:
            parser.parseStr(MISS_CLOSE)
        except Exception as e:
            # Include the underlying error, so a failure here can be diagnosed from the report
            raise AssertionError('Failed to properly parse invalid HTML with missed close, exception was: %s' %(str(e),))

        oneEm = parser.getElementById('one')
        assert oneEm , 'Failed to find id="one"'
        assert oneEm.children[0].innerHTML.strip() == 'Hello' , 'Could not find child tag'

Esempio n. 19

0

Mostra file

    def test_encodingWorkingFile(self):
        '''
            test_encodingWorkingFile - Check that parsing self.tempFile with encoding='ascii'
                raises UnicodeDecodeError, i.e. that the passed encoding is really used

                NOTE(review): presumably the temp file holds non-ascii characters - confirm against the setup
        '''
        asciiParser = AdvancedHTMLParser(encoding='ascii')

        try:
            asciiParser.parseFile(self.tempFile.name)
            gotException = False
        except UnicodeDecodeError:
            gotException = True

        assert gotException is True, 'Should have failed to parse unicode characters in ascii codec, probably not using passed encoding'

Esempio n. 20

0

Mostra file

File: test_untaggedText.py Progetto: yexihu/AdvancedHTMLParser

    def test_commentRetainedAfterRoot(self):
        '''
            test_commentRetainedAfterRoot - Check that a comment placed after the closing
                root tag is still present in the HTML the parser returns
        '''
        sourceHTML = """<html>
        <body><span>Hello</span></body></html><!-- CommentX -->"""

        document = AdvancedHTMLParser()
        document.parseStr(sourceHTML)

        retHTML = document.getHTML()
        commentFound = 'CommentX' in retHTML

        assert commentFound, 'Expected to find comment, "CommentX" in returned HTML: "%s"' % (retHTML, )

Esempio n. 21

0

Mostra file

    def test_ParseFile(self):
        '''
            test_ParseFile - Parse self.tempFile by filename and check the id="farm" element
                has two children, the first of which contains "Moo"
        '''
        document = AdvancedHTMLParser()
        filename = self.tempFile.name

        try:
            document.parseFile(filename)
        except Exception as parseError:
            raise AssertionError('Failed to parse file, exception was: %s' %(str(parseError),))

        farmEm = document.getElementById('farm')
        assert farmEm, 'Failed to extract data from file parsing'

        farmChildren = farmEm.children
        assert len(farmChildren) == 2, 'Invalid data from file parsing'

        firstChildText = farmEm.children[0].innerHTML.strip()
        assert firstChildText == 'Moo', 'Invalid data from file parsing'

Esempio n. 22

0

Mostra file

File: test_Insertions.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_previousSibling(self):
        '''
            test_previousSibling - Test previousSibling and previousSiblingElement on a tag
                which is preceded by text only, and on a tag which is preceded by another tag
        '''
        parser = AdvancedHTMLParser()
        parser.parseStr('<div>Head Text<div id="one">An item</div><div id="two">Another item</div>More Text<div id="three">Last  item</div></div>')

        root = parser.getRoot()

        oneEm = root.getElementById('one')
        twoEm = root.getElementById('two')

        # id="one" only has text before it, so there is no previous element
        assert oneEm.previousSibling == 'Head Text' , 'Expected to get "Head Text" as first sibling'
        assert oneEm.previousSiblingElement is None , 'Expected to get no element prior to first sibling'

        # id="two" directly follows the id="one" tag
        assert twoEm.previousSibling.id == 'one' , 'Expected to get element  "one" prior to two'
        assert twoEm.previousSiblingElement.id == 'one' , 'Expected to get element  "one" prior to two'

Esempio n. 23

0

Mostra file

File: test_RefTag.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_refTag(self):
        '''
            test_refTag - Check that the &lt; and &gt; entities in text are kept as entities,
                and are not turned into literal angle brackets, in the returned HTML
        '''
        sourceHTML = """<html><body><p>This is &lt;html&gt;</p></body></html>"""

        document = AdvancedHTMLParser()
        document.parseStr(sourceHTML)

        # Normalize the output: drop newlines, then collapse "html " to "html"
        outputHTML = document.getHTML()
        outputHTML = outputHTML.replace('\n', '')
        outputHTML = outputHTML.replace('html ', 'html')

        assert 'This is <html>' not in outputHTML, 'Expected to retain &lt; and &gt;, got %s' % (outputHTML, )
        assert 'This is &lt;html&gt;' in outputHTML, 'Expected to retain &lt; and &gt;, got %s' % (outputHTML, )

Esempio n. 24

0

Mostra file

File: test_Formatting.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_getMiniHTML(self):
        '''
            test_getMiniHTML - Check the exact output of getMiniHTML for TEST_HTML, a "mini"
                representation which only contains the functional whitespace characters
        '''
        expectedMiniHTML = '<html ><head ><title >Hello World</title></head> <body > <div >Hello world <span >And welcome to the show.</span> </div> </body></html>'

        document = AdvancedHTMLParser()
        document.parseStr(TEST_HTML)

        actualMiniHTML = document.getMiniHTML()

        assert actualMiniHTML == expectedMiniHTML

Esempio n. 25

0

Mostra file

File: test_Formatting.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_retainOriginalWhitespace(self):
        '''
            test_retainOriginalWhitespace - Check that getHTML gives back TEST_HTML
                with its original whitespacing intact
        '''
        # Not expected to equal the input exactly, because some tag issues get fixed up, like ' >'
        expectedHTML = '<html ><head ><title >Hello World</title></head>\n <body >\n <div >Hello world <span >And welcome to the show.</span>\n </div>\n </body></html>'

        document = AdvancedHTMLParser()
        document.parseStr(TEST_HTML)

        actualHTML = document.getHTML()

        assert actualHTML == expectedHTML, 'Did not retain original whitespace like expected'

Esempio n. 26

0

Mostra file

File: test_untaggedText.py Progetto: yexihu/AdvancedHTMLParser

    def test_textPriorToRoot(self):
        '''
            test_textPriorToRoot - Check that text placed before the root tag
                is still the start of the HTML the parser returns
        '''
        sourceHTML = """Hello<html><span id="one">Cheese</span><div>Goodbye</div></html>"""

        document = AdvancedHTMLParser()
        document.parseStr(sourceHTML)

        # Newlines are dropped before checking the prefix
        strippedHTML = document.getHTML().replace('\n', '')
        startsWithText = strippedHTML.startswith('Hello')

        assert startsWithText, 'Expected text before root tag to be retained, got "%s"' % (strippedHTML, )

Esempio n. 27

0

Mostra file

File: test_Formatting.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_getFormattedHTML(self):
        '''
            test_getFormattedHTML - Check the exact pretty-printed output of getFormattedHTML
                for TEST_HTML, both with the default indent and with a tab indent
        '''
        # NOTE(review): the failure message below says "4 spaces", but this expected
        #  output is indented by two spaces per level - confirm which one is intended
        expectedDefault = '\n<html >\n  <head >\n    <title >Hello World\n    </title>\n  </head> \n  <body > \n    <div >Hello world \n      <span >And welcome to the show.\n      </span> \n    </div> \n  </body>\n</html>'
        expectedTabbed = '\n<html >\n\t<head >\n\t\t<title >Hello World\n\t\t</title>\n\t</head> \n\t<body > \n\t\t<div >Hello world \n\t\t\t<span >And welcome to the show.\n\t\t\t</span> \n\t\t</div> \n\t</body>\n</html>'

        document = AdvancedHTMLParser()
        document.parseStr(TEST_HTML)

        defaultIndentHTML = document.getFormattedHTML()
        assert defaultIndentHTML == expectedDefault, 'Did not get expected formatting using default 4 spaces.'

        tabIndentHTML = document.getFormattedHTML('\t')
        assert tabIndentHTML == expectedTabbed, 'Did not get expected formatting using tabs.'

Esempio n. 28

0

Mostra file

File: test_RefTag.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_nbsp(self):
        '''
            test_nbsp - Check handling of the &nbsp; entity: it is kept when present (once or twice
                in a row) and it is not introduced when the input does not contain it
        '''
        # Case 1: A single &nbsp; must survive
        sourceHTML = """<html><body><p>Test&nbsp;One</p></body></html>"""
        document = AdvancedHTMLParser()
        document.parseStr(sourceHTML)

        outputHTML = document.getHTML().replace('\n', '').replace('html ', 'html')
        assert '&nbsp;' in outputHTML, '(Will fail in python2..) Expected to retain &nbsp; got %s' % (outputHTML, )

        # Case 2: No &nbsp; in the input, so none may appear in the output
        sourceHTML = """<html><body><p>Test One</p></body></html>"""
        document = AdvancedHTMLParser()
        document.parseStr(sourceHTML)

        outputHTML = document.getHTML().replace('\n', '').replace('html ', 'html')
        assert '&nbsp;' not in outputHTML, '(Will fail in python2..) Expected not to insert &nbsp; got %s' % (outputHTML, )

        # Case 3: Two consecutive &nbsp; must both survive, in place
        sourceHTML = """<html><body><p>Test&nbsp;&nbsp;One</p></body></html>"""
        document = AdvancedHTMLParser()
        document.parseStr(sourceHTML)

        outputHTML = document.getHTML().replace('\n', '').replace('html ', 'html')
        assert 'Test&nbsp;&nbsp;One' in outputHTML, '(Will fail in python2..) Expected to retain original data with two &nbsp; got %s' % (outputHTML, )

Esempio n. 29

0

Mostra file

    def test_removeAndContains(self):
        '''
            test_removeAndContains - Check the containment queries (hasChild, contains, containsUid,
                getAllNodeUids, the "in" operator) and ownerDocument, before and after
                calling remove() on an element which has a child of its own
        '''
        document = AdvancedHTMLParser()
        document.parseStr("""<div id='outer'> <div id='items'> <div name="item" id="item1" >item1 <span id="subItem1">Sub item</span></div> <div name="item" id="item2" >item2</div> </div> </div>""")

        itemsEm = document.getElementById('items')
        item1Em = document.getElementById('item1')
        subItem1Em = document.getElementById('subItem1')

        # Before the removal: item1 and its sub item are reachable every way
        assert itemsEm.hasChild(item1Em) is True, 'Expected itemsEm to have item1Em as a child.'
        assert document.getElementById('subItem1') is not None, 'Expected to find id=subItem1'

        assert itemsEm.contains(item1Em), 'Expected itemsEm to contain items1Em'
        assert itemsEm.contains(subItem1Em), 'Expected itemsEm to contain subItem1'
        assert subItem1Em.uid in itemsEm.getAllNodeUids()

        assert document.contains(item1Em), 'Expected parser to contain item1Em via contains'
        assert item1Em in document, 'Expected parser to contain item1Em via in operator'
        assert item1Em.ownerDocument == document, 'Expected ownerDocument to be set prior to remove'

        # Take item1 (and with it, the sub item) out of the tree
        item1Em.remove()

        # After the removal: neither item1 nor its sub item may be reachable
        assert itemsEm.hasChild(item1Em) is False, 'Expected after remove for item1Em to no longer be a child of itemsEm'
        assert document.getElementById('item1') is None, 'Expected to not be able to find id=item1 after remove'
        assert document.getElementById('subItem1') is None, 'Expected to not be able to find sub item of id=item1, id=subItem1 after remove.'
        assert item1Em.parentNode is None, 'Expected parentNode on item1Em to be None after remove.'

        assert not itemsEm.contains(item1Em), 'Expected itemsEm to not contain items1Em'
        assert not itemsEm.containsUid(item1Em.uid), 'Expected itemsEm to not contain items1Em'
        assert not itemsEm.contains(subItem1Em), 'Expected itemsEm to not contain subItem1'
        assert subItem1Em.uid not in itemsEm.getAllNodeUids()

        assert not document.contains(item1Em), 'Expected parser to not contain item1Em via contains'
        assert item1Em not in document, 'Expected parser to not contain item1Em via in operator'
        assert item1Em.ownerDocument is None, 'Expected owner document to be unset upon removal'

Esempio n. 30

0

Mostra file

File: test_Compare.py Progetto: wangxl1998/AdvancedHTMLParser

    def test_cloneNode(self):
        '''
            test_cloneNode - Check that cloneNode copies the attributes of a tag
                but none of its children, and leaves the original's children in place
        '''
        document = AdvancedHTMLParser()
        document.parseStr('''
        <div id="hello"  class="classX classY" cheese="cheddar" > <span>Child</span><span>Other Child</span> </div>
        ''')

        originalEm = document.getElementById('hello')
        cloneEm = originalEm.cloneNode()

        # Each attribute must have the same value on both the original and the clone
        for attrName in ('id', 'class', 'cheese'):
            originalValue = originalEm.getAttribute(attrName, None)
            cloneValue = cloneEm.getAttribute(attrName, None)
            assert originalValue == cloneValue, 'Expected cloneNode to return an exact copy, got different %s. %s != %s' % (attrName, repr(originalValue), repr(cloneValue))

        assert originalEm.childElementCount == 2, 'Expected original helloEm to retain two direct children'
        assert cloneEm.childElementCount == 0, 'Expected clone to NOT copy children'