Python parse_html Examples

Programming Language: Python

Namespace/Package Name: mirrordom.parser

Method/Function: parse_html

Examples at hotexamples.com: 6

Python parse_html - 6 examples found. These are the top-rated real-world Python examples of mirrordom.parser.parse_html, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: test_mirrordom_javascript.py Project: jymcheong/mirror-dom

    def test_fetch_document(self):
        """ Test 1: Just make sure fetching works

        Asks the in-page broadcaster for the document HTML, runs it through
        parse_html, serialises the resulting tree with lxml and compares the
        result against the contents of the HTML_CONTENT_FILE fixture.
        """
        self.init_webdriver()

        # Right now, browser_html should be the raw inner html
        browser_html = self.execute_script("""
            var data = broadcaster.start_document();
            return data['html'];
        """)

        # Internet explorer values contain windows line endings
        browser_html = browser_html.replace('\r\n', '\n')
        # Parenthesised single-argument form: prints the same text whether
        # print is the Python 2 statement or the Python 3 function.
        print("Browser HTML: %s" % (browser_html))
        browser_tree = parse_html(browser_html)
        browser_xml = lxml.etree.tostring(browser_tree)

        # Compare it to the actual HTML file. Use a context manager so the
        # file handle is closed even if reading fails.
        html_path = util.get_html_path(self.HTML_CONTENT_FILE)
        with open(html_path, 'r') as html_file:
            actual_html = html_file.read()

        # NOTE: a "semi hack" that force-inserted tbody elements into the
        # expected HTML (via lxml.html.fromstring + util.force_insert_tbody)
        # used to be sketched here but is disabled; the comparison below runs
        # against the unmodified file contents.

        assert self.compare_html(actual_html, browser_xml, ignore_script_content=True)

Example #2

Show file

 def apply_and_compare(self, html, desired_html=None,
         **compare_kwargs):
     """
     Pass ``html`` through ``self.to_browser_html``, parse the result with
     parse_html, serialise the tree with lxml and compare it against
     ``desired_html`` (falling back to ``html`` when that is None).

     Extra keyword arguments are forwarded to ``self.compare_html``, whose
     result is returned unchanged.
     """
     rendered = self.to_browser_html(html)
     if desired_html is None:
         desired_html = html
     tree = parse_html(rendered)
     serialised = lxml.etree.tostring(tree)
     return self.compare_html(desired_html, serialised, **compare_kwargs)

Example #3

Show file

File: test_mirrordom_javascript.py Project: jymcheong/mirror-dom

    def test_apply_document(self):
        """ Test 2: Apply document to viewer """
        self.init_webdriver()
        expected = self.TEST_APPLY_DOCUMENT
        apply_script = """
            var de = viewer.get_document_element();
            viewer.apply_document(de, arguments[0]);
        """
        self.execute_script(apply_script, expected)

        # The viewer's HTML is not directly comparable: run it through our
        # own HTML parser and serialise the tree back out as XML first, so
        # that the comparison can proceed.
        tree = parse_html(self.get_viewer_html())
        serialised = lxml.etree.tostring(tree)
        matches = self.compare_html(
            expected, serialised, strip_localhost_hrefs_for_ie=True)
        assert matches

Example #4

Show file

    def parse_and_compare(self, raw, desired=None, is_fragment=False,
            **compare_args):
        """
        Parse ``raw`` with parse_html, serialise the tree with lxml and
        compare the result against ``desired`` via ``self.compare_html``.

        :param raw:         Unsanitised HTML, the type you would expect to see
                            from a browser's innerHTML. This will be parsed
                            with mirrordom's parsing routine.

        :param desired:     What the HTML should look like. MUST be well formed XML.
                            Will be parsed with a strict XML parser.
                            Defaults to ``raw`` when omitted.

        :param is_fragment: Not used by this method; kept so existing callers
                            that pass it keep working.

        :param compare_args: Extra keyword arguments forwarded unchanged to
                            ``self.compare_html``.

        :returns:           Whatever ``self.compare_html`` returns.
        """
        # No ``global compare_html`` here: the comparison goes through the
        # instance method, so the declaration had no effect.
        if desired is None:
            desired = raw
        # Both sides are stripped of surrounding whitespace before use.
        raw = raw.strip()
        desired = desired.strip()
        parsed_tree = parse_html(raw)
        parsed_html = lxml.etree.tostring(parsed_tree)
        return self.compare_html(desired, parsed_html, **compare_args)

Example #5

Show file

 def html_to_xml(self, html):
     """Parse ``html`` with parse_html and return the lxml-serialised tree."""
     return lxml.etree.tostring(parse_html(html))

Example #6

Show file

File: test_mirrordom.py Project: fooby/mirror-dom

 def html_to_xml(self, html):
     """
     Run ``html`` through parse_html and hand back the result of
     ``lxml.etree.tostring`` on the parsed tree.
     """
     parsed = parse_html(html)
     serialised = lxml.etree.tostring(parsed)
     return serialised