def test_fetch_document(self):
    """
    Test 1: Just make sure fetching works.

    Runs ``broadcaster.start_document()`` in the browser, takes the
    ``'html'`` entry of its result, parses it with ``parse_html`` and
    compares the serialised XML against the contents of the fixture file
    named by ``self.HTML_CONTENT_FILE``. Script content is ignored in the
    comparison.
    """
    self.init_webdriver()

    # Right now, browser_html should be the raw inner html
    browser_html = self.execute_script(""" var data = broadcaster.start_document(); return data['html']; """)

    # Internet explorer values contain windows line endings
    browser_html = browser_html.replace('\r\n', '\n')

    # A single parenthesised argument prints the same whether ``print`` is
    # the Python 2 statement or a function.
    print("Browser HTML: %s" % (browser_html,))

    # The browser output is not well-formed XML, so run it through our own
    # parser and dump it back out before comparing.
    browser_tree = parse_html(browser_html)
    browser_xml = lxml.etree.tostring(browser_tree)

    # Compare it to the actual HTML file. Use a context manager so the
    # file handle is closed deterministically instead of being leaked.
    html_path = util.get_html_path(self.HTML_CONTENT_FILE)
    with open(html_path, 'r') as html_file:
        actual_html = html_file.read()

    # Semi hack: We expect all browsers to insert tbody, so the "expected"
    # html would need tbodies inserted too. That step
    # (util.force_insert_tbody on an lxml.html tree) is currently disabled.
    assert self.compare_html(actual_html, browser_xml,
                             ignore_script_content=True)
def apply_and_compare(self, html, desired_html=None, **compare_kwargs):
    """
    Pass ``html`` through ``self.to_browser_html`` and compare the outcome
    with the expected markup.

    :param html: HTML handed to ``self.to_browser_html``.
    :param desired_html: Expected HTML. When ``None``, ``html`` itself is
                         used as the expectation.
    :param compare_kwargs: Extra keyword arguments forwarded unchanged to
                           ``self.compare_html``.
    :return: Whatever ``self.compare_html`` returns.
    """
    from_browser = self.to_browser_html(html)

    if desired_html is None:
        desired_html = html

    # Re-parse the browser output and serialise it again so that the
    # comparison receives the output of our own parser.
    normalised_xml = lxml.etree.tostring(parse_html(from_browser))
    return self.compare_html(desired_html, normalised_xml, **compare_kwargs)
def test_apply_document(self):
    """ Test 2: Apply document to viewer """
    self.init_webdriver()
    expected = self.TEST_APPLY_DOCUMENT

    # Hand the expected document to the viewer inside the browser.
    apply_script = """ var de = viewer.get_document_element(); viewer.apply_document(de, arguments[0]); """
    self.execute_script(apply_script, expected)

    # The viewer's HTML has to go through our own custom HTML parser and
    # be dumped back out as well-formed XML before the comparison can
    # proceed.
    viewer_xml = lxml.etree.tostring(parse_html(self.get_viewer_html()))

    matches = self.compare_html(expected, viewer_xml,
                                strip_localhost_hrefs_for_ie=True)
    assert matches
def parse_and_compare(self, raw, desired=None, is_fragment=False,
                      **compare_args):
    """
    Parse ``raw`` with ``parse_html`` and compare the serialised result
    against ``desired``.

    :param raw:     Unsanitised HTML, the type you would expect to see from
                    a browser's innerHTML. This will be parsed with
                    mirrordom's parsing routine.
    :param desired: What the HTML should look like. MUST be well formed
                    XML. Will be parsed with a strict XML parser. Defaults
                    to ``raw`` when ``None``.
    :param is_fragment: Accepted for interface compatibility; this method
                    does not currently use it.
    :param compare_args: Extra keyword arguments forwarded unchanged to
                    ``self.compare_html``.
    :return: Whatever ``self.compare_html`` returns.
    """
    # The comparison goes through self.compare_html, so no module-level
    # ``global`` declaration is needed here.
    desired = raw if desired is None else desired

    # Surrounding whitespace is stripped from both sides before parsing
    # and comparing.
    raw = raw.strip()
    desired = desired.strip()

    parsed_tree = parse_html(raw)
    parsed_html = lxml.etree.tostring(parsed_tree)
    return self.compare_html(desired, parsed_html, **compare_args)
def html_to_xml(self, html):
    """
    Parse ``html`` with ``parse_html`` and return the resulting tree
    serialised by ``lxml.etree.tostring``.
    """
    return lxml.etree.tostring(parse_html(html))