def _process_html_tree(elt):
    """Recursively convert an element and its children to a safe_dom.NodeList.

    When the element's tag has an entry in tag_bindings, the bound class is
    instantiated and asked to render a replacement element (it is passed the
    element and handler, both defined outside this function); the replacement
    is converted instead of the original. Any exception raised while rendering
    or converting is logged and an error placeholder span is emitted in place
    of the element, so one bad tag does not abort the rest of the tree.

    Args:
        elt: the element to convert. Its tag, attrib, text and tail are read
            and it is iterated to obtain its children.

    Returns:
        A safe_dom.NodeList holding the converted element (or the error
        placeholder), followed by a safe_dom.Text node for the element's tail
        when the tail is non-empty.
    """
    node_list = safe_dom.NodeList()

    # Read the tail up front: elt may be rebound to a rendered replacement
    # below, and the tail to emit is the one of the source element.
    tail = elt.tail

    try:
        # Rendering happens inside the try block so that a custom tag which
        # raises is reported through the error placeholder like any other
        # conversion failure, instead of propagating to the caller.
        if elt.tag in tag_bindings:
            elt = tag_bindings[elt.tag]().render(elt, handler)

        if elt.tag.lower() == 'script':
            out_elt = safe_dom.ScriptElement()
        else:
            out_elt = safe_dom.Element(elt.tag)
        out_elt.add_attribute(**elt.attrib)
        if elt.text:
            out_elt.add_text(elt.text)
        for child in elt:
            out_elt.add_children(_process_html_tree(child))
    except Exception as e:  # pylint: disable=broad-except
        # logging.exception records the traceback along with the message.
        logging.exception('Invalid HTML tag: %s. %s', elt, e)
        out_elt = safe_dom.Element('span')
        out_elt.add_attribute(className='gcb-error-tag')
        out_elt.add_text(INVALID_HTML_TAG_MESSAGE)

    node_list.append(out_elt)
    if tail:
        node_list.append(safe_dom.Text(tail))
    return node_list
def _process_html_tree(elt):
    """Recursively parses an HTML tree into a safe_dom.NodeList().

    Args:
        elt: the element to convert. Its tag, attrib, text and tail are read
            and it is iterated to obtain its children.

    Returns:
        On success, a safe_dom.NodeList holding the converted element followed
        by a safe_dom.Text node for the source element's tail, when it has
        one. If the element carries an instanceid that was already seen, or if
        any exception is raised while rendering or converting, the result of
        _generate_error_message_node_list for the source element is returned
        instead.
    """
    # Return immediately with an error message if a duplicate instanceid is
    # detected.
    if 'instanceid' in elt.attrib:
        if elt.attrib['instanceid'] in used_instance_ids:
            return _generate_error_message_node_list(
                elt, DUPLICATE_INSTANCE_ID_MESSAGE)
        used_instance_ids.add(elt.attrib['instanceid'])

    # Otherwise, attempt to parse this tag and all its child tags.
    # Keep a reference to the source element: elt may be rebound to the
    # rendered replacement, but the tail and error reporting use the source.
    original_elt = elt
    try:
        if render_custom_tags and elt.tag in tag_bindings:
            tag = tag_bindings[elt.tag]()
            if isinstance(tag, ContextAwareTag):
                # Get or initialize an environment dict for this type of tag.
                # Each tag type gets a separate environment shared by all
                # instances of that tag.
                context = tag_contexts.get(elt.tag)
                if context is None:
                    context = ContextAwareTag.Context(handler, {})
                    tag_contexts[elt.tag] = context
                # Render the tag
                elt = tag.render(elt, context)
            else:
                # Render the tag
                elt = tag.render(elt, handler)

        if elt.tag == cElementTree.Comment:
            out_elt = safe_dom.Comment()
        elif elt.tag.lower() == 'script':
            out_elt = safe_dom.ScriptElement()
        else:
            out_elt = safe_dom.Element(_remove_namespace(elt.tag))
        out_elt.add_attribute(**elt.attrib)

        if elt.text:
            out_elt.add_text(elt.text)
        for child in elt:
            out_elt.add_children(
                _process_html_tree(child))

        node_list = safe_dom.NodeList()
        node_list.append(out_elt)
        if original_elt.tail:
            node_list.append(safe_dom.Text(original_elt.tail))
        return node_list

    except Exception as e:  # pylint: disable=broad-except
        # Log the source element's tag rather than elt.tag: elt may have been
        # rebound to whatever render() returned, and if that object has no
        # tag attribute the handler itself would raise and the error would
        # escape instead of becoming an error node.
        logging.exception('Error handling tag: %s', original_elt.tag)
        return _generate_error_message_node_list(
            original_elt, '%s: %s' % (INVALID_HTML_TAG_MESSAGE, e))
def _process_html_tree(elt):
    """Recursively convert an element and its children to a safe_dom.NodeList.

    When the element's tag has an entry in tag_bindings, the bound class is
    instantiated and its render() result is converted in place of the
    original element. Exceptions are not handled here and propagate to the
    caller.

    Args:
        elt: the element to convert. Its tag, attrib, text and tail are read
            and it is iterated to obtain its children.

    Returns:
        A safe_dom.NodeList holding the converted element, followed by a
        safe_dom.Text node for the source element's tail when it is non-empty.
    """
    # The tail to emit is the source element's, so read it before elt can be
    # rebound to a rendered replacement.
    trailing_text = elt.tail

    if elt.tag in tag_bindings:
        renderer = tag_bindings[elt.tag]()
        elt = renderer.render(elt)

    # Script elements get a dedicated safe_dom node type.
    is_script = elt.tag.lower() == 'script'
    converted = (
        safe_dom.ScriptElement() if is_script else safe_dom.Element(elt.tag))
    converted.add_attribute(**elt.attrib)
    if elt.text:
        converted.add_text(elt.text)
    for sub_elt in elt:
        converted.add_children(_process_html_tree(sub_elt))

    result = safe_dom.NodeList()
    result.append(converted)
    if trailing_text:
        result.append(safe_dom.Text(trailing_text))
    return result
def _process_html_tree(elt, used_instance_ids):
    """Recursively convert an element and its children to a safe_dom.NodeList.

    Args:
        elt: the element to convert. Its tag, attrib, text and tail are read
            and it is iterated to obtain its children.
        used_instance_ids: a mutable set-like collection of instanceid values
            already encountered. It is updated in place and passed on to the
            recursive calls for the element's children.

    Returns:
        On success, a safe_dom.NodeList holding the converted element followed
        by a safe_dom.Text node for the source element's tail, when it has
        one. If the element carries an instanceid that was already seen, or if
        any exception is raised while rendering or converting, the result of
        _generate_error_message_node_list for the source element is returned
        instead.
    """
    # Return immediately with an error message if a duplicate instanceid is
    # detected.
    if 'instanceid' in elt.attrib:
        if elt.attrib['instanceid'] in used_instance_ids:
            return _generate_error_message_node_list(
                elt, DUPLICATE_INSTANCE_ID_MESSAGE)
        used_instance_ids.add(elt.attrib['instanceid'])

    # Otherwise, attempt to parse this tag and all its child tags.
    # Keep a reference to the source element: elt may be rebound to the
    # rendered replacement, but the tail and error reporting use the source.
    original_elt = elt
    try:
        if elt.tag in tag_bindings:
            elt = tag_bindings[elt.tag]().render(elt, handler)

        if elt.tag.lower() == 'script':
            out_elt = safe_dom.ScriptElement()
        else:
            out_elt = safe_dom.Element(elt.tag)
        out_elt.add_attribute(**elt.attrib)

        if elt.text:
            out_elt.add_text(elt.text)
        for child in elt:
            out_elt.add_children(
                _process_html_tree(child, used_instance_ids))

        node_list = safe_dom.NodeList()
        node_list.append(out_elt)
        if original_elt.tail:
            node_list.append(safe_dom.Text(original_elt.tail))
        return node_list

    except Exception as e:  # pylint: disable=broad-except
        # The failure is turned into an error node for the page; also log it
        # with its traceback so the cause is not lost. The import is local so
        # this block does not rely on the module's import list.
        import logging
        logging.exception('Error handling tag: %s', original_elt.tag)
        return _generate_error_message_node_list(
            original_elt, '%s: %s' % (INVALID_HTML_TAG_MESSAGE, e))