def remove_namespaces(self, root):
    """Return a namespace-free copy of the tree rooted at *root*.

    The copy is rooted at a fresh ``<html>`` element which carries the
    ``lang`` attribute of *root* (whatever namespace that attribute was
    in) when it has a non-empty one. Comments and processing instructions
    are extracted from *root* before copying, so *root* itself is modified.
    """

    def strip_ns(qname):
        # '{namespace}local' -> 'local'; names without a namespace pass through
        return qname.rpartition('}')[-1]

    lang = None
    for name, value in root.attrib.iteritems():
        if strip_ns(name) == 'lang':
            lang = value

    # lxml offers no way to drop namespace declarations from an existing
    # tree, so the tree is cloned by hand with every tag and attribute
    # reduced to its local name. The result serializes as clean HTML 5
    # markup without namespaces; the XHTML namespace is inserted manually
    # after serialization. Earlier layers are expected to have removed svg
    # and any other non-html namespaced tags already.
    new_root = etree.Element('html', attrib={'lang': lang} if lang else {})
    new_root.text = root.text
    new_root.tail = '\n'

    # kindlegen seems to remove Comments and ProcessingInstructions as
    # well, so they are not carried over
    unwanted = {etree.Comment, etree.ProcessingInstruction}
    for node in root.iterdescendants():
        if node.tag in unwanted:
            extract(node)

    for node in root.iterdescendants():
        if node.tag == etree.Entity:
            clone = etree.Entity(node.name)
        else:
            local = node.tag
            if local is not None:
                local = strip_ns(local)
            plain_attrs = {
                strip_ns(k): v for k, v in node.attrib.iteritems()
            }
            clone = new_root.makeelement(local, attrib=plain_attrs)
            clone.text = node.text
        clone.tail = node.tail
        # Locate, in the new tree, the node at the same path as the
        # original parent and attach the clone there.
        new_parent = node_from_path(new_root, path_to_node(node.getparent()))
        new_parent.append(clone)

    return new_root
def dict_to_node(o, xpo, xlo, buf):
    '''
    Convert one item of a dict-based tree description into a node.

    <o>   current item: a raw string, or a dict whose first key is the
          node name and whose first value is the node kind
          ('T' element, 'E' entity, 'D' document)
    <xpo> node casted from the parent dict
    <xlo> node casted from the last dict (None for the first child)
    <buf> binary buffer; a bare DOCTYPE line is written to it for a 'D'
          item whose 'doctype' value is not a dict

    Returns the string itself for raw text (after storing it as the text
    of <xpo> or the tail of <xlo>), the new element / entity / element
    tree for dicts, and None for any other input type.

    Raises Exception for two consecutive raw strings and AssertionError
    for lists, unknown node kinds and malformed dicts.
    '''
    if isinstance(o, str):
        if xlo is None:
            ## this is the first child
            ## set text of parent node <xpo>
            xpo.text = o
            return o
        elif isinstance(xlo, str):
            raise Exception(f'Consecutive raw fields in list: [{xlo!r},{o!r}')
        else:
            ## this is not the first child
            ## set tail of last node
            xlo.tail = o
            return o

    elif isinstance(o, list):
        # Raised explicitly (not via ``assert 0``) so it still fires under -O.
        raise AssertionError(
            'Undefined occurence of list %s' % (repr(o)[:200]))

    elif isinstance(o, dict):
        o = OrderedDict(o)
        oks = list(o)
        k = oks[0]
        v = o.pop(k)

        # Node kinds are compared with ``==``: ``is`` on string literals
        # only works when both strings happen to be the same object.
        if v == 'T':
            xpo = ET.Element(oks[0])
            # Every key before the children key is an attribute; the
            # children key ('_#c' or '_#cr') must come last.
            for i, (k, v) in enumerate(o.items()):
                if k[:3] == '_#c':
                    assert i + 1 == len(o), (i, k, o.keys())
                    break
                else:
                    xpo.set(k, v)

            if oks[-1] == '_#cr':
                ### use _#cr to map to a _#c: [{_#r: innerHtml }] <--> _#cr: innterHtml
                xpo.text = o['_#cr']
            else:
                assert oks[-1] == '_#c'
                xlo = None
                for oo in o['_#c']:
                    xlo = dict_to_node(oo, xpo, xlo, buf)
                    # Raw strings were already stored as text/tail.
                    if not isinstance(xlo, str):
                        xpo.append(xlo)
            return xpo

        elif v == 'E':
            # An entity reference; <k> is the entity name.
            xpo = ET.Entity(k)
            return xpo

        elif v == 'D':
            assert k == '_', ('Key must be _ when specifying D _: D ', k, v)
            oo = o['_#c'][0]
            root_name = list(oo)[0]
            xpo = ET.ElementTree(dict_to_node(oo, xpo, None, buf))
            if 'doctype' in o:
                v = o['doctype']
                if isinstance(v, dict):
                    xpo.docinfo.system_url = v['system_url']
                    xpo.docinfo.public_id = v['public_id']
                else:
                    buf.write(f'<!DOCTYPE {root_name}>\n'.encode())
            return xpo

        else:
            raise AssertionError(('Invalid node identifier', oks[0], v))
def appendCvLink(root, key, lntail):
    """Append a ``cv-link-<key>`` entity reference to *root*.

    *lntail* becomes the tail of the new entity node, i.e. the text that
    follows the reference inside *root*.
    """
    entity_name = 'cv-link-' + key
    link_ref = etree.Entity(entity_name)
    link_ref.tail = lntail
    root.append(link_ref)
def serialize_paragraph(ctx, document, par, root, embed=True):
    """Serializes paragraph element.

    This is the most important serializer of them all.

    Builds a ``<p>`` element for ``par``, runs the registered serializer
    for every item in ``par.elements``, folds text runs into the element
    (merging adjacent runs where their tag, class and style allow it),
    optionally retags the element as a header, and finally attaches it to
    ``root`` or hands it to ``open_list``.

    Args:
        ctx: Serialization context. This function reads ``ctx.options``,
            ``ctx.opened_comments`` and ``ctx.header``, calls
            ``ctx.get_serializer`` / ``ctx.get_hook`` and resets
            ``ctx.ilvl`` / ``ctx.numid``.
        document: Document being serialized; ``document.comments`` is
            updated with the text of runs inside opened comments.
        par: Paragraph to serialize.
        root: Node the paragraph is appended to; may be ``None``.
        embed: Not used by this function.

    Returns:
        The value produced by ``open_list`` / ``close_list`` for ``root``.
    """
    style = get_style(document, par)
    elem = etree.Element('p')

    if ctx.options['embed_styles']:
        _style = get_style_css(ctx, par)
        if _style != '':
            elem.set('style', _style)
    else:
        _style = ''

    if style:
        elem.set('class', get_css_classes(document, style))

    max_font_size = get_style_fontsize(par)
    if style:
        max_font_size = _get_font_size(document, style)

    for el in par.elements:
        _serializer = ctx.get_serializer(el)
        if _serializer:
            _serializer(ctx, document, el, elem)

        if isinstance(el, doc.Text):
            children = list(elem)
            _text_style = get_style_css(ctx, el)
            _text_class = el.rpr.get('style', '').lower()

            if get_style_fontsize(el) > max_font_size:
                max_font_size = get_style_fontsize(el)

            if 'superscript' in el.rpr:
                new_element = etree.Element('sup')
                new_element.text = el.value()
            elif 'subscript' in el.rpr:
                new_element = etree.Element('sub')
                new_element.text = el.value()
            elif 'b' in el.rpr or 'i' in el.rpr or 'u' in el.rpr:
                new_element = None
                _element = None

                def _add_formatting(f, new_element, _element):
                    # Nest a <f> element inside the innermost formatting
                    # element created so far (or start the chain with it).
                    if f in el.rpr:
                        _t = etree.Element(f)
                        if new_element is not None:
                            _element.append(_t)
                            _element = _t
                        else:
                            new_element = _t
                            _element = new_element
                    return new_element, _element

                new_element, _element = _add_formatting(
                    'b', new_element, _element)
                new_element, _element = _add_formatting(
                    'i', new_element, _element)
                new_element, _element = _add_formatting(
                    'u', new_element, _element)

                # The text lives in the innermost formatting element.
                _element.text = el.value()

                for comment_id in ctx.opened_comments:
                    # Same guard as the plain-span branch below, so an
                    # opened comment missing from the document is skipped
                    # instead of raising KeyError.
                    if comment_id in document.comments:
                        document.comments[comment_id].text += ' ' + el.value()
            else:
                new_element = etree.Element('span')
                new_element.text = el.value()

                if ctx.options['embed_styles']:
                    try:
                        new_element.set('class', _text_class)
                    except (TypeError, ValueError):
                        # Best effort: a class value lxml refuses is
                        # simply left off.
                        pass

                for comment_id in ctx.opened_comments:
                    if comment_id in document.comments:
                        document.comments[comment_id].text += ' ' + el.value()

            if ctx.options['embed_styles']:
                if _text_style != '' and _style != _text_style:
                    new_element.set('style', _text_style)
                # This is for situations when style has options and
                # text is trying to unset them
                # else:
                #     new_element.set('class', 'noformat')

            was_inserted = False

            if children:
                last_child = children[-1]
                _child_style = last_child.get('style') or ''
                _child_class = last_child.get('class', '')

                # Same tag with compatible class/style and no tail yet:
                # extend the text of the previous child.
                if new_element.tag == last_child.tag and (
                    (_text_class == _child_class or _child_class == '')
                        and (_text_style == _child_style
                             or _child_style == '')
                ) and last_child.tail is None:
                    txt = last_child.text or ''
                    txt2 = new_element.text or ''
                    last_child.text = u'{}{}'.format(txt, txt2)
                    was_inserted = True

                if not was_inserted:
                    # Otherwise a plain span is folded into the tail of
                    # the previous child.
                    if _style == _text_style and new_element.tag == 'span' and (
                            _text_class == _child_class or _child_class == ''):
                        txt = last_child.tail or ''
                        last_child.tail = u'{}{}'.format(
                            txt, new_element.text)
                        was_inserted = True

                    if not was_inserted and new_element.tag == 'span' and (
                            _text_class != _child_class):
                        txt = last_child.tail or ''
                        last_child.tail = u'{}{}'.format(
                            txt, new_element.text)
                        was_inserted = True

            if not was_inserted:
                # Class of the previous child, or '' when there is none.
                _child_class = children[-1].get('class', '') if children else ''

                if _style == _text_style and new_element.tag == 'span' and (
                        _text_class == _child_class):
                    txt = elem.text or ''
                    elem.text = u'{}{}'.format(txt, new_element.text)
                else:
                    if new_element.text != u'':
                        elem.append(new_element)

    if not par.is_dropcap() and par.ilvl is None:
        if style:
            if ctx.header.is_header(par, max_font_size, elem, style=style):
                elem.tag = ctx.header.get_header(par, style, elem)

                if par.ilvl is None:
                    root = close_list(ctx, root)
                    ctx.ilvl, ctx.numid = None, None

                if root is not None:
                    root.append(elem)

                fire_hooks(ctx, document, par, elem, ctx.get_hook('h'))
                return root
        else:
            # The header check used to run only when max_font_size was
            # bigger than ctx.header.default_font_size. In many cases this
            # did not work out well, so the check is now unconditional.
            if ctx.header.is_header(par, max_font_size, elem, style=style):
                if elem.text != '' and len(elem) != 0:
                    elem.tag = ctx.header.get_header(
                        par, max_font_size, elem)

                    if par.ilvl is None:
                        root = close_list(ctx, root)
                        ctx.ilvl, ctx.numid = None, None

                    if root is not None:
                        root.append(elem)

                    fire_hooks(ctx, document, par, elem, ctx.get_hook('h'))
                    return root

    if len(elem) == 0 and elem.text is None:
        if ctx.options['empty_paragraph_as_nbsp']:
            elem.append(etree.Entity('nbsp'))

    # Indentation is different. We are starting or closing list.
    if par.ilvl is not None:
        root = open_list(ctx, document, par, root, elem)
        return root
    else:
        root = close_list(ctx, root)
        ctx.ilvl, ctx.numid = None, None

    # Add new elements to our root element.
    if root is not None:
        root.append(elem)

    fire_hooks(ctx, document, par, elem, ctx.get_hook('p'))

    return root
}, { 'prefix': 'OS-KSADM', 'value': 'http://docs.openstack.org/identity/api/ext/OS-KSADM/v1.0', }, ] PARSER = etree.XMLParser( resolve_entities=False, remove_comments=True, remove_pis=True) # NOTE(dolph): lxml.etree.Entity() is just a callable that currently returns an # lxml.etree._Entity instance, which doesn't appear to be part of the # public API, so we discover the type dynamically to be safe ENTITY_TYPE = type(etree.Entity('x')) def from_xml(xml): """Deserialize XML to a dictionary.""" if xml is None: return None deserializer = XmlDeserializer() return deserializer(xml) def to_xml(d, xmlns=None): """Serialize a dictionary to XML.""" if d is None: return None
def addElement(self, elemName):
    """Declare *elemName* as an external entity and reference it.

    An ``<!ENTITY name SYSTEM "name">`` declaration line is added to
    ``self.docString``, then an entity reference with the same name is
    appended to ``self.gdml``.
    """
    declaration = ''.join(
        ('<!ENTITY ', elemName, ' SYSTEM "', elemName, '">\n'))
    self.docString += declaration

    entity_ref = etree.Entity(elemName)
    self.gdml.append(entity_ref)
def prepend_space(element):
    """Insert an ``&nbsp;`` entity reference directly before *element*."""
    nbsp = etree.Entity('nbsp')
    element.addprevious(nbsp)
def tile_render_tween(request):
    """Render the response for *request* and expand the tiles it contains.

    For ``text/html`` responses, every node matched by ``TILE_XPATH`` is
    resolved to a path, rendered through ``handler`` as a subrequest and
    replaced by the body of the tile response; children of the tile
    response's ``<head>`` are moved into the page ``<head>``. Responses
    with any other content type are returned untouched.

    Raises:
        Exception: if a tile node has neither a ``path`` nor a ``type``
            attribute.
    """
    response = handler(request)
    if response.content_type == 'text/html':
        if isinstance(response, WSGIHTTPException):
            # the body of a WSGIHTTPException needs to be "prepared"
            response.prepare(request.environ)

        serializer = getHTMLSerializer(response.app_iter)
        tree = serializer.tree
        head_node = tree.getroot().find('head')

        for tile_node in TILE_XPATH(serializer.tree):
            # determine tile path
            tile_path = tile_node.attrib.get('path')
            tile_type = tile_node.attrib.get('type')
            if tile_path and tile_type:
                if tile_path == '/':
                    path = '/tile:' + tile_type
                else:
                    path = '/'.join((tile_path, 'tile:' + tile_type))
            elif tile_path:
                path = tile_path
            elif tile_type:
                path = request.resource_path(request.context,
                                             'tile:' + tile_type)
            else:
                # XXX how can we show a useful line number?
                raise Exception('Tile must have a path or type')

            # fetch tile contents
            subrequest = Request.blank(path)
            subrequest.registry = registry
            tile_data = dict(tile_node.attrib)
            tile_data['innerHTML'] = (tile_node.text or '') + ''.join([
                html.tostring(child) for child in tile_node.iterchildren()
            ])
            if tile_path:
                edit_url = request.route_path(MANAGE_ROUTE_NAME, 'edit_tile',
                                              traverse=tile_path)
            else:
                edit_url = request.mgmt_path(request.context, 'edit_tile')
            # The edit URL carries the full tile data, including the type.
            edit_url += '?' + urlencode(tile_data)
            # A tile addressed by path alone has no 'type' attribute, so
            # the key is dropped only when present (``del`` raised KeyError).
            tile_data.pop('type', None)
            subrequest.tile_data = tile_data
            tile_response = handler(subrequest)
            tile_tree = getHTMLSerializer(tile_response.app_iter).tree
            tile_root = tile_tree.getroot()
            tile_body = tile_root.find('body')

            # add edit link
            if has_permission('Edit tile', subrequest.context, request):
                edit_link = builder.E.a('', href=edit_url)
                edit_link.append(etree.Entity('#9997'))
                tile_body.append(edit_link)

            # insert tile content
            tile_head = tile_root.find('head')
            if tile_head is not None:
                # Appending moves each child out of tile_head, so iterate
                # over a snapshot rather than the live element.
                for child in list(tile_head):
                    head_node.append(child)
            # NOTE(review): tile_tree cannot be None at this point; this
            # check may have been meant for tile_body - confirm.
            if tile_tree is not None:
                replace_content_with_children(tile_node, tile_body)

        response.app_iter = [serializer.serialize()]

    return response
def exportElement(self, dirPath, elemName, elem):
    """Write *elem* to its own file and reference it as an external entity.

    The element is written to ``dirPath/elemName``. An
    ``<!ENTITY name SYSTEM "name">`` declaration line is then added to
    ``self.docString`` and an entity reference with the same name is
    appended to ``self.gdml``.
    """
    standalone = etree.ElementTree(elem)
    standalone.write(os.path.join(dirPath, elemName))

    declaration = ''.join(
        ('<!ENTITY ', elemName, ' SYSTEM "', elemName, '">\n'))
    self.docString += declaration

    entity_ref = etree.Entity(elemName)
    self.gdml.append(entity_ref)
# Requires lxml: pip install lxml
from lxml import etree

# Build <root interesting="totally"> with four children; child0 is
# inserted at the front after the other three were appended.
root = etree.Element("root", interesting="totally")
root.append(etree.Element("child1"))
second = etree.SubElement(root, "child2")
second.text = "Child 2"
etree.SubElement(root, "child3")
root.insert(0, etree.Element("child0"))

pretty = etree.tostring(root, pretty_print=True).decode("UTF-8")
print(root.tag, '\n' + pretty)

# An element supports len() and slicing over its children, like a list.
print(len(root), root[:3], root[-1:])

# ... and iterating it yields the children in document order.
for child in root:
    child.text = "dd"
    print(child.tag)

# Entity references and comments are appended like ordinary children.
root.append(etree.Entity("#123"))
root.append(etree.Comment("some comment"))

pretty = etree.tostring(root, pretty_print=True).decode("UTF-8")
print(root.tag, '\n' + pretty)
def rule_url(self, acc):
    """Return the ``lo.pacc`` entity reference paired with ``"/vis/"``.

    *acc* is not used.
    """
    base_entity = etree.Entity("lo.pacc")
    return base_entity, "/vis/"
# Build <root> with two <child> elements and one <another> element.
root = etree.Element("root")
for tag_name, content in (("child", "child 1"),
                          ("child", "child 2"),
                          ("another", "child 3")):
    etree.SubElement(root, tag_name).text = content

print(etree.tostring(root, pretty_print=True))

# Walk the tree three times: unfiltered, one tag name, two tag names.
for tag_filter in ((), ("child",), ("child", "another")):
    for element in root.iter(*tag_filter):
        print('%s - %s' % (element.tag, element.text))

root.append(etree.Entity("#234"))
root.append(etree.Comment("some comment"))

# Not every node has a string tag here, so fall back to printing the
# node itself when the tag is not a str.
for element in root.iter():
    label = element.tag if isinstance(element.tag, str) else element
    print('%s - %s' % (label, element.text))

# Passing a factory as ``tag`` filters the iteration by node kind.
for element in root.iter(tag=etree.Element):
    print("%s - %s" % (element.tag, element.text))

for element in root.iter(tag=etree.Entity):
    print(element.text)