Esempio n. 1
0
    def remove_namespaces(self, root):
        """Build and return a namespace-free copy of the tree under ``root``.

        The copy is rooted at a fresh ``html`` element that carries only a
        ``lang`` attribute (when the source root has a non-empty one, with or
        without a namespace prefix). Comments and processing instructions are
        extracted from ``root`` first; every remaining descendant is then
        cloned with the namespace part stripped from its tag and attribute
        names.
        """
        # Pick up the root's language; the last matching attribute wins.
        lang = None
        for name, value in root.attrib.iteritems():
            if name.rpartition('}')[-1] == 'lang':
                lang = value

        # lxml offers no way to drop namespace declarations in place, so the
        # tree is cloned by hand into a new, undeclared root. This keeps the
        # serialized output clean HTML 5 markup; the XHTML namespace is added
        # back manually after serialization. Earlier layers are expected to
        # have removed svg and other non-html namespaced tags already.
        root_attrs = {}
        if lang:
            root_attrs['lang'] = lang
        nroot = etree.Element('html', attrib=root_attrs)
        nroot.text = root.text
        nroot.tail = '\n'

        # kindlegen appears to strip comments and processing instructions,
        # so do the same here before cloning.
        unwanted = (etree.Comment, etree.ProcessingInstruction)
        for node in root.iterdescendants():
            if node.tag in unwanted:
                extract(node)

        for node in root.iterdescendants():
            if node.tag == etree.Entity:
                clone = etree.Entity(node.name)
            else:
                local_name = node.tag
                if local_name is not None:
                    local_name = local_name.rpartition('}')[-1]
                stripped = {}
                for key, value in node.attrib.iteritems():
                    stripped[key.rpartition('}')[-1]] = value
                clone = nroot.makeelement(local_name, attrib=stripped)
                clone.text = node.text
            clone.tail = node.tail
            # Locate the already-cloned counterpart of this node's parent and
            # hang the clone underneath it.
            new_parent = node_from_path(nroot, path_to_node(node.getparent()))
            new_parent.append(clone)

        return nroot
Esempio n. 2
0
def dict_to_node(o, xpo, xlo, buf):
    '''
    Convert one item of the dict-based document encoding into an XML node.

    <o>   current item: either a raw string, or a dict whose FIRST key/value
          pair identifies the node kind:
            {tag: 'T', attr: value, ..., '_#c': [children]}   element
            {tag: 'T', attr: value, ..., '_#cr': text}        element, text only
            {name: 'E'}                                       entity reference
            {'_': 'D', '_#c': [root], 'doctype': ...}         whole document
    <xpo> node casted from the parent dict; raw text of a first child is
          stored in its ``text``
    <xlo> result of the previous sibling (None for the first child); raw text
          of a later child is stored in its ``tail``
    <buf> binary writable object; only used to emit a bare ``<!DOCTYPE ...>``
          line when a document's ``doctype`` entry is not a dict

    Returns the raw string unchanged for string items, the newly built
    element / entity / element tree for dict items, and None for any other
    type.

    Raises Exception for two consecutive raw strings, AssertionError for a
    list item or a malformed dict, and IndexError for an empty dict.
    '''
    if isinstance(o, str):
        if xlo is None:
            # First child: raw text becomes the parent's leading text.
            xpo.text = o
            return o
        if isinstance(xlo, str):
            raise Exception(f'Consecutive raw fields in list: [{xlo!r},{o!r}]')
        # Later child: raw text trails the previous sibling node.
        xlo.tail = o
        return o

    if isinstance(o, list):
        # Explicit raise instead of ``assert 0`` so the check survives ``-O``.
        raise AssertionError('Undefined occurence of list %s' % (repr(o)[:200]))

    if not isinstance(o, dict):
        # Unsupported item types are ignored.
        return None

    # Work on a copy so popping the identifier does not mutate the caller's dict.
    o = OrderedDict(o)
    oks = list(o)
    k = oks[0]
    v = o.pop(k)

    # NOTE: the kind markers are compared with ``==``; identity comparison
    # against string literals only works by accident of string interning.
    if v == 'T':
        xpo = ET.Element(k)

        # Every key up to the child marker is an attribute; the child marker
        # (``_#c`` / ``_#cr``) must be the last key.
        for i, (name, value) in enumerate(o.items()):
            if name[:3] == '_#c':
                if i + 1 != len(o):
                    raise AssertionError((i, name, o.keys()))
                break
            xpo.set(name, value)

        if oks[-1] == '_#cr':
            # ``_#cr: text`` is shorthand for a child list holding one raw string.
            xpo.text = o['_#cr']
        else:
            if oks[-1] != '_#c':
                raise AssertionError(
                    ('Element must end with _#c or _#cr', oks[-1]))
            previous = None
            for child in o['_#c']:
                previous = dict_to_node(child, xpo, previous, buf)
                # Raw strings were already stored as text/tail; only real
                # nodes are appended.
                if not isinstance(previous, str):
                    xpo.append(previous)
        return xpo

    if v == 'E':
        # An entity reference such as &nbsp;, named by the key.
        return ET.Entity(k)

    if v == 'D':
        if k != '_':
            raise AssertionError(('Key must be _ when specifying D _: D ', k, v))
        root_item = o['_#c'][0]
        root_name = list(root_item)[0]
        xpo = ET.ElementTree(dict_to_node(root_item, xpo, None, buf))

        if 'doctype' in o:
            doctype = o['doctype']
            if isinstance(doctype, dict):
                xpo.docinfo.system_url = doctype['system_url']
                xpo.docinfo.public_id = doctype['public_id']
            else:
                buf.write(f'<!DOCTYPE {root_name}>\n'.encode())
        return xpo

    raise AssertionError(('Invalid node identifier', oks[0], v))
Esempio n. 3
0
 def appendCvLink(root, key, lntail):
     """Append an entity reference named ``cv-link-<key>`` to ``root``.

     ``lntail`` is stored as the entity's tail text.
     """
     entity_name = 'cv-link-' + key
     cv_link = etree.Entity(entity_name)
     cv_link.tail = lntail
     root.append(cv_link)
Esempio n. 4
0
def serialize_paragraph(ctx, document, par, root, embed=True):
    """Serialize a paragraph into a ``p`` (or header) element under ``root``.

    The function builds a ``p`` element, serializes every entry of
    ``par.elements`` into it, merges runs of text that share formatting,
    and then decides where the result goes:

    * if the header detector accepts the paragraph, the element is re-tagged
      with the header tag, appended to ``root`` and the ``'h'`` hooks fire;
    * if the paragraph has a list level (``par.ilvl``), it is handed to
      ``open_list``;
    * otherwise any open list is closed, the element is appended to ``root``
      and the ``'p'`` hooks fire.

    Args:
        ctx: Serialization context; the code reads ``ctx.options``,
            ``ctx.opened_comments``, ``ctx.header`` and writes ``ctx.ilvl``
            and ``ctx.numid``.
        document: Document being serialized; ``document.comments`` is
            updated with the text of runs inside opened comments.
        par: Paragraph to serialize.
        root: Parent element to attach to; may be ``None``, in which case
            nothing is appended here.
        embed: Not referenced anywhere in this function body.

    Returns:
        The root element to continue with, which may be a different object
        than the one passed in because ``open_list`` / ``close_list`` return
        a replacement.
    """

    style = get_style(document, par)

    elem = etree.Element('p')

    # _style holds the paragraph-level inline CSS; it is compared against
    # each run's CSS further down to decide whether the run needs its own.
    if ctx.options['embed_styles']:
        _style = get_style_css(ctx, par)

        if _style != '':
            elem.set('style', _style)

    else:
        _style = ''

    if style:
        elem.set('class', get_css_classes(document, style))

    max_font_size = get_style_fontsize(par)

    # A named style overrides the font size read from the paragraph itself.
    if style:
        max_font_size = _get_font_size(document, style)

    for el in par.elements:
        _serializer = ctx.get_serializer(el)

        if _serializer:
            _serializer(ctx, document, el, elem)

        if isinstance(el, doc.Text):
            # Snapshot of the children taken AFTER the element's own
            # serializer (if any) has run.
            children = list(elem)
            _text_style = get_style_css(ctx, el)
            _text_class = el.rpr.get('style', '').lower()

            # Track the largest font size seen in the paragraph; it feeds
            # the header detection below.
            if get_style_fontsize(el) > max_font_size:
                max_font_size = get_style_fontsize(el)

            # Choose the wrapper element for this run of text.
            if 'superscript' in el.rpr:
                new_element = etree.Element('sup')
                new_element.text = el.value()
            elif 'subscript' in el.rpr:
                new_element = etree.Element('sub')
                new_element.text = el.value()
            elif 'b' in el.rpr or 'i' in el.rpr or 'u' in el.rpr:
                new_element = None
                _element = None

                # Builds a nested chain of formatting tags: the first tag
                # present becomes the outermost element (new_element), each
                # later one is nested inside the previous, and _element
                # always points at the innermost one.
                def _add_formatting(f, new_element, _element):
                    if f in el.rpr:
                        _t = etree.Element(f)

                        if new_element is not None:
                            _element.append(_t)
                            _element = _t
                        else:
                            new_element = _t
                            _element = new_element

                    return new_element, _element

                new_element, _element = _add_formatting(
                    'b', new_element, _element)
                new_element, _element = _add_formatting(
                    'i', new_element, _element)
                new_element, _element = _add_formatting(
                    'u', new_element, _element)

                # The text lives in the innermost formatting element.
                _element.text = el.value()

                # NOTE(review): unlike the span branch below, this loop does
                # not check that comment_id is present in document.comments
                # before indexing -- confirm whether that is intentional.
                for comment_id in ctx.opened_comments:
                    document.comments[comment_id].text += ' ' + el.value()
            else:
                new_element = etree.Element('span')
                new_element.text = el.value()

                if ctx.options['embed_styles']:
                    # NOTE(review): the bare except silently ignores any
                    # failure to set the class attribute.
                    try:
                        new_element.set('class', _text_class)
                    except:
                        pass

                for comment_id in ctx.opened_comments:
                    if comment_id in document.comments:
                        document.comments[comment_id].text += ' ' + el.value()

            # Only give the run its own inline style when it differs from
            # the paragraph's.
            if ctx.options['embed_styles']:
                if _text_style != '' and _style != _text_style:
                    new_element.set('style', _text_style)

            # This is for situations when style has options and
            # text is trying to unset them
            # else:
            #     new_element.set('class', 'noformat')

            was_inserted = False

            # Try to merge the new run into the last existing child instead
            # of appending another element.
            if len(children) > 0:
                _child_style = children[-1].get('style') or ''
                _child_class = children[-1].get('class', '')

                # Same tag, compatible class and style, and nothing after the
                # last child yet: concatenate onto the last child's text.
                if new_element.tag == children[-1].tag and (
                    (_text_class == _child_class or _child_class == '') and
                    (_text_style == _child_style
                     or _child_style == '')) and children[-1].tail is None:
                    txt = children[-1].text or ''
                    txt2 = new_element.text or ''
                    children[-1].text = u'{}{}'.format(txt, txt2)
                    was_inserted = True

                if not was_inserted:
                    # Plain span with the paragraph's own style: store the
                    # text as tail of the last child rather than wrapping it.
                    if _style == _text_style and new_element.tag == 'span' and (
                            _text_class == _child_class or _child_class == ''):
                        _e = children[-1]

                        txt = _e.tail or ''
                        _e.tail = u'{}{}'.format(txt, new_element.text)
                        was_inserted = True

                    # Span whose class differs from the last child's: the
                    # text is likewise stored as tail of the last child.
                    if not was_inserted and new_element.tag == 'span' and (
                            _text_class != _child_class):
                        _e = children[-1]
                        txt = _e.tail or ''
                        _e.tail = u'{}{}'.format(txt, new_element.text)
                        was_inserted = True

            if not was_inserted:
                # The first assignment is always overwritten by the try/except
                # that follows; with no children the IndexError is swallowed
                # and the class falls back to ''.
                _child_class = new_element.get('class', '')
                try:
                    _child_class = children[-1].get('class', '')
                except:
                    _child_class = ''

                # Unstyled span text goes straight into the paragraph's text;
                # anything else is appended as a child, unless its text is
                # the empty string.
                if _style == _text_style and new_element.tag == 'span' and (
                        _text_class == _child_class):
                    txt = elem.text or ''
                    elem.text = u'{}{}'.format(txt, new_element.text)
                else:
                    if new_element.text != u'':
                        elem.append(new_element)

    # Header detection only applies to paragraphs that are neither drop caps
    # nor list items.
    if not par.is_dropcap() and par.ilvl == None:
        if style:
            if ctx.header.is_header(par, max_font_size, elem, style=style):
                elem.tag = ctx.header.get_header(par, style, elem)
                if par.ilvl == None:
                    root = close_list(ctx, root)
                    ctx.ilvl, ctx.numid = None, None

                if root is not None:
                    root.append(elem)

                fire_hooks(ctx, document, par, elem, ctx.get_hook('h'))
                return root
        else:
            # An earlier version only checked for a heading when the font
            # size was bigger than the default font size; in many cases this
            # did not work out well, so the check is now unconditional:
            #     if max_font_size > ctx.header.default_font_size:
            if True:
                if ctx.header.is_header(par, max_font_size, elem, style=style):
                    # NOTE(review): elem.text is None (not '') when unset, so
                    # the first half of this test is true in that case too.
                    if elem.text != '' and len(list(elem)) != 0:
                        elem.tag = ctx.header.get_header(
                            par, max_font_size, elem)

                        if par.ilvl == None:
                            root = close_list(ctx, root)
                            ctx.ilvl, ctx.numid = None, None

                        if root is not None:
                            root.append(elem)

                        fire_hooks(ctx, document, par, elem, ctx.get_hook('h'))
                        return root

    # A completely empty paragraph can optionally be rendered as &nbsp;.
    if len(list(elem)) == 0 and elem.text is None:
        if ctx.options['empty_paragraph_as_nbsp']:
            elem.append(etree.Entity('nbsp'))

    # Indentation is different. We are starting or closing list.
    if par.ilvl != None:
        # List items are handed to open_list; the 'p' hooks are not fired
        # on this path.
        root = open_list(ctx, document, par, root, elem)
        return root
    else:
        root = close_list(ctx, root)
        ctx.ilvl, ctx.numid = None, None

    # Add new elements to our root element.
    if root is not None:
        root.append(elem)

    fire_hooks(ctx, document, par, elem, ctx.get_hook('p'))

    return root
Esempio n. 5
0
    },
    {
        'prefix': 'OS-KSADM',
        'value': 'http://docs.openstack.org/identity/api/ext/OS-KSADM/v1.0',
    },
]

# Module-level XML parser: entity references are left unresolved, and
# comments and processing instructions are dropped while parsing.
PARSER = etree.XMLParser(
    resolve_entities=False,
    remove_comments=True,
    remove_pis=True)

# NOTE(dolph): lxml.etree.Entity() is just a callable that currently returns an
# lxml.etree._Entity instance, which doesn't appear to be part of the
# public API, so we discover the type dynamically to be safe.
# The 'x' name is a throwaway; only the type of the resulting object is kept.
ENTITY_TYPE = type(etree.Entity('x'))


def from_xml(xml):
    """Deserialize XML to a dictionary.

    ``None`` is passed through unchanged; any other value is handed to a
    fresh ``XmlDeserializer`` instance.
    """
    return None if xml is None else XmlDeserializer()(xml)


def to_xml(d, xmlns=None):
    """Serialize a dictionary to XML."""
    if d is None:
        return None
 def addElement(self, elemName):
     """Declare ``elemName`` as an external entity and reference it.

     An ``<!ENTITY ...>`` declaration whose system identifier equals the
     entity name is added to ``self.docString``, and a matching entity
     reference is appended to ``self.gdml``.
     """
     declaration = ''.join(
         ('<!ENTITY ', elemName, ' SYSTEM "', elemName, '">\n'))
     self.docString += declaration
     reference = etree.Entity(elemName)
     self.gdml.append(reference)
Esempio n. 7
0
def prepend_space(element):
    """Insert an ``&nbsp;`` entity reference as the sibling just before ``element``."""
    nbsp = etree.Entity('nbsp')
    element.addprevious(nbsp)
Esempio n. 8
0
    def tile_render_tween(request):
        """Run the wrapped handler, then expand tile placeholders in HTML output.

        For a ``text/html`` response, every node matched by ``TILE_XPATH`` is
        resolved to a path (from its ``path`` and/or ``type`` attributes),
        rendered through a subrequest to the same handler, and replaced by
        the children of the rendered tile's ``body``. Children of the tile's
        ``head`` are moved into the outer document's ``head``. Users with the
        'Edit tile' permission also get an edit link appended to the tile
        body. Other responses are returned untouched.

        Raises:
            Exception: if a tile node has neither a ``path`` nor a ``type``.
        """
        response = handler(request)
        if response.content_type == 'text/html':
            if isinstance(response, WSGIHTTPException):
                # the body of a WSGIHTTPException needs to be "prepared"
                response.prepare(request.environ)

            serializer = getHTMLSerializer(response.app_iter)
            tree = serializer.tree
            head_node = tree.getroot().find('head')

            for tile_node in TILE_XPATH(serializer.tree):
                # determine tile path
                tile_path = tile_node.attrib.get('path')
                tile_type = tile_node.attrib.get('type')
                if tile_path and tile_type:
                    if tile_path == '/':
                        path = '/tile:' + tile_type
                    else:
                        path = '/'.join((tile_path, 'tile:' + tile_type))
                elif tile_path:
                    path = tile_path
                elif tile_type:
                    path = request.resource_path(request.context,
                                                 'tile:' + tile_type)
                else:
                    # XXX how can we show a useful line number?
                    raise Exception('Tile must have a path or type')

                # fetch tile contents
                subrequest = Request.blank(path)
                subrequest.registry = registry
                tile_data = dict(tile_node.attrib)
                tile_data['innerHTML'] = (tile_node.text or '') + ''.join([
                    html.tostring(child) for child in tile_node.iterchildren()
                ])
                if tile_path:
                    edit_url = request.route_path(MANAGE_ROUTE_NAME,
                                                  'edit_tile',
                                                  traverse=tile_path)
                else:
                    edit_url = request.mgmt_path(request.context, 'edit_tile')
                # The edit URL carries the full tile data, including 'type'.
                edit_url += '?' + urlencode(tile_data)
                # Path-only tiles have no 'type' attribute, so remove it
                # tolerantly instead of raising KeyError.
                tile_data.pop('type', None)
                subrequest.tile_data = tile_data
                tile_response = handler(subrequest)
                tile_tree = getHTMLSerializer(tile_response.app_iter).tree
                tile_root = tile_tree.getroot()
                tile_body = tile_root.find('body')

                # add edit link
                if has_permission('Edit tile', subrequest.context, request):
                    edit_link = builder.E.a('', href=edit_url)
                    edit_link.append(etree.Entity('#9997'))
                    tile_body.append(edit_link)

                # insert tile content
                tile_head = tile_root.find('head')
                if tile_head is not None:
                    for child in tile_head:
                        head_node.append(child)
                # NOTE(review): tile_tree was already dereferenced above, so
                # this guard can never be false here -- possibly meant to
                # test tile_body; confirm before changing.
                if tile_tree is not None:
                    replace_content_with_children(tile_node, tile_body)

            response.app_iter = [serializer.serialize()]

        return response
 def exportElement(self, dirPath, elemName, elem):
     """Write ``elem`` to its own file and reference it as an external entity.

     The element is serialized to ``dirPath/elemName``; an ``<!ENTITY ...>``
     declaration pointing at ``elemName`` is added to ``self.docString`` and
     a matching entity reference is appended to ``self.gdml``.
     """
     tree = etree.ElementTree(elem)
     tree.write(os.path.join(dirPath, elemName))
     declaration = ''.join(
         ('<!ENTITY ', elemName, ' SYSTEM "', elemName, '">\n'))
     self.docString += declaration
     self.gdml.append(etree.Entity(elemName))
Esempio n. 10
0
# pip install lxml
from lxml import etree

# Build a small tree: a root with one attribute and four children.
root = etree.Element("root", interesting="totally")

root.append(etree.Element("child1"))
etree.SubElement(root, "child2").text = "Child 2"
etree.SubElement(root, "child3")
# Insert at index 0 so this child ends up first.
root.insert(0, etree.Element("child0"))
print(root.tag, '\n' + str(etree.tostring(root, pretty_print=True), "UTF-8"))

# Elements support len() and slicing like lists.
print(len(root), root[:3], root[-1:])
# ... and iteration like lists.
for child in root:
    child.text = "dd"
    print(child.tag)

# Entity references and comments are appended like ordinary elements.
root.extend([etree.Entity("#123"), etree.Comment("some comment")])

print(root.tag, '\n' + str(etree.tostring(root, pretty_print=True), "UTF-8"))
Esempio n. 11
0
 def rule_url(self, acc):
     """Return the ``lo.pacc`` entity reference paired with the ``/vis/`` suffix.

     ``acc`` is accepted but not used.
     """
     base = etree.Entity("lo.pacc")
     return base, "/vis/"
# Build a root with three text-bearing children.
root = etree.Element("root")
etree.SubElement(root, "child").text = "child 1"
etree.SubElement(root, "child").text = "child 2"
etree.SubElement(root, "another").text = "child 3"

print(etree.tostring(root, pretty_print=True))

# Walk the whole tree, root included.
for element in root.iter():
    print('{} - {}'.format(element.tag, element.text))

# Restrict the walk to a single tag name.
for element in root.iter("child"):
    print('{} - {}'.format(element.tag, element.text))

# Restrict the walk to several tag names.
for element in root.iter("child", "another"):
    print('{} - {}'.format(element.tag, element.text))

root.extend([etree.Entity("#234"), etree.Comment("some comment")])

# Entities and comments do not have a string tag, so print the node itself
# for those.
for element in root.iter():
    if isinstance(element.tag, str):
        print('{} - {}'.format(element.tag, element.text))
    else:
        print('{} - {}'.format(element, element.text))

# Filter by node kind: regular elements only.
for element in root.iter(tag=etree.Element):
    print("{} - {}".format(element.tag, element.text))

# Filter by node kind: entity references only.
for element in root.iter(tag=etree.Entity):
    print(element.text)