Example #1
0
def stringForEntity(node):
    """Return the serialized content of ``node`` after re-parsing it.

    The node is serialized as UTF-8, wrapped in a temporary ``<norm>``
    element and parsed again.  When the node's document has an internal DTD
    subset, that DTD is placed in front of the wrapper so entity references
    can be resolved.  Entities are substituted only when the global
    ``expand_entities`` flag is true.

    Returns the concatenated UTF-8 serialization of every child of the
    re-parsed wrapper element.  Parser errors are not caught here and
    propagate to the caller.
    """
    text = node.serialize('utf-8')
    try:
        # Prepend the document DTD so entities are resolved.
        dtd = node.doc.intSubset()
        tmp = dtd.serialize('utf-8') + '<norm>%s</norm>' % text
        has_dtd = True
    except Exception:
        # Best effort: without a usable DTD, parse the bare wrapper.
        # ``Exception`` (not a bare except) keeps KeyboardInterrupt and
        # SystemExit from being swallowed.
        tmp = '<norm>%s</norm>' % text
        has_dtd = False

    ctxt = libxml2.createDocParserCtxt(tmp)
    if expand_entities:
        ctxt.replaceEntities(1)
    ctxt.parseDocument()
    tree = ctxt.doc()
    try:
        # When a DTD was prepended, the wrapper element is taken as the
        # second child of the document; otherwise it is the first one.
        if has_dtd:
            newnode = tree.children.next
        else:
            newnode = tree.children

        parts = []
        child = newnode.children
        while child:
            parts.append(child.serialize('utf-8'))
            child = child.next
        return ''.join(parts)
    finally:
        # libxml2 documents are not garbage collected by the Python
        # bindings; release the temporary tree explicitly.
        tree.freeDoc()
Example #2
0
def replaceNodeContentsWithText(node,text):
    """Replaces all subnodes of a node with contents of text treated as XML.

    For a node without children the text is simply stored with
    ``node.setContent(text)``.  Otherwise ``text`` is wrapped in the tags
    produced by ``startTagForNode``/``endTagForNode``, prefixed with an XML
    declaration (and the internal DTD subset of the global ``doc`` when it
    has one) and parsed.  On success the existing children are unlinked and
    the node is replaced by a copy of the parsed root element.

    If the text cannot be parsed, a message is printed to stderr and the
    node is left untouched.  Always returns None.
    """
    if not node.children:
        node.setContent(text)
        return

    starttag = startTagForNode(node)
    endtag = endTagForNode(node)

    # Lets add document DTD so entities are resolved
    tmp = '<?xml version="1.0" encoding="utf-8" ?>'
    try:
        dtd = doc.intSubset()
        tmp = tmp + dtd.serialize('utf-8')
    except libxml2.treeError:
        # The document has no internal subset; parse without a DTD.
        pass

    content = '<%s>%s</%s>' % (starttag, text, endtag)
    tmp = tmp + content.encode('utf-8')

    newnode = None
    try:
        ctxt = libxml2.createDocParserCtxt(tmp)
        ctxt.replaceEntities(0)
        ctxt.parseDocument()
        newnode = ctxt.doc()
    except Exception:
        # Reported just below through the ``newnode`` check.  ``Exception``
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        pass

    if not newnode:
        print >> sys.stderr, """Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8'))
        return

    newelem = newnode.getRootElement()

    if newelem and newelem.children:
        # Detach every existing child; fetch the sibling before unlinking.
        child = node.children
        while child:
            following = child.next
            child.unlinkNode()
            child = following

        if node:
            # The original code also made a second, unused copy of the node
            # list here; it was never freed, so it has been dropped.
            following = node.next
            node.replaceNode(newelem.copyNodeList())
            # NOTE(review): restores the saved sibling on the Python wrapper
            # after the replacement -- confirm the bindings allow assigning
            # ``next``.
            node.next = following

    else:
        # In practice, this happens with tags such as "<para>    </para>" (only whitespace in between)
        pass
Example #3
0
def replaceNodeContentsWithText(node,text):
    """Replaces all subnodes of a node with contents of text treated as XML.

    For a node without children the text is simply stored with
    ``node.setContent(text)``.  Otherwise ``text`` is wrapped in an element
    named after the node, parsed, and the children of the parsed root
    element are attached to ``node`` in place of its previous children.

    If the text cannot be parsed, a message is printed to stderr and the
    node is left untouched.  Always returns None.
    """
    if not node.children:
        node.setContent(text)
        return

    starttag = node.name #startTagForNode(node)
    endtag = endTagForNode(node)
    wrapped = '<%s>%s</%s>' % (starttag, text, endtag)

    try:
        # Lets add document DTD so entities are resolved
        dtd = doc.intSubset()
        tmp = ''
        if expand_entities: # FIXME: we get a "Segmentation fault" in libxml2.parseMemory() when we include DTD otherwise
            tmp = dtd.serialize('utf-8')
        tmp = tmp + wrapped
    except Exception:
        # Best effort: no usable DTD, so parse the wrapped text on its own.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        tmp = wrapped

    try:
        ctxt = libxml2.createDocParserCtxt(tmp.encode('utf-8'))
        ctxt.replaceEntities(0)
        ctxt.parseDocument()
        newnode = ctxt.doc()
    except Exception:
        print >> sys.stderr, """Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8'))
        return

    newelem = newnode.getRootElement()
    if newelem and newelem.children:
        # Detach every existing child; fetch the sibling before unlinking.
        child = node.children
        while child:
            following = child.next
            child.unlinkNode()
            child = following

        # The parsed document is intentionally not freed: its nodes are
        # grafted into ``node`` here.
        node.addChildList(newelem.children)
    else:
        # In practice, this happens with tags such as "<para>    </para>" (only whitespace in between)
        pass
Example #4
0
 def validate(self,xml,container,line_offset=0,char_offset=0):
     """Validate an XML fragment against the plugin's configured DTD.

     The fragment ``xml`` is wrapped in a ``container`` element, preceded by
     a DOCTYPE declaration built from ``self.plugin.settings["dtd"]``,
     encoded as UTF-8 and run through a validating libxml2 parser.  The HTML
     parser entry point is used when the DTD setting contains " HTML ".

     ``line_offset`` and ``char_offset`` are stored on the instance together
     with the generated document; parser errors are routed to
     ``self.xml_error``.

     Returns a true value only when the parser reports the document as valid
     and ``self.xml_errors`` is still 0 after parsing.
     """
     doctype = self.plugin.settings["dtd"]
     template = u"<!DOCTYPE %s %s>\n<%s>\n%s\n</%s>"
     document = template % (container, doctype, container, xml, container)
     document = document.encode("utf-8")
     self.plugin.debug("validating %r", document)

     # Reset the per-run state kept on the instance.
     self.xml = document
     self.xml_line_offset = line_offset
     self.xml_char_offset = char_offset
     self.xml_line_char_offsets = []
     self.xml_errors = 0

     # Configure a validating parser that reports through self.xml_error.
     self.parser = libxml2.createDocParserCtxt(document)
     self.parser.lineNumbers(1)
     self.parser.validate(1)
     self.parser.setErrorHandler(self.xml_error, None)

     if " HTML " in doctype:
         run_parser = self.parser.htmlParseDocument
     else:
         run_parser = self.parser.parseDocument
     run_parser()

     ok = self.parser.isValid() and self.xml_errors == 0
     # Drop the reference to the parser context once the result is known.
     self.parser = None
     self.plugin.debug("validation result: %r", ok)
     return ok
Example #5
0
def normalizeString(text, ignorewhitespace = 1):
    """Normalizes string to be used as key for gettext lookup.

    Removes all unnecessary whitespace.

    When ``ignorewhitespace`` is false the text is returned unchanged.
    Otherwise the text is wrapped in a temporary ``<norm>`` element
    (prefixed with the internal DTD subset of the global ``doc`` when one is
    available), parsed, passed through ``normalizeNode`` and serialized
    again; a single leading and a single trailing space are then stripped.
    Entities are substituted only when the global ``expand_entities`` flag
    is true.

    If the text cannot be parsed as XML, a message is printed to stderr and
    the original ``text`` is returned.
    """
    if not ignorewhitespace:
        return text
    try:
        # Lets add document DTD so entities are resolved
        dtd = doc.intSubset()
        tmp = dtd.serialize('utf-8')
        tmp = tmp + '<norm>%s</norm>' % text
    except Exception:
        # Best effort: without a usable DTD, parse the bare wrapper.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        tmp = '<norm>%s</norm>' % text

    tree = None
    try:
        try:
            ctxt = libxml2.createDocParserCtxt(tmp)
            if expand_entities:
                ctxt.replaceEntities(1)
            ctxt.parseDocument()
            tree = ctxt.doc()
            newnode = tree.getRootElement()
        except Exception:
            print >> sys.stderr, """Error while normalizing string as XML:\n"%s"\n""" % (text)
            return text

        normalizeNode(newnode)

        parts = []
        child = newnode.children
        while child:
            parts.append(child.serialize('utf-8'))
            child = child.next
        result = ''.join(parts)
    finally:
        # libxml2 documents are not garbage collected by the Python
        # bindings; release the temporary tree on every exit path.
        if tree is not None:
            tree.freeDoc()

    result = re.sub('^ ','', result)
    result = re.sub(' $','', result)

    return result