Esempio n. 1
0
def stringForEntity(node):
    """Return the serialized content of ``node`` after re-parsing it.

    The node is serialized, wrapped in a ``<norm>`` element and parsed again
    as a stand-alone document.  When the owning document has an internal DTD
    subset, that subset is prepended so entity references can be resolved.
    Entities are substituted only when the module-level ``expand_entities``
    flag is true.

    Returns the concatenated UTF-8 serialization of the children of the
    ``<norm>`` wrapper.
    """
    text = node.serialize('utf-8')
    try:
        # Prepend the document DTD so entities are resolved.
        dtd = node.doc.intSubset()
        tmp = dtd.serialize('utf-8') + '<norm>%s</norm>' % text
        has_dtd = True
    except Exception:
        # No usable internal subset: parse the wrapped text on its own.
        tmp = '<norm>%s</norm>' % text
        has_dtd = False

    ctxt = libxml2.createDocParserCtxt(tmp)
    if expand_entities:
        ctxt.replaceEntities(1)
    ctxt.parseDocument()
    tree = ctxt.doc()
    try:
        # With a DTD in front, the <norm> element is the second top-level node.
        newnode = tree.children.next if has_dtd else tree.children

        parts = []
        child = newnode.children
        while child:
            parts.append(child.serialize('utf-8'))
            child = child.next
        return ''.join(parts)
    finally:
        # The temporary document is owned by this function; the serialized
        # strings collected above do not depend on it.
        tree.freeDoc()
Esempio n. 2
0
    def stringForEntity(self, node):
        """Serialize ``node``, re-parse it inside ``<norm>`` and return the inner XML.

        The internal DTD subset of ``self.doc`` is prepended when it can be
        obtained, so that entity references resolve.  Entities are replaced
        only when ``self.expand_entities`` is true.  The temporary document is
        freed before returning.
        """
        wrapped = '<norm>%s</norm>' % node.serialize('utf-8')
        try:
            # Put the document DTD in front so entities are resolved.
            source = self.doc.intSubset().serialize('utf-8') + wrapped
            has_dtd = True
        except:
            source = wrapped
            has_dtd = False

        parser = libxml2.createDocParserCtxt(source)
        if self.expand_entities:
            parser.replaceEntities(1)
        parser.parseDocument()
        parsed = parser.doc()

        # When a DTD was prepended it is the first child; <norm> follows it.
        wrapper = parsed.children.next if has_dtd else parsed.children

        pieces = []
        current = wrapper.children
        while current:
            pieces.append(current.serialize('utf-8'))
            current = current.next
        result = ''.join(pieces)
        parsed.freeDoc()
        return result
Esempio n. 3
0
    def replaceNodeContentsWithText(self, node, text):
        """Replace all subnodes of ``node`` with ``text`` treated as XML.

        Nothing happens when ``self.CheckMatchedTags(text)`` is false.  A node
        without children simply gets ``text`` as its content.  Otherwise
        ``text`` is wrapped in the node's own start/end tags, parsed as a
        stand-alone document (with the internal DTD subset of ``self.doc``
        prepended when available) and a copy of the parsed element replaces
        ``node``.  If the text cannot be parsed, a diagnostic is printed and
        the node is left untouched.

        Always returns ``None``.
        """
        if not self.CheckMatchedTags(text):
            return

        if not node.children:
            node.setContent(text)
            return

        starttag = self.startTagForNode(node)
        endtag = self.endTagForNode(node)

        # Prepend the document DTD so entities are resolved.
        tmp = '<?xml version="1.0" encoding="utf-8" ?>'
        try:
            dtd = self.doc.intSubset()
            tmp = tmp + dtd.serialize('utf-8')
        except libxml2.treeError:
            pass

        tmp = tmp + '<%s>%s</%s>' % (starttag, text, endtag)

        try:
            ctxt = libxml2.createDocParserCtxt(tmp)
            ctxt.replaceEntities(0)
            ctxt.parseDocument()
            newnode = ctxt.doc()
        except Exception:
            # Reported just below; only real errors are caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            newnode = None

        if not newnode:
            print(f"\n--> Error parsing translation as XML:\n{text}")
            # See: https://gitlab.gnome.org/GNOME/libxml2/-/issues/64
            print("--> Note: this might be caused by a bug in libxml2.\n")
            return

        newelem = newnode.getRootElement()
        if not (newelem and newelem.children):
            # In practice, this happens with tags such as "<para>    </para>"
            # (only whitespace in between).
            return

        # Detach the existing children before swapping the node itself.
        child = node.children
        while child:
            nextchild = child.next
            child.unlinkNode()
            child = nextchild

        if node:
            nextnode = node.next
            node.replaceNode(newelem.copyNodeList())
            # NOTE(review): ``__next__`` looks like a 2to3 rewrite of an
            # earlier ``node.next = ...`` assignment -- confirm whether this
            # line still has any effect before relying on it.
            node.__next__ = nextnode
Esempio n. 4
0
    def replaceNodeContentsWithText(self, node, text):
        """Replace all subnodes of ``node`` with ``text`` treated as XML.

        A node without children simply gets ``text`` as its content.
        Otherwise ``text`` is wrapped in the node's start/end tags, encoded as
        UTF-8 and parsed as a stand-alone document (with the internal DTD
        subset of ``self.doc`` prepended when available); a copy of the parsed
        element then replaces ``node``.  If parsing fails, a message is
        written to stderr and the node is left untouched.

        Always returns ``None``.
        """

        if node.children:
            starttag = self.startTagForNode(node)
            endtag = self.endTagForNode(node)

            # Prepend the document DTD so entities are resolved.
            tmp = '<?xml version="1.0" encoding="utf-8" ?>'
            try:
                dtd = self.doc.intSubset()
                tmp = tmp + dtd.serialize('utf-8')
            except libxml2.treeError:
                pass

            content = '<%s>%s</%s>' % (starttag, text, endtag)
            tmp = tmp + content.encode('utf-8')

            newnode = None
            try:
                ctxt = libxml2.createDocParserCtxt(tmp)
                ctxt.replaceEntities(0)
                ctxt.parseDocument()
                newnode = ctxt.doc()
            except Exception:
                # Reported below through ``newnode`` staying None.
                pass

            if not newnode:
                print >> sys.stderr, """Error while parsing translation as XML:\n"%s"\n""" % (
                    text.encode('utf-8'))
                return

            newelem = newnode.getRootElement()

            if newelem and newelem.children:
                # Detach the existing children before swapping the node.
                free = node.children
                while free:
                    following = free.next
                    free.unlinkNode()
                    free = following

                if node:
                    # Only one copy of the parsed nodes is made; the earlier
                    # extra ``copyNodeList()`` result was never used.
                    sibling = node.next
                    node.replaceNode(newelem.copyNodeList())
                    node.next = sibling

            else:
                # In practice, this happens with tags such as "<para>    </para>" (only whitespace in between)
                pass
        else:
            node.setContent(text)
Esempio n. 5
0
def replaceNodeContentsWithText(node, text):
    """Replace all subnodes of ``node`` with ``text`` treated as XML.

    A node without children simply gets ``text`` as its content.  Otherwise
    ``text`` is wrapped in the node's start/end tags, encoded as UTF-8 and
    parsed as a stand-alone document (with the internal DTD subset of the
    module-level ``doc`` prepended when available); a copy of the parsed
    element then replaces ``node``.  If parsing fails, a message is written
    to stderr and the node is left untouched.

    Always returns ``None``.
    """
    if not node.children:
        node.setContent(text)
        return

    starttag = startTagForNode(node)
    endtag = endTagForNode(node)

    # Prepend the document DTD so entities are resolved.
    tmp = '<?xml version="1.0" encoding="utf-8" ?>'
    try:
        dtd = doc.intSubset()
        tmp = tmp + dtd.serialize('utf-8')
    except libxml2.treeError:
        pass

    content = '<%s>%s</%s>' % (starttag, text, endtag)
    tmp = tmp + content.encode('utf-8')

    newnode = None
    try:
        ctxt = libxml2.createDocParserCtxt(tmp)
        ctxt.replaceEntities(0)
        ctxt.parseDocument()
        newnode = ctxt.doc()
    except Exception:
        # Reported below through ``newnode`` staying None.
        pass

    if not newnode:
        print >> sys.stderr, """Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8'))
        return

    newelem = newnode.getRootElement()
    if not (newelem and newelem.children):
        # In practice, this happens with tags such as "<para>    </para>"
        # (only whitespace in between).
        return

    # Detach the existing children before swapping the node.
    free = node.children
    while free:
        following = free.next
        free.unlinkNode()
        free = following

    if node:
        # A single copy of the parsed nodes is enough; the previous extra
        # ``copyNodeList()`` call produced a list that was never used.
        sibling = node.next
        node.replaceNode(newelem.copyNodeList())
        node.next = sibling
Esempio n. 6
0
    def normalizeString(self, text, spacepreserve=False):
        """Normalize ``text`` to be used as a key for gettext lookup.

        When ``spacepreserve`` is true the text is returned unchanged.
        Otherwise it is wrapped in ``<norm>``, parsed (with the internal DTD
        subset of ``self.doc`` prepended when available), passed through
        ``self.normalizeNode`` and serialized again; one leading and one
        trailing space are stripped from the result.

        If the wrapped text cannot be parsed, a message is written to stderr
        and the original ``text`` is returned.
        """
        if spacepreserve:
            return text
        try:
            # Prepend the document DTD so entities are resolved.
            dtd = self.doc.intSubset()
            tmp = dtd.serialize('utf-8')
            tmp = tmp + '<norm>%s</norm>' % text
        except Exception:
            tmp = '<norm>%s</norm>' % text

        try:
            ctxt = libxml2.createDocParserCtxt(tmp)
            if self.app.options.get('expand_entities'):
                ctxt.replaceEntities(1)
            ctxt.parseDocument()
            tree = ctxt.doc()
            newnode = tree.getRootElement()
        except Exception:
            print("""Error while normalizing string as XML:\n"%s"\n""" %
                  (text),
                  file=sys.stderr)
            return text

        try:
            # Not sure if swapping the doc here is really necessary. It was
            # one of the things done in debugging and has not been revisited.
            save_doc = self.doc
            self.doc = tree
            try:
                self.normalizeNode(newnode)
            finally:
                # Never leave self.doc pointing at the temporary document.
                self.doc = save_doc

            parts = []
            child = newnode.children
            while child:
                parts.append(child.serialize('utf-8'))
                child = child.next
        finally:
            # The temporary document is released even if normalization or
            # serialization fails.
            tree.freeDoc()

        result = ''.join(parts)
        result = re.sub('^ ', '', result)
        result = re.sub(' $', '', result)
        return result
Esempio n. 7
0
    def validate_xml_string(self, sXml_content):
        """Parse ``sXml_content`` and fail if libxml2 recorded an error.

        The string is parsed with a libxml2 document parser context and
        ``libxml2.lastError()`` is consulted afterwards.  The parsed document
        is freed in every case.

        Raises:
            ValueError: if ``libxml2.lastError()`` reports an error.  The
                message is prefixed with ``metroValidationError``, the tag
                the previous string exception carried (string exceptions can
                no longer be raised by current Python versions).
        """
        ctxt = libxml2.createDocParserCtxt(sXml_content)

        # DTD validation is currently not enabled (the call is commented out):
        #        ctxt.validate(1)

        ctxt.parseDocument()

        # Detect whether at least one error occurred.
        try:
            error = libxml2.lastError()
        except Exception:
            error = None

        if error is not None:
            # Best-effort release of whatever tree the parser built; cleanup
            # problems must not mask the validation failure itself.
            try:
                ctxt.doc().freeDoc()
            except Exception:
                pass
            sMessage = _(
                "At least one error occured when validating XML file.")
            raise ValueError("metroValidationError: %s" % sMessage)

        doc = ctxt.doc()
        doc.freeDoc()
Esempio n. 8
0
    def normalizeString(self, text, spacepreserve=False):
        """Build the whitespace-normalized gettext key for ``text``.

        With ``spacepreserve`` set the input is handed back untouched.
        Otherwise the text is wrapped in ``<norm>``, parsed (together with the
        internal DTD subset of ``self.doc`` when that can be obtained), run
        through ``self.normalizeNode`` and re-serialized, with one leading and
        one trailing space removed.  On a parse failure a message goes to
        stderr and the original text is returned.
        """
        if spacepreserve:
            return text

        wrapped = '<norm>%s</norm>' % text
        try:
            # Put the document DTD in front so entities are resolved.
            source = self.doc.intSubset().serialize('utf-8') + wrapped
        except:
            source = wrapped

        try:
            parser = libxml2.createDocParserCtxt(source)
            if self.app.options.get('expand_entities'):
                parser.replaceEntities(1)
            parser.parseDocument()
            parsed = parser.doc()
            root = parsed.getRootElement()
        except:
            print >> sys.stderr, """Error while normalizing string as XML:\n"%s"\n""" % (
                text)
            return text

        self.normalizeNode(root)

        pieces = []
        current = root.children
        while current:
            pieces.append(current.serialize('utf-8'))
            current = current.next

        normalized = re.sub(' $', '', re.sub('^ ', '', ''.join(pieces)))
        parsed.freeDoc()

        return normalized
def replaceNodeContentsWithText(node, text):
    """Swap the children of ``node`` for ``text`` parsed as XML.

    A childless node just receives ``text`` as content.  Otherwise ``text``
    is wrapped in an element named after ``node``, parsed as its own
    document, and the parsed element's children are attached to ``node``
    after its old children have been unlinked.  When parsing raises, a
    message is written to stderr and ``node`` is left as it was.
    """
    if not node.children:
        node.setContent(text)
        return

    open_name = node.name  # bare element name; startTagForNode(node) is not used
    close_name = endTagForNode(node)
    try:
        # Fetch the document DTD so entities can be resolved.
        subset = doc.intSubset()
        # FIXME: including the DTD when entities are not expanded gives a
        # "Segmentation fault" in libxml2.parseMemory().
        prefix = subset.serialize('utf-8') if expand_entities else ''
        markup = prefix + '<%s>%s</%s>' % (open_name, text, close_name)
    except:
        markup = '<%s>%s</%s>' % (open_name, text, close_name)

    try:
        parser = libxml2.createDocParserCtxt(markup.encode('utf-8'))
        parser.replaceEntities(0)
        parser.parseDocument()
        parsed = parser.doc()
    except:
        print >> sys.stderr, """Error while parsing translation as XML:\n"%s"\n""" % (
            text.encode('utf-8'))
        return

    root = parsed.getRootElement()
    if not (root and root.children):
        # In practice, this happens with tags such as "<para>    </para>"
        # (only whitespace in between).
        return

    stale = node.children
    while stale:
        following = stale.next
        stale.unlinkNode()
        stale = following

    # The parsed children themselves (not copies) are attached to node.
    node.addChildList(root.children)
Esempio n. 10
0
def replaceNodeContentsWithText(node, text):
    """Replace every subnode of ``node`` with ``text`` interpreted as XML.

    Nodes that have no children get ``text`` assigned through
    ``setContent``.  For other nodes the text is wrapped in
    ``<name>...</endtag>``, parsed in a separate document, the existing
    children are unlinked and the freshly parsed children are added.  A
    parse exception is reported on stderr and leaves ``node`` unchanged.
    """
    if not node.children:
        node.setContent(text)
    else:
        # Wrapper element: bare node name in front (startTagForNode is not
        # used), the project-provided end tag behind.
        wrapper = '<%s>%s</%s>' % (node.name, text, endTagForNode(node))
        try:
            # Look up the document DTD so entities can be resolved.
            internal_dtd = doc.intSubset()
            if expand_entities:
                # FIXME: without this guard, including the DTD leads to a
                # "Segmentation fault" in libxml2.parseMemory().
                xml_source = internal_dtd.serialize('utf-8') + wrapper
            else:
                xml_source = '' + wrapper
        except:
            xml_source = wrapper

        try:
            context = libxml2.createDocParserCtxt(xml_source.encode('utf-8'))
            context.replaceEntities(0)
            context.parseDocument()
            new_doc = context.doc()
        except:
            print >> sys.stderr, """Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8'))
            return

        new_root = new_doc.getRootElement()
        if new_root and new_root.children:
            old_child = node.children
            while old_child:
                upcoming = old_child.next
                old_child.unlinkNode()
                old_child = upcoming

            node.addChildList(new_root.children)
        # Otherwise nothing is replaced; in practice this happens with tags
        # such as "<para>    </para>" (only whitespace in between).
Esempio n. 11
0
 def validate(self, xml, container, line_offset=0, char_offset=0):
     """Validate an XML fragment against the DTD from the plugin settings.

     The fragment is wrapped in a ``container`` element preceded by a
     DOCTYPE built from ``self.plugin.settings["dtd"]``, encoded as UTF-8
     and parsed with validation and line numbers enabled.  Errors are
     routed to ``self.xml_error``.  The HTML parser entry point is used
     when the DTD string contains " HTML ".

     Returns a true value only if the parser reports the document valid
     and ``self.xml_errors`` is still 0 after parsing.
     """
     dtd = self.plugin.settings["dtd"]
     template = u"<!DOCTYPE %s %s>\n<%s>\n%s\n</%s>"
     document = template % (container, dtd, container, xml, container)
     payload = document.encode("utf-8")
     self.plugin.debug("validating %r", payload)

     # State kept on the instance for the duration of the parse.
     self.xml = payload
     self.xml_line_offset = line_offset
     self.xml_char_offset = char_offset
     self.xml_line_char_offsets = []
     self.xml_errors = 0

     self.parser = libxml2.createDocParserCtxt(payload)
     self.parser.lineNumbers(1)
     self.parser.validate(1)
     self.parser.setErrorHandler(self.xml_error, None)
     if " HTML " not in dtd:
         self.parser.parseDocument()
     else:
         self.parser.htmlParseDocument()

     ok = self.parser.isValid()
     if ok:
         ok = self.xml_errors == 0
     self.parser = None
     self.plugin.debug("validation result: %r", ok)
     return ok
Esempio n. 12
0
 def validate(self, xml, container, line_offset=0, char_offset=0):
     """Validate an XML fragment against the DTD from the plugin settings.

     The fragment is wrapped in a ``container`` element preceded by a
     DOCTYPE built from ``self.plugin.settings["dtd"]``, encoded as UTF-8
     and parsed with validation and line numbers enabled; errors are
     routed to ``self.xml_error``.  The HTML parser entry point is used
     when the DTD string contains " HTML ".

     ``self.parser`` is reset to ``None`` when the method finishes, also
     when parsing raises, so no stale parser context stays on the instance.

     Returns a true value only if the parser reports the document valid
     and ``self.xml_errors`` is still 0 after parsing.
     """
     dtd = self.plugin.settings["dtd"]
     xml = (u"<!DOCTYPE %s %s>\n<%s>\n%s\n</%s>"
            % (container, dtd, container, xml, container))
     xml = xml.encode("utf-8")
     self.plugin.debug("validating %r", xml)
     self.xml = xml
     self.xml_line_offset = line_offset
     self.xml_char_offset = char_offset
     self.xml_line_char_offsets = []
     self.xml_errors = 0
     self.parser = libxml2.createDocParserCtxt(xml)
     try:
         self.parser.lineNumbers(1)
         self.parser.validate(1)
         self.parser.setErrorHandler(self.xml_error, None)
         if " HTML " in dtd:
             self.parser.htmlParseDocument()
         else:
             self.parser.parseDocument()
         ok = self.parser.isValid() and self.xml_errors == 0
     finally:
         # NOTE(review): the document built by the parser is not freed
         # here -- confirm whether it needs an explicit freeDoc().
         self.parser = None
     self.plugin.debug("validation result: %r", ok)
     return ok
Esempio n. 13
0
def normalizeString(text, ignorewhitespace = 1):
    """Normalize ``text`` to be used as a key for gettext lookup.

    When ``ignorewhitespace`` is false the text is returned unchanged.
    Otherwise it is wrapped in ``<norm>``, parsed (with the internal DTD
    subset of the module-level ``doc`` prepended when available), passed
    through ``normalizeNode`` and serialized again; one leading and one
    trailing space are stripped from the result.  Entities are replaced
    only when the module-level ``expand_entities`` flag is true.

    If the wrapped text cannot be parsed, a message is written to stderr
    and the original ``text`` is returned.
    """
    if not ignorewhitespace:
        return text
    try:
        # Prepend the document DTD so entities are resolved.
        dtd = doc.intSubset()
        tmp = dtd.serialize('utf-8')
        tmp = tmp + '<norm>%s</norm>' % text
    except Exception:
        tmp = '<norm>%s</norm>' % text

    try:
        ctxt = libxml2.createDocParserCtxt(tmp)
        if expand_entities:
            ctxt.replaceEntities(1)
        ctxt.parseDocument()
        tree = ctxt.doc()
        newnode = tree.getRootElement()
    except Exception:
        print >> sys.stderr, """Error while normalizing string as XML:\n"%s"\n""" % (text)
        return text

    try:
        normalizeNode(newnode)

        parts = []
        child = newnode.children
        while child:
            parts.append(child.serialize('utf-8'))
            child = child.next
    finally:
        # The temporary document was previously never released.
        tree.freeDoc()

    result = ''.join(parts)
    result = re.sub('^ ', '', result)
    result = re.sub(' $', '', result)

    return result