def stringForEntity(node):
    """Replaces entities in the node.

    The node is serialized, wrapped in a temporary ``<norm>`` element and
    parsed again.  When the node's document has an internal DTD subset, the
    serialized DTD is prepended so entity references can be resolved.
    Entities are only substituted when the global ``expand_entities`` flag
    is true.

    Returns:
        The concatenated UTF-8 serialization of the children of the
        re-parsed ``<norm>`` wrapper.
    """
    text = node.serialize('utf-8')
    try:
        # Lets add document DTD so entities are resolved
        dtd = node.doc.intSubset()
        tmp = dtd.serialize('utf-8') + '<norm>%s</norm>' % text
        has_dtd = True
    except Exception:
        # Best effort: without a usable DTD, parse the bare wrapper instead.
        tmp = '<norm>%s</norm>' % text
        has_dtd = False

    ctxt = libxml2.createDocParserCtxt(tmp)
    if expand_entities:
        ctxt.replaceEntities(1)
    ctxt.parseDocument()
    tree = ctxt.doc()
    try:
        # When the DTD was prepended, the wrapper is the second top-level
        # node of the parsed document; otherwise it is the first one.
        if has_dtd:
            newnode = tree.children.next
        else:
            newnode = tree.children

        parts = []
        child = newnode.children
        while child:
            parts.append(child.serialize('utf-8'))
            child = child.next
        return ''.join(parts)
    finally:
        # The temporary document is only needed for serialization; release
        # it explicitly so it does not leak on every call.
        tree.freeDoc()
def stringForEntity(self, node):
    """Replaces entities in the node.

    The node is serialized, wrapped in a temporary ``<norm>`` element and
    parsed again.  When ``self.doc`` has an internal DTD subset, the
    serialized DTD is prepended so entity references can be resolved.
    Entities are only substituted when ``self.expand_entities`` is true.

    Returns:
        The concatenated UTF-8 serialization of the children of the
        re-parsed ``<norm>`` wrapper.
    """
    text = node.serialize('utf-8')
    try:
        # Lets add document DTD so entities are resolved
        dtd = self.doc.intSubset()
        tmp = dtd.serialize('utf-8') + '<norm>%s</norm>' % text
        has_dtd = True
    except Exception:
        # Best effort: without a usable DTD, parse the bare wrapper instead.
        tmp = '<norm>%s</norm>' % text
        has_dtd = False

    ctxt = libxml2.createDocParserCtxt(tmp)
    if self.expand_entities:
        ctxt.replaceEntities(1)
    ctxt.parseDocument()
    tree = ctxt.doc()
    try:
        # When the DTD was prepended, the wrapper is the second top-level
        # node of the parsed document; otherwise it is the first one.
        if has_dtd:
            newnode = tree.children.next
        else:
            newnode = tree.children

        parts = []
        child = newnode.children
        while child:
            parts.append(child.serialize('utf-8'))
            child = child.next
        return ''.join(parts)
    finally:
        # Free the temporary document even when serialization fails.
        tree.freeDoc()
def replaceNodeContentsWithText(self, node, text):
    """Replaces all subnodes of a node with contents of text treated as XML.

    Nothing is done when ``self.CheckMatchedTags(text)`` is false.  A node
    without children simply gets `text` as its content.  Otherwise `text` is
    wrapped in the node's start/end tags (preceded by an XML declaration and
    the document's internal DTD subset, when present), parsed, and the node
    is replaced by a copy of the parsed root element.

    When the wrapped text cannot be parsed, a diagnostic is printed and the
    node is left untouched.
    """
    if not self.CheckMatchedTags(text):
        return

    if not node.children:
        node.setContent(text)
        return

    starttag = self.startTagForNode(node)
    endtag = self.endTagForNode(node)

    # Lets add document DTD so entities are resolved
    tmp = '<?xml version="1.0" encoding="utf-8" ?>'
    try:
        dtd = self.doc.intSubset()
        tmp = tmp + dtd.serialize('utf-8')
    except libxml2.treeError:
        pass

    content = '<%s>%s</%s>' % (starttag, text, endtag)
    tmp = tmp + content

    newnode = None
    try:
        ctxt = libxml2.createDocParserCtxt(tmp)
        ctxt.replaceEntities(0)
        ctxt.parseDocument()
        newnode = ctxt.doc()
    except Exception:
        # Reported just below through the ``not newnode`` check.  Catching
        # Exception (not everything) lets Ctrl-C / SystemExit propagate.
        pass

    if not newnode:
        print(f"\n--> Error parsing translation as XML:\n{text}")
        # See: https://gitlab.gnome.org/GNOME/libxml2/-/issues/64
        print("--> Note: this might be caused by a bug in libxml2.\n")
        return

    newelem = newnode.getRootElement()
    if newelem and newelem.children:
        # Detach every existing child before swapping the node itself.
        free = node.children
        while free:
            nextchild = free.next
            free.unlinkNode()
            free = nextchild

        if node:
            nextnode = node.next
            node.replaceNode(newelem.copyNodeList())
            # NOTE(review): looks like a 2to3 rewrite of ``node.next = next``
            # -- confirm that callers really read ``__next__``.
            node.__next__ = nextnode
    # else: in practice, this happens with tags such as "<para> </para>"
    # (only whitespace in between); the node is left as it is.
def replaceNodeContentsWithText(self, node, text):
    """Replaces all subnodes of a node with contents of text treated as XML.

    A node without children simply gets `text` as its content.  Otherwise
    `text` is wrapped in the node's start/end tags (preceded by an XML
    declaration and the document's internal DTD subset, when present),
    parsed, and the node is replaced by a copy of the parsed root element.

    When the wrapped text cannot be parsed, a message is written to stderr
    and the node is left untouched.
    """
    if not node.children:
        node.setContent(text)
        return

    starttag = self.startTagForNode(node)
    endtag = self.endTagForNode(node)

    # Lets add document DTD so entities are resolved
    tmp = '<?xml version="1.0" encoding="utf-8" ?>'
    try:
        dtd = self.doc.intSubset()
        tmp = tmp + dtd.serialize('utf-8')
    except libxml2.treeError:
        pass

    # presumably `text` is a unicode object (Python 2 text model), hence the
    # explicit encodes -- verify before porting this block to Python 3.
    content = '<%s>%s</%s>' % (starttag, text, endtag)
    tmp = tmp + content.encode('utf-8')

    newnode = None
    try:
        ctxt = libxml2.createDocParserCtxt(tmp)
        ctxt.replaceEntities(0)
        ctxt.parseDocument()
        newnode = ctxt.doc()
    except Exception:
        # Reported just below through the ``not newnode`` check.
        pass

    if not newnode:
        message = """Error while parsing translation as XML:\n"%s"\n""" % (
            text.encode('utf-8'))
        sys.stderr.write(message + '\n')
        return

    newelem = newnode.getRootElement()
    if newelem and newelem.children:
        # Detach every existing child before swapping the node itself.
        child = node.children
        while child:
            following = child.next
            child.unlinkNode()
            child = following

        if node:
            # Copy the parsed nodes exactly once; the previous code made a
            # second, unused copy that was never released.
            next_sibling = node.next
            node.replaceNode(newelem.copyNodeList())
            node.next = next_sibling
    # else: in practice, this happens with tags such as "<para> </para>"
    # (only whitespace in between); the node is left as it is.
def replaceNodeContentsWithText(node,text):
    """Replaces all subnodes of a node with contents of text treated as XML.

    A node without children simply gets `text` as its content.  Otherwise
    `text` is wrapped in the node's start/end tags (preceded by an XML
    declaration and the internal DTD subset of the global ``doc``, when
    present), parsed, and the node is replaced by a copy of the parsed root
    element.

    When the wrapped text cannot be parsed, a message is written to stderr
    and the node is left untouched.
    """
    if not node.children:
        node.setContent(text)
        return

    starttag = startTagForNode(node)
    endtag = endTagForNode(node)

    # Lets add document DTD so entities are resolved
    tmp = '<?xml version="1.0" encoding="utf-8" ?>'
    try:
        dtd = doc.intSubset()
        tmp = tmp + dtd.serialize('utf-8')
    except libxml2.treeError:
        pass

    # presumably `text` is a unicode object (Python 2 text model), hence the
    # explicit encodes -- verify before porting this block to Python 3.
    content = '<%s>%s</%s>' % (starttag, text, endtag)
    tmp = tmp + content.encode('utf-8')

    newnode = None
    try:
        ctxt = libxml2.createDocParserCtxt(tmp)
        ctxt.replaceEntities(0)
        ctxt.parseDocument()
        newnode = ctxt.doc()
    except Exception:
        # Reported just below through the ``not newnode`` check.
        pass

    if not newnode:
        message = """Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8'))
        sys.stderr.write(message + '\n')
        return

    newelem = newnode.getRootElement()
    if newelem and newelem.children:
        # Detach every existing child before swapping the node itself.
        child = node.children
        while child:
            following = child.next
            child.unlinkNode()
            child = following

        if node:
            # Copy the parsed nodes exactly once; the previous code made a
            # second, unused copy that was never released.
            next_sibling = node.next
            node.replaceNode(newelem.copyNodeList())
            node.next = next_sibling
    # else: in practice, this happens with tags such as "<para> </para>"
    # (only whitespace in between); the node is left as it is.
def normalizeString(self, text, spacepreserve=False):
    """Normalizes string to be used as key for gettext lookup.

    Removes all unnecessary whitespace.

    The text is wrapped in a temporary ``<norm>`` element (preceded by the
    internal DTD subset of ``self.doc`` when one is available), parsed,
    passed through ``self.normalizeNode`` and serialized again; one leading
    and one trailing space are then stripped.

    Args:
        text: XML fragment to normalize.
        spacepreserve: when true, `text` is returned untouched.

    Returns:
        The normalized string.  `text` is returned unchanged when
        `spacepreserve` is set or when the fragment cannot be parsed (a
        message is written to stderr in the latter case).
    """
    if spacepreserve:
        return text

    try:
        # Lets add document DTD so entities are resolved
        dtd = self.doc.intSubset()
        tmp = dtd.serialize('utf-8')
        tmp = tmp + '<norm>%s</norm>' % text
    except Exception:
        tmp = '<norm>%s</norm>' % text

    tree = None
    try:
        ctxt = libxml2.createDocParserCtxt(tmp)
        if self.app.options.get('expand_entities'):
            ctxt.replaceEntities(1)
        ctxt.parseDocument()
        tree = ctxt.doc()
        newnode = tree.getRootElement()
    except Exception:
        print("""Error while normalizing string as XML:\n"%s"\n""" % (text), file=sys.stderr)
        if tree is not None:
            # A document was built but has no usable root; do not leak it.
            tree.freeDoc()
        return text

    try:
        # Not sure if saving the doc here is really necessary. It was one of
        # the things done in debugging and don't want to spend time now to
        # check if we can remove it.
        save_doc = self.doc
        self.doc = tree
        try:
            self.normalizeNode(newnode)
        finally:
            # Always put the real document back, even if normalization fails.
            self.doc = save_doc

        parts = []
        child = newnode.children
        while child:
            nextchild = child.next
            parts.append(child.serialize('utf-8'))
            child = nextchild
        result = ''.join(parts)
    finally:
        tree.freeDoc()

    result = re.sub('^ ', '', result)
    result = re.sub(' $', '', result)
    return result
def validate_xml_string(self, sXml_content):
    """Parse `sXml_content` with libxml2 and fail if an error was recorded.

    Args:
        sXml_content: the XML document to parse.

    Raises:
        ValueError: when ``libxml2.lastError()`` reports an error after
            parsing.  The original code raised a string exception, which
            Python rejects since 2.6; the former exception name is kept as
            a prefix of the message.
    """
    ctxt = libxml2.createDocParserCtxt(sXml_content)
    # ctxt.validate(1)
    ctxt.parseDocument()
    doc = ctxt.doc()
    try:
        # Detect whether at least one error occurred.
        # NOTE(review): lastError() looks process-wide, so an error left
        # over from an earlier parse might be reported here -- confirm.
        try:
            error = libxml2.lastError()
        except Exception:
            error = None

        if error is not None:
            sMessage = _(
                "At least one error occured when validating XML file.")
            raise ValueError("metroValidationError: %s" % sMessage)
    finally:
        # Release the parsed document on the error path as well.
        doc.freeDoc()
def normalizeString(self, text, spacepreserve=False):
    """Normalizes string to be used as key for gettext lookup.

    Removes all unnecessary whitespace.

    The text is wrapped in a temporary ``<norm>`` element (preceded by the
    internal DTD subset of ``self.doc`` when one is available), parsed,
    passed through ``self.normalizeNode`` and serialized again; one leading
    and one trailing space are then stripped.

    Args:
        text: XML fragment to normalize.
        spacepreserve: when true, `text` is returned untouched.

    Returns:
        The normalized string.  `text` is returned unchanged when
        `spacepreserve` is set or when the fragment cannot be parsed (a
        message is written to stderr in the latter case).
    """
    if spacepreserve:
        return text

    try:
        # Lets add document DTD so entities are resolved
        dtd = self.doc.intSubset()
        tmp = dtd.serialize('utf-8')
        tmp = tmp + '<norm>%s</norm>' % text
    except Exception:
        tmp = '<norm>%s</norm>' % text

    tree = None
    try:
        ctxt = libxml2.createDocParserCtxt(tmp)
        if self.app.options.get('expand_entities'):
            ctxt.replaceEntities(1)
        ctxt.parseDocument()
        tree = ctxt.doc()
        newnode = tree.getRootElement()
    except Exception:
        message = """Error while normalizing string as XML:\n"%s"\n""" % (
            text)
        # Same output as the former ``print >> sys.stderr`` statement, but
        # also valid on Python 3.
        sys.stderr.write(message + '\n')
        if tree is not None:
            # A document was built but has no usable root; do not leak it.
            tree.freeDoc()
        return text

    try:
        self.normalizeNode(newnode)
        parts = []
        child = newnode.children
        while child:
            parts.append(child.serialize('utf-8'))
            child = child.next
        result = ''.join(parts)
    finally:
        # Free the temporary tree even when normalization fails.
        tree.freeDoc()

    result = re.sub('^ ', '', result)
    result = re.sub(' $', '', result)
    return result
def replaceNodeContentsWithText(node, text):
    """Replaces all subnodes of a node with contents of text treated as XML.

    A node without children simply gets `text` as its content.  Otherwise
    `text` is wrapped in an element named after the node, parsed, and the
    node's children are replaced by the children of the parsed root element.
    The internal DTD subset of the global ``doc`` is only prepended when the
    global ``expand_entities`` flag is true.

    When the wrapped text cannot be parsed, a message is written to stderr
    and the node is left untouched.
    """
    if not node.children:
        node.setContent(text)
        return

    starttag = node.name  # startTagForNode(node)
    endtag = endTagForNode(node)
    try:
        # Lets add document DTD so entities are resolved
        dtd = doc.intSubset()
        tmp = ''
        if expand_entities:
            # FIXME: we get a "Segmentation fault" in libxml2.parseMemory()
            # when we include DTD otherwise
            tmp = dtd.serialize('utf-8')
        tmp = tmp + '<%s>%s</%s>' % (starttag, text, endtag)
    except Exception:
        tmp = '<%s>%s</%s>' % (starttag, text, endtag)

    try:
        # presumably `tmp`/`text` are unicode objects (Python 2 text model)
        # -- verify before porting this block to Python 3.
        ctxt = libxml2.createDocParserCtxt(tmp.encode('utf-8'))
        ctxt.replaceEntities(0)
        ctxt.parseDocument()
        newnode = ctxt.doc()
    except Exception:
        message = """Error while parsing translation as XML:\n"%s"\n""" % (
            text.encode('utf-8'))
        # Same output as the former ``print >> sys.stderr`` statement, but
        # also valid on Python 3.
        sys.stderr.write(message + '\n')
        return

    newelem = newnode.getRootElement()
    if newelem and newelem.children:
        # Detach every existing child, then graft in the parsed ones.
        child = node.children
        while child:
            following = child.next
            child.unlinkNode()
            child = following
        node.addChildList(newelem.children)
    # else: in practice, this happens with tags such as "<para> </para>"
    # (only whitespace in between); the node is left as it is.
def replaceNodeContentsWithText(node,text):
    """Replaces all subnodes of a node with contents of text treated as XML.

    A node without children simply gets `text` as its content.  Otherwise
    `text` is wrapped in an element named after the node, parsed, and the
    node's children are replaced by the children of the parsed root element.
    The internal DTD subset of the global ``doc`` is only prepended when the
    global ``expand_entities`` flag is true.

    When the wrapped text cannot be parsed, a message is written to stderr
    and the node is left untouched.
    """
    if not node.children:
        node.setContent(text)
        return

    starttag = node.name  # startTagForNode(node)
    endtag = endTagForNode(node)
    try:
        # Lets add document DTD so entities are resolved
        dtd = doc.intSubset()
        tmp = ''
        if expand_entities:
            # FIXME: we get a "Segmentation fault" in libxml2.parseMemory()
            # when we include DTD otherwise
            tmp = dtd.serialize('utf-8')
        tmp = tmp + '<%s>%s</%s>' % (starttag, text, endtag)
    except Exception:
        tmp = '<%s>%s</%s>' % (starttag, text, endtag)

    try:
        # presumably `tmp`/`text` are unicode objects (Python 2 text model)
        # -- verify before porting this block to Python 3.
        ctxt = libxml2.createDocParserCtxt(tmp.encode('utf-8'))
        ctxt.replaceEntities(0)
        ctxt.parseDocument()
        newnode = ctxt.doc()
    except Exception:
        message = """Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8'))
        # Same output as the former ``print >> sys.stderr`` statement, but
        # also valid on Python 3.
        sys.stderr.write(message + '\n')
        return

    newelem = newnode.getRootElement()
    if newelem and newelem.children:
        # Detach every existing child, then graft in the parsed ones.
        child = node.children
        while child:
            following = child.next
            child.unlinkNode()
            child = following
        node.addChildList(newelem.children)
    # else: in practice, this happens with tags such as "<para> </para>"
    # (only whitespace in between); the node is left as it is.
def validate(self, xml, container, line_offset=0, char_offset=0):
    """Validate an XML fragment against the plugin's configured DTD.

    The fragment is wrapped in a `container` element preceded by a DOCTYPE
    declaration built from ``self.plugin.settings["dtd"]``, encoded as UTF-8
    and parsed with validation enabled.  The HTML parser entry point is used
    when the DTD string contains ``" HTML "``.

    Args:
        xml: the fragment to validate.
        container: name of the wrapping element (also the DOCTYPE name).
        line_offset: stored as ``self.xml_line_offset``.
        char_offset: stored as ``self.xml_char_offset``.

    Returns:
        A truthy value when the parser reports the document valid and
        ``self.xml_errors`` is still 0 after parsing; falsy otherwise.
    """
    dtd = self.plugin.settings["dtd"]
    xml = (u"<!DOCTYPE %s %s>\n<%s>\n%s\n</%s>"
           % (container, dtd, container, xml, container))
    xml = xml.encode("utf-8")
    self.plugin.debug("validating %r", xml)

    # State consulted while parsing (presumably by self.xml_error -- verify).
    self.xml = xml
    self.xml_line_offset = line_offset
    self.xml_char_offset = char_offset
    self.xml_line_char_offsets = []
    self.xml_errors = 0

    parser = libxml2.createDocParserCtxt(xml)
    self.parser = parser
    try:
        parser.lineNumbers(1)
        parser.validate(1)
        parser.setErrorHandler(self.xml_error, None)
        if " HTML " in dtd:
            parser.htmlParseDocument()
        else:
            parser.parseDocument()
        ok = parser.isValid() and self.xml_errors == 0
    finally:
        # Drop the parser reference even when parsing raises, and free the
        # tree it built: libxml2 documents must be released explicitly.
        self.parser = None
        parser.doc().freeDoc()

    self.plugin.debug("validation result: %r", ok)
    return ok
def validate(self,xml,container,line_offset=0,char_offset=0):
    """Validate an XML fragment against the plugin's configured DTD.

    The fragment is wrapped in a `container` element preceded by a DOCTYPE
    declaration built from ``self.plugin.settings["dtd"]``, encoded as UTF-8
    and parsed with validation enabled.  The HTML parser entry point is used
    when the DTD string contains ``" HTML "``.

    Args:
        xml: the fragment to validate.
        container: name of the wrapping element (also the DOCTYPE name).
        line_offset: stored as ``self.xml_line_offset``.
        char_offset: stored as ``self.xml_char_offset``.

    Returns:
        A truthy value when the parser reports the document valid and
        ``self.xml_errors`` is still 0 after parsing; falsy otherwise.
    """
    dtd = self.plugin.settings["dtd"]
    xml = (u"<!DOCTYPE %s %s>\n<%s>\n%s\n</%s>"
           % (container, dtd, container, xml, container))
    xml = xml.encode("utf-8")
    self.plugin.debug("validating %r", xml)

    # State consulted while parsing (presumably by self.xml_error -- verify).
    self.xml = xml
    self.xml_line_offset = line_offset
    self.xml_char_offset = char_offset
    self.xml_line_char_offsets = []
    self.xml_errors = 0

    ctxt = libxml2.createDocParserCtxt(xml)
    self.parser = ctxt
    try:
        ctxt.lineNumbers(1)
        ctxt.validate(1)
        ctxt.setErrorHandler(self.xml_error, None)
        if " HTML " in dtd:
            ctxt.htmlParseDocument()
        else:
            ctxt.parseDocument()
        ok = ctxt.isValid() and self.xml_errors == 0
    finally:
        # Drop the parser reference even when parsing raises, and free the
        # tree it built: libxml2 documents must be released explicitly.
        self.parser = None
        ctxt.doc().freeDoc()

    self.plugin.debug("validation result: %r", ok)
    return ok
def normalizeString(text, ignorewhitespace = 1):
    """Normalizes string to be used as key for gettext lookup.

    Removes all unnecessary whitespace.

    The text is wrapped in a temporary ``<norm>`` element (preceded by the
    internal DTD subset of the global ``doc`` when one is available),
    parsed, passed through ``normalizeNode`` and serialized again; one
    leading and one trailing space are then stripped.  Entities are only
    substituted when the global ``expand_entities`` flag is true.

    Args:
        text: XML fragment to normalize.
        ignorewhitespace: when false, `text` is returned untouched.

    Returns:
        The normalized string.  `text` is returned unchanged when
        `ignorewhitespace` is false or when the fragment cannot be parsed
        (a message is written to stderr in the latter case).
    """
    if not ignorewhitespace:
        return text

    try:
        # Lets add document DTD so entities are resolved
        dtd = doc.intSubset()
        tmp = dtd.serialize('utf-8')
        tmp = tmp + '<norm>%s</norm>' % text
    except Exception:
        tmp = '<norm>%s</norm>' % text

    tree = None
    try:
        ctxt = libxml2.createDocParserCtxt(tmp)
        if expand_entities:
            ctxt.replaceEntities(1)
        ctxt.parseDocument()
        tree = ctxt.doc()
        newnode = tree.getRootElement()
    except Exception:
        message = """Error while normalizing string as XML:\n"%s"\n""" % (text)
        # Same output as the former ``print >> sys.stderr`` statement, but
        # also valid on Python 3.
        sys.stderr.write(message + '\n')
        if tree is not None:
            # A document was built but has no usable root; do not leak it.
            tree.freeDoc()
        return text

    try:
        normalizeNode(newnode)
        parts = []
        child = newnode.children
        while child:
            parts.append(child.serialize('utf-8'))
            child = child.next
        result = ''.join(parts)
    finally:
        # The temporary tree was never released before; free it explicitly
        # so repeated calls do not leak.
        tree.freeDoc()

    result = re.sub('^ ','', result)
    result = re.sub(' $','', result)
    return result