def __activate__(self, context): self.log = context["log"] self.config = context["systemConfig"] response = context["response"] try: ## Variable prep defaultPath = FascinatorHome.getPath("alerts") self.alertsPath = self.config.getString(defaultPath, ["alerts", "path"]) self.configFile = None # We'll allocate this later... if needed self.redboxVersion = self.config.getString( "", "redbox.version.string") self.csvDialect = csv.excel self.csvDialect.skipinitialspace = True ## XML Parsing docFactory = DocumentFactory() ##docFactory.setXPathNamespaceURIs(namespaces) self.saxReader = SAXReader(docFactory) ## Do our job (success, failed) = self.__processDir() ## Send response to the client (if debugging in browser) writer = response.getPrintWriter("text/plain; charset=UTF-8") writer.println("%s successful, %s failed" % (success, failed)) writer.close() except Exception, e: response.setStatus(500) writer = response.getPrintWriter("text/plain; charset=UTF-8") writer.println("Unexpected error during script execution:\n%s" % str(e)) writer.close()
def test(docname):
    """Test the functions in this module.

    Reads the XML document ``docname``, shows it, then shows it again after
    a ``modify_tree`` pass and after a ``modify_tree_xpath`` pass. Both
    passes are given the same ``time.ctime()`` value for 'date'.
    """
    document = SAXReader().read(docname)
    show_tree(document, 'Original tree:')

    # One timestamp is taken up front and shared by both modification passes.
    timestamp = time.ctime()

    modify_tree(document, 'person', 'date', timestamp)
    show_tree(document, 'After walk and modify:')

    modify_tree_xpath(document, '//people/person/name', 'date', timestamp)
    show_tree(document, 'After XPath modify:')
def getPayloadContent(self):
    """Return an HTML fragment previewing this object's payload.

    The fragment depends on ``self.__mimeType``:

    * ``text/html``: an iframe pointing at the object's download URL.
    * other ``text/*``: the payload named after the last path segment of the
      object id, copied into a ``<pre>`` element (empty string if that
      payload is missing).
    * PDF, ``vnd.ms*`` and ``vnd.oasis.opendocument.*``: the ``<body>`` of
      the ``.htm`` payload that shares the source payload's base name,
      re-serialised as a ``<div>``.
    * ``image/*``: a linked ``<img>`` whose URL is the payload id.

    Returns:
        The HTML string, or "" when no branch applies. When the ``.htm``
        rendition is missing or cannot be parsed, a "No preview available"
        paragraph is returned.
    """
    noPreview = "<p class=\"error\">No preview available</p>"
    mimeType = self.__mimeType
    print(" * single.py: payload content mimeType=%s" % mimeType)
    contentStr = ""
    if mimeType.startswith("text/"):
        if mimeType == "text/html":
            contentStr = '<iframe class="iframe-preview" src="%s/download/%s"></iframe>' % \
                (portalPath, self.__oid)
        else:
            pid = self.__oid[self.__oid.rfind("/") + 1:]
            payload = self.__storage.getPayload(self.__oid, pid)
            print(" * single.py: pid=%s payload=%s" % (pid, payload))
            if payload is not None:
                sw = StringWriter()
                sw.write("<pre>")
                IOUtils.copy(payload.getInputStream(), sw)
                sw.write("</pre>")
                sw.flush()
                contentStr = sw.toString()
    elif mimeType == "application/pdf" \
            or "vnd.ms" in mimeType \
            or "vnd.oasis.opendocument." in mimeType:
        # get the html version if exist...
        pid = os.path.splitext(self.__pid)[0] + ".htm"
        print(" * single.py: pid=%s" % pid)
        payload = self.__storage.getPayload(self.__oid, pid)
        if payload is None:
            # No rendition stored: say so directly rather than relying on an
            # AttributeError (and a stack trace) from dereferencing None.
            contentStr = noPreview
        else:
            saxReader = SAXReader(False)
            try:
                document = saxReader.read(payload.getInputStream())
                slideNode = document.selectSingleNode("//*[local-name()='body']")
                # encode character entities correctly
                slideNode.setName("div")
                out = ByteArrayOutputStream()
                outputFormat = OutputFormat.createPrettyPrint()
                outputFormat.setSuppressDeclaration(True)
                outputFormat.setExpandEmptyElements(True)
                writer = XMLWriter(out, outputFormat)
                writer.write(slideNode)
                writer.close()
                contentStr = out.toString("UTF-8")
            except:
                # Bare except kept on purpose: it also traps Java exceptions
                # raised by the parser/serialiser under Jython.
                traceback.print_exc()
                contentStr = noPreview
    elif mimeType.startswith("image/"):
        contentStr = '<a class="image" href="%(src)s" style="max-width:98%%">' \
            '<img src="%(src)s" style="max-width:100%%" /></a>' % {"src": self.__pid}
    return contentStr
def __init__(self, file, config, baseline):
    """Set up the handler and load its XML mapping configuration.

    After delegating to ``AlertHandler.__init__``, creates the SAX reader,
    resolves system properties in ``config['xmlMap']`` to obtain the mapping
    file path, and reads the "mappings", "exceptions" and "defaultNamespace"
    objects from that JSON file.

    Args:
        file: passed through to ``AlertHandler.__init__``.
        config: mapping that must contain an 'xmlMap' entry (path to the
            JSON mapping file; may contain system property placeholders).
        baseline: passed through to ``AlertHandler.__init__``.

    Raises:
        AlertException: if the resolved xmlMap file does not exist.
    """
    AlertHandler.__init__(self, file, config, baseline)
    docFactory = DocumentFactory()
    self.saxReader = SAXReader(docFactory)
    self.xmlMapFile = StrSubstitutor.replaceSystemProperties(config['xmlMap'])
    if not os.path.exists(self.xmlMapFile):
        raise AlertException("Requested xmlMap file %s does not exist." % self.xmlMapFile)

    ## Make sure we can see our mappings
    inStream = FileInputStream(File(self.xmlMapFile))
    try:
        # NOTE(review): assumes JsonSimple consumes the stream inside its
        # constructor, so it is safe to close straight afterwards - confirm.
        xmlMappings = JsonSimple(inStream)
    finally:
        # Release the file handle even when the JSON cannot be parsed.
        inStream.close()
    self.map = xmlMappings.getObject(["mappings"])
    self.exceptions = xmlMappings.getObject(["exceptions"])
    self.defaultNamespace = xmlMappings.getObject(["defaultNamespace"])
    self.mappedExceptionCount = 0
def __getPayloadContent(self, oid, pid):
    """Return an HTML fragment previewing payload ``pid`` of object ``oid``.

    The fragment depends on the payload's ``contentType``:

    * ``text/html``: an iframe pointing at the object's download URL.
    * other ``text/*``: the payload copied into a ``<pre>`` element.
    * PDF, ``vnd.ms*`` and ``vnd.oasis.opendocument.*``: the ``<body>`` of
      the ``.htm`` payload that shares ``pid``'s base name, re-serialised as
      a ``<div>``.
    * ``image/*``: a linked ``<img>`` whose URL is ``pid``.

    Args:
        oid: identifier of the object in storage.
        pid: identifier of the payload within that object.

    Returns:
        The HTML string ("" when no branch applies); an error ``<div>`` if
        the payload itself is missing; a "No preview available" paragraph if
        the ``.htm`` rendition is missing or cannot be parsed.
    """
    print(" * combined.py: oid='%s' pid='%s'" % (oid, pid))
    payload = self.__storage.getPayload(oid, pid)
    if payload is None:
        return "<div>Error: No content for '%s'</div>" % oid

    noPreview = "<p class=\"error\">No preview available</p>"
    mimeType = payload.contentType
    contentStr = ""
    if mimeType.startswith("text/"):
        if mimeType == "text/html":
            contentStr = '<iframe class="iframe-preview" src="%s/download/%s"></iframe>' % \
                (portalPath, oid)
        else:
            sw = StringWriter()
            sw.write("<pre>")
            IOUtils.copy(payload.getInputStream(), sw)
            sw.write("</pre>")
            sw.flush()
            contentStr = sw.toString()
    elif mimeType == "application/pdf" \
            or "vnd.ms" in mimeType \
            or "vnd.oasis.opendocument." in mimeType:
        # get the html version if exist...
        htmlPid = os.path.splitext(pid)[0] + ".htm"
        print(" * combined.py: pid=%s" % htmlPid)
        htmlPayload = self.__storage.getPayload(oid, htmlPid)
        if htmlPayload is None:
            # No rendition stored: say so directly rather than relying on an
            # AttributeError (and a stack trace) from dereferencing None.
            contentStr = noPreview
        else:
            saxReader = SAXReader(False)
            try:
                document = saxReader.read(htmlPayload.getInputStream())
                slideNode = document.selectSingleNode("//*[local-name()='body']")
                slideNode.setName("div")
                out = ByteArrayOutputStream()
                outputFormat = OutputFormat.createPrettyPrint()
                outputFormat.setSuppressDeclaration(True)
                outputFormat.setExpandEmptyElements(True)
                writer = XMLWriter(out, outputFormat)
                writer.write(slideNode)
                writer.close()
                contentStr = out.toString("UTF-8")
            except:
                # Bare except kept on purpose: it also traps Java exceptions
                # raised by the parser/serialiser under Jython.
                traceback.print_exc()
                contentStr = noPreview
    elif mimeType.startswith("image/"):
        contentStr = '<a class="image" href="%(src)s" style="max-width:98%%">' \
            '<img src="%(src)s" style="max-width:100%%" /></a>' % {"src": pid}
    return contentStr
def __createEpub(self):
    """Write the manifest to the HTTP response as an EPUB (zip) download.

    The archive receives, in order: the packaged ``mimetype``,
    ``META-INF/container.xml`` and ``OEBPS/epub.css`` resources; one entry
    per payload of every item in ``self.__orderedItem``; and finally the
    generated ``OEBPS/content.opf`` and ``OEBPS/toc.ncx``.

    XHTML payloads are re-parsed: ``name`` attributes are removed from
    anchors, image sources containing ``_files`` are rewritten to
    ``node-<md5 of base name><ext>`` under a lower-cased, underscore-joined
    directory, and only the ``<div class="body">`` element is kept, wrapped
    in a minimal XHTML page. A failure while converting one XHTML payload is
    printed and does not abort the export.
    """
    # Local import: only this method needs it.
    from xml.sax.saxutils import escape as xmlEscape

    title = self.__manifest.getString(None, "title")

    self.vc("response").setHeader("Content-Disposition",
                                  "attachment; filename=%s.epub" % urllib.quote(title))
    out = self.vc("response").getOutputStream("application/epub+zip")
    zipOutputStream = ZipOutputStream(out)

    # save mimetype... and the rest of standard files in epub
    standardFiles = (
        ("mimetype", "/epub/mimetype"),
        ("META-INF/container.xml", "/epub/container.xml"),
        ("OEBPS/epub.css", "/epub/epub.css"),
    )
    for entryName, resourcePath in standardFiles:
        zipOutputStream.putNextEntry(ZipEntry(entryName))
        resourceStream = self.__getResourceAsStream(resourcePath)
        try:
            IOUtils.copy(resourceStream, zipOutputStream)
        finally:
            # The resource streams were previously left open.
            resourceStream.close()
        zipOutputStream.closeEntry()

    #### Creating toc.ncx ####
    tocXml = ElementTree.Element("ncx", {
        "version": "2005-1",
        "xml:lang": "en",
        "xmlns": "http://www.daisy.org/z3986/2005/ncx/"
    })
    headNode = ElementTree.Element("head")
    tocXml.append(headNode)
    headMeta = (
        ("dtb:uid", "1"),
        ("dtb:depth", "1"),
        ("dtb:totalPageCount", "1"),
        ("dtb:maxPageNumber", "1"),
        ("dtb:generator", "ICE v2"),
    )
    for metaName, metaContent in headMeta:
        headNode.append(ElementTree.Element("meta", {
            "name": metaName,
            "content": metaContent
        }))

    # docTitle
    docTitle = ElementTree.Element("docTitle")
    textNode = ElementTree.Element("text")
    textNode.text = title
    docTitle.append(textNode)
    tocXml.append(docTitle)

    # docAuthor
    docAuthor = ElementTree.Element("docAuthor")
    textNode = ElementTree.Element("text")
    textNode.text = "ICE v2"
    docAuthor.append(textNode)
    tocXml.append(docAuthor)

    # navMap
    navMap = ElementTree.Element("navMap")
    tocXml.append(navMap)

    #### Creating content.opf ####
    contentXml = ElementTree.Element("package", {
        "version": "2.0",
        "xmlns": "http://www.idpf.org/2007/opf",
        "unique-identifier": "BookId"
    })

    metadataNode = ElementTree.Element("metadata", {
        "xmlns:dc": "http://purl.org/dc/elements/1.1/",
        "xmlns:opf": "http://www.idpf.org/2007/opf"
    })
    contentXml.append(metadataNode)

    # metadata information
    metadata = ElementTree.Element("dc:title")
    metadata.text = title
    metadataNode.append(metadata)

    metadata = ElementTree.Element("dc:language")
    metadata.text = "en-AU"
    metadataNode.append(metadata)

    metadata = ElementTree.Element("dc:creator", {"opf:role": "aut"})
    metadata.text = "ICE"
    metadataNode.append(metadata)

    metadata = ElementTree.Element("dc:publisher")
    metadata.text = "University of Southern Queensland"
    metadataNode.append(metadata)

    metadata = ElementTree.Element("dc:identifier", {"id": "BookId"})
    metadata.text = title
    metadataNode.append(metadata)

    # manifest
    manifest = ElementTree.Element("manifest")
    contentXml.append(manifest)

    spine = ElementTree.Element("spine", {"toc": "ncx"})
    contentXml.append(spine)

    item = ElementTree.Element("item", {
        "id": "ncx",
        "href": "toc.ncx",
        "media-type": "text/xml"
    })
    manifest.append(item)
    css = ElementTree.Element("item", {
        "id": "style",
        "href": "epub.css",
        "media-type": "text/css"
    })
    manifest.append(css)

    chapterTemplate = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<html xmlns="http://www.w3.org/1999/xhtml"><head><title>%s</title>\n'
        '<link rel="stylesheet" href="epub.css"/>\n'
        '</head><body>%s</body></html>'
    )

    count = 1
    for itemHash in self.__orderedItem:
        # Per-item names are kept distinct from the book-level ``title`` and
        # from the ``id`` builtin.
        _itemId, itemTitle, htmlFileName, payloadDict, isImage = \
            self.__itemRefDict[itemHash]

        for payloadId in payloadDict:
            payload, payloadType = payloadDict[payloadId]

            # Normalise the entry name for EVERY payload. Previously this
            # happened only for Payload instances, so the raw-stream branch
            # below hit an undefined (or stale, hence duplicate) zipEntryId.
            payloadId = payloadId.lower()
            zipEntryId = payloadId.replace(" ", "_").replace("\\", "/")

            if isinstance(payload, Payload):
                if payloadType == "application/xhtml+xml":
                    zipOutputStream.putNextEntry(ZipEntry("OEBPS/%s" % zipEntryId))
                    ## process the html....
                    saxReader = SAXReader(False)
                    try:
                        try:
                            saxDoc = saxReader.read(payload.open())
                        finally:
                            # Release the payload even when parsing fails.
                            payload.close()

                        ## remove name in a tags
                        ahrefs = saxDoc.selectNodes("//*[local-name()='a' and @name!='']")
                        for a in ahrefs:
                            attr = a.attribute(QName("name"))
                            if attr:
                                a.remove(attr)

                        ## fix images src name.... replace space with
                        ## underscore and all lower case
                        imgs = saxDoc.selectNodes(
                            "//*[local-name()='img' and contains(@src, '_files')]")
                        for img in imgs:
                            srcAttr = img.attribute(QName("src"))
                            if srcAttr:
                                src = srcAttr.getValue()
                                # hash the sourcename
                                filepath, filename = os.path.split(src)
                                filename, ext = os.path.splitext(filename)
                                filename = hashlib.md5(filename).hexdigest()
                                src = os.path.join(filepath.lower().replace(" ", "_"),
                                                   "node-%s%s" % (filename, ext))
                                img.addAttribute(QName("src"), src.replace(" ", "_"))

                        bodyNode = saxDoc.selectSingleNode(
                            "//*[local-name()='div' and @class='body']")
                        bodyNode.setName("div")

                        bodyOut = ByteArrayOutputStream()
                        outputFormat = OutputFormat.createPrettyPrint()
                        outputFormat.setSuppressDeclaration(True)
                        writer = XMLWriter(bodyOut, outputFormat)
                        writer.write(bodyNode)
                        writer.flush()
                        contentStr = bodyOut.toString("UTF-8")

                        # The title is plain text going into markup: escape
                        # it so '&' or '<' cannot produce malformed XHTML.
                        htmlString = chapterTemplate % (
                            xmlEscape("%s" % itemTitle), contentStr)
                        self.__copyString(htmlString, zipOutputStream)
                    except:
                        # Bare except kept on purpose: it also traps Java
                        # exceptions under Jython; one bad chapter must not
                        # abort the export.
                        traceback.print_exc()
                    # Close explicitly instead of relying on the next
                    # putNextEntry()/close() to do it implicitly.
                    zipOutputStream.closeEntry()
                else:
                    # images....
                    zipOutputStream.putNextEntry(ZipEntry("OEBPS/%s" % zipEntryId))
                    try:
                        IOUtils.copy(payload.open(), zipOutputStream)
                    finally:
                        payload.close()
                    zipOutputStream.closeEntry()
            else:
                # Not a Payload: copied as-is (presumably a raw input stream).
                zipOutputStream.putNextEntry(ZipEntry("OEBPS/%s" % zipEntryId))
                IOUtils.copy(payload, zipOutputStream)
                zipOutputStream.closeEntry()

            itemNode = ElementTree.Element("item", {
                "media-type": payloadType,
                "href": zipEntryId
            })
            if payloadId == htmlFileName.lower():
                itemNode.set("id", itemHash)
            else:
                itemNode.set("id", payloadId.replace("/", "_"))
            manifest.append(itemNode)

        if isImage:
            navPointId = "%s" % htmlFileName
        else:
            navPointId = "%s" % itemHash
        navPoint = ElementTree.Element("navPoint", {
            "class": "chapter",
            "id": navPointId,
            "playOrder": "%s" % count
        })
        navMap.append(navPoint)
        navLabel = ElementTree.Element("navLabel")
        navPoint.append(navLabel)
        textNode = ElementTree.Element("text")
        textNode.text = itemTitle
        navLabel.append(textNode)
        content = ElementTree.Element("content")
        navPoint.append(content)
        content.set("src", htmlFileName)
        count += 1

        itemRefNode = ElementTree.Element("itemref")
        spine.append(itemRefNode)
        itemRefNode.set("idref", itemHash)

    # saving content.opf...
    zipOutputStream.putNextEntry(ZipEntry("OEBPS/content.opf"))
    self.__copyString(ElementTree.tostring(contentXml), zipOutputStream)
    zipOutputStream.closeEntry()

    # saving toc.ncx
    zipOutputStream.putNextEntry(ZipEntry("OEBPS/toc.ncx"))
    self.__copyString(ElementTree.tostring(tocXml), zipOutputStream)
    zipOutputStream.closeEntry()

    zipOutputStream.close()
def __load_xml__(self):
    """Parse the file at ``self.xmlfile`` and store the document on ``self.xmldoc``."""
    source = File(self.xmlfile)
    self.xmldoc = SAXReader().read(source)
def test(indocname):
    """Read ``indocname`` with a SAXReader and pass a new ``Visitor`` to the document's ``accept``."""
    document = SAXReader().read(indocname)
    document.accept(Visitor())