def asCatalogDoc(self):
    """
    Build an <AsnDocuments/> record cataloging every value held by this
    object.

    The root element gets a "jurisdiction" attribute taken from
    self.jurisdiction, and one child per value (whatever that value's
    asCatalogElement() returns). Returns the XmlRecord.
    """
    catalog = XmlRecord(xml='<AsnDocuments/>')
    catalogRoot = catalog.doc
    catalogRoot.setAttribute("jurisdiction", self.jurisdiction)
    for document in self.values():
        catalogElement = document.asCatalogElement()
        catalogRoot.appendChild(catalogElement)
    return catalog
def asXml(self):
    """
    Return a <toc/> XmlRecord whose root contains the element produced
    by asElement() for each value held by this object.
    """
    toc = XmlRecord(xml="<toc/>")
    for info in self.values():
        toc.doc.appendChild(info.asElement())
    return toc
def makeProvsOutlineOLD(self): rec = XmlRecord(xml="<body/>") body = rec.doc typeDefs = self.xsdMgr.xsd.getProvTypes() # typeDefs.sort (self.provTypeSortFn) for typeDef in typeDefs: print typeDef.countryName continue provItems = typeDef.getValues() item = provItems[ 0] # all provItems share the same country information # print "**", typeDef.__class__.__name__ countryOutline = XmlUtils.addElement(rec.dom, body, "outline") countryOutline.setAttribute("type", "group") countryOutline.setAttribute("text", item.countryName) #countryOutline.setAttribute ("vocab", item.countryCode) # don't think we want to do this for provTerm in provItems: provOutline = XmlUtils.addElement(rec.dom, countryOutline, "outline") provOutline.setAttribute("type", "vocab") provOutline.setAttribute("text", provTerm.provName) provOutline.setAttribute( "vocab", provTerm.provCode) # don't think we want to do this return rec
def getResponseDoc(self, params=None, opts=None): """ returns response as XmlRecord """ # print 'params: %s' % params # return XmlRecord(xml=self.getData(params, opts)) responseDoc = None try: # responseText = data.read() # responseText = unicode (data.read(), 'iso-8859-1') # universal? # responseText = unicode (data.read(), 'utf-8') # experimental 12/2/2010 data = self.getData(params, opts) # print data responseDoc = XmlRecord(xml=data) webResponseErrorNode = responseDoc.selectSingleNode( responseDoc.dom, 'DDSWebService:error') if webResponseErrorNode: errorCode = webResponseErrorNode.getAttribute('code') if errorCode == 'noRecordsMatch': return None print 'errorCode', errorCode raise SimpleClientError, XmlUtils.getText(webResponseErrorNode) except Exception, msg: ## self.error = ServiceError (sys.exc_info()) # self.error = ServiceError (["ServiceResponse: Could not parse XML", sys.exc_info()[1]]) raise SimpleClientError, "DDSClient: Could not parse XML: %s" % msg
def beautify(path, out=None):
    """
    Write a pretty-printed version of the XML file at `path`.

    path - file to read
    out - file to write; when omitted (or otherwise false) the
          pretty-printed text is written back over `path`
    """
    out = out or path
    rec = XmlRecord(path=path)
    pretty = rec.doc.toprettyxml()
    fp = open(out, 'w')
    try:
        fp.write(pretty)
    finally:
        # release the file handle even when the write fails
        fp.close()
def __init__(self, data, exc_info=None, preprocessor=None):
    """
    Wrap a service response.

    data - object whose read() returns the utf-8 encoded response text
    exc_info - when given, the response is treated as failed: self.error
               becomes a ServiceError built from it and `data` is not read
    preprocessor - optional callable applied to the decoded response
                   text before it is parsed

    Afterwards:
      self.doc - XmlRecord of the response (None if it was never parsed)
      self.error - None on success; the text of a 'DDSWebService:error'
                   node if the response has one; otherwise a ServiceError
    """
    self.data = data
    self.error = None
    self.doc = None
    if exc_info:
        self.error = ServiceError(exc_info)
        return

    try:
        responseText = unicode(data.read(), 'utf-8')
        if preprocessor:
            responseText = preprocessor(responseText)
        self.doc = XmlRecord(xml=responseText)
        webResponseErrorNode = self.doc.selectSingleNode(
            self.doc.dom, 'DDSWebService:error')
        if webResponseErrorNode:
            self.error = XmlUtils.getText(webResponseErrorNode)
    except Exception:
        # `except Exception` (not a bare except) so that KeyboardInterrupt
        # and SystemExit still propagate while the response is being read.
        self.error = ServiceError([
            "ServiceResponse: Could not parse XML",
            sys.exc_info()[1]
        ])
def __init__(self, path): """ self.unit - the unit to which this Chapter belongs (e.g., 'Pathways & Advance Engineering') self.data - TabData instances for each topic """ self.data = [] s = utils.getHtml(path) filename = os.path.basename(path) self.unit = os.path.basename(os.path.dirname(path)) self.num, self.chapter = self.getChapterInfo(filename) tagPat = RegExUtils.getTagPattern('x:ExcelWorkbook') m = tagPat.search(s) if not m: raise Exception, "could not get TABS data from file (%s)" % path print 'found data' xml = m.group(0).replace('x:', '') # strip x prefix from all elements rec = XmlRecord(xml=xml) rec.xpath_delimiter = '/' tabNodes = rec.selectNodes( rec.dom, "ExcelWorkbook/ExcelWorksheets/ExcelWorksheet") # we ignore the 'Cover sheet' print 'creating %d tabs' % len(tabNodes) for tabElement in tabNodes: tabData = TabData(tabElement, self.unit) if tabData.name.lower() != 'cover sheet': tabData.num = len(self) + 1 self.append(tabData)
def getRecord(self, identifier, xmlFormat):
    """
    Issue a 'GetRecord' request for `identifier`, using `xmlFormat` as the
    metadataPrefix, and return the data from self.getData as an XmlRecord.
    """
    request = dict(verb='GetRecord',
                   identifier=identifier,
                   metadataPrefix=xmlFormat)
    return XmlRecord(xml=self.getData(request))
def __init__(self, collection): UserList.__init__(self) self.collection = collection self.dataPath = os.path.join(self.baseDir, collection + '.xml') print "DATA_PATH: ", self.dataPath self.rec = XmlRecord(path=self.dataPath) nodes = self.rec.selectNodes(self.rec.dom, "collectionInfo:rec") print "%d recs read from meta-metadata" % len(nodes) map(self.append, map(RecordInfo, nodes))
def initializeFromBaseMappings(self): baseRec = XmlRecord(path="output/dr_2_recId_mappings.xml") mappingEls = baseRec.selectNodes(baseRec.dom, 'dr_2_recId_mappings:mapping') for mappingEl in mappingEls: drNum = mappingEl.getAttribute('drNumber') recId = mappingEl.getAttribute('recordID') self[drNum] = recId print '%d base mappings found' % len(self)
def processDir(self):
    """
    Tally the records in self.dir.

    Every directory entry is read as an XmlRecord and passed to
    processRecord; the result is turned into a key by makeKey, and the
    count stored under that key is incremented (starting from 0).
    """
    for entry in os.listdir(self.dir):
        record = XmlRecord(path=os.path.join(self.dir, entry))
        key = self.makeKey(self.processRecord(record))
        if self.has_key(key):
            seen = self[key]
        else:
            seen = 0
        self[key] = seen + 1
def initializeFromBaseMappingsBOG(self): baseRec = XmlRecord(path="input/accessionNumberMappings.xml") mappingEls = baseRec.selectNodes(baseRec.dom, 'accessionNumberMappings:mapping') for mappingEl in mappingEls: drNum = mappingEl.getAttribute('drNumber') queryString = mappingEl.getAttribute('queryString') self[drNum] = queryString print '%d base mappings found' % len(self)
def asXml(self):
    """
    Return an <accessionNumberMappings /> XmlRecord, stamped with the
    current time in its "date" attribute, containing one "mapping"
    element per key (each filled in by populateMappingElement).
    """
    mappingsRec = XmlRecord(xml="<accessionNumberMappings />")
    rootEl = mappingsRec.doc
    rootEl.setAttribute("date", time.asctime())
    for key in self.keys():
        el = mappingsRec.addElement(rootEl, "mapping")
        self.populateMappingElement(el, key)
    return mappingsRec
def populateXml (self, xmlData):
    """
    Add one child to self.dom for every element of `xmlData`.

    xmlData - xml text; each element returned by getElements for its
              root is read as a row of "TD" cells: the first cell's text
              is the field name, and the text of the "B" child of the
              second cell is the field value

    A single trailing ":" is dropped from the name, which is then passed
    through normalizeTagName to obtain the tag of the child added.
    """
    dataRec = XmlRecord (xml=xmlData)
    for dataElement in dataRec.getElements (dataRec.doc):
        cells = XmlUtils.getChildElements (dataElement, "TD")
        name = XmlUtils.getText (cells[0]).strip()
        # endswith (rather than name[-1]) does not blow up with an
        # IndexError when the name cell is empty
        if name.endswith (":"):
            name = name[:-1]
        value = XmlUtils.getText (XmlUtils.getChild ("B", cells[1])).strip()
        XmlUtils.addChild (self.dom, self.normalizeTagName(name), value)
def __init__(self): UserDict.__init__(self) rec = XmlRecord('output/FINAL-accessionNumberMappings.xml') mappings = rec.selectNodes(rec.dom, 'accessionNumberMappings:mapping') print '%d mappings found' % len(mappings) for mapping in mappings: drNum = mapping.getAttribute("drNumber") queryString = mapping.getAttribute("queryString") # print '%s -> %s' % (drNum, queryString) self[drNum] = queryString
def asXml(self):
    """
    Return an XmlRecord whose root is named by self.rootElementName,
    stamped with the current time in its "date" attribute, containing one
    "mapping" element per key (each filled in by populateMappingElement).
    """
    from JloXml import XmlRecord, XmlUtils
    import time

    mappingsRec = XmlRecord(xml="<%s />" % self.rootElementName)
    rootEl = mappingsRec.doc
    rootEl.setAttribute("date", time.asctime())
    for key in self.keys():
        el = mappingsRec.addElement(rootEl, "mapping")
        self.populateMappingElement(el, key)
    return mappingsRec
def makeRecord(self):
    """
    Create an XmlRecord from self.record_template and fill it in with
      - a "date" child holding the current local time
      - a "recordSource" child holding self.record_source
      - a "collections" element with one "collection" per key, carrying
        the key and the id prefix reported by the value stored under it
    Returns the record.
    """
    record = XmlRecord(xml=self.record_template)
    XmlUtils.addChild(record.dom, "date", asctime(localtime()))
    XmlUtils.addChild(record.dom, "recordSource", self.record_source)
    collectionsEl = XmlUtils.addElement(record.dom, record.doc,
                                        "collections")
    for key in self.keys():
        el = XmlUtils.addElement(record.dom, collectionsEl, "collection")
        el.setAttribute("key", key)
        el.setAttribute("prefix", self[key].getIdPrefix())
    return record
def writeTopicRecords(self): for topic in self.keys(): print "%s - %d" % (topic, len(self[topic])) rec = XmlRecord(xml="<AsnDocuments/>") root = rec.doc root.setAttribute("topic", topic) for asnInfo in self[topic]: root.appendChild(asnInfo) path = os.path.join(self.topicCache, topic + '.xml') rec.write(path) print 'wrote to', path
def __init__(self, grouping): self.data = {} path = os.path.join (self.grouping_data_dir, grouping+'Map.xml') self.rec = XmlRecord(path=path) groupNodes = self.rec.selectNodes (self.rec.dom, 'dupGroups:group') print '%d dup nodes found' % len(groupNodes) for groupNode in groupNodes[:self.max_dups]: dupGroup = DupGroup (groupNode) key = dupGroup.key self[key] = dupGroup print 'comparisonManager ingested %d dupGroups' % len(self.keys())
def writeXml (self, path=None): """ write record info file to disk as xml """ path = path or "not-fy10-records.xml" rec = XmlRecord (xml="<not-fy10-records/>") rec.doc.setAttribute ("date", time.asctime(time.localtime())) for recInfo in self: rec.doc.appendChild (recInfo.asElement()) rec.write(path) print 'wrote to ', path
def __init__ (self, path="output/MetadataModifySpecs.xml"): UserList.__init__ (self) if not os.path.exists(path): raise IOError, "output does not exist at %s" % path updateInfoDoc = XmlRecord(path=path) updateInfos = updateInfoDoc.selectNodes (updateInfoDoc.dom, "changeSpecs:pubNameSpec") print "%d specs found" % len(updateInfos) for info in updateInfos: changeSpec = ChangeSpec (info) print changeSpec self.updateMetadata (changeSpec)
def __init__(self):
    """
    Load the id cache, mapping each 'idCache:entry' node's url attribute
    to its id attribute.

    Entries come from idCacheFile when that file exists, and from
    self.getBlankRec() otherwise. Also switches NsdlSearcher.verbose off.
    """
    self.data = {}
    NsdlSearcher.verbose = False
    if not os.path.exists(idCacheFile):
        cacheRec = self.getBlankRec()
    else:
        cacheRec = XmlRecord(path=idCacheFile)
    for entry in cacheRec.selectNodes(cacheRec.dom, 'idCache:entry'):
        key = entry.getAttribute('url')
        self[key] = entry.getAttribute('id')
def readTocXml(): docInfos = [] # rec = XmlRecord ("doctored-toc.xml") rec = XmlRecord("browser/toc.xml") ## print rec elements = rec.doc.getElementsByTagName("docInfo") ## print "%d docInfos read" % len (elements) for element in elements: docInfos.append(XmlDocInfo(element)) # print (docInfo) toc = TopicToc(docInfos, AsnHelper()) print toc.toHtml()
def getResourceIds (path):
    """
    Return the ids of the saved resources listed in the playlist at `path`.

    Only 'playList:items:item' nodes whose type attribute is
    'ccs_saved_resource' are considered; the text of each one's 'id'
    child is collected, and ids starting with 'CCS' are then left out.
    """
    rec = XmlRecord(path=path)
    savedIds = []
    for item in rec.selectNodes(rec.dom, 'playList:items:item'):
        if item.getAttribute('type') != 'ccs_saved_resource':
            continue
        idNode = XmlUtils.getChild ('id', item)
        savedIds.append (XmlUtils.getText(idNode))
    return [resId for resId in savedIds if not resId.startswith('CCS')]
def walk(dir): for filename in os.listdir(dir): path = os.path.join(dir, filename) if os.path.isdir(path): print "dir: %s" % path walk(path) else: root, ext = os.path.splitext(filename) if not ext.upper() == ".XML": continue # print "file: %s" % path rec = XmlRecord(path=path) addSchemaLoc(getSchemaLocation(rec))
def toxml(self): """ make an xml document containing id and dcslastTouchDate value for each record """ rec = XmlRecord(xml="<lastTouchInfo></lastTouchInfo>") rec.doc.setAttribute("collection", self.collection) for result in self: el = XmlUtils.addElement(rec.dom, rec.doc, 'rec') for key in result.keys(): el.setAttribute(key, result[key]) dest = "RAW_lastTouchData/%s.xml" % self.collection rec.write(dest) print "wrote to ", dest
def __init__(self, xmlFormat, version): self.xmlFormat = xmlFormat self.version = version self.fieldsDir = os.path.join(baseDir, self.xmlFormat, self.version, "fields") self.rec = XmlRecord(xml=template) if not os.path.exists(self.fieldsDir): raise "FileDoesNotExist", self.fieldsDir self.buildDir = os.path.join(baseDir, self.xmlFormat, self.version, "build") if not os.path.exists(self.fieldsDir): raise "FileDoesNotExist", self.fieldsDir print "fieldsDir: %s\nbuildDir: %s" % (self.fieldsDir, self.buildDir) self.addFieldsFiles()
def parseResponse(self, xml): """ check for error in response and raise error if found otherwise, return response as XmlRecord """ if self.verbose: print "parseResponse" print "\n***********************\n%s\n********************" % xml rec = XmlRecord(xml=xml) # print rec errorEl = rec.selectSingleNode(rec.dom, "DCSWebService:error") if errorEl: if self.verbose: print rec raise DCSWebServiceClientError, "Service Error: %s" % XmlUtils.getText( errorEl) return rec
def __init__(self, path): self.data = {} rec = XmlRecord(path=data) ## print rec rec.xpath_delimiter = "/" nodes = rec.selectNodes(rec.dom, 'GatheredIds/id') self.asnResolutionClient = AsnResolutionClient() print "%d nodes found" % len(nodes) for node in nodes: stdId = node.getAttribute("stdId") docId = node.getAttribute("docId") stdIds = [] if self.has_key(docId): stdIds = self[docId] stdIds.append(stdId) self[docId] = stdIds
def read(self): self.data = {} self.data_rec = XmlRecord(path=self.data_path) self.data_rec.xpath_delimiter = "/" recNodes = self.data_rec.selectNodes(self.data_rec.dom, 'not-fy10-records/record') print '%d records read' % len(recNodes) i = 0 for recNode in recNodes: recInfo = RecordInfo(recNode) i = i + 1 if i % 500 == 0: print "%d/%d" % (i, len(recNodes)) if not self.acceptFn(recInfo): continue self.addRecord(recInfo)