def __init__(self, element):
    """
    Wrap ``element`` and expose its child elements as attributes.

    For each child returned by ``XmlUtils.getChildElements`` an instance
    attribute named after the child's tag is set to the value of
    ``XmlUtils.getText`` for that child.  The tag names are collected in
    ``self.attrs`` in the order the children are returned (a repeated tag
    is listed once per occurrence; the last occurrence wins as value).
    """
    self.element = element
    self.attrs = []
    for node in XmlUtils.getChildElements(element):
        name = node.tagName
        self.attrs.append(name)
        setattr(self, name, XmlUtils.getText(node))
def getElementHtml(self, element, level):
    """
    Render ``element`` as an html fragment.

    returns "" when the element has no text, no attributes and no child
    elements; otherwise a DIV of class "element" holding
      - a heading DIV of class 'level-<level>' ("tagName: text" when the
        element has text, the bare tagName otherwise)
      - the attribute html (getAttributesHtml) when there are attributes
      - for elements WITHOUT text, the html of each child element,
        rendered recursively at level + 1
    """
    klass = 'level-%d' % level
    tagName = element.tagName
    text = XmlUtils.getText(element)
    children = XmlUtils.getChildElements(element)
    attributes = element.attributes

    if not text and not attributes and not children:
        return ""

    html = DIV(klass="element")

    if text:
        heading = DIV(SPAN(tagName + ': ', klass='el-name'),
                      SPAN(text, klass="el-text"),
                      klass=klass)
    else:
        heading = DIV(tagName, klass=klass)
    html.append(heading)

    if attributes:
        html.append(self.getAttributesHtml(attributes, level))

    # children are only rendered for elements that carry no text
    if not text and children:
        for child in children:
            html.append(self.getElementHtml(child, level + 1))

    return html
def makeProvsOutlineOLD(self):
    """
    Debugging stub of the former province-outline builder.

    Prints the ``countryName`` of every type definition returned by
    ``self.xsdMgr.xsd.getProvTypes()`` and returns an ``XmlRecord`` built
    from ``<body/>`` to which nothing is added.

    NOTE: the loop body used to continue with code that created one
    "group" outline per country and one "vocab" outline per province, but
    that code sat behind an unconditional ``continue`` and could never
    run.  The unreachable code has been removed; what this method prints
    and returns is unchanged.
    """
    rec = XmlRecord(xml="<body/>")
    for typeDef in self.xsdMgr.xsd.getProvTypes():
        print(typeDef.countryName)
    return rec
def rewriteUrls(self, selectFn, urlTestFn, rewriteFn):
    """
    Rewrite the url stored as text in each selected node.

    NOT TESTED!

    selectFn (self)       - returns the nodes to examine
                            (e.g., getBSCSUrlNodes)
    urlTestFn (self, url) - true when the url has to be rewritten
    rewriteFn (self, url) - returns the replacement url

    When self.verbose is set, the asset path and the protected path/url
    derived from each url are printed for inspection.

    returns True if a change was made, False otherwise
    """
    recordChanged = False
    for urlNode in selectFn(self):
        url = XmlUtils.getText(urlNode)

        # Only the verbose report uses these values, but they are still
        # computed for every node, exactly as before.
        assetPath = getAssetPath(url)
        assetName = os.path.basename(assetPath)
        newProtectedCollPath = os.path.join(getReorgProtectedDir(),
                                            self.collection)
        newAssetPath = os.path.join(newProtectedCollPath, assetName)
        newProtectedUrl = os.path.join(base_protected_url,
                                       self.collection, assetName)

        if self.verbose:
            print('\n- assetPath: %s' % (assetPath,))
            print('- newAssetPath: %s' % (newAssetPath,))
            print('- oldUrl: %s' % (url,))
            print('- newProtectedUrl: %s' % (newProtectedUrl,))

        if urlTestFn(self, url):
            # was "writeFn", an undefined name that raised NameError
            new_url = rewriteFn(self, url)
            XmlUtils.setText(urlNode, new_url)
            recordChanged = True
    return recordChanged
def populate(self):
    """
    Build the record content from ``self.data``.

    - every entry of ``required_fields`` except 'authors' becomes a child
      via ``self.addChild``
    - an ``authors`` element is added to the document element, holding
      one ``author`` element per item of ``self.data['authors']``; truthy
      authororder / person_id / upid values become the attributes
      author_order / person_id / upid, and truthy lastName, firstName,
      middleName and suffix values become child elements
    - every entry of ``other_fields`` present in ``self.data`` becomes a
      child via ``self.addChild``
    """
    for name in required_fields:
        if name != 'authors':
            self.addChild(name, self.data[name])

    # (attribute on the author object, attribute written to the xml)
    idAttrs = (
        ('authororder', 'author_order'),
        ('person_id', 'person_id'),
        ('upid', 'upid'),
    )
    nameParts = ('lastName', 'firstName', 'middleName', 'suffix')

    authorsElement = self.addElement(self.doc, "authors")
    for author in self.data['authors']:
        authorElement = self.addElement(authorsElement, "author")

        for source, target in idAttrs:
            value = getattr(author, source)
            if value:
                authorElement.setAttribute(target, str(value))

        for part in nameParts:
            value = getattr(author, part)
            if value:
                XmlUtils.addChild(self.dom, part, value, authorElement)

    for name in other_fields:
        if name in self.data.keys():
            self.addChild(name, self.data[name])
def processRecord(self, rec):
    """
    tally the fields of one record.

    first make a map of occurrences (tag -> count) for the child elements
    of rec.doc, then merge that map into the global tally (self):
      - count grows by the number of occurrences in this record
      - max / min track the largest / smallest per-record count
      - maxRec is the file name of the latest record reaching max
      - entries for tags absent from this record get min = 0
    finally recordCount is incremented.
    """
    # occurrences of each tag within this record
    recordData = {}
    for element in XmlUtils.getChildElements(rec.doc):
        tag = element.tagName
        recordData[tag] = recordData.get(tag, 0) + 1

    ## now enter data into global tally
    for tag, occurrences in recordData.items():
        if tag not in self:
            self[tag] = Entry(tag)
        entry = self[tag]
        entry.count = entry.count + occurrences
        entry.max = max(entry.max, occurrences)
        if entry.max == occurrences:
            entry.maxRec = os.path.split(rec.path)[1]
        entry.min = min(entry.min, occurrences)

    # a tag known from other records but missing here occurred 0 times
    for entry in self.values():
        if entry.tag not in recordData:
            entry.min = 0

    self.recordCount = self.recordCount + 1
def fixRelations(self):
    """
    get relation elements and then fix each in turn, moving the element
    text into attributes:

    -- if the text starts with "http://":
        url = value
        type = Has part
    -- else
        title = value
        type = Is related

    The text of a fixed element is cleared.  Relations without text are
    skipped.  The id of the record and the xml of every fixed relation
    are printed.

    NOTE: the record is NOT written here (the write call was disabled in
    the original code); persisting the change is left to the caller.
    """
    nodes = self.getFieldElements("relation")
    if not nodes:
        return

    print("\n%s" % self.getId())
    for r in nodes:
        value = XmlUtils.getText(r)
        if not value:
            # Nothing to move for this relation.  Keep going so later
            # relations are still fixed (this used to return and
            # silently abandon the rest of the list).
            continue

        XmlUtils.setText(r, "")
        if value.startswith("http://"):
            r.setAttribute("type", "Has part")
            r.setAttribute("url", value)
        else:
            r.setAttribute("type", "Is related")
            r.setAttribute("title", value)
        print(r.toxml())
def setPubName(self, pubName, pubNameType=None):
    """
    Set the pubName value, creating the pubName element if one doesn't
    exist.

    A new element goes into 'record/general', in front of the first child
    whose tag is not recordID, recordDate or urlOfRecord; when there is
    no such child a pubName element is added through XmlUtils.addElement
    instead.  Progress is reported with print statements.

    pubNameType, when given, is applied through setPubNameType.

    raises Exception if the record has no general element
    """
    if not self.selectSingleNode(self.dom, self.xpaths['pubName']):
        # created empty here, populated by self.set below
        pubNameEl = XmlUtils.createElement("pubName")

        generalEl = self.selectSingleNode(self.dom, 'record/general')
        if not generalEl:
            raise Exception("record does not contain a general element")

        gen_children = XmlUtils.getChildElements(generalEl)
        print("%d elements found" % len(gen_children))

        # leading "header" elements the new element must not precede
        headerTags = ('recordID', 'recordDate', 'urlOfRecord')
        targetEl = None
        for child in gen_children:
            print(' -  %s' % (child.tagName,))
            if child.tagName in headerTags:
                continue
            targetEl = child
            print("target is %s" % child.tagName)
            break

        if targetEl:
            # pubName goes immediately in front of targetEl
            generalEl.insertBefore(pubNameEl, targetEl)
        else:
            # nothing to insert before: add at the end of general
            XmlUtils.addElement(self.dom, generalEl, 'pubName')

    self.set('pubName', pubName)
    if pubNameType:
        self.setPubNameType(pubNameType)
def processRecord(self, rec):
    """
    Add namespace info plus RecordID and Url elements to ``rec``, then
    write it.

    - declares the schema-instance namespace on the document element and
      sets the noNamespaceSchemaLocation
    - inserts a RecordID element followed by a Url element in front of
      the first existing child element of the document element
    - writes the record and prints its accession number
    """
    root = rec.doc
    root.setAttribute("xmlns:" + rec.schema_instance_namespace,
                      rec.SCHEMA_INSTANCE_URI)
    rec.setNoNamespaceSchemaLocation(
        "http://www.dls.ucar.edu/people/ostwald/Metadata/webcat/webcat-record.xsd")

    accessionNum = self.getAccessionNum(rec)

    imageUrl = "http://www.library.ucar.edu/uhtbin/hyperion-image/" + accessionNum
    urlElement = rec.dom.createElement("Url")
    XmlUtils.setText(urlElement, imageUrl)

    recordId = makeId(accessionNum)
    idElement = rec.dom.createElement("RecordID")
    XmlUtils.setText(idElement, recordId)

    # resulting order: RecordID, Url, <previous first child>, ...
    firstChild = XmlUtils.getChildElements(root)[0]
    root.insertBefore(urlElement, firstChild)
    root.insertBefore(idElement, urlElement)

    rec.write()
    print(accessionNum)
def __init__(self, element):
    """
    element is a row element containing an unknown number of TD cells.
    the last cell is where the data is, the others are "indents", which
    determine this node's "level" (number of cells - 1).

    The data cell is expected to hold two A elements:
      - the first wraps an IMG; the IMG's SRC file name, up to its first
        ".", becomes self.type, and the A's HREF (prefixed with
        webcatUtils.webcatDomain) becomes self.metadatapath
      - the second provides the HREF and text of self.link
    self.title is the label of the link; parent and children start as
    None.
    """
    cells = XmlUtils.getChildElements(element, "TD")
    self.level = len(cells) - 1

    anchors = XmlUtils.getChildElements(cells[-1], "A")

    iconAnchor = anchors[0]
    image = XmlUtils.getChild("IMG", iconAnchor)
    imageFile = os.path.basename(image.getAttribute("SRC"))
    self.type = imageFile.split(".")[0]
    self.metadatapath = (webcatUtils.webcatDomain
                         + iconAnchor.getAttribute("HREF"))

    linkAnchor = anchors[1]
    href = linkAnchor.getAttribute("HREF")
    label = XmlUtils.getText(linkAnchor)
    self.link = webcatUtils.WebCatLink((href, label))
    self.title = self.link.label

    self.parent = None
    self.children = None
def action(osmRecord):
    """
    Make sure the record carries the copyright notice.

    - the record/rights/copyrightNotice element is created when missing
      (together with a rights element under the document element when
      that is missing too)
    - its holder and url attributes are set to 'UCAR' and termsOfUseUrl
    - its text is set to copyrightBlurb

    returns True (the record is always reported as modified)
    """
    if verbose:
        print('\n-- task 11 action ---')

    dom = osmRecord.dom
    notice = osmRecord.selectSingleNode(dom, "record/rights/copyrightNotice")
    if not notice:
        rightsEl = osmRecord.selectSingleNode(dom, 'record/rights')
        if not rightsEl:
            rightsEl = XmlUtils.addElement(dom, osmRecord.doc, 'rights')
        notice = XmlUtils.addElement(dom, rightsEl, 'copyrightNotice')

    notice.setAttribute('holder', 'UCAR')
    notice.setAttribute('url', termsOfUseUrl)
    if verbose:
        print(notice.toxml())
    XmlUtils.setText(notice, copyrightBlurb)
    return True
def finalizeXml(self):
    """
    Add namespace info plus recordID and url elements to this record.

    - declares the schema-instance namespace on the document element and
      sets the noNamespaceSchemaLocation
    - inserts a recordID element followed by a url element in front of
      the first existing child element of the document element

    When makeId fails, the error is printed and the recordID text is set
    to "ERROR" so that processing can go on.  Only ordinary errors are
    handled that way; KeyboardInterrupt and SystemExit propagate.
    """
    self.doc.setAttribute("xmlns:" + self.schema_instance_namespace,
                          self.SCHEMA_INSTANCE_URI)
    self.setNoNamespaceSchemaLocation(
        "http://www.dls.ucar.edu/people/ostwald/Metadata/webcat/webcat-record.xsd")

    accessionNum = self.getAccessionNum()
    url = "http://www.library.ucar.edu/uhtbin/hyperion-image/" + accessionNum
    urlElement = self.dom.createElement("url")
    XmlUtils.setText(urlElement, url)

    try:
        recordId = makeId(accessionNum, self.prefix)
    except Exception:
        # deliberate best effort: flag the record instead of aborting
        recordId = "ERROR"
        print("Error processing " + self.url)
        excType, excValue = sys.exc_info()[:2]
        print('%s %s' % (excType, excValue))

    idElement = self.dom.createElement("recordID")
    XmlUtils.setText(idElement, recordId)

    # resulting order: recordID, url, <previous first child>, ...
    children = XmlUtils.getChildElements(self.doc)
    self.doc.insertBefore(urlElement, children[0])
    self.doc.insertBefore(idElement, urlElement)
def getPlaylistResourceAnnotations(userId):
    """
    returns the ids of annotations by user that annotate resources found
    on playlists

    Each result of ResourceSearcher (run on the ids returned by
    getPlaylistResources) is scanned for related records whose annotation
    userId equals ``userId``; the 'head/id' text of each such relation is
    collected once, in the order first seen.
    """
    relationPath = 'record/relations/relation/record'
    userIdPath = "metadata/annotationRecord/moreInfo/userSelection/user/userId"

    anno_ids = []
    for result in ResourceSearcher(getPlaylistResources(userId)):
        result.xpath_delimiter = '/'
        for relation in result.selectNodes(result.dom, relationPath):
            # keep only annotation records contributed by our user
            if XmlUtils.getTextAtPath(relation, userIdPath) != userId:
                continue
            anno_id = XmlUtils.getTextAtPath(relation, 'head/id')
            if anno_id not in anno_ids:
                anno_ids.append(anno_id)
    return anno_ids
def replaceVocabTerm(badTerm, goodTerm, vocab, osmRec):
    """
    Replace ``badTerm`` with ``goodTerm`` in the vocab fields of osmRec.

    getVocabField(vocab) yields the index field (or a list of them) for
    the vocab, e.g. 'instName'.  For each field, every node found at
    getFieldXpath(field) whose text equals badTerm gets goodTerm as its
    new text, and ' .. replaced' is printed.

    returns osmRec (modified in place)
    """
    fields = getVocabField(vocab)
    if type(fields) in (type(''), type(u'')):
        # a single field name: treat it as a one-item list
        fields = [fields]

    for field in fields:
        for node in osmRec.selectNodes(osmRec.dom, getFieldXpath(field)):
            if XmlUtils.getText(node) != badTerm:
                continue
            XmlUtils.setText(node, goodTerm)
            print(' .. replaced')
    return osmRec
def __init__(self, element):
    """
    Wrap ``element``: keep it, read its 'id' attribute into ``self.id``
    and expose each child element as an instance attribute named after
    the child's tag, holding the value of ``XmlUtils.getText`` for it.
    """
    self.element = element
    self.id = element.getAttribute('id')
    for node in XmlUtils.getChildElements(element):
        setattr(self, node.tagName, XmlUtils.getText(node))
def insertFiscalYear(self, fiscalYear):
    """
    insert provided fiscalYear in the dom

    setFiscalYear is tried first.  If that call fails, the instance
    record is assumed to lack the necessary element: an empty fiscalYear
    element is inserted as first child of the coverage node and
    setFiscalYear is called again.

    raises Exception if the coverage node cannot be found
    """
    try:
        self.setFiscalYear(fiscalYear)
        return
    except Exception:
        # An ordinary failure means "element missing" and is repaired
        # below.  KeyboardInterrupt / SystemExit are no longer swallowed.
        pass

    # check for the parent before creating anything
    coverageNode = self.getCoverageNode()
    if not coverageNode:
        raise Exception("node not found at 'record/coverage' for %s"
                        % self.getId())

    fyEl = XmlUtils.createElement('fiscalYear')
    children = XmlUtils.getChildElements(coverageNode)
    if children:
        coverageNode.insertBefore(fyEl, children[0])
    else:
        coverageNode.appendChild(fyEl)
    self.setFiscalYear(fiscalYear)
def doFormulas(self, parent):
    """
    Evaluate the formulas found below ``parent`` and store the results
    as element text.

    formulas are expressed as attribute values. e.g.,
        <TxtHeight F="Height*0.861111">
    variables used are 'Width' and 'Height', so these must be available
    to eval

    all child elements having formula ("F") attrs are assigned a text
    value that is the result of evaluating the formula.  A formula that
    starts with 'NURBS' is not evaluated; it is stored verbatim.  Child
    elements that have child elements of their own are processed
    recursively.
    """
    # Width and Height are never read directly in this method: they exist
    # as local names so that eval() below can resolve them.  Do not
    # rename or remove them.
    Width = getattr(self, 'Width')
    Height = self.Height
    # print 'Width is a %s' % type(Width)
    for child in XmlUtils.getChildElements(parent):
        f = child.getAttribute("F")
        if f:
            # print '- %s - "%s"' % (child.tagName, f)
            if f.startswith('NURBS'):
                val = f
            else:
                # NOTE(review): eval() runs whatever Python expression the
                # attribute contains, with access to this method's local
                # names -- only use with documents from a trusted source.
                val = eval(f)
            # print " -> ", val
            XmlUtils.setText(child, str(val))
        if XmlUtils.getChildElements(child):
            self.doFormulas(child)
def getRowCells(self, row, rowNum):
    """
    Return the cleaned cell values of a table row.

    Every 'td' cell contributes its text, except the second cell (index
    1), which contributes two values:
      - header row (rowNum 0): the cell text and the literal 'url'
      - other rows: the text and the href of the cell's 'a' child; when
        there is no such child, row 1 contributes two empty strings and
        any other row raises NoTopPickDataError

    Each value is passed through self.cleanCellData.
    """
    values = []
    for index, cell in enumerate(self.selectNodes(row, 'td')):
        if index != 1:
            values.append(XmlUtils.getText(cell))
        elif rowNum == 0:
            # header row: the link column gets an extra 'url' heading
            values.append(XmlUtils.getText(cell))
            values.append('url')
        else:
            link = XmlUtils.getChild('a', cell)
            if link is not None:
                values.append(XmlUtils.getText(link))
                values.append(link.getAttribute('href'))
            elif rowNum == 1:
                values.append('')
                values.append('')
            else:
                raise NoTopPickDataError('No link found in row %d' % rowNum)
    return [self.cleanCellData(value) for value in values]
def getResponseDoc(self, params=None, opts=None):
    """
    returns response as XmlRecord

    returns None when the service reports the 'noRecordsMatch' error
    code.

    raises SimpleClientError
      - "DDSClient: Could not parse XML: ..." when the response cannot
        be fetched or parsed
      - the text of the DDSWebService:error element when the service
        reports any other error code (the code is printed first)
    """
    try:
        data = self.getData(params, opts)
        responseDoc = XmlRecord(xml=data)
        webResponseErrorNode = responseDoc.selectSingleNode(
            responseDoc.dom, 'DDSWebService:error')
    except Exception as msg:
        raise SimpleClientError(
            "DDSClient: Could not parse XML: %s" % msg)

    if webResponseErrorNode:
        errorCode = webResponseErrorNode.getAttribute('code')
        if errorCode == 'noRecordsMatch':
            return None
        print('errorCode %s' % (errorCode,))
        # Raised outside the try block so the service's own message is
        # no longer re-wrapped as a "Could not parse XML" error.
        raise SimpleClientError(XmlUtils.getText(webResponseErrorNode))

    # the parsed record used to be dropped (the method returned None)
    return responseDoc
def addFieldsFiles(self):
    """
    Add one "file" child to the record's "files" element per name
    returned by getFieldsFileNames.

    The text of each child is
        <xmlFormat>/<version>/fields/<fieldsFile>

    raises Exception if the files element is not found
    """
    record = self.rec
    filesElement = record.selectSingleNode(record.doc, "files")
    if not filesElement:
        raise Exception("Files element not found")

    for fileName in self.getFieldsFileNames():
        parts = [self.xmlFormat, self.version, "fields", fileName]
        XmlUtils.addChild(record.dom, "file", "/".join(parts), filesElement)
def __init__(self, did_element):
    """
    Wrap a did element.

    Reads the text at "unittitle/title" and "unitid" and collects the
    "container" and "physdesc" nodes selected from the element.

    raises Exception if did_element is false
    """
    if not did_element:
        raise Exception("did_element is NONE")

    self.element = did_element
    getText = XmlUtils.getTextAtPath
    self.unittitle = getText(did_element, "unittitle/title")
    self.unitid = getText(did_element, "unitid")
    self.containerElements = XmlUtils.selectNodes(self.element, "container")
    self.physdescElements = XmlUtils.selectNodes(self.element, "physdesc")
def getRootChild(self, childName):
    """
    Return the child of the record root named ``childName``, creating it
    when it does not exist yet.

    A newly created child is added to the document element, whose
    children are then re-ordered according to self.root_child_order.

    raises Exception if childName is not listed in root_child_order
    """
    if childName not in self.root_child_order:
        # was "Exeption", which raised NameError instead of this message
        raise Exception('Unrecognized childName: "%s"' % childName)

    child = self.selectSingleNode(self.dom, 'record/' + childName)
    if not child:
        child = XmlUtils.addElement(self.dom, self.doc, childName)
        XmlUtils.orderElements(self.doc, self.root_child_order)
    return child
def getInstDiv(self, instDiv):
    """
    looks for the provided instDiv VOCAB in this Affiliation

    returns the first 'instDivision' element whose text equals instDiv,
    or None when there is no such element
    """
    for candidate in XmlUtils.selectNodes(self.element, 'instDivision'):
        if XmlUtils.getText(candidate) == instDiv:
            return candidate
    return None
def finalizeXml(self):
    """
    Add a "children" element listing the titles of this node's children.

    Does nothing when self.node is not set.  One "child" element holding
    child.title is added per entry of self.node.children; when that
    attribute is None or empty the "children" element stays empty.
    """
    if not self.node:
        return

    childrenElement = XmlUtils.addElement(self.dom, self.doc, "children")
    # children may be None rather than a list; treat that as "no
    # children" instead of failing half-way through
    for child in self.node.children or []:
        XmlUtils.addChild(self.dom, "child", child.title, childrenElement)

    # NOTE(review): addElement presumably attached the element already;
    # the explicit append is kept so that it ends up as the last child
    # of the document element either way.
    self.doc.appendChild(childrenElement)
def getJournalMap(self):
    """
    Fill self.journal_map (journal name -> abbreviation) from the rows
    returned by getRows and return it.

    Only rows with exactly two elements are used (first: journal name,
    second: abbreviation), and only when the abbreviation is not empty.
    """
    for row in self.getRows():
        # NOTE(review): 'rec' is not defined in this method; it has to
        # come from an enclosing scope -- confirm that 'self' was not
        # intended here.
        elements = rec.getElements(row)
        if len(elements) != 2:
            continue
        name = XmlUtils.getText(elements[0])
        abbreviation = XmlUtils.getText(elements[1])
        if abbreviation:
            self.journal_map[name] = abbreviation
    return self.journal_map
def __init__(self, element, unit):
    """
    Describe a worksheet of ``unit``.

    name      - url-unquoted text of the element's 'Name' child
    href      - url-unquoted 'HRef' attribute of the element's
                'WorksheetSource' child
    data_path - utils.ingest_data_dir / unit / href
    """
    self.unit = unit

    # unquoting takes the %20s out
    rawName = XmlUtils.getChildText(element, 'Name')
    self.name = urllib.unquote(rawName)

    sourceEl = XmlUtils.getChild('WorksheetSource', element)
    rawHref = sourceEl.getAttribute('HRef')
    self.href = urllib.unquote(rawHref)

    self.data_path = os.path.join(utils.ingest_data_dir, self.unit,
                                  self.href)
def asCatalogElement(self):
    """
    Render this document as an 'asnDocument' element.

    The element's id attribute is self.asnUri; it has one child element
    each for title, topic, author, created and status, whose text is the
    attribute of the same name ('' when that attribute is false).
    """
    fieldNames = ('title', 'topic', 'author', 'created', 'status')

    element = XmlUtils.createElement('asnDocument')
    element.setAttribute("id", self.asnUri)
    for name in fieldNames:
        fieldEl = XmlUtils.createElement(name)
        XmlUtils.setText(fieldEl, getattr(self, name) or '')
        element.appendChild(fieldEl)
    return element
def addViewContext(self, vc):
    """
    Add a viewContext child with text ``vc`` under
    'record:collection:viewContexts', unless one of the existing child
    elements already has that text.

    The number of existing children and their values are printed.
    """
    vcParent = self.selectSingleNode(self.dom,
                                     'record:collection:viewContexts')
    vcNodes = XmlUtils.getChildElements(vcParent)
    print('%d vc nodes found' % len(vcNodes))

    existing = [XmlUtils.getText(node) for node in vcNodes]
    for value in existing:
        print('- %s' % (value,))

    if vc not in existing:
        XmlUtils.addChild(self.dom, 'viewContext', vc, vcParent)
def __init__(self, element):
    """
    Contributor of type 'organization'.

    After Contributor.__init__ has run, instName, instDept and instEmail
    are read from the children of the element's 'organization' child.

    raises Exception if there is no organization child
    """
    Contributor.__init__(self, element)
    self.type = 'organization'

    orgEl = XmlUtils.getChild('organization', element)
    if not orgEl:
        raise Exception('organization not found in %s'
                        % self.element.toxml())

    childText = XmlUtils.getChildText
    self.instName = childText(orgEl, 'instName')
    self.instDept = childText(orgEl, 'instDept')
    self.instEmail = childText(orgEl, 'instEmail')
def asElement(self):
    """
    render this PubNameSpec as an XML element so it can be put in an XML
    document containing multiple PubNameSpecs

    The 'pubNameSpec' element carries recId, collection, xmlFormat and
    pubType as attributes and self.term as its text.
    """
    specEl = XmlUtils.createElement("pubNameSpec")
    for name in ('recId', 'collection', 'xmlFormat', 'pubType'):
        specEl.setAttribute(name, getattr(self, name))
    XmlUtils.setText(specEl, self.term)
    return specEl