def fixRelations (self): """ get relation elements and then fix each in turn -- if href: url = value type = Has part -- else label = value type = Is related """ nodes = self.getFieldElements ("relation") if not nodes: return print "\n%s" % self.getId() for r in nodes: value = XmlUtils.getText(r) if not value: return XmlUtils.setText (r,"") if value.startswith ("http://"): r.setAttribute ("type", "Has part") r.setAttribute ("url", value) else: r.setAttribute ("type", "Is related") r.setAttribute ("title", value) print r.toxml() if 0: self.write() print "wrote record"
def action(osmRecord): """ - remember the status value - remove the status node - ?? create an empty date element if one does not exist for this status value ?? """ if verbose: print '\n-- task 11 action ---' modified = False copyrightNotice = osmRecord.selectSingleNode( osmRecord.dom, "record/rights/copyrightNotice") if not copyrightNotice: # raise Exception, "I have to have a copyright!" rights = osmRecord.selectSingleNode(osmRecord.dom, 'record/rights') if not rights: rights = XmlUtils.addElement(osmRecord.dom, osmRecord.doc, 'rights') copyrightNotice = XmlUtils.addElement(osmRecord.dom, rights, 'copyrightNotice') copyrightNotice.setAttribute('holder', 'UCAR') copyrightNotice.setAttribute('url', termsOfUseUrl) if verbose: print copyrightNotice.toxml() XmlUtils.setText(copyrightNotice, copyrightBlurb) modified = True return modified
def replaceVocabTerm(badTerm, goodTerm, vocab, osmRec): """ field used to obtain vocab info from vocab_data """ vocabField = getVocabField(vocab) # say the field is 'instName' # print 'type of indexField: %s' % type(vocabField) # print vocabField if type(vocabField) == type('') or type(vocabField) == type(u''): vocabField = [vocabField] for indexField in vocabField: xpath = getFieldXpath(indexField) # print 'xpath: %s' % xpath vocabNodes = osmRec.selectNodes(osmRec.dom, xpath) # print '%d vocabNodes found' % len(vocabNodes) for node in vocabNodes: value = XmlUtils.getText(node) if value == badTerm: # print 'old:', value XmlUtils.setText(node, goodTerm) # print 'new:', XmlUtils.getText(node).encode('utf-8') print ' .. replaced' return osmRec
def finalizeXml (self): self.doc.setAttribute ("xmlns:"+self.schema_instance_namespace, \ self.SCHEMA_INSTANCE_URI) self.setNoNamespaceSchemaLocation ( \ "http://www.dls.ucar.edu/people/ostwald/Metadata/webcat/webcat-record.xsd") accessionNum = self.getAccessionNum () url = "http://www.library.ucar.edu/uhtbin/hyperion-image/" + accessionNum urlElement = self.dom.createElement ("url") XmlUtils.setText(urlElement, url) try: id = makeId (accessionNum, self.prefix) except: id = "ERROR" msg = "Error processing " + self.url print msg print sys.exc_info()[0], sys.exc_info()[1] idElement = self.dom.createElement ("recordID") XmlUtils.setText(idElement, id) children = XmlUtils.getChildElements (self.doc) self.doc.insertBefore (urlElement, children[0]) self.doc.insertBefore (idElement, urlElement)
def rewriteUrls(self, selectFn, urlTestFn, rewriteFn): """ NOT TESTED! for each node selected by selectFn (e.g., getBSCSUrlNodes) - if url at that node passes rewrite each Url that matches testFn with a falue that is computed by writeFn: - base_protected_url + self.collection + protectedAssetFileName returns True if a change was made, False otherwise """ recordChanged = False for urlNode in selectFn(self): url = XmlUtils.getText(urlNode) assetPath = getAssetPath (url) newProtectedCollPath = os.path.join (getReorgProtectedDir(), self.collection) newAssetPath = os.path.join (newProtectedCollPath, os.path.basename(assetPath)) newProtectedUrl = os.path.join (base_protected_url, self.collection, os.path.basename(assetPath)) if self.verbose: print '\n- assetPath:', assetPath print '- newAssetPath:', newAssetPath print '- oldUrl:', url print '- newProtectedUrl:', newProtectedUrl if urlTestFn(self, url): new_url = writeFn (self, url) XmlUtils.setText(urlNode, new_url) recordChanged = True return recordChanged
def finalizeXml(self): """ doctor the metadata with information contained in the folderNode """ if not self.node: return # get TN issue from title, or if not from parent's title self.tn_issue = self.getTN() or self.tn_issue if self.tn_issue: print "ADDING %s" % self.tn_issue tnElement = XmlUtils.addElement(self.dom, self.doc, "tn_isssue") XmlUtils.setText(tnElement, self.tn_issue) self.title = webcatUtils.stripIssue(self.title, self.tn_issue) self.setFieldValue("title", self.title) childrenElement = XmlUtils.addElement(self.dom, self.doc, "children") for child in self.node.children: # XmlUtils.addChild (self.dom, "child", child.title, childrenElement) md = child.getMetadata(None) id = md.getAccessionNum() print id childElement = XmlUtils.addChild(self.dom, "child", child.title, childrenElement) childElement.setAttribute("accessionNum", id) children = XmlUtils.getChildElements(self.doc) self.doc.appendChild(childrenElement)
def processRecord(self, rec): """ add namespace info add RecordID, Url elements """ rec.doc.setAttribute ("xmlns:"+rec.schema_instance_namespace, \ rec.SCHEMA_INSTANCE_URI) rec.setNoNamespaceSchemaLocation ( \ "http://www.dls.ucar.edu/people/ostwald/Metadata/webcat/webcat-record.xsd") accessionNum = self.getAccessionNum(rec) # print "%d (%s)" % (idNum, type(idNum)) # print accessionNum, id url = "http://www.library.ucar.edu/uhtbin/hyperion-image/" + accessionNum urlElement = rec.dom.createElement("Url") XmlUtils.setText(urlElement, url) id = makeId(accessionNum) idElement = rec.dom.createElement("RecordID") XmlUtils.setText(idElement, id) children = XmlUtils.getChildElements(rec.doc) rec.doc.insertBefore(urlElement, children[0]) rec.doc.insertBefore(idElement, urlElement) # print rec rec.write() print accessionNum
def doFormulas(self, parent):
    """
    evaluate the formulas found on the child elements of parent, recursively

    formulas are expressed as attribute values. e.g.,
        <TxtHeight F="Height*0.861111">
    variables used are 'Width' and 'Height', so these must be available
    to eval

    every element having a formula ("F") attribute is assigned a text
    value that is the result of evaluating the formula; a formula that
    starts with 'NURBS' is not evaluated and is used as the text as-is
    """
    # Width and Height are not referenced by name below, but eval() runs
    # each formula in this function's scope, so both must be bound here
    Width = getattr(self, 'Width')
    Height = self.Height
    # print 'Width is a %s' % type(Width)
    for child in XmlUtils.getChildElements(parent):
        f = child.getAttribute("F")
        if f:
            # print '- %s - "%s"' % (child.tagName, f)
            if f.startswith('NURBS'):
                val = f
            else:
                # NOTE(review): eval() executes whatever expression the
                # document carries - only safe for trusted input
                val = eval(f)
            # print " -> ", val
            XmlUtils.setText(child, str(val))
        # descend into any element that has child elements of its own
        if XmlUtils.getChildElements(child):
            self.doFormulas(child)
def asCatalogElement(self):
    """
    render this object as an 'asnDocument' element

    the element's "id" attribute is self.asnUri, and it gets one child
    element each for title, topic, author, created and status whose text
    is the attribute of the same name (empty string when that is falsy)
    """
    catalogEl = XmlUtils.createElement('asnDocument')
    catalogEl.setAttribute("id", self.asnUri)
    for fieldName in ('title', 'topic', 'author', 'created', 'status'):
        fieldEl = XmlUtils.createElement(fieldName)
        XmlUtils.setText(fieldEl, getattr(self, fieldName) or '')
        catalogEl.appendChild(fieldEl)
    return catalogEl
def asElement(self):
    """
    render this PubNameSpec as a "pubNameSpec" element, so it can be put
    in an XML document containing multiple PubNameSpecs

    recId, collection, xmlFormat and pubType become attributes and
    self.term becomes the element text
    """
    specEl = XmlUtils.createElement("pubNameSpec")
    for name in ('recId', 'collection', 'xmlFormat', 'pubType'):
        value = getattr(self, name)
        specEl.setAttribute(name, value)
    XmlUtils.setText(specEl, self.term)
    return specEl
def updateOsmRecord(self, osmRecord, before, after):
    """
    replace person and organization values in osmRecord using
    self.dataTable

    every node selected by self.person_field or self.org_field whose
    text is a key of dataTable.beforeMap gets the text returned by
    dataTable.getAfter for that value

    before and after are accepted but not used here; the replacement is
    driven by each node's own text

    returns osmRecord (modified in place)
    """
    beforeMap = self.dataTable.beforeMap
    for xpath in [self.person_field, self.org_field]:
        for node in osmRecord.selectNodes(osmRecord.dom, xpath):
            value = XmlUtils.getText(node)
            # "in" instead of the deprecated has_key
            if value in beforeMap:
                XmlUtils.setText(node, self.dataTable.getAfter(value))
    return osmRecord
def asElement(self):
    """
    render this relation as a 'relation' element

    attributes: relationship, num (only when self.num is truthy),
    objectTitle (converted with unicode) and object.
    a child 'id' element carries self.id as text and self.idType as its
    "type" attribute
    """
    attributes = [('relationship', self.relationship)]
    if self.num:
        attributes.append(('num', self.num))
    attributes.append(('objectTitle', unicode(self.objectTitle)))
    attributes.append(('object', self.object))

    relationEl = XmlUtils.createElement('relation')
    for attrName, attrValue in attributes:
        relationEl.setAttribute(attrName, attrValue)

    idEl = relationEl.appendChild(XmlUtils.createElement('id'))
    XmlUtils.setText(idEl, self.id)
    idEl.setAttribute('type', self.idType)
    return relationEl
def addInstDivVocab (self, instDivVocab):
    """
    add the ":"-separated prefixes of instDivVocab to this affiliation
    as "instDivision" elements, skipping any prefix for which getInstDiv
    already finds an entry

    NOTE: the first segment is never added on its own; the shortest
    prefix added is made of the first two segments
    """
    segments = instDivVocab.split(":")
    for end in range (2, len(segments) + 1):
        prefix = ':'.join(segments[:end])
        if self.getInstDiv (prefix):
            continue
        divisionEl = XmlUtils.createElement("instDivision")
        XmlUtils.setText (divisionEl, prefix)
        self.element.appendChild (divisionEl)
def action(itemRecord): """ replace terms of use urls """ if verbose: print "-- %s action --" % task_name rec_changed = False for licenseUrlEl in itemRecord.getLicenseUrlNodes(): val = XmlUtils.getText(licenseUrlEl) if val.find(find_str) != find_str: XmlUtils.setText(licenseUrlEl, val.replace(find_str, replace_str)) rec_changed = True
def asElement(self):
    """
    render this author as a 'person' element with role 'Author'

    - the "order" attribute is set from self.authororder unless it is None
    - each name in self.attrs whose value is truthy becomes a child
      element of that name holding the value as text
    """
    personEl = XmlUtils.createElement('person')
    personEl.setAttribute('role', 'Author')
    if self.authororder is not None:
        personEl.setAttribute('order', str(self.authororder))

    for name in self.attrs:
        content = getattr(self, name)
        if not content:
            continue
        fieldEl = personEl.appendChild(XmlUtils.createElement(name))
        XmlUtils.setText(fieldEl, content)
    return personEl
def toXml(self):
    """
    return an "enumeration" schema element for this term

    self.value goes in the "value" attribute and self.description is the
    text of the nested annotation/documentation element
    """
    enumeration = createSchemaElement("enumeration")
    enumeration.setAttribute("value", self.value)

    annotation = enumeration.appendChild(createSchemaElement("annotation"))
    documentation = annotation.appendChild(createSchemaElement("documentation"))
    # description is already unicode, so it is used as-is
    XmlUtils.setText(documentation, self.description)
    return enumeration
def getUnknownCopyrightNoticeElement(self):
    """
    build the copyrightNotice element used when copyright is unknown:

    <copyrightNotice type="Unknown" holder="Unknown"
            url="http://www.ucar.edu/legal/terms_of_use.shtml">
        Copyright information is unknown. Please contact the creator,
        author or publisher for further information.
    </copyrightNotice>
    """
    notice = XmlUtils.createElement("copyrightNotice")
    XmlUtils.setText(
        notice,
        "Copyright information is unknown. Please contact the creator, author or publisher for further information."
    )
    unknownAttrs = (
        ('type', 'Unknown'),
        ('holder', 'Unknown'),
        ('url', 'http://www.ucar.edu/legal/terms_of_use.shtml'),
    )
    for attrName, attrValue in unknownAttrs:
        notice.setAttribute(attrName, attrValue)
    return notice
def action(itemRecord): """ For each entry, move the contents of /record/resources/relation/@title and @description to a new /record/resource/description field. The description field should have the title content, then a colon followed by description content. If there is only description content and no title content, skip the title content and colon. """ if verbose: print "-- %s action --" % task_name for contrib in itemRecord.getContributorElements('Publisher'): if XmlUtils.getText(contrib) == find_str: XmlUtils.setText(contrib, replace_str)
def getAffiliation (self, instName):
    """
    affiliations are associated with ONE instName

    looks for an affiliation element whose 'instName' child has the
    given text; when none is found a new 'affiliation' element with that
    instName is appended to self.element

    returns an Affiliation instance wrapping the element
    """
    match = None
    for candidate in self.getAffiliationElements():
        nameEl = XmlUtils.getChild('instName', candidate)
        if XmlUtils.getText(nameEl) == instName:
            match = candidate
            break

    if match:
        return Affiliation (match)

    created = self.element.appendChild (XmlUtils.createElement('affiliation'))
    nameEl = created.appendChild (XmlUtils.createElement('instName'))
    XmlUtils.setText(nameEl, instName)
    return Affiliation (created)
def action(osmRecord): """ 1 - set the copyright blurb 2 - set /record/rights/copyrightNotice/@url to http://www.ucar.edu/legal/terms_of_use.shtml """ if verbose: print '\n-- task 6 action ---' copyrightNotice = osmRecord.selectSingleNode( osmRecord.dom, "record/rights/copyrightNotice") if not copyrightNotice: return False # but we would expect there to be one .... if verbose: print copyrightNotice.toxml() XmlUtils.setText(copyrightNotice, copyrightBlurb) copyrightNotice.setAttribute("url", termsOfUseUrl) return True
def setDate (self, dateStr, dateType):
    """
    set the text of the date element of the given type to dateStr

    - the 'record/coverage' element is created when missing
    - an existing date element is reused when its "type" attribute
      equals dateType (the last such element wins)
    - otherwise a new "date" element with type=dateType is inserted as
      the first child element of coverage

    returns the date element that was set
    """
    coverageNode = self.selectSingleNode (self.dom, 'record/coverage')
    if not coverageNode:
        coverageNode = XmlUtils.addElement(self.dom, self.doc, "coverage")

    targetDateElement = None
    for node in self.getDateNodes() or []:
        # match on the value of the "type" attribute, which is how date
        # elements are tagged when they are created below
        if node.getAttribute ("type") == dateType:
            targetDateElement = node

    if targetDateElement is None:
        targetDateElement = XmlUtils.createElement ("date")
        targetDateElement.setAttribute ("type", dateType)
        coverageChildren = XmlUtils.getChildElements (coverageNode)
        if coverageChildren:
            coverageNode.insertBefore (targetDateElement, coverageChildren[0])
        else:
            coverageNode.appendChild (targetDateElement)

    XmlUtils.setText (targetDateElement, dateStr)
    return targetDateElement
def rewriteProtectedUrls(self): """ rewrite each protectected Url in this record with a new_url: - base_protected_url + self.collection + protectedAssetFileName returns True if a change was made, False otherwise """ recordChanged = False for urlNode in self.getProtectedUrlNodes(): url = XmlUtils.getText(urlNode) assetPath = getAssetPath(url) fileName = os.path.basename(url) collection = systemGeneratedKeysMap.has_key(self.collection) and \ systemGeneratedKeysMap[self.collection]['key'] or \ self.collection newAssetPath = os.path.join(getReorgProtectedDir(), collection, fileName) newProtectedUrl = os.path.join(base_protected_url, collection, fileName) if self.verbose: print '\n- assetPath:', assetPath print '- newAssetPath:', newAssetPath print '- oldUrl:', url print '- newProtectedUrl:', newProtectedUrl print '- self.collection: ' + self.collection sys.exit() if url != newProtectedUrl: XmlUtils.setText(urlNode, newProtectedUrl) recordChanged = True return recordChanged
def makeTerm(title, docCount):
    """
    build a Term from a title and a document count

    the underlying 'term' element has docCount as a string attribute, a
    termCount of '0' (we don't care about termCount) and title as text
    """
    termEl = XmlUtils.createElement('term')
    for attrName, attrValue in (('docCount', str(docCount)), ('termCount', '0')):
        termEl.setAttribute(attrName, attrValue)
    XmlUtils.setText(termEl, title)
    return Term(termEl)
unique_values.append(value) return unique_values if __name__ == '__main__': from bppb_rules import BPPBMappings from nsdl.formats import NcsItemRecord mappings = BPPBMappings() print 'table has %d entries' % len(mappings) unique_values = [] colDir = '/Users/ostwald/Documents/Work/NSDL/TNS Transition-Fall-2011/repo/ncs_item/1239144881424/' for filename in os.listdir(colDir): path = os.path.join(colDir, filename) rec = NcsItemRecord(path=path) rec_changed = False for node in rec.getIsPartOfUrlNodes(): value = XmlUtils.getText(node) mapping = mappings.getMapping(value) if mapping is None: print "NO mapping", value else: print "MAPPING", mapping XmlUtils.setText(node, mapping) rec_changed = True if rec_changed: # print 'WOULD HAVE WRITTEN', rec.getId() rec.write()
def update(record): """ - rewrite all the protectedUrls in this record - move the cataloged assets to the new protected directory """ recordChanged = False for urlNode in record.getProtectedUrlNodes(): url = XmlUtils.getText(urlNode) filename = os.path.basename(url) assetPath = getAssetPath(url) if 0: print '\n- assetPath:', assetPath print '- oldUrl:', url # Now copy the asset to new protectedDir at newAssetPath # did the protected url in metadata resolve to an existing asset? if not os.path.exists(assetPath): ## Missing Asset # print 'asset does NOT exist at %s' % assetPath # raise AssetNotFoundException, assetPath missing_assets.append(url) continue """ We only want to store one copy of each asset. - where dups are determined by filename existing_assets holds the assets that have been written to the reorgProctedDir. """ if existing_assets.has_key(filename): newAssetPath = existing_assets[filename] # print 'asset already exists for %s:\n\t%s' % (filename, newAssetPath) already_existing.append(filename) else: # newAssetPath = os.path.join(getNewProtectedDir(), record.collection, filename) newAssetPath = os.path.join(getReorgProtectedDir(), record.collection, filename) # newProtectedUrl = os.path.join (base_protected_url, record.collection, os.path.basename(assetPath)) newProtectedUrl = getProtectedUrlForPath(newAssetPath, True) if 0: print '- newProtectedUrl:', newProtectedUrl print '- assetPath:', assetPath print '- newAssetPath:', newAssetPath print '- DOWRITES:', dowrites # update the url if necessary if newProtectedUrl != url: XmlUtils.setText(urlNode, newProtectedUrl) recordChanged = True if dowrites and not os.path.exists(newAssetPath): try: newProtectedCollPath = os.path.dirname(newAssetPath) if not os.path.exists(newProtectedCollPath): os.mkdir(newProtectedCollPath) except Exception, msg: print 'ERROR: %s' % msg print ' -- newProtectedCollPath: %s' % newProtectedCollPath sys.exit() # copy the asset to newAssetPath try: shutil.copyfile(assetPath, 
newAssetPath) existing_assets[filename] = newAssetPath except OSError, msg: print '- ERROR: could not copy asset: %s' % msg print ' - assetPath: ', assetPath print ' - newAssetPath:', newAssetPath print 'wrote to newAssetPath:', newAssetPath
def action (osmRecord): """ For each entry, move the contents of /record/resources/relation/@title and @description to a new /record/general/description element The description content should have the title content, then a colon followed by description content. If there is only description content and no title content, skip the title content and colon. e.g., <description>f:this is great</description> then DELETE the original relation """ if verbose > 1: print '%s HAS IMAGE: %s' % (getModuleName(), osmRecord.getId()) # print osmRecord.__class__.__name__ # if not osmRecord.getId(): # print osmRecord modified = False relations = osmRecord.selectNodes (osmRecord.dom, 'record/resources/relation') if verbose: print '\n-- task 2 action fired ---' if verbose > 1: print "%d relations found" % len(relations) for relation in relations: if relation.getAttribute ("type") == 'Has image': title = '' if relation.hasAttribute ('title'): title = relation.getAttribute ("title") description = '' if relation.hasAttribute('description'): description = relation.getAttribute ("description") if verbose > 1: print 'title: "%s"' % title print 'description: "%s"' % description if title or description: msg = '' if title and description: msg = title + ':' + description else: msg = title + description description = osmRecord.addGeneralChild('description') XmlUtils.setText(description, msg) if verbose > 1: print 'WOULD HAVE DELETED RELATION' print "\n", relation.toxml() ## delete the 'Has image' relation osmRecord.deleteElement(relation) modified = True return modified
def setChildElementValue (self, tag, value):
    """
    set the text of the child element named tag to value, appending a
    new child element to self.element first when there is none
    """
    target = (XmlUtils.getChild (tag, self.element)
              or self.element.appendChild(XmlUtils.createElement(tag)))
    XmlUtils.setText (target, value)
def setNameLast(self, name):
    """
    set the text of the 'nameLast' child of self.element to name,
    creating and appending the child when it does not exist yet
    """
    lastNameEl = XmlUtils.getChild('nameLast', self.element)
    if lastNameEl:
        XmlUtils.setText(lastNameEl, name)
        return

    lastNameEl = XmlUtils.createElement('nameLast')
    self.element.appendChild(lastNameEl)
    XmlUtils.setText(lastNameEl, name)