def getResponseDoc(self, params=None, opts=None):
    """
    Fetch the service response and return it parsed as an XmlRecord.

    The raw response is obtained from self.getData(params, opts) and parsed
    with XmlRecord. If the parsed document contains a 'DDSWebService:error'
    node:
      - an error code of 'noRecordsMatch' is treated as "no results" and
        None is returned
      - any other code is printed and raised as SimpleClientError carrying
        the text of the error node

    Returns:
        the parsed XmlRecord, or None when the service reports
        'noRecordsMatch'

    Raises:
        SimpleClientError: when the service reports an error, or when
            fetching/parsing the response fails
    """
    try:
        data = self.getData(params, opts)
        responseDoc = XmlRecord(xml=data)
        webResponseErrorNode = responseDoc.selectSingleNode(
            responseDoc.dom, 'DDSWebService:error')
        if webResponseErrorNode:
            errorCode = webResponseErrorNode.getAttribute('code')
            if errorCode == 'noRecordsMatch':
                return None
            print('errorCode %s' % (errorCode,))
            raise SimpleClientError(XmlUtils.getText(webResponseErrorNode))
    except SimpleClientError:
        # A service-reported error is not a parse failure: let it through
        # unchanged instead of re-labelling it "Could not parse XML".
        raise
    except Exception as msg:
        raise SimpleClientError("DDSClient: Could not parse XML: %s" % msg)

    # The original fell off the end here and so always returned None on
    # success, contradicting its own docstring.
    return responseDoc
def getRowCells(self, row, rowNum):
    """
    Return the cleaned cell values for one table row.

    Every 'td' cell of the row contributes its text, except the second cell
    (index 1), which contributes two values:
      - header row (rowNum == 0): the cell text followed by the literal 'url'
      - any other row: the text and the 'href' of the cell's 'a' child

    A missing link is tolerated only in row 1, where two empty strings are
    recorded; in any other row NoTopPickDataError is raised.

    Each collected value is passed through self.cleanCellData.
    """
    values = []
    for index, cell in enumerate(self.selectNodes(row, 'td')):
        if index != 1:
            values.append(XmlUtils.getText(cell))
            continue

        if rowNum == 0:  # header row
            values.append(XmlUtils.getText(cell))
            values.append('url')
            continue

        link = XmlUtils.getChild('a', cell)
        if link is None:
            if rowNum != 1:
                raise NoTopPickDataError('No link found in row %d' % rowNum)
            values.append('')
            values.append('')
            continue

        values.append(XmlUtils.getText(link))
        values.append(link.getAttribute('href'))

    return [self.cleanCellData(value) for value in values]
def getJournalMap(self):
    """
    Fill self.journal_map from the rows of self.getRows() and return it.

    Only rows with exactly two child elements are considered: the text of
    the first is the journal name, the text of the second its abbreviation.
    Rows with an empty abbreviation are skipped.
    """
    for row in self.getRows():
        # NOTE(review): `rec` is not defined inside this method; presumably
        # a module-level record object - confirm against the rest of the file.
        cells = rec.getElements(row)
        if len(cells) != 2:
            continue
        journal = XmlUtils.getText(cells[0])
        abbrev = XmlUtils.getText(cells[1])
        if not abbrev:
            continue
        self.journal_map[journal] = abbrev
    return self.journal_map
def populateXml(self, xmlData):
    """
    Copy name/value rows from xmlData into this record's DOM.

    xmlData is parsed as an XmlRecord. For every element returned by
    getElements on its document, the first "TD" child supplies the field
    name (a single trailing ':' is removed) and the "B" child of the second
    "TD" supplies the value. Each pair is added under self.dom with the
    name passed through self.normalizeTagName.
    """
    source = XmlRecord(xml=xmlData)
    for rowElement in source.getElements(source.doc):
        cells = XmlUtils.getChildElements(rowElement, "TD")

        label = XmlUtils.getText(cells[0]).strip()
        if label[-1] == ":":
            label = label[:-1]

        boldNode = XmlUtils.getChild("B", cells[1])
        value = XmlUtils.getText(boldNode).strip()

        XmlUtils.addChild(self.dom, self.normalizeTagName(label), value)
def report(self):
    """
    Print the number of rows, then one entry per two-cell row.

    Rows with an abbreviation are printed as the journal name followed by
    the abbreviation in parentheses; rows without one are printed as
    ' -- journal --'. Rows that do not have exactly two child elements are
    ignored.
    """
    rows = self.getRows()
    print("%d rows found" % len(rows))
    if not rows:
        return

    for row in rows:
        # NOTE(review): `rec` is not defined inside this method; presumably
        # a module-level record object - confirm against the rest of the file.
        cells = rec.getElements(row)
        if len(cells) != 2:
            continue
        journal = XmlUtils.getText(cells[0])
        abbrev = XmlUtils.getText(cells[1])
        if abbrev:
            line = "\n%s\n(%s)" % (journal, abbrev)
        else:
            line = "\n -- %s --" % journal
        print(line)
def __init__(self, element):
    """
    Expose each child element of `element` as an attribute of this object.

    The child's tag name becomes the attribute name and its text the
    attribute value. The tag names are also recorded, in document order,
    in self.attrs.
    """
    self.element = element
    self.attrs = []
    for child in XmlUtils.getChildElements(element):
        name = child.tagName
        self.attrs.append(name)
        setattr(self, name, XmlUtils.getText(child))
def rewriteUrls(self, selectFn, urlTestFn, rewriteFn):
    """
    NOT TESTED!

    Rewrite the URLs held by the nodes that selectFn picks out.

    For each node returned by selectFn(self) (e.g., getBSCSUrlNodes), the
    node's URL is replaced by rewriteFn(self, url) whenever
    urlTestFn(self, url) is true.

    When self.verbose is set, the current asset path and the would-be
    protected path/URL (built from getReorgProtectedDir(),
    base_protected_url, self.collection and the asset's file name) are
    printed for every node, whether or not it is rewritten.

    Args:
        selectFn: callable(self) -> iterable of URL nodes
        urlTestFn: callable(self, url) -> truthy if the URL must be rewritten
        rewriteFn: callable(self, url) -> replacement URL

    Returns:
        True if at least one URL was changed, False otherwise.
    """
    recordChanged = False
    for urlNode in selectFn(self):
        url = XmlUtils.getText(urlNode)

        # These values only feed the verbose trace below; they are still
        # computed unconditionally, exactly as before.
        assetPath = getAssetPath(url)
        assetName = os.path.basename(assetPath)
        newProtectedCollPath = os.path.join(getReorgProtectedDir(),
                                            self.collection)
        newAssetPath = os.path.join(newProtectedCollPath, assetName)
        newProtectedUrl = os.path.join(base_protected_url, self.collection,
                                       assetName)

        if self.verbose:
            print('\n- assetPath: %s' % (assetPath,))
            print('- newAssetPath: %s' % (newAssetPath,))
            print('- oldUrl: %s' % (url,))
            print('- newProtectedUrl: %s' % (newProtectedUrl,))

        if urlTestFn(self, url):
            # Fixed: the original called the undefined name `writeFn`, which
            # raised NameError on the first URL that passed the test.
            new_url = rewriteFn(self, url)
            XmlUtils.setText(urlNode, new_url)
            recordChanged = True
    return recordChanged
def processRecord(self, rec):
    """
    Tally the fields of one record into this (dict-like) global tally.

    First the number of occurrences of each child tag of rec.doc is counted
    for this record. Then, for every tag seen, the matching Entry (created
    on first sight) gets its count increased, its max/min updated, and
    maxRec set to the record's file name whenever this record's count
    equals the entry's max. Entries for tags absent from this record have
    min reset to 0. Finally self.recordCount is incremented.
    """
    occurrences = {}
    for element in XmlUtils.getChildElements(rec.doc):
        tag = element.tagName
        # NOTE(review): the stripped text is computed but never used.
        XmlUtils.getText(element).strip()
        occurrences[tag] = occurrences.get(tag, 0) + 1

    # merge this record's counts into the global tally
    for tag, seen in occurrences.items():
        if not self.has_key(tag):
            self[tag] = Entry(tag)
        entry = self[tag]
        entry.count = entry.count + seen
        entry.max = max(entry.max, seen)
        if entry.max == seen:
            entry.maxRec = os.path.split(rec.path)[1]
        entry.min = min(entry.min, seen)

    # a tag missing from this record has a minimum occurrence of zero
    for entry in self.values():
        if entry.tag not in occurrences:
            entry.min = 0

    self.recordCount = self.recordCount + 1
def getElementHtml(self, element, level):
    """
    Render `element` (and, recursively, its child elements) as HTML.

    Returns "" for an element that has no text, no attributes and no child
    elements. Otherwise returns a DIV of class "element" containing:
      - a heading DIV of class 'level-<level>' (tag name plus text when the
        element has text, tag name alone otherwise)
      - the attributes rendering from self.getAttributesHtml, if any
      - the rendering of each child element at level + 1
    """
    levelClass = 'level-%d' % level
    name = element.tagName
    text = XmlUtils.getText(element)
    children = XmlUtils.getChildElements(element)
    attributes = element.attributes

    if not text and not attributes and not children:
        return ""

    container = DIV(klass="element")
    if text:
        heading = DIV(SPAN(name + ': ', klass='el-name'),
                      SPAN(text, klass="el-text"),
                      klass=levelClass)
    else:
        heading = DIV(name, klass=levelClass)
    container.append(heading)

    # attributes follow the heading whether or not the element has text
    if attributes:
        container.append(self.getAttributesHtml(attributes, level))

    if children:
        for child in children:
            container.append(self.getElementHtml(child, level + 1))
    return container
def __init__(self, element):
    """
    Build a node from a row element holding an unknown number of "TD" cells.

    The last cell carries the data; the cells before it are "indents", so
    the node's level is the number of cells minus one.

    The data cell's first "A" child is the icon link: the file name of its
    "IMG" SRC (up to the first '.') becomes self.type, and its HREF,
    prefixed with webcatUtils.webcatDomain, becomes self.metadatapath.
    The second "A" child supplies the url and label of self.link; the
    link's label is also stored as self.title.
    """
    cells = XmlUtils.getChildElements(element, "TD")
    self.level = len(cells) - 1

    anchors = XmlUtils.getChildElements(cells[-1], "A")

    iconAnchor = anchors[0]
    image = XmlUtils.getChild("IMG", iconAnchor)
    imageFile = os.path.split(image.getAttribute("SRC"))[1]
    self.type = imageFile.split(".")[0]
    self.metadatapath = (webcatUtils.webcatDomain +
                         iconAnchor.getAttribute("HREF"))

    titleAnchor = anchors[1]
    href = titleAnchor.getAttribute("HREF")
    caption = XmlUtils.getText(titleAnchor)
    self.link = webcatUtils.WebCatLink((href, caption))
    self.title = self.link.label

    self.parent = None
    self.children = None
def replaceVocabTerm(badTerm, goodTerm, vocab, osmRec):
    """
    Replace every occurrence of badTerm with goodTerm in osmRec.

    getVocabField(vocab) yields the index field(s) for the vocabulary (a
    single field name is wrapped into a list). For each field, the nodes at
    getFieldXpath(field) whose text equals badTerm get goodTerm as their
    new text, and ' .. replaced' is printed for each replacement.

    Returns osmRec (modified in place).
    """
    fields = getVocabField(vocab)  # e.g. 'instName'
    if type(fields) in (type(''), type(u'')):
        fields = [fields]

    for field in fields:
        xpath = getFieldXpath(field)
        for node in osmRec.selectNodes(osmRec.dom, xpath):
            if XmlUtils.getText(node) != badTerm:
                continue
            XmlUtils.setText(node, goodTerm)
            print(' .. replaced')
    return osmRec
def __init__(self, data, exc_info=None, preprocessor=None):
    """
    Wrap a service response, parsing it into an XmlRecord when possible.

    Args:
        data: file-like object; its read() result is decoded as UTF-8
        exc_info: if truthy, the request itself failed - it is wrapped in a
            ServiceError and no parsing is attempted
        preprocessor: optional callable applied to the decoded response
            text before it is parsed

    Afterwards:
        self.doc   - the parsed XmlRecord, or None if nothing was parsed
        self.error - None on success; a ServiceError for a failed request
            or unparsable response; the text of the 'DDSWebService:error'
            node when the service reported an error
    """
    self.data = data
    self.error = None
    self.doc = None

    if exc_info:
        self.error = ServiceError(exc_info)
        return

    try:
        responseText = unicode(data.read(), 'utf-8')  # experimental 12/2/2010
        if preprocessor:
            responseText = preprocessor(responseText)
        self.doc = XmlRecord(xml=responseText)
        webResponseErrorNode = self.doc.selectSingleNode(
            self.doc.dom, 'DDSWebService:error')
        if webResponseErrorNode:
            self.error = XmlUtils.getText(webResponseErrorNode)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; only genuine errors are recorded as parse failures.
        self.error = ServiceError([
            "ServiceResponse: Could not parse XML",
            sys.exc_info()[1]
        ])
def fixRelations(self):
    """
    Move the text of each "relation" element into attributes.

    For each relation element with text:
      - the element text is cleared
      - text starting with "http://" -> type="Has part",  url=<text>
      - anything else                -> type="Is related", title=<text>
    and the resulting element XML is printed.

    Processing stops at the first relation element that has no text.
    Writing the record back is currently disabled.
    """
    relations = self.getFieldElements("relation")
    if not relations:
        return

    print("\n%s" % self.getId())
    for relation in relations:
        text = XmlUtils.getText(relation)
        if not text:
            return

        XmlUtils.setText(relation, "")
        if text.startswith("http://"):
            relationType, valueAttr = "Has part", "url"
        else:
            relationType, valueAttr = "Is related", "title"
        relation.setAttribute("type", relationType)
        relation.setAttribute(valueAttr, text)
        print(relation.toxml())

    # writing is switched off on purpose (dry run)
    if 0:
        self.write()
        print("wrote record")
def __init__(self, element):
    """
    Wrap `element`, exposing its 'id' attribute as self.id and the text of
    each child element as an attribute named after the child's tag.
    """
    self.element = element
    self.id = element.getAttribute('id')
    for child in XmlUtils.getChildElements(element):
        setattr(self, child.tagName, XmlUtils.getText(child))
def getPublishers(self):
    """
    Return the text of every contributor element obtained from
    self.getContributorElements('Publisher'), as a list.
    """
    publisherElements = self.getContributorElements('Publisher')
    return [XmlUtils.getText(element) for element in publisherElements]
def getPubDate(self):
    """
    Return the text of the first record/coverage/date element whose type
    attribute is exactly "Published", or None if there is none.
    """
    for dateNode in self.selectNodes(self.dom, "record/coverage/date"):
        if dateNode.getAttribute("type") == "Published":
            return XmlUtils.getText(dateNode)
    return None
def parseResponse(rec):
    """
    Return the text of every CATWebService/Topics/Topic node in rec.

    Sets rec.xpath_delimiter to "/" before selecting, and prints how many
    topic nodes were found.
    """
    rec.xpath_delimiter = "/"
    found = rec.selectNodes(rec.dom, "CATWebService/Topics/Topic")
    print("%d topics found" % len(found))
    return [XmlUtils.getText(topicNode) for topicNode in found]
def parseResponse(rec):
    """
    Return the text of every CATWebService/Authors/Author node in rec.

    Sets rec.xpath_delimiter to "/" before selecting, and prints how many
    author nodes were found.
    """
    rec.xpath_delimiter = "/"
    found = rec.selectNodes(rec.dom, "CATWebService/Authors/Author")
    print("%d authors found" % len(found))
    return [XmlUtils.getText(authorNode) for authorNode in found]
def getDoi(self):
    """
    Return the record's DOI, or None if it has none.

    DOIs are cataloged as record/classify/idNumber elements with
    type="DOI"; the text of the first such element is returned.
    """
    for idNumber in self.selectNodes(self.dom, 'record/classify/idNumber'):
        if idNumber.getAttribute("type") == "DOI":
            return XmlUtils.getText(idNumber)
    return None
def getResponseDoc(self, params=None, opts=None):
    """
    Return the response document from SimpleClient.getResponseDoc, raising
    HRSError if it contains a HandleResolutionService/error node.

    The HRSError message is '<code attribute>: <error text>'.

    Note: this assigns XmlRecord.xpath_delimiter = '/' on XmlRecord itself,
    not on a single document.
    """
    XmlRecord.xpath_delimiter = '/'
    doc = SimpleClient.getResponseDoc(self, params, opts)
    errorNode = doc.selectSingleNode(doc.dom, "HandleResolutionService/error")
    if not errorNode:
        return doc
    message = '%s: %s' % (errorNode.getAttribute('code'),
                          XmlUtils.getText(errorNode))
    raise HRSError(message)
def getTypes(self):
    """
    Return the text of every "record:itemType" node of this record.

    Also prints the collected list together with the base name of
    self.path.
    """
    itemTypes = []
    itemTypeNodes = self.selectNodes(self.dom, "record:itemType")
    if itemTypeNodes:
        itemTypes.extend(XmlUtils.getText(node) for node in itemTypeNodes)
    print("%s %s" % (itemTypes, os.path.basename(self.path)))
    return itemTypes
def getInstDiv(self, instDiv):
    """
    Look for the provided instDiv vocab value in this affiliation.

    Returns the first 'instDivision' element under self.element whose text
    equals instDiv, or None when there is no such element.
    """
    for candidate in XmlUtils.selectNodes(self.element, 'instDivision'):
        if XmlUtils.getText(candidate) == instDiv:
            return candidate
    return None
def getResponseDoc(self, params=None, opts=None):
    """
    Return the response document from SimpleClient.getResponseDoc, raising
    SimpleClientError ('ERROR <error text>') if it contains a
    'DDSRepositoryUpdateService:error' node.
    """
    doc = SimpleClient.getResponseDoc(self, params, opts)
    errorNode = doc.selectSingleNode(doc.dom,
                                     'DDSRepositoryUpdateService:error')
    if not errorNode:
        return doc
    raise SimpleClientError('ERROR %s' % XmlUtils.getText(errorNode))
def addViewContext(self, vc):
    """
    Add a 'viewContext' child with value vc under
    'record:collection:viewContexts', unless one with that value is already
    there.

    Prints the number of existing view-context nodes and each of their
    values.
    """
    parent = self.selectSingleNode(self.dom, 'record:collection:viewContexts')
    existingNodes = XmlUtils.getChildElements(parent)
    print('%d vc nodes found' % len(existingNodes))

    existingValues = [XmlUtils.getText(node) for node in existingNodes]
    for value in existingValues:
        print('- %s' % (value,))

    if vc not in existingValues:
        XmlUtils.addChild(self.dom, 'viewContext', vc, parent)
def updateOsmRecord(self, osmRecord, before, after):
    """
    Replace mapped values in the person and organization fields of
    osmRecord.

    Every node selected by self.person_field or self.org_field whose text
    is a key of self.dataTable.beforeMap gets the text returned by
    self.dataTable.getAfter for that value.

    The `before` and `after` parameters are not used by this method.

    Returns osmRecord (modified in place).
    """
    for fieldXpath in (self.person_field, self.org_field):
        for node in osmRecord.selectNodes(osmRecord.dom, fieldXpath):
            current = XmlUtils.getText(node)
            if not self.dataTable.beforeMap.has_key(current):
                continue
            XmlUtils.setText(node, self.dataTable.getAfter(current))
    return osmRecord
def getFacetTerms():
    """
    Issue a ListTerms request for the "$facets" field and return the text
    of every DDSWebService:ListTerms:terms:term node in the response.
    """
    request = {
        "field": "$facets",
        "verb": "ListTerms",
    }
    response = getResponseDoc(request)
    termNodes = response.selectNodes(response.dom,
                                     "DDSWebService:ListTerms:terms:term")
    return [XmlUtils.getText(termNode) for termNode in termNodes]
def verifyAssets(self):
    """
    Check that the asset behind every protected url node exists on disk.

    Each url from self.getProtectedUrlNodes() is mapped to a path with
    getAssetPath. If any of those paths does not exist, an Exception
    listing all the missing paths is raised.
    """
    missing = []
    for urlNode in self.getProtectedUrlNodes():
        assetPath = getAssetPath(XmlUtils.getText(urlNode))
        if os.path.exists(assetPath):
            continue
        missing.append(assetPath)

    if missing:
        raise Exception('assets not found\n- %s' % '\n- '.join(missing))
def getResourceIds(path):
    """
    Return the ids of the saved resources listed in the playlist at `path`.

    Reads the 'id' child of every playList:items:item element whose type
    attribute is 'ccs_saved_resource', then drops ids that start with
    'CCS'.
    """
    rec = XmlRecord(path=path)
    savedIds = []
    for item in rec.selectNodes(rec.dom, 'playList:items:item'):
        if item.getAttribute('type') != 'ccs_saved_resource':
            continue
        idNode = XmlUtils.getChild('id', item)
        savedIds.append(XmlUtils.getText(idNode))
    return [resourceId for resourceId in savedIds
            if not resourceId.startswith('CCS')]
def getPubsId(self):
    """
    Return the id assigned to this publication in the NESL PUBS database,
    or None if the record has none.

    The id is cataloged as a record/classify/idNumber element with
    type='PUBID'; the text of the first such element is returned.
    """
    for idNumber in self.selectNodes(self.dom, 'record/classify/idNumber'):
        if idNumber.getAttribute('type') == 'PUBID':
            return XmlUtils.getText(idNumber)
    return None
def getUniqueValues(colDir=None):
    """
    Collect the distinct isPartOf url values found in a collection directory.

    Every file in colDir is loaded as an NcsItemRecord and the text of each
    node from getIsPartOfUrlNodes() is appended to `unique_values` if it is
    not already there.

    Args:
        colDir: directory holding the collection's records. Defaults to the
            path that used to be hard-coded here, so existing no-argument
            calls behave as before.

    Returns:
        `unique_values`, which is not defined in this function (presumably a
        module-level list - confirm); it therefore accumulates across calls.
    """
    if colDir is None:
        colDir = '/Users/ostwald/Documents/Work/NSDL/TNS Transition-Fall-2011/repo/ncs_item/1239144881424/'

    for filename in os.listdir(colDir):
        rec = NcsItemRecord(path=os.path.join(colDir, filename))
        for node in rec.getIsPartOfUrlNodes():
            value = XmlUtils.getText(node)
            if value not in unique_values:
                unique_values.append(value)
    return unique_values