Exemple #1
0
    def getResponseDoc(self, params=None, opts=None):
        """
		returns response as XmlRecord
		"""
        # print 'params: %s' % params
        # return XmlRecord(xml=self.getData(params, opts))
        responseDoc = None
        try:
            # responseText = data.read()
            # responseText = unicode (data.read(), 'iso-8859-1') # universal?
            # responseText = unicode (data.read(), 'utf-8') # experimental 12/2/2010

            data = self.getData(params, opts)
            # print data
            responseDoc = XmlRecord(xml=data)

            webResponseErrorNode = responseDoc.selectSingleNode(
                responseDoc.dom, 'DDSWebService:error')
            if webResponseErrorNode:
                errorCode = webResponseErrorNode.getAttribute('code')
                if errorCode == 'noRecordsMatch':
                    return None
                print 'errorCode', errorCode
                raise SimpleClientError, XmlUtils.getText(webResponseErrorNode)
        except Exception, msg:
            ## self.error = ServiceError (sys.exc_info())
            # self.error = ServiceError (["ServiceResponse: Could not parse XML", sys.exc_info()[1]])
            raise SimpleClientError, "DDSClient: Could not parse XML: %s" % msg
Exemple #2
0
    def getRowCells(self, row, rowNum):
        """
        Return the cleaned cell values of one table row as a list.

        Every 'td' node selected from row contributes its text, except the
        second cell (index 1), which expands into two values:
          - header row (rowNum 0): the cell text followed by the literal
            'url'
          - any other row: the text and the 'href' attribute of the cell's
            'a' child
        Row 1 may lack the link, in which case two empty strings are
        emitted instead.

        Each collected value is passed through self.cleanCellData.

        Raises NoTopPickDataError when a row other than 0 or 1 has no link
        in its second cell.
        """
        data = []
        for i, cell in enumerate(self.selectNodes(row, 'td')):
            if i != 1:
                data.append(XmlUtils.getText(cell))
            elif rowNum == 0:
                # header row: the link column becomes a label plus 'url'
                data.append(XmlUtils.getText(cell))
                data.append('url')
            else:
                link = XmlUtils.getChild('a', cell)
                if link is None:
                    if rowNum != 1:
                        raise NoTopPickDataError(
                            'No link found in row %d' % rowNum)
                    # row 1 is allowed to have no link: keep the columns
                    # aligned with two empty values
                    data.extend(['', ''])
                else:
                    data.append(XmlUtils.getText(link))
                    data.append(link.getAttribute('href'))

        return [self.cleanCellData(value) for value in data]
Exemple #3
0
 def getJournalMap(self):
     """
     Fill self.journal_map from the rows of self.getRows() and return it.

     Only rows with exactly two child elements are considered: the text
     of the first is the journal, the text of the second its
     abbreviation.  Rows whose abbreviation is empty are skipped.
     """
     for row in self.getRows():
         # NOTE(review): 'rec' is not defined in this method - presumably
         # a module-level record object; confirm
         cells = rec.getElements(row)
         if len(cells) != 2:
             continue
         journal, abbrev = [XmlUtils.getText(cell) for cell in cells]
         if abbrev:
             self.journal_map[journal] = abbrev
     return self.journal_map
Exemple #4
0
	def populateXml (self, xmlData):
		"""
		Copy name/value pairs from xmlData into self.dom.

		xmlData is parsed as an XmlRecord.  For each element returned by
		getElements on its doc, the first "TD" child supplies the name
		(stripped, a trailing ":" removed) and the "B" child of the second
		"TD" supplies the value (stripped).  Each pair is added to self.dom
		as a child whose tag is self.normalizeTagName(name).
		"""
		record = XmlRecord (xml=xmlData)
		for row in record.getElements (record.doc):
			cells = XmlUtils.getChildElements (row, "TD")

			label = XmlUtils.getText (cells[0]).strip()
			if label[-1] == ":":
				label = label[:-1]

			boldNode = XmlUtils.getChild ("B", cells[1])
			value = XmlUtils.getText (boldNode).strip()

			XmlUtils.addChild (self.dom, self.normalizeTagName(label), value)
Exemple #5
0
 def report(self):
     rows = self.getRows()
     print "%d rows found" % len(rows)
     if not rows:
         return
     for row in rows:
         children = rec.getElements(row)
         if len(children) == 2:
             journal = XmlUtils.getText(children[0])
             abbrev = XmlUtils.getText(children[1])
             if abbrev:
                 print "\n%s\n(%s)" % (journal, abbrev)
             else:
                 print "\n -- %s --" % journal
Exemple #6
0
 def __init__(self, element):
     """
     Expose the child elements of element as attributes of this object.

     Each child element's tag name is recorded in self.attrs (in document
     order) and an attribute of that name is set to the child's text.
     """
     self.element, self.attrs = element, []
     for node in XmlUtils.getChildElements(element):
         self.attrs.append(node.tagName)
         setattr(self, node.tagName, XmlUtils.getText(node))
	def rewriteUrls(self, selectFn, urlTestFn, rewriteFn):
		"""
		NOT TESTED!
		for each node selected by selectFn (e.g., getBSCSUrlNodes)
		- if url at that node passes 
		rewrite each Url that matches testFn with a falue that
		is computed by writeFn:
		  - base_protected_url + self.collection + protectedAssetFileName
		  
		returns True if a change was made, False otherwise
		  
		 """
		recordChanged = False
		for urlNode in selectFn(self):
			url = XmlUtils.getText(urlNode)
			
			assetPath = getAssetPath (url)
	
			newProtectedCollPath = os.path.join (getReorgProtectedDir(), self.collection)
			newAssetPath = os.path.join (newProtectedCollPath, os.path.basename(assetPath))
			newProtectedUrl = os.path.join (base_protected_url, self.collection, os.path.basename(assetPath))

			if self.verbose:
				print '\n- assetPath:', assetPath
				print '- newAssetPath:', newAssetPath
				print '- oldUrl:', url
				print '- newProtectedUrl:', newProtectedUrl
				
			if urlTestFn(self, url):
				new_url = writeFn (self, url)
				XmlUtils.setText(urlNode, new_url)
				recordChanged = True
				
		return recordChanged	
Exemple #8
0
    def processRecord(self, rec):
        """
        Tally the child-element tags of one record into this collection.

        First the occurrences of each tag among the child elements of
        rec.doc are counted, then the counts are merged into the per-tag
        Entry objects stored in self (count, max, maxRec, min).  Entries
        for tags that do not occur in this record get min set to 0.
        Finally self.recordCount is incremented.
        """
        tagCounts = {}
        for element in XmlUtils.getChildElements(rec.doc):
            tag = element.tagName
            # the text is fetched but its value is currently unused
            text = XmlUtils.getText(element).strip()
            tagCounts[tag] = tagCounts.get(tag, 0) + 1

        # merge this record's counts into the global tally
        for tag, occurrences in tagCounts.items():
            if not self.has_key(tag):
                self[tag] = Entry(tag)
            entry = self[tag]
            entry.count += occurrences
            entry.max = max(entry.max, occurrences)
            if entry.max == occurrences:
                # this record holds (or ties) the maximum for the tag
                entry.maxRec = os.path.basename(rec.path)
            entry.min = min(entry.min, occurrences)

        # a tag that is absent from this record has a minimum of zero
        for entry in self.values():
            if entry.tag not in tagCounts:
                entry.min = 0

        self.recordCount += 1
    def getElementHtml(self, element, level):
        """
        Build the html for element, nested at the given level.

        Returns "" when the element has no text, no attributes and no
        child elements.  Otherwise returns a DIV of class "element"
        holding a 'level-N' heading (tag name plus text when there is
        text, the bare tag name when there is not), then the attributes
        html when there are attributes.  Child elements are rendered
        recursively at level + 1 only when the element has no text.
        """
        levelClass = 'level-%d' % level
        name = element.tagName
        text = XmlUtils.getText(element)
        children = XmlUtils.getChildElements(element)
        attributes = element.attributes

        if not text and not attributes and not children:
            return ""

        if text:
            heading = DIV(SPAN(name + ': ', klass='el-name'),
                          SPAN(text, klass="el-text"),
                          klass=levelClass)
        else:
            heading = DIV(name, klass=levelClass)

        html = DIV(klass="element")
        html.append(heading)
        if attributes:
            html.append(self.getAttributesHtml(attributes, level))
        if not text:
            for child in children:
                html.append(self.getElementHtml(child, level + 1))
        return html
Exemple #10
0
    def __init__(self, element):
        """
        Build a node from a row element.

        element is a row containing an unknown number of "TD" cells.  The
        last cell holds the data; the cells before it are "indents" whose
        number gives this node's level.

        The data cell's first "A" child wraps the icon: the icon image's
        file name (without extension) becomes self.type and the anchor's
        HREF, prefixed with webcatUtils.webcatDomain, becomes
        self.metadatapath.  The second "A" child supplies the url and
        label of self.link; self.title is that link's label.
        """
        cells = XmlUtils.getChildElements(element, "TD")
        self.level = len(cells) - 1
        anchors = XmlUtils.getChildElements(cells[-1], "A")

        iconLink = anchors[0]
        iconImage = XmlUtils.getChild("IMG", iconLink)
        iconFile = os.path.basename(iconImage.getAttribute("SRC"))
        self.type = iconFile.split(".")[0]
        self.metadatapath = (webcatUtils.webcatDomain +
                             iconLink.getAttribute("HREF"))

        titleLink = anchors[1]
        target = titleLink.getAttribute("HREF")
        caption = XmlUtils.getText(titleLink)
        self.link = webcatUtils.WebCatLink((target, caption))
        self.title = self.link.label

        # tree links start out unset
        self.parent = None
        self.children = None
Exemple #11
0
def replaceVocabTerm(badTerm, goodTerm, vocab, osmRec):
    """
	field used to obtain vocab info from vocab_data
	"""
    vocabField = getVocabField(vocab)

    # say the field is 'instName'
    # print 'type of indexField: %s' % type(vocabField)
    # print vocabField
    if type(vocabField) == type('') or type(vocabField) == type(u''):
        vocabField = [vocabField]

    for indexField in vocabField:
        xpath = getFieldXpath(indexField)
        # print 'xpath: %s' % xpath
        vocabNodes = osmRec.selectNodes(osmRec.dom, xpath)
        # print '%d vocabNodes found' % len(vocabNodes)
        for node in vocabNodes:
            value = XmlUtils.getText(node)
            if value == badTerm:
                # print 'old:', value
                XmlUtils.setText(node, goodTerm)
                # print 'new:', XmlUtils.getText(node).encode('utf-8')
                print ' .. replaced'
    return osmRec
Exemple #12
0
    def __init__(self, data, exc_info=None, preprocessor=None):
        """
        Wrap a service response.

        data         - object whose read() gives the raw response; it is
                       decoded as utf-8
        exc_info     - when given, the request is taken to have failed:
                       self.error becomes ServiceError(exc_info) and data
                       is not read
        preprocessor - optional callable applied to the decoded response
                       text before it is parsed

        After construction self.doc is the parsed XmlRecord (or None) and
        self.error is None, a ServiceError, or - when the response holds
        a 'DDSWebService:error' node - that node's text.
        """
        self.data = data
        self.error = None
        self.doc = None

        if exc_info:
            self.error = ServiceError(exc_info)
            return

        try:
            responseText = unicode(data.read(), 'utf-8')
            if preprocessor:
                responseText = preprocessor(responseText)
            self.doc = XmlRecord(xml=responseText)

            webResponseErrorNode = self.doc.selectSingleNode(
                self.doc.dom, 'DDSWebService:error')
            if webResponseErrorNode:
                self.error = XmlUtils.getText(webResponseErrorNode)
        except Exception:
            # was a bare 'except:', which also trapped KeyboardInterrupt
            # and SystemExit
            self.error = ServiceError([
                "ServiceResponse: Could not parse XML",
                sys.exc_info()[1]
            ])
	def fixRelations (self):
		"""
		get relation elements and then fix each in turn
		-- if href:
			url = value
			type = Has part
		-- else
			label = value
			type = Is related
		"""
		nodes  = self.getFieldElements ("relation")
		if not nodes: return
		
		print "\n%s" % self.getId()
		for r in nodes:
			value = XmlUtils.getText(r)
			if not value: return
			XmlUtils.setText (r,"")
			if value.startswith ("http://"):
				r.setAttribute ("type", "Has part")
				r.setAttribute ("url", value)
			else:
				r.setAttribute ("type", "Is related")
				r.setAttribute ("title", value)
			print r.toxml()
		if 0:
			self.write()
			print "wrote record"
Exemple #14
0
 def __init__(self, element):
     """
     Keep element and its 'id' attribute, and expose the text of each
     child element as an attribute named after the child's tag.
     """
     self.element = element
     self.id = element.getAttribute('id')
     for node in XmlUtils.getChildElements(element):
         setattr(self, node.tagName, XmlUtils.getText(node))
Exemple #15
0
    def getPublishers(self):
        """
        Return the text of every element that
        self.getContributorElements('Publisher') yields.
        """
        publisherNodes = self.getContributorElements('Publisher')
        return map(XmlUtils.getText, publisherNodes)
Exemple #16
0
    def getPubDate(self):
        """
        Return the text of the first record/coverage/date element whose
        type attribute is "Published"; None when there is no such
        element.
        """
        for dateNode in self.selectNodes(self.dom, "record/coverage/date"):
            if dateNode.getAttribute("type") != "Published":
                continue
            return XmlUtils.getText(dateNode)
        return None
Exemple #17
0
def parseResponse(rec):
    rec.xpath_delimiter = "/"
    topicNodes = rec.selectNodes(rec.dom, "CATWebService/Topics/Topic")
    print "%d topics found" % len(topicNodes)
    topics = []
    for node in topicNodes:
        topics.append(XmlUtils.getText(node))
    return topics
Exemple #18
0
def parseResponse(rec):
    rec.xpath_delimiter = "/"
    authorNodes = rec.selectNodes(rec.dom, "CATWebService/Authors/Author")
    print "%d authors found" % len(authorNodes)
    authors = []
    for node in authorNodes:
        authors.append(XmlUtils.getText(node))
    return authors
Exemple #19
0
	def getDoi (self):
		"""
		Return the text of the first record/classify/idNumber element
		whose type attribute is "DOI"; None when there is no such element.
		"""
		for idNumber in self.selectNodes (self.dom, 'record/classify/idNumber'):
			if idNumber.getAttribute ("type") != "DOI":
				continue
			return XmlUtils.getText(idNumber)
		return None
Exemple #20
0
 def getResponseDoc(self, params=None, opts=None):
     """
     Return the response doc obtained from SimpleClient.getResponseDoc.

     Sets the class-level XmlRecord.xpath_delimiter to '/' first.

     Raises HRSError with "<code>: <text>" when the doc holds a
     HandleResolutionService/error node.
     """
     XmlRecord.xpath_delimiter = '/'
     doc = SimpleClient.getResponseDoc(self, params, opts)
     error = doc.selectSingleNode(doc.dom, "HandleResolutionService/error")
     if not error:
         return doc
     code = error.getAttribute('code')
     message = XmlUtils.getText(error)
     raise HRSError('%s: %s' % (code, message))
Exemple #21
0
 def getTypes(self):
     xpath = "record:itemType"
     types = []
     nodes = self.selectNodes(self.dom, xpath)
     if nodes:
         for node in nodes:
             types.append(XmlUtils.getText(node))
     print "%s %s" % (types, os.path.basename(self.path))
     return types
Exemple #22
0
	def getInstDiv (self, instDiv):
		"""
		Return the first 'instDivision' node selected from self.element
		whose text equals instDiv; None when no node matches.
		"""
		for candidate in XmlUtils.selectNodes (self.element, 'instDivision'):
			if XmlUtils.getText (candidate) != instDiv:
				continue
			return candidate
		return None
Exemple #23
0
	def getResponseDoc (self, params=None, opts=None):
		"""
		Return the response doc obtained from SimpleClient.getResponseDoc.

		Raises SimpleClientError with "ERROR <text>" when the doc holds a
		DDSRepositoryUpdateService:error node.
		"""
		doc = SimpleClient.getResponseDoc(self, params, opts)
		error = doc.selectSingleNode(doc.dom,'DDSRepositoryUpdateService:error')
		if not error:
			return doc
		raise SimpleClientError('ERROR %s' % XmlUtils.getText(error))
Exemple #24
0
 def addViewContext(self, vc):
     vcParent = self.selectSingleNode(self.dom,
                                      'record:collection:viewContexts')
     vcNodes = XmlUtils.getChildElements(vcParent)
     print '%d vc nodes found' % len(vcNodes)
     vcValues = map(lambda x: XmlUtils.getText(x), vcNodes)
     for val in vcValues:
         print '-', val
     if not vc in vcValues:
         XmlUtils.addChild(self.dom, 'viewContext', vc, vcParent)
Exemple #25
0
    def updateOsmRecord(self, osmRecord, before, after):
        """
        Replace mapped values in the person and org fields of osmRecord.

        For every node selected by self.person_field and self.org_field,
        a text value that is a key of self.dataTable.beforeMap is
        replaced with self.dataTable.getAfter(value).

        The before and after parameters are not used by this method.

        Returns osmRecord, which is modified in place.
        """
        for xpath in (self.person_field, self.org_field):
            for node in osmRecord.selectNodes(osmRecord.dom, xpath):
                current = XmlUtils.getText(node)
                if not self.dataTable.beforeMap.has_key(current):
                    continue
                XmlUtils.setText(node, self.dataTable.getAfter(current))

        return osmRecord
Exemple #26
0
def getFacetTerms():
    """
    Issue a ListTerms request for the "$facets" field and return the text
    of each DDSWebService:ListTerms:terms:term node in the response.
    """
    responseDoc = getResponseDoc({"field": "$facets", "verb": "ListTerms"})
    term_nodes = responseDoc.selectNodes(
        responseDoc.dom, "DDSWebService:ListTerms:terms:term")
    return map(XmlUtils.getText, term_nodes)
    def verifyAssets(self):
        """
        Check that the asset behind every protected url node exists.

        The asset path of each node's url is obtained with getAssetPath.

        Raises Exception listing every asset path that does not exist on
        disk; returns None when all exist.
        """
        missing = []
        for urlNode in self.getProtectedUrlNodes():
            assetPath = getAssetPath(XmlUtils.getText(urlNode))
            if os.path.exists(assetPath):
                continue
            missing.append(assetPath)

        if missing:
            raise Exception('assets not found\n- %s' % '\n- '.join(missing))
Exemple #28
0
def getResourceIds (path):
	"""
	Return the ids of the saved resources in the playlist record at path.

	Only playList:items:item nodes whose type attribute is
	'ccs_saved_resource' are considered; the id is the text of the
	node's 'id' child.  Ids that start with 'CCS' are left out.
	"""
	rec = XmlRecord(path=path)
	item_nodes = rec.selectNodes(rec.dom, 'playList:items:item')

	ids = [XmlUtils.getText(XmlUtils.getChild ('id', node))
		for node in item_nodes
		if node.getAttribute('type') == 'ccs_saved_resource']
	return filter (lambda resourceId: not resourceId.startswith('CCS'), ids)
Exemple #29
0
	def getPubsId (self):
		"""
		Return the text of the first record/classify/idNumber element
		whose type attribute is 'PUBID'; None when there is no such
		element.

		Per the original note, this is the pubId assigned to the
		publication in the NESL PUBS database.
		"""
		for idNumber in self.selectNodes(self.dom, 'record/classify/idNumber'):
			if idNumber.getAttribute('type') != 'PUBID':
				continue
			return XmlUtils.getText(idNumber)
		return None
Exemple #30
0
def getUniqueValues():
    """
    Collect the distinct isPartOf url values of every record in the
    hard-coded collection directory.

    Each file in the directory is loaded as an NcsItemRecord; values not
    seen before are appended to unique_values, which is returned.
    """
    # NOTE(review): unique_values is not defined in this function -
    # presumably a module-level list; confirm
    colDir = '/Users/ostwald/Documents/Work/NSDL/TNS Transition-Fall-2011/repo/ncs_item/1239144881424/'
    for filename in os.listdir(colDir):
        rec = NcsItemRecord(path=os.path.join(colDir, filename))
        for node in rec.getIsPartOfUrlNodes():
            value = XmlUtils.getText(node)
            if value in unique_values:
                continue
            unique_values.append(value)

    return unique_values