Beispiel #1
0
    def __init__(self, path):
        """
        Build the chapter from the file at path.

        The text returned by utils.getHtml(path) is searched for an
        x:ExcelWorkbook element; each ExcelWorksheet inside it becomes a
        TabData instance.

        self.unit - the unit to which this Chapter belongs (e.g., 'Pathways & Advance Engineering'),
                    taken from the name of the directory containing path
        self.num, self.chapter - the values returned by self.getChapterInfo
                    for the file name
        self.data - TabData instances for each topic (the 'Cover sheet'
                    tab is skipped); each kept tab gets a 1-based num

        Raises Exception if no x:ExcelWorkbook element is found.
        """
        self.data = []
        html = utils.getHtml(path)

        filename = os.path.basename(path)
        self.unit = os.path.basename(os.path.dirname(path))
        self.num, self.chapter = self.getChapterInfo(filename)

        tagPat = RegExUtils.getTagPattern('x:ExcelWorkbook')
        m = tagPat.search(html)
        if not m:
            raise Exception("could not get TABS data from file (%s)" % path)
        print('found data')

        # strip x prefix from all elements so they can be selected by
        # their plain names below
        xml = m.group(0).replace('x:', '')

        rec = XmlRecord(xml=xml)
        rec.xpath_delimiter = '/'
        tabNodes = rec.selectNodes(
            rec.dom, "ExcelWorkbook/ExcelWorksheets/ExcelWorksheet")

        print('creating %d tabs' % len(tabNodes))
        for tabElement in tabNodes:
            tabData = TabData(tabElement, self.unit)
            # we ignore the 'Cover sheet'
            if tabData.name.lower() != 'cover sheet':
                # number the kept tabs 1, 2, 3, ... in document order
                tabData.num = len(self) + 1
                self.append(tabData)
Beispiel #2
0
 def initializeFromBaseMappings(self):
     """
     Fill this mapping from output/dr_2_recId_mappings.xml.

     Every dr_2_recId_mappings:mapping element adds one entry: its
     drNumber attribute is the key and its recordID attribute the value.
     Prints len(self) when done.
     """
     mappingsDoc = XmlRecord(path="output/dr_2_recId_mappings.xml")
     for el in mappingsDoc.selectNodes(mappingsDoc.dom,
                                       'dr_2_recId_mappings:mapping'):
         key = el.getAttribute('drNumber')
         value = el.getAttribute('recordID')
         self[key] = value
     print('%d base mappings found' % len(self))
Beispiel #3
0
 def initializeFromBaseMappingsBOG(self):
     """
     Fill this mapping from input/accessionNumberMappings.xml.

     Every accessionNumberMappings:mapping element adds one entry: its
     drNumber attribute is the key and its queryString attribute the
     value. Prints len(self) when done.
     """
     mappingsDoc = XmlRecord(path="input/accessionNumberMappings.xml")
     for el in mappingsDoc.selectNodes(mappingsDoc.dom,
                                       'accessionNumberMappings:mapping'):
         key = el.getAttribute('drNumber')
         value = el.getAttribute('queryString')
         self[key] = value
     print('%d base mappings found' % len(self))
Beispiel #4
0
 def __init__(self):
     """
     Load drNumber -> queryString entries from
     output/FINAL-accessionNumberMappings.xml.

     The number of mapping elements found is printed before they are
     stored.
     """
     UserDict.__init__(self)
     doc = XmlRecord('output/FINAL-accessionNumberMappings.xml')
     mappingEls = doc.selectNodes(doc.dom, 'accessionNumberMappings:mapping')
     print('%d mappings found' % len(mappingEls))
     for el in mappingEls:
         key = el.getAttribute("drNumber")
         value = el.getAttribute("queryString")
         self[key] = value
Beispiel #5
0
	def __init__ (self, path="output/MetadataModifySpecs.xml"):
		"""
		Read the change specs from the XML file at path and apply each one.

		Every changeSpecs:pubNameSpec element is wrapped in a ChangeSpec,
		printed, and handed to self.updateMetadata.

		Raises IOError if nothing exists at path.
		"""
		UserList.__init__ (self)
		if not os.path.exists(path):
			raise IOError("output does not exist at %s" % path)
		specDoc = XmlRecord(path=path)
		specNodes = specDoc.selectNodes (specDoc.dom, "changeSpecs:pubNameSpec")
		print("%d specs found" % len(specNodes))
		for specNode in specNodes:
			changeSpec = ChangeSpec (specNode)
			print(changeSpec)
			self.updateMetadata (changeSpec)
Beispiel #6
0
    def __init__(self):
        """
        Load the url -> id cache.

        Entries are read from idCacheFile when that file exists, otherwise
        from the record returned by self.getBlankRec(). Also sets
        NsdlSearcher.verbose to False.
        """
        self.data = {}
        NsdlSearcher.verbose = False
        if not os.path.exists(idCacheFile):
            cacheRec = self.getBlankRec()
        else:
            cacheRec = XmlRecord(path=idCacheFile)

        for entry in cacheRec.selectNodes(cacheRec.dom, 'idCache:entry'):
            entryUrl = entry.getAttribute('url')
            entryId = entry.getAttribute('id')
            self[entryUrl] = entryId
Beispiel #7
0
def getResourceIds (path):
	"""
	Return the ids of the saved resources in the playlist file at path.

	Only playList:items:item elements whose type attribute is
	'ccs_saved_resource' are considered; the text of each one's 'id'
	child is collected, and ids starting with 'CCS' are left out.
	"""
	playlist = XmlRecord(path=path)
	resourceIds = []
	for item in playlist.selectNodes(playlist.dom, 'playList:items:item'):
		if item.getAttribute('type') != 'ccs_saved_resource':
			continue
		resourceId = XmlUtils.getText(XmlUtils.getChild ('id', item))
		if not resourceId.startswith('CCS'):
			resourceIds.append(resourceId)
	return resourceIds
Beispiel #8
0
    def getResults(self, params):
        """
        Fetch the response for params and wrap each result element.

        Returns a list of OSWSResult instances, one per
        OpenSkyWebService:Search:results:result element of the response.
        If self.getData fails, the error is printed and None is returned.

        Raises Exception, carrying the text of the error element, when the
        response contains an OpenSkyWebService:error element.
        """
        try:
            data = self.getData(params=params)
        except Exception:
            # best effort: report the failure and hand the caller None.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            print('ERROR: %s' % sys.exc_info()[1])
            return None

        response = XmlRecord(xml=data)

        # the service reports failures inside the response document
        error_path = 'OpenSkyWebService:error'
        if response.selectNodes(response.dom, error_path):
            raise Exception(response.getTextAtPath(error_path))

        results_path = 'OpenSkyWebService:Search:results:result'
        results_els = response.selectNodes(response.dom, results_path)
        print('%d result elements found' % len(results_els))

        return [OSWSResult(el) for el in results_els]
Beispiel #9
0
    def __init__(self, path):
        """
        Load a docId -> [stdId, ...] mapping from the XML file at path.

        Each GatheredIds/id element contributes its stdId attribute to the
        list stored under its docId attribute. Also creates
        self.asnResolutionClient and prints the number of id elements found.
        """
        self.data = {}
        # read the file named by the path argument (the previous code
        # referenced `data`, which is not defined in this method, and left
        # `path` unused)
        rec = XmlRecord(path=path)
        rec.xpath_delimiter = "/"
        nodes = rec.selectNodes(rec.dom, 'GatheredIds/id')
        self.asnResolutionClient = AsnResolutionClient()
        print("%d nodes found" % len(nodes))

        for node in nodes:
            stdId = node.getAttribute("stdId")
            docId = node.getAttribute("docId")
            if docId in self:
                stdIds = self[docId]
            else:
                stdIds = []
            stdIds.append(stdId)
            self[docId] = stdIds
Beispiel #10
0
class ComparisonManager (UserDict):
	"""
	reads cached comparison info from disk

	The data file is <grouping_data_dir>/<grouping>Map.xml; each of its
	dupGroups:group elements (at most max_dups of them) becomes a DupGroup
	stored under that group's key.
	"""
	
	grouping_data_dir = 'grouping_data'
	max_dups = 5000
	
	def __init__(self, grouping):
		"""
		load the dup groups for the named grouping and print how many
		group nodes were found and how many groups were ingested
		"""
		self.data = {}
		path = os.path.join (self.grouping_data_dir, grouping+'Map.xml')
		self.rec = XmlRecord(path=path)
		groupNodes = self.rec.selectNodes (self.rec.dom, 'dupGroups:group')
		print('%d dup nodes found' % len(groupNodes))
		for groupNode in groupNodes[:self.max_dups]:
			dupGroup = DupGroup (groupNode)
			self[dupGroup.key] = dupGroup
		print('comparisonManager ingested %d dupGroups' % len(self.keys()))
		
	def _getName (self):
		"""
		base name of the data file (self.rec.path) without its extension
		"""
		root, ext = os.path.splitext(os.path.basename (self.rec.path))
		return root
		
	def writeListingHtml (self):
		"""
		create an html document that shows the groups and provides access to
		side-by-side display
		"""
		self.name = self._getName()
		htmlDoc = DuplicateGroupListingHTML(self, self.name)
		htmlDoc.write()
		
	def writeComparisonPages (self):
		"""
		write one RecordCompareHtml page per dup group to
		html/<name>_data/<groupNum>.html, each linking back to the listing
		page for its group
		"""
		# self.name used to be set only by writeListingHtml, so calling this
		# method first raised AttributeError
		if not hasattr(self, 'name'):
			self.name = self._getName()
		baseDir = os.path.join ('html', self.name+'_data')
		if not os.path.exists(baseDir):
			# makedirs also creates the parent 'html' directory when missing
			os.makedirs (baseDir)
			
		for key in self.keys():
			dupGroup = self[key]
			returnUrl = '../%s.html?groupNum=%s' % (self.name, dupGroup.groupNum)
			compareHtml = RecordCompareHtml(dupGroup, self.name, returnUrl)
			compareHtml.write (os.path.join (baseDir, dupGroup.groupNum+'.html'))
		
	def keys (self):
		"""
		return the group keys as a sorted list
		"""
		return sorted(self.data.keys())
Beispiel #11
0
    def __init__(self):
        """
        Index the cached ASN document elements by topic.

        Every .xml file in <cacheBase>/jurisdictions is read; for each
        AsnDocuments:asnDocument element an AsnInfo is built and a
        cloneNode(True) copy of its element is appended to the list stored
        under the AsnInfo's topic.

        Also sets self.jurisCache and self.topicCache to the
        'jurisdictions' and 'topics' directories under self.cacheBase.
        """
        self.data = {}
        self.jurisCache = os.path.join(self.cacheBase, 'jurisdictions')
        self.topicCache = os.path.join(self.cacheBase, 'topics')

        jurisFiles = [name for name in os.listdir(self.jurisCache)
                      if name.endswith('.xml')]
        for jurisFile in jurisFiles:
            rec = XmlRecord(path=os.path.join(self.jurisCache, jurisFile))
            docNodes = rec.selectNodes(rec.dom, 'AsnDocuments:asnDocument')
            asnDocs = [AsnInfo(docNode) for docNode in docNodes]
            for asnInfo in asnDocs:
                topic = asnInfo.topic
                if topic in self:
                    vals = self[topic]
                else:
                    vals = []
                vals.append(asnInfo.element.cloneNode(True))
                self[topic] = vals
Beispiel #12
0
class RecordDataReader(SortedDict):
    """
    Reads the data in the collection-data file (data_path) and stores a
    RecordInfo, keyed by recId, for every record accepted by acceptFn.
    """

    data_path = 'not-fy10-records.xml'

    def __init__(self, acceptFn=None):
        """
        acceptFn - callable taking a RecordInfo; records for which it
                   returns a false value are skipped. When None, every
                   record is accepted.
        """
        if acceptFn is None:
            acceptFn = self.acceptAll
        self.acceptFn = acceptFn
        self.read()

    def read(self):
        """
        (Re)load self.data from data_path, printing the record count and
        a progress line after every 500 records.
        """
        self.data = {}
        self.data_rec = XmlRecord(path=self.data_path)
        self.data_rec.xpath_delimiter = "/"
        recNodes = self.data_rec.selectNodes(self.data_rec.dom,
                                             'not-fy10-records/record')
        total = len(recNodes)
        print('%d records read' % total)
        for index, recNode in enumerate(recNodes):
            recInfo = RecordInfo(recNode)
            count = index + 1
            if count % 500 == 0:
                print("%d/%d" % (count, total))

            if self.acceptFn(recInfo):
                self.addRecord(recInfo)

    def acceptAll(self, recInfo):
        """
        default accept function: accepts every record
        """
        return 1

    def acceptFy0809OFF(self, recInfo):
        """
        accept only records with fiscalYear of 2008 or 2009
        """
        return recInfo.fiscalYear in ('2008', '2009')

    def addRecord(self, recInfo):
        """
        store recInfo under its recId
        """
        self[recInfo.recId] = recInfo
Beispiel #13
0
class CollectionInfo(UserList):
    """
    List of RecordInfo instances, one per collectionInfo:rec element of
    the file <baseDir>/<collection>.xml.
    """

    # baseDir = "meta-metadata"
    baseDir = '/home/ostwald/python-lib/ncar_lib/dups/data/meta-metadata'

    def __init__(self, collection):
        """
        read the data file for the named collection, printing its path and
        the number of rec elements found
        """
        UserList.__init__(self)
        self.collection = collection
        self.dataPath = os.path.join(self.baseDir, collection + '.xml')
        # two spaces after the colon reproduce the output of the former
        # statement `print "DATA_PATH: ", self.dataPath`
        print("DATA_PATH:  %s" % self.dataPath)
        self.rec = XmlRecord(path=self.dataPath)
        nodes = self.rec.selectNodes(self.rec.dom, "collectionInfo:rec")
        print("%d recs read from meta-metadata" % len(nodes))
        # build every RecordInfo before adding any; map() is not used here
        # because a lazy map would never run the appends
        self.extend([RecordInfo(node) for node in nodes])

    def selectByUnionDate(self, unionDate):
        """
        takes union date (e.g., 2011, 2011-02, 2011-02-25)
        and returns recs whose timeStamp is ON OR AFTER the union date
        (as converted by unionDateToSecs)
        """
        threshold = unionDateToSecs(unionDate)

        def touchedSince(recInfo):
            return recInfo.timeStamp >= threshold

        return self.select(touchedSince)

    def select(self, predicate):
        """
        applies predicate to each item
        returns a list of only those for which predicate is True
        """
        return list(filter(predicate, self.data))

    def write(self, path=None):
        """
        self.rec writes to self.rec.path by default
        """
        self.rec.write(path)
    def getDrNumbers(self):
        """
        Return the drNumber attribute of every element that
        self.mappingsXpath selects in the XML file at self.path,
        printing how many were found.
        """
        mappingsDoc = XmlRecord(path=self.path)

        mappingEls = mappingsDoc.selectNodes(mappingsDoc.dom,
                                             self.mappingsXpath)
        print('%d mappings found' % len(mappingEls))
        return [el.getAttribute("drNumber") for el in mappingEls]