Exemple #1
0
    def __init__(self, path):
        """
		self.unit - the unit to which this Chapter belongs (e.g., 'Pathways & Advance Engineering')
		self.data - TabData instances for each topic
		"""
        self.data = []
        s = utils.getHtml(path)

        filename = os.path.basename(path)
        self.unit = os.path.basename(os.path.dirname(path))
        self.num, self.chapter = self.getChapterInfo(filename)
        tagPat = RegExUtils.getTagPattern('x:ExcelWorkbook')
        m = tagPat.search(s)
        if not m:
            raise Exception, "could not get TABS data from file (%s)" % path
        print 'found data'
        xml = m.group(0).replace('x:', '')  # strip x prefix from all elements

        rec = XmlRecord(xml=xml)
        rec.xpath_delimiter = '/'
        tabNodes = rec.selectNodes(
            rec.dom, "ExcelWorkbook/ExcelWorksheets/ExcelWorksheet")

        # we ignore the 'Cover sheet'
        print 'creating %d tabs' % len(tabNodes)
        for tabElement in tabNodes:
            tabData = TabData(tabElement, self.unit)
            if tabData.name.lower() != 'cover sheet':
                tabData.num = len(self) + 1
                self.append(tabData)
Exemple #2
0
    def __init__(self, xml, payload_constructor=None):
        self.payload_constructor = payload_constructor or self.default_payload_constructor
        # XmlRecord.__init__ (self, xml=element.toxml())  # xml is an Jlo Element
        XmlRecord.__init__(self, xml=xml)  # xml is a string
        # self.recId = self.getTextAtPath("head:id")
        self.pid = self.get_header_field("PID")
        self.pub_date = self.get_header_field("keyDateYMD")
        self.ark = self.get_header_field("ark")
        self.doi = self.get_header_field("doi")

        # get values from payload
        self.payload = self.get_payload()
        self.title = self.payload.get_title()
        self.pub_type = self.payload.get_genre()
        self.journal = self.payload.get_journal()
        self.collaboration = self.payload.get_collaboration()

        try:
            self.authors = self.payload.get_authors_display()
        except:
            print 'could not get get_authors_display for %s' % self.pid
            self.authors = 'no authors found'

        self.ncar_authors = self.payload.get_ncar_authors()
        # self.ncar_author_names = self.payload.get_authors_display(self.ncar_authors)
        # self.num_ncar_authors = len(self.ncar_authors)
        # self.ncar_author_upids = map(lambda x:x.upid, self.ncar_authors)

        # these fields must be populated externally (e.g., by Reporter)
        self.sum_author_charges = 0
        self.num_yellowstone_authors = ''
        self.yellowstone_authors = ''
        self.sum_author_charges = ''
        self.other_ncar_authors = ''
Exemple #3
0
    def __init__(self):
        """
        Build a <wgbh_lexicon/> document from the terms of a LexiconWorkSheet.

        The root element gets a "timestamp" attribute, then each term that
        has a category is walked as a path (category first, then the term's
        segments); a child element is looked up, or created, for every part
        of that path.
        """
        XmlRecord.__init__(self, xml="<wgbh_lexicon/>")
        self.doc.setAttribute("timestamp", time.asctime())
        self.terms = LexiconWorkSheet()
        for i, term in enumerate(self.terms):
            # some terms have no category, skip these!
            if term.category:
                # progress report on every 100th index
                if i % 100 == 0:
                    print '%d/%d - %s' % (i, len(self.terms), term.prettyTerm)
                # NOTE(review): xpath is not used again in this method
                xpath = term.xpath
                parent = self.doc
                # NOTE(review): term_parts is the object returned by
                # term.segments, so the insert below modifies it in place -
                # if that is the term's own list, the category stays in it
                # (and _is_leaf_segment, called after the insert, may rely
                # on that) - confirm before changing
                term_parts = term.segments
                term_parts.insert(0, term.category)
                for j, part in enumerate(term_parts):
                    isLeaf = self._is_leaf_segment(j, term)
                    # index 0 is the category inserted above
                    isCategory = j == 0

                    # category parts are translated through lexicon_set_map;
                    # with this and/or form a falsy mapped value falls back
                    # to the raw part
                    itemText = isCategory and lexicon_set_map[part] or part

                    child = self.findChild(parent, itemText)
                    nodeName = isCategory and "category" or "segment"
                    if not child:
                        child = self.addElement(parent, nodeName)
                        # print 'added %s' % nodeName

                        # attributes are only set on newly created elements
                        child.setAttribute("text", itemText)
                        if isLeaf:
                            child.setAttribute("id", term.id)
                            child.setAttribute("term", term.prettyTerm)
                            # XmlUtils.setText (child, term.prettyTerm)
                    # descend: the next part is looked up under this element
                    parent = child

            # hard cap: stop once the index has passed 10000
            if i > 10000: break
Exemple #4
0
    def __init__(self, url):
        """
        Fetch the archive browse page at ``url`` and load its table as XML.

        url - address of the browse page; kept on the instance as self.url

        The first TABLE following the "Archive Browse" heading is extracted,
        cleaned up with the webcatUtils helpers so that it can be parsed as
        XML, and handed to XmlRecord.  self.nodeElements is then set from
        getElements(self.doc).

        Raises Exception when the heading or the table cannot be found.
        """
        self.url = url
        pagedata = urllib.urlopen(url)
        try:
            html = pagedata.read()
        finally:
            # release the connection even when the read fails
            pagedata.close()

        marker = html.find("<FONT SIZE=4 FACE=arial>Archive Browse</FONT>")
        if marker < 0:
            raise Exception("browse html not found")

        tablePat = re.compile("<TABLE[^>]*?>(.*?)</TABLE>", re.S)
        m = tablePat.search(html[marker:])
        if not m:
            raise Exception("browse TABLE not found")

        ## following are manipulations required to convert HTML into XML
        tableXml = webcatUtils.stripComments(m.group())
        # some attributes have no value or unquoted value
        tableXml = webcatUtils.fixAttributes(tableXml)
        # bold tags are interleaved with "A" tags!
        tableXml = webcatUtils.removeBoldTags(tableXml)
        # Font tags just make processing difficult
        tableXml = webcatUtils.removeFontTags(tableXml)

        # (the unreachable "if 0:" blocks that dumped tableXml.xml for
        # debugging have been removed)
        XmlRecord.__init__(self, xml=tableXml)
        self.nodeElements = self.getElements(self.doc)
Exemple #5
0
 def __init__(self, path):
     """
     Load the record at ``path`` and cache a few of its fields.

     Sets self.path, self.filename (base name of the path) and the
     accessionNum / recordID / url values returned by _get_field().
     """
     XmlRecord.__init__(self, path=path)
     self.path = path
     self.filename = os.path.basename(self.path)
     for fieldName in ("accessionNum", "recordID", "url"):
         setattr(self, fieldName, self._get_field(fieldName))
Exemple #6
0
    def __init__(self, data, exc_info=None, preprocessor=None):
        """
        Wrap the outcome of a service call.

        data - object with a read() method returning the raw response bytes
               (read only when exc_info is not given)
        exc_info - when truthy, the call is treated as failed: self.error
               becomes ServiceError(exc_info) and data is not read
        preprocessor - optional callable applied to the decoded response
               text before it is parsed

        After construction:
        self.doc   - XmlRecord for the response, or None
        self.error - None on success; a ServiceError for a failed call or an
                     unparsable response; the text of the
                     'DDSWebService:error' node when the service reports one
        """
        self.data = data
        self.error = None
        self.doc = None

        if exc_info:
            self.error = ServiceError(exc_info)
            return

        try:
            # the response is decoded as utf-8 (experimental 12/2/2010)
            responseText = unicode(data.read(), 'utf-8')
            if preprocessor:
                responseText = preprocessor(responseText)
            self.doc = XmlRecord(xml=responseText)

            webResponseErrorNode = self.doc.selectSingleNode(
                self.doc.dom, 'DDSWebService:error')
            if webResponseErrorNode:
                self.error = XmlUtils.getText(webResponseErrorNode)
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not reported as XML parse failures
            self.error = ServiceError([
                "ServiceResponse: Could not parse XML",
                sys.exc_info()[1]
            ])
Exemple #7
0
    def __init__(self):
        """
        Create the skeleton of the "Subject - Math" OPML document.

        Starts from an empty <opml> root, sets the schema location and
        namespace attributes, then adds a <head> (with <title> and a fully
        attributed <concept>) followed by an empty <body>.
        """
        XmlRecord.__init__(self, xml="<opml></opml>")
        self.title = "Subject - Math"
        self.nameSpaceUri = "http://ns.nsdl.org/ncs/fields"
        self.schemaUri = "http://ns.nsdl.org/ncs/msp2/1.00/schemas/fields/mathSubject.xsd"
        self.version = "2.0"
        self.defaultNamespace = "http://ns.nsdl.org/ncs/fields"
        self.setSchemaLocation(self.schemaUri, self.nameSpaceUri)
        self.setSchemaNamespace()
        namespaceAttr = "xmlns:" + self.schema_instance_namespace
        self.doc.setAttribute(namespaceAttr, self.defaultNamespace)

        self.head = self.addElement(self.doc, "head")
        self.setText(self.addElement(self.head, "title"), self.title)

        # attributes are applied in the listed order
        conceptAttributes = (
            ("language", "en-us"),
            ("metaFormat", "osm"),
            ("metaVersion", "1.0.0"),
            ("text", "Mathematics Subject"),
            ("audience", "cataloger"),
            ("path", "/record/coverage/location/@state"),
            ("deftn", "mathematical topics the resource addresses"),
            ("collapseExpand", "true"),
        )
        concept = self.addElement(self.head, "concept")
        for attrName, attrValue in conceptAttributes:
            concept.setAttribute(attrName, attrValue)

        self.body = self.addElement(self.doc, "body")
	def __init__ (self, path=None, xml=None):
		"""
		Load the record from ``path`` or ``xml`` when either is given;
		otherwise build a new one with makeRecord().

		self.fields_list is set to the keys of self.field_specs first.
		"""
		self.fields_list = self.field_specs.keys()
		if not (path or xml):
			# nothing to read from - start a fresh record
			self.makeRecord ()
			return
		XmlRecord.__init__ (self, path=path, xml=xml)
Exemple #9
0
 def __init__(self, instance, xmlFormat):
     """
     Load the framework config file for ``xmlFormat``.

     instance - object providing _get_framework_config_path(); kept as
                self.instance
     xmlFormat - format name; the file read is "<xmlFormat>.xml"
     """
     self.instance, self.xmlFormat = instance, xmlFormat
     configFile = xmlFormat + ".xml"
     # no existence check is made here; a missing file is left to XmlRecord
     XmlRecord.__init__(
         self, path=instance._get_framework_config_path(configFile))
Exemple #10
0
    def getResponseDoc(self, params=None, opts=None):
        """
		returns response as XmlRecord
		"""
        # print 'params: %s' % params
        # return XmlRecord(xml=self.getData(params, opts))
        responseDoc = None
        try:
            # responseText = data.read()
            # responseText = unicode (data.read(), 'iso-8859-1') # universal?
            # responseText = unicode (data.read(), 'utf-8') # experimental 12/2/2010

            data = self.getData(params, opts)
            # print data
            responseDoc = XmlRecord(xml=data)

            webResponseErrorNode = responseDoc.selectSingleNode(
                responseDoc.dom, 'DDSWebService:error')
            if webResponseErrorNode:
                errorCode = webResponseErrorNode.getAttribute('code')
                if errorCode == 'noRecordsMatch':
                    return None
                print 'errorCode', errorCode
                raise SimpleClientError, XmlUtils.getText(webResponseErrorNode)
        except Exception, msg:
            ## self.error = ServiceError (sys.exc_info())
            # self.error = ServiceError (["ServiceResponse: Could not parse XML", sys.exc_info()[1]])
            raise SimpleClientError, "DDSClient: Could not parse XML: %s" % msg
Exemple #11
0
 def __init__(self, path=None, xml=None):
     XmlRecord.__init__(self, path, xml)
     for attr in self.field_list:
         setattr(self, attr, None)
     for element in self.getElements(self.doc):
         setattr(self, element.tagName, self.getText(element))
         print 'set %s to %s' % (element.tagName, self.getText(element))
 def __init__(self, path, NSES=None):
     """
     Load the record at ``path`` and derive its identifying values.

     path - file to read; kept as self.path
     NSES - stored as self.NSES (not otherwise used here)

     Sets self.group, self.band, self.nses_id from the matching _get_*()
     helpers, and self.numId from getNumId(self.nses_id).
     """
     self.path, self.NSES = path, NSES
     XmlRecord.__init__(self, path=path)
     self.group = self._get_group()
     self.band = self._get_band()
     nsesId = self._get_nses_id()
     self.nses_id = nsesId
     self.numId = getNumId(nsesId)
Exemple #13
0
	def write (self, path, verbose=False):
		"""
		require a path so we don't tromp the template
		"""
		if self.dowrites:
			XmlRecord.write (self, path, verbose)
		else:
			print "WOULD have written to " + path
Exemple #14
0
	def __init__ (self):
		"""
		Build the "td-lexicon" document.

		Creates an empty root element named after self.docId, loads the
		LexiconWorkSheet, prepares the term/id maps and the NodeMap, then
		runs processNodes() followed by makeDoc().
		"""
		self.docId = "td-lexicon"
		rootXml = "<%s/>" % self.docId
		XmlRecord.__init__ (self, xml=rootXml)
		self.lexiconData = LexiconWorkSheet ()
		termMap, idMap = self.makeTermAndIdMap()
		self.termMap = termMap
		self.idMap = idMap
		self.nodeMap = NodeMap()
		# the two build steps run in this order
		self.processNodes()
		self.makeDoc()
Exemple #15
0
 def initializeFromBaseMappings(self):
     baseRec = XmlRecord(path="output/dr_2_recId_mappings.xml")
     mappingEls = baseRec.selectNodes(baseRec.dom,
                                      'dr_2_recId_mappings:mapping')
     for mappingEl in mappingEls:
         drNum = mappingEl.getAttribute('drNumber')
         recId = mappingEl.getAttribute('recordID')
         self[drNum] = recId
     print '%d base mappings found' % len(self)
Exemple #16
0
 def initializeFromBaseMappingsBOG(self):
     baseRec = XmlRecord(path="input/accessionNumberMappings.xml")
     mappingEls = baseRec.selectNodes(baseRec.dom,
                                      'accessionNumberMappings:mapping')
     for mappingEl in mappingEls:
         drNum = mappingEl.getAttribute('drNumber')
         queryString = mappingEl.getAttribute('queryString')
         self[drNum] = queryString
     print '%d base mappings found' % len(self)
Exemple #17
0
    def asXml(self):
        """
        Return these mappings as an XmlRecord.

        The root <accessionNumberMappings> element carries a "date"
        attribute; one <mapping> child is added per key and filled in by
        populateMappingElement().
        """
        xmlRec = XmlRecord(xml="<accessionNumberMappings />")
        docRoot = xmlRec.doc
        docRoot.setAttribute("date", time.asctime())
        for key in self.keys():
            self.populateMappingElement(
                xmlRec.addElement(docRoot, "mapping"), key)
        return xmlRec
 def write(self, path=None):
     XmlRecord.write(self, path)
     writePath = None
     if path is not None:
         writePath = path
     elif self.path is not None:
         writePath = self.path
     if writePath is not None:
         print "xml written to %s" % writePath
    def __init__(self, xml):
        """
        Parse a savedResource record from an XML string.

        Sets self.savedXmlFormat, self.id and self.collection from the text
        found at the corresponding 'savedResource/...' paths.
        """
        XmlRecord.__init__(self, xml=xml)

        textAt = self.getTextAtPath
        self.savedXmlFormat = textAt('savedResource/savedXmlFormat')
        self.id = textAt('savedResource/id')
        self.collection = textAt('savedResource/ddsRepoInfo/collectionKey')
Exemple #20
0
	def __init__ (self, path):
		"""
		Load the EAD file at ``path``.

		self.archdesc - ArchDesc built from the 'ead/archdesc' node
		self.collections - result of _get_collections()
		self.itemMap - UserDict of item.id -> item for every item of every
		               collection
		"""
		XmlRecord.__init__ (self, path=path)
		archdescNode = self.selectSingleNode (self.dom, 'ead/archdesc')
		self.archdesc = ArchDesc (archdescNode, self)
		self.collections = self._get_collections()
		self.itemMap = UserDict()
		byId = self.itemMap
		for collection in self.collections:
			for entry in collection.getItems():
				byId[entry.id] = entry
Exemple #21
0
 def __init__(self, collection):
     UserList.__init__(self)
     self.collection = collection
     self.dataPath = os.path.join(self.baseDir, collection + '.xml')
     print "DATA_PATH: ", self.dataPath
     self.rec = XmlRecord(path=self.dataPath)
     nodes = self.rec.selectNodes(self.rec.dom, "collectionInfo:rec")
     print "%d recs read from meta-metadata" % len(nodes)
     map(self.append, map(RecordInfo, nodes))
Exemple #22
0
 def __init__(self, element):
     """
     Build the collection info from a DOM element.

     The element is serialised with toxml() and re-parsed; the attributes
     listed below are then read from the given paths.
     """
     XmlRecord.__init__(self, xml=element.toxml())
     # (attribute name, path) pairs, read in this order
     fieldPaths = (
         ("searchKey", "collection:searchKey"),
         ("recordId", "collection:recordId"),
         ("xmlFormat",
          "collection:additionalMetadata:dlese_collect:formatOfRecords"),
         ("numRecords",
          "collection:additionalMetadata:dlese_collect:numRecords"),
         ("name", "collection:renderingGuidelines:label"),
     )
     for attrName, fieldPath in fieldPaths:
         setattr(self, attrName, self.getTextAtPath(fieldPath))
 def __init__(self):
     XmlRecord.__init__(self, path=self.data_path)
     collections = self.selectNodes(self.dom, "ncsCollections/collection")
     print '%d collections found' % len(collections)
     self.collectionInfos = []
     for collection in collections:
         info = CollectionInfo(collection)
         # recordID = XmlUtils.getChildText (collection, "recordID")
         # setSpec = XmlUtils.getChildText (collection, "setSpec")
         self.collectionInfos.append(info)
Exemple #24
0
 def __init__(self):
     UserDict.__init__(self)
     rec = XmlRecord('output/FINAL-accessionNumberMappings.xml')
     mappings = rec.selectNodes(rec.dom, 'accessionNumberMappings:mapping')
     print '%d mappings found' % len(mappings)
     for mapping in mappings:
         drNum = mapping.getAttribute("drNumber")
         queryString = mapping.getAttribute("queryString")
         # print '%s -> %s' % (drNum, queryString)
         self[drNum] = queryString
Exemple #25
0
	def populateXml (self, xmlData):
		"""
		Copy name/value rows from ``xmlData`` into this record.

		For every element of the parsed data, the text of the first TD is
		the name (a trailing ":" is dropped) and the text of the B child
		of the second TD is the value; each pair becomes a child of
		self.dom whose tag is normalizeTagName(name).
		"""
		source = XmlRecord (xml=xmlData)
		for row in source.getElements (source.doc):
			cells = XmlUtils.getChildElements (row, "TD")
			label = XmlUtils.getText (cells[0]).strip()
			if label[-1] == ":":
				label = label[:-1]
			boldNode = XmlUtils.getChild ("B", cells[1])
			text = XmlUtils.getText (boldNode).strip()

			XmlUtils.addChild (self.dom, self.normalizeTagName(label), text)
Exemple #26
0
 def writeTopicRecords(self):
     for topic in self.keys():
         print "%s - %d" % (topic, len(self[topic]))
         rec = XmlRecord(xml="<AsnDocuments/>")
         root = rec.doc
         root.setAttribute("topic", topic)
         for asnInfo in self[topic]:
             root.appendChild(asnInfo)
         path = os.path.join(self.topicCache, topic + '.xml')
         rec.write(path)
         print 'wrote to', path
Exemple #27
0
	def writeXml (self, path=None):
		"""
		write record info file to disk as xml
		"""
		path = path or "not-fy10-records.xml"
		rec = XmlRecord (xml="<not-fy10-records/>")
		rec.doc.setAttribute ("date", time.asctime(time.localtime()))
		for recInfo in self:
			rec.doc.appendChild (recInfo.asElement())
		rec.write(path)
		print 'wrote to ', path
Exemple #28
0
    def asXml(self):
        """
        Return these mappings as an XmlRecord.

        The root element is named self.rootElementName and carries a "date"
        attribute; one <mapping> child is added per key and filled in by
        populateMappingElement().
        """
        from JloXml import XmlRecord, XmlUtils
        import time
        xmlRec = XmlRecord(xml="<%s />" % self.rootElementName)
        docRoot = xmlRec.doc
        docRoot.setAttribute("date", time.asctime())
        for key in self.keys():
            self.populateMappingElement(
                xmlRec.addElement(docRoot, "mapping"), key)
        return xmlRec
Exemple #29
0
    def __init__(self):
        XmlRecord.__init__(self, path=titles_listing)
        termElements = self.selectNodes(self.dom,
                                        'DDSWebService/ListTerms/terms/term')
        print '%d termElements found' % len(termElements)

        self.terms = map(Term, termElements)
        # print self.terms[2]

        self.multiTerms = filter(lambda x: x.docCount > 1, self.terms)
        print '%d multiTerms found' % len(self.multiTerms)
Exemple #30
0
	def __init__ (self, path):
		"""
		Load the record at ``path`` and derive the values built from it.

		Sets self.id (from _make_id()), self.filename ("<id>.xml"),
		self.timeStamp, self.ndrHandle (text at
		"ndrMetadataInfo/ndrHandle"), self.ncs_item and self.dcs_data.
		"""
		XmlRecord.__init__ (self, path=path)
		recordId = self._make_id ()
		self.id = recordId
		self.filename = recordId+".xml"
		self.timeStamp = self._get_time_stamp()
		self.ndrHandle = self.getTextAtPath ("ndrMetadataInfo/ndrHandle")

		# the two derived records are built in this order
		self.ncs_item = self._make_ncs_item ()
		self.dcs_data = self._make_dcs_data_record()