コード例 #1
0
    def __init__(self, xml, payload_constructor=None):
        """Parse ``xml`` and cache header and payload values as attributes.

        Args:
            xml: XML source as a string (not a DOM element); handed to
                ``XmlRecord.__init__``.
            payload_constructor: optional override; when falsy,
                ``self.default_payload_constructor`` is used instead.
        """
        self.payload_constructor = payload_constructor or self.default_payload_constructor
        XmlRecord.__init__(self, xml=xml)  # xml is a string

        # values read from the record header
        self.pid = self.get_header_field("PID")
        self.pub_date = self.get_header_field("keyDateYMD")
        self.ark = self.get_header_field("ark")
        self.doi = self.get_header_field("doi")

        # get values from payload
        self.payload = self.get_payload()
        self.title = self.payload.get_title()
        self.pub_type = self.payload.get_genre()
        self.journal = self.payload.get_journal()
        self.collaboration = self.payload.get_collaboration()

        try:
            self.authors = self.payload.get_authors_display()
        except Exception:
            # Best effort: a record without displayable authors is still
            # usable. Only ordinary errors are caught, so Ctrl-C and
            # SystemExit are no longer swallowed here.
            print('could not get get_authors_display for %s' % self.pid)
            self.authors = 'no authors found'

        self.ncar_authors = self.payload.get_ncar_authors()

        # these fields must be populated externally (e.g., by Reporter)
        self.num_yellowstone_authors = ''
        self.yellowstone_authors = ''
        self.sum_author_charges = ''
        self.other_ncar_authors = ''
コード例 #2
0
ファイル: lexicon.py プロジェクト: ostwald/python-lib
    def __init__(self):
        """Build a ``<wgbh_lexicon>`` tree from the LexiconWorkSheet terms.

        Every term that has a category contributes a chain of nested
        elements: one "category" element, then one "segment" element per
        term segment. A child found by ``findChild`` is reused; otherwise a
        new element is added. Newly created leaf elements also receive the
        term's id and pretty name.
        """
        XmlRecord.__init__(self, xml="<wgbh_lexicon/>")
        self.doc.setAttribute("timestamp", time.asctime())
        self.terms = LexiconWorkSheet()
        for index, term in enumerate(self.terms):
            # some terms have no category, skip these!
            if term.category:
                if index % 100 == 0:
                    # progress report every 100 terms
                    print('%d/%d - %s' % (index, len(self.terms), term.prettyTerm))
                xpath = term.xpath  # not used below; the attribute read is retained
                node = self.doc
                # NOTE: this is term.segments itself (no copy), so the insert
                # below also changes the term's own segment list.
                segments = term.segments
                segments.insert(0, term.category)
                for depth, segment in enumerate(segments):
                    is_leaf = self._is_leaf_segment(depth, term)

                    if depth == 0:
                        # The first entry is the category: use its mapped
                        # label, falling back to the raw value when the
                        # mapped label is empty.
                        label = lexicon_set_map[segment] or segment
                        tag = "category"
                    else:
                        label = segment
                        tag = "segment"

                    child = self.findChild(node, label)
                    if not child:
                        child = self.addElement(node, tag)

                        child.setAttribute("text", label)
                        if is_leaf:
                            child.setAttribute("id", term.id)
                            child.setAttribute("term", term.prettyTerm)
                    node = child

            # hard cap on the number of terms visited
            if index > 10000:
                break
コード例 #3
0
 def __init__(self, path=None, xml=None):
     """Load the record and expose each element's text as an attribute.

     Every name in ``self.field_list`` is first set to None; then, for each
     element returned by ``getElements(self.doc)``, an attribute named
     after the element's tag is set to that element's text.
     """
     XmlRecord.__init__(self, path, xml)
     for name in self.field_list:
         setattr(self, name, None)
     for node in self.getElements(self.doc):
         tag = node.tagName
         setattr(self, tag, self.getText(node))
         print('set %s to %s' % (tag, self.getText(node)))
コード例 #4
0
ファイル: groups.py プロジェクト: ostwald/python-lib
    def __init__(self):
        """Create an ``<opml>`` document with a populated head and empty body.

        Sets the schema/namespace attributes on the root element, then adds
        ``head`` (with ``title`` and ``concept`` children) and ``body``.
        """
        XmlRecord.__init__(self, xml="<opml></opml>")
        self.title = "Subject - Math"
        self.nameSpaceUri = "http://ns.nsdl.org/ncs/fields"
        self.schemaUri = "http://ns.nsdl.org/ncs/msp2/1.00/schemas/fields/mathSubject.xsd"
        self.version = "2.0"
        self.defaultNamespace = "http://ns.nsdl.org/ncs/fields"

        # namespace / schema declarations on the root element
        self.setSchemaLocation(self.schemaUri, self.nameSpaceUri)
        self.setSchemaNamespace()
        xmlns_attr = "xmlns:" + self.schema_instance_namespace
        self.doc.setAttribute(xmlns_attr, self.defaultNamespace)

        # <head> with its <title> and <concept> children
        self.head = self.addElement(self.doc, "head")
        title_el = self.addElement(self.head, "title")
        self.setText(title_el, self.title)

        concept_el = self.addElement(self.head, "concept")
        concept_attrs = (
            ("language", "en-us"),
            ("metaFormat", "osm"),
            ("metaVersion", "1.0.0"),
            ("text", "Mathematics Subject"),
            ("audience", "cataloger"),
            ("path", "/record/coverage/location/@state"),
            ("deftn", "mathematical topics the resource addresses"),
            ("collapseExpand", "true"),
        )
        for attr_name, attr_value in concept_attrs:
            concept_el.setAttribute(attr_name, attr_value)

        self.body = self.addElement(self.doc, "body")
コード例 #5
0
    def __init__(self, url):
        """Fetch the browse page at ``url`` and load its table as XML.

        The page is searched for the "Archive Browse" marker; the first
        TABLE after that marker is cleaned up by the ``webcatUtils`` helpers
        and handed to ``XmlRecord.__init__``.

        Args:
            url: address of the browse page to read.

        Raises:
            Exception: if the marker, or a TABLE following it, is not found.
        """
        self.url = url
        pagedata = urllib.urlopen(url)
        try:
            html = pagedata.read()
        finally:
            # release the connection even when the read fails
            pagedata.close()

        marker = html.find("<FONT SIZE=4 FACE=arial>Archive Browse</FONT>")
        if marker < 0:
            raise Exception("browse html not found")

        tablePat = re.compile("<TABLE[^>]*?>(.*?)</TABLE>", re.S)
        m = tablePat.search(html[marker:])
        if not m:
            raise Exception("browse TABLE not found")

        ## following are manipulations required to convert HTML into XML
        tableXml = webcatUtils.stripComments(m.group())
        # some attributes have no value or unquoted value
        tableXml = webcatUtils.fixAttributes(tableXml)
        # bold tags are interleaved with "A" tags!
        tableXml = webcatUtils.removeBoldTags(tableXml)
        # Font tags just make processing difficult
        tableXml = webcatUtils.removeFontTags(tableXml)

        # Flip to True to dump the intermediate XML while debugging.
        debug_dump = False
        if debug_dump:
            with open("tableXml.xml", 'w') as fp:
                fp.write(tableXml)

        XmlRecord.__init__(self, xml=tableXml)
        if debug_dump:
            with open("tableXml.xml", 'w') as fp:
                fp.write(self.__repr__())
        self.nodeElements = self.getElements(self.doc)
コード例 #6
0
ファイル: PdfTool.py プロジェクト: ostwald/python-lib
 def __init__(self, path):
     """Load the record at ``path`` and cache three of its fields.

     ``accessionNum``, ``recordID`` and ``url`` are each read through
     ``self._get_field`` and stored under the same attribute name.
     """
     XmlRecord.__init__(self, path=path)
     self.path = path
     self.filename = os.path.basename(path)
     for field in ("accessionNum", "recordID", "url"):
         setattr(self, field, self._get_field(field))
コード例 #7
0
 def __init__(self, instance, xmlFormat):
     """Load the framework config file for ``xmlFormat``.

     The file location comes from
     ``instance._get_framework_config_path(xmlFormat + ".xml")``. The path
     is not checked for existence here.
     """
     self.instance = instance
     self.xmlFormat = xmlFormat
     config_name = xmlFormat + ".xml"
     config_path = instance._get_framework_config_path(config_name)
     XmlRecord.__init__(self, path=config_path)
コード例 #8
0
	def __init__ (self, path=None, xml=None):
		"""Load an existing record, or make a new one when no source is given.

		With neither ``path`` nor ``xml`` supplied (both falsy),
		``self.makeRecord()`` is called instead of ``XmlRecord.__init__``.
		"""
		self.fields_list = self.field_specs.keys()
		if not (path or xml):
			self.makeRecord ()
			return
		XmlRecord.__init__ (self, path=path, xml=xml)
コード例 #9
0
 def __init__(self, path, NSES=None):
     """Load the record at ``path`` and derive its identifying attributes.

     ``group``, ``band`` and ``nses_id`` come from the matching private
     getters; ``numId`` is ``getNumId`` applied to ``nses_id``.
     """
     self.path, self.NSES = path, NSES
     XmlRecord.__init__(self, path=path)
     self.group = self._get_group()
     self.band = self._get_band()
     nses_id = self._get_nses_id()
     self.nses_id = nses_id
     self.numId = getNumId(nses_id)
コード例 #10
0
	def __init__ (self):
		"""Create an empty ``<td-lexicon/>`` document and fill it.

		Loads the LexiconWorkSheet, builds the term and id maps, then runs
		``processNodes`` followed by ``makeDoc``.
		"""
		self.docId = "td-lexicon"
		root_xml = "<%s/>" % self.docId
		XmlRecord.__init__ (self, xml=root_xml)
		self.lexiconData = LexiconWorkSheet ()
		term_map, id_map = self.makeTermAndIdMap()
		self.termMap = term_map
		self.idMap = id_map
		self.nodeMap = NodeMap()
		self.processNodes()
		self.makeDoc()
コード例 #11
0
	def __init__ (self, path):
		"""Load the document at ``path`` and index every collection item by id.

		``itemMap`` maps ``item.id`` to the item, for each item returned by
		``getItems()`` on each collection; a later item with the same id
		replaces an earlier one.
		"""
		XmlRecord.__init__ (self, path=path)
		archdesc_node = self.selectSingleNode (self.dom, 'ead/archdesc')
		self.archdesc = ArchDesc (archdesc_node, self)
		self.collections = self._get_collections()
		self.itemMap = item_map = UserDict()
		for collection in self.collections:
			for entry in collection.getItems():
				item_map[entry.id] = entry
コード例 #12
0
 def __init__(self, element):
     """Wrap a collection ``element`` and cache selected text values.

     The element is serialized with ``toxml()`` and re-parsed by
     ``XmlRecord.__init__``; each attribute below is then read with
     ``getTextAtPath``.
     """
     XmlRecord.__init__(self, xml=element.toxml())
     text_paths = (
         ("searchKey", "collection:searchKey"),
         ("recordId", "collection:recordId"),
         ("xmlFormat",
          "collection:additionalMetadata:dlese_collect:formatOfRecords"),
         ("numRecords",
          "collection:additionalMetadata:dlese_collect:numRecords"),
         ("name", "collection:renderingGuidelines:label"),
     )
     for attr, text_path in text_paths:
         setattr(self, attr, self.getTextAtPath(text_path))
コード例 #13
0
    def __init__(self, xml):
        """Parse ``xml`` and cache the saved-resource format, id and collection.

        Each attribute is read with ``getTextAtPath`` from under
        ``savedResource``.
        """
        XmlRecord.__init__(self, xml=xml)

        text_paths = (
            ('savedXmlFormat', 'savedResource/savedXmlFormat'),
            ('id', 'savedResource/id'),
            ('collection', 'savedResource/ddsRepoInfo/collectionKey'),
        )
        for attr, text_path in text_paths:
            setattr(self, attr, self.getTextAtPath(text_path))
コード例 #14
0
 def __init__(self):
     """Load ``self.data_path`` and wrap each collection node.

     ``collectionInfos`` holds one ``CollectionInfo`` per
     ``ncsCollections/collection`` node, in document order.
     """
     XmlRecord.__init__(self, path=self.data_path)
     nodes = self.selectNodes(self.dom, "ncsCollections/collection")
     print('%d collections found' % len(nodes))
     self.collectionInfos = [CollectionInfo(node) for node in nodes]
コード例 #15
0
ファイル: backfiller.py プロジェクト: ostwald/python-lib
    def __init__(self, path):
        """Load the record at ``path`` and cache its descriptive fields.

        ``creators`` and ``contributors`` are sorted in place after loading.
        """
        XmlRecord.__init__(self, path=path)
        self.children = self._get_children()

        creators = self.getFieldValues("creator")
        contributors = self.getFieldValues("contributor")
        self.creators = creators
        self.contributors = contributors

        self.title = self.getFieldValue("title")
        # NOTE(review): "tn_isssue" has three s's -- confirm this matches the
        # real field name before changing it.
        self.issue = self.getFieldValue("tn_isssue")
        self.accessionNum = self.getFieldValue("accessionNum")

        for names in (creators, contributors):
            names.sort()
コード例 #16
0
ファイル: citation.py プロジェクト: ostwald/python-lib
	def __init__ (self, data, id):
		"""Create a ``<record />`` document for ``data`` and populate it.

		``data`` is stored and validated first; the record then gets its
		schema/namespace declarations, a ``recordId`` child holding ``id``,
		and finally whatever ``populate()`` adds.
		"""
		self.data = data
		self._validate()  # runs before self.id is assigned
		self.id = id
		XmlRecord.__init__ (self, xml="<record />")

		# namespace and schema declarations
		self.setSchemaNamespace ()
		schema_uri = self.schemaURI
		namespace = self.targetNamespace
		self.setSchemaLocation (schema_uri, namespace)
		self.setDefaultNamespace (namespace)

		self.addChild ("recordId", id)
		self.populate ()
コード例 #17
0
    def __init__(self):
        """Load the titles listing and collect its terms.

        ``terms`` holds one ``Term`` per ``term`` node; ``multiTerms`` is the
        subset whose ``docCount`` is greater than 1.
        """
        XmlRecord.__init__(self, path=titles_listing)
        term_nodes = self.selectNodes(self.dom,
                                      'DDSWebService/ListTerms/terms/term')
        print('%d termElements found' % len(term_nodes))

        self.terms = [Term(node) for node in term_nodes]

        self.multiTerms = [term for term in self.terms if term.docCount > 1]
        print('%d multiTerms found' % len(self.multiTerms))
コード例 #18
0
	def __init__ (self, path):
		"""Load the record at ``path`` and derive its id, handle and sub-records.

		The steps run in a fixed order: id and filename, time stamp, NDR
		handle, then the ncs item and the dcs data record.
		"""
		XmlRecord.__init__ (self, path=path)
		record_id = self._make_id ()
		self.id = record_id
		self.filename = record_id + ".xml"
		self.timeStamp = self._get_time_stamp()
		self.ndrHandle = self.getTextAtPath ("ndrMetadataInfo/ndrHandle")

		self.ncs_item = self._make_ncs_item ()

		self.dcs_data = self._make_dcs_data_record()
コード例 #19
0
    def __init__(self, url):
        """Load the mapping document from ``url`` plus both standards documents.

        After parsing the content returned by ``getUrl(url)``, the new and
        old standards are read from ``new_asnpath`` and ``old_asnpath`` and
        their keys cached.
        """
        self.url = url
        XmlRecord.__init__(self, xml=self.getUrl(url))
        self.mapped_ids = self._getMappedIds()

        print("\nreading NEW standards")
        new_standards = StdDocument(new_asnpath)
        self.new_AsnStandards = new_standards
        self.new_asn_ids = new_standards.keys()

        print("\nreading OLD standards")
        old_standards = StdDocument(old_asnpath)
        self.old_AsnStandards = old_standards
        self.old_asn_ids = old_standards.keys()
コード例 #20
0
ファイル: xsd.py プロジェクト: ostwald/python-lib
	def __init__ (self, path=None, xml=None):
		"""Load a schema document and publish its root namespace prefix.

		The prefix is written to ``xsd_globals.XSD_PREFIX`` before the two
		query paths are built with ``qp``.
		"""
		if path:
			print ("reading %s" % path)
		XmlRecord.__init__ (self, path=path, xml=xml)

		# Imported locally and unbound again straight after use.
		import xsd_globals as shared_globals
		shared_globals.XSD_PREFIX = self.root_name_space_prefix
		del shared_globals

		# These are defined only after the XSD prefix is known, since they
		# use it.
		self.enumeration_path = qp ("schema/simpleType/restriction/enumeration")
		self.restriction_path = qp ("schema/simpleType/restriction")
		self._enumTypes = None
コード例 #21
0
ファイル: topicRecord.py プロジェクト: ostwald/python-lib
    def __init__(self, table_html, encoding=None):
        """Parse ``table_html`` as XML and store the parsed data.

        Args:
            table_html: markup of the table to parse.
            encoding: text encoding; ``self.default_encoding`` when falsy.

        On a parse failure the offending markup is written to
        ``bogus-table-html.xml`` and the process exits with status 1.
        """
        self.encoding = encoding or self.default_encoding
        # NOTE(review): '&nbsp' is matched without its trailing ';', so
        # "&nbsp;" becomes " ;" -- confirm that is intended.
        table_html = table_html.replace('&nbsp', ' ')
        try:
            XmlRecord.__init__(self, xml=table_html, encoding=self.encoding)
        except Exception:
            # The message previously began with '\t' (a tab escape), which
            # ate the leading "t" of the class name.
            print('topicRecord: XmlRecord couldnt parse table xml - halting')
            bog = 'bogus-table-html.xml'
            fp = codecs.open(bog, 'w', self.encoding)
            try:
                fp.write(table_html)
            finally:
                fp.close()
            print('wrote to %s' % bog)
            # non-zero status so callers and shells see the failure
            sys.exit(1)

        self.parsed_data = self.parse()
コード例 #22
0
 def __init__(self, element, payload_constructor=None):
     """Wrap a record ``element`` and cache its head and stored-content values.

     ``payload_constructor`` falls back to
     ``self.default_payload_constructor`` when falsy. The element is
     serialized with ``toxml()`` and re-parsed by ``XmlRecord.__init__``.
     """
     if payload_constructor:
         self.payload_constructor = payload_constructor
     else:
         self.payload_constructor = self.default_payload_constructor
     XmlRecord.__init__(self, xml=element.toxml())

     # text values read from the record head
     head_paths = (
         ("recId", "record:head:id"),
         ("xmlFormat", "record:head:xmlFormat"),
         ("fileLastModified", "record:head:fileLastModified"),
     )
     for attr, text_path in head_paths:
         setattr(self, attr, self.getTextAtPath(text_path))

     self.collection = self.get_collection()
     self.collectionName = self.get_collectionName()

     content_node = self.selectSingleNode(self.dom, "record:storedContent")
     self.storedContent = StoredContent(content_node)
     self.dcsstatus = self.get_status()
     stored = self.storedContent
     self.dcsstatusNote = stored.get('dcsstatusNote')
     self.dcsisValid = stored.get('dcsisValid')
     self.payload = self.get_payload()
コード例 #23
0
ファイル: __init__.py プロジェクト: ostwald/python-lib
    def __init__(self):
        """Build a ``<positionHistory/>`` document from ``JoinedData``.

        One ``person`` element is added per upid, with one ``position``
        child per record for that upid. The person's ``peid`` attribute is
        taken from the first record.
        """
        XmlRecord.__init__(self, xml="<positionHistory/>")
        self.data = JoinedData()
        position_attrs = ('start', 'end', 'entity', 'lab', 'org', 'divProg',
                          'divCode')
        for upid in self.data.keys():
            person = XmlUtils.addElement(self.dom, self.doc, 'person')
            person.setAttribute('upid', str(upid))

            for index, record in enumerate(self.data[upid]):
                if index == 0:
                    person.setAttribute('peid', str(record.peid))
                position = XmlUtils.addElement(self.dom, person, 'position')
                for name in position_attrs:
                    position.setAttribute(name, str(getattr(record, name)))
コード例 #24
0
    def __init__(self, recordsDir, xmlFormat, collection, collectionName,
                 configDir):
        """Start from ``CMtemplate`` and record the collection's directories.

        Args:
            recordsDir: root directory of the records.
            xmlFormat: format name, used as a path component.
            collection: collection key, used as a path component.
            collectionName: display name of the collection.
            configDir: configuration directory (stored, not checked).

        Raises:
            IOError: if the item directory or the dcs_data directory is not
                an existing directory.
        """
        XmlRecord.__init__(self, xml=CMtemplate)
        self.pidCounter = 0
        self.xpath_delimiter = "/"
        self.recordsDir = recordsDir
        self.xmlFormat = xmlFormat
        self.collection = collection
        self.collectionName = collectionName
        self.itemDir = os.path.join(recordsDir, xmlFormat, collection)
        self.dcsDataDir = os.path.join(recordsDir, "dcs_data", xmlFormat,
                                       collection)
        self.configDir = configDir

        # The old guards raised string exceptions and formatted the message
        # from undefined locals, so they failed with NameError instead of
        # reporting the missing directory.
        if not os.path.isdir(self.itemDir):
            raise IOError("NotDirectoryError: itemDir (%s)" % self.itemDir)

        if not os.path.isdir(self.dcsDataDir):
            raise IOError(
                "NotDirectoryError: dcsDataDir (%s)" % self.dcsDataDir)
コード例 #25
0
ファイル: WebCatMetadata.py プロジェクト: ostwald/python-lib
    def __init__(self, url, prefix=None):
        """Fetch the metadata page at ``url`` and build a ``<record/>`` from it.

        The page is searched for the "Metadata Information" heading; the
        first TABLE after it is passed to ``populateXml`` and the document
        is then finished with ``finalizeXml``.

        Args:
            url: address of the metadata page to read.
            prefix: optional prefix; ``self.default_prefix`` when falsy.

        Raises:
            Exception: if the heading, or a TABLE following it, is not found.
        """
        self.url = url
        self.prefix = prefix or self.default_prefix
        pagedata = urllib.urlopen(url)
        try:
            content = pagedata.read()
        finally:
            # release the connection even when the read fails
            pagedata.close()

        XmlRecord.__init__(self, xml="<record/>")

        marker = content.find("<H3>Metadata Information</H3>")
        if marker < 0:
            raise Exception("metadata marker not found")

        tablePat = re.compile("<TABLE[^>]*?>(.*?)</TABLE>", re.S)
        m = tablePat.search(content[marker:])
        if not m:
            raise Exception("metadata TABLE not found")
        self.populateXml(m.group())
        self.finalizeXml()
コード例 #26
0
	def __init__ (self, instance):
		"""Load the tomcat users file reported by ``instance`` and its users."""
		self.instance = instance
		users_path = instance._get_tomcat_users_path()
		XmlRecord.__init__ (self, path=users_path)
		self.users = self._get_users()
コード例 #27
0
ファイル: ark_data_writer.py プロジェクト: ostwald/python-lib
 def __init__(self):
     """Create an empty ``arkMappings`` document stamped with the current time."""
     created = time.asctime()
     XmlRecord.__init__(
         self, xml='<arkMappings date="%s"></arkMappings>' % created)
コード例 #28
0
ファイル: lexicon_tree.py プロジェクト: ostwald/python-lib
	def __init__ (self, path=None):
		"""Load the lexicon tree from ``path`` (``default_path`` when falsy)."""
		source_path = path or default_path
		XmlRecord.__init__ (self, path=source_path)
		self.categories = self.get_categories()
		print ("%d categories found" % len (self.categories))
コード例 #29
0
 def __init__(self, instance):
     """Load the server.xml file whose path is reported by ``instance``."""
     self.instance = instance
     server_xml_path = instance._get_server_xml_path()
     XmlRecord.__init__(self, path=server_xml_path)
コード例 #30
0
ファイル: citation_reader.py プロジェクト: ostwald/python-lib
 def __init__(self, path):
     """Load the XML document at ``path`` via ``XmlRecord.__init__``."""
     XmlRecord.__init__(self, path=path)