def __init__(self, xml, payload_constructor=None):
    """Parse ``xml`` and cache header and payload values on the instance.

    Args:
        xml: XML source passed to ``XmlRecord.__init__`` as the ``xml``
            keyword (a string, not a DOM element).
        payload_constructor: optional override; when falsy,
            ``self.default_payload_constructor`` is used instead.
    """
    self.payload_constructor = (payload_constructor
                                or self.default_payload_constructor)
    XmlRecord.__init__(self, xml=xml)  # xml is a string

    # values read from the record header
    self.pid = self.get_header_field("PID")
    self.pub_date = self.get_header_field("keyDateYMD")
    self.ark = self.get_header_field("ark")
    self.doi = self.get_header_field("doi")

    # get values from payload
    self.payload = self.get_payload()
    self.title = self.payload.get_title()
    self.pub_type = self.payload.get_genre()
    self.journal = self.payload.get_journal()
    self.collaboration = self.payload.get_collaboration()
    try:
        self.authors = self.payload.get_authors_display()
    except Exception:
        # Best effort: a record whose author display cannot be built still
        # loads. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        print('could not get get_authors_display for %s' % self.pid)
        self.authors = 'no authors found'
    self.ncar_authors = self.payload.get_ncar_authors()

    # these fields must be populated externally (e.g., by Reporter)
    self.num_yellowstone_authors = ''
    self.yellowstone_authors = ''
    # A preceding ``= 0`` assignment was a dead store; '' is the value the
    # attribute always ended up with.
    self.sum_author_charges = ''
    self.other_ncar_authors = ''
def __init__(self):
    """Build a ``<wgbh_lexicon/>`` document from the LexiconWorkSheet terms.

    The root element is stamped with the current time. Each term that has a
    category contributes a chain of nested elements: one "category" element
    followed by one "segment" element per entry of ``term.segments``.
    Existing children are reused when ``self.findChild`` returns one.

    NOTE(review): the nesting below was reconstructed from a flattened
    source; confirm the placement of ``if isLeaf`` and of the final
    ``if i > 10000: break`` against the real file.
    """
    XmlRecord.__init__(self, xml="<wgbh_lexicon/>")
    self.doc.setAttribute("timestamp", time.asctime())
    self.terms = LexiconWorkSheet()
    for i, term in enumerate(self.terms):
        # some terms have no category, skip these!
        if term.category:
            # progress report every 100th term
            if i % 100 == 0:
                print '%d/%d - %s' % (i, len(self.terms), term.prettyTerm)
            xpath = term.xpath  # not used again in this method
            parent = self.doc
            # NOTE: this is the term's own list, so the insert below
            # modifies term.segments in place (category becomes item 0).
            term_parts = term.segments
            term_parts.insert(0, term.category)
            for j, part in enumerate(term_parts):
                isLeaf = self._is_leaf_segment(j, term)
                isCategory = j == 0
                # Categories are mapped through lexicon_set_map; the and/or
                # form falls back to the raw part if the mapped value is falsy.
                itemText = isCategory and lexicon_set_map[part] or part
                child = self.findChild(parent, itemText)
                nodeName = isCategory and "category" or "segment"
                if not child:
                    child = self.addElement(parent, nodeName)
                    # print 'added %s' % nodeName
                    child.setAttribute("text", itemText)
                if isLeaf:
                    child.setAttribute("id", term.id)
                    child.setAttribute("term", term.prettyTerm)
                    # XmlUtils.setText (child, term.prettyTerm)
                # descend: the next part is attached beneath this element
                parent = child
        # hard cap on the number of worksheet rows visited
        if i > 10000:
            break
def __init__(self, path=None, xml=None):
    """Load the record and mirror its elements as instance attributes.

    Every name in ``self.field_list`` is first set to None. Then, for each
    element returned by ``self.getElements(self.doc)``, the attribute named
    after the element's tag is set to the element's text and the assignment
    is echoed to stdout.
    """
    XmlRecord.__init__(self, path, xml)

    for field_name in self.field_list:
        setattr(self, field_name, None)

    for node in self.getElements(self.doc):
        tag = node.tagName
        setattr(self, tag, self.getText(node))
        print('set %s to %s' % (tag, self.getText(node)))
def __init__(self):
    """Create the skeleton OPML document for the "Subject - Math" vocabulary.

    Sets the title/namespace/schema/version attributes, declares the schema
    location and namespaces on the root, and adds a ``head`` (with ``title``
    and ``concept`` children) and an empty ``body`` element.
    """
    XmlRecord.__init__(self, xml="<opml></opml>")

    self.title = "Subject - Math"
    self.nameSpaceUri = "http://ns.nsdl.org/ncs/fields"
    self.schemaUri = "http://ns.nsdl.org/ncs/msp2/1.00/schemas/fields/mathSubject.xsd"
    self.version = "2.0"
    self.defaultNamespace = "http://ns.nsdl.org/ncs/fields"

    self.setSchemaLocation(self.schemaUri, self.nameSpaceUri)
    self.setSchemaNamespace()
    self.doc.setAttribute("xmlns:" + self.schema_instance_namespace,
                          self.defaultNamespace)

    # <head> holds the title and the concept descriptor
    self.head = self.addElement(self.doc, "head")
    title_el = self.addElement(self.head, "title")
    self.setText(title_el, self.title)

    concept_attrs = (
        ("language", "en-us"),
        ("metaFormat", "osm"),
        ("metaVersion", "1.0.0"),
        ("text", "Mathematics Subject"),
        ("audience", "cataloger"),
        ("path", "/record/coverage/location/@state"),
        ("deftn", "mathematical topics the resource addresses"),
        ("collapseExpand", "true"),
    )
    concept_el = self.addElement(self.head, "concept")
    for attr_name, attr_value in concept_attrs:
        concept_el.setAttribute(attr_name, attr_value)

    self.body = self.addElement(self.doc, "body")
def __init__(self, url):
    """Fetch the page at ``url`` and load its "Archive Browse" TABLE as XML.

    The first TABLE after the "Archive Browse" heading is extracted, cleaned
    up with the ``webcatUtils`` helpers so that it can be parsed as XML, and
    handed to ``XmlRecord.__init__``. The elements returned by
    ``self.getElements(self.doc)`` are kept in ``self.nodeElements``.

    Args:
        url: address of the browse page to read.

    Raises:
        Exception: if the "Archive Browse" heading, or a TABLE following it,
            is not found in the page.
    """
    self.url = url

    pagedata = urllib.urlopen(url)
    try:
        html = pagedata.read()
    finally:
        # release the connection even when the read fails
        pagedata.close()

    marker = html.find("<FONT SIZE=4 FACE=arial>Archive Browse</FONT>")
    if marker < 0:
        raise Exception("browse html not found")

    tablePat = re.compile("<TABLE[^>]*?>(.*?)</TABLE>", re.S)
    m = tablePat.search(html[marker:])
    if not m:
        raise Exception("browse TABLE not found")

    ## following are manipulations required to convert HTML into XML
    tableXml = webcatUtils.stripComments(m.group())
    # some attributes have no value or unquoted value
    tableXml = webcatUtils.fixAttributes(tableXml)
    # bold tags are interleaved with "A" tags!
    tableXml = webcatUtils.removeBoldTags(tableXml)
    # Font tags just make processing difficult
    tableXml = webcatUtils.removeFontTags(tableXml)

    # (two permanently disabled ``if 0:`` dumps to tableXml.xml were dropped)
    XmlRecord.__init__(self, xml=tableXml)
    self.nodeElements = self.getElements(self.doc)
def __init__(self, path):
    """Load the record at ``path`` and cache its file name and key fields.

    ``accessionNum``, ``recordID`` and ``url`` are each read through
    ``self._get_field`` and stored under the same attribute name.
    """
    XmlRecord.__init__(self, path=path)
    self.path = path
    self.filename = os.path.basename(self.path)

    for field in ("accessionNum", "recordID", "url"):
        setattr(self, field, self._get_field(field))
def __init__(self, instance, xmlFormat):
    """Load the framework config file for ``xmlFormat`` from ``instance``.

    The file is ``<xmlFormat>.xml``, resolved through
    ``instance._get_framework_config_path``. No existence check is made
    here before the path is handed to ``XmlRecord.__init__``.
    """
    self.instance = instance
    self.xmlFormat = xmlFormat

    config_name = xmlFormat + ".xml"
    config_path = instance._get_framework_config_path(config_name)
    XmlRecord.__init__(self, path=config_path)
def __init__(self, path=None, xml=None):
    """Load an existing record, or make a new one when no source is given.

    ``self.fields_list`` is always set from the keys of ``self.field_specs``.
    If neither ``path`` nor ``xml`` is truthy, ``self.makeRecord()`` is
    called instead of ``XmlRecord.__init__``.
    """
    self.fields_list = self.field_specs.keys()

    if not (path or xml):
        self.makeRecord()
        return

    XmlRecord.__init__(self, path=path, xml=xml)
def __init__(self, path, NSES=None):
    """Load the record at ``path`` and cache its group, band and ids.

    ``path`` and ``NSES`` are stored before the XML is loaded; ``numId`` is
    derived from ``nses_id`` via ``getNumId``.
    """
    self.path, self.NSES = path, NSES
    XmlRecord.__init__(self, path=path)

    self.group = self._get_group()
    self.band = self._get_band()
    self.nses_id = self._get_nses_id()
    self.numId = getNumId(self.nses_id)
def __init__(self):
    """Create an empty ``<td-lexicon/>`` document and fill it.

    After loading the worksheet data, the term/id maps and the node map are
    built, then ``processNodes()`` and ``makeDoc()`` run in that order.
    """
    self.docId = "td-lexicon"
    root_xml = "<%s/>" % self.docId
    XmlRecord.__init__(self, xml=root_xml)

    self.lexiconData = LexiconWorkSheet()
    term_map, id_map = self.makeTermAndIdMap()
    self.termMap = term_map
    self.idMap = id_map
    self.nodeMap = NodeMap()

    self.processNodes()
    self.makeDoc()
def __init__(self, path):
    """Load the EAD file at ``path`` and index every collection item by id.

    ``self.archdesc`` wraps the ``ead/archdesc`` node; ``self.itemMap`` maps
    ``item.id`` to the item for every item of every collection (a later item
    with the same id replaces an earlier one).
    """
    XmlRecord.__init__(self, path=path)

    archdesc_node = self.selectSingleNode(self.dom, 'ead/archdesc')
    self.archdesc = ArchDesc(archdesc_node, self)
    self.collections = self._get_collections()

    self.itemMap = UserDict()
    item_map = self.itemMap
    for collection in self.collections:
        for entry in collection.getItems():
            item_map[entry.id] = entry
def __init__(self, element):
    """Wrap a collection DOM element and cache its descriptive fields.

    The element is serialised with ``toxml()`` and re-parsed by
    ``XmlRecord.__init__``; each attribute below is the text found at the
    given colon-delimited path.
    """
    XmlRecord.__init__(self, xml=element.toxml())

    text_at = self.getTextAtPath
    self.searchKey = text_at("collection:searchKey")
    self.recordId = text_at("collection:recordId")
    self.xmlFormat = text_at(
        "collection:additionalMetadata:dlese_collect:formatOfRecords")
    self.numRecords = text_at(
        "collection:additionalMetadata:dlese_collect:numRecords")
    self.name = text_at("collection:renderingGuidelines:label")
def __init__(self, xml):
    """Parse a ``savedResource`` XML string and cache its identifying fields.

    Stores the saved xml format, the resource id and the collection key read
    from the paths below.
    """
    XmlRecord.__init__(self, xml=xml)

    text_at = self.getTextAtPath
    self.savedXmlFormat = text_at('savedResource/savedXmlFormat')
    self.id = text_at('savedResource/id')
    self.collection = text_at('savedResource/ddsRepoInfo/collectionKey')
def __init__(self):
    """Load ``self.data_path`` and wrap every collection node.

    Reports how many ``ncsCollections/collection`` nodes were found, then
    stores one ``CollectionInfo`` per node in ``self.collectionInfos``.
    """
    XmlRecord.__init__(self, path=self.data_path)

    nodes = self.selectNodes(self.dom, "ncsCollections/collection")
    print('%d collections found' % len(nodes))

    self.collectionInfos = [CollectionInfo(node) for node in nodes]
def __init__(self, path):
    """Load the record at ``path`` and cache its children and field values.

    ``creators`` and ``contributors`` are sorted in place once all fields
    have been read.
    """
    XmlRecord.__init__(self, path=path)
    self.children = self._get_children()

    self.creators = self.getFieldValues("creator")
    self.contributors = self.getFieldValues("contributor")
    self.title = self.getFieldValue("title")
    # NOTE(review): "tn_isssue" (three s) may be a typo for "tn_issue";
    # confirm against the record schema before changing it.
    self.issue = self.getFieldValue("tn_isssue")
    self.accessionNum = self.getFieldValue("accessionNum")

    for names in (self.creators, self.contributors):
        names.sort()
def __init__(self, data, id):
    """Create a new ``<record />`` document for ``data`` and populate it.

    ``data`` is stored and checked with ``self._validate()`` before any XML
    is built. The root then gets its schema namespace, schema location and
    default namespace, a ``recordId`` child holding ``id``, and finally the
    content written by ``self.populate()``.
    """
    # validate first, so nothing is built for bad data
    self.data = data
    self._validate()
    self.id = id

    XmlRecord.__init__(self, xml="<record />")

    # namespace / schema declarations on the root element
    self.setSchemaNamespace()
    self.setSchemaLocation(self.schemaURI, self.targetNamespace)
    self.setDefaultNamespace(self.targetNamespace)

    self.addChild("recordId", id)
    self.populate()
def __init__(self):
    """Load the titles listing and collect its terms.

    ``self.terms`` holds one ``Term`` per ``term`` element;
    ``self.multiTerms`` keeps those whose ``docCount`` is greater than 1.
    Both counts are reported on stdout.
    """
    XmlRecord.__init__(self, path=titles_listing)

    term_nodes = self.selectNodes(self.dom,
                                  'DDSWebService/ListTerms/terms/term')
    print('%d termElements found' % len(term_nodes))

    self.terms = [Term(node) for node in term_nodes]
    self.multiTerms = [term for term in self.terms if term.docCount > 1]
    print('%d multiTerms found' % len(self.multiTerms))
def __init__(self, path):
    """Load the record at ``path`` and derive its id, file name and parts.

    The file name is the id with ".xml" appended. The NCS item and the DCS
    data record are built last, after the id, timestamp and NDR handle are
    set.
    """
    XmlRecord.__init__(self, path=path)

    self.id = self._make_id()
    self.filename = self.id + ".xml"
    self.timeStamp = self._get_time_stamp()
    self.ndrHandle = self.getTextAtPath("ndrMetadataInfo/ndrHandle")

    self.ncs_item = self._make_ncs_item()
    self.dcs_data = self._make_dcs_data_record()
def __init__(self, url):
    """Load the mapping document from ``url`` plus both standards documents.

    The XML returned by ``self.getUrl(url)`` becomes this record. The NEW
    and OLD standards are then read from ``new_asnpath`` and ``old_asnpath``
    and their keys are cached alongside them.
    """
    self.url = url
    XmlRecord.__init__(self, xml=self.getUrl(url))
    self.mapped_ids = self._getMappedIds()

    print("\nreading NEW standards")
    new_standards = StdDocument(new_asnpath)
    self.new_AsnStandards = new_standards
    self.new_asn_ids = new_standards.keys()

    print("\nreading OLD standards")
    old_standards = StdDocument(old_asnpath)
    self.old_AsnStandards = old_standards
    self.old_asn_ids = old_standards.keys()
def __init__(self, path=None, xml=None):
    """Load a schema document and publish its root namespace prefix.

    After parsing, ``xsd_globals.XSD_PREFIX`` is set to this document's
    ``root_name_space_prefix``. This is a module-level value, so it is
    overwritten by each instance that is created.
    """
    if path:
        print("reading %s" % path)
    XmlRecord.__init__(self, path=path, xml=xml)

    # Imported here and unbound again straight away: only the module
    # attribute assignment is wanted, not a lasting local name.
    import xsd_globals
    xsd_globals.XSD_PREFIX = self.root_name_space_prefix
    del xsd_globals

    # since these use XSD prefix - we define only after figuring out what it is..
    self.enumeration_path = qp("schema/simpleType/restriction/enumeration")
    self.restriction_path = qp("schema/simpleType/restriction")

    self._enumTypes = None
def __init__(self, table_html, encoding=None):
    """Parse ``table_html`` as XML and store the result of ``self.parse()``.

    If the markup cannot be parsed, it is written to
    ``bogus-table-html.xml`` in the current directory for inspection and
    the process exits.

    Args:
        table_html: table markup to parse.
        encoding: optional encoding; falls back to
            ``self.default_encoding`` when falsy.
    """
    self.encoding = encoding or self.default_encoding
    # NOTE(review): as written this replaces a space with a space. The first
    # argument was probably a non-breaking space or an entity that got lost;
    # confirm against the original file.
    table_html = table_html.replace(' ', ' ')
    try:
        XmlRecord.__init__(self, xml=table_html, encoding=self.encoding)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into a "couldnt parse" report.
        # The leading escape used to swallow the "t" of "topicRecord".
        print('\ttopicRecord: XmlRecord couldnt parse table xml - halting')
        bog = 'bogus-table-html.xml'
        fp = codecs.open(bog, 'w', self.encoding)
        try:
            fp.write(table_html)
        finally:
            # close the dump file even if the write itself fails
            fp.close()
        print('wrote to %s' % bog)
        sys.exit()
    self.parsed_data = self.parse()
def __init__(self, element, payload_constructor=None):
    """Wrap a ``record`` DOM element and cache its head, status and payload.

    Args:
        element: DOM element; serialised with ``toxml()`` and re-parsed.
        payload_constructor: optional override; when falsy,
            ``self.default_payload_constructor`` is used.
    """
    self.payload_constructor = (payload_constructor
                                or self.default_payload_constructor)
    XmlRecord.__init__(self, xml=element.toxml())

    # values from record:head
    text_at = self.getTextAtPath
    self.recId = text_at("record:head:id")
    self.xmlFormat = text_at("record:head:xmlFormat")
    self.fileLastModified = text_at("record:head:fileLastModified")

    self.collection = self.get_collection()
    self.collectionName = self.get_collectionName()

    # stored content and the DCS values read from it
    stored_node = self.selectSingleNode(self.dom, "record:storedContent")
    self.storedContent = StoredContent(stored_node)
    self.dcsstatus = self.get_status()
    self.dcsstatusNote = self.storedContent.get('dcsstatusNote')
    self.dcsisValid = self.storedContent.get('dcsisValid')

    self.payload = self.get_payload()
def __init__(self):
    """Build a ``<positionHistory/>`` document from ``JoinedData``.

    One ``person`` element is added per key of the joined data, carrying
    ``upid`` and, taken from the first record, ``peid``. Each record adds a
    ``position`` child whose attributes are the stringified record values.
    """
    XmlRecord.__init__(self, xml="<positionHistory/>")
    self.data = JoinedData()

    position_attrs = [
        'start', 'end', 'entity', 'lab', 'org', 'divProg', 'divCode'
    ]
    for upid in self.data.keys():
        person_el = XmlUtils.addElement(self.dom, self.doc, 'person')
        person_el.setAttribute('upid', str(upid))

        for index, record in enumerate(self.data[upid]):
            # peid comes from the first record only
            if index == 0:
                person_el.setAttribute('peid', str(record.peid))

            position_el = XmlUtils.addElement(self.dom, person_el, 'position')
            for name in position_attrs:
                position_el.setAttribute(name, str(getattr(record, name)))
def __init__(self, recordsDir, xmlFormat, collection, collectionName,
             configDir):
    """Start from ``CMtemplate`` and record where the collection lives.

    Args:
        recordsDir: root directory of the records tree.
        xmlFormat: metadata format; a directory level under ``recordsDir``.
        collection: collection key; a directory level under ``xmlFormat``.
        collectionName: display name of the collection.
        configDir: configuration directory (stored, not checked here).

    Raises:
        IOError: if the item directory
            (``recordsDir/xmlFormat/collection``) or the dcs_data directory
            (``recordsDir/dcs_data/xmlFormat/collection``) does not exist.
    """
    XmlRecord.__init__(self, xml=CMtemplate)
    self.pidCounter = 0
    self.xpath_delimiter = "/"

    self.recordsDir = recordsDir
    self.xmlFormat = xmlFormat
    self.collection = collection
    self.collectionName = collectionName
    self.itemDir = os.path.join(recordsDir, xmlFormat, collection)
    self.dcsDataDir = os.path.join(recordsDir, "dcs_data", xmlFormat,
                                   collection)
    self.configDir = configDir

    # String exceptions are not raisable on Python >= 2.6, and the messages
    # referenced undefined locals; raise a real exception that names the
    # attribute actually checked.
    if not os.path.isdir(self.itemDir):
        raise IOError("NotDirectoryError: itemDir (%s)" % self.itemDir)
    if not os.path.isdir(self.dcsDataDir):
        raise IOError("NotDirectoryError: dcsDataDir (%s)" % self.dcsDataDir)
def __init__(self, url, prefix=None):
    """Fetch ``url`` and build a ``<record/>`` from its metadata TABLE.

    The first TABLE after the "Metadata Information" heading is passed to
    ``self.populateXml``; ``self.finalizeXml()`` then completes the record.

    Args:
        url: address of the page to read.
        prefix: optional prefix; falls back to ``self.default_prefix`` when
            falsy.

    Raises:
        Exception: if the heading, or a TABLE following it, is not found.
    """
    self.url = url
    self.prefix = prefix or self.default_prefix

    pagedata = urllib.urlopen(url)
    try:
        content = pagedata.read()
    finally:
        # release the connection even when the read fails
        pagedata.close()

    XmlRecord.__init__(self, xml="<record/>")

    marker = content.find("<H3>Metadata Information</H3>")
    if marker < 0:
        raise Exception("metadata marker not found")

    tablePat = re.compile("<TABLE[^>]*?>(.*?)</TABLE>", re.S)
    m = tablePat.search(content[marker:])
    if not m:
        raise Exception("metadata TABLE not found")

    self.populateXml(m.group())
    self.finalizeXml()
def __init__(self, instance):
    """Load the tomcat users file of ``instance`` and cache its users.

    The file location comes from ``instance._get_tomcat_users_path()``.
    """
    self.instance = instance
    users_path = instance._get_tomcat_users_path()
    XmlRecord.__init__(self, path=users_path)
    self.users = self._get_users()
def __init__(self):
    """Create an empty ``arkMappings`` document dated with the current time."""
    created = time.asctime()
    XmlRecord.__init__(
        self, xml='<arkMappings date="%s"></arkMappings>' % created)
def __init__(self, path=None):
    """Load the document at ``path`` (or ``default_path``) and its categories.

    The number of categories found is reported on stdout.
    """
    source = path or default_path
    XmlRecord.__init__(self, path=source)

    self.categories = self.get_categories()
    print("%d categories found" % len(self.categories))
def __init__(self, instance):
    """Load the server.xml file belonging to ``instance``.

    The file location comes from ``instance._get_server_xml_path()``.
    """
    self.instance = instance
    server_xml_path = instance._get_server_xml_path()
    XmlRecord.__init__(self, path=server_xml_path)
def __init__(self, path):
    """Load the XML document stored at ``path``; no further setup is done."""
    XmlRecord.__init__(
        self,
        path=path,
    )