def update_old_funder_records(dowrites=0):
    """
    The Kuali backfill updated the records that also had Kuali IDs, but there
    are 452 that still have old-school funder info
    (mods_name_corporate_funder_s:*).

    <name type="corporate">
        <namePart>National Science Foundation (NSF)</namePart>
        <role>
            <roleTerm type="text" authority="marcrelator">funder</roleTerm>
        </role>
    </name>
    <note type="funding">National Science Foundation (NSF): 1852977</note>

    Now we want to remove the corporate funder info, and try to normalize the
    award_ids with Kuali-verified versions.

    If ids can't be normalized thus, then add a
    'displayLabel' = 'Legacy funding data' attribute.

    What this function itself does: it searches Solr for every record matching
    'mods_name_corporate_funder_s:*', prints the number of hits, and for each
    pid fetches the MODS datastream, wraps it in NotesMODS and calls
    do_back_fill([]) on it. The actual record rewriting lives in
    NotesMODS.do_back_fill, which is not defined here.

    dowrites - value assigned to the class-level NotesMODS.dowrites flag before
        processing. Defaults to 0, the value that used to be hard-coded.
        NOTE(review): presumably 0 means "dry run, do not write back" -
        confirm against NotesMODS.
    """
    args = {
        'params': {
            'q': 'mods_name_corporate_funder_s:*'
        },
        'baseUrl': CONFIG.get("fedora", "SERVER") + CONFIG.get("fedora", "SOLR_PATH"),
    }
    feed = feeds.SolrSearchFeed(**args)

    # Single-argument print(...) produces the same output under Python 2's
    # print statement and Python 3's print function.
    print('feed has {}'.format(len(feed.pids)))

    # Class attribute: applies to every NotesMODS instance created below.
    NotesMODS.dowrites = dowrites

    for pid in feed.pids:
        print(pid)
        mods_xml = get_datastream(pid, 'MODS')
        mods = NotesMODS(mods_xml, pid)
        # No award ids are supplied for these legacy records.
        mods.do_back_fill([])
def backfilltest(pid, ids, mods_path=None):
    """
    Exercise the award_id backfill on a single object's MODS stream, printing
    the notes before and after.

    pid - identifier of the object; used to fetch the MODS datastream (unless
        mods_path is given) and passed to NotesMODS
    ids - award ids handed to NotesMODS.do_back_fill
    mods_path - optional path to a local MODS XML file. When given, the MODS
        is read from that file instead of being fetched with get_datastream.

    NotesMODS.dowrites is set to 0 before the backfill is run.
    NOTE(review): presumably this keeps the test from writing anything back -
    confirm against NotesMODS.
    """
    if mods_path is None:
        mods_xml = get_datastream(pid, 'MODS')
    else:
        # Context manager so the file handle is always closed.
        with open(mods_path, 'r') as mods_file:
            mods_xml = mods_file.read()

    NotesMODS.dowrites = 0

    # Build the wrapper for the pid that was actually requested; this used to
    # be hard-coded to 'archives:8918' regardless of the pid argument.
    mods = NotesMODS(mods_xml, pid)

    print('BEFORE Backfill')
    mods.show_notes()
    mods.do_back_fill(ids)
    print('AFTER Backfill')
    mods.show_notes()
def get_mods_instance(self):
    """
    Return the NotesMODS wrapper for this record, creating it on first use.

    While self._mods is None, the 'MODS' datastream for self['pid'] is fetched
    with get_datastream, wrapped in NotesMODS and stored on self._mods; later
    calls return the stored instance without fetching again.
    """
    if self._mods is not None:
        return self._mods

    xml = get_datastream(self['pid'], 'MODS')
    self._mods = NotesMODS(xml, self['pid'])
    return self._mods
def get_mods(pid):
    """
    Return the 'MODS' datastream for pid, exactly as get_datastream provides it.
    """
    return get_datastream(pid, 'MODS')
if __name__ == '__main__':
    # Ad-hoc developer harness: flip the 0/1 switches to choose what runs.

    if 0:
        # Backfill test against a single object.
        pid = 'articles:20981'  # has dups
        # pid = 'articles:22793'  # has legacy
        # Other award_ids values used while testing:
        #   []            (no ids)
        #   ['fee', 'foo']
        #   ['55088']     (collision tester)
        #   None
        award_ids = [
            '1211668',
        ]
        backfilltest(pid, award_ids)

    if 1:
        # Append a funding note to an object's MODS and print the result.
        pid = 'islandora:15'
        # pid = 'archives:8922'
        mods_xml = get_datastream(pid, 'MODS')
        mods = NotesMODS(mods_xml, pid)

        # Named award_id rather than id so the builtin id() is not shadowed.
        award_id = '4567'

        # Namespace prefix in "{uri}" form, prepended to the tag name.
        NS = "{%s}" % mods.namespaces['mods']
        note = ET.SubElement(mods.dom, NS + 'note')
        note.attrib['type'] = 'funding'
        note.text = award_id

        print(mods)

        # The write-back is disabled. Its result must only be printed when the
        # update actually runs: printing `res` while the assignment is
        # commented out raises NameError.
        # res = update_datastream(pid, 'MODS', mods.dom)
        # print(res)