def findDleseCollectionRecord(field, value):
    """
    Return the first DleseCollectRecord whose ``field`` equals ``value``.

    Scans the directory <dlese_records>/dlese_collect/collect, considering
    only file names that end with 'xml', and builds a DleseCollectRecord for
    each candidate until one matches.  Candidates are visited in sorted
    file-name order so that "first" is reproducible from run to run.

    Returns the matching record, or None when no record matches.
    """
    dlese_collect_dir = os.path.join(dlese_records, 'dlese_collect', 'collect')

    # os.listdir gives names in arbitrary order; sort for a stable result
    for filename in sorted(os.listdir(dlese_collect_dir)):
        if not filename.endswith('xml'):
            continue
        rec = DleseCollectRecord(path=os.path.join(dlese_collect_dir, filename))
        if rec.get(field) == value:
            return rec

    return None
class DleseToNcsCollectTransform:
    """
    Populate an NCS collection record from a DLESE collection record.

    Constructing an instance reads the DLESE record at ``dcr_path``, loads a
    new NCS record from ``ncs_collect_template``, copies field values across
    and points the NCS record's ``path`` at a file under ``tmp_output_path``.
    The methods shown here only set that path; they do not write the file.
    """

    ncs_collect_template = '/Users/ostwald/devel/python/python-lib/uconn/ncs_collect_template.xml'
    tmp_output_path = '/Users/ostwald/tmp/dlese_to_ncs_collect'

    # field_mappings documentation:
    # each entry is either a single field name, or a
    # [dlese_collect field, ncs_collect field] pair.
    # Presumably consumed by process_field_mappings - confirm against that method.
    field_mappings = [
        # dlese_collect -> ncs_collect
        # NOTE(review): the comment on 'id' suggests the id is handled
        # separately (see setId in __init__) - confirm this entry is wanted.
        'id',  # we do id by hand
        'description',
        ['collectionLocation', 'url'],
        ['fullTitle', 'title'],
        ['id', 'collSetSpec'],
        ['created', 'dateTime'],  # involves a massage
        'libraryFormat',
        ['key', 'oaiSetSpec']
    ]

    def __init__(self, dcr_path, id=None):
        """
        Read the DLESE record at ``dcr_path`` and fill in a new NCS record.

        dcr_path - path of the DLESE collection record to read
        id - optional id for the NCS record; when truthy it is applied with
             setId, otherwise the NCS record keeps whatever id it already has

        ``self.initialized`` is False while the constructor is running and is
        set to True only once every step has completed.
        """
        self.initialized = False
        self.dcr = DleseCollectRecord(path=dcr_path)
        self.ncr = NCSCollectRecord(path=self.ncs_collect_template)

        # these two methods are not defined in this part of the class;
        # they are expected to be provided elsewhere on it
        self.process_field_mappings()
        self.injectContributors()

        # view context and metadata prefix depend on the DLESE library format;
        # any other format leaves both untouched
        xml_format = self.dcr.get('libraryFormat')
        if xml_format == 'adn':
            self.ncr.addViewContext('DLESECollections')
            self.ncr.set('metadataPrefix', 'nsdl_dc')
        elif xml_format == 'dlese_anno':
            self.ncr.addViewContext('DLESEAnnotations')
            self.ncr.set('metadataPrefix', 'comm_anno')

        if id:
            self.ncr.setId(id)

        # set the destination (ncr) path for writing
        self.ncr.path = os.path.join(self.tmp_output_path,
                                     self.ncr.getId() + '.xml')

        self.initialized = True

    def injectField(self, dcr_field, ncr_field=None):
        """
        Copy the value of ``dcr_field`` in the DLESE record to ``ncr_field``
        in the NCS record.

        dcr_field - field to read from self.dcr
        ncr_field - field to set on self.ncr; defaults to ``dcr_field``

        A missing/empty source value, or an error while reading it, is
        reported with a WARN line and an empty string is injected instead.
        An error while setting the destination is reported with an ERROR
        line and otherwise ignored.  Nothing is returned.
        """
        if ncr_field is None:
            ncr_field = dcr_field

        # get value from dcr_field; an empty value is routed through the same
        # handler as a failed read so both produce the same warning
        try:
            value = self.dcr.get(dcr_field)
            if not value:
                raise Exception("no value in metadata")
        except Exception as msg:
            print("WARN ingest did NOT get value for %s at '%s': %s" % (
                self.dcr.getId(), dcr_field, msg))
            value = ""

        # kludges for certain fields - applied only to real values, so that a
        # missing value is not turned into a bogus 'T00:00:00Z' or 'ncs-'
        if value:
            if dcr_field == 'created':
                value += 'T00:00:00Z'
            if ncr_field == 'collSetSpec':
                value = 'ncs-' + value

        # inject value in ncr_field
        try:
            print('setting "%s" at "%s"' % (value, ncr_field))
            self.ncr.set(ncr_field, value)
        except Exception as msg:
            print("ERROR setting value at '%s': %s" % (ncr_field, msg))