# Example no. 1
# 0
def findDleseCollectionRecord(field, value):
    """
    Return the first DleseCollectRecord whose `field` equals `value`.

    Every entry of <dlese_records>/dlese_collect/collect whose name ends in
    'xml' is loaded in turn, in the order os.listdir reports them. Returns
    None when no record matches.
    """
    collect_dir = os.path.join(dlese_records, 'dlese_collect', 'collect')
    xml_names = [name for name in os.listdir(collect_dir)
                 if name.endswith('xml')]
    for name in xml_names:
        candidate = DleseCollectRecord(path=os.path.join(collect_dir, name))
        if candidate.get(field) == value:
            return candidate
    return None
class DleseToNcsCollectTransform:
    """
    Builds an NCS collection record (self.ncr) from a DLESE collection record
    (self.dcr) by copying field values from one to the other.
    """

    # path handed to NCSCollectRecord to create the starting ncr for a transform
    ncs_collect_template = '/Users/ostwald/devel/python/python-lib/uconn/ncs_collect_template.xml'
    # directory joined with "<ncr id>.xml" to form the ncr's destination path
    tmp_output_path = '/Users/ostwald/tmp/dlese_to_ncs_collect'
    """
	field_mappings documentation
	"""
    # Each entry is either a single field name or a two-item list read as
    # [dlese_collect field, ncs_collect field].
    # NOTE(review): presumably consumed by process_field_mappings, which is
    # not visible in this part of the file - confirm how plain names are used.
    field_mappings = [
        # dlese_collect -> ncs_collect
        'id',  # we do id by hand
        'description',
        ['collectionLocation', 'url'],
        ['fullTitle', 'title'],
        ['id', 'collSetSpec'],
        ['created', 'dateTime'],  # involves a massage
        'libraryFormat',
        ['key', 'oaiSetSpec']
    ]

    def __init__(self, dcr_path, id=None):
        """
        Load the DLESE record at `dcr_path` and populate a new NCS record
        created from the class-level template.

        dcr_path -- path passed to DleseCollectRecord
        id       -- when truthy, assigned to the NCS record via setId

        self.initialized is False while construction is in progress and is
        set to True as the final step.
        """
        self.initialized = False
        self.dcr = DleseCollectRecord(path=dcr_path)
        self.ncr = NCSCollectRecord(path=self.ncs_collect_template)

        self.process_field_mappings()
        self.injectContributors()

        # libraryFormat -> (view context, metadataPrefix); any other format
        # leaves both untouched
        library_format = self.dcr.get('libraryFormat')
        for known_format, view_context, metadata_prefix in (
                ('adn', 'DLESECollections', 'nsdl_dc'),
                ('dlese_anno', 'DLESEAnnotations', 'comm_anno')):
            if library_format == known_format:
                self.ncr.addViewContext(view_context)
                self.ncr.set('metadataPrefix', metadata_prefix)
                break

        if id:
            self.ncr.setId(id)

        # destination path for writing is derived from the ncr's id
        output_name = self.ncr.getId() + '.xml'
        self.ncr.path = os.path.join(self.tmp_output_path, output_name)
        self.initialized = True

    def injectField(self, dcr_field, ncr_field=None):
        if ncr_field is None:
            ncr_field = dcr_field

        # get value from dcr_field
        try:
            value = self.dcr.get(dcr_field)
            if not value:
                # msg = 'WARN: injectField - no value for "%s" in %s' % (dcr_field, self.dcr.getId())
                # print msg
                raise Exception, "no value in metadata"
        except Exception, msg:
            print "WARN ingest did NOT get value for %s at '%s': %s" % (
                self.dcr.getId(), dcr_field, msg)
            value = ""

        # kludges for certain fields
        if dcr_field == 'created':
            value += 'T00:00:00Z'

        if ncr_field == 'collSetSpec':
            value = 'ncs-' + value

        # inject value in ncr_field
        try:
            print 'setting "%s" at "%s"' % (value, ncr_field)
            self.ncr.set(ncr_field, value)
        except Exception, msg:
            print "ERROR setting value at '%s': %s" % (ncr_field, msg)