def processDescription(self):
    """
    Clean up the dc:description values of this record.

    Each description is stripped of surrounding whitespace and then
    handled according to how it ends (case-insensitive):

    ends with "pdf":
        manuscripts - remove entire description
        monographs  - remove "pdf"; if the remaining text contains 'ASR'
                      it is added to library_dc:altTitle and removed from
                      the descriptions, otherwise the remaining text is kept
        any other collection (e.g. technotes)
                    - remove "pdf" but keep rest of description
    ends with ".jpg":
        remove entire description (all collections)

    None values and descriptions that end up empty are dropped. The
    surviving descriptions replace the field's values via setFieldValues.
    """
    rec = self.lib_dc_rec
    desc_field = "dc:description"
    issue_field = "library_dc:issue"
    altTitle_field = "library_dc:altTitle"

    # issue_field is not used below, but it has always been validated here,
    # so the check is kept.
    for field in (desc_field, issue_field, altTitle_field):
        utils.validateField(field)

    new_descriptions = []
    for desc in rec.getFieldValues(desc_field):
        if desc is None:
            continue
        desc = desc.strip()
        lowered = desc.lower()

        if lowered.endswith('pdf'):
            if self.collection == 'manuscripts':
                continue
            # Drop the "pdf" suffix together with the whitespace that
            # separated it from the text, so the kept description does not
            # end in a stray blank.
            desc = desc[:-3].strip()
            if self.collection == 'monographs' and 'ASR' in desc:
                rec.addFieldValue(altTitle_field, desc)
                continue
        elif lowered.endswith('.jpg'):
            continue

        if desc:
            new_descriptions.append(desc)

    rec.setFieldValues(desc_field, new_descriptions)
def toTitleCase(self, field):
    """
    Rewrite every value of `field` in title-case.

    Each value is stripped and passed through titlecase(). The field is
    only written back when at least one value was produced.
    """
    utils.validateField(field)
    record = self.lib_dc_rec
    cased = [titlecase(raw.strip()) for raw in record.getFieldValues(field)]
    if cased:
        record.setFieldValues(field, cased)
def showVocabs(rp):
    """
    Print the record id, then hand the values of the instName,
    instDivision and libraryType fields to showFieldValues.
    """
    print("\n%s" % rp.recId)
    record = rp.lib_dc_rec
    vocab_fields = (
        "library_dc:instName",
        'library_dc:instDivision',
        'library_dc:libraryType',
    )
    for name in vocab_fields:
        utils.validateField(name)
        showFieldValues(name, record.getFieldValues(name))
def periods(rp, field="dc:title"):
    """
    Report the values of `field` that end with a period.

    rp    - record processor exposing `recId` and `lib_dc_rec`
    field - name of the field to inspect (defaults to "dc:title")

    Prints a header line with the record id and field name followed by
    one quoted line per offending value. Prints nothing when no value
    ends with a period.
    """
    rec = rp.lib_dc_rec
    utils.validateField(field)

    # Test truthiness first: indexing val[-1] raised IndexError on an empty
    # string and TypeError on None.
    period_vals = [val for val in rec.getFieldValues(field)
                   if val and val.endswith('.')]

    if period_vals:
        print("\n%s (%s)" % (rp.recId, field))
        for val in period_vals:
            print("\t'%s'" % val)
def dedup(self, field):
    """
    Drop repeated values of `field`, keeping the first occurrence of each
    and preserving the original order.

    THIS DOES NOT NEED TO BE USED
    """
    utils.validateField(field)
    unique = []
    record = self.lib_dc_rec
    for candidate in record.getFieldValues(field):
        if candidate in unique:
            continue
        unique.append(candidate)
    record.setFieldValues(field, unique)
def multiFields(rp): rec = rp.lib_dc_rec multis = {} for field in field_list: utils.validateField(field) vals = rec.getFieldValues(field) if len(vals) > 1: multis[field] = vals if multis: print "\n", rp.recId for field in multis.keys(): showFieldValues(field, multis[field])
def normalizeField(self, field, fn):
    """
    Replace the values of `field` with their normalized forms.

    `fn` takes a single value and returns the normalized value for this
    field. Results that are falsy (None, empty string, ...) are discarded.
    """
    record = self.lib_dc_rec
    utils.validateField(field)
    results = (fn(raw) for raw in record.getFieldValues(field))
    record.setFieldValues(field, [item for item in results if item])
def removeDupValues(self, field1, field2):
    """
    Remove from field1 every value that also occurs in field2.

    The comparison ignores case (both sides are upper-cased); the values
    kept in field1 retain their original casing and order.
    """
    utils.validateField(field1)
    utils.validateField(field2)
    record = self.lib_dc_rec
    candidates = record.getFieldValues(field1)
    blocked = [other.upper() for other in record.getFieldValues(field2)]
    kept = [val for val in candidates if val.upper() not in blocked]
    record.setFieldValues(field1, kept)
def fieldsHaveMatchingValues(rp, field1, field2, verbose=True): utils.validateField(field1) utils.validateField(field2) rec = rp.lib_dc_rec vals1 = map(string.upper, rec.getFieldValues(field1)) vals2 = map(string.upper, rec.getFieldValues(field2)) for val in vals1: if val in vals2: if verbose: print "\n", rp.recId showFieldValues(field1, vals1) showFieldValues(field2, vals2) return 1 return 0
def normalizeDateDigitized(self):
    """
    Collapse library_dc:date_digitized to a SINGLE VALUE
    (policy as of 10/27/08): the LATEST one.

    Every value is parsed with int() and the largest is kept, written back
    as a string. A record without any value for the field is left alone.
    """
    field = "library_dc:date_digitized"
    record = self.lib_dc_rec
    utils.validateField(field)
    vals = record.getFieldValues(field)
    if not vals:
        return

    # -1 is the floor the running maximum starts from.
    latest = -1
    for raw in vals:
        latest = max(latest, int(raw))

    record.removeField(field)
    record.setFieldValue(field, str(latest))
def massageTitleAndAltTitle(self):
    """
    Swap dc:title and library_dc:altTitle values where a collection needs it.

    Often we need to swap title and alt title values, but the rules for
    this are different for each framework. Only 'manuscripts' and
    'monographs' are handled here; we don't process 'technotes' or
    'theses' (no other collection value is tested).

    Nothing happens unless both fields have at least one value.

    manuscripts: when there is exactly one title and one altTitle, and the
        title starts with the altTitle minus its last character, the two
        values are exchanged.
    monographs: when some title contains 'ASR' and some altTitle contains
        'Annual Scientific Report', the altTitle field is removed, the
        matching altTitle becomes the title and the matching title becomes
        the altTitle.
    """
    rec = self.lib_dc_rec
    title_field = 'dc:title'
    altTitle_field = 'library_dc:altTitle'
    utils.validateField(title_field)
    utils.validateField(altTitle_field)
    title_vals = rec.getFieldValues(title_field)
    altTitle_vals = rec.getFieldValues(altTitle_field)
    if title_vals and altTitle_vals:
        # manuscripts case: only acts on the unambiguous one-title /
        # one-altTitle situation
        if self.collection == "manuscripts" and len(
                title_vals) == 1 and len(altTitle_vals) == 1:
            title = title_vals[0]
            alt = altTitle_vals[0]
            # some altTitles end in period, some don't -- the last character
            # of alt is always dropped before the prefix comparison
            if title.startswith(alt[:-1]):
                # NOTE(review): presumably setFieldValue replaces the existing
                # value(s) of the field -- confirm against the record class
                rec.setFieldValue(title_field, alt)
                rec.setFieldValue(altTitle_field, title)
        # monographs case
        if self.collection == "monographs":
            ## is there a title element containing 'ASR'?
            ASRTitle = None
            for title in title_vals:
                if "ASR" in title:
                    ASRTitle = title
                    # 'continue' is the last statement of the loop body, so it
                    # has no effect: the loop keeps going and the LAST
                    # matching title wins
                    continue
            ## is there an altTitle containing 'Annual Scientific Report'?
            ASRAltTitle = None
            pat = 'Annual Scientific Report'
            for altTitle in altTitle_vals:
                if pat in altTitle:
                    ASRAltTitle = altTitle
                    # same as above: no early exit, last match wins
                    continue
            if ASRTitle and ASRAltTitle:
                # the altTitle field is removed before being re-set; the
                # title field is set without a preceding removeField
                rec.removeField(altTitle_field)
                rec.setFieldValue(title_field, ASRAltTitle)
                rec.setFieldValue(altTitle_field, ASRTitle)
def showTitleStuff(rp):
    """
    Print the record id, then show the values of every field named in the
    module-level `field_list` via showFieldValues.
    """
    print("\n%s" % rp.recId)
    record = rp.lib_dc_rec
    for name in field_list:
        utils.validateField(name)
        showFieldValues(name, record.getFieldValues(name))