def clean_subfield(k, v):
    """Clean the value of one subfield according to its code.

    Args:
        k: Subfield code.
        v: Raw subfield value.

    Returns:
        A ``(k, v)`` tuple. ``v`` is cleaned for the codes ``'a'``, ``'b'``,
        ``'c'`` and ``'d'`` and returned unchanged for any other code.
    """
    # Compare against the individual codes: the substring test
    # ``k in 'abc'`` would also accept '', 'ab', 'bc' and 'abc'.
    if k in ('a', 'b', 'c'):
        v = tidy_subfield(v)
    elif k == 'd':
        # Drop surrounding spaces/commas first, then any trailing number dot.
        v = remove_trailing_number_dot(v.strip(' ,'))
        # Rewrite every re_bc_date match as its first group plus " B.C.".
        v = re_bc_date.sub(lambda m: m.group(1) + " B.C.", v)
    return (k, v)
def read_pub_date(rec):
    """Return the first non-empty 'c' subfield value of the '260' fields.

    Args:
        rec: Record object exposing ``get_fields(tag)``; each returned field
            must expose ``get_subfield_values(code)``.

    Returns:
        The first truthy 'c' value, passed through
        ``remove_trailing_number_dot``, or None when the record has no '260'
        field or none of them carries a non-empty 'c' value.
    """
    fields_260 = rec.get_fields('260')
    if not fields_260:
        return None

    # Every field is read, in order, even though only the first hit is used.
    dates = [
        value
        for field in fields_260
        for value in field.get_subfield_values('c')
        if value
    ]
    if not dates:
        return None
    return remove_trailing_number_dot(dates[0])
def read_pub_date(rec):
    """Return the first non-empty 'c' subfield value of the '260' fields.

    ``remove_brackets()`` is called on every '260' field before its
    subfields are read.

    Args:
        rec: Record object exposing ``get_fields(tag)``; each returned field
            must expose ``remove_brackets()`` and
            ``get_subfield_values(code)``.

    Returns:
        The first truthy 'c' value, passed through
        ``remove_trailing_number_dot``, or None when the record has no '260'
        field or none of them carries a non-empty 'c' value.
    """
    fields_260 = rec.get_fields('260')
    if not fields_260:
        return None

    dates = []
    for field in fields_260:
        # Brackets are removed on every field, not only the one that is used.
        field.remove_brackets()
        dates.extend(value for value in field.get_subfield_values('c') if value)

    if not dates:
        return None
    return remove_trailing_number_dot(dates[0])
def do_normalize(author_key, best_key, authors):
    """Normalize one author record and save it if anything changed.

    When ``author_key`` equals ``best_key`` the record is cleaned in place:
    for keys containing ``'date'`` the trailing ``re_number_dot`` match is
    cut off, and text values are passed through ``norm``.

    Otherwise the record is aligned with the ``best_key`` record: every
    field present on the best record is copied over (cleaned the same way),
    while fields that exist only on this record are just normalized. The
    bookkeeping fields 'key', 'last_modified', 'type', 'id' and 'revision'
    are never touched in this branch.

    Args:
        author_key: Key of the author record to update.
        best_key: Key of the record whose values take precedence.
        authors: Unused; kept so existing callers keep working.

    Returns:
        None. ``ol.save`` is called with the comment 'merge authors' only
        when at least one field changed.
    """
    need_update = False
    a = ol.get(author_key)
    if author_key == best_key:
        # Snapshot the items so assigning into ``a`` cannot disturb iteration.
        for k, v in list(a.items()):
            cleaned = v
            if 'date' in k:
                m = re_number_dot.search(v)
                if m:
                    cleaned = v[:-len(m.group(1))]
            if isinstance(cleaned, six.text_type):
                cleaned = norm(cleaned)
            # Compare with the stored value, so a trimmed date is written
            # back even when norm() leaves the trimmed text untouched.
            if cleaned != v:
                a[k] = cleaned
                need_update = True
    else:
        best = ol.get(best_key)
        skipped = ('key', 'last_modified', 'type', 'id', 'revision')
        # A set union also works where keys() returns a view, unlike
        # ``a.keys() + best.keys()`` which needs lists.
        author_keys = set(
            k for k in set(a.keys()) | set(best.keys()) if k not in skipped
        )
        for k in author_keys:
            if k not in best:
                # Field only exists on this record: normalize it in place.
                v = a[k]
                if not isinstance(v, six.text_type):
                    continue
                norm_v = norm(v)
                if v != norm_v:
                    a[k] = norm_v
                    need_update = True
                continue
            # Field exists on the best record: its cleaned value wins.
            v = best[k]
            if 'date' in k:
                v = remove_trailing_number_dot(v)
            if isinstance(v, six.text_type):
                v = norm(v)
            if k not in a or v != a[k]:
                a[k] = v
                need_update = True
    if not need_update:
        return
    ol.save(author_key, a, 'merge authors')
def do_normalize(author_key, best_key, authors):
    """Normalize one author record and save it if anything changed.

    When ``author_key`` equals ``best_key`` the record is cleaned in place:
    for keys containing ``'date'`` the trailing ``re_number_dot`` match is
    cut off, and ``unicode`` values are passed through ``norm``.

    Otherwise the record is aligned with the ``best_key`` record: every
    field present on the best record is copied over (cleaned the same way),
    while fields that exist only on this record are just normalized. The
    bookkeeping fields 'key', 'last_modified', 'type', 'id' and 'revision'
    are never touched in this branch.

    Args:
        author_key: Key of the author record to update.
        best_key: Key of the record whose values take precedence.
        authors: Unused; kept so existing callers keep working.

    Returns:
        None. ``ol.save`` is called with the comment 'merge authors' only
        when at least one field changed.
    """
    need_update = False
    a = ol.get(author_key)
    if author_key == best_key:
        # Snapshot the items so assigning into ``a`` cannot disturb iteration.
        for k, v in list(a.items()):
            cleaned = v
            if 'date' in k:
                m = re_number_dot.search(v)
                if m:
                    cleaned = v[:-len(m.group(1))]
            if isinstance(cleaned, unicode):
                cleaned = norm(cleaned)
            # Compare with the stored value, so a trimmed date is written
            # back even when norm() leaves the trimmed text untouched.
            if cleaned != v:
                a[k] = cleaned
                need_update = True
    else:
        best = ol.get(best_key)
        skipped = ('key', 'last_modified', 'type', 'id', 'revision')
        # A set union does not depend on keys() returning a list, unlike
        # ``a.keys() + best.keys()``.
        author_keys = set(
            k for k in set(a.keys()) | set(best.keys()) if k not in skipped
        )
        for k in author_keys:
            if k not in best:
                # Field only exists on this record: normalize it in place.
                v = a[k]
                if not isinstance(v, unicode):
                    continue
                norm_v = norm(v)
                if v != norm_v:
                    a[k] = norm_v
                    need_update = True
                continue
            # Field exists on the best record: its cleaned value wins.
            v = best[k]
            if 'date' in k:
                v = remove_trailing_number_dot(v)
            if isinstance(v, unicode):
                v = norm(v)
            if k not in a or v != a[k]:
                a[k] = v
                need_update = True
    if not need_update:
        return
    ol.save(author_key, a, 'merge authors')