def read_line(line, name):
    """Extract the name and dates from *line* if its name matches *name*.

    The line is ignored (``None`` is returned) when it is empty/falsy or does
    not contain the two-character marker ``'\\x1fd'`` (byte 0x1f followed by
    ``d``; presumably a MARC subfield-d delimiter -- confirm against
    ``get_subfields``).

    Otherwise the ``a``/``b``/``c``/``d`` subfields are read through
    ``get_subfields``, each value is stripped of surrounding `` /,;:``
    characters, and the ``a``/``b``/``c`` values are joined with single
    spaces to form the name. The name, or its ``flip_name`` form, must equal
    *name*; if neither does, ``None`` is returned.

    Returns:
        ``(marc_name, flipped, dates)`` where ``dates`` is the tuple
        ``(birth_date, death_date, date)`` looked up in the result of
        ``pick_first_date`` (missing entries are ``None``), or ``None`` when
        the line is skipped.
    """
    if not line:
        return None
    if '\x1fd' not in line:
        return None

    cleaned = tuple(
        (code, value.strip(' /,;:'))
        for code, value in get_subfields(line, 'abcd')
    )

    name_parts = [value for code, value in cleaned if code in 'abc']
    marc_name = ' '.join(name_parts)
    flipped = flip_name(marc_name)

    # Accept the line only when the wanted name is one of the two spellings.
    if not (marc_name == name or flipped == name):
        return None

    found = pick_first_date(value for code, value in cleaned if code in 'abcd')
    dates = (
        found.get('birth_date', None),
        found.get('death_date', None),
        found.get('date', None),
    )
    return (marc_name, flipped, dates)
def east_in_by_statement(rec):
    """Tell whether the first author's comma-less name occurs in the by-statement.

    The first author's name has every ``'.'`` and every ``', '`` removed. If
    that squeezed form equals the ``flip_name`` result with its dots removed,
    the answer is False; otherwise the answer is whether the squeezed form is
    a substring of ``rec['by_statement']``.

    NOTE(review): the function name suggests this detects eastern name order
    in the by-statement -- confirm with callers; the code itself only
    performs the string comparison described above.

    Args:
        rec: mapping that may contain ``'by_statement'`` (a string) and
            ``'authors'`` (a sequence of mappings with a ``'name'`` key).

    Returns:
        bool. False when ``'by_statement'`` or ``'authors'`` is missing, or
        when ``'authors'`` is empty.
    """
    if 'by_statement' not in rec or 'authors' not in rec:
        return False

    authors = rec['authors']
    if not authors:
        # An empty author list has no name to look for; treat it like a
        # missing 'authors' key instead of failing on authors[0].
        return False

    name = authors[0]['name']
    flipped = flip_name(name)
    name = name.replace('.', '').replace(', ', '')
    if name == flipped.replace('.', ''):
        return False
    return name in rec['by_statement']
def find_entity(author):
    """Look up the stored author entity that corresponds to *author*.

    Candidates come from ``find_author`` and are loaded with ``withKey``.

    For a non-person author (``author['entity_type'] != 'person'``) only the
    first candidate is considered; one ``/type/redirect`` hop is followed and
    the result must be a ``/type/author``.

    For a person, candidates for the name and, when the name contains
    ``', '``, for its ``flip_name`` form are examined in order. Each key is
    visited once, one redirect hop is followed, ``/type/delete`` entities are
    skipped, and a candidate is kept only when both sides agree on whether a
    ``birth_date`` is present and ``author_dates_match`` accepts the pair.

    Args:
        author: mapping with at least ``'name'`` and ``'entity_type'``;
            ``'birth_date'`` is optional.

    Returns:
        The matching entity, the choice made by ``pick_from_matches`` when
        several candidates remain, or ``None`` when nothing matches.

    Raises:
        AssertionError: a candidate is neither a redirect, a deleted entity
            nor an author.
        ValueError: propagated from ``pick_from_matches`` after the author
            and the candidate list have been printed.
    """
    name = author['name']
    things = find_author(name)

    if author['entity_type'] != 'person':
        if not things:
            return None
        db_entity = withKey(things[0])
        if db_entity['type']['key'] == '/type/redirect':
            db_entity = withKey(db_entity['location'])
        # Explicit raise instead of ``assert`` so the check survives ``-O``.
        if db_entity['type']['key'] != '/type/author':
            raise AssertionError(
                'unexpected type %r' % (db_entity['type']['key'],))
        return db_entity

    if ', ' in name:
        # Build a new list rather than extending in place: the list returned
        # by find_author may be shared or cached by that function.
        things = list(things) + list(find_author(flip_name(name)))

    match = []
    seen = set()
    for key in things:
        if key in seen:
            continue
        seen.add(key)
        db_entity = withKey(key)

        if db_entity['type']['key'] == '/type/redirect':
            # Follow the redirect once, unless its target was already visited.
            key = db_entity['location']
            if key in seen:
                continue
            seen.add(key)
            db_entity = withKey(key)

        if db_entity['type']['key'] == '/type/delete':
            continue

        if db_entity['type']['key'] != '/type/author':
            # Print the offending record before failing. The single-argument
            # print(...) form emits the same text on Python 2 and Python 3.
            print('%s %s %s' % (name, key, db_entity))
            raise AssertionError(
                'unexpected type %r' % (db_entity['type']['key'],))

        # Both sides must agree on whether a birth date is known at all.
        if ('birth_date' in author) != ('birth_date' in db_entity):
            continue
        if not author_dates_match(author, db_entity):
            continue
        match.append(db_entity)

    if not match:
        return None
    if len(match) == 1:
        return match[0]

    try:
        return pick_from_matches(author, match)
    except ValueError:
        print('author: %s' % (author,))
        print('match: %s' % (match,))
        raise
def do_flip(author):
    """Flip an author's name in place when it has exactly one comma part.

    Nothing happens unless all of the following hold:

    * ``author`` has a ``'personal_name'`` equal to its ``'name'``;
    * the name contains ``', '``;
    * no further comma follows that first ``', '``
      (e.g. ``Harper, John Murdoch, 1845-`` is left alone);
    * the name contains neither ``'i.e.'`` nor ``'i. e.'``.

    When they do, both ``author['name']`` and ``author['personal_name']``
    are replaced by ``flip_name(author['name'])``.

    Returns:
        None; *author* is modified in place.
    """
    if 'personal_name' not in author:
        return
    full_name = author['name']
    if author['personal_name'] != full_name:
        return

    _before, separator, remainder = full_name.partition(', ')
    if not separator:
        # No ', ' at all, so there is nothing to flip.
        return
    if ',' in remainder:
        # e.g: Harper, John Murdoch, 1845-
        return
    if 'i.e.' in full_name or 'i. e.' in full_name:
        return

    flipped = flip_name(full_name)
    author['name'] = flipped
    author['personal_name'] = flipped