def test_pick_first_date():
    """Check pick_first_date against a handful of date-subfield inputs.

    Each case pairs the list handed to pick_first_date with the dict it is
    expected to return.
    """
    cases = [
        # A non-date entry ahead of an open-ended range.
        (["Mrs.", "1839-"], {'birth_date': '1839'}),
        # Trailing dot after the hyphen.
        (["1882-."], {'birth_date': '1882'}),
        # Closed range followed by two dots.
        (["1900-1990.."], {'birth_date': '1900', 'death_date': '1990'}),
        # No year range: the whole string comes back under 'date'.
        (["4th/5th cent."], {'date': '4th/5th cent.'}),
    ]
    for values, expected in cases:
        assert pick_first_date(values) == expected
def read_author_person(line):
    """Build an author dict for a person from one name field line.

    Returns None when the line has neither an 'a' nor a 'c' subfield.
    Otherwise returns a dict with 'name' and 'entity_type', plus
    'personal_name', 'numeration' and 'title' for whichever of the 'a', 'b'
    and 'c' subfields are present, plus whatever pick_first_date produced
    from the 'd' subfield values.
    """
    strip_chars = ' /,;:'
    contents = get_contents(line, ['a', 'b', 'c', 'd'])
    # should at least be a name or title
    if not ('a' in contents or 'c' in contents):
        return None

    name_parts = [
        value.strip(strip_chars)
        for value in get_subfield_values(line, ['a', 'b', 'c'])
    ]

    if 'd' in contents:
        author = pick_first_date(contents['d'])
        death_date = author.get('death_date')
        # Drop the final character when re_number_dot matches the death date.
        if death_date and re_number_dot.search(death_date):
            author['death_date'] = death_date[:-1]
    else:
        author = {}

    author['name'] = ' '.join(name_parts)
    author['entity_type'] = 'person'

    field_names = (
        ('a', 'personal_name'),
        ('b', 'numeration'),
        ('c', 'title'),
    )
    for code, key in field_names:
        if code not in contents:
            continue
        author[key] = ' '.join(
            part.strip(strip_chars) for part in contents[code])

    # NOTE(review): 'q' is not among the codes passed to get_contents above,
    # so this branch presumably never fires -- confirm against get_contents.
    if 'q' in contents:
        author['fuller_name'] = ' '.join(contents['q'])
    return author
def read_author_person(f):
    """Build an author dict for a person from the field object f.

    f.remove_brackets() is called first, so f is modified by this call.
    Returns None when the field has neither an 'a' nor a 'c' subfield.
    Otherwise returns a dict with 'name' and 'entity_type', plus
    'personal_name', 'numeration', 'title' and 'role' for whichever of the
    'a', 'b', 'c' and 'e' subfields are present, plus whatever
    pick_first_date produced from the 'd' subfield values.
    """
    strip_chars = ' /,;:'
    f.remove_brackets()
    contents = f.get_contents(['a', 'b', 'c', 'd', 'e'])
    # should at least be a name or title
    if not ('a' in contents or 'c' in contents):
        return

    name_parts = [
        value.strip(strip_chars)
        for value in f.get_subfield_values(['a', 'b', 'c'])
    ]

    if 'd' in contents:
        author = pick_first_date(
            strip_foc(d).strip(',') for d in contents['d'])
        death_date = author.get('death_date')
        # Drop the final character when re_number_dot matches the death date.
        if death_date and re_number_dot.search(death_date):
            author['death_date'] = death_date[:-1]
    else:
        author = {}

    author['name'] = ' '.join(name_parts)
    author['entity_type'] = 'person'

    field_names = (
        ('a', 'personal_name'),
        ('b', 'numeration'),
        ('c', 'title'),
        ('e', 'role'),
    )
    for code, key in field_names:
        if code not in contents:
            continue
        joined = ' '.join(part.strip(strip_chars) for part in contents[code])
        author[key] = remove_trailing_dot(joined)

    # NOTE(review): 'q' is not among the codes passed to get_contents above,
    # so this branch presumably never fires -- confirm against get_contents.
    if 'q' in contents:
        author['fuller_name'] = ' '.join(contents['q'])

    # Final clean-up pass over the two name keys; 'personal_name' is only
    # present when the field had an 'a' subfield.
    for key in ('name', 'personal_name'):
        if key in author:
            author[key] = remove_trailing_dot(strip_foc(author[key]))
    return author
def read_author_person(f):
    """Build an author dict for a person from the field object f.

    f.remove_brackets() is called first, so f is modified by this call.
    Returns None when the field has neither an 'a' nor a 'c' subfield.
    Otherwise returns a dict with 'name' and 'entity_type', plus
    'personal_name', 'numeration', 'title' and 'role' for whichever of the
    'a', 'b', 'c' and 'e' subfields are present, plus whatever
    pick_first_date produced from the 'd' subfield values.
    """
    f.remove_brackets()
    author = {}
    contents = f.get_contents(['a', 'b', 'c', 'd', 'e'])
    if 'a' not in contents and 'c' not in contents:
        return  # should at least be a name or title
    name = [v.strip(' /,;:') for v in f.get_subfield_values(['a', 'b', 'c'])]
    if 'd' in contents:
        author = pick_first_date(
            strip_foc(d).strip(',') for d in contents['d'])
        if 'death_date' in author and author['death_date']:
            death_date = author['death_date']
            # Drop the final character when re_number_dot matches.
            if re_number_dot.search(death_date):
                author['death_date'] = death_date[:-1]
    author['name'] = ' '.join(name)
    author['entity_type'] = 'person'
    subfields = [
        ('a', 'personal_name'),
        ('b', 'numeration'),
        ('c', 'title'),
        ('e', 'role'),
    ]
    for subfield, field_name in subfields:
        if subfield in contents:
            author[field_name] = remove_trailing_dot(
                ' '.join([x.strip(' /,;:') for x in contents[subfield]]))
    # NOTE(review): 'q' is not among the codes passed to get_contents above,
    # so this branch presumably never fires -- confirm against get_contents.
    if 'q' in contents:
        author['fuller_name'] = ' '.join(contents['q'])
    # 'personal_name' exists only when the field had an 'a' subfield.  A
    # record with 'c' but no 'a' passes the guard above, so indexing the key
    # unconditionally raised KeyError; skip keys that were never set.  The
    # loop variable is also no longer named f, which shadowed the parameter.
    for key in ('name', 'personal_name'):
        if key in author:
            author[key] = remove_trailing_dot(strip_foc(author[key]))
    return author
def read_author_person(line):
    """Turn one name field line into an author dict describing a person.

    Gives back None if the line carries neither subfield 'a' nor 'c'.
    Otherwise the dict has 'name' and 'entity_type', the keys
    'personal_name' / 'numeration' / 'title' for subfields 'a' / 'b' / 'c'
    that are present, and any keys pick_first_date returned for the 'd'
    subfield values.
    """
    trim = ' /,;:'
    contents = get_contents(line, ['a', 'b', 'c', 'd'])
    has_name_or_title = 'a' in contents or 'c' in contents
    if not has_name_or_title:
        # should at least be a name or title
        return None

    pieces = []
    for raw in get_subfield_values(line, ['a', 'b', 'c']):
        pieces.append(raw.strip(trim))

    author = {}
    if 'd' in contents:
        author = pick_first_date(contents['d'])
        died = author.get('death_date')
        # When re_number_dot matches, the last character is cut off.
        if died and re_number_dot.search(died):
            author['death_date'] = died[:-1]

    author['name'] = ' '.join(pieces)
    author['entity_type'] = 'person'

    for code, key in [('a', 'personal_name'),
                      ('b', 'numeration'),
                      ('c', 'title')]:
        if code in contents:
            cleaned = [item.strip(trim) for item in contents[code]]
            author[key] = ' '.join(cleaned)

    # NOTE(review): get_contents was not asked for 'q' above, so this branch
    # presumably never runs -- verify against get_contents.
    if 'q' in contents:
        author['fuller_name'] = ' '.join(contents['q'])
    return author
def build_person_object(p, marc_alt):
    """Build a person dict from the subfields of one personal-name field.

    p is iterated several times as (subfield code, value) pairs, so it must
    be a re-iterable sequence.  marc_alt is any iterable; its items are
    stored after p under the 'marc' key.

    The result is whatever pick_first_date returns for the 'd' values (or an
    empty dict when there are none) with these keys added: 'name', 'sort',
    'marc', 'enumeration' when a 'b' subfield exists and 'title' when the
    joined 'c' text is non-empty.
    """
    # Exact-code match: the previous `k in 'ab'` was a substring test that
    # also accepted the codes '' and 'ab'.
    ab = [(k, v.strip(' /,;:')) for k, v in p if k in ('a', 'b')]
    b_values = [v for k, v in p if k == 'b']

    orig_name = ' '.join(v for k, v in ab)
    c = ' '.join(v for k, v in p if k == 'c')
    name = flip_name(orig_name)
    # NOTE(review): name[0] raises IndexError if flip_name returns an empty
    # string -- confirm callers never pass a field without 'a'/'b' values.
    if name[0].isdigit():
        name = orig_name
    else:
        of_count = c.count('of ')
        if of_count == 1 and 'of the ' not in c and 'Emperor of ' not in c:
            name += ' ' + c[c.find('of '):]
        elif ' ' not in name and of_count > 1:
            name += ', ' + c
        elif (c.endswith((' of', ' de'))
                and any(k == 'a' and ', ' in v for k, v in p)):
            # The ', ' check must guard both endings.  Without the grouping,
            # `A or B and C` parsed as `A or (B and C)`, so a title ending
            # in ' of' reached the slicing below with find() == -1 and
            # produced a mangled name.
            comma = orig_name.find(', ')
            c += ' ' + orig_name[:comma]
            name = orig_name[comma + 2:] + ', ' + c

    person = {}
    d = [v for k, v in p if k == 'd']
    if d:
        person = pick_first_date(d)
    person['name'] = name
    person['sort'] = orig_name

    if b_values:
        person['enumeration'] = ' '.join(b_values)

    if c:
        person['title'] = c
    person['marc'] = [p] + list(marc_alt)

    return person
def test_lookup():
    """Run the name lookup for every line in `marc` and print each stage.

    For each line this prints the candidates from name_lookup, the dates
    from pick_first_date, the number of matches and the matches themselves.
    When there is not exactly one match it narrows them with
    pick_from_match, and if that still leaves other than one it prints the
    output of more_than_one_match.

    Every print call takes a single argument so the output is the same on
    Python 2 (where it parses as a print statement with a parenthesised
    expression) and on Python 3.
    """
    for line in marc:
        fields = tuple(
            (k, v.strip(' /,;:')) for k, v in get_subfields(line, 'abcd'))
        found = name_lookup(fields)
        for i in found:
            print(i)
        dates = pick_first_date(v for k, v in fields if k == 'd')
        print(dates)
        match = look_for_match(found, dates, False)
        print(len(match))
        for i in match:
            print(i)
        if len(match) != 1:
            match = pick_from_match(match)
        if len(match) != 1:
            for i in more_than_one_match(match):
                print(i)
        # Blank separator line after each record.  print('') emits one
        # newline on both Python 2 and 3, whereas print() would output "()"
        # on Python 2.
        print('')
def build_person_object(p, marc_alt):
    """Assemble a person dict from one personal-name field's subfields.

    p is walked more than once as (subfield code, value) pairs, so it has to
    be a re-iterable sequence.  marc_alt may be any iterable; its items are
    appended after p in the 'marc' list.

    Starts from the dict pick_first_date returns for the 'd' values (an
    empty dict when there are none) and adds 'name', 'sort' and 'marc', plus
    'enumeration' if any 'b' subfield exists and 'title' if the joined 'c'
    text is non-empty.
    """
    strip_chars = ' /,;:'
    # Compare codes exactly.  `k in 'ab'` was a substring test, which is
    # also true for the codes '' and 'ab'.
    ab = [(k, v.strip(strip_chars)) for k, v in p if k in ('a', 'b')]
    enumeration = [v for k, v in p if k == 'b']

    orig_name = ' '.join(v for k, v in ab)
    c = ' '.join(v for k, v in p if k == 'c')
    name = flip_name(orig_name)
    # NOTE(review): an empty result from flip_name makes name[0] raise
    # IndexError -- confirm that cannot happen for real records.
    if name[0].isdigit():
        name = orig_name
    else:
        of_count = c.count('of ')
        a_has_comma = any(k == 'a' and ', ' in v for k, v in p)
        if of_count == 1 and 'of the ' not in c and 'Emperor of ' not in c:
            name += ' ' + c[c.find('of '):]
        elif ' ' not in name and of_count > 1:
            name += ', ' + c
        elif c.endswith((' of', ' de')) and a_has_comma:
            # Both endings need the comma check.  The old
            # `endswith(' of') or endswith(' de') and any(...)` bound the
            # `and` first, so ' of' titles were rearranged even when there
            # was no ', ' to split on and find() returned -1.
            split_at = orig_name.find(', ')
            c += ' ' + orig_name[:split_at]
            name = orig_name[split_at + 2:] + ', ' + c

    person = {}
    d = [v for k, v in p if k == 'd']
    if d:
        person = pick_first_date(d)
    person['name'] = name
    person['sort'] = orig_name

    if enumeration:
        person['enumeration'] = ' '.join(enumeration)

    if c:
        person['title'] = c
    person['marc'] = [p] + list(marc_alt)

    return person
def test_lookup():
    """Look up every line of `marc` and print what each step returns.

    Per line: print the name_lookup candidates, the pick_first_date result,
    the match count and each match.  If the match count is not one, reduce
    the matches with pick_from_match; if it is still not one, print what
    more_than_one_match yields.

    All print calls pass exactly one argument, so they behave the same
    whether parsed as Python 2 print statements or Python 3 function calls.
    """
    for line in marc:
        fields = tuple(
            (k, v.strip(' /,;:')) for k, v in get_subfields(line, 'abcd')
        )
        found = name_lookup(fields)
        for candidate in found:
            print(candidate)
        dates = pick_first_date(v for k, v in fields if k == 'd')
        print(dates)
        match = look_for_match(found, dates, False)
        print(len(match))
        for entry in match:
            print(entry)
        if len(match) != 1:
            match = pick_from_match(match)
        if len(match) != 1:
            for entry in more_than_one_match(match):
                print(entry)
        # Blank line between records.  print('') writes a single newline on
        # both Python 2 and 3; a bare print() would show "()" on Python 2.
        print('')
def test_date():
    """Verify pick_first_date on four sample date-subfield lists.

    The table maps each input list to the exact dict pick_first_date must
    return for it.
    """
    expectations = (
        # Leading non-date value, then a birth year with no death year.
        (["Mrs.", "1839-"], {'birth_date': '1839'}),
        # Birth year followed by "-.".
        (["1882-."], {'birth_date': '1882'}),
        # Birth and death year followed by "..".
        (["1900-1990.."], {'birth_date': u'1900', 'death_date': u'1990'}),
        # Not a year range: returned unchanged under 'date'.
        (["4th/5th cent."], {'date': '4th/5th cent.'}),
    )
    for subfield_values, wanted in expectations:
        assert pick_first_date(subfield_values) == wanted