Example #1
0
def test_pick_first_date():
    """Check pick_first_date against a table of subfield values and results.

    Each case pairs the list handed to ``pick_first_date`` with the exact
    dict it is expected to return.
    """
    cases = (
        # open-ended range after a non-date value: only a birth date
        (["Mrs.", "1839-"], {'birth_date': '1839'}),
        # trailing punctuation after the dash is ignored
        (["1882-."], {'birth_date': '1882'}),
        # closed range: both birth and death dates
        (["1900-1990.."], {'birth_date': '1900', 'death_date': '1990'}),
        # free-text dates come back unchanged under 'date'
        (["4th/5th cent."], {'date': '4th/5th cent.'}),
    )
    for values, expected in cases:
        assert pick_first_date(values) == expected
Example #2
0
def read_author_person(line):
    """Build an author dict with entity type 'person' from ``line``.

    ``line`` is passed untouched to ``get_contents`` and
    ``get_subfield_values``.  The result starts from whatever
    ``pick_first_date`` returns for the 'd' subfield values (when present)
    and is then extended with 'name', 'entity_type' and one key per
    available 'a'/'b'/'c' subfield.

    Returns ``None`` when neither an 'a' nor a 'c' subfield is present.
    """
    contents = get_contents(line, ['a', 'b', 'c', 'd'])
    if not ('a' in contents or 'c' in contents):
        return None  # should at least be a name or title

    # Name parts in the order get_subfield_values yields them.
    name_parts = []
    for value in get_subfield_values(line, ['a', 'b', 'c']):
        name_parts.append(value.strip(' /,;:'))

    author = {}
    if 'd' in contents:
        author = pick_first_date(contents['d'])
        death_date = author.get('death_date')
        # Drop the last character of the death date when re_number_dot
        # matches it (presumably a trailing dot after the year -- confirm
        # against the pattern's definition).
        if death_date and re_number_dot.search(death_date):
            author['death_date'] = death_date[:-1]

    author['name'] = ' '.join(name_parts)
    author['entity_type'] = 'person'

    field_names = (
        ('a', 'personal_name'),
        ('b', 'numeration'),
        ('c', 'title'),
    )
    for code, key in field_names:
        if code not in contents:
            continue
        cleaned = [part.strip(' /,;:') for part in contents[code]]
        author[key] = ' '.join(cleaned)

    # NOTE(review): 'q' is not among the subfields requested above; confirm
    # whether get_contents can ever return it.
    if 'q' in contents:
        author['fuller_name'] = ' '.join(contents['q'])
    return author
Example #3
0
def read_author_person(f):
    """Turn a personal-name field into an author dict of type 'person'.

    ``f`` must provide ``remove_brackets()``, ``get_contents()`` and
    ``get_subfield_values()``; ``remove_brackets()`` is called on it before
    anything is read.  The result starts from whatever ``pick_first_date``
    returns for the cleaned 'd' values (when present) and gains 'name',
    'entity_type' and one key per available 'a'/'b'/'c'/'e' subfield.

    Returns ``None`` when neither an 'a' nor a 'c' subfield is present.
    """
    f.remove_brackets()
    contents = f.get_contents(['a', 'b', 'c', 'd', 'e'])
    if not ('a' in contents or 'c' in contents):
        return  # should at least be a name or title

    name_parts = [
        value.strip(' /,;:')
        for value in f.get_subfield_values(['a', 'b', 'c'])
    ]

    author = {}
    if 'd' in contents:
        # Passed as a lazy generator, so values are only cleaned as
        # pick_first_date consumes them.
        author = pick_first_date(
            strip_foc(raw).strip(',') for raw in contents['d'])
        death_date = author.get('death_date')
        # Drop the final character when re_number_dot matches the date.
        if death_date and re_number_dot.search(death_date):
            author['death_date'] = death_date[:-1]

    author['name'] = ' '.join(name_parts)
    author['entity_type'] = 'person'

    field_names = (
        ('a', 'personal_name'),
        ('b', 'numeration'),
        ('c', 'title'),
        ('e', 'role'),
    )
    for code, key in field_names:
        if code not in contents:
            continue
        joined = ' '.join(part.strip(' /,;:') for part in contents[code])
        author[key] = remove_trailing_dot(joined)

    # NOTE(review): 'q' is not among the subfields requested above; confirm
    # whether get_contents can ever return it.
    if 'q' in contents:
        author['fuller_name'] = ' '.join(contents['q'])

    # Final clean-up of the display names; 'personal_name' only exists when
    # an 'a' subfield was present.
    for key in ('name', 'personal_name'):
        if key in author:
            author[key] = remove_trailing_dot(strip_foc(author[key]))
    return author
Example #4
0
def read_author_person(f):
    """Turn a personal-name field into an author dict of type 'person'.

    ``f`` must provide ``remove_brackets()``, ``get_contents()`` and
    ``get_subfield_values()``; ``remove_brackets()`` is called on it before
    anything is read.  The result starts from whatever ``pick_first_date``
    returns for the cleaned 'd' values (when present) and gains 'name',
    'entity_type' and one key per available 'a'/'b'/'c'/'e' subfield.

    Returns ``None`` when neither an 'a' nor a 'c' subfield is present.
    """
    f.remove_brackets()
    author = {}
    contents = f.get_contents(['a', 'b', 'c', 'd', 'e'])
    if 'a' not in contents and 'c' not in contents:
        return  # should at least be a name or title
    name = [v.strip(' /,;:') for v in f.get_subfield_values(['a', 'b', 'c'])]
    if 'd' in contents:
        author = pick_first_date(strip_foc(d).strip(',') for d in contents['d'])
        death_date = author.get('death_date')
        # Drop the final character when re_number_dot matches the date.
        if death_date and re_number_dot.search(death_date):
            author['death_date'] = death_date[:-1]

    author['name'] = ' '.join(name)
    author['entity_type'] = 'person'
    subfields = [
        ('a', 'personal_name'),
        ('b', 'numeration'),
        ('c', 'title'),
        ('e', 'role'),
    ]
    for subfield, field_name in subfields:
        if subfield in contents:
            author[field_name] = remove_trailing_dot(
                ' '.join([x.strip(' /,;:') for x in contents[subfield]]))
    # NOTE(review): 'q' is not among the subfields requested above; confirm
    # whether get_contents can ever return it.
    if 'q' in contents:
        author['fuller_name'] = ' '.join(contents['q'])
    # 'personal_name' is only set when an 'a' subfield exists.  A field with
    # just a 'c' (title) passes the guard above, so the key must be checked
    # before indexing to avoid a KeyError.  The loop variable is also kept
    # distinct from the ``f`` parameter.
    for key in ('name', 'personal_name'):
        if key in author:
            author[key] = remove_trailing_dot(strip_foc(author[key]))
    return author
Example #5
0
def read_author_person(line):
    """Assemble a 'person' author dict from the subfields found in ``line``.

    ``line`` is handed as-is to ``get_contents`` and
    ``get_subfield_values``.  When a 'd' subfield exists the dict returned by
    ``pick_first_date`` is used as the base; 'name', 'entity_type' and the
    per-subfield keys are then added to it.

    Returns ``None`` when neither an 'a' nor a 'c' subfield is present.
    """
    strip_chars = ' /,;:'
    contents = get_contents(line, ['a', 'b', 'c', 'd'])
    has_name_or_title = 'a' in contents or 'c' in contents
    if not has_name_or_title:
        return None  # should at least be a name or title

    raw_parts = get_subfield_values(line, ['a', 'b', 'c'])
    full_name = ' '.join([part.strip(strip_chars) for part in raw_parts])

    author = {}
    if 'd' in contents:
        author = pick_first_date(contents['d'])
        died = author.get('death_date')
        # When re_number_dot matches the death date, its last character is
        # cut off.
        if died and re_number_dot.search(died):
            author['death_date'] = died[:-1]

    author['name'] = full_name
    author['entity_type'] = 'person'

    key_for_code = (
        ('a', 'personal_name'),
        ('b', 'numeration'),
        ('c', 'title'),
    )
    for code, key in key_for_code:
        if code in contents:
            pieces = [piece.strip(strip_chars) for piece in contents[code]]
            author[key] = ' '.join(pieces)

    # NOTE(review): 'q' is not among the subfields requested above; confirm
    # whether get_contents can ever return it.
    if 'q' in contents:
        author['fuller_name'] = ' '.join(contents['q'])
    return author
Example #6
0
def build_person_object(p, marc_alt):
    """Build a person dict from the subfields of a personal-name field.

    Args:
        p: sequence of ``(subfield_code, value)`` pairs.  It is iterated
            several times, so it must be re-iterable (e.g. a list or tuple).
        marc_alt: iterable of alternative fields; they are listed after
            ``p`` under the 'marc' key.

    Returns:
        The dict produced by ``pick_first_date`` for the 'd' values (or an
        empty dict when there are none), extended with:
        'name' (display name), 'sort' (the stripped 'a'/'b' values joined by
        spaces), 'marc' (``[p]`` followed by ``marc_alt``), and, when
        present, 'enumeration' ('b' values) and 'title' ('c' values).
    """
    ab = [(k, v.strip(' /,;:')) for k, v in p if k in 'ab']
    has_b = any(k == 'b' for k, v in p)

    orig_name = ' '.join(v for k, v in ab)
    c = ' '.join(v for k, v in p if k == 'c')
    name = flip_name(orig_name)
    if name[0].isdigit():
        # A flipped name that starts with a digit is discarded in favour of
        # the unflipped form.
        name = orig_name
    else:
        of_count = c.count('of ')
        if of_count == 1 and 'of the ' not in c and 'Emperor of ' not in c:
            # Append the "of ..." tail of the title to the name.
            name += ' ' + c[c.find('of '):]
        elif ' ' not in name and of_count > 1:
            name += ', ' + c
        elif c.endswith((' of', ' de')) and any(
                k == 'a' and ', ' in v for k, v in ab):
            # Title ends in a dangling "of"/"de": move the part of the name
            # before the first ", " onto the end of the title and put the
            # remainder of the name in front of it.
            # The original condition was ``A or B and C``, which Python
            # parses as ``A or (B and C)``; a title ending in ' of' then
            # reached this branch even without a ", " in the name, so
            # ``find`` returned -1 and the slices below garbled the name.
            # Checking the stripped values guarantees ``find`` succeeds.
            name = ' '.join(v for k, v in ab)
            comma_at = name.find(', ')
            c += ' ' + name[:comma_at]
            name = name[comma_at + 2:] + ', ' + c

    person = {}
    d = [v for k, v in p if k == 'd']
    if d:
        person = pick_first_date(d)
    person['name'] = name
    person['sort'] = orig_name

    if has_b:
        person['enumeration'] = ' '.join(v for k, v in p if k == 'b')

    if c:
        person['title'] = c
    person['marc'] = [p] + list(marc_alt)

    return person
Example #7
0
def test_lookup():
    """Print name-lookup and date-matching diagnostics for each ``marc`` line.

    For every line in the module-level ``marc`` iterable this prints, in
    order: each candidate returned by ``name_lookup``, the dates returned by
    ``pick_first_date`` for the 'd' subfield values, the number of matches
    returned by ``look_for_match``, each match, and -- when narrowing with
    ``pick_from_match`` still does not leave exactly one match -- each item
    from ``more_than_one_match``.  A blank line ends each record.

    Output uses single-argument ``print(...)`` calls, which produce the same
    text under the Python 2 print statement and the Python 3 print function
    (the original bare print statements were a SyntaxError on Python 3).
    """
    for line in marc:
        fields = tuple(
            (k, v.strip(' /,;:')) for k, v in get_subfields(line, 'abcd'))
        found = name_lookup(fields)
        for i in found:
            print(i)
        dates = pick_first_date(v for k, v in fields if k == 'd')
        print(dates)
        match = look_for_match(found, dates, False)
        print(len(match))
        for i in match:
            print(i)
        if len(match) != 1:
            match = pick_from_match(match)
        if len(match) != 1:
            for i in more_than_one_match(match):
                print(i)
        # print('') rather than print(): on Python 2 a bare print() would
        # output "()" instead of an empty line.
        print('')
Example #8
0
def build_person_object(p, marc_alt):
    """Create the person record for one personal-name field.

    Args:
        p: sequence of ``(subfield_code, value)`` pairs; iterated more than
            once, so a one-shot iterator will not work.
        marc_alt: iterable of alternative fields, appended after ``p`` in
            the record's 'marc' list.

    Returns:
        A dict based on ``pick_first_date``'s result for the 'd' values
        (empty dict when there are none) with 'name', 'sort' and 'marc'
        always set, plus 'enumeration' when 'b' subfields exist and 'title'
        when the joined 'c' values are non-empty.
    """
    ab = [(k, v.strip(' /,;:')) for k, v in p if k in 'ab']
    has_b = any(k == 'b' for k, v in p)

    orig_name = ' '.join(v for k, v in ab)
    c = ' '.join(v for k, v in p if k == 'c')
    name = flip_name(orig_name)
    if name[0].isdigit():
        # Keep the unflipped form when the flipped name starts with a digit.
        name = orig_name
    else:
        of_count = c.count('of ')
        ends_dangling = c.endswith(' of') or c.endswith(' de')
        # Evaluated on the stripped 'a' values so that, whenever it is true,
        # the joined name below is guaranteed to contain ", ".
        a_has_comma = any(k == 'a' and ', ' in v for k, v in ab)
        if of_count == 1 and 'of the ' not in c and 'Emperor of ' not in c:
            # Append the "of ..." tail of the title to the name.
            name += ' ' + c[c.find('of '):]
        elif ' ' not in name and of_count > 1:
            name += ', ' + c
        elif ends_dangling and a_has_comma:
            # The original test read ``A or B and C`` and so parsed as
            # ``A or (B and C)``: a title ending in ' of' triggered the
            # rearrangement even with no ", " in the name, making ``find``
            # return -1 and the slices produce a garbled name.
            name = ' '.join(v for k, v in ab)
            comma_at = name.find(', ')
            c += ' ' + name[:comma_at]
            name = name[comma_at + 2:] + ', ' + c

    person = {}
    d = [v for k, v in p if k == 'd']
    if d:
        person = pick_first_date(d)
    person['name'] = name
    person['sort'] = orig_name

    if has_b:
        person['enumeration'] = ' '.join(v for k, v in p if k == 'b')

    if c:
        person['title'] = c
    person['marc'] = [p] + list(marc_alt)

    return person
def test_lookup():
    """Dump lookup and matching results for every line of ``marc``.

    Per line of the module-level ``marc`` iterable: the 'abcd' subfields are
    fetched with ``get_subfields`` and stripped of surrounding ' /,;:'
    characters, then the ``name_lookup`` candidates, the ``pick_first_date``
    result for the 'd' values, the ``look_for_match`` count and matches are
    printed.  If there is not exactly one match, ``pick_from_match`` is
    tried; if that still does not leave one, the items from
    ``more_than_one_match`` are printed.  Each record ends with a blank line.

    All output goes through single-argument ``print(...)`` calls so the
    function parses on Python 3 while printing the same text on Python 2
    (the original print statements were Python 2 only).
    """
    for line in marc:
        fields = tuple(
            (k, v.strip(' /,;:')) for k, v in get_subfields(line, 'abcd'))
        found = name_lookup(fields)
        for candidate in found:
            print(candidate)
        dates = pick_first_date(v for k, v in fields if k == 'd')
        print(dates)
        match = look_for_match(found, dates, False)
        print(len(match))
        for item in match:
            print(item)
        if len(match) != 1:
            match = pick_from_match(match)
        if len(match) != 1:
            for item in more_than_one_match(match):
                print(item)
        # An explicit empty string keeps the blank separator line identical
        # on Python 2, where print() with no argument would print "()".
        print('')
Example #10
0
def test_date():
    """Verify the dicts pick_first_date returns for four sample inputs."""
    # Open-ended ranges yield only a birth date.
    birth_only_after_title = pick_first_date(["Mrs.", "1839-"])
    assert birth_only_after_title == {'birth_date': '1839'}

    birth_only_with_dot = pick_first_date(["1882-."])
    assert birth_only_with_dot == {'birth_date': '1882'}

    # A closed range yields both ends, with trailing dots removed.
    full_range = pick_first_date(["1900-1990.."])
    expected_range = {'birth_date': u'1900', 'death_date': u'1990'}
    assert full_range == expected_range

    # Free-text dates are returned unchanged under 'date'.
    free_text = pick_first_date(["4th/5th cent."])
    assert free_text == {'date': '4th/5th cent.'}