Example #1
0
def test_author_dates_match():
    """Check author_dates_match() on author dicts that differ in their dates."""
    author_type = {'key': '/type/author'}

    def make_author(key, birth=None, death=None, name='John Smith'):
        # Fields are added in the order name, death_date, key, birth_date,
        # type; a date field is left out entirely when it is not supplied.
        record = {'name': name}
        if death is not None:
            record['death_date'] = death
        record['key'] = key
        if birth is not None:
            record['birth_date'] = birth
        record['type'] = author_type
        return record

    basic = make_author('/a/OL6398451A', birth='1650', death='1688')
    full_dates = make_author(
        '/a/OL6398452A', birth='01 January 1650', death='23 June 1688')
    full_different = make_author(
        '/a/OL6398453A', birth='01 December 1650', death='12 June 1688')
    no_death = make_author('/a/OL6398454A', birth='1650')
    no_dates = make_author('/a/OL6398455A')
    non_match = make_author('/a/OL6398456A', birth='1950', death=u'1999')
    different_name = make_author('/a/OL6398457A', name='Jane Farrier')

    # Pairs that are expected to be reported as matching, checked in order.
    expected_matches = [
        (basic, basic),
        (basic, full_dates),
        (basic, no_death),
        (basic, no_dates),
        (no_dates, no_dates),
        # Without dates, the match returns True
        (no_dates, non_match),
        # This method only compares dates and ignores names
        (no_dates, different_name),
    ]
    for first, second in expected_matches:
        assert author_dates_match(first, second)

    assert author_dates_match(basic, non_match) is False

    # FIXME: the following should properly be False:
    # this shows matches are only occurring on year, full dates are ignored!
    assert author_dates_match(full_different, full_dates)
Example #2
0
def find_entity(author):
    """
    Find an existing author record that matches the import dict ``author``.

    Candidate keys come from ``find_author(name)`` and, when the name
    contains ", ", also from ``find_author(flip_name(name))``. Each key is
    loaded with ``withKey``.

    For a non-person entity the first candidate is returned (following one
    redirect) without any date comparison. For a person, candidates are
    de-duplicated, redirects are followed, deleted records are skipped, and
    a record is kept only when both sides agree on having a 'birth_date'
    and ``author_dates_match`` accepts the pair.

    :param dict author: Author import dict. 'name' is required;
        'entity_type' defaults to 'person' when absent; 'birth_date' is
        consulted when present.
    :return: The matching record as returned by ``withKey`` (or chosen by
        ``pick_from_matches`` when several remain), or None if none match.
    :raises AssertionError: If a loaded record is not of type '/type/author'.
    :raises ValueError: Re-raised from ``pick_from_matches`` after the
        inputs have been printed.
    """
    name = author['name']
    things = find_author(name)
    # A missing 'entity_type' is treated as 'person' rather than raising
    # KeyError; any value that is present is compared exactly as before.
    if author.get('entity_type', 'person') != 'person':
        if not things:
            return None
        db_entity = withKey(things[0])
        if db_entity['type']['key'] == '/type/redirect':
            db_entity = withKey(db_entity['location'])
        assert db_entity['type']['key'] == '/type/author'
        return db_entity
    if ', ' in name:
        # Build a new list instead of extending find_author()'s result in
        # place, so the list it returned is left untouched.
        things = list(things) + list(find_author(flip_name(name)))
    match = []
    seen = set()
    for key in things:
        if key in seen:
            continue
        seen.add(key)
        db_entity = withKey(key)
        if db_entity['type']['key'] == '/type/redirect':
            # Follow the redirect, unless its target was already handled.
            key = db_entity['location']
            if key in seen:
                continue
            seen.add(key)
            db_entity = withKey(key)
        if db_entity['type']['key'] == '/type/delete':
            continue
        try:
            assert db_entity['type']['key'] == '/type/author'
        except AssertionError:
            # Print the offending record before letting the failure propagate.
            print('%s %s %s' % (name, key, db_entity))
            raise
        # Skip records where only one side carries a birth date.
        if ('birth_date' in author) != ('birth_date' in db_entity):
            continue
        if not author_dates_match(author, db_entity):
            continue
        match.append(db_entity)
    if not match:
        return None
    if len(match) == 1:
        return match[0]
    try:
        return pick_from_matches(author, match)
    except ValueError:
        # Single-argument print() calls parse on both Python 2 and Python 3.
        print('author: %s' % (author,))
        print('match: %s' % (match,))
        raise
Example #3
0
def test_author_dates_match():
    """Check author_dates_match() on same-key author dicts with varying dates."""
    author_type = {u'key': u'/type/author'}

    def john_smith(birth=None, death=None):
        # Every record shares the same name, key and type; only the date
        # fields vary. Insertion order: name, death_date, key, birth_date,
        # type, with absent dates omitted.
        record = {u'name': u'John Smith'}
        if death is not None:
            record[u'death_date'] = death
        record['key'] = u'/a/OL6398452A'
        if birth is not None:
            record[u'birth_date'] = birth
        record[u'type'] = author_type
        return record

    basic = john_smith(birth=u'1650', death=u'1688')
    full_dates = john_smith(birth=u'01 January 1650', death=u'23 June 1688')
    full_different = john_smith(
        birth=u'01 December 1650', death=u'12 June 1688')
    no_death = john_smith(birth=u'1650')
    no_dates = john_smith()
    non_match = john_smith(birth=u'1950', death=u'1999')

    # Each of these is expected to be reported as a match against `basic`.
    for other in (basic, full_dates, no_death, no_dates):
        assert author_dates_match(basic, other)

    assert author_dates_match(basic, non_match) is False

    # FIXME: the following should properly be False:
    # this shows matches are only occurring on year, full dates are ignored!
    assert author_dates_match(full_different, full_dates)
Example #4
0
def find_entity(author):
    """
    Looks for an existing Author record in OL by name
    and returns it if found.

    Candidates come from ``find_author(name)`` and, when the name contains
    ", ", also from ``find_author(flip_name(name))``. For a non-person
    entity the first candidate is returned as-is. Otherwise a candidate is
    kept only when both sides agree on having a 'birth_date' and
    ``author_dates_match`` accepts the pair; several survivors are resolved
    by ``pick_from_matches``.

    :param dict author: Author import dict {"name": "Some One"}
    :rtype: dict|None
    :return: Existing Author record, if one is found
    :raises AssertionError: If a candidate is not of type '/type/author'.
    """
    name = author['name']
    things = find_author(name)
    et = author.get('entity_type')
    if et and et != 'person':
        if not things:
            return None
        db_entity = things[0]
        assert db_entity['type']['key'] == '/type/author'
        return db_entity
    if ', ' in name:
        # Build a new list instead of extending find_author()'s result in
        # place, so the list it returned is left untouched.
        things = list(things) + list(find_author(flip_name(name)))
    match = []
    seen = set()
    for a in things:
        key = a['key']
        if key in seen:
            continue
        seen.add(key)
        # Subscript access, matching how things[0] is inspected above.
        assert a['type']['key'] == '/type/author'
        # Skip records where only one side carries a birth date.
        if ('birth_date' in author) != ('birth_date' in a):
            continue
        if not author_dates_match(author, a):
            continue
        match.append(a)
    if not match:
        return None
    if len(match) == 1:
        return match[0]
    return pick_from_matches(author, match)
Example #5
0
def find_entity(author):
    """
    Find an existing author record that matches the import dict ``author``.

    Candidates come from ``find_author(name)`` and, when the name contains
    ", ", also from ``find_author(flip_name(name))``. For a non-person
    entity the first candidate is returned as-is. Otherwise a candidate is
    kept only when both sides agree on having a 'birth_date' and
    ``author_dates_match`` accepts the pair; several survivors are resolved
    by ``pick_from_matches``.

    :param dict author: Author import dict. 'name' is required;
        'entity_type' defaults to 'person' when absent; 'birth_date' is
        consulted when present.
    :return: The matching candidate record, or None if none match.
    :raises AssertionError: If a candidate's 'type' is not '/type/author'.
    :raises ValueError: Re-raised from ``pick_from_matches`` after the
        inputs have been printed.
    """
    name = author['name']
    things = find_author(name)
    # A missing 'entity_type' is treated as 'person' rather than raising
    # KeyError; any value that is present is compared exactly as before.
    if author.get('entity_type', 'person') != 'person':
        if not things:
            return None
        db_entity = things[0]
        # NOTE: redirect records are not resolved here; the first hit must
        # already be an author record.
        assert db_entity['type'] == '/type/author'
        return db_entity
    if ', ' in name:
        # Build a new list instead of extending find_author()'s result in
        # place, so the list it returned is left untouched.
        things = list(things) + list(find_author(flip_name(name)))
    match = []
    seen = set()
    for a in things:
        key = a['key']
        if key in seen:
            continue
        seen.add(key)
        assert a['type'] == '/type/author'
        # Skip records where only one side carries a birth date.
        if ('birth_date' in author) != ('birth_date' in a):
            continue
        if not author_dates_match(author, a):
            continue
        match.append(a)
    if not match:
        return None
    if len(match) == 1:
        return match[0]
    try:
        return pick_from_matches(author, match)
    except ValueError:
        # Single-argument print() calls parse on both Python 2 and Python 3.
        print('author: %s' % (author,))
        print('match: %s' % (match,))
        raise
Example #6
0
def find_entity(author):
    """
    Find an existing author record that matches the import dict ``author``.

    Candidates come from ``find_author(name)`` and, when the name contains
    ", ", also from ``find_author(flip_name(name))``. For a non-person
    entity the first candidate is returned as-is. Otherwise a candidate is
    kept only when both sides agree on having a 'birth_date' and
    ``author_dates_match`` accepts the pair; several survivors are resolved
    by ``pick_from_matches``.

    :param dict author: Author import dict. 'name' is required;
        'entity_type' defaults to 'person' when absent; 'birth_date' is
        consulted when present.
    :return: The matching candidate record, or None if none match.
    :raises AssertionError: If a candidate's 'type' is not '/type/author'.
    :raises ValueError: Re-raised from ``pick_from_matches`` after the
        inputs have been printed.
    """
    name = author['name']
    things = find_author(name)
    # A missing 'entity_type' is treated as 'person' rather than raising
    # KeyError; any value that is present is compared exactly as before.
    if author.get('entity_type', 'person') != 'person':
        if not things:
            return None
        db_entity = things[0]
        # NOTE: redirect records are not resolved here; the first hit must
        # already be an author record.
        assert db_entity['type'] == '/type/author'
        return db_entity
    if ', ' in name:
        # Build a new list instead of extending find_author()'s result in
        # place, so the list it returned is left untouched.
        things = list(things) + list(find_author(flip_name(name)))
    match = []
    seen = set()
    for a in things:
        key = a['key']
        if key in seen:
            continue
        seen.add(key)
        assert a['type'] == '/type/author'
        # Skip records where only one side carries a birth date.
        if ('birth_date' in author) != ('birth_date' in a):
            continue
        if not author_dates_match(author, a):
            continue
        match.append(a)
    if not match:
        return None
    if len(match) == 1:
        return match[0]
    try:
        return pick_from_matches(author, match)
    except ValueError:
        # Show the ambiguous inputs before letting the error propagate.
        print('author:', author)
        print('match:', match)
        raise
Example #7
0
def find_entity(author):  # no direct DB calls
    """
    Find an existing author record that matches the import dict ``author``.

    Candidates come from ``find_author(name)`` and, when the name contains
    ", ", also from ``find_author(flip_name(name))``. For a non-person
    entity the first candidate is returned as-is. Otherwise a candidate is
    kept only when both sides agree on having a 'birth_date' and
    ``author_dates_match`` accepts the pair; several survivors are resolved
    by ``pick_from_matches``.

    :param dict author: Author import dict. 'name' is required;
        'entity_type' and 'birth_date' are consulted when present.
    :return: The matching candidate record, or None if none match.
    :raises AssertionError: If a candidate is not of type '/type/author'.
    :raises ValueError: Re-raised from ``pick_from_matches`` after the
        inputs have been printed.
    """
    name = author['name']
    things = find_author(name)
    et = author.get('entity_type')
    if et and et != 'person':
        if not things:
            return None
        db_entity = things[0]
        assert db_entity['type']['key'] == '/type/author'
        return db_entity
    if ', ' in name:
        # Build a new list instead of extending find_author()'s result in
        # place, so the list it returned is left untouched.
        things = list(things) + list(find_author(flip_name(name)))
    match = []
    seen = set()
    for a in things:
        key = a['key']
        if key in seen:
            continue
        seen.add(key)
        # Subscript access, matching how things[0] is inspected above.
        assert a['type']['key'] == '/type/author'
        # Skip records where only one side carries a birth date.
        if ('birth_date' in author) != ('birth_date' in a):
            continue
        if not author_dates_match(author, a):
            continue
        match.append(a)
    if not match:
        return None
    if len(match) == 1:
        return match[0]
    try:
        return pick_from_matches(author, match)
    except ValueError:
        # Single-argument print() calls parse on both Python 2 and Python 3.
        print('author: %s' % (author,))
        print('match: %s' % (match,))
        raise