def test_author_dates_match():
    """Exercise author_dates_match() with author records carrying different dates.

    The fixtures all describe an author record with a name, a key and a type;
    they differ in which of 'birth_date' / 'death_date' are present and how
    precise those dates are.
    """
    author_type = {'key': '/type/author'}

    # Year-only birth and death dates.
    basic = {
        'name': 'John Smith',
        'death_date': '1688',
        'key': '/a/OL6398451A',
        'birth_date': '1650',
        'type': author_type,
    }
    # Same years as `basic`, but with full day/month precision.
    full_dates = {
        'name': 'John Smith',
        'death_date': '23 June 1688',
        'key': '/a/OL6398452A',
        'birth_date': '01 January 1650',
        'type': author_type,
    }
    # Same years again, but different days/months than `full_dates`.
    full_different = {
        'name': 'John Smith',
        'death_date': '12 June 1688',
        'key': '/a/OL6398453A',
        'birth_date': '01 December 1650',
        'type': author_type,
    }
    # Birth year only.
    no_death = {
        'name': 'John Smith',
        'key': '/a/OL6398454A',
        'birth_date': '1650',
        'type': author_type,
    }
    # No dates at all.
    no_dates = {'name': 'John Smith', 'key': '/a/OL6398455A', 'type': author_type}
    # Same name, entirely different years.
    non_match = {
        'name': 'John Smith',
        'death_date': u'1999',
        'key': '/a/OL6398456A',
        'birth_date': '1950',
        'type': author_type,
    }
    # Different name, no dates.
    different_name = {
        'name': 'Jane Farrier',
        'key': '/a/OL6398457A',
        'type': author_type,
    }

    # Pairs that are expected to be reported as matching, checked in order.
    expected_matches = [
        (basic, basic),
        (basic, full_dates),
        (basic, no_death),
        (basic, no_dates),
        (no_dates, no_dates),
        # Without dates, the match returns True
        (no_dates, non_match),
        # This method only compares dates and ignores names
        (no_dates, different_name),
    ]
    for first, second in expected_matches:
        assert author_dates_match(first, second)

    assert author_dates_match(basic, non_match) is False

    # FIXME: the following should properly be False:
    # this shows matches are only occurring on year, full dates are ignored!
    assert author_dates_match(full_different, full_dates)
def find_entity(author):
    """Look up an existing author record matching the given import author.

    :param dict author: author being imported. 'name' is required;
        'entity_type' and 'birth_date' are read when present (the date
        helpers may read further date fields).
    :return: the matching record, or None when nothing suitable is found
    :raises ValueError: re-raised from pick_from_matches() after the
        candidates have been printed
    """
    name = author['name']
    # find_author() yields record keys here; each one is loaded with withKey().
    things = find_author(name)

    # A missing 'entity_type' is treated like 'person' rather than raising
    # KeyError; any value that is present is compared exactly as before.
    if author.get('entity_type', 'person') != 'person':
        # Non-person entities: take the first name hit, no date comparison.
        if not things:
            return None
        db_entity = withKey(things[0])
        if db_entity['type']['key'] == '/type/redirect':
            db_entity = withKey(db_entity['location'])
        assert db_entity['type']['key'] == '/type/author'
        return db_entity

    if ', ' in name:
        # Also search for the name as rewritten by flip_name()
        # (presumably "Last, First" -> "First Last" -- confirm against helper).
        things += find_author(flip_name(name))

    match = []
    seen = set()
    for key in things:
        if key in seen:
            continue
        seen.add(key)
        db_entity = withKey(key)
        if db_entity['type']['key'] == '/type/redirect':
            # Follow one level of redirect, still de-duplicating on the target.
            key = db_entity['location']
            if key in seen:
                continue
            seen.add(key)
            db_entity = withKey(key)
        if db_entity['type']['key'] == '/type/delete':
            continue
        try:
            assert db_entity['type']['key'] == '/type/author'
        except Exception:
            # Dump the offending record for diagnosis, then propagate.
            # Single-argument print() emits the same text on Python 2 and 3.
            print('%s %s %s' % (name, key, db_entity))
            raise
        # Only compare like with like: either both sides carry a birth date
        # or neither does.
        if 'birth_date' in author and 'birth_date' not in db_entity:
            continue
        if 'birth_date' not in author and 'birth_date' in db_entity:
            continue
        if not author_dates_match(author, db_entity):
            continue
        match.append(db_entity)

    if not match:
        return None
    if len(match) == 1:
        return match[0]
    try:
        return pick_from_matches(author, match)
    except ValueError:
        print('author: %s' % (author,))
        print('match: %s' % (match,))
        raise
def test_author_dates_match():
    """Exercise author_dates_match() with author records carrying different dates.

    Every fixture is a 'John Smith' author record; they differ only in which
    of 'birth_date' / 'death_date' are present and how precise the dates are.
    """
    author_type = {u'key': u'/type/author'}

    # Year-only birth and death dates.
    basic = {
        u'name': u'John Smith',
        u'death_date': u'1688',
        'key': u'/a/OL6398452A',
        u'birth_date': u'1650',
        u'type': author_type,
    }
    # Same years as `basic`, but with full day/month precision.
    full_dates = {
        u'name': u'John Smith',
        u'death_date': u'23 June 1688',
        'key': u'/a/OL6398452A',
        u'birth_date': u'01 January 1650',
        u'type': author_type,
    }
    # Same years again, but different days/months than `full_dates`.
    full_different = {
        u'name': u'John Smith',
        u'death_date': u'12 June 1688',
        'key': u'/a/OL6398452A',
        u'birth_date': u'01 December 1650',
        u'type': author_type,
    }
    # Birth year only.
    no_death = {
        u'name': u'John Smith',
        'key': u'/a/OL6398452A',
        u'birth_date': u'1650',
        u'type': author_type,
    }
    # No dates at all.
    no_dates = {
        u'name': u'John Smith',
        'key': u'/a/OL6398452A',
        u'type': author_type,
    }
    # Entirely different years.
    non_match = {
        u'name': u'John Smith',
        u'death_date': u'1999',
        'key': u'/a/OL6398452A',
        u'birth_date': u'1950',
        u'type': author_type,
    }

    # Records that `basic` is expected to match, checked in order.
    for other in (basic, full_dates, no_death, no_dates):
        assert author_dates_match(basic, other)

    assert author_dates_match(basic, non_match) is False

    # FIXME: the following should properly be False:
    # this shows matches are only occurring on year, full dates are ignored!
    assert author_dates_match(full_different, full_dates)
def find_entity(author):
    """
    Looks for an existing Author record in OL by name
    and returns it if found.

    Non-person entities (a truthy 'entity_type' other than 'person') return
    the first name hit without any date comparison. Otherwise candidates are
    de-duplicated by key and kept only when their birth-date presence agrees
    with the import record and author_dates_match() accepts them.

    :param dict author: Author import dict {"name": "Some One"}; optional
        'entity_type' and 'birth_date' are read when present
    :rtype: dict|None
    :return: Existing Author record, if one is found
    """
    name = author['name']
    things = find_author(name)
    et = author.get('entity_type')
    if et and et != 'person':
        if not things:
            return None
        db_entity = things[0]
        assert db_entity['type']['key'] == '/type/author'
        return db_entity
    if ', ' in name:
        # Also search for the name as rewritten by flip_name()
        # (presumably "Last, First" -> "First Last" -- confirm against helper).
        things += find_author(flip_name(name))
    match = []
    seen = set()
    for a in things:
        key = a['key']
        if key in seen:
            # Both searches can return the same record.
            continue
        seen.add(key)
        assert a.type.key == '/type/author'
        # Only compare like with like: either both sides carry a birth date
        # or neither does.
        if 'birth_date' in author and 'birth_date' not in a:
            continue
        if 'birth_date' not in author and 'birth_date' in a:
            continue
        if not author_dates_match(author, a):
            continue
        match.append(a)
    if not match:
        return None
    if len(match) == 1:
        return match[0]
    # Several candidates survived; let pick_from_matches() choose one.
    return pick_from_matches(author, match)
def find_entity(author):
    """Look up an existing author record matching the given import author.

    :param dict author: author being imported. 'name' is required;
        'entity_type' and 'birth_date' are read when present (the date
        helpers may read further date fields).
    :return: the matching record, or None when nothing suitable is found
    :raises ValueError: re-raised from pick_from_matches() after the
        candidates have been printed
    """
    name = author['name']
    # find_author() yields the candidate records themselves in this version.
    things = find_author(name)

    # A missing 'entity_type' is treated like 'person' rather than raising
    # KeyError; any value that is present is compared exactly as before.
    if author.get('entity_type', 'person') != 'person':
        # Non-person entities: take the first name hit, no date comparison.
        if not things:
            return None
        db_entity = things[0]
        # NOTE: redirect resolution (loading db_entity['location'] via
        # withKey() for '/type/redirect' records) is disabled here.
        assert db_entity['type'] == '/type/author'
        return db_entity

    if ', ' in name:
        # Also search for the name as rewritten by flip_name()
        # (presumably "Last, First" -> "First Last" -- confirm against helper).
        things += find_author(flip_name(name))

    match = []
    seen = set()
    for a in things:
        key = a['key']
        if key in seen:
            # Both searches can return the same record.
            continue
        seen.add(key)
        assert a['type'] == '/type/author'
        # Only compare like with like: either both sides carry a birth date
        # or neither does.
        if 'birth_date' in author and 'birth_date' not in a:
            continue
        if 'birth_date' not in author and 'birth_date' in a:
            continue
        if not author_dates_match(author, a):
            continue
        match.append(a)

    if not match:
        return None
    if len(match) == 1:
        return match[0]
    try:
        return pick_from_matches(author, match)
    except ValueError:
        # Single-argument print() emits the same text on Python 2 and 3.
        print('author: %s' % (author,))
        print('match: %s' % (match,))
        raise
def find_entity(author):
    """Look up an existing author record matching the given import author.

    :param dict author: author being imported. 'name' is required;
        'entity_type' and 'birth_date' are read when present (the date
        helpers may read further date fields).
    :return: the matching record, or None when nothing suitable is found
    :raises ValueError: re-raised from pick_from_matches() after the
        candidates have been printed
    """
    name = author['name']
    # find_author() yields the candidate records themselves in this version.
    things = find_author(name)

    # A missing 'entity_type' is treated like 'person' rather than raising
    # KeyError; any value that is present is compared exactly as before.
    if author.get('entity_type', 'person') != 'person':
        # Non-person entities: take the first name hit, no date comparison.
        if not things:
            return None
        db_entity = things[0]
        # NOTE: redirect resolution (loading db_entity['location'] via
        # withKey() for '/type/redirect' records) is disabled here.
        assert db_entity['type'] == '/type/author'
        return db_entity

    if ', ' in name:
        # Also search for the name as rewritten by flip_name()
        # (presumably "Last, First" -> "First Last" -- confirm against helper).
        things += find_author(flip_name(name))

    match = []
    seen = set()
    for a in things:
        key = a['key']
        if key in seen:
            # Both searches can return the same record.
            continue
        seen.add(key)
        assert a['type'] == '/type/author'
        # Only compare like with like: either both sides carry a birth date
        # or neither does.
        if 'birth_date' in author and 'birth_date' not in a:
            continue
        if 'birth_date' not in author and 'birth_date' in a:
            continue
        if not author_dates_match(author, a):
            continue
        match.append(a)

    if not match:
        return None
    if len(match) == 1:
        return match[0]
    try:
        return pick_from_matches(author, match)
    except ValueError:
        # Show what could not be disambiguated, then propagate.
        print('author:', author)
        print('match:', match)
        raise
def find_entity(author):
    """Look up an existing author record matching the given import author.

    :param dict author: author being imported. 'name' is required;
        'entity_type' and 'birth_date' are read when present (the date
        helpers may read further date fields).
    :return: the matching record, or None when nothing suitable is found
    :raises ValueError: re-raised from pick_from_matches() after the
        candidates have been printed
    """
    # no direct DB calls
    name = author['name']
    things = find_author(name)
    et = author.get('entity_type')
    if et and et != 'person':
        # Non-person entities: take the first name hit, no date comparison.
        if not things:
            return None
        db_entity = things[0]
        assert db_entity['type']['key'] == '/type/author'
        return db_entity

    if ', ' in name:
        # Also search for the name as rewritten by flip_name()
        # (presumably "Last, First" -> "First Last" -- confirm against helper).
        things += find_author(flip_name(name))

    match = []
    seen = set()
    for a in things:
        key = a['key']
        if key in seen:
            # Both searches can return the same record.
            continue
        seen.add(key)
        assert a.type.key == '/type/author'
        # Only compare like with like: either both sides carry a birth date
        # or neither does.
        if 'birth_date' in author and 'birth_date' not in a:
            continue
        if 'birth_date' not in author and 'birth_date' in a:
            continue
        if not author_dates_match(author, a):
            continue
        match.append(a)

    if not match:
        return None
    if len(match) == 1:
        return match[0]
    try:
        return pick_from_matches(author, match)
    except ValueError:
        # Single-argument print() emits the same text on Python 2 and 3.
        print('author: %s' % (author,))
        print('match: %s' % (match,))
        raise