def find_match(e1, edition_pool):
    """
    Find the best match for e1 in edition_pool and return its key.

    Every candidate key is fetched from ``web.ctx.site``; redirects are
    followed via the redirect's ``'location'`` until a non-redirect record
    is reached. The first candidate for which ``try_merge`` returns a true
    value wins.

    :param dict e1: the new edition we are trying to match, output of build_marc(import record)
    :param dict edition_pool: mapping whose values are iterables of candidate
        edition keys, output of build_pool(import record). The mapping's own
        keys are not used here.
    :rtype: str|None
    :return: None or the edition key '/books/OL...M' of the best edition match for e1 in edition_pool
    """
    # Keys already fetched, so no record is loaded or compared twice and
    # redirect cycles cannot loop forever.
    seen = set()
    # .values() rather than .iteritems(): the mapping keys are unused, and
    # .values() exists on both Python 2 and Python 3 dicts.
    for edition_keys in edition_pool.values():
        for edition_key in edition_keys:
            thing = None
            while edition_key not in seen:
                seen.add(edition_key)
                thing = web.ctx.site.get(edition_key)
                if thing is None or not is_redirect(thing):
                    # Either the record is missing or it is the real target.
                    break
                # Follow the redirect; the redirect record itself is never a
                # merge candidate, so forget it before looping.
                edition_key = thing['location']
                thing = None
            if thing is None:
                # Already visited, missing, or the redirect chain led back to
                # a key that has already been handled.
                continue
            # Here `thing` is always the resolved, non-redirect record that
            # belongs to `edition_key`.
            if try_merge(e1, edition_key, thing):
                return edition_key
    return None
def test_try_merge(mock_site):
    """try_merge() accepts an edition that was just loaded from the same record.

    The record is first imported with load(); the edition it reports is then
    read back from the mock site and compared against a build_marc() version
    of the same record.
    """
    rec = dict(
        title='Test item',
        lccn=['123'],
        authors=[dict(name='Smith, John', birth_date='1980')],
        source_records=['ia:test_item'],
    )

    # Import the record and fetch the edition that load() reports.
    edition_key = load(rec)['edition']['key']
    stored_edition = mock_site.get(edition_key)

    # 'full_title' is added only after the import, before build_marc() runs.
    rec.update(full_title=rec['title'])
    candidate = build_marc(rec)
    add_db_name(candidate)

    assert try_merge(candidate, edition_key, stored_edition) is True
def test_try_merge_full():
    """try_merge() matches a fully expanded record against a saved edition.

    The incoming record and the saved edition share author, normalized title,
    short title, page count and publish date, but differ in publishers, ISBNs
    and the trailing period of the full title.
    """
    web.ctx.site = MockSite()
    site = web.ctx.site

    def make_author():
        # A fresh dict per call, so the two records never share an object.
        return {
            'birth_date': u'1897',
            'db_name': u'Green, Constance McLaughlin 1897-',
            'entity_type': 'person',
            'name': u'Green, Constance McLaughlin',
            'personal_name': u'Green, Constance McLaughlin'
        }

    normalized = u'eli whitney and the birth of american technology'
    short = u'eli whitney and the birth'
    edition_key = '/books/OL1M'

    # The record being imported.
    incoming = {
        'authors': [make_author()],
        'full_title': u'Eli Whitney and the birth of American technology',
        'isbn': [u'188674632X'],
        'normalized_title': normalized,
        'number_of_pages': 215,
        'publish_date': '1956',
        'publishers': [u'HarperCollins', u'[distributed by Talman Pub.]'],
        'short_title': short,
        'source_record_loc': 'bpl101.mrc:0:1226',
        'titles': [
            u'Eli Whitney and the birth of American technology',
            normalized
        ]
    }

    # The edition already on the site. Per the original note it must reach
    # try_merge() as an Edition Thing object, so it is saved as a dict and
    # read back through the site instead of being passed directly.
    stored = {
        'authors': [make_author()],
        'full_title': u'Eli Whitney and the birth of American technology.',
        'isbn': [],
        'normalized_title': normalized,
        'number_of_pages': 215,
        'publish_date': '1956',
        'publishers': ['Little, Brown'],
        'short_title': short,
        'source_record_loc': 'marc_records_scriblio_net/part04.dat:119539872:591',
        'title': 'Eli Whitney and the birth of American technology.',
        'type': {
            'key': '/type/edition'
        },
        'key': edition_key
    }

    site.save_many([stored])
    edition = site.get(edition_key)

    assert try_merge(incoming, edition_key, edition) is True