# NOTE(review): this line is a whitespace-collapsed fragment. The loop
# headers that the break/continue statements belong to, and the definitions
# of seen, found, match, e1, ia, when, row, state_file and use_binary, are
# not visible here -- confirm the nesting against the full file.
# Visible behavior: edition_key is recorded in seen and fetched with
# withKey(); an error of 'notfound' sets found = False; a '/type/redirect'
# record replaces edition_key with thing['location'].  When try_merge()
# succeeds, add_source_records() and write_log() record the match and match
# is set to True.  When match is false, load(ia) is called (a failure prints
# 'bad item:' and re-raises) and "loaded" is logged.  row.updated is written
# to state_file and then stored in start.
seen.add(edition_key) thing = withKey(edition_key) assert thing if 'type' not in thing: print(thing) if thing.get('error') == 'notfound': found = False break if thing['type']['key'] == '/type/redirect': print('following redirect %s => %s' % (edition_key, thing['location'])) edition_key = thing['location'] if not found: continue if try_merge(e1, edition_key, thing): add_source_records(edition_key, ia) write_log(ia, when, "found match: " + edition_key) match = True break if match: break if not match: try: load(ia, use_binary=use_binary) except: print('bad item:', ia) raise write_log(ia, when, "loaded") print(row.updated, file=open(state_file, 'w')) start = row.updated
from openlibrary.catalog.utils.query import query, withKey
from openlibrary.catalog.importer.update import add_source_records

# Read candidate (item, edition) pairs, one dict repr per line, and attach
# the item to the edition via add_source_records() when the candidate title
# equals one of the edition's title forms and the candidate publisher equals
# the edition's first publisher.  Keys read from each candidate: 'publisher',
# 'item_id', 'ol' and 'title'.
with open('/1/edward/imagepdf/possible_match2') as candidates:
    # The file is opened in a with-block so the handle is closed even when
    # a lookup or update raises part-way through.
    for line in candidates:
        # NOTE(review): eval() executes whatever expression the line holds.
        # Left unchanged because the exact line format is not visible here;
        # if every line is a plain literal, ast.literal_eval is the safe
        # replacement.
        doc = eval(line)
        if 'publisher' not in doc:
            continue
        item_id = doc['item_id']
        # Skip items that some edition already lists as 'ia:<item_id>'.
        if query({'type': '/type/edition', 'source_records': 'ia:' + item_id}):
            continue
        e = withKey(doc['ol'])
        if 'publishers' not in e:
            continue

        wanted = doc['title']
        title = e['title']
        prefix = e.get('title_prefix', '')
        subtitle = e.get('subtitle', '')
        # Accept the bare title or the title combined with its optional
        # prefix and/or subtitle; `or` keeps the comparisons lazy and in the
        # original order.
        title_match = (
            wanted == title
            or wanted == prefix + title
            or wanted == prefix + title + subtitle
            or wanted == title + subtitle
        )
        if not title_match:
            continue
        if doc['publisher'] != e['publishers'][0]:
            continue

        # A single pre-formatted argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('match: %s %s' % (item_id, doc['ol']))
        add_source_records(doc['ol'], item_id)
# NOTE(review): this line is a whitespace-collapsed fragment. The enclosing
# loops that the continue/break statements after the while-loop belong to,
# and the definitions of thing, seen, found, match, e1, ia, when, row,
# state_file and use_binary, are not visible here -- confirm the nesting
# against the full file.
# Visible behavior: the while-loop keeps fetching edition_key with withKey()
# until the result is not a '/type/redirect' record, adding every key it
# visits to seen and replacing edition_key with thing['location'] on each
# redirect; an error of 'notfound' sets found = False and leaves the loop.
# When try_merge() succeeds, add_source_records() and write_log() record the
# match and match is set to True.  When match is false, load(ia) is called
# (a failure prints 'bad item:' and re-raises) and "loaded" is logged.
# row.updated is written to state_file and then stored in start.
while not thing or thing['type']['key'] == '/type/redirect': seen.add(edition_key) thing = withKey(edition_key) assert thing if 'type' not in thing: print thing if thing.get('error') == 'notfound': found = False break if thing['type']['key'] == '/type/redirect': print 'following redirect %s => %s' % (edition_key, thing['location']) edition_key = thing['location'] if not found: continue if try_merge(e1, edition_key, thing): add_source_records(edition_key, ia) write_log(ia, when, "found match: " + edition_key) match = True break if match: break if not match: try: load(ia, use_binary=use_binary) except: print 'bad item:', ia raise write_log(ia, when, "loaded") print >> open(state_file, 'w'), row.updated start = row.updated
from openlibrary.catalog.utils.query import query, withKey
from openlibrary.catalog.importer.update import add_source_records

# For every candidate in the possible-match file (one dict repr per line,
# read for the keys 'publisher', 'item_id', 'ol' and 'title'), call
# add_source_records() on the edition when the candidate title equals one of
# the edition's title forms and the candidate publisher equals the edition's
# first publisher.
with open('/1/edward/imagepdf/possible_match2') as candidates:
    # Using a with-block closes the input file even if a lookup or update
    # raises while the loop is running.
    for line in candidates:
        # NOTE(review): eval() runs arbitrary expressions from the input
        # file.  Kept as in the original because the line format cannot be
        # confirmed from here; prefer ast.literal_eval if the lines are
        # plain literals.
        doc = eval(line)
        if 'publisher' not in doc:
            continue
        item_id = doc['item_id']
        # Nothing to do when an edition already carries 'ia:<item_id>'.
        if query({'type': '/type/edition', 'source_records': 'ia:' + item_id}):
            continue
        e = withKey(doc['ol'])
        if 'publishers' not in e:
            continue

        wanted = doc['title']
        title = e['title']
        prefix = e.get('title_prefix', '')
        subtitle = e.get('subtitle', '')
        # Same four comparisons as before, in the same order; `or`
        # short-circuits just like the original if/elif chain.
        title_match = (
            wanted == title
            or wanted == prefix + title
            or wanted == prefix + title + subtitle
            or wanted == title + subtitle
        )
        if not title_match:
            continue
        if doc['publisher'] != e['publishers'][0]:
            continue

        # Formatting into one string keeps the output identical whether
        # print is the Python 2 statement or the Python 3 function.
        print('match: %s %s' % (item_id, doc['ol']))
        add_source_records(doc['ol'], item_id)