Example #1
0
                        seen.add(edition_key)
                        thing = withKey(edition_key)
                        assert thing
                        if 'type' not in thing:
                            print(thing)
                        if thing.get('error') == 'notfound':
                            found = False
                            break
                        if thing['type']['key'] == '/type/redirect':
                            print('following redirect %s => %s' %
                                  (edition_key, thing['location']))
                            edition_key = thing['location']
                    if not found:
                        continue
                    if try_merge(e1, edition_key, thing):
                        add_source_records(edition_key, ia)
                        write_log(ia, when, "found match: " + edition_key)
                        match = True
                        break
                if match:
                    break

            if not match:
                try:
                    load(ia, use_binary=use_binary)
                except:
                    print('bad item:', ia)
                    raise
                write_log(ia, when, "loaded")
            print(row.updated, file=open(state_file, 'w'))
        start = row.updated
Example #2
0
from openlibrary.catalog.utils.query import query, withKey
from openlibrary.catalog.importer.update import add_source_records

# Walk the candidate-match file and, for every candidate whose title and first
# publisher agree with the referenced edition, attach the item as a source
# record via add_source_records().
#
# Each input line is evaluated to a dict; the keys read below are 'item_id',
# 'ol', 'title' and (optionally) 'publisher'.
with open('/1/edward/imagepdf/possible_match2') as match_file:
    for line in match_file:
        # NOTE(review): eval() executes whatever expression the line holds.
        # This is only acceptable while the input file is locally generated
        # and trusted; consider ast.literal_eval if the lines are plain
        # literals -- TODO confirm the file format before switching.
        doc = eval(line)
        if 'publisher' not in doc:
            continue
        item_id = doc['item_id']
        # Skip items that some edition already lists as a source record.
        if query({'type': '/type/edition', 'source_records': 'ia:' + item_id}):
            continue
        e = withKey(doc['ol'])
        # An edition without any publisher cannot be compared; an empty list
        # would otherwise raise IndexError on the [0] lookup further down.
        if 'publishers' not in e or not e['publishers']:
            continue
        # The candidate title may equal the bare title or the title combined
        # with the optional prefix and/or subtitle. The `or` chain evaluates
        # lazily, in the same order as the original if/elif ladder.
        doc_title = doc['title']
        title_match = (
            doc_title == e['title']
            or doc_title == e.get('title_prefix', '') + e['title']
            or doc_title == (e.get('title_prefix', '') + e['title']
                             + e.get('subtitle', ''))
            or doc_title == e['title'] + e.get('subtitle', '')
        )
        if not title_match:
            continue
        if doc['publisher'] != e['publishers'][0]:
            continue
        # Single-argument print(...) parses under both Python 2 and Python 3
        # and emits the same space-separated line as the old print statement.
        print('match: %s %s' % (item_id, doc['ol']))
        add_source_records(doc['ol'], item_id)

Example #3
0
                    while not thing or thing['type']['key'] == '/type/redirect':
                        seen.add(edition_key)
                        thing = withKey(edition_key)
                        assert thing
                        if 'type' not in thing:
                            print thing
                        if thing.get('error') == 'notfound':
                            found = False
                            break
                        if thing['type']['key'] == '/type/redirect':
                            print 'following redirect %s => %s' % (edition_key, thing['location'])
                            edition_key = thing['location']
                    if not found:
                        continue
                    if try_merge(e1, edition_key, thing):
                        add_source_records(edition_key, ia)
                        write_log(ia, when, "found match: " + edition_key)
                        match = True
                        break
                if match:
                    break

            if not match:
                try:
                    load(ia, use_binary=use_binary)
                except:
                    print 'bad item:', ia
                    raise
                write_log(ia, when, "loaded")
            print >> open(state_file, 'w'), row.updated
        start = row.updated
Example #4
0
from openlibrary.catalog.utils.query import query, withKey
from openlibrary.catalog.importer.update import add_source_records

# For every line of the candidate-match file, compare the candidate against
# the edition it points at and, when the title and first publisher are equal,
# register the item with add_source_records().
#
# Each line is evaluated to a dict from which 'item_id', 'ol', 'title' and
# (when present) 'publisher' are read.
with open('/1/edward/imagepdf/possible_match2') as candidates:
    for raw_line in candidates:
        # NOTE(review): eval() runs arbitrary expressions from the file. Keep
        # this only for trusted, locally produced input; ast.literal_eval is
        # the safer choice if every line is a plain literal -- TODO confirm.
        doc = eval(raw_line)
        if 'publisher' not in doc:
            continue

        item_id = doc['item_id']
        already_linked = query({
            'type': '/type/edition',
            'source_records': 'ia:' + item_id,
        })
        if already_linked:
            # Some edition already carries this item as a source record.
            continue

        e = withKey(doc['ol'])
        # Guard against a missing *or empty* publishers list so the [0]
        # lookup below cannot raise IndexError.
        if 'publishers' not in e or not e['publishers']:
            continue

        # Accept the candidate title if it equals the edition title alone or
        # joined with its optional prefix and/or subtitle. Checks run lazily
        # and in the original order.
        wanted = doc['title']
        if wanted == e['title']:
            pass
        elif wanted == e.get('title_prefix', '') + e['title']:
            pass
        elif wanted == (e.get('title_prefix', '') + e['title']
                        + e.get('subtitle', '')):
            pass
        elif wanted == e['title'] + e.get('subtitle', ''):
            pass
        else:
            continue

        if doc['publisher'] != e['publishers'][0]:
            continue

        # A single formatted argument keeps the output identical while being
        # valid syntax on both Python 2 and Python 3.
        print('match: %s %s' % (item_id, doc['ol']))
        add_source_records(doc['ol'], item_id)