def run_work_finder(i):
    """Re-run work finding for the master author of one author-merge record.

    Prints the record's timestamp, author and result count.  When the record
    carries a changeset, looks up the title redirects and works for the
    changeset's ``'master'`` key, calls ``update_works`` with
    ``do_updates=True`` and appends the elapsed seconds to ``update_times``.

    Args:
        i: mapping with a ``'timestamp'`` entry and a ``'data'`` mapping that
            holds ``'author'``, ``'result'`` and, optionally, ``'changeset'``.

    Returns:
        None.  Returns early (after printing a notice) when ``i['data']``
        has no ``'changeset'`` entry.

    Raises:
        AssertionError: if the changeset data is not exactly the two entries
            ``'master'`` and ``'duplicates'``.
    """
    t0 = time()
    d = i['data']
    print('timestamp:', i['timestamp'])
    print('author:', d['author'])
    print('%d records updated:' % len(d['result']))
    if 'changeset' not in d:
        print('no changeset in author merge')
        print()
        return

    changeset = d['changeset']
    try:
        merge = changeset['data']
        # Explicit check rather than ``assert`` so the validation is not
        # stripped when Python runs with -O.
        well_formed = (
            len(merge) == 2
            and 'master' in merge
            and 'duplicates' in merge
        )
        if not well_formed:
            raise AssertionError('unexpected changeset data: %r' % (merge,))
    except Exception:
        # Dump the offending changeset before letting the error propagate.
        print(changeset)
        raise

    akey = merge['master']
    dup_keys = merge['duplicates']
    print('dups:', dup_keys)

    title_redirects = find_title_redirects(akey)
    works = find_works(
        get_books(akey, books_query(akey)),
        existing=title_redirects,
    )
    print('author:', akey)
    print('works:', works)

    updated = update_works(akey, works, do_updates=True)
    print('%d records updated' % len(updated))

    t1 = time() - t0
    update_times.append(t1)
    print('update takes: %d seconds' % t1)
    print()
def run_work_finder(i):
    """Re-run work finding for the master author of one author-merge record.

    Prints the record's timestamp, author and result count.  When the record
    carries a changeset, prints the changeset, looks up the title redirects
    and works for the changeset's ``'master'`` key, calls ``update_works``
    with ``do_updates=True`` and appends the elapsed seconds to
    ``update_times``.

    Every ``print`` receives one pre-formatted string, so the output is the
    same whether ``print`` is the Python 2 statement or the Python 3
    function.

    Args:
        i: mapping with a ``'timestamp'`` entry and a ``'data'`` mapping that
            holds ``'author'``, ``'result'`` and, optionally, ``'changeset'``.

    Returns:
        None.  Returns early (after printing a notice) when ``i['data']``
        has no ``'changeset'`` entry.

    Raises:
        AssertionError: if the changeset data is not exactly the two entries
            ``'master'`` and ``'duplicates'``.
    """
    t0 = time()
    d = i['data']
    print('timestamp: %s' % (i['timestamp'],))
    print('author: %s' % (d['author'],))
    print('%d records updated:' % len(d['result']))
    if 'changeset' not in d:
        print('no changeset in author merge')
        print('')
        return

    changeset = d['changeset']
    try:
        merge = changeset['data']
        # Explicit check rather than ``assert`` so the validation is not
        # stripped when Python runs with -O.
        well_formed = (
            len(merge) == 2
            and 'master' in merge
            and 'duplicates' in merge
        )
        if not well_formed:
            raise AssertionError('unexpected changeset data: %r' % (merge,))
    except Exception:
        # Dump the offending changeset before letting the error propagate.
        print(changeset)
        raise

    akey = merge['master']
    dup_keys = merge['duplicates']
    print(changeset)
    print('dups: %s' % (dup_keys,))

    title_redirects = find_title_redirects(akey)
    works = find_works(
        akey,
        get_books(akey, books_query(akey)),
        existing=title_redirects,
    )
    print('author: %s' % (akey,))
    print('works: %s' % (works,))

    updated = update_works(akey, works, do_updates=True)
    print('%d records updated' % len(updated))

    t1 = time() - t0
    update_times.append(t1)
    print('update takes: %d seconds' % t1)
    print('')
def run_work_finder(i):
    """Re-run work finding for the master author of one author-merge record.

    Prints the record's timestamp, author and result count.  When the record
    carries a changeset, looks up the title redirects and works for the
    changeset's ``"master"`` key, calls ``update_works`` with
    ``do_updates=True`` and appends the elapsed seconds to ``update_times``.

    Every ``print`` receives one pre-formatted string, so the output is the
    same whether ``print`` is the Python 2 statement or the Python 3
    function.

    Args:
        i: mapping with a ``"timestamp"`` entry and a ``"data"`` mapping that
            holds ``"author"``, ``"result"`` and, optionally, ``"changeset"``.

    Returns:
        None.  Returns early (after printing a notice) when ``i["data"]``
        has no ``"changeset"`` entry.

    Raises:
        AssertionError: if the changeset data is not exactly the two entries
            ``"master"`` and ``"duplicates"``.
    """
    t0 = time()
    d = i["data"]
    print("timestamp: %s" % (i["timestamp"],))
    print("author: %s" % (d["author"],))
    print("%d records updated:" % len(d["result"]))
    if "changeset" not in d:
        print("no changeset in author merge")
        print("")
        return

    changeset = d["changeset"]
    try:
        merge = changeset["data"]
        # Explicit check rather than ``assert`` so the validation is not
        # stripped when Python runs with -O.
        well_formed = (
            len(merge) == 2
            and "master" in merge
            and "duplicates" in merge
        )
        if not well_formed:
            raise AssertionError("unexpected changeset data: %r" % (merge,))
    except Exception:
        # Dump the offending changeset before letting the error propagate.
        print(changeset)
        raise

    akey = merge["master"]
    dup_keys = merge["duplicates"]
    print("dups: %s" % (dup_keys,))

    title_redirects = find_title_redirects(akey)
    works = find_works(
        get_books(akey, books_query(akey)),
        existing=title_redirects,
    )
    print("author: %s" % (akey,))
    print("works: %s" % (works,))

    updated = update_works(akey, works, do_updates=True)
    print("%d records updated" % len(updated))

    t1 = time() - t0
    update_times.append(t1)
    print("update takes: %d seconds" % t1)
    print("")
#!/usr/bin/python from __future__ import print_function from openlibrary.catalog.works.find_works import find_title_redirects, find_works, get_books, books_query, update_works import sys from pprint import pprint akey = sys.argv[1] title_redirects = find_title_redirects(akey) print('title_redirects:') pprint(title_redirects) print() works = find_works(akey, get_books(akey, books_query(akey)), existing=title_redirects) works = list(works) print('works:') pprint(works) print() updated = update_works(akey, works, do_updates=True) print('updated works:') pprint(updated)
def test_find_works():
    """Exercise find_works and its helper stages on small edition lists.

    Covers, in order: empty input, a single edition, two editions sharing a
    title, and four editions where two carry an accented title plus a
    ``work_title`` that matches the plain title of the other two.
    """
    # --- empty input -----------------------------------------------------
    works = list(find_works([]))
    assert works == []

    # --- one edition -----------------------------------------------------
    books = [{'title': 'Magic', 'key': '/books/OL1M'}]
    book_iter = get_books('', books, do_get_mc=False)
    books2 = list(book_iter)
    assert books2 == [
        {'key': '/books/OL1M', 'norm_title': 'magic', 'title': 'Magic'},
    ]

    var = find_works2(books2)
    assert var['equiv'] == {}
    assert var['norm_titles'] == {'magic': 1}
    assert var['books_by_key'] == {'/books/OL1M': books2[0]}
    assert var['books'] == books2
    assert var['rev_wt'] == {}

    assert build_work_title_map({}, {'magic': 1}) == {}
    assert build_work_title_map({}, {'magic': 2, 'test': 0}) == {}

    works = list(find_works(books2, do_get_mc=False))
    expect = [
        {
            'title': 'Magic',
            'editions': [
                {
                    'key': '/books/OL1M',
                    'norm_title': 'magic',
                    'title': 'Magic',
                },
            ],
        },
    ]
    assert works == expect

    # --- two editions with the same title --------------------------------
    books = [
        {'title': 'Magic', 'key': '/books/OL1M'},
        {'title': 'Magic', 'key': '/books/OL2M'},
    ]
    book_iter = get_books('', books, do_get_mc=False)
    books2 = list(book_iter)

    var = find_works2(books2)
    assert var['equiv'] == {}
    assert var['norm_titles'] == {'magic': 2}
    assert var['books_by_key'] == {
        '/books/OL1M': books2[0],
        '/books/OL2M': books2[1],
    }
    assert var['books'] == books2
    assert var['rev_wt'] == {}

    works = list(find_works(books2, do_get_mc=False))
    expect = [
        {
            'title': 'Magic',
            'editions': [
                {
                    'key': '/books/OL1M',
                    'norm_title': 'magic',
                    'title': 'Magic',
                },
                {
                    'key': '/books/OL2M',
                    'norm_title': 'magic',
                    'title': 'Magic',
                },
            ],
        },
    ]
    assert works == expect

    # --- accented title linked to a plain title via work_title -----------
    magico = u'm\xe1gico'
    assert normalize(magico) == magico

    books = [
        {'title': magico, 'work_title': ['magic'], 'key': '/books/OL1M'},
        {'title': 'magic', 'key': '/books/OL2M'},
        {'title': magico, 'work_title': ['magic'], 'key': '/books/OL3M'},
        {'title': 'magic', 'key': '/books/OL4M'},
    ]
    # Computed before get_books runs, from the raw input records.
    expect_keys = sorted(e['key'] for e in books)
    book_iter = get_books('', books, do_get_mc=False)
    books2 = list(book_iter)
    expect = [
        {
            'key': '/books/OL1M',
            'norm_title': magico,
            'work_title': 'magic',
            'norm_wt': 'magic',
            'title': magico,
        },
        {'key': '/books/OL2M', 'norm_title': 'magic', 'title': 'magic'},
        {
            'key': '/books/OL3M',
            'norm_title': magico,
            'work_title': 'magic',
            'norm_wt': 'magic',
            'title': magico,
        },
        {'key': '/books/OL4M', 'norm_title': 'magic', 'title': 'magic'},
    ]
    assert len(books2) == 4
    # Compare element by element so a failure points at a single record.
    for actual, wanted in zip(books2, expect):
        assert actual == wanted

    var = find_works2(books2)
    assert var['equiv'] == {(magico, 'magic'): 2}
    assert var['norm_titles'] == {magico: 2, 'magic': 2}
    assert len(var['books_by_key']) == 4
    bk = var['books_by_key']
    assert bk['/books/OL1M'] == books2[0]
    assert bk['/books/OL2M'] == books2[1]
    assert bk['/books/OL3M'] == books2[2]
    assert bk['/books/OL4M'] == books2[3]
    assert var['books'] == books2
    assert var['rev_wt'] == {'magic': {'magic': 2}}

    title_map = build_work_title_map(var['equiv'], var['norm_titles'])
    assert title_map == {magico: 'magic'}

    # find_works3 is called for its effect on ``var``: the 'works' and
    # 'work_titles' entries are only checked after this call.
    find_works3(var)
    assert var['works'] == {'magic': {'magic': expect_keys}}
    assert var['work_titles'] == {'magic': ['/books/OL1M', '/books/OL3M']}

    sorted_works = find_work_sort(var)
    assert sorted_works == [(6, 'magic', {'magic': expect_keys})]

    # End-to-end: all four editions end up in one work titled 'magic'.
    # Edition order is not pinned, so only the sorted keys are compared.
    works = list(find_works(books2, do_get_mc=False))
    assert len(works) == 1
    editions = works[0]['editions']
    edition_keys = sorted(e['key'] for e in editions)
    assert edition_keys == expect_keys
    assert len(editions) == 4
    del works[0]['editions']
    assert works[0] == {'title': 'magic'}