def run_work_finder(i):
    """Re-run the work finder for the master author of an author merge.

    ``i`` is a record with a ``'timestamp'`` entry and a ``'data'`` mapping
    that holds ``'author'``, ``'result'`` and, optionally, ``'changeset'``.
    Progress is reported on stdout.

    Without a changeset the function returns right after the summary
    lines. Otherwise it reads the ``'master'`` author key from the
    changeset, looks up that author's works, applies the updates and
    appends the elapsed seconds to ``update_times``.

    Raises:
        AssertionError: if the changeset data is not exactly the two
            entries ``'master'`` and ``'duplicates'``. The changeset is
            printed before the error propagates.
    """
    t0 = time()
    d = i['data']
    print('timestamp:', i['timestamp'])
    print('author:', d['author'])
    print('%d records updated:' % len(d['result']))
    if 'changeset' not in d:
        print('no changeset in author merge')
        print()
        return
    changeset = d['changeset']

    try:
        merge = changeset['data']
        # Explicit check rather than ``assert`` so the validation still
        # runs under ``python -O``; AssertionError is kept for callers.
        if (len(merge) != 2 or 'master' not in merge
                or 'duplicates' not in merge):
            raise AssertionError('unexpected author merge changeset data')
    except Exception:
        # Show the offending changeset before the error propagates.
        print(changeset)
        raise
    akey = merge['master']
    dup_keys = merge['duplicates']
    print('dups:', dup_keys)

    # Title redirects already found for the author are handed to
    # find_works as ``existing``.
    title_redirects = find_title_redirects(akey)
    works = find_works(get_books(akey, books_query(akey)),
                       existing=title_redirects)
    print('author:', akey)
    print('works:', works)
    updated = update_works(akey, works, do_updates=True)
    print('%d records updated' % len(updated))

    t1 = time() - t0
    update_times.append(t1)
    # %d truncates the elapsed time to whole seconds for display only.
    print('update takes: %d seconds' % t1)
    print()
def run_work_finder(i):
    """Re-run the work finder for the master author of an author merge.

    ``i`` is a record with a ``'timestamp'`` entry and a ``'data'`` mapping
    that holds ``'author'``, ``'result'`` and, optionally, ``'changeset'``.
    Progress is reported on stdout. Every ``print`` call takes a single
    argument so the output is the same under Python 2 and Python 3.

    Without a changeset the function returns right after the summary
    lines. Otherwise it reads the ``'master'`` author key from the
    changeset, looks up that author's works, applies the updates and
    appends the elapsed seconds to ``update_times``.

    Raises:
        AssertionError: if the changeset data is not exactly the two
            entries ``'master'`` and ``'duplicates'``. The changeset is
            printed before the error propagates.
    """
    t0 = time()
    d = i['data']
    print('timestamp: %s' % (i['timestamp'],))
    print('author: %s' % (d['author'],))
    print('%d records updated:' % len(d['result']))
    if 'changeset' not in d:
        print('no changeset in author merge')
        print('')
        return
    changeset = d['changeset']

    try:
        merge = changeset['data']
        # Explicit check rather than ``assert`` so the validation still
        # runs under ``python -O``; AssertionError is kept for callers.
        if (len(merge) != 2 or 'master' not in merge
                or 'duplicates' not in merge):
            raise AssertionError('unexpected author merge changeset data')
    except Exception:
        # Show the offending changeset before the error propagates.
        print(changeset)
        raise
    akey = merge['master']
    dup_keys = merge['duplicates']
    print(changeset)
    print('dups: %s' % (dup_keys,))

    # Title redirects already found for the author are handed to
    # find_works as ``existing``.
    title_redirects = find_title_redirects(akey)
    works = find_works(akey, get_books(akey, books_query(akey)),
                       existing=title_redirects)
    print('author: %s' % (akey,))
    print('works: %s' % (works,))
    updated = update_works(akey, works, do_updates=True)
    print('%d records updated' % len(updated))

    t1 = time() - t0
    update_times.append(t1)
    # %d truncates the elapsed time to whole seconds for display only.
    print('update takes: %d seconds' % t1)
    print('')
def run_work_finder(i):
    """Re-run the work finder for the master author of an author merge.

    ``i`` is a record with a ``"timestamp"`` entry and a ``"data"`` mapping
    that holds ``"author"``, ``"result"`` and, optionally, ``"changeset"``.
    Progress is reported on stdout. Every ``print`` call takes a single
    argument so the output is the same under Python 2 and Python 3.

    Without a changeset the function returns right after the summary
    lines. Otherwise it reads the ``"master"`` author key from the
    changeset, looks up that author's works, applies the updates and
    appends the elapsed seconds to ``update_times``.

    Raises:
        AssertionError: if the changeset data is not exactly the two
            entries ``"master"`` and ``"duplicates"``. The changeset is
            printed before the error propagates.
    """
    t0 = time()
    d = i["data"]
    print("timestamp: %s" % (i["timestamp"],))
    print("author: %s" % (d["author"],))
    print("%d records updated:" % len(d["result"]))
    if "changeset" not in d:
        print("no changeset in author merge")
        print("")
        return
    changeset = d["changeset"]

    try:
        merge = changeset["data"]
        # Explicit check rather than ``assert`` so the validation still
        # runs under ``python -O``; AssertionError is kept for callers.
        if (len(merge) != 2 or "master" not in merge
                or "duplicates" not in merge):
            raise AssertionError("unexpected author merge changeset data")
    except Exception:
        # Show the offending changeset before the error propagates.
        print(changeset)
        raise
    akey = merge["master"]
    dup_keys = merge["duplicates"]
    print("dups: %s" % (dup_keys,))

    # Title redirects already found for the author are handed to
    # find_works as ``existing``.
    title_redirects = find_title_redirects(akey)
    works = find_works(get_books(akey, books_query(akey)),
                       existing=title_redirects)
    print("author: %s" % (akey,))
    print("works: %s" % (works,))
    updated = update_works(akey, works, do_updates=True)
    print("%d records updated" % len(updated))

    t1 = time() - t0
    update_times.append(t1)
    # %d truncates the elapsed time to whole seconds for display only.
    print("update takes: %d seconds" % t1)
    print("")
Beispiel #4
0
#!/usr/bin/python

from __future__ import print_function
from openlibrary.catalog.works.find_works import find_title_redirects, find_works, get_books, books_query, update_works
import sys
from pprint import pprint

# Command-line script: find and update the works of a single author.
# The author key is taken from the first command-line argument.
if len(sys.argv) < 2:
    # Exit with a usage message instead of an IndexError traceback.
    sys.exit('usage: %s <author key>' % sys.argv[0])
akey = sys.argv[1]

title_redirects = find_title_redirects(akey)
print('title_redirects:')
pprint(title_redirects)
print()

# Materialise the result as a list so it can be both pretty-printed and
# passed on to update_works.
works = list(find_works(akey, get_books(akey, books_query(akey)),
                        existing=title_redirects))
print('works:')
pprint(works)
print()

updated = update_works(akey, works, do_updates=True)
print('updated works:')
pprint(updated)
Beispiel #5
0
 elif action == 'save_many':
     author_merge = i['data']['comment'] == 'merge authors'
     if author_merge and skip_author_merge:
         continue
     if author_merge and only_author_merge:
         continue
     if handle_author_merge and not i['data']['author'].endswith(
             'Bot') and author_merge:
         first_redirect = i['data']['query'][0]
         assert first_redirect['type']['key'] == '/type/redirect'
         akey = first_redirect['location']
         if akey.startswith('/authors/'):
             akey = '/a/' + akey[len('/authors/'):]
         title_redirects = find_title_redirects(akey)
         works = find_works(akey,
                            get_books(akey, books_query(akey)),
                            existing=title_redirects)
         updated = update_works(akey, works, do_updates=True)
         works_to_update.update(w['key'] for w in updated)
     for query in i['data']['query']:
         key = query.pop('key')
         process_save(key, query)
 # store.put gets called when any document is updated in the store. Borrowing/Returning a book triggers one.
 elif action == 'store.put':
     # A sample record looks like this:
     # {
     #   "action": "store.put",
     #   "timestamp": "2011-12-01T00:00:44.241604",
     #   "data": {
     #       "data": {"borrowed": "false", "_key": "ebooks/books/OL5854888M", "_rev": "975708", "type": "ebook", "book_key": "/books/OL5854888M"},
     #       "key": "ebooks/books/OL5854888M"