def by_authors():
    """Create works for authors that have no work records yet.

    Calls ``find_new_work_key()`` once, then iterates over the
    ``/type/author`` records yielded by ``query_iter`` (starting at a
    hard-coded offset).  Every author up to and including a hard-coded
    resume key is skipped.  For each remaining author:

    * if a ``/type/work`` record already lists the author, the author is
      reported as having works and left alone;
    * otherwise ``find_works`` is called and the candidates that have more
      than two editions are handed to ``add_works``.

    Progress is written to stdout.  Returns ``None``.
    """
    find_new_work_key()

    # Resume support for an interrupted run: everything up to and including
    # this author key is skipped.  The offset passed to query_iter below is
    # the matching hard-coded starting position.
    resume_after = '/a/OL218496A'
    skipping = True

    author_query = {'type': '/type/author', 'name': None, 'works': None}
    for a in query_iter(author_query, offset=215000):
        akey = a['key']
        if skipping:
            # Single-argument print with %-formatting gives the same output
            # on Python 2 and Python 3 without needing a __future__ import.
            print('skipping: %s %s' % (akey, a['name']))
            if akey == resume_after:
                skipping = False
            # The resume marker itself is skipped as well.
            continue

        if query({'type': '/type/work', 'authors': akey}):
            print('%s %r has works' % (akey, a['name']))
            continue

        found = find_works(akey)
        # Only candidates backed by more than two editions are kept.
        works = [w for w in found if len(w['editions']) > 2]
        if not works:
            continue

        print('%s %r' % (akey, a['name']))
        add_works(akey, works)
        # NOTE(review): blank separator line after each processed author;
        # the nesting of this statement was reconstructed from
        # whitespace-collapsed source -- confirm it belongs inside the
        # "works found" branch.
        print('')
# Load script: walks the text items scanned by Google in the archive
# metadata table and skips those that already have an Open Library edition.
import sys

import web

from catalog.utils.query import query, withKey
from catalog.read_rc import read_rc

# olapi is imported from a local checkout, so its directory has to be on
# sys.path before the import below.
sys.path.append('/home/edward/src/olapi')
from olapi import OpenLibrary, unmarshal

rc = read_rc()
ol = OpenLibrary("http://openlibrary.org")
ol.login('ImportBot', rc['ImportBot'])

db = web.database(
    dbn='mysql',
    host=rc['ia_db_host'],
    user=rc['ia_db_user'],
    passwd=rc['ia_db_pass'],
    db='archive',
)
db.printing = False

# NOTE: `iter` shadows the builtin.  The name is kept because code outside
# this section may still refer to it.
iter = db.query(
    "select identifier from metadata"
    " where noindex is null and mediatype='texts' and scanner='google'"
)

for i in iter:
    ia = i.identifier
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(ia)
    # An item counts as loaded when an edition references it either through
    # `ocaid` or through an `ia:<identifier>` source record.  `or`
    # short-circuits, so the second lookup only runs when the first finds
    # nothing.
    if query({'type': '/type/edition', 'ocaid': ia}) or query(
        {'type': '/type/edition', 'source_records': 'ia:' + ia}
    ):
        print('already loaded')
        continue
print 'ret:', ret assert isinstance(ret, basestring) key = ret pool.update(key, q) skip = True for i in iter: ia = i.identifier if skip: if ia == 'bostoncityhospit12hugh': skip = False else: continue print ia if query({'type': '/type/edition', 'ocaid': ia}): print 'already loaded' continue try: loc, rec = get_ia(ia) except (KeyboardInterrupt, NameError): raise except urllib2.HTTPError: continue if loc is None or rec is None: continue print loc, rec if not loc.endswith('.xml'): print "not XML" continue
from __future__ import print_function import web, sys from catalog.utils.query import query, withKey from catalog.read_rc import read_rc sys.path.append('/home/edward/src/olapi') from olapi import OpenLibrary, unmarshal rc = read_rc() ol = OpenLibrary("http://openlibrary.org") ol.login('ImportBot', rc['ImportBot']) db = web.database(dbn='mysql', host=rc['ia_db_host'], user=rc['ia_db_user'], \ passwd=rc['ia_db_pass'], db='archive') db.printing = False iter = db.query( "select identifier from metadata where noindex is null and mediatype='texts' and scanner='google'" ) for i in iter: ia = i.identifier print(ia) if query({'type': '/type/edition', 'ocaid': ia}): print('already loaded') continue if query({'type': '/type/edition', 'source_records': 'ia:' + ia}): print('already loaded') continue