예제 #1
0
def by_authors(offset=215000, resume_after='/a/OL218496A'):
    """Scan author records and pass qualifying works to ``add_works``.

    For each author returned by ``query_iter`` the function:

    1. skips the author while still in "resume" mode (see ``resume_after``);
    2. skips the author if a ``/type/work`` record already lists them;
    3. otherwise calls ``find_works`` and keeps only the entries whose
       ``'editions'`` list has more than two items, handing any that
       remain to ``add_works``.

    Args:
        offset: forwarded to ``query_iter`` as its ``offset`` keyword.
        resume_after: key of the last author to skip. Every author up to
            and including this key is only reported as skipped. If the key
            never shows up, every author is skipped. Pass ``None`` to
            process from the first author returned.
    """
    # NOTE(review): called for its side effect only; presumably primes the
    # next free work key -- confirm against its definition.
    find_new_work_key()

    author_query = {'type': '/type/author', 'name': None, 'works': None}
    skipping = resume_after is not None
    for a in query_iter(author_query, offset=offset):
        akey = a['key']
        if skipping:
            print('skipping: %s %s' % (akey, a['name']))
            if akey == resume_after:
                # The resume author itself is still skipped; processing
                # starts with the next record.
                skipping = False
            continue

        work_query = {
            'type': '/type/work',
            'authors': akey,
        }
        if query(work_query):
            print('%s %r has works' % (akey, a['name']))
            continue

        found = find_works(akey)
        # Only groupings backed by more than two editions are kept.
        works = [w for w in found if len(w['editions']) > 2]
        if works:
            print('%s %r' % (akey, a['name']))
            add_works(akey, works)
            # Blank line separating this author's output from the next.
            print('')
예제 #2
0
import web,  sys
from catalog.utils.query import query, withKey
from catalog.read_rc import read_rc
sys.path.append('/home/edward/src/olapi')
from olapi import OpenLibrary, unmarshal

# Settings mapping; indexed below for the bot login and the DB connection.
rc = read_rc()

# Log in to Open Library as ImportBot.
# NOTE(review): the login goes to a plain http:// URL -- confirm whether an
# https endpoint should be used instead.
ol = OpenLibrary("http://openlibrary.org")
ol.login('ImportBot', rc['ImportBot'])

# Connection to the 'archive' MySQL database, parameters taken from rc.
db = web.database(
    dbn='mysql',
    host=rc['ia_db_host'],
    user=rc['ia_db_user'],
    passwd=rc['ia_db_pass'],
    db='archive',
)
# NOTE(review): presumably silences web.py's per-query output -- confirm.
db.printing = False

# Identifiers of all 'texts' items scanned by 'google' whose noindex column
# is null. Named ``rows`` so the builtin ``iter`` is not shadowed.
rows = db.query(
    "select identifier from metadata where noindex is null and mediatype='texts' and scanner='google'"
)

for row in rows:
    ia = row.identifier
    print(ia)
    # An item counts as loaded when an edition references it either through
    # its 'ocaid' field or through an 'ia:<identifier>' source record. The
    # second lookup only runs when the first finds nothing.
    if (query({'type': '/type/edition', 'ocaid': ia})
            or query({'type': '/type/edition', 'source_records': 'ia:' + ia})):
        print('already loaded')
        continue
예제 #3
0
    print 'ret:', ret
    assert isinstance(ret, basestring)
    key = ret
    pool.update(key, q)

# Resume marker: identifiers are ignored until the one named below is seen.
skip = True

for row in iter:
    ia = row.identifier
    if skip:
        if ia != 'bostoncityhospit12hugh':
            continue
        # Resume point reached; this identifier and all later ones are
        # processed.
        skip = False
    print(ia)
    if query({'type': '/type/edition', 'ocaid': ia}):
        print('already loaded')
        continue
    # KeyboardInterrupt and NameError propagate unchanged; an HTTP error
    # just moves on to the next identifier.
    try:
        loc, rec = get_ia(ia)
    except (KeyboardInterrupt, NameError):
        raise
    except urllib2.HTTPError:
        continue
    if loc is None or rec is None:
        continue
    print('%s %s' % (loc, rec))

    # Only locations ending in '.xml' are handled past this point.
    if not loc.endswith('.xml'):
        print("not XML")
        continue
예제 #4
0
from __future__ import print_function
import web, sys
from catalog.utils.query import query, withKey
from catalog.read_rc import read_rc
sys.path.append('/home/edward/src/olapi')
from olapi import OpenLibrary, unmarshal

# Settings mapping; indexed below for the bot login and the DB connection.
rc = read_rc()

# Log in to Open Library as ImportBot.
# NOTE(review): the login goes to a plain http:// URL -- confirm whether an
# https endpoint should be used instead.
ol = OpenLibrary("http://openlibrary.org")
ol.login('ImportBot', rc['ImportBot'])

# Connection to the 'archive' MySQL database, parameters taken from rc.
db = web.database(
    dbn='mysql',
    host=rc['ia_db_host'],
    user=rc['ia_db_user'],
    passwd=rc['ia_db_pass'],
    db='archive',
)
# NOTE(review): presumably silences web.py's per-query output -- confirm.
db.printing = False

# Identifiers of all 'texts' items scanned by 'google' whose noindex column
# is null. Named ``rows`` so the builtin ``iter`` is not shadowed.
rows = db.query(
    "select identifier from metadata where noindex is null and mediatype='texts' and scanner='google'"
)

for row in rows:
    ia = row.identifier
    print(ia)
    # An item counts as loaded when an edition references it either through
    # its 'ocaid' field or through an 'ia:<identifier>' source record. The
    # second lookup only runs when the first finds nothing.
    if (
        query({'type': '/type/edition', 'ocaid': ia})
        or query({'type': '/type/edition', 'source_records': 'ia:' + ia})
    ):
        print('already loaded')
        continue