Ejemplo n.º 1
0
def add_source_records(key, ia, v=None):
    """Record ``ia`` on the edition at ``key``, tidy the record and save it.

    The edition is fetched with ``ol.get``.  ``ocaid`` is set to ``ia`` when
    the record has none, and ``'ia:' + ia`` is added to ``source_records``.
    When the record has no ``source_records`` at all, the list is rebuilt
    from whatever ``get_mc`` returns for the old-style ``/b/<id>`` key, the
    record's ``ocaid`` and the new entry.  Before saving, the table of
    contents is replaced by the result of ``fix_toc`` (when truthy), the last
    character is dropped from subjects flagged by ``has_dot``, and author
    redirects are followed one level.  ``add_cover_image`` runs after the
    save.

    Args:
        key: Edition key; must match ``re_edition_key``.  It is rewritten to
            ``/books/<id>`` before use.
        ia: Identifier stored as ``ocaid`` and as the ``'ia:<ia>'`` source
            record (presumably an Internet Archive item id -- confirm).
        v: Forwarded to ``ol.get`` as ``v`` (presumably a revision number --
            confirm against the client).

    Returns:
        None.  Returns early, without saving, when the record already has an
        ``ocaid`` and already lists ``'ia:' + ia``.

    Raises:
        ValueError: If ``key`` does not match ``re_edition_key``.
    """
    new = 'ia:' + ia
    m = re_edition_key.match(key)
    if m is None:
        # Previously surfaced as an opaque AttributeError on ``m.group``.
        raise ValueError('not an edition key: %r' % (key,))
    old_style_key = '/b/' + m.group(1)
    key = '/books/' + m.group(1)
    e = ol.get(key, v=v)

    need_update = 'ocaid' not in e
    if need_update:
        e['ocaid'] = ia

    if 'source_records' in e:
        if new in e['source_records']:
            if not need_update:
                return
            # ocaid was just added, so the record still has to be saved, but
            # the source record is already listed: do not append it again.
        else:
            e['source_records'].append(new)
    else:
        existing = get_mc(old_style_key)
        print('get_mc(%s) == %s' % (old_style_key, existing))
        if existing is None:
            sr = []
        elif existing.startswith(('ia:', 'amazon:')):
            sr = [existing]
        else:
            meta = re_meta_mrc.match(existing)
            sr = ['ia:' + meta.group(1) if meta else 'marc:' + existing]
        print('ocaid: %s' % (e['ocaid'],))
        # ocaid is always present at this point (set above when missing).
        if 'ia:' + e['ocaid'] not in sr:
            sr.append('ia:' + e['ocaid'])
        print('sr: %s' % (sr,))
        print('ocaid: %s' % (e['ocaid'],))
        if new not in sr:
            sr.append(new)
        e['source_records'] = sr

    # fix other bits of the record as well
    new_toc = fix_toc(e)
    if new_toc:
        e['table_of_contents'] = new_toc
    if e.get('subjects') and any(has_dot(s) for s in e['subjects']):
        e['subjects'] = [s[:-1] if has_dot(s) else s for s in e['subjects']]
    if 'authors' in e:
        assert not any(a == 'None' for a in e['authors'])
        print(e['authors'])
        authors = [ol.get(akey) for akey in e['authors']]
        # Follow a single level of redirect; anything still redirecting after
        # that is only reported, not resolved.
        authors = [
            ol.get(a['location']) if a['type'] == '/type/redirect' else a
            for a in authors
        ]
        for a in authors:
            if a['type'] == '/type/redirect':
                print('double redirect on: %s' % (e['key'],))
        e['authors'] = [{'key': a['key']} for a in authors]
        undelete_authors(authors)
    print('saving %s' % (key,))
    print(ol.save(key, e, 'found a matching MARC record'))
    add_cover_image(key, ia)
Ejemplo n.º 2
0
def add_source_records(key, ia, v=None):
    """Record ``ia`` on the edition at ``key``, tidy the record and save it.

    The edition is fetched with ``ol.get``.  ``ocaid`` is set to ``ia`` when
    the record has none, and ``'ia:' + ia`` is added to ``source_records``.
    When the record has no ``source_records`` at all, the list is rebuilt
    from whatever ``get_mc`` returns for the old-style ``/b/<id>`` key, the
    record's ``ocaid`` and the new entry.  Before saving, the table of
    contents is replaced by the result of ``fix_toc`` (when truthy), the last
    character is dropped from subjects flagged by ``has_dot``, and author
    redirects are followed one level.  ``add_cover_image`` runs after the
    save.

    Args:
        key: Edition key; must match ``re_edition_key``.  It is rewritten to
            ``/books/<id>`` before use.
        ia: Identifier stored as ``ocaid`` and as the ``'ia:<ia>'`` source
            record (presumably an Internet Archive item id -- confirm).
        v: Forwarded to ``ol.get`` as ``v`` (presumably a revision number --
            confirm against the client).

    Returns:
        None.  Returns early, without saving, when the record already has an
        ``ocaid`` and already lists ``'ia:' + ia``.

    Raises:
        ValueError: If ``key`` does not match ``re_edition_key``.
    """
    new = 'ia:' + ia
    key_match = re_edition_key.match(key)
    if key_match is None:
        # Previously surfaced as an opaque AttributeError on ``m.group``.
        raise ValueError('not an edition key: %r' % (key,))
    edition_id = key_match.group(1)
    old_style_key = '/b/' + edition_id
    key = '/books/' + edition_id
    e = ol.get(key, v=v)

    need_update = 'ocaid' not in e
    if need_update:
        e['ocaid'] = ia

    if 'source_records' in e:
        if new in e['source_records']:
            if not need_update:
                return
            # ocaid was just added, so the record still has to be saved, but
            # the source record is already listed: do not append it again.
        else:
            e['source_records'].append(new)
    else:
        existing = get_mc(old_style_key)
        print('get_mc(%s) == %s' % (old_style_key, existing))
        if existing is None:
            sr = []
        elif existing.startswith(('ia:', 'amazon:')):
            sr = [existing]
        else:
            meta = re_meta_mrc.match(existing)
            sr = ['ia:' + meta.group(1) if meta else 'marc:' + existing]
        print('ocaid:', e['ocaid'])
        # ocaid is always present at this point (set above when missing).
        if 'ia:' + e['ocaid'] not in sr:
            sr.append('ia:' + e['ocaid'])
        print('sr:', sr)
        print('ocaid:', e['ocaid'])
        if new not in sr:
            sr.append(new)
        e['source_records'] = sr

    # fix other bits of the record as well
    new_toc = fix_toc(e)
    if new_toc:
        e['table_of_contents'] = new_toc
    if e.get('subjects') and any(has_dot(s) for s in e['subjects']):
        e['subjects'] = [s[:-1] if has_dot(s) else s for s in e['subjects']]
    if 'authors' in e:
        assert not any(a == 'None' for a in e['authors'])
        print(e['authors'])
        authors = [ol.get(akey) for akey in e['authors']]
        # Follow a single level of redirect; anything still redirecting after
        # that is only reported, not resolved.
        authors = [
            ol.get(a['location']) if a['type'] == '/type/redirect' else a
            for a in authors
        ]
        for a in authors:
            if a['type'] == '/type/redirect':
                print('double redirect on:', e['key'])
        e['authors'] = [{'key': a['key']} for a in authors]
        undelete_authors(authors)
    print('saving', key)
    print(ol.save(key, e, 'found a matching MARC record'))
    add_cover_image(key, ia)
Ejemplo n.º 3
0
def write_edition(ia, edition):
    """Create a new edition record for ``ia`` from ``edition``.

    Steps, in order: ``add_lang(edition)``; build the query with
    ``build_query('ia:' + ia, edition)``; create every author in the query
    that has no ``key`` via ``ol.new``; create the edition itself via
    ``ol.new`` (retrying on ``httplib.BadStatusLine``); then call
    ``pool.update`` and ``add_cover_image`` with the new key.

    Args:
        ia: Identifier used for the ``'ia:<ia>'`` source record and passed to
            ``add_cover_image``.
        edition: Edition data handed to ``add_lang`` and ``build_query``
            (presumably a dict -- confirm against those helpers).

    Raises:
        httplib.BadStatusLine: If creating the edition still fails on the
            last of the 50 attempts.
        Exception: Any other error from ``ol.new`` is re-raised after the
            offending payload has been printed.
    """
    max_attempts = 50
    loc = 'ia:' + ia
    add_lang(edition)
    q = build_query(loc, edition)

    authors = []
    for a in q.get('authors', []):
        if 'key' in a:
            authors.append({'key': a['key']})
            continue
        try:
            author_key = ol.new(a, comment='new author')
        except Exception:
            print(a)
            raise
        print('ret: %s' % (author_key,))
        assert isinstance(author_key, basestring)
        authors.append({'key': author_key})
    q['source_records'] = [loc]
    if authors:
        q['authors'] = authors

    for attempt in range(max_attempts):
        if attempt > 0:
            print('retrying')
        try:
            key = ol.new(q, comment='initial import')
        except httplib.BadStatusLine:
            # Give up on the last attempt instead of falling out of the loop:
            # the old code then went on with a stale author key (or an
            # unbound name) as if it were the new edition's key.
            if attempt == max_attempts - 1:
                raise
            sleep(30)
            continue
        except Exception:
            print(q)
            raise
        break
    print('ret: %s' % (key,))
    assert isinstance(key, basestring)
    pool.update(key, q)

    print('add_cover_image')
    add_cover_image(key, ia)
Ejemplo n.º 4
0
import csv
from openlibrary.catalog.title_page_img.load import add_cover_image

# Attach cover images to the editions listed in a Smashwords CSV export.
#
# The first non-empty row is taken as the column headings (and printed); every
# later row is turned into a dict keyed by those headings.  For each book an
# edition is looked up by ``ocaid`` and ``add_cover_image`` is called on the
# first match.
#
# NOTE(review): ``ol`` is not defined in this snippet; presumably an Open
# Library client created elsewhere -- confirm.
input_file = 'smashwords_ia_20110325-extended-20110406.csv'

headings = None
# ``with`` closes the CSV file even if a row fails part-way through.
with open(input_file) as csv_file:
    for row in csv.reader(csv_file):
        if not headings:
            headings = row
            print(row)
            continue
        # Python 2's csv module yields byte strings that need decoding;
        # Python 3 already yields text.
        book = dict(zip(
            headings,
            [s.decode('utf-8') if isinstance(s, bytes) else s for s in row],
        ))

        isbn = book['ISBN']

        # Fall back to a Smashwords-id based identifier when there is no ISBN.
        ia = isbn if isbn else 'SW000000' + book['SWID']

        q = {'type': '/type/edition', 'ocaid': ia, 'works': None}
        existing = list(ol.query(q))
        # Raises IndexError when no edition matches, stopping the run.
        edition_key = existing[0]['key']
        print(edition_key, ia)
        add_cover_image(edition_key, ia)
Ejemplo n.º 5
0
def add_source_records(key, ia, v=None):
    """Record ``ia`` on the edition at ``key``, tidy the record and save it.

    The edition is fetched with ``ol.get``.  ``ocaid`` is set to ``ia`` when
    the record has none, and ``'ia:' + ia`` is added to ``source_records``.
    When the record has no ``source_records`` at all, the list is rebuilt
    from whatever ``get_mc(key)`` returns (``amazon:`` values are expanded
    through ``amazon_source_records``, falling back to the raw value), the
    record's ``ocaid`` and the new entry.  Before saving, the table of
    contents is replaced by the result of ``fix_toc`` (when truthy), the last
    character is dropped from subjects flagged by ``has_dot``, and author
    redirects are followed one level.  ``add_cover_image`` runs after the
    save.

    Args:
        key: Edition key, passed unchanged to ``ol.get``, ``get_mc`` and
            ``ol.save``.
        ia: Identifier stored as ``ocaid`` and as the ``'ia:<ia>'`` source
            record (presumably an Internet Archive item id -- confirm).
        v: Forwarded to ``ol.get`` as ``v`` (presumably a revision number --
            confirm against the client).

    Returns:
        None.  Returns early, without saving, when the record already has an
        ``ocaid`` and already lists ``'ia:' + ia``.
    """
    new = 'ia:' + ia
    e = ol.get(key, v=v)

    need_update = 'ocaid' not in e
    if need_update:
        e['ocaid'] = ia

    if 'source_records' in e:
        if new in e['source_records']:
            if not need_update:
                return
            # ocaid was just added, so the record still has to be saved, but
            # the source record is already listed: do not append it again.
        else:
            e['source_records'].append(new)
    else:
        existing = get_mc(key)
        amazon = 'amazon:'
        if existing is None:
            sr = []
        elif existing.startswith('ia:'):
            sr = [existing]
        elif existing.startswith(amazon):
            sr = amazon_source_records(existing[len(amazon):]) or [existing]
        else:
            meta = re_meta_mrc.match(existing)
            sr = ['ia:' + meta.group(1) if meta else 'marc:' + existing]
        # ocaid is always present at this point (set above when missing).
        if 'ia:' + e['ocaid'] not in sr:
            sr.append('ia:' + e['ocaid'])
        if new not in sr:
            sr.append(new)
        # Always store the rebuilt list; it used to be dropped whenever
        # ``new`` was already part of it.
        e['source_records'] = sr

    # fix other bits of the record as well
    new_toc = fix_toc(e)
    if new_toc:
        e['table_of_contents'] = new_toc
    if e.get('subjects') and any(has_dot(s) for s in e['subjects']):
        e['subjects'] = [s[:-1] if has_dot(s) else s for s in e['subjects']]
    if 'authors' in e:
        assert not any(a == 'None' for a in e['authors'])
        print(e['authors'])
        authors = [ol.get(akey) for akey in e['authors']]
        # Follow a single level of redirect.
        authors = [
            ol.get(a['location']) if a['type'] == '/type/redirect' else a
            for a in authors
        ]
        e['authors'] = [{'key': a['key']} for a in authors]
        undelete_authors(authors)
    print('saving %s' % (key,))
    print(marshal(e))
    # NOTE: a retry loop around this save (up to 50 attempts, sleeping 30s
    # after a URLError) was sketched here earlier but left disabled.
    print(ol.save(key, e, 'found a matching MARC record'))
    if new_toc:
        # Re-read the saved record to check the table of contents was stored
        # as items carrying a title.
        new_edition = ol.get(key)
        assert 'title' in new_edition['table_of_contents'][0]
    add_cover_image(key, ia)
Ejemplo n.º 6
0
import csv
from openlibrary.catalog.title_page_img.load import add_cover_image

# Attach cover images to the editions listed in a Smashwords CSV export.
#
# The first non-empty row is taken as the column headings (and printed); every
# later row is turned into a dict keyed by those headings.  For each book an
# edition is looked up by ``ocaid`` and ``add_cover_image`` is called on the
# first match.
#
# NOTE(review): ``ol`` is not defined in this snippet; presumably an Open
# Library client created elsewhere -- confirm.
input_file = 'smashwords_ia_20110325-extended-20110406.csv'

headings = None
# ``with`` closes the CSV file even if a row fails part-way through.
with open(input_file) as csv_file:
    for row in csv.reader(csv_file):
        if not headings:
            headings = row
            print(row)
            continue
        # Only byte strings (Python 2 csv rows) need decoding; text rows are
        # used as they are.
        values = [
            cell.decode('utf-8') if isinstance(cell, bytes) else cell
            for cell in row
        ]
        book = dict(zip(headings, values))

        isbn = book['ISBN']

        # Fall back to a Smashwords-id based identifier when there is no ISBN.
        ia = isbn if isbn else 'SW000000' + book['SWID']

        q = {'type': '/type/edition', 'ocaid': ia, 'works': None}
        existing = list(ol.query(q))
        # Raises IndexError when no edition matches, stopping the run.
        edition_key = existing[0]['key']
        print(edition_key, ia)
        add_cover_image(edition_key, ia)