def add_source_records(key, ia, v=None):
    """Record the Internet Archive item *ia* on an edition and tidy the record.

    The edition is fetched with ``ol.get``, ``'ia:' + ia`` is added to its
    ``source_records`` (creating the list from the value returned by
    ``get_mc`` when the edition has none), a few other fields are cleaned
    up, the edition is saved and ``add_cover_image`` is called.

    Parameters:
        key: edition key; it must match ``re_edition_key``.  The first regex
            group is used to build both the ``/b/...`` key passed to
            ``get_mc`` and the ``/books/...`` key used for fetch and save.
        ia:  Internet Archive identifier, without the ``ia:`` prefix.
        v:   passed straight through to ``ol.get`` as ``v=``.

    Returns:
        None.  Returns early, without saving, when the edition already has
        an ``ocaid`` and already lists ``'ia:' + ia`` in ``source_records``.

    Raises:
        AttributeError: if *key* does not match ``re_edition_key``.
        AssertionError: if an author entry equals the string ``'None'``.
    """
    # NOTE: every print below takes a single pre-formatted argument so the
    # output is the same whether ``print`` is a statement or a function.
    new = 'ia:' + ia
    m = re_edition_key.match(key)
    old_style_key = '/b/' + m.group(1)
    key = '/books/' + m.group(1)
    e = ol.get(key, v=v)

    # A missing ocaid forces a save even if the source record is present.
    need_update = 'ocaid' not in e
    if need_update:
        e['ocaid'] = ia

    if 'source_records' in e:
        if new in e['source_records']:
            if not need_update:
                return  # nothing to change
            # Already listed: only the ocaid was missing.  (Previously the
            # entry was appended a second time in this situation.)
        else:
            e['source_records'].append(new)
    else:
        # No source_records yet: seed the list from the get_mc() value.
        existing = get_mc(old_style_key)
        print('get_mc(%s) == %s' % (old_style_key, existing))
        if existing is None:
            sr = []
        elif existing.startswith(('ia:', 'amazon:')):
            sr = [existing]
        else:
            # Values matching re_meta_mrc are rewritten as 'ia:<group 1>';
            # anything else is recorded as a 'marc:' source.
            m = re_meta_mrc.match(existing)
            sr = ['ia:' + m.group(1) if m else 'marc:' + existing]
        print('ocaid: %s' % (e['ocaid'],))
        ocaid_record = 'ia:' + e['ocaid']
        if ocaid_record not in sr:
            sr.append(ocaid_record)
        print('sr: %s' % (sr,))
        print('ocaid: %s' % (e['ocaid'],))
        e['source_records'] = sr if new in sr else sr + [new]
    assert 'source_records' in e

    # fix other bits of the record as well
    new_toc = fix_toc(e)
    if new_toc:
        e['table_of_contents'] = new_toc
    if e.get('subjects', None) and any(has_dot(s) for s in e['subjects']):
        # Drop the final character of every subject flagged by has_dot().
        e['subjects'] = [s[:-1] if has_dot(s) else s for s in e['subjects']]
    if 'authors' in e:
        assert not any(a == 'None' for a in e['authors'])
        print(e['authors'])
        authors = [ol.get(akey) for akey in e['authors']]
        # Follow one level of author redirects.
        authors = [ol.get(a['location']) if a['type'] == '/type/redirect' else a
                   for a in authors]
        for a in authors:
            if a['type'] == '/type/redirect':
                print('double redirect on: %s' % (e['key'],))
        e['authors'] = [{'key': a['key']} for a in authors]
        undelete_authors(authors)

    print('saving %s' % (key,))
    assert 'source_records' in e
    print(ol.save(key, e, 'found a matching MARC record'))
    add_cover_image(key, ia)
def add_source_records(key, ia, v=None):
    """Add ``'ia:' + ia`` to an edition's source records, then clean and save it.

    Steps, in order: normalise *key* via ``re_edition_key``, fetch the
    edition with ``ol.get``, make sure ``ocaid`` and ``source_records`` are
    set, apply ``fix_toc``, trim subjects flagged by ``has_dot``, resolve
    author redirects, save with ``ol.save`` and call ``add_cover_image``.

    Parameters:
        key: edition key matching ``re_edition_key``; group 1 is reused to
            build the ``/b/...`` key for ``get_mc`` and the ``/books/...``
            key for fetching and saving.
        ia:  Internet Archive identifier (no ``ia:`` prefix).
        v:   forwarded to ``ol.get`` as ``v=``.

    Returns:
        None.  Nothing is saved when the edition already carries an
        ``ocaid`` and ``'ia:' + ia`` is already among its source records.

    Raises:
        AttributeError: if *key* does not match ``re_edition_key``.
        AssertionError: if any author entry is the string ``'None'``.
    """
    new = 'ia:' + ia
    m = re_edition_key.match(key)
    old_style_key = '/b/' + m.group(1)
    key = '/books/' + m.group(1)
    e = ol.get(key, v=v)

    need_update = False
    if 'ocaid' not in e:
        # Filling in the ocaid is itself a reason to save the record.
        need_update = True
        e['ocaid'] = ia

    if 'source_records' in e:
        already_listed = new in e['source_records']
        if already_listed and not need_update:
            return
        # Only append when absent; the earlier unconditional append
        # duplicated the entry whenever just the ocaid was missing.
        if not already_listed:
            e['source_records'].append(new)
    else:
        # Build the initial list from whatever get_mc() reports.
        existing = get_mc(old_style_key)
        print('get_mc(%s) == %s' % (old_style_key, existing))
        if existing is None:
            sr = []
        elif existing.startswith('ia:') or existing.startswith('amazon:'):
            sr = [existing]
        else:
            # re_meta_mrc matches become 'ia:<group 1>'; others are 'marc:'.
            m = re_meta_mrc.match(existing)
            sr = ['marc:' + existing if not m else 'ia:' + m.group(1)]
        print('ocaid:', e['ocaid'])
        if 'ia:' + e['ocaid'] not in sr:
            sr.append('ia:' + e['ocaid'])
        print('sr:', sr)
        print('ocaid:', e['ocaid'])
        if new not in sr:
            e['source_records'] = sr + [new]
        else:
            e['source_records'] = sr
    assert 'source_records' in e

    # fix other bits of the record as well
    new_toc = fix_toc(e)
    if new_toc:
        e['table_of_contents'] = new_toc
    if e.get('subjects', None) and any(has_dot(s) for s in e['subjects']):
        # Subjects flagged by has_dot() lose their last character.
        subjects = [s[:-1] if has_dot(s) else s for s in e['subjects']]
        e['subjects'] = subjects
    if 'authors' in e:
        assert not any(a == 'None' for a in e['authors'])
        print(e['authors'])
        authors = [ol.get(akey) for akey in e['authors']]
        # Replace redirect stubs with the author they point at (one hop).
        authors = [
            ol.get(a['location']) if a['type'] == '/type/redirect' else a
            for a in authors
        ]
        for a in authors:
            if a['type'] == '/type/redirect':
                print('double redirect on:', e['key'])
        e['authors'] = [{'key': a['key']} for a in authors]
        undelete_authors(authors)

    print('saving', key)
    assert 'source_records' in e
    print(ol.save(key, e, 'found a matching MARC record'))
    add_cover_image(key, ia)
def write_edition(ia, edition):
    """Create a new edition record for the Internet Archive item *ia*.

    ``add_lang`` is applied to *edition*, a query is built with
    ``build_query``, any author in the query that has no ``key`` is created
    with ``ol.new``, and the edition itself is then created with
    ``ol.new``.  Afterwards ``pool.update`` and ``add_cover_image`` are
    called with the new edition key.

    Parameters:
        ia:      Internet Archive identifier (no ``ia:`` prefix).
        edition: edition data handed to ``add_lang`` and ``build_query``.

    Raises:
        httplib.BadStatusLine: if every one of the 50 creation attempts
            fails with that error.  Previously the loop fell through and
            carried on with a stale or unbound ``ret``.
        AssertionError: if ``ol.new`` returns something other than a string.
        Any other exception raised by ``ol.new`` is re-raised after the
        offending payload has been printed.
    """
    # NOTE: prints use a single pre-formatted argument so the output is the
    # same whether ``print`` is a statement or a function.
    max_attempts = 50
    retry_delay = 30  # seconds to wait after a BadStatusLine

    loc = 'ia:' + ia
    add_lang(edition)
    q = build_query(loc, edition)

    authors = []
    for a in q.get('authors', []):
        if 'key' in a:
            # Author already has a key: reference it.
            authors.append({'key': a['key']})
            continue
        try:
            ret = ol.new(a, comment='new author')
        except Exception:
            print(a)  # show the payload that could not be created
            raise
        print('ret: %s' % (ret,))
        assert isinstance(ret, basestring)
        authors.append({'key': ret})

    q['source_records'] = [loc]
    if authors:
        q['authors'] = authors

    for attempt in range(max_attempts):
        if attempt > 0:
            print('retrying')
        try:
            ret = ol.new(q, comment='initial import')
        except httplib.BadStatusLine:
            if attempt == max_attempts - 1:
                # Out of retries: fail loudly instead of continuing with
                # whatever `ret` held from the author loop.
                raise
            sleep(retry_delay)
            continue
        except Exception:
            print(q)
            raise
        break

    print('ret: %s' % (ret,))
    assert isinstance(ret, basestring)
    key = ret
    pool.update(key, q)
    print('add_cover_image')
    add_cover_image(key, ia)
import csv
from openlibrary.catalog.title_page_img.load import add_cover_image

# Script: for every data row of the Smashwords CSV, look up the edition whose
# ``ocaid`` matches the row's identifier and call add_cover_image() on it.
# NOTE(review): ``ol`` is used below but not defined in this snippet; it must
# be provided by the surrounding environment.

input_file = 'smashwords_ia_20110325-extended-20110406.csv'

headings = None
# `with` closes the CSV file once the loop ends (it used to be left open).
with open(input_file) as csv_file:
    for row in csv.reader(csv_file):
        if not headings:
            # The first row supplies the column names.
            headings = row
            print(row)
            continue
        book = dict(zip(headings, [s.decode('utf-8') for s in row]))
        isbn = book['ISBN']
        # Rows without an ISBN use 'SW000000' + SWID as the identifier.
        ia = isbn if isbn else 'SW000000' + book['SWID']
        q = {'type': '/type/edition', 'ocaid': ia, 'works': None}
        existing = list(ol.query(q))
        # Raises IndexError when no edition matches; left as-is so a missing
        # edition still stops the run.
        print((existing[0]['key'], ia))
        add_cover_image(existing[0]['key'], ia)
def add_source_records(key, ia, v=None):
    """Record the Internet Archive item *ia* on the edition at *key* and save it.

    The edition is fetched with ``ol.get``; ``ocaid`` is filled in when
    missing; ``'ia:' + ia`` is added to ``source_records`` (building the
    list from the ``get_mc`` value when the edition has none); the table of
    contents, subjects and authors are cleaned up; the edition is saved and
    ``add_cover_image`` is called.

    Parameters:
        key: edition key, used unchanged for ``ol.get``, ``get_mc`` and
            ``ol.save``.
        ia:  Internet Archive identifier (no ``ia:`` prefix).
        v:   forwarded to ``ol.get`` as ``v=``.

    Returns:
        None.  Returns early, without saving, when the edition already has
        an ``ocaid`` and already lists ``'ia:' + ia``.

    Raises:
        AssertionError: if an author entry equals the string ``'None'``, or
            if, after saving a new table of contents, the first entry of
            the re-fetched edition's table has no ``'title'``.
    """
    # NOTE: prints use a single pre-formatted argument so the output is the
    # same whether ``print`` is a statement or a function.
    new = 'ia:' + ia
    e = ol.get(key, v=v)

    # A missing ocaid forces a save even if the source record is present.
    need_update = 'ocaid' not in e
    if need_update:
        e['ocaid'] = ia

    if 'source_records' in e:
        if new in e['source_records']:
            if not need_update:
                return  # nothing to change
            # Already listed; only the ocaid was missing.  (The entry used
            # to be appended again here, producing a duplicate.)
        else:
            e['source_records'].append(new)
    else:
        # No source_records yet: derive the list from the get_mc() value.
        existing = get_mc(key)
        amazon = 'amazon:'
        if existing is None:
            sr = []
        elif existing.startswith('ia:'):
            sr = [existing]
        elif existing.startswith(amazon):
            # Prefer whatever amazon_source_records() returns for the part
            # after the prefix; fall back to the raw value when it is empty.
            sr = amazon_source_records(existing[len(amazon):]) or [existing]
        else:
            # re_meta_mrc matches become 'ia:<group 1>'; others are 'marc:'.
            m = re_meta_mrc.match(existing)
            sr = ['ia:' + m.group(1) if m else 'marc:' + existing]
        ocaid_record = 'ia:' + e['ocaid']
        if ocaid_record not in sr:
            sr.append(ocaid_record)
        # Always store the list.  Previously nothing was assigned when `new`
        # was already in `sr`, so the edition was saved with no
        # source_records at all.
        e['source_records'] = sr if new in sr else sr + [new]

    # fix other bits of the record as well
    new_toc = fix_toc(e)
    if new_toc:
        e['table_of_contents'] = new_toc
    if e.get('subjects', None) and any(has_dot(s) for s in e['subjects']):
        # Drop the final character of every subject flagged by has_dot().
        e['subjects'] = [s[:-1] if has_dot(s) else s for s in e['subjects']]
    if 'authors' in e:
        assert not any(a == 'None' for a in e['authors'])
        print(e['authors'])
        authors = [ol.get(akey) for akey in e['authors']]
        # Follow one level of author redirects.
        authors = [ol.get(a['location']) if a['type'] == '/type/redirect' else a
                   for a in authors]
        e['authors'] = [{'key': a['key']} for a in authors]
        undelete_authors(authors)

    print('saving %s' % (key,))
    print(marshal(e))
    # NOTE: the save is attempted once; a retry loop that used to surround
    # this call was already disabled (commented out) and has been removed.
    print(ol.save(key, e, 'found a matching MARC record'))

    if new_toc:
        # Check that the saved table of contents kept its titles, i.e. that
        # it does not look like [{u'type': <ref: u'/type/toc_item'>}, ...].
        new_edition = ol.get(key)
        assert 'title' in new_edition['table_of_contents'][0]
    add_cover_image(key, ia)
import csv
from openlibrary.catalog.title_page_img.load import add_cover_image

# Script: walk the Smashwords CSV and, for each data row, call
# add_cover_image() on the edition whose ``ocaid`` equals the row's identifier.
# NOTE(review): ``ol`` is referenced but not defined in this snippet; confirm
# where it is expected to come from.

input_file = 'smashwords_ia_20110325-extended-20110406.csv'

headings = None
# The file is opened in a `with` block so the handle is closed when the loop
# finishes; it was previously never closed.
with open(input_file) as source:
    for row in csv.reader(source):
        if not headings:
            # Header row: remember the column names and echo them.
            headings = row
            print(row)
            continue
        values = [s.decode('utf-8') for s in row]
        book = dict(zip(headings, values))
        isbn = book['ISBN']
        # Fall back to 'SW000000' + SWID when the ISBN column is empty.
        ia = isbn if isbn else 'SW000000' + book['SWID']
        q = {'type': '/type/edition', 'ocaid': ia, 'works': None}
        existing = list(ol.query(q))
        # An empty result raises IndexError here, as it did before.
        edition_key = existing[0]['key']
        print((edition_key, ia))
        add_cover_image(existing[0]['key'], ia)