def try_merge(edition, ekey, thing):
    """Report whether the Amazon record ``edition`` matches the edition ``thing``.

    The record is identified by its ``isbn_10`` value, falling back to its
    ``asin`` when no ISBN-10 is present.

    Args:
        edition: Mapping describing the Amazon record. Must provide
            ``isbn_10`` or ``asin``; ``authors`` (a list of mappings with a
            ``name`` key) is optional.
        ekey: Key of the existing edition, passed to ``get_mc``.
        thing: Mapping for the existing edition. Must have ``type.key`` equal
            to ``/type/edition``; may carry ``source_records``.

    Returns:
        ``True`` when ``amazon:<asin>`` is already among the source records or
        equals the value returned by ``get_mc``; ``False`` when ``get_mc``
        returns nothing; otherwise the result of ``source_records_match`` or
        ``amazon_merge.attempt_merge``.

    Raises:
        AssertionError: If the identifier is not a string or ``thing`` is not
            an edition.
    """
    kind = thing['type']['key']
    if 'isbn_10' not in edition:
        # Dump records that lack an ISBN-10; the ASIN is used for them below.
        print(edition)
    asin = edition.get('isbn_10') or edition['asin']

    author_names = [author['name'] for author in edition.get('authors', [])]
    amazon_rec = amazon_merge.build_amazon(edition, author_names)

    assert isinstance(asin, six.string_types)
    assert kind == '/type/edition'

    if 'source_records' in thing:
        # Already recorded as a source -> match; otherwise compare against
        # the records that are listed.
        return (
            'amazon:' + asin in thing['source_records']
            or source_records_match(amazon_rec, thing)
        )

    # No source records on the edition: fall back to the value from get_mc.
    mc = get_mc(ekey)
    if mc == 'amazon:' + asin:
        return True
    if not mc:
        return False

    raw = get_from_local(mc)
    marc_rec = build_marc(fast_parse.read_edition(raw))
    return amazon_merge.attempt_merge(amazon_rec, marc_rec, threshold, debug=False)
def try_merge(edition, ekey, thing):
    """Report whether the Amazon record ``edition`` matches the edition ``thing``.

    The record is identified by its ``isbn_10`` value, falling back to its
    ``asin`` when no ISBN-10 is present.

    Args:
        edition: Mapping describing the Amazon record. Must provide
            ``isbn_10`` or ``asin``; ``authors`` (a list of mappings with a
            ``name`` key) is optional.
        ekey: Key of the existing edition, passed to ``get_mc``.
        thing: Mapping for the existing edition. Must have ``type.key`` equal
            to ``/type/edition``; may carry ``source_records``.

    Returns:
        ``True`` when ``amazon:<asin>`` is already among the source records or
        equals the value returned by ``get_mc``; ``False`` when ``get_mc``
        returns nothing; otherwise the result of ``source_records_match`` or
        ``amazon_merge.attempt_merge``.

    Raises:
        AssertionError: If the identifier is not a string or ``thing`` is not
            an edition.
    """
    thing_type = thing['type']['key']
    if 'isbn_10' not in edition:
        # Single-argument call form prints identically on Python 2 and 3.
        print(edition)
    asin = edition.get('isbn_10', None) or edition['asin']
    if 'authors' in edition:
        authors = [i['name'] for i in edition['authors']]
    else:
        authors = []
    a = amazon_merge.build_amazon(edition, authors)
    # six.string_types replaces the Python 2-only ``basestring`` builtin.
    assert isinstance(asin, six.string_types)
    assert thing_type == '/type/edition'
    if 'source_records' in thing:
        if 'amazon:' + asin in thing['source_records']:
            return True
        return source_records_match(a, thing)

    # No source records on the edition: fall back to the value from get_mc.
    mc = get_mc(ekey)
    if mc == 'amazon:' + asin:
        return True
    if not mc:
        return False
    data = get_from_local(mc)
    e1 = build_marc(fast_parse.read_edition(data))
    return amazon_merge.attempt_merge(a, e1, threshold, debug=False)
def get_651(key):
    """Gather the subfields of every tag ``651`` line found for ``key``.

    Each source returned by ``get_src(key)`` is loaded with
    ``get_from_local`` and scanned with ``get_tag_lines`` for tag ``651``.

    Args:
        key: Identifier passed to ``get_src``.

    Returns:
        A list with one entry per matching line, in source order; each entry
        is the list produced from ``get_all_subfields(line)``.
    """
    return [
        list(get_all_subfields(line))
        for src in get_src(key)
        for _tag, line in get_tag_lines(get_from_local(src), ['651'])
    ]
def marc_match(a, loc):
    """Attempt to merge record ``a`` with the record stored at ``loc``.

    Args:
        a: Record passed as the first argument to
            ``amazon_merge.attempt_merge``.
        loc: Non-empty locator handed to ``get_from_local``.

    Returns:
        Whatever ``amazon_merge.attempt_merge`` returns for the two records
        at the module-level ``threshold``.

    Raises:
        AssertionError: If ``loc`` is falsy.
    """
    assert loc
    raw = get_from_local(loc)
    parsed = fast_parse.read_edition(raw)
    marc_edition = build_marc(parsed)
    return amazon_merge.attempt_merge(a, marc_edition, threshold, debug=False)
# Script: for up to 100 editions returned by the query below, look up the
# value from get_mc, load that record locally and print its first tag 041
# line (first two characters plus all subfields).
from catalog.utils.query import query_iter, set_staging, withKey, get_mc
import sys
import codecs
import re
# The olapi client is loaded from a local checkout, so the path must be
# extended before the import below.
sys.path.append('/home/edward/src/olapi')
from olapi import OpenLibrary, Reference
from catalog.read_rc import read_rc
from catalog.get_ia import get_from_archive, get_from_local
from catalog.marc.fast_parse import get_first_tag, get_all_subfields

rc = read_rc()

# Force UTF-8 output. The codec writer emits bytes, so on Python 3 it has to
# wrap the binary buffer; Python 2's sys.stdout has no ``buffer`` attribute
# and is wrapped directly.
sys.stdout = codecs.getwriter('utf-8')(getattr(sys.stdout, 'buffer', sys.stdout))

set_staging(True)

ol = OpenLibrary("http://dev.openlibrary.org")
ol.login('EdwardBot', rc['EdwardBot'])

q = {
    'type': '/type/edition',
    'table_of_contents': None,
    'subjects': None,
}

queue = []
count = 0

# Loop-invariant tag set, built once instead of on every iteration.
wanted_tags = set(['041'])

for e in query_iter(q, limit=100):
    key = e['key']
    mc = get_mc(key)
    if not mc:
        continue
    data = get_from_local(mc)
    line = get_first_tag(data, wanted_tags)
    if not line:
        continue
    # One pre-formatted argument gives the same space-separated output under
    # both the Python 2 print statement and the Python 3 print function.
    print('%s %s %s' % (key, line[0:2], list(get_all_subfields(line))))
# Script: for up to 100 editions returned by the query below, look up the
# value from get_mc, load that record locally and print its first tag 041
# line (first two characters plus all subfields).
from catalog.utils.query import query_iter, set_staging, withKey, get_mc
import sys
import codecs
import re
# The olapi client is loaded from a local checkout, so the path must be
# extended before the import below.
sys.path.append('/home/edward/src/olapi')
from olapi import OpenLibrary, Reference
from catalog.read_rc import read_rc
from catalog.get_ia import get_from_archive, get_from_local
from catalog.marc.fast_parse import get_first_tag, get_all_subfields

rc = read_rc()

# Force UTF-8 output. The codec writer emits bytes, so on Python 3 it has to
# wrap the binary buffer; Python 2's sys.stdout has no ``buffer`` attribute
# and is wrapped directly.
sys.stdout = codecs.getwriter('utf-8')(getattr(sys.stdout, 'buffer', sys.stdout))

set_staging(True)

ol = OpenLibrary("http://dev.openlibrary.org")
ol.login('EdwardBot', rc['EdwardBot'])

q = {'type': '/type/edition', 'table_of_contents': None, 'subjects': None}

queue = []
count = 0

# Loop-invariant tag set, built once instead of on every iteration.
wanted_tags = set(['041'])

for e in query_iter(q, limit=100):
    key = e['key']
    mc = get_mc(key)
    if not mc:
        continue
    data = get_from_local(mc)
    line = get_first_tag(data, wanted_tags)
    if not line:
        continue
    # One pre-formatted argument gives the same space-separated output under
    # both the Python 2 print statement and the Python 3 print function.
    print('%s %s %s' % (key, line[0:2], list(get_all_subfields(line))))