Ejemplo n.º 1
0
def try_merge(edition, ekey, thing):
    """Report whether the Amazon record `edition` matches the edition `thing`.

    The record identifier is ``edition['isbn_10']`` when that is present and
    truthy, otherwise ``edition['asin']``.  Records without an ``isbn_10``
    key are printed to stdout.

    Matching proceeds as follows:

    * If `thing` has ``source_records``: return True when
      ``'amazon:' + asin`` is listed there, otherwise return whatever
      ``source_records_match`` returns.
    * Otherwise look up ``get_mc(ekey)``: return True when it equals
      ``'amazon:' + asin``, False when it is falsy, and otherwise the result
      of ``amazon_merge.attempt_merge`` against the record loaded from it.

    Raises AssertionError when the identifier is not a string or when `thing`
    is not of type ``/type/edition``.
    """
    thing_type = thing['type']['key']
    if 'isbn_10' not in edition:
        print(edition)
    asin = edition.get('isbn_10') or edition['asin']
    authors = (
        [author['name'] for author in edition['authors']]
        if 'authors' in edition
        else []
    )
    amazon_rec = amazon_merge.build_amazon(edition, authors)
    assert isinstance(asin, six.string_types)
    assert thing_type == '/type/edition'

    if 'source_records' in thing:
        # A direct hit on the source record wins; otherwise defer to the
        # record-by-record comparison.
        return (
            'amazon:' + asin in thing['source_records']
            or source_records_match(amazon_rec, thing)
        )

    # No source records on the edition: fall back to the value from get_mc().
    mc = get_mc(ekey)
    if mc == 'amazon:' + asin:
        return True
    if not mc:
        return False
    raw = get_from_local(mc)
    marc_rec = build_marc(fast_parse.read_edition(raw))
    return amazon_merge.attempt_merge(
        amazon_rec, marc_rec, threshold, debug=False
    )
Ejemplo n.º 2
0
def try_merge(edition, ekey, thing):
    """Decide whether the Amazon record `edition` matches the edition `thing`.

    Works on both Python 2 and Python 3: the debug output uses the
    single-argument ``print(...)`` form (same output on both) and the string
    check falls back to ``str`` where ``basestring`` does not exist.

    Args:
        edition: mapping describing the Amazon record.  ``isbn_10`` is used
            as the identifier when present and truthy, otherwise ``asin``.
            An optional ``authors`` entry is a sequence of mappings with a
            ``name`` key.
        ekey: key passed to ``get_mc`` when `thing` has no source records.
        thing: mapping for the existing record; must have
            ``thing['type']['key'] == '/type/edition'``.

    Returns:
        True when ``'amazon:' + asin`` is among ``thing['source_records']``
        or equals ``get_mc(ekey)``; False when there are no source records
        and ``get_mc(ekey)`` is falsy; otherwise the result of
        ``source_records_match`` (source records present) or of
        ``amazon_merge.attempt_merge`` (no source records).

    Raises:
        AssertionError: if the identifier is not a string or `thing` is not
            an edition.
    """
    # ``basestring`` exists only on Python 2; use ``str`` everywhere else.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str

    thing_type = thing['type']['key']
    if 'isbn_10' not in edition:
        # Debug aid: show records that have to fall back to the 'asin' key.
        print(edition)
    asin = edition.get('isbn_10', None) or edition['asin']
    if 'authors' in edition:
        authors = [i['name'] for i in edition['authors']]
    else:
        authors = []
    a = amazon_merge.build_amazon(edition, authors)
    assert isinstance(asin, string_types)
    assert thing_type == '/type/edition'
    if 'source_records' in thing:
        # The edition already lists this exact Amazon record as a source.
        if 'amazon:' + asin in thing['source_records']:
            return True
        return source_records_match(a, thing)

    # No source records: compare against whatever get_mc() reports.
    mc = get_mc(ekey)
    if mc == 'amazon:' + asin:
        return True
    if not mc:
        return False
    data = get_from_local(mc)
    e1 = build_marc(fast_parse.read_edition(data))
    return amazon_merge.attempt_merge(a, e1, threshold, debug=False)
Ejemplo n.º 3
0
def get_651(key):
    """Return the subfields of every '651' tag line in the sources of `key`.

    Each source yielded by ``get_src(key)`` is loaded with
    ``get_from_local``; every '651' line reported by ``get_tag_lines``
    contributes one list of its subfields (as produced by
    ``get_all_subfields``), in the order encountered.
    """
    return [
        list(get_all_subfields(line))
        for src in get_src(key)
        for _tag, line in get_tag_lines(get_from_local(src), ['651'])
    ]
Ejemplo n.º 4
0
def get_651(key):
    """Gather the subfields of all '651' tag lines for the record `key`.

    Returns a list with one entry per '651' line found across every source
    from ``get_src(key)``; each entry is the list of subfields that
    ``get_all_subfields`` yields for that line.
    """
    subfield_lists = []
    for src in get_src(key):
        record = get_from_local(src)
        subfield_lists.extend(
            list(get_all_subfields(line))
            for _tag, line in get_tag_lines(record, ['651'])
        )
    return subfield_lists
Ejemplo n.º 5
0
def marc_match(a, loc):
    """Try to merge the Amazon record `a` with the record stored at `loc`.

    `loc` must be truthy (checked with an assert).  The record is loaded via
    ``get_from_local``, parsed with ``fast_parse.read_edition`` and converted
    by ``build_marc`` before being handed to ``amazon_merge.attempt_merge``
    with the module-level ``threshold``.  The merge result is returned.
    """
    assert loc
    raw = get_from_local(loc)
    marc_edition = build_marc(fast_parse.read_edition(raw))
    return amazon_merge.attempt_merge(
        a, marc_edition, threshold, debug=False
    )
Ejemplo n.º 6
0
from catalog.utils.query import query_iter, set_staging, withKey, get_mc
import sys, codecs, re
sys.path.append('/home/edward/src/olapi')
from olapi import OpenLibrary, Reference
from catalog.read_rc import read_rc
from catalog.get_ia import get_from_archive, get_from_local
from catalog.marc.fast_parse import get_first_tag, get_all_subfields
# Script body: log in to the dev Open Library instance, iterate over edition
# records and print the first '041' tag line found in each edition's source
# record.
# NOTE(review): the print statement at the bottom is Python 2 syntax.
rc = read_rc()

# Wrap stdout in a UTF-8 encoding writer so non-ASCII values can be printed.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
# presumably points the query helpers at the staging site — confirm in
# catalog.utils.query.
set_staging(True)

ol = OpenLibrary("http://dev.openlibrary.org")
# rc['EdwardBot'] is passed as the second login argument (presumably the
# account's password — verify against read_rc).
ol.login('EdwardBot', rc['EdwardBot'])

# Query for editions; the None values presumably ask for those fields to be
# returned — TODO confirm against the query API.
q = { 'type': '/type/edition', 'table_of_contents': None, 'subjects': None }
# queue and count are not used anywhere in the visible part of this script.
queue = []
count = 0
for e in query_iter(q, limit=100):
    key = e['key']
    mc = get_mc(key)
    # Skip editions for which get_mc() returns nothing.
    if not mc:
        continue
    data = get_from_local(mc)
    line = get_first_tag(data, set(['041']))
    # Skip records that have no '041' tag line.
    if not line:
        continue
    # line[0:2] is the first two characters of the tag line (presumably the
    # field indicators — confirm), followed by all of its subfields.
    print key, line[0:2], list(get_all_subfields(line))

Ejemplo n.º 7
0
def marc_match(a, loc):
    """Return the outcome of merging Amazon record `a` with the one at `loc`.

    Asserts that `loc` is truthy, loads and parses the record found there
    (``get_from_local`` -> ``fast_parse.read_edition`` -> ``build_marc``) and
    returns what ``amazon_merge.attempt_merge`` reports for the pair, using
    the module-level ``threshold`` and ``debug=False``.
    """
    assert loc
    parsed = fast_parse.read_edition(get_from_local(loc))
    return amazon_merge.attempt_merge(
        a, build_marc(parsed), threshold, debug=False
    )
Ejemplo n.º 8
0
from catalog.utils.query import query_iter, set_staging, withKey, get_mc
import sys, codecs, re
sys.path.append('/home/edward/src/olapi')
from olapi import OpenLibrary, Reference
from catalog.read_rc import read_rc
from catalog.get_ia import get_from_archive, get_from_local
from catalog.marc.fast_parse import get_first_tag, get_all_subfields
# Script body: authenticate against the dev Open Library instance, walk the
# editions returned by the query and print the first '041' tag line of each
# edition's source record.
# NOTE(review): the final print statement is Python 2 syntax.
rc = read_rc()

# Replace stdout with a UTF-8 encoding writer so non-ASCII text can be
# printed.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
# presumably switches the query helpers to the staging site — confirm in
# catalog.utils.query.
set_staging(True)

ol = OpenLibrary("http://dev.openlibrary.org")
# The value stored under 'EdwardBot' in rc is the second login argument
# (presumably the password — verify against read_rc).
ol.login('EdwardBot', rc['EdwardBot'])

# Edition query; the None values presumably request those fields in the
# results — TODO confirm against the query API.
q = {'type': '/type/edition', 'table_of_contents': None, 'subjects': None}
# queue and count are never read in the visible part of this script.
queue = []
count = 0
for e in query_iter(q, limit=100):
    key = e['key']
    mc = get_mc(key)
    # Nothing to load when get_mc() yields no value for this edition.
    if not mc:
        continue
    data = get_from_local(mc)
    line = get_first_tag(data, set(['041']))
    # Ignore records without a '041' tag line.
    if not line:
        continue
    # Print the edition key, the first two characters of the tag line
    # (presumably its indicators — confirm) and the list of its subfields.
    print key, line[0:2], list(get_all_subfields(line))