Esempio n. 1
0
def try_merge(edition, ekey, thing):
    """Decide whether ``edition`` matches the existing record ``thing``.

    Args:
        edition: dict-like record; read for ``'isbn_10'``, ``'asin'`` and
            ``'authors'`` (a list of dicts with a ``'name'`` entry).
        ekey: key of the existing edition, passed to ``get_mc``.
        thing: the existing record; must have ``thing['type']['key']`` equal
            to ``'/type/edition'``.

    Returns:
        ``True`` when ``'amazon:' + asin`` is already listed in
        ``thing['source_records']`` or equals the value from ``get_mc(ekey)``;
        ``False`` when ``thing`` has no source records and ``get_mc`` returns
        nothing; otherwise the result of ``source_records_match`` or
        ``amazon_merge.attempt_merge``.

    Raises:
        KeyError: if ``edition`` has neither a usable ``'isbn_10'`` nor
            an ``'asin'``.
        AssertionError: if the identifier is not a string or ``thing`` is not
            an edition.
    """
    thing_type = thing['type']['key']
    if 'isbn_10' not in edition:
        # Diagnostic output for records that lack an ISBN-10.
        print(edition)
    # Prefer the ISBN-10 as the identifier; fall back to the ASIN.
    asin = edition.get('isbn_10', None) or edition['asin']
    if 'authors' in edition:
        authors = [i['name'] for i in edition['authors']]
    else:
        authors = []
    a = amazon_merge.build_amazon(edition, authors)
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring
    assert isinstance(asin, string_types)
    assert thing_type == '/type/edition'
    if 'source_records' in thing:
        if 'amazon:' + asin in thing['source_records']:
            return True
        return source_records_match(a, thing)

    # No source records on the existing record: fall back to get_mc().
    mc = get_mc(ekey)
    if mc == 'amazon:' + asin:
        return True
    if not mc:
        return False
    data = get_from_local(mc)
    e1 = build_marc(fast_parse.read_edition(data))
    return amazon_merge.attempt_merge(a, e1, threshold, debug=False)
def add_source_records(key, new, thing):
    """Add ``new`` to the ``source_records`` of the record at ``key`` and save.

    The record is fetched with ``ol.get(key)``. If it already lists ``new``
    nothing is saved. If it has no ``source_records`` at all, the list is
    seeded from ``get_mc(key)`` before ``new`` is appended.

    Besides the source records, the table of contents (via ``fix_toc``) and
    any subjects flagged by ``has_dot`` are cleaned up before saving.

    Args:
        key: key of the record to update.
        new: source record string to add.
        thing: unused here; kept so existing callers keep working.

    Raises:
        AssertionError: if ``new`` is already among the seeded source
            records, or if the saved table of contents lacks a ``'title'``
            in its first item.
    """
    e = ol.get(key)
    if 'source_records' in e:
        if new in e['source_records']:
            return
        e['source_records'].append(new)
    else:
        # NOTE(review): assumes get_mc() returns a string here; other callers
        # treat a falsy result as possible -- confirm.
        existing = get_mc(key)
        amazon = 'amazon:'
        if existing.startswith('ia:'):
            # Already in source-record form; do not prefix it with 'marc:'.
            sr = [existing]
        elif existing.startswith(amazon):
            sr = amazon_source_records(existing[len(amazon):]) or [existing]
        else:
            m = re_meta_mrc.match(existing)
            sr = ['marc:' + existing if not m else 'ia:' + m.group(1)]
        assert new not in sr
        e['source_records'] = sr + [new]

    # fix other bits of the record as well
    new_toc = fix_toc(e)
    if new_toc:
        e['table_of_contents'] = new_toc
    if e.get('subjects', None) and any(has_dot(s) for s in e['subjects']):
        # Drop the last character of every subject that has_dot() flags.
        subjects = [s[:-1] if has_dot(s) else s for s in e['subjects']]
        e['subjects'] = subjects
    print(ol.save(key, e, 'found a matching MARC record'))
    if new_toc:
        # Re-read the saved record to check the new table of contents stuck.
        new_edition = ol.get(key)
        # [{u'type': <ref: u'/type/toc_item'>}, ...]
        assert 'title' in new_edition['table_of_contents'][0]
Esempio n. 3
0
def get_src(key):
    """Return the source records for the record at ``key``.

    Uses the record's own ``'source_records'`` when present. Otherwise
    returns a one-element list holding the value of ``get_mc(key)``, or
    ``None`` when that value is falsy.
    """
    record = withKey(key)
    if 'source_records' not in record:
        fallback_src = get_mc(key)
        return [fallback_src] if fallback_src else None
    return record['source_records']
Esempio n. 4
0
def try_merge(edition, ekey, thing):
    """Report whether ``edition`` matches the existing record ``thing``.

    ``edition`` is read for ``'isbn_10'`` (falling back to ``'asin'``) and
    ``'authors'``. ``thing`` must be of type ``'/type/edition'``. ``ekey`` is
    only used to look up ``get_mc(ekey)`` when ``thing`` has no
    ``'source_records'``.

    Returns ``True`` if ``'amazon:' + asin`` is already recorded for
    ``thing``, ``False`` if there is nothing to compare against, and
    otherwise whatever ``source_records_match`` or
    ``amazon_merge.attempt_merge`` returns.
    """
    kind = thing['type']['key']
    if 'isbn_10' not in edition:
        print(edition)
    asin = edition.get('isbn_10', None) or edition['asin']
    author_names = (
        [author['name'] for author in edition['authors']]
        if 'authors' in edition
        else []
    )
    amazon_rec = amazon_merge.build_amazon(edition, author_names)
    assert isinstance(asin, six.string_types)
    assert kind == '/type/edition'

    amazon_src = 'amazon:' + asin
    if 'source_records' in thing:
        # Already linked to this identifier, or defer to the generic matcher.
        return (amazon_src in thing['source_records']
                or source_records_match(amazon_rec, thing))

    mc = get_mc(ekey)
    if mc == amazon_src:
        return True
    if not mc:
        return False
    marc_rec = build_marc(fast_parse.read_edition(get_from_local(mc)))
    return amazon_merge.attempt_merge(amazon_rec, marc_rec, threshold,
                                      debug=False)
Esempio n. 5
0
def add_source_records(key, new, thing):
    """Record ``new`` as a source of the record at ``key`` and save the record.

    Returns early, without saving, when ``new`` is already listed. When the
    record has no ``'source_records'`` yet, the list is built from
    ``get_mc(key)`` and ``new`` is appended to it. The table of contents and
    subjects are tidied in the same save.

    Args:
        key: key of the record to update (fetched via ``ol.get``).
        new: source record string to add.
        thing: not used by this function; retained for call compatibility.

    Raises:
        AssertionError: if ``new`` duplicates a seeded source record, or if
            the re-read table of contents has no ``'title'`` in its first
            item.
    """
    e = ol.get(key)
    if 'source_records' in e:
        if new in e['source_records']:
            return
        e['source_records'].append(new)
    else:
        # NOTE(review): a falsy get_mc() result would fail on .startswith
        # below -- confirm it cannot happen for records reaching this point.
        existing = get_mc(key)
        amazon = 'amazon:'
        if existing.startswith('ia:'):
            # Keep an 'ia:' value as is instead of turning it into
            # 'marc:ia:...'.
            sr = [existing]
        elif existing.startswith(amazon):
            sr = amazon_source_records(existing[len(amazon):]) or [existing]
        else:
            m = re_meta_mrc.match(existing)
            sr = ['marc:' + existing if not m else 'ia:' + m.group(1)]
        assert new not in sr
        e['source_records'] = sr + [new]

    # fix other bits of the record as well
    new_toc = fix_toc(e)
    if new_toc:
        e['table_of_contents'] = new_toc
    if e.get('subjects', None) and any(has_dot(s) for s in e['subjects']):
        # Strip the final character from subjects that has_dot() flags.
        subjects = [s[:-1] if has_dot(s) else s for s in e['subjects']]
        e['subjects'] = subjects
    print(ol.save(key, e, 'found a matching MARC record'))
    if new_toc:
        # Sanity check on the saved table of contents.
        new_edition = ol.get(key)
        # [{u'type': <ref: u'/type/toc_item'>}, ...]
        assert 'title' in new_edition['table_of_contents'][0]
Esempio n. 6
0
def get_src(key):
    """Look up where the record at ``key`` came from.

    Returns the record's ``'source_records'`` list if it has one; otherwise
    ``[get_mc(key)]`` when that lookup yields a truthy value, else ``None``.
    """
    edition = withKey(key)
    if 'source_records' in edition:
        return edition['source_records']
    fallback = get_mc(key)
    if not fallback:
        return None
    return [fallback]
Esempio n. 7
0
def add_source_records(key, new, thing, data):
    """Add ``new`` to the record at ``key``, repair related fields, and save.

    Returns early, without saving, when ``new`` is already in the record's
    ``'source_records'``. When the record has none, the list is seeded from
    ``get_mc(key)``. Before saving, the table of contents, subjects and
    author references are also cleaned up.

    Args:
        key: key of the record to update (fetched via ``ol.get``).
        new: source record string to add.
        thing: not used by this function; retained for call compatibility.
        data: passed to ``author_from_data`` when the record's single author
            is the placeholder ``'None'``.

    Raises:
        AssertionError: if ``new`` duplicates a seeded source record, if a
            ``'None'`` author is not the only author, or if the re-read
            table of contents has no ``'title'`` in its first item.
        Exception: anything raised by ``ol.save`` is re-raised after the
            record has been printed.
    """
    e = ol.get(key)
    if 'source_records' in e:
        if new in e['source_records']:
            return
        e['source_records'].append(new)
    else:
        existing = get_mc(key)
        amazon = 'amazon:'
        if existing.startswith('ia:'):
            sr = [existing]
        elif existing.startswith(amazon):
            sr = amazon_source_records(existing[len(amazon):]) or [existing]
        else:
            m = re_meta_mrc.match(existing)
            sr = ['marc:' + existing if not m else 'ia:' + m.group(1)]
        assert new not in sr
        e['source_records'] = sr + [new]

    # fix other bits of the record as well
    new_toc = fix_toc(e)
    if new_toc:
        e['table_of_contents'] = new_toc
    if e.get('subjects', None) and any(has_dot(s) for s in e['subjects']):
        # Strip the final character from subjects that has_dot() flags.
        subjects = [s[:-1] if has_dot(s) else s for s in e['subjects']]
        e['subjects'] = subjects
    if 'authors' in e:
        if any(a == 'None' for a in e['authors']):
            # A literal 'None' author is a placeholder: rebuild the author
            # from the new record's data.
            assert len(e['authors']) == 1
            new_author = author_from_data(new, data)
            e['authors'] = [new_author]
        else:
            print(e['authors'])
            authors = [ol.get(akey) for akey in e['authors']]
            # Follow redirects so the edition points at the target author.
            authors = [
                ol.get(a['location']) if a['type'] == '/type/redirect' else a
                for a in authors
            ]
            e['authors'] = [a['key'] for a in authors]
            undelete_authors(authors)
    try:
        print(ol.save(key, e, 'found a matching MARC record'))
    except Exception:
        # Show the record that failed to save, then let the error propagate.
        print(e)
        raise
    if new_toc:
        # Sanity check on the saved table of contents.
        new_edition = ol.get(key)
        # [{u'type': <ref: u'/type/toc_item'>}, ...]
        assert 'title' in new_edition['table_of_contents'][0]
Esempio n. 8
0
def try_merge(e1, edition_key, thing):
    """Check whether record ``e1`` matches the existing record ``thing``.

    Args:
        e1: candidate record, handed to ``source_records_match``.
        edition_key: key of the existing edition; used for
            ``fix_source_records``, ``withKey`` and ``get_mc``.
        thing: the existing record; its ``thing['type']['key']`` must be
            ``'/type/edition'`` or ``'/type/delete'``.

    Returns:
        ``False`` for a deleted ``thing``, for an identifier rejected by
        ``is_dark_or_bad``, or when ``get_ia`` raises an XML parse error.
        The result of ``source_records_match`` when ``thing`` has
        ``'source_records'``. ``True`` when an identifier was found but no
        record could be loaded for it. In every other case the code as
        written falls off the end and returns ``None``.

    Raises:
        AssertionError: if ``thing`` is neither an edition nor deleted, if
            a ``_meta.mrc:`` value is found but ``thing`` has no
            ``'ocaid'``, or if ``get_ia`` fails with an HTTP status other
            than 404 or 403.
    """
    thing_type = thing['type']['key']
    if thing_type == '/type/delete':
        return False
    assert thing_type == '/type/edition'

    if 'source_records' in thing:
        if fix_source_records(edition_key, thing):
            thing = withKey(edition_key)  # reload
        return source_records_match(e1, thing)

    ia = thing.get('ocaid', None)
    print(edition_key)
    mc = get_mc(edition_key)
    print(mc)
    if mc:
        # Work out the identifier from the get_mc() value where possible.
        if mc.startswith('ia:'):
            ia = mc[3:]
        elif mc.endswith('.xml') or mc.endswith('.mrc'):
            ia = mc[:mc.find('/')]
        # Deliberately a separate `if`: a '_meta.mrc:' value overrides the
        # identifier chosen above with the record's own ocaid.
        if '_meta.mrc:' in mc:
            assert 'ocaid' in thing
            ia = thing['ocaid']
    rec2 = None
    if ia:
        if is_dark_or_bad(ia):
            return False
        try:
            loc2, rec2 = get_ia(ia)
        except xml.parsers.expat.ExpatError:
            return False
        except urllib2.HTTPError as error:
            # 404/403 are tolerated; rec2 stays None and is handled below.
            print(error.code)
            assert error.code in (404, 403)
        if not rec2:
            return True
    # NOTE(review): nothing follows here, so a loaded rec2 (or a missing
    # ia) yields an implicit None -- this excerpt looks truncated; confirm
    # against the full source.
Esempio n. 9
0
 rec2 = None
 if ia:
     if is_dark_or_bad(ia):
         return False
     try:
         loc2, rec2 = get_ia(ia)
     except xml.parsers.expat.ExpatError:
         return False
     except urllib2.HTTPError, error:
         print error.code
         assert error.code in (404, 403)
     if not rec2:
         return True
 if not rec2:
     if not mc:
         mc = get_mc(thing['key'])
     if not mc or mc == 'initial import':
         return False
     if mc.startswith('amazon:'):
         try:
             a = try_amazon(thing)
         except IndexError:
             print thing['key']
             raise
         except AttributeError:
             return False
         if not a:
             return False
         try:
             return amazon.attempt_merge(a, e1, threshold, debug=False)
         except: