Esempio n. 1
0
def ia_match(e1, ia):
    """Compare edition ``e1`` against the record fetched for ``ia``.

    ``ia`` is handed to ``get_ia()`` unchanged (presumably an Internet
    Archive identifier -- confirm against callers).

    Returns:
        ``False`` when ``get_ia()`` raises ``NoMARCXML`` or
        ``urllib2.HTTPError``, or when the fetched record is ``None`` or has
        no ``'full_title'`` key.  Otherwise the result of
        ``attempt_merge(e1, e2, threshold, debug=False)``, where ``e2`` is
        ``build_marc(rec)``.

    Raises:
        TypeError: re-raised from ``build_marc()`` after the offending
            record has been printed.
    """
    try:
        # get_ia() yields a pair in this version; only the second item
        # (the record) is used below.
        _loc, rec = get_ia(ia)
    except NoMARCXML:
        return False
    except urllib2.HTTPError:
        return False
    if rec is None or 'full_title' not in rec:
        return False
    try:
        e2 = build_marc(rec)
    except TypeError:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(rec)
        raise
    # `threshold` is a module-level name defined outside this function.
    return attempt_merge(e1, e2, threshold, debug=False)
Esempio n. 2
0
def ia_match(e1, ia):
    """Compare edition ``e1`` against the record that ``get_ia(ia)`` returns.

    Returns ``False`` if ``get_ia()`` raises ``NoMARCXML`` or
    ``urllib2.HTTPError``, or if the record is ``None`` or lacks a
    ``'full_title'`` key.  Otherwise returns whatever
    ``attempt_merge(e1, build_marc(record), threshold, debug=False)`` gives.

    A ``TypeError`` from ``build_marc()`` is re-raised after the record has
    been printed.
    """
    try:
        marc_rec = get_ia(ia)
    except NoMARCXML:
        return False
    except urllib2.HTTPError:
        return False

    # A record is only comparable when it exists and carries a title.
    usable = marc_rec is not None and 'full_title' in marc_rec
    if not usable:
        return False

    try:
        candidate = build_marc(marc_rec)
    except TypeError:
        # Show the record that could not be converted, then propagate.
        print(marc_rec)
        raise
    else:
        # `threshold` comes from module scope.
        return attempt_merge(e1, candidate, threshold, debug=False)
Esempio n. 3
0
def try_merge(e1, edition_key, thing):
    """Start deciding whether edition ``e1`` matches the record ``thing``.

    Args:
        e1: the edition being matched; passed through to
            ``source_records_match()``.
        edition_key: key used for ``fix_source_records()``, ``withKey()``
            and ``get_mc()``.
        thing: mapping for the existing record; ``thing['type']['key']``
            must be ``'/type/edition'`` (or ``'/type/delete'``).

    Returns:
        * ``False`` for a deleted record, a dark/bad identifier, an XML
          parse error, or a ``'_meta.mrc:'`` locator without ``'ocaid'``.
        * The result of ``source_records_match(e1, thing)`` when ``thing``
          has ``'source_records'``.
        * ``True`` when an identifier was found but no record could be
          loaded for it.
        * ``None`` (implicit) in every other case: as written here the
          function ends after the lookup, so it falls off the end when no
          identifier is found or when a record was loaded.

    Raises:
        AssertionError: if the type is neither edition nor delete, or if
            ``get_ia()`` fails with an HTTP status other than 404/403.
    """
    thing_type = thing['type']['key']
    if thing_type != '/type/edition':
        # One pre-formatted string prints identically on Python 2 and 3.
        print('%s is %s' % (thing['key'], thing_type))
    if thing_type == '/type/delete':
        return False
    assert thing_type == '/type/edition'

    if 'source_records' in thing:
        if fix_source_records(edition_key, thing):
            thing = withKey(edition_key)  # reload
        return source_records_match(e1, thing)

    ia = thing.get('ocaid', None)
    print(edition_key)
    mc = get_mc(edition_key)
    print(mc)
    if mc:
        if mc.startswith('ia:'):
            # 'ia:<identifier>' -> identifier
            ia = mc[3:]
        elif mc.endswith(('.xml', '.mrc')):
            # Keep the text before the first '/'.
            # NOTE(review): if mc contains no '/', find() returns -1 and
            # this silently drops the last character -- confirm such
            # values cannot occur.
            ia = mc[:mc.find('/')]
        if '_meta.mrc:' in mc:
            print(thing)
            if 'ocaid' not in thing:
                return False
            ia = thing['ocaid']
    rec2 = None
    if ia:
        if is_dark_or_bad(ia):
            return False
        try:
            # get_ia() yields a pair in this version; only the record
            # (second item) is used.
            _loc2, rec2 = get_ia(ia)
        except xml.parsers.expat.ExpatError:
            return False
        except NoMARCXML:
            print('no MARCXML')
        except urllib2.HTTPError as error:
            print(error.code)
            # Only "not found" / "forbidden" are tolerated here.
            assert error.code in (404, 403)
        if not rec2:
            return True
Esempio n. 4
0
def try_merge(e1, edition_key, thing):
    """Start deciding whether edition ``e1`` matches the record ``thing``.

    Args:
        e1: the edition being matched; passed through to
            ``source_records_match()``.
        edition_key: key used for ``fix_source_records()``, ``withKey()``
            and ``get_mc()``.
        thing: mapping for the existing record; ``thing['type']`` is
            compared against ``Reference('/type/edition')`` and
            ``Reference('/type/delete')``.

    Returns:
        * ``False`` for a deleted record, a dark/bad identifier, an XML
          parse error, or a ``'_meta.mrc:'`` locator without ``'ocaid'``.
        * The result of ``source_records_match(e1, thing)`` when ``thing``
          has ``'source_records'``.
        * ``True`` when an identifier was found but no record could be
          loaded for it.
        * ``None`` (implicit) in every other case: as written here the
          function ends after the lookup, so it falls off the end when no
          identifier is found or when a record was loaded.

    Raises:
        AssertionError: if the type is neither edition nor delete, or if
            ``get_ia()`` fails with an HTTP status other than 404/403.
    """
    thing_type = thing['type']
    if thing_type != Reference('/type/edition'):
        # One pre-formatted string prints identically on Python 2 and 3.
        print('%s is %s' % (thing['key'], str(thing['type'])))
    if thing_type == Reference('/type/delete'):
        return False
    assert thing_type == Reference('/type/edition')

    if 'source_records' in thing:
        if fix_source_records(edition_key, thing):
            thing = withKey(edition_key)  # reload
        return source_records_match(e1, thing)

    ia = thing.get('ocaid', None)
    print(edition_key)
    mc = get_mc(edition_key)
    print(mc)
    if mc:
        if mc.startswith('ia:'):
            # 'ia:<identifier>' -> identifier
            ia = mc[3:]
        elif mc.endswith(('.xml', '.mrc')):
            # Keep the text before the first '/'.
            # NOTE(review): if mc contains no '/', find() returns -1 and
            # this silently drops the last character -- confirm such
            # values cannot occur.
            ia = mc[:mc.find('/')]
        if '_meta.mrc:' in mc:
            print(thing)
            if 'ocaid' not in thing:
                return False
            ia = thing['ocaid']
    rec2 = None
    if ia:
        if is_dark_or_bad(ia):
            return False
        try:
            rec2 = get_ia(ia)
        except xml.parsers.expat.ExpatError:
            return False
        except NoMARCXML:
            print('no MARCXML')
        except urllib2.HTTPError as error:
            print(error.code)
            # Only "not found" / "forbidden" are tolerated here.
            assert error.code in (404, 403)
        if not rec2:
            return True
Esempio n. 5
0
def try_merge(e1, edition_key, thing):
    """Start deciding whether edition ``e1`` matches the record ``thing``.

    Args:
        e1: the edition being matched; passed through to
            ``source_records_match()``.
        edition_key: key used for ``fix_source_records()``, ``withKey()``
            and ``get_mc()``.
        thing: mapping for the existing record; ``thing["type"]["key"]``
            must be ``"/type/edition"`` (or ``"/type/delete"``).

    Returns:
        * ``False`` for a deleted record, a dark/bad identifier, or an XML
          parse error.
        * The result of ``source_records_match(e1, thing)`` when ``thing``
          has ``"source_records"``.
        * ``True`` when an identifier was found but no record could be
          loaded for it.
        * ``None`` (implicit) in every other case: as written here the
          function ends after the lookup, so it falls off the end when no
          identifier is found or when a record was loaded.

    Raises:
        AssertionError: if the type is neither edition nor delete, if a
            ``"_meta.mrc:"`` locator is seen on a record without
            ``"ocaid"``, or if ``get_ia()`` fails with an HTTP status other
            than 404/403.
    """
    thing_type = thing["type"]["key"]
    if thing_type != "/type/edition":
        # One pre-formatted string prints identically on Python 2 and 3.
        print("%s is %s" % (thing["key"], thing_type))
    if thing_type == "/type/delete":
        return False
    assert thing_type == "/type/edition"

    if "source_records" in thing:
        if fix_source_records(edition_key, thing):
            thing = withKey(edition_key)  # reload
        return source_records_match(e1, thing)

    ia = thing.get("ocaid", None)
    print(edition_key)
    mc = get_mc(edition_key)
    print(mc)
    if mc:
        if mc.startswith("ia:"):
            # "ia:<identifier>" -> identifier
            ia = mc[3:]
        elif mc.endswith((".xml", ".mrc")):
            # Keep the text before the first "/".
            # NOTE(review): if mc contains no "/", find() returns -1 and
            # this silently drops the last character -- confirm such
            # values cannot occur.
            ia = mc[: mc.find("/")]
        if "_meta.mrc:" in mc:
            # This variant treats a missing "ocaid" as an invariant
            # violation rather than as "no match".
            assert "ocaid" in thing
            ia = thing["ocaid"]
    rec2 = None
    if ia:
        if is_dark_or_bad(ia):
            return False
        try:
            # get_ia() yields a pair in this version; only the record
            # (second item) is used.
            _loc2, rec2 = get_ia(ia)
        except xml.parsers.expat.ExpatError:
            return False
        except NoMARCXML:
            print("no MARCXML")
        except urllib2.HTTPError as error:
            print(error.code)
            # Only "not found" / "forbidden" are tolerated here.
            assert error.code in (404, 403)
        if not rec2:
            return True
Esempio n. 6
0
def try_merge(e1, edition_key, thing):
    """Decide whether edition ``e1`` matches the existing record ``thing``.

    A comparison record for ``thing`` is looked for in this order:

    1. If ``thing`` has ``'source_records'``, the decision is delegated to
       ``source_records_match()`` (after an optional
       ``fix_source_records()`` repair and reload).
    2. Otherwise an identifier ``ia`` is taken from ``thing['ocaid']`` or
       derived from ``get_mc(edition_key)`` and fetched with ``get_ia()``.
    3. Otherwise the ``mc`` locator itself is used: ``'amazon:'`` values go
       through ``try_amazon()`` / ``amazon.attempt_merge()``; anything else
       is loaded with ``get_from_archive()`` and parsed with
       ``fast_parse.read_edition()``.

    Args:
        e1: the edition being matched.
        edition_key: key used for ``fix_source_records()``, ``withKey()``
            and ``get_mc()``.
        thing: mapping for the existing record; ``thing['type']`` is
            compared against ``Reference('/type/edition')`` and
            ``Reference('/type/delete')``.

    Returns:
        ``True`` when an identifier or locator exists but no data could be
        loaded for it; ``False`` when the record is deleted, unusable or
        has no locator; otherwise the result of the delegated
        ``source_records_match()`` / ``amazon.attempt_merge()`` /
        ``attempt_merge()`` call.

    Raises:
        AssertionError: if the type is neither edition nor delete, or if
            ``get_ia()`` fails with an HTTP status other than 404/403.
        Any exception from the Amazon or archive paths is re-raised after
        diagnostic output, except those explicitly mapped to ``False``.
    """
    thing_type = thing['type']
    if thing_type != Reference('/type/edition'):
        print(thing['key'], 'is', str(thing['type']))
    if thing_type == Reference('/type/delete'):
        return False
    assert thing_type == Reference('/type/edition')

    if 'source_records' in thing:
        if fix_source_records(edition_key, thing):
            thing = withKey(edition_key)  # reload
        return source_records_match(e1, thing)

    ia = thing.get('ocaid', None)
    print(edition_key)
    mc = get_mc(edition_key)
    print(mc)
    if mc:
        if mc.startswith('ia:'):
            # 'ia:<identifier>' -> identifier
            ia = mc[3:]
        elif mc.endswith(('.xml', '.mrc')):
            # Keep the text before the first '/'.
            # NOTE(review): if mc contains no '/', find() returns -1 and
            # this silently drops the last character -- confirm such
            # values cannot occur.
            ia = mc[:mc.find('/')]
        if '_meta.mrc:' in mc:
            print(thing)
            if 'ocaid' not in thing:
                return False
            ia = thing['ocaid']

    rec2 = None
    if ia:
        if is_dark_or_bad(ia):
            return False
        try:
            rec2 = get_ia(ia)
        except xml.parsers.expat.ExpatError:
            return False
        except NoMARCXML:
            print('no MARCXML')
        except urllib2.HTTPError as error:
            print(error.code)
            # Only "not found" / "forbidden" are tolerated here.
            assert error.code in (404, 403)
        if not rec2:
            return True

    if not rec2:
        # Reached only when no identifier was found above.
        if not mc:
            mc = get_mc(thing['key'])
        if not mc or mc == 'initial import':
            return False
        if mc.startswith('amazon:'):
            try:
                a = try_amazon(thing)
            except IndexError:
                print(thing['key'])
                raise
            except AttributeError:
                return False
            if not a:
                return False
            try:
                return amazon.attempt_merge(a, e1, threshold, debug=False)
            except:
                # Catches everything only to print context; the exception
                # is always re-raised.
                print(a)
                print(e1)
                print(thing['key'])
                raise
        print('mc:', mc)
        if mc.startswith('ia:'):
            # Explicit check instead of an `assert` caught as
            # AssertionError: asserts vanish under `python -O`, which would
            # let an 'ia:' locator through to get_from_archive().
            print(mc)
            print(edition_key)
            return False
        try:
            data = get_from_archive(mc)
            if not data:
                return True
            rec2 = fast_parse.read_edition(data)
        except (fast_parse.SoundRecording, IndexError, AssertionError):
            print(mc)
            print(edition_key)
            return False
        except:
            # Print context, then re-raise whatever it was.
            print(mc)
            print(edition_key)
            raise

    if not rec2:
        return False
    try:
        e2 = build_marc(rec2)
    except TypeError:
        print(rec2)
        raise
    # `threshold` is a module-level name defined outside this function.
    return attempt_merge(e1, e2, threshold, debug=False)
Esempio n. 7
0
def try_merge(e1, edition_key, thing):
    """Decide whether edition ``e1`` matches the existing record ``thing``.

    Sources for the comparison record are tried in order:

    * ``thing['source_records']`` present: delegate to
      ``source_records_match()`` (reloading ``thing`` via ``withKey()`` if
      ``fix_source_records()`` reports a change).
    * An identifier from ``thing['ocaid']`` or from the ``get_mc()``
      locator: fetch the record with ``get_ia()``.
    * The locator itself: ``'amazon:'`` locators use ``try_amazon()`` and
      ``amazon.attempt_merge()``; others use ``get_from_archive()`` and
      ``fast_parse.read_edition()``.

    Args:
        e1: the edition being matched.
        edition_key: key used for ``fix_source_records()``, ``withKey()``
            and ``get_mc()``.
        thing: mapping for the existing record; ``thing['type']`` is
            compared against ``Reference('/type/edition')`` and
            ``Reference('/type/delete')``.

    Returns:
        ``True`` when an identifier or locator exists but nothing could be
        loaded for it; ``False`` when the record is deleted, unusable or
        has no locator; otherwise the result of the delegated
        ``source_records_match()`` / ``amazon.attempt_merge()`` /
        ``attempt_merge()`` call.

    Raises:
        AssertionError: if the type is neither edition nor delete, or if
            ``get_ia()`` fails with an HTTP status other than 404/403.
        Other exceptions from the Amazon and archive paths propagate after
        diagnostic output, except those explicitly mapped to ``False``.
    """
    thing_type = thing['type']
    if thing_type != Reference('/type/edition'):
        print(thing['key'], 'is', str(thing['type']))
    if thing_type == Reference('/type/delete'):
        return False
    assert thing_type == Reference('/type/edition')

    if 'source_records' in thing:
        if fix_source_records(edition_key, thing):
            thing = withKey(edition_key)  # reload
        return source_records_match(e1, thing)

    ia = thing.get('ocaid', None)
    print(edition_key)
    mc = get_mc(edition_key)
    print(mc)
    if mc:
        if mc.startswith('ia:'):
            # Strip the 'ia:' prefix to get the identifier.
            ia = mc[3:]
        elif mc.endswith(('.xml', '.mrc')):
            # Identifier is the text before the first '/'.
            # NOTE(review): with no '/' in mc, find() returns -1 and the
            # slice drops the final character -- verify this cannot occur.
            ia = mc[:mc.find('/')]
        if '_meta.mrc:' in mc:
            print(thing)
            if 'ocaid' not in thing:
                return False
            ia = thing['ocaid']

    rec2 = None
    if ia:
        if is_dark_or_bad(ia):
            return False
        try:
            rec2 = get_ia(ia)
        except xml.parsers.expat.ExpatError:
            return False
        except NoMARCXML:
            print('no MARCXML')
        except urllib2.HTTPError as error:
            print(error.code)
            # Anything other than 404/403 is treated as unexpected.
            assert error.code in (404, 403)
        if not rec2:
            return True

    if not rec2:
        # Only reachable when no identifier was determined above.
        if not mc:
            mc = get_mc(thing['key'])
        if not mc or mc == 'initial import':
            return False
        if mc.startswith('amazon:'):
            try:
                a = try_amazon(thing)
            except IndexError:
                print(thing['key'])
                raise
            except AttributeError:
                return False
            if not a:
                return False
            try:
                return amazon.attempt_merge(a, e1, threshold, debug=False)
            except:
                # Print context for any failure, then re-raise it unchanged.
                print(a)
                print(e1)
                print(thing['key'])
                raise
        print('mc:', mc)
        if mc.startswith('ia:'):
            # Was `assert not mc.startswith('ia:')` inside the try below,
            # relying on the AssertionError handler.  Asserts are removed
            # under `python -O`, so the rejection is made explicit here.
            print(mc)
            print(edition_key)
            return False
        try:
            data = get_from_archive(mc)
            if not data:
                return True
            rec2 = fast_parse.read_edition(data)
        except (fast_parse.SoundRecording, IndexError, AssertionError):
            print(mc)
            print(edition_key)
            return False
        except:
            # Print context for any other failure, then re-raise it.
            print(mc)
            print(edition_key)
            raise

    if not rec2:
        return False
    try:
        e2 = build_marc(rec2)
    except TypeError:
        print(rec2)
        raise
    # `threshold` comes from module scope.
    return attempt_merge(e1, e2, threshold, debug=False)