Exemple #1
0
def get_subjects_from_ia(ia):
    """Return the subjects read from the MARC record of item `ia`.

    `marc_formats(ia)` is expected to yield a mapping with 'bin' and 'xml'
    entries.  When none of its values is truthy an empty dict is returned.
    The binary record is preferred; the XML record is loaded when binary
    is not listed or `load_binary` gives back a falsy record.

    Raises AssertionError when the XML fallback is needed but the 'xml'
    entry is falsy.
    """
    available = marc_formats(ia)
    if not any(available.values()):
        return {}

    # Prefer the binary record when one is listed.
    record = load_binary(ia) if available['bin'] else None
    if not record:
        # No usable binary record: XML must be there to fall back on.
        assert available['xml']
        record = load_xml(ia)
    return read_subjects(record)
Exemple #2
0
def get_subjects_from_ia(ia):
    """Look up the MARC record for `ia` and return its subjects.

    Returns an empty dict when `marc_formats(ia)` lists no truthy format.
    Otherwise the record comes from `load_binary` if the 'bin' entry is
    truthy and the load yields a truthy record, and from `load_xml` in
    every other case (asserting first that the 'xml' entry is truthy).
    The chosen record is handed to `read_subjects`.
    """
    fmt = marc_formats(ia)
    if any(fmt.values()):
        binary_rec = None
        if fmt['bin']:
            binary_rec = load_binary(ia)
        if binary_rec:
            return read_subjects(binary_rec)
        # Binary missing or unusable, so the XML record is the only option.
        assert fmt['xml']
        return read_subjects(load_xml(ia))
    return {}
Exemple #3
0
                print('skip passport applications for now:', ia)
                continue
            if 'passengerlistsof' in ia:
                print('skip passenger lists', ia)
                continue
            print((repr(ia), row.updated))
            when = str(row.updated)
            if query({'type': '/type/edition', 'ocaid': ia}):
                print('already loaded')
                continue
            if query({'type': '/type/edition', 'source_records': 'ia:' + ia}):
                print('already loaded')
                continue

            try:
                formats = marc_formats(ia, host, path)
            except urllib2.HTTPError as error:
                write_log(ia, when, "error: HTTPError: " + str(error))
                continue
            use_binary = False
            bad_binary = None
            print(formats)
            rec = {}
            if formats['bin']:
                print('binary')
                use_binary = True
                try:
                    marc_data = get_marc_ia_data(ia, host, path)
                except urllib2.HTTPError as error:
                    if error.code == 403:
                        error_marc_403(ia)
Exemple #4
0
def load_book(ia, collections, boxid, scanned=True):
    if ia.startswith('annualreportspri'):
        print 'skipping:', ia
        return
    if 'shenzhentest' in collections:
        return

    if any('census' in c for c in collections):
        print 'skipping census'
        return

    if re_census.match(ia) or ia.startswith('populationschedu') or ia.startswith('michigancensus') or 'census00reel' in ia or ia.startswith('populationsc1880'):
        print 'ia:', ia
        print 'collections:', list(collections)
        print 'census not marked correctly'
        return
    try:
        host, path = find_item(ia)
    except socket.timeout:
        print 'socket timeout:', ia
        return
    except FindItemError:
        print 'find item error:', ia
    bad_binary = None
    try:
        formats = marc_formats(ia, host, path)
    except urllib2.HTTPError as error:
        return

    if formats['bin']: # binary MARC
        marc_data = get_marc_ia_data(ia, host, path)
        assert isinstance(marc_data, str)
        marc_error = check_marc_data(marc_data)
        if marc_error == 'double encode':
            marc_data = marc_data.decode('utf-8').encode('raw_unicode_escape')
            marc_error = None
        if marc_error:
            return
        contenttype = 'application/marc'
    elif formats['xml']: # MARC XML
        return # waiting for Raj to fox MARC XML loader
        marc_data = urllib2.urlopen('http://' + host + path + '/' + ia + '_meta.xml').read()
        contenttype = 'text/xml'
    else:
        return
    subjects = []
    if scanned:
        if 'lendinglibrary' in collections:
            subjects += ['Protected DAISY', 'Lending library']
        elif 'inlibrary' in collections:
            subjects += ['Protected DAISY', 'In library']
        elif 'printdisabled' in collections:
            subjects.append('Protected DAISY')

    if not boxid:
        boxid = None
    try:
        post_to_import_api(ia, marc_data, contenttype, subjects, boxid, scanned=scanned)
    except BadImport:
        print >> bad, ia
        bad.flush()
    except BadLang:
        print >> bad_lang, ia
        bad_lang.flush()
                print 'skip passport applications for now:', ia
                continue
            if 'passengerlistsof' in ia:
                print 'skip passenger lists', ia
                continue
            print(repr(ia), row.updated)
            when = str(row.updated)
            if query({'type': '/type/edition', 'ocaid': ia}):
                print 'already loaded'
                continue
            if query({'type': '/type/edition', 'source_records': 'ia:' + ia}):
                print 'already loaded'
                continue

            try:
                formats = marc_formats(ia, host, path)
            except urllib2.HTTPError as error:
                write_log(ia, when, "error: HTTPError: " + str(error))
                continue
            use_binary = False
            bad_binary = None
            print formats
            rec = {}
            if formats['bin']:
                print 'binary'
                use_binary = True
                try:
                    marc_data = get_marc_ia_data(ia, host, path)
                except urllib2.HTTPError as error:
                    if error.code == 403:
                        error_marc_403(ia)
Exemple #6
0
                print "skip passport applications for now:", ia
                continue
            if "passengerlistsof" in ia:
                print "skip passenger lists", ia
                continue
            print ` ia `, row.updated
            when = str(row.updated)
            if query({"type": "/type/edition", "ocaid": ia}):
                print "already loaded"
                continue
            if query({"type": "/type/edition", "source_records": "ia:" + ia}):
                print "already loaded"
                continue

            try:
                formats = marc_formats(ia)
            except urllib2.HTTPError as error:
                write_log(ia, when, "error: HTTPError: " + str(error))
                continue
            use_binary = False
            bad_binary = None
            print formats
            rec = {}
            if formats["bin"]:
                print "binary"
                use_binary = True
                marc_data = get_marc_ia_data(ia)
                if marc_data == "":
                    bad_binary = "MARC binary empty string"
                if not bad_binary and is_display_marc(marc_data):
                    use_binary = False
def load_book(ia, collections, boxid, scanned=True):
    """Post the binary MARC record of item `ia` to the import API.

    Nothing is posted (the function just returns) when:
      * the identifier or its collections match the test / census
        patterns checked below,
      * `find_item` times out or raises FindItemError,
      * `marc_formats` raises urllib2.HTTPError,
      * the item has no binary MARC (MARC XML loading is disabled),
      * `check_marc_data` reports an error other than 'double encode'.

    ia: item identifier string.
    collections: container of collection names for the item.
    boxid: forwarded to the import API; any falsy value is sent as None.
    scanned: when true, access subjects are derived from `collections`;
        the flag itself is also forwarded to the import API.

    Always returns None.  Identifiers rejected with BadImport or BadLang
    are written to the module-level `bad` / `bad_lang` files.
    """
    if ia.startswith('annualreportspri'):
        print('skipping:', ia)
        return
    if 'shenzhentest' in collections:
        return

    if any('census' in c for c in collections):
        print('skipping census')
        return

    # Identifiers that look like census material although no collection
    # name contains 'census'.
    census_prefixes = ('populationschedu', 'michigancensus',
                       'populationsc1880')
    if (re_census.match(ia) or ia.startswith(census_prefixes)
            or 'census00reel' in ia):
        print('ia:', ia)
        print('collections:', list(collections))
        print('census not marked correctly')
        return

    try:
        host, path = find_item(ia)
    except socket.timeout:
        print('socket timeout:', ia)
        return
    except FindItemError:
        print('find item error:', ia)
        # host/path are unbound here; carrying on would raise
        # UnboundLocalError at the marc_formats() call below.
        return

    try:
        formats = marc_formats(ia, host, path)
    except urllib2.HTTPError:
        return

    if not formats['bin']:
        # Only binary MARC is loaded.  MARC XML (formats['xml'], served as
        # <ia>_meta.xml with content type text/xml) is skipped: waiting
        # for Raj to fix the MARC XML loader.
        return

    marc_data = get_marc_ia_data(ia, host, path)
    assert isinstance(marc_data, str)
    marc_error = check_marc_data(marc_data)
    if marc_error == 'double encode':
        # Recoverable: re-encode the record and carry on.
        marc_data = marc_data.decode('utf-8').encode('raw_unicode_escape')
    elif marc_error:
        return
    contenttype = 'application/marc'

    subjects = []
    if scanned:
        if 'lendinglibrary' in collections:
            subjects += ['Protected DAISY', 'Lending library']
        elif 'inlibrary' in collections:
            subjects += ['Protected DAISY', 'In library']
        elif 'printdisabled' in collections:
            subjects.append('Protected DAISY')

    boxid = boxid or None  # normalise '' / 0 / etc. to None
    try:
        post_to_import_api(ia, marc_data, contenttype, subjects, boxid,
                           scanned=scanned)
    except BadImport:
        print(ia, file=bad)
        bad.flush()
    except BadLang:
        print(ia, file=bad_lang)
        bad_lang.flush()