Esempio n. 1
0
                continue

            try:
                formats = marc_formats(ia, host, path)
            except urllib2.HTTPError as error:
                write_log(ia, when, "error: HTTPError: " + str(error))
                continue
            use_binary = False
            bad_binary = None
            print(formats)
            rec = {}
            if formats['bin']:
                print('binary')
                use_binary = True
                try:
                    marc_data = get_marc_ia_data(ia, host, path)
                except urllib2.HTTPError as error:
                    if error.code == 403:
                        error_marc_403(ia)
                        continue
                    raise
                if marc_data == '':
                    bad_binary = 'MARC binary empty string'
                if not bad_binary and is_display_marc(marc_data):
                    use_binary = False
                    bad_binary = marc_data
                    bad_marc.append((ia, marc_data))
                if not bad_binary:
                    try:
                        length = int(marc_data[0:5])
                    except ValueError:
Esempio n. 2
0
def load_book(ia, collections, boxid, scanned=True):
    if ia.startswith('annualreportspri'):
        print 'skipping:', ia
        return
    if 'shenzhentest' in collections:
        return

    if any('census' in c for c in collections):
        print 'skipping census'
        return

    if re_census.match(ia) or ia.startswith('populationschedu') or ia.startswith('michigancensus') or 'census00reel' in ia or ia.startswith('populationsc1880'):
        print 'ia:', ia
        print 'collections:', list(collections)
        print 'census not marked correctly'
        return
    try:
        host, path = find_item(ia)
    except socket.timeout:
        print 'socket timeout:', ia
        return
    except FindItemError:
        print 'find item error:', ia
    bad_binary = None
    try:
        formats = marc_formats(ia, host, path)
    except urllib2.HTTPError as error:
        return

    if formats['bin']: # binary MARC
        marc_data = get_marc_ia_data(ia, host, path)
        assert isinstance(marc_data, str)
        marc_error = check_marc_data(marc_data)
        if marc_error == 'double encode':
            marc_data = marc_data.decode('utf-8').encode('raw_unicode_escape')
            marc_error = None
        if marc_error:
            return
        contenttype = 'application/marc'
    elif formats['xml']: # MARC XML
        return # waiting for Raj to fox MARC XML loader
        marc_data = urllib2.urlopen('http://' + host + path + '/' + ia + '_meta.xml').read()
        contenttype = 'text/xml'
    else:
        return
    subjects = []
    if scanned:
        if 'lendinglibrary' in collections:
            subjects += ['Protected DAISY', 'Lending library']
        elif 'inlibrary' in collections:
            subjects += ['Protected DAISY', 'In library']
        elif 'printdisabled' in collections:
            subjects.append('Protected DAISY')

    if not boxid:
        boxid = None
    try:
        post_to_import_api(ia, marc_data, contenttype, subjects, boxid, scanned=scanned)
    except BadImport:
        print >> bad, ia
        bad.flush()
    except BadLang:
        print >> bad_lang, ia
        bad_lang.flush()
Esempio n. 3
0
                print "already loaded"
                continue

            try:
                formats = marc_formats(ia)
            except urllib2.HTTPError as error:
                write_log(ia, when, "error: HTTPError: " + str(error))
                continue
            use_binary = False
            bad_binary = None
            print formats
            rec = {}
            if formats["bin"]:
                print "binary"
                use_binary = True
                marc_data = get_marc_ia_data(ia)
                if marc_data == "":
                    bad_binary = "MARC binary empty string"
                if not bad_binary and is_display_marc(marc_data):
                    use_binary = False
                    bad_binary = marc_data
                    bad_marc.append((ia, marc_data))
                if not bad_binary:
                    try:
                        length = int(marc_data[0:5])
                    except ValueError:
                        bad_binary = "MARC doesn't start with number"
                if not bad_binary and len(marc_data) != length:
                    try:
                        marc_marc_data = marc_data.decode("utf-8").encode("raw_unicode_escape")
                    except:
Esempio n. 4
0
                continue

            try:
                formats = marc_formats(ia, host, path)
            except urllib2.HTTPError as error:
                write_log(ia, when, "error: HTTPError: " + str(error))
                continue
            use_binary = False
            bad_binary = None
            print formats
            rec = {}
            if formats['bin']:
                print 'binary'
                use_binary = True
                try:
                    marc_data = get_marc_ia_data(ia, host, path)
                except urllib2.HTTPError as error:
                    if error.code == 403:
                        error_marc_403(ia)
                        continue
                    raise
                if marc_data == '':
                    bad_binary = 'MARC binary empty string'
                if not bad_binary and is_display_marc(marc_data):
                    use_binary = False
                    bad_binary = marc_data
                    bad_marc.append((ia, marc_data))
                if not bad_binary:
                    try:
                        length = int(marc_data[0:5])
                    except ValueError:
Esempio n. 5
0
def load_book(ia, collections, boxid, scanned=True):
    """Fetch the binary MARC record for item ``ia`` and post it to the import API.

    The item is silently skipped (the function just returns) when:

    * its identifier starts with ``'annualreportspri'``;
    * ``'shenzhentest'`` is one of its collections;
    * any collection name contains ``'census'``, or the identifier itself
      looks like a census item;
    * the item cannot be located, or listing its MARC formats fails with an
      HTTP error;
    * no binary MARC is available, or the binary MARC fails validation.

    :param ia: item identifier (a string).
    :param collections: container of collection names the item belongs to.
    :param boxid: box id forwarded to the import API; any falsy value is
        normalised to ``None``.
    :param scanned: forwarded to the import API; when true, access-related
        subjects are derived from ``collections``.
    :returns: ``None`` in every case.
    :raises AssertionError: if the fetched MARC data is not a ``str``.

    Identifiers rejected by the import API are appended to the module-level
    ``bad`` / ``bad_lang`` file objects.
    """
    if ia.startswith('annualreportspri'):
        print('skipping:', ia)
        return
    if 'shenzhentest' in collections:
        return

    if any('census' in c for c in collections):
        print('skipping census')
        return

    # Census items that were not filed under a census collection are
    # recognised by their identifier instead.
    census_prefixes = ('populationschedu', 'michigancensus', 'populationsc1880')
    if (re_census.match(ia) or ia.startswith(census_prefixes)
            or 'census00reel' in ia):
        print('ia:', ia)
        print('collections:', list(collections))
        print('census not marked correctly')
        return

    try:
        host, path = find_item(ia)
    except socket.timeout:
        print('socket timeout:', ia)
        return
    except FindItemError:
        print('find item error:', ia)
        # Without this return, host/path stay unbound and the next call
        # fails with UnboundLocalError.
        return

    try:
        formats = marc_formats(ia, host, path)
    except urllib2.HTTPError:
        return

    if formats['bin']:  # binary MARC
        marc_data = get_marc_ia_data(ia, host, path)
        assert isinstance(marc_data, str)
        marc_error = check_marc_data(marc_data)
        if marc_error == 'double encode':
            # Undo the extra UTF-8 encoding layer before importing.
            # NOTE(review): str.decode only exists on Python 2 byte strings;
            # confirm the target interpreter before porting this path.
            marc_data = marc_data.decode('utf-8').encode('raw_unicode_escape')
            marc_error = None
        if marc_error:
            return
        contenttype = 'application/marc'
    elif formats['xml']:  # MARC XML
        # Disabled: waiting for Raj to fix the MARC XML loader.
        # TODO: once fixed, read 'http://' + host + path + '/' + ia + '_meta.xml'
        # and post it with content type 'text/xml'.
        return
    else:
        return

    subjects = []
    if scanned:
        # Only the first matching collection decides the subjects.
        if 'lendinglibrary' in collections:
            subjects += ['Protected DAISY', 'Lending library']
        elif 'inlibrary' in collections:
            subjects += ['Protected DAISY', 'In library']
        elif 'printdisabled' in collections:
            subjects.append('Protected DAISY')

    if not boxid:
        boxid = None
    try:
        post_to_import_api(ia,
                           marc_data,
                           contenttype,
                           subjects,
                           boxid,
                           scanned=scanned)
    except BadImport:
        print(ia, file=bad)
        bad.flush()
    except BadLang:
        print(ia, file=bad_lang)
        bad_lang.flush()