def check_marc_data(marc_data):
    """Validate a binary MARC record and describe the first problem found.

    The checks run in this order:

    1. the input is not the empty string;
    2. ``is_display_marc`` does not flag it as display-format MARC;
    3. the first five characters parse as an integer (the declared length);
    4. the declared length equals ``len(marc_data)`` -- if it does not, the
       data is decoded as UTF-8 and re-encoded with ``raw_unicode_escape``
       and the comparison is retried once;
    5. characters 6-7 of the record are ``'am'``.

    The repaired data is only used locally for the remaining checks; it is
    not handed back to the caller.

    Returns:
        ``None`` when every check passes on the data as given,
        ``'double encoded'`` when the checks only pass after the
        decode/encode repair in step 4, otherwise a short message naming
        the check that failed.
    """
    if marc_data == '':
        return 'MARC binary empty string'
    if is_display_marc(marc_data):
        return 'display MARC'

    try:
        length = int(marc_data[0:5])
    except ValueError:
        return "MARC doesn't start with number"

    double_encode = False
    if len(marc_data) != length:
        # Length disagrees with the declared one: try undoing an extra
        # layer of UTF-8 encoding before giving up.
        try:
            marc_data = marc_data.decode('utf-8').encode('raw_unicode_escape')
        except Exception:
            # Deliberately broad so any conversion failure is still reported
            # as a message, but unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit propagate.
            return "double UTF-8 decode error"
        double_encode = True
        if len(marc_data) != length:
            return 'MARC length mismatch: %d != %d' % (len(marc_data), length)

    if str(marc_data)[6:8] != 'am':  # only want books
        return 'not a book!'
    return 'double encoded' if double_encode else None
def check_marc_data(marc_data):
    """Validate a binary MARC record and describe the first problem found.

    The checks run in this order:

    1. the input is not the empty string;
    2. ``is_display_marc`` does not flag it as display-format MARC;
    3. the first five characters parse as an integer (the declared length);
    4. the declared length equals ``len(marc_data)`` -- if it does not, the
       data is decoded as UTF-8 and re-encoded with ``raw_unicode_escape``
       and the comparison is retried once;
    5. characters 6-7 of the record are ``'am'``.

    The repaired data is only used locally for the remaining checks; it is
    not handed back to the caller.

    Returns:
        ``None`` when every check passes on the data as given,
        ``'double encoded'`` when the checks only pass after the
        decode/encode repair in step 4, otherwise a short message naming
        the check that failed.
    """
    if marc_data == '':
        return 'MARC binary empty string'
    if is_display_marc(marc_data):
        return 'display MARC'

    try:
        length = int(marc_data[0:5])
    except ValueError:
        return "MARC doesn't start with number"

    double_encode = False
    if len(marc_data) != length:
        # Length disagrees with the declared one: try undoing an extra
        # layer of UTF-8 encoding before giving up.
        try:
            marc_data = marc_data.decode('utf-8').encode('raw_unicode_escape')
        except Exception:
            # Deliberately broad so any conversion failure is still reported
            # as a message, but unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit propagate.
            return "double UTF-8 decode error"
        double_encode = True
        if len(marc_data) != length:
            return 'MARC length mismatch: %d != %d' % (len(marc_data), length)

    if str(marc_data)[6:8] != 'am':  # only want books
        return 'not a book!'
    return 'double encoded' if double_encode else None
bad_binary = None print(formats) rec = {} if formats['bin']: print('binary') use_binary = True try: marc_data = get_marc_ia_data(ia, host, path) except urllib2.HTTPError as error: if error.code == 403: error_marc_403(ia) continue raise if marc_data == '': bad_binary = 'MARC binary empty string' if not bad_binary and is_display_marc(marc_data): use_binary = False bad_binary = marc_data bad_marc.append((ia, marc_data)) if not bad_binary: try: length = int(marc_data[0:5]) except ValueError: bad_binary = "MARC doesn't start with number" if not bad_binary and len(marc_data) != length: try: marc_marc_data = marc_data.decode('utf-8').encode( 'raw_unicode_escape') except: bad_binary = "double UTF-8 decode error" if not bad_binary and len(marc_data) != length:
bad_binary = None print formats rec = {} if formats['bin']: print 'binary' use_binary = True try: marc_data = get_marc_ia_data(ia, host, path) except urllib2.HTTPError as error: if error.code == 403: error_marc_403(ia) continue raise if marc_data == '': bad_binary = 'MARC binary empty string' if not bad_binary and is_display_marc(marc_data): use_binary = False bad_binary = marc_data bad_marc.append((ia, marc_data)) if not bad_binary: try: length = int(marc_data[0:5]) except ValueError: bad_binary = "MARC doesn't start with number" if not bad_binary and len(marc_data) != length: try: marc_marc_data = marc_data.decode('utf-8').encode('raw_unicode_escape') except: bad_binary = "double UTF-8 decode error" if not bad_binary and len(marc_data) != length: bad_binary = 'MARC length mismatch: %d != %d' % (len(marc_data), length)