def get_subjects_from_ia(ia):
    """Return the subjects read from the MARC record of item ``ia``.

    ``marc_formats(ia)`` is used as a mapping with ``'bin'`` and ``'xml'``
    entries. When none of its values is truthy an empty dict is returned.
    Otherwise the binary record is loaded first; if there is no binary
    format, or loading it yields a falsy value, the XML record is loaded
    instead. The record is then handed to ``read_subjects``.

    Raises AssertionError when a record is still missing after the binary
    attempt and no XML format is listed.
    """
    available = marc_formats(ia)
    if not any(available.values()):
        return {}

    record = load_binary(ia) if available['bin'] else None
    if not record:
        # No usable binary record, so an XML one has to be on offer.
        assert available['xml']
        record = load_xml(ia)
    return read_subjects(record)
print('skip passport applications for now:', ia) continue if 'passengerlistsof' in ia: print('skip passenger lists', ia) continue print((repr(ia), row.updated)) when = str(row.updated) if query({'type': '/type/edition', 'ocaid': ia}): print('already loaded') continue if query({'type': '/type/edition', 'source_records': 'ia:' + ia}): print('already loaded') continue try: formats = marc_formats(ia, host, path) except urllib2.HTTPError as error: write_log(ia, when, "error: HTTPError: " + str(error)) continue use_binary = False bad_binary = None print(formats) rec = {} if formats['bin']: print('binary') use_binary = True try: marc_data = get_marc_ia_data(ia, host, path) except urllib2.HTTPError as error: if error.code == 403: error_marc_403(ia)
def load_book(ia, collections, boxid, scanned=True): if ia.startswith('annualreportspri'): print 'skipping:', ia return if 'shenzhentest' in collections: return if any('census' in c for c in collections): print 'skipping census' return if re_census.match(ia) or ia.startswith('populationschedu') or ia.startswith('michigancensus') or 'census00reel' in ia or ia.startswith('populationsc1880'): print 'ia:', ia print 'collections:', list(collections) print 'census not marked correctly' return try: host, path = find_item(ia) except socket.timeout: print 'socket timeout:', ia return except FindItemError: print 'find item error:', ia bad_binary = None try: formats = marc_formats(ia, host, path) except urllib2.HTTPError as error: return if formats['bin']: # binary MARC marc_data = get_marc_ia_data(ia, host, path) assert isinstance(marc_data, str) marc_error = check_marc_data(marc_data) if marc_error == 'double encode': marc_data = marc_data.decode('utf-8').encode('raw_unicode_escape') marc_error = None if marc_error: return contenttype = 'application/marc' elif formats['xml']: # MARC XML return # waiting for Raj to fox MARC XML loader marc_data = urllib2.urlopen('http://' + host + path + '/' + ia + '_meta.xml').read() contenttype = 'text/xml' else: return subjects = [] if scanned: if 'lendinglibrary' in collections: subjects += ['Protected DAISY', 'Lending library'] elif 'inlibrary' in collections: subjects += ['Protected DAISY', 'In library'] elif 'printdisabled' in collections: subjects.append('Protected DAISY') if not boxid: boxid = None try: post_to_import_api(ia, marc_data, contenttype, subjects, boxid, scanned=scanned) except BadImport: print >> bad, ia bad.flush() except BadLang: print >> bad_lang, ia bad_lang.flush()
print 'skip passport applications for now:', ia continue if 'passengerlistsof' in ia: print 'skip passenger lists', ia continue print(repr(ia), row.updated) when = str(row.updated) if query({'type': '/type/edition', 'ocaid': ia}): print 'already loaded' continue if query({'type': '/type/edition', 'source_records': 'ia:' + ia}): print 'already loaded' continue try: formats = marc_formats(ia, host, path) except urllib2.HTTPError as error: write_log(ia, when, "error: HTTPError: " + str(error)) continue use_binary = False bad_binary = None print formats rec = {} if formats['bin']: print 'binary' use_binary = True try: marc_data = get_marc_ia_data(ia, host, path) except urllib2.HTTPError as error: if error.code == 403: error_marc_403(ia)
print "skip passport applications for now:", ia continue if "passengerlistsof" in ia: print "skip passenger lists", ia continue print ` ia `, row.updated when = str(row.updated) if query({"type": "/type/edition", "ocaid": ia}): print "already loaded" continue if query({"type": "/type/edition", "source_records": "ia:" + ia}): print "already loaded" continue try: formats = marc_formats(ia) except urllib2.HTTPError as error: write_log(ia, when, "error: HTTPError: " + str(error)) continue use_binary = False bad_binary = None print formats rec = {} if formats["bin"]: print "binary" use_binary = True marc_data = get_marc_ia_data(ia) if marc_data == "": bad_binary = "MARC binary empty string" if not bad_binary and is_display_marc(marc_data): use_binary = False
def load_book(ia, collections, boxid, scanned=True):
    """Post the binary MARC record of item ``ia`` to the import API.

    Items are skipped (the function returns without importing) when:

    * the identifier starts with ``annualreportspri``;
    * ``collections`` contains ``shenzhentest``;
    * any collection name contains ``census``, or the identifier itself
      looks like a census item even though no collection says so;
    * the item cannot be located (socket timeout or ``FindItemError``);
    * listing the MARC formats raises ``urllib2.HTTPError``;
    * no binary MARC is available, or ``check_marc_data`` reports an error
      other than ``'double encode'``.

    Args:
        ia: item identifier (a string).
        collections: container of collection names the item belongs to.
        boxid: box identifier; any falsy value is passed on as ``None``.
        scanned: forwarded to ``post_to_import_api``; when true, access
            subjects are derived from ``collections``.

    Returns:
        None in every case.
    """
    if ia.startswith('annualreportspri'):
        print('skipping:', ia)
        return
    if 'shenzhentest' in collections:
        return
    if any('census' in c for c in collections):
        print('skipping census')
        return

    census_prefixes = ('populationschedu', 'michigancensus', 'populationsc1880')
    if (re_census.match(ia) or ia.startswith(census_prefixes)
            or 'census00reel' in ia):
        # The identifier looks like a census item although no collection
        # name contains 'census'.
        print('ia:', ia)
        print('collections:', list(collections))
        print('census not marked correctly')
        return

    try:
        host, path = find_item(ia)
    except socket.timeout:
        print('socket timeout:', ia)
        return
    except FindItemError:
        print('find item error:', ia)
        # host and path are unbound here; carrying on would raise
        # UnboundLocalError in the marc_formats() call below.
        return

    try:
        formats = marc_formats(ia, host, path)
    except urllib2.HTTPError:
        return

    if formats['bin']:  # binary MARC
        marc_data = get_marc_ia_data(ia, host, path)
        assert isinstance(marc_data, str)
        marc_error = check_marc_data(marc_data)
        if marc_error == 'double encode':
            # Undo the extra layer of UTF-8 encoding and treat the record
            # as good.
            marc_data = marc_data.decode('utf-8').encode('raw_unicode_escape')
            marc_error = None
        if marc_error:
            return
        contenttype = 'application/marc'
    elif formats['xml']:  # MARC XML
        # Deliberately disabled: waiting for the MARC XML loader to be
        # fixed. The disabled code fetched
        # 'http://' + host + path + '/' + ia + '_meta.xml' and posted it
        # with content type 'text/xml'.
        return
    else:
        return

    subjects = []
    if scanned:
        if 'lendinglibrary' in collections:
            subjects += ['Protected DAISY', 'Lending library']
        elif 'inlibrary' in collections:
            subjects += ['Protected DAISY', 'In library']
        elif 'printdisabled' in collections:
            subjects.append('Protected DAISY')

    if not boxid:
        boxid = None

    try:
        post_to_import_api(ia, marc_data, contenttype, subjects, boxid,
                           scanned=scanned)
    except BadImport:
        # Record the identifier in the `bad` file object and flush at once.
        print(ia, file=bad)
        bad.flush()
    except BadLang:
        print(ia, file=bad_lang)
        bad_lang.flush()