# NOTE(review): the line below is a whole script collapsed onto one physical
# line, and it ends at a dangling `else:` whose body is not visible. It is not
# valid Python as written; the original line breaks must be restored first.
#
# What the visible statements do:
#   * rc = read_rc() supplies rc['marc_path']; each file path is built as
#     rc['marc_path'] + ia + "/" + part for every (part, size) from files(ia),
#     and paths that do not exist on disk are skipped with `continue`.
#   * For each (ia, name) from sources(), has_001 and rec_no are reset to 0,
#     then records from read_file(open(filename)) are walked; the record loop
#     breaks once rec_no equals read_count (10000).
#   * A record is counted in has_001 when get_tag_lines(data, ['001']) yields
#     at least one item. Otherwise, only when show_bad_records is true, it
#     prints data[:24] (presumably the MARC leader -- TODO confirm) followed by
#     every tag that starts with '00' together with line[:-1].
#
# NOTE(review): the handle returned by open(filename) is never closed
# explicitly in the visible code.
# NOTE(review): which statement the trailing `else:` pairs with (the `if`, or
# one of the `for` loops) cannot be determined from this text.
from __future__ import print_function from catalog.marc.fast_parse import * from catalog.read_rc import read_rc from catalog.get_ia import files from sources import sources import sys, os rc = read_rc() read_count = 10000 show_bad_records = False for ia, name in sources(): # find which sources include '001' tag has_001 = 0 rec_no = 0 for part, size in files(ia): filename = rc['marc_path'] + ia + "/" + part if not os.path.exists(filename): continue for data, length in read_file(open(filename)): if rec_no == read_count: break rec_no += 1 if list(get_tag_lines(data, ['001'])): has_001 += 1 elif show_bad_records: print(data[:24]) for tag, line in get_all_tag_lines(data): if tag.startswith('00'): print(tag, line[:-1]) else:
# NOTE(review): the line below is a whole script collapsed onto one physical
# line, and it ends at a dangling `else:` whose body is not visible. It is not
# valid Python as written; the original line breaks must be restored first.
#
# What the visible statements do:
#   * rc = read_rc() supplies rc['marc_path']; each file path is built as
#     rc['marc_path'] + ia + "/" + part for every (part, size) from files(ia),
#     and paths that do not exist on disk are skipped with `continue`.
#   * For each (ia, name) from sources(), has_001 and rec_no are reset to 0,
#     then records from read_file(open(filename)) are walked; the record loop
#     breaks once rec_no equals read_count (10000).
#   * A record is counted in has_001 when get_tag_lines(data, ['001']) yields
#     at least one item. Otherwise, only when show_bad_records is true, it
#     prints data[:24] and every tag that starts with '00' with line[:-1].
#
# NOTE(review): there is no `from __future__ import print_function` here, so
# if this runs under Python 2 `print(tag, line[:-1])` prints a tuple rather
# than two space-separated values -- confirm the target interpreter.
# NOTE(review): the handle returned by open(filename) is never closed
# explicitly in the visible code.
# NOTE(review): which statement the trailing `else:` pairs with cannot be
# determined from this text.
from catalog.marc.fast_parse import * from catalog.read_rc import read_rc from catalog.get_ia import files from sources import sources import sys import os rc = read_rc() read_count = 10000 show_bad_records = False for ia, name in sources(): # find which sources include '001' tag has_001 = 0 rec_no = 0 for part, size in files(ia): filename = rc['marc_path'] + ia + "/" + part if not os.path.exists(filename): continue for data, length in read_file(open(filename)): if rec_no == read_count: break rec_no += 1 if list(get_tag_lines(data, ['001'])): has_001 += 1 elif show_bad_records: print(data[:24]) for tag, line in get_all_tag_lines(data): if tag.startswith('00'): print(tag, line[:-1]) else:
# NOTE(review): the line below is a collapsed fragment. It opens in the middle
# of a generator body (it contains `yield loc, data`) whose `def` line is not
# visible, then continues into what look like top-level driver statements.
# The original line breaks and indentation must be restored before it can run.
#
# Generator-body part, as far as the visible text shows:
#   * data is parsed with fast_parse.read_edition() and passed to build_marc()
#     to produce e1.
#   * Every edition_key in the values of edition_pool is visited at most once
#     (tracked in the `seen` set), fetched with withKey() and asserted truthy.
#   * When try_merge(e1, edition_key, thing) succeeds, add_source_records() is
#     called with (edition_key, loc, thing) and match is set to True.
#   * `yield loc, data` runs when match is false -- presumably after the loops
#     finish, but the nesting cannot be confirmed from the collapsed text.
#   * edition_pool.iteritems() is the Python 2 dict API; k is not used in the
#     visible code.
#
# Driver part:
#   * start = pool.get_start(archive_id); go is True when 'part' is not in
#     start. go is not read anywhere in the visible text.
#   * Prints archive_id, then for each (part, size) from files(archive_id)
#     prints the pair and calls load_part(archive_id, part); finally prints
#     "finished". Whether that last print sits inside or after the loop is not
#     recoverable here -- TODO confirm.
rec = fast_parse.read_edition(data) e1 = build_marc(rec) match = False seen = set() for k, v in edition_pool.iteritems(): for edition_key in v: if edition_key in seen: continue seen.add(edition_key) thing = withKey(edition_key) assert thing if try_merge(e1, edition_key, thing): add_source_records(edition_key, loc, thing) match = True if not match: yield loc, data start = pool.get_start(archive_id) go = 'part' not in start print(archive_id) for part, size in files(archive_id): print(part, size) load_part(archive_id, part) print("finished")