def read_marc_file(part, f, pos=0):
    """Iterate over the MARC records in open file object *f*.

    part -- identifier of the MARC file part, used to build the location string
    f    -- open file object positioned at the first record
    pos  -- byte offset of the first record within the file (default 0)

    Yields (pos, loc, data) tuples where *loc* is
    "marc:<part>:<record start offset>:<record length>" and *pos* is the
    offset just past the record (i.e. the start of the next one).

    Raises ValueError if fast_parse cannot parse a record; the offending
    file object is printed before the exception propagates.
    """
    try:
        for data, int_length in fast_parse.read_file(f):
            # loc uses the record's *start* offset; pos is advanced past the
            # record only afterwards, so the yielded pos is the end offset.
            loc = "marc:%s:%d:%d" % (part, pos, int_length)
            pos += int_length
            yield (pos, loc, data)
    except ValueError:
        # Report which file failed before re-raising.  print(f) rather than
        # the py2-only `print f` -- equivalent in both Python 2 and 3, and
        # consistent with the py3-style print() calls elsewhere in the file.
        print(f)
        raise
# NOTE(review): the statements down to the isbn/oclc loop reference names
# (rec, extra, length, file_id, pos) that are not defined at this level --
# this looks like a fragment of process_record()'s body whose `def` line was
# lost when the file was collapsed onto one line; confirm against the
# original source.  Indentation below is reconstructed.

# Insert the record row; keep its id so identifier rows can reference it.
rec_id = web.insert('rec', marc_file = file_id, pos=pos, len=length, **extra)
# For each identifier field present on the record, insert one row per value.
for f in (f for f in ('isbn', 'oclc') if f in rec):
    for v in rec[f]:
        web.insert(f, seqname=False, rec=rec_id, value=v)

# --- main loading loop (Python 2 print statements) ---
t_prev = time()     # timestamp of the last progress report
rec_no = 0          # records processed so far
chunk = 1000        # emit a progress report every `chunk` records
total = 32856039    # presumably the known total record count -- TODO confirm
for ia, name in sources():
    print ia, name
    for part, size in files(ia):
        # Register this MARC file part and get its database id.
        file_id = web.insert('files', ia=ia, part=part)
        print part, size
        full_part = ia + "/" + part
        filename = rc['marc_path'] + full_part
        # Skip parts that are not present on local disk.
        if not os.path.exists(filename):
            continue
        pos = 0   # byte offset within this file part
        for data, length in read_file(open(filename)):
            pos += length
            rec_no +=1
            # Periodically report throughput since the last report.
            if rec_no % chunk == 0:
                t = time() - t_prev
                progress_update(rec_no, t)
                t_prev = time()
            process_record(file_id, pos, length, data)
# Final count of records processed.  NOTE(review): placement at top level
# (after all loops) is inferred from the collapsed original -- confirm.
print rec_no
# NOTE(review): this line duplicates the tail of the preceding script code
# but with Python 3 print() calls -- apparently two versions of the same
# loader pasted together; confirm which one is current.  It references
# names (rec, rec_id) not defined at this level, so the first loop looks
# like a fragment of process_record()'s body.  Indentation reconstructed.

# For each identifier field present on the record, insert one row per value.
for f in (f for f in ('isbn', 'oclc') if f in rec):
    for v in rec[f]:
        web.insert(f, seqname=False, rec=rec_id, value=v)

# --- main loading loop (Python 3 print calls) ---
t_prev = time()     # timestamp of the last progress report
rec_no = 0          # records processed so far
chunk = 1000        # emit a progress report every `chunk` records
total = 32856039    # presumably the known total record count -- TODO confirm
for ia, name in sources():
    print(ia, name)
    for part, size in files(ia):
        # Register this MARC file part and get its database id.
        file_id = web.insert('files', ia=ia, part=part)
        print(part, size)
        full_part = ia + "/" + part
        filename = rc['marc_path'] + full_part
        # Skip parts that are not present on local disk.
        if not os.path.exists(filename):
            continue
        pos = 0   # byte offset within this file part
        for data, length in read_file(open(filename)):
            pos += length
            rec_no += 1
            # Periodically report throughput since the last report.
            if rec_no % chunk == 0:
                t = time() - t_prev
                progress_update(rec_no, t)
                t_prev = time()
            process_record(file_id, pos, length, data)
# Final count of records processed.  NOTE(review): placement at top level
# (after all loops) is inferred from the collapsed original -- confirm.
print(rec_no)