Beispiel #1
0
def read_marc_file(part, f, pos=0):
    try:
        for data, int_length in fast_parse.read_file(f):
            loc = "marc:%s:%d:%d" % (part, pos, int_length)
            pos += int_length
            yield (pos, loc, data)
    except ValueError:
        print f
        raise
Beispiel #2
0
    rec_id = web.insert('rec', marc_file = file_id, pos=pos, len=length, **extra)
    for f in (f for f in ('isbn', 'oclc') if f in rec):
        for v in rec[f]:
            web.insert(f, seqname=False, rec=rec_id, value=v)

t_prev = time()
rec_no = 0
chunk = 1000
total = 32856039

for ia, name in sources():
    print ia, name
    for part, size in files(ia):
        file_id = web.insert('files', ia=ia, part=part)
        print part, size
        full_part = ia + "/" + part
        filename = rc['marc_path'] + full_part
        if not os.path.exists(filename):
            continue
        pos = 0
        for data, length in read_file(open(filename)):
            pos += length
            rec_no +=1
            if rec_no % chunk == 0:
                t = time() - t_prev
                progress_update(rec_no, t)
                t_prev = time()
            process_record(file_id, pos, length, data)

print rec_no
Beispiel #3
0
    for f in (f for f in ('isbn', 'oclc') if f in rec):
        for v in rec[f]:
            web.insert(f, seqname=False, rec=rec_id, value=v)


t_prev = time()
rec_no = 0
chunk = 1000
total = 32856039

for ia, name in sources():
    print(ia, name)
    for part, size in files(ia):
        file_id = web.insert('files', ia=ia, part=part)
        print(part, size)
        full_part = ia + "/" + part
        filename = rc['marc_path'] + full_part
        if not os.path.exists(filename):
            continue
        pos = 0
        for data, length in read_file(open(filename)):
            pos += length
            rec_no += 1
            if rec_no % chunk == 0:
                t = time() - t_prev
                progress_update(rec_no, t)
                t_prev = time()
            process_record(file_id, pos, length, data)

print(rec_no)