예제 #1
0
from __future__ import print_function
from catalog.marc.fast_parse import *
from catalog.read_rc import read_rc
from catalog.get_ia import files
from sources import sources
import sys, os

# Settings object returned by read_rc(); the loop below reads rc['marc_path'].
rc = read_rc()
# Upper bound on the number of records examined for each source.
read_count = 10000

# When True, records that yield no '001' tag line are printed for inspection.
show_bad_records = False

# Survey every source: examine at most read_count records per source and
# count how many of them yield at least one '001' tag line.
for ia, name in sources(): # find which sources include '001' tag
    has_001 = 0  # records of this source that had a '001' tag line
    rec_no = 0  # records of this source examined so far
    for part, size in files(ia):
        filename = rc['marc_path'] + ia + "/" + part
        # Skip parts whose file does not exist at the computed path.
        if not os.path.exists(filename):
            continue
        # NOTE(review): the file object from open() is never explicitly closed.
        for data, length in read_file(open(filename)):
            # Stop once the per-source limit is reached. rec_no stays equal to
            # read_count, so any later part breaks out on its first record.
            if rec_no == read_count:
                break
            rec_no += 1
            if list(get_tag_lines(data, ['001'])):
                has_001 += 1
            elif show_bad_records:
                # Debug dump of a record without '001': first its leading 24
                # characters (presumably the MARC leader - confirm), then its
                # tag lines.
                print(data[:24])
                for tag, line in get_all_tag_lines(data):
                    if tag.startswith('00'):
                        # line[:-1] drops the last character of the line.
                        print(tag, line[:-1])
                    else:
예제 #2
0
from catalog.marc.fast_parse import *
from catalog.read_rc import read_rc
from catalog.get_ia import files
from sources import sources
import sys
import os

# Settings object returned by read_rc(); the loop below reads rc['marc_path'].
rc = read_rc()
# Upper bound on the number of records examined for each source.
read_count = 10000

# When True, records that yield no '001' tag line are printed for inspection.
show_bad_records = False

# Survey every source: examine at most read_count records per source and
# count how many of them yield at least one '001' tag line.
for ia, name in sources():  # find which sources include '001' tag
    has_001 = 0  # records of this source that had a '001' tag line
    rec_no = 0  # records of this source examined so far
    for part, size in files(ia):
        filename = rc['marc_path'] + ia + "/" + part
        # Skip parts whose file does not exist at the computed path.
        if not os.path.exists(filename):
            continue
        # NOTE(review): the file object from open() is never explicitly closed.
        for data, length in read_file(open(filename)):
            # Stop once the per-source limit is reached. rec_no stays equal to
            # read_count, so any later part breaks out on its first record.
            if rec_no == read_count:
                break
            rec_no += 1
            if list(get_tag_lines(data, ['001'])):
                has_001 += 1
            elif show_bad_records:
                # Debug dump of a record without '001': first its leading 24
                # characters (presumably the MARC leader - confirm), then its
                # tag lines.
                print(data[:24])
                for tag, line in get_all_tag_lines(data):
                    if tag.startswith('00'):
                        # line[:-1] drops the last character of the line.
                        print(tag, line[:-1])
                    else:
        rec = fast_parse.read_edition(data)
        e1 = build_marc(rec)

        match = False
        seen = set()
        for k, v in edition_pool.iteritems():
            for edition_key in v:
                if edition_key in seen:
                    continue
                seen.add(edition_key)
                thing = withKey(edition_key)
                assert thing
                if try_merge(e1, edition_key, thing):
                    add_source_records(edition_key, loc, thing)
                    match = True

        if not match:
            yield loc, data

# Look up the stored start value for this archive; `go` is True when 'part'
# is not contained in it.
# NOTE(review): presumably load_part reads start/go to resume an interrupted
# run - confirm against its definition, which is not visible here.
start = pool.get_start(archive_id)
go = 'part' not in start

print(archive_id)

# Process every part listed for the archive, printing its name and size first.
for part, size in files(archive_id):
    print(part, size)
    load_part(archive_id, part)

print("finished")
예제 #4
0
        rec = fast_parse.read_edition(data)
        e1 = build_marc(rec)

        match = False
        seen = set()
        for k, v in edition_pool.iteritems():
            for edition_key in v:
                if edition_key in seen:
                    continue
                seen.add(edition_key)
                thing = withKey(edition_key)
                assert thing
                if try_merge(e1, edition_key, thing):
                    add_source_records(edition_key, loc, thing)
                    match = True

        if not match:
            yield loc, data


# Look up the stored start value for this archive; `go` is True when 'part'
# is not contained in it.
# NOTE(review): presumably load_part reads start/go to resume an interrupted
# run - confirm against its definition, which is not visible here.
start = pool.get_start(archive_id)
go = 'part' not in start

print(archive_id)

# Process every part listed for the archive, printing its name and size first.
for part, size in files(archive_id):
    print(part, size)
    load_part(archive_id, part)

print("finished")