Example #1
0
def process_record(pos, loc, data):
    """Print ``(loc, subfields)`` for 100/700 lines that carry a 'c' subfield.

    For each of the tags '100' and '700', the line returned by
    ``get_first_tag`` is expanded with ``get_all_subfields``; the pair is
    printed only when at least one subfield code equals 'c'.

    ``pos`` is accepted but not used here.
    """
    for tag in ('100', '700'):
        line = get_first_tag(data, set([tag]))
        if not line:
            continue
        fields = list(get_all_subfields(line))
        codes = [code for code, _value in fields]
        if 'c' in codes:
            print((loc, fields))
Example #2
0
def process_record(pos, loc, data):
    """Report 100/700 lines of a record that contain a 'c' subfield.

    Each tag is looked up with ``get_first_tag``. When a line is found and
    any of its (code, value) subfields has code 'c', the tuple
    ``(loc, subfields)`` is printed. ``pos`` is unused.
    """
    for tag in ('100', '700'):
        line = get_first_tag(data, set([tag]))
        # No line means no subfields, so nothing can match below.
        fields = list(get_all_subfields(line)) if line else []
        has_c = False
        for code, _value in fields:
            if code == 'c':
                has_c = True
                break
        if has_c:
            print((loc, fields))
Example #3
0
def data_from_marc(locs, name):
    """Group record locations by the value ``read_line`` extracts for *name*.

    For every location the record is loaded with ``marc_data``; the 100 line
    (via ``get_first_tag``) and each 700 line (via ``get_tag_lines``) are
    passed through ``read_line``. Every truthy result becomes a key whose
    list receives the location.

    Returns a ``defaultdict(list)`` mapping result -> list of locations.
    """
    by_line = defaultdict(list)
    for loc in locs:
        data = marc_data(loc)

        main_entry = read_line(get_first_tag(data, set(['100'])), name)
        if main_entry:
            by_line[main_entry].append(loc)

        for _tag, raw in get_tag_lines(data, set(['700'])):
            added_entry = read_line(raw, name)
            if not added_entry:
                continue
            by_line[added_entry].append(loc)
    return by_line
Example #4
0
def data_from_marc(locs, name):
    """Build a mapping from ``read_line`` results to the locations producing them.

    Each location in *locs* is loaded with ``marc_data``. The 100 line and
    all 700 lines are run through ``read_line(line, name)``; falsy results
    are ignored, and truthy ones collect the location in a list.

    Returns the resulting ``defaultdict(list)``.
    """
    found = defaultdict(list)

    def remember(raw, loc):
        # Record loc under the parsed line, skipping lines that yield nothing.
        parsed = read_line(raw, name)
        if parsed:
            found[parsed].append(loc)

    for loc in locs:
        data = marc_data(loc)
        remember(get_first_tag(data, set(['100'])), loc)
        for _tag, raw in get_tag_lines(data, set(['700'])):
            remember(raw, loc)
    return found
Example #5
0
def marc_publisher(data):
    """Render every subfield of the 260 line as an HTML snippet.

    Each (code, value) pair from ``get_all_subfields`` is formatted as
    ``<b>$code</b>value``, with both parts passed through ``esc``, and the
    pieces are concatenated.

    Returns the joined string, or None when ``get_first_tag`` finds no
    260 line.
    """
    line = get_first_tag(data, set(['260']))
    # Guard first: there is nothing to render without a 260 line.
    if not line:
        return None
    # Iterate the subfields of the line; the previous code iterated over a
    # bare, undefined name instead of calling get_all_subfields(line).
    return ''.join("<b>$%s</b>%s" % (esc(k), esc(v))
                   for k, v in get_all_subfields(line))
Example #6
0
def marc_authors(data):
    """Render every subfield of the first 100/110/111 line as an HTML snippet.

    Each (code, value) subfield becomes ``<b>$code</b>value`` with both
    parts passed through ``esc``. Returns None when ``get_first_tag``
    yields no line.
    """
    line = get_first_tag(data, set(['100', '110', '111']))
    if not line:
        return None
    pieces = []
    for code, value in get_all_subfields(line):
        pieces.append("<b>$%s</b>%s" % (esc(code), esc(value)))
    return ''.join(pieces)
Example #7
0
from catalog.utils.query import query_iter, set_staging, withKey, get_mc
import sys, codecs, re
sys.path.append('/home/edward/src/olapi')
from olapi import OpenLibrary, Reference
from catalog.read_rc import read_rc
from catalog.get_ia import get_from_archive, get_from_local
from catalog.marc.fast_parse import get_first_tag, get_all_subfields
rc = read_rc()

sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
set_staging(True)

ol = OpenLibrary("http://dev.openlibrary.org")
ol.login('EdwardBot', rc['EdwardBot'])

q = {'type': '/type/edition', 'table_of_contents': None, 'subjects': None}
queue = []
count = 0
for e in query_iter(q, limit=100):
    key = e['key']
    mc = get_mc(key)
    if not mc:
        continue
    data = get_from_local(mc)
    line = get_first_tag(data, set(['041']))
    if not line:
        continue
    print key, line[0:2], list(get_all_subfields(line))
def marc_title(data):
    """Render subfields 'a' and 'b' of the 245 line as an HTML snippet.

    Each (code, value) pair from ``get_subfields`` is formatted as
    ``<b>$code</b>value`` with both parts passed through ``esc``.
    Returns None when ``get_first_tag`` yields no 245 line.
    """
    line = get_first_tag(data, set(['245']))
    if not line:
        return None
    wanted = set(['a', 'b'])
    pieces = []
    for code, value in get_subfields(line, wanted):
        pieces.append("<b>$%s</b>%s" % (esc(code), esc(value)))
    return ''.join(pieces)
Example #9
0
    # NOTE(review): this is the body of an enclosing loop whose header is not
    # part of this snippet; `part`, `f` and `skipping` come from that scope.
    print part
    # While `skipping` is set, every file except the named one is passed over.
    if skipping:
        if part != 'marc_western_washington_univ/wwu_bibs.mrc_revrev.mrc':
            print 'skipping'
            continue
    for pos, loc, data in read_marc_file(part, f):
        # Records are ignored until the one at the given location is reached.
        # That record is itself skipped; processing starts with the next one.
        if skipping:
            if loc.startswith(
                    'marc:marc_western_washington_univ/wwu_bibs.mrc_revrev.mrc:668652795:1299'
            ):
                skipping = False
            continue

        if str(data)[6:8] != 'am':  # only want books
            continue
        # Keep only records whose 003 line starts with 'ocolc' (case-insensitive).
        tag_003 = get_first_tag(data, ['003'])
        if not tag_003 or not tag_003.lower().startswith('ocolc'):
            continue
        # The 001 line supplies the OCLC number; records without one are skipped.
        oclc = get_first_tag(data, ['001'])
        if not oclc:
            #            print get_first_tag(data, ['010'])
            continue
        # The line must end with the 0x1e byte, which is dropped before stripping.
        assert oclc[-1] == '\x1e'
        oclc = oclc[:-1].strip()
        # Non-numeric values are run through re_oclc (defined elsewhere) and
        # replaced by its first group; unmatched values are reported and skipped.
        if not oclc.isdigit():
            m = re_oclc.match(oclc)
            if not m:
                print "can't read:", ` oclc `
                continue
            oclc = m.group(1)
        keys = get_keys(loc)
Example #10
0
locs = set()
for ek in edition_keys:
    e = site.withKey(ek)
    for i in e.isbn_10 if e.isbn_10 else []:
        locs.update(search_query('isbn', i))
    for i in e.lccn if e.lccn else []:
        locs.update(search_query('lccn', i))
    for i in e.oclc_numbers if e.oclc_numbers else []:
        locs.update(search_query('oclc', i))
print len(locs), 'MARC records found'

def ldv(line):
    """Return True if *line* contains any of the marker substrings.

    The markers are '1452', '1519', 'eonard' and 'inci' (presumably chosen
    to match Leonardo da Vinci and his dates -- confirm with the author).
    Matching is case-sensitive.
    """
    return any(marker in line for marker in ('1452', '1519', 'eonard', 'inci'))

# For every located record, print the subfields of the first 1xx line and
# of the first 7xx line, whenever ldv() accepts that line.
for loc in locs:
    data = get_data(loc)
    if not data:
        print("couldn't get")
        continue
    for tags in (['100', '110', '111'], ['700', '710', '711']):
        line = get_first_tag(data, set(tags))
        if line and ldv(line):
            print(list(get_all_subfields(line)))
Example #11
0
locs = set()
for ek in edition_keys:
    e = site.withKey(ek)
    for i in e.isbn_10 if e.isbn_10 else []:
        locs.update(search_query('isbn', i))
    for i in e.lccn if e.lccn else []:
        locs.update(search_query('lccn', i))
    for i in e.oclc_numbers if e.oclc_numbers else []:
        locs.update(search_query('oclc', i))
print len(locs), 'MARC records found'

def ldv(line):
    """Tell whether *line* contains one of the marker substrings.

    Checks, case-sensitively, for '1452', '1519', 'eonard' or 'inci'
    (presumably targeting Leonardo da Vinci, 1452-1519 -- confirm).
    Returns True on the first hit, otherwise False.
    """
    markers = ('1452', '1519', 'eonard', 'inci')
    for marker in markers:
        if marker in line:
            return True
    return False
    
# Print the subfields of the first 100/110/111 line and of the first
# 700/710/711 line of each record, when ldv() accepts the line.
for loc in locs:
    data = get_data(loc)
    if data:
        for tags in (['100', '110', '111'], ['700', '710', '711']):
            line = get_first_tag(data, set(tags))
            if line and ldv(line):
                print(list(get_all_subfields(line)))
    else:
        print("couldn't get")
Example #12
0
def marc_title(data):
    """Format subfields 'a' and 'b' of the 245 line as HTML.

    Output is the concatenation of ``<b>$code</b>value`` for each subfield
    returned by ``get_subfields``, with code and value passed through
    ``esc``. Returns None if ``get_first_tag`` finds no 245 line.
    """
    line = get_first_tag(data, set(["245"]))
    if line:
        subfields = get_subfields(line, set(["a", "b"]))
        return "".join(["<b>$%s</b>%s" % (esc(code), esc(value)) for code, value in subfields])
    return None
Example #13
0
def marc_publisher(data):
    """Format every subfield of the 260 line as HTML.

    Output is the concatenation of ``<b>$code</b>value`` for each
    (code, value) pair from ``get_all_subfields``, with both parts passed
    through ``esc``.

    Returns the joined string, or None if ``get_first_tag`` finds no
    260 line.
    """
    line = get_first_tag(data, set(["260"]))
    # Without a 260 line there is nothing to format.
    if not line:
        return None
    # The subfields must come from calling get_all_subfields(line); the
    # previous code tried to iterate over a bare, undefined name.
    return "".join("<b>$%s</b>%s" % (esc(k), esc(v)) for k, v in get_all_subfields(line))
Example #14
0
def marc_authors(data):
    """Format every subfield of the first 100/110/111 line as HTML.

    Output is the concatenation of ``<b>$code</b>value`` for each subfield,
    with code and value passed through ``esc``. Returns None if
    ``get_first_tag`` finds no such line.
    """
    line = get_first_tag(data, set(["100", "110", "111"]))
    if line:
        return "".join(["<b>$%s</b>%s" % (esc(code), esc(value)) for code, value in get_all_subfields(line)])
    return None
Example #15
0
for name, part, size in files():
    f = open(name)
    print part
    if skipping:
        if part != 'marc_western_washington_univ/wwu_bibs.mrc_revrev.mrc':
            print 'skipping'
            continue
    for pos, loc, data in read_marc_file(part, f):
        if skipping:
            if loc.startswith('marc:marc_western_washington_univ/wwu_bibs.mrc_revrev.mrc:668652795:1299'):
                skipping = False
            continue

        if str(data)[6:8] != 'am': # only want books
            continue
        tag_003 = get_first_tag(data, ['003'])
        if not tag_003 or not tag_003.lower().startswith('ocolc'):
            continue
        oclc = get_first_tag(data, ['001'])
        if not oclc:
#            print get_first_tag(data, ['010'])
            continue
        assert oclc[-1] == '\x1e'
        oclc = oclc[:-1].strip()
        if not oclc.isdigit():
            m = re_oclc.match(oclc)
            if not m:
                print "can't read:", `oclc`
                continue
            oclc = m.group(1)
        keys = get_keys(loc)
Example #16
0
def marc_title(data):
    """Build an HTML snippet from subfields 'a' and 'b' of the 245 line.

    Every (code, value) pair yielded by ``get_subfields`` contributes
    ``<b>$code</b>value``, with both parts passed through ``esc``.
    Returns None when ``get_first_tag`` returns no line.
    """
    line = get_first_tag(data, set(['245']))
    if not line:
        return None

    def render(code, value):
        # One subfield -> bold "$code" marker followed by the value.
        return "<b>$%s</b>%s" % (esc(code), esc(value))

    return ''.join([render(k, v) for k, v in get_subfields(line, set(['a', 'b']))])
Example #17
0
from catalog.utils.query import query_iter, set_staging, withKey, get_mc
import sys, codecs, re
sys.path.append('/home/edward/src/olapi')
from olapi import OpenLibrary, Reference
from catalog.read_rc import read_rc
from catalog.get_ia import get_from_archive, get_from_local
from catalog.marc.fast_parse import get_first_tag, get_all_subfields
rc = read_rc()

sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
set_staging(True)

ol = OpenLibrary("http://dev.openlibrary.org")
ol.login('EdwardBot', rc['EdwardBot'])

q = { 'type': '/type/edition', 'table_of_contents': None, 'subjects': None }
queue = []
count = 0
for e in query_iter(q, limit=100):
    key = e['key']
    mc = get_mc(key)
    if not mc:
        continue
    data = get_from_local(mc)
    line = get_first_tag(data, set(['041']))
    if not line:
        continue
    print key, line[0:2], list(get_all_subfields(line))

Example #18
0
def marc_publisher(data):
    """Render every subfield of the 260 line as an HTML snippet.

    Each (code, value) pair from ``get_all_subfields`` becomes
    ``<b>$code</b>value`` with both parts passed through ``esc``.
    Returns None when ``get_first_tag`` finds no 260 line.
    """
    line = get_first_tag(data, set(['260']))
    if not line:
        return None
    pieces = []
    for code, value in get_all_subfields(line):
        pieces.append("<b>$%s</b>%s" % (esc(code), esc(value)))
    return ''.join(pieces)