def process_record(pos, loc, data):
    """Print ``(loc, subfields)`` for 100/700 fields that carry a ``c`` subfield.

    For each of the tags '100' and '700', the first matching field of
    ``data`` is fetched with ``get_first_tag``. If that field exists and
    any of its subfields has the code ``'c'``, the tuple
    ``(loc, list_of_subfields)`` is printed.

    ``pos`` is accepted but not used by this function.
    """
    for tag in '100', '700':
        line = get_first_tag(data, set([tag]))
        if not line:
            continue
        fields = list(get_all_subfields(line))
        # Report the field once, as soon as a 'c' subfield is seen.
        for code, _value in fields:
            if code == 'c':
                print((loc, fields))
                break
def data_from_marc(locs, name):
    """Group record locations by the value ``read_line`` extracts for ``name``.

    For every location in ``locs`` the record is loaded with ``marc_data``.
    The first 100 field and every 700 field are passed through
    ``read_line(field, name)``; each truthy result becomes a key in the
    returned mapping, and the location is appended to that key's list.

    Returns a ``defaultdict(list)`` mapping each extracted value to the
    locations where it was found (a location may appear more than once
    under the same key if several fields of one record yield it).
    """
    matches = defaultdict(list)
    for loc in locs:
        data = marc_data(loc)

        # First 100 field of the record.
        main_entry = read_line(get_first_tag(data, set(['100'])), name)
        if main_entry:
            matches[main_entry].append(loc)

        # Every 700 field of the record.
        for _tag, raw_line in get_tag_lines(data, set(['700'])):
            added_entry = read_line(raw_line, name)
            if not added_entry:
                continue
            matches[added_entry].append(loc)
    return matches
def marc_publisher(data):
    """Render the subfields of the first 260 field of ``data`` as HTML.

    Each ``(code, value)`` pair yielded by ``get_all_subfields`` is
    formatted as ``<b>$code</b>value``, with both parts passed through
    ``esc`` (defined elsewhere; presumably HTML-escaping -- confirm).

    Returns the concatenated string, or ``None`` when ``get_first_tag``
    finds no 260 field.
    """
    line = get_first_tag(data, set(['260']))
    # Without this guard a record lacking a 260 field would be handed to
    # the subfield parser; the sibling marc_* helpers return None instead.
    if not line:
        return None
    return ''.join(
        "<b>$%s</b>%s" % (esc(k), esc(v))
        for k, v in get_all_subfields(line)
    )
def marc_authors(data):
    """Render the first 100/110/111 field of ``data`` as HTML.

    Each subfield ``(code, value)`` pair from ``get_all_subfields`` becomes
    ``<b>$code</b>value``, with both parts passed through ``esc``.

    Returns the joined string, or ``None`` if no such field is found.
    """
    line = get_first_tag(data, set(['100', '110', '111']))
    if not line:
        return None
    pieces = []
    for code, value in get_all_subfields(line):
        pieces.append("<b>$%s</b>%s" % (esc(code), esc(value)))
    return ''.join(pieces)
# Script: log in to the dev Open Library site, iterate over editions matching
# the query `q`, and print each edition's key together with its first MARC
# 041 field (first two characters, then the list of subfields).
from catalog.utils.query import query_iter, set_staging, withKey, get_mc
import sys, codecs, re
# Make the local olapi checkout importable.
sys.path.append('/home/edward/src/olapi')
from olapi import OpenLibrary, Reference
from catalog.read_rc import read_rc
from catalog.get_ia import get_from_archive, get_from_local
from catalog.marc.fast_parse import get_first_tag, get_all_subfields

rc = read_rc()

# Wrap stdout so that printed text is written out encoded as UTF-8.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
set_staging(True)
ol = OpenLibrary("http://dev.openlibrary.org")
# The password is looked up in the rc mapping under the bot's name.
ol.login('EdwardBot', rc['EdwardBot'])

q = {'type': '/type/edition', 'table_of_contents': None, 'subjects': None}
# NOTE(review): `queue` and `count` are not used in the visible part of the
# script; presumably they are used further down -- confirm.
queue = []
count = 0
for e in query_iter(q, limit=100):
    key = e['key']
    mc = get_mc(key)
    if not mc:
        # get_mc returned nothing for this edition; skip it.
        continue
    data = get_from_local(mc)
    line = get_first_tag(data, set(['041']))
    if not line:
        # Record has no 041 field.
        continue
    print key, line[0:2], list(get_all_subfields(line))
def marc_title(data):
    """Render subfields ``a`` and ``b`` of the first 245 field as HTML.

    Each ``(code, value)`` pair returned by ``get_subfields`` for the codes
    ``a`` and ``b`` is formatted as ``<b>$code</b>value``, with both parts
    passed through ``esc``.

    Returns the joined string, or ``None`` if the record has no 245 field.
    """
    line = get_first_tag(data, set(['245']))
    if not line:
        return None
    wanted = set(['a', 'b'])
    rendered = [
        "<b>$%s</b>%s" % (esc(code), esc(value))
        for code, value in get_subfields(line, wanted)
    ]
    return ''.join(rendered)
# NOTE(review): this is a fragment -- the loop that binds `part` and `f`
# begins outside the visible code, and the first `continue` below targets
# that outer loop.
print part
if skipping:
    # While skipping, ignore every part except the one named here.
    if part != 'marc_western_washington_univ/wwu_bibs.mrc_revrev.mrc':
        print 'skipping'
        continue
for pos, loc, data in read_marc_file(part, f):
    if skipping:
        # Skipping ends at the record whose location starts with this
        # string; that record itself is still skipped by the `continue`.
        if loc.startswith( 'marc:marc_western_washington_univ/wwu_bibs.mrc_revrev.mrc:668652795:1299' ):
            skipping = False
        continue

    if str(data)[6:8] != 'am': # only want books
        continue
    tag_003 = get_first_tag(data, ['003'])
    # Keep only records whose 003 field starts with 'ocolc' (case-insensitive).
    if not tag_003 or not tag_003.lower().startswith('ocolc'):
        continue
    oclc = get_first_tag(data, ['001'])
    if not oclc:
        # print get_first_tag(data, ['010'])
        continue
    # The 001 value is expected to end with the '\x1e' byte; drop it and
    # strip surrounding whitespace.
    assert oclc[-1] == '\x1e'
    oclc = oclc[:-1].strip()
    if not oclc.isdigit():
        # Non-numeric value: take group 1 of the re_oclc match (the pattern
        # is defined elsewhere).
        m = re_oclc.match(oclc)
        if not m:
            print "can't read:", ` oclc `
            continue
        oclc = m.group(1)
    keys = get_keys(loc)
locs = set() for ek in edition_keys: e = site.withKey(ek) for i in e.isbn_10 if e.isbn_10 else []: locs.update(search_query('isbn', i)) for i in e.lccn if e.lccn else []: locs.update(search_query('lccn', i)) for i in e.oclc_numbers if e.oclc_numbers else []: locs.update(search_query('oclc', i)) print len(locs), 'MARC records found' def ldv(line): for s in ('1452', '1519', 'eonard', 'inci'): if line.find(s) != -1: return True return False for loc in locs: # print loc data = get_data(loc) if not data: print "couldn't get" continue line = get_first_tag(data, set(['100', '110', '111'])) if line and ldv(line): print list(get_all_subfields(line)) line = get_first_tag(data, set(['700', '710', '711'])) if line and ldv(line): print list(get_all_subfields(line))
def marc_title(data):
    """Return the first 245 field's ``a``/``b`` subfields rendered as HTML.

    Every ``(code, value)`` pair from ``get_subfields`` is turned into
    ``<b>$code</b>value``, with both parts passed through ``esc``. The
    result is ``None`` when ``get_first_tag`` finds no 245 field.
    """
    line = get_first_tag(data, set(["245"]))
    if not line:
        return None
    chunks = []
    for code, value in get_subfields(line, set(["a", "b"])):
        chunks.append("<b>$%s</b>%s" % (esc(code), esc(value)))
    return "".join(chunks)
def marc_publisher(data):
    """Render the subfields of the first 260 field of ``data`` as HTML.

    Each ``(code, value)`` pair yielded by ``get_all_subfields`` is
    formatted as ``<b>$code</b>value``, with both parts passed through
    ``esc`` (defined elsewhere; presumably HTML-escaping -- confirm).

    Returns the concatenated string, or ``None`` when ``get_first_tag``
    finds no 260 field.
    """
    line = get_first_tag(data, set(["260"]))
    # Guard against records without a 260 field, matching the behaviour of
    # the sibling marc_* helpers, which return None in that case.
    if not line:
        return None
    return "".join(
        "<b>$%s</b>%s" % (esc(k), esc(v))
        for k, v in get_all_subfields(line)
    )
def marc_authors(data):
    """Return the first 100/110/111 field of ``data`` rendered as HTML.

    Subfields are emitted in the order ``get_all_subfields`` yields them,
    each as ``<b>$code</b>value`` with both parts passed through ``esc``.
    ``None`` is returned when no such field is found.
    """
    author_tags = set(["100", "110", "111"])
    line = get_first_tag(data, author_tags)
    if line:
        return "".join(
            "<b>$%s</b>%s" % (esc(code), esc(value))
            for code, value in get_all_subfields(line)
        )
    return None
# Walk every MARC file reported by files() and, for each qualifying record,
# derive a numeric OCLC string from its 001 field and look up keys for the
# record's location.
for name, part, size in files():
    # NOTE(review): `f` is not closed anywhere in the visible code.
    f = open(name)
    print part
    if skipping:
        # While skipping, ignore every part except the one named here.
        if part != 'marc_western_washington_univ/wwu_bibs.mrc_revrev.mrc':
            print 'skipping'
            continue
    for pos, loc, data in read_marc_file(part, f):
        if skipping:
            # Skipping ends at the record whose location starts with this
            # string; that record itself is still skipped by the `continue`.
            if loc.startswith('marc:marc_western_washington_univ/wwu_bibs.mrc_revrev.mrc:668652795:1299'):
                skipping = False
            continue

        if str(data)[6:8] != 'am': # only want books
            continue
        tag_003 = get_first_tag(data, ['003'])
        # Keep only records whose 003 field starts with 'ocolc'
        # (case-insensitive).
        if not tag_003 or not tag_003.lower().startswith('ocolc'):
            continue
        oclc = get_first_tag(data, ['001'])
        if not oclc:
            # print get_first_tag(data, ['010'])
            continue
        # The 001 value is expected to end with the '\x1e' byte; drop it and
        # strip surrounding whitespace.
        assert oclc[-1] == '\x1e'
        oclc = oclc[:-1].strip()
        if not oclc.isdigit():
            # Non-numeric value: take group 1 of the re_oclc match (the
            # pattern is defined elsewhere).
            m = re_oclc.match(oclc)
            if not m:
                print "can't read:", `oclc`
                continue
            oclc = m.group(1)
        keys = get_keys(loc)
def marc_title(data):
    """Format subfields ``a`` and ``b`` of the first 245 field as HTML.

    The output concatenates ``<b>$code</b>value`` for each ``(code, value)``
    pair produced by ``get_subfields``; both parts go through ``esc``.
    Returns ``None`` when the record has no 245 field.
    """
    line = get_first_tag(data, set(['245']))
    if line:
        subfields = get_subfields(line, set(['a', 'b']))
        return ''.join(
            "<b>$%s</b>%s" % (esc(code), esc(value))
            for code, value in subfields
        )
    return None
# Script: log in to the dev Open Library site, iterate over editions matching
# the query `q`, and print each edition's key together with its first MARC
# 041 field (first two characters, then the list of subfields).
from catalog.utils.query import query_iter, set_staging, withKey, get_mc
import sys, codecs, re
# Make the local olapi checkout importable.
sys.path.append('/home/edward/src/olapi')
from olapi import OpenLibrary, Reference
from catalog.read_rc import read_rc
from catalog.get_ia import get_from_archive, get_from_local
from catalog.marc.fast_parse import get_first_tag, get_all_subfields

rc = read_rc()

# Wrap stdout so that printed text is written out encoded as UTF-8.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
set_staging(True)
ol = OpenLibrary("http://dev.openlibrary.org")
# The password is looked up in the rc mapping under the bot's name.
ol.login('EdwardBot', rc['EdwardBot'])

q = {
    'type': '/type/edition',
    'table_of_contents': None,
    'subjects': None
}
# NOTE(review): `queue` and `count` are not used in the visible part of the
# script; presumably they are used further down -- confirm.
queue = []
count = 0
for e in query_iter(q, limit=100):
    key = e['key']
    mc = get_mc(key)
    if not mc:
        # get_mc returned nothing for this edition; skip it.
        continue
    data = get_from_local(mc)
    line = get_first_tag(data, set(['041']))
    if not line:
        # Record has no 041 field.
        continue
    print key, line[0:2], list(get_all_subfields(line))
def marc_publisher(data):
    """Render the subfields of the first 260 field of ``data`` as HTML.

    Each ``(code, value)`` pair from ``get_all_subfields`` is written as
    ``<b>$code</b>value``, with both parts passed through ``esc``.

    Returns the joined string, or ``None`` if no 260 field is found.
    """
    line = get_first_tag(data, set(['260']))
    if not line:
        return None
    fragments = []
    for code, value in get_all_subfields(line):
        fragments.append("<b>$%s</b>%s" % (esc(code), esc(value)))
    return ''.join(fragments)