def get_work_title(e):
    """Return a work title for edition ``e``, or ``None`` if there is none.

    The ``(src_type, src)`` pairs yielded by ``get_marc_src(e)`` are tried in
    order:

    * ``'ia'`` sources are resolved with ``get_ia_work_title``; the search
      stops at the first one that yields a truthy title.
    * ``'marc'`` sources are fetched with ``get_from_archive``; the search
      stops at the first record that has a 240 field, whose ``a`` subfield
      values are joined with spaces and stripped of surrounding dots and
      spaces.

    Sources that cannot be fetched (``ValueError``) or parsed
    (``BadDictionary``) are reported on stdout and skipped.  If the search
    produced no title, the first entry of ``e['work_titles']`` is returned
    when the edition has any, after logging the mismatch.
    """
    # use first work title we find in source MARC records
    title = None
    for src_type, src in get_marc_src(e):
        if src_type == 'ia':
            title = get_ia_work_title(src)
            if title:
                break
            continue
        assert src_type == 'marc'
        try:
            data = get_from_archive(src)
        except ValueError:
            # Single-argument print calls produce the same output under the
            # Python 2 print statement and the Python 3 print function.
            print('bad record source: %s' % (src,))
            print('http://openlibrary.org' + e['key'])
            continue
        if not data:
            continue
        try:
            line = get_first_tag(data, set(['240']))
        except BadDictionary:
            print('bad dictionary: %s' % (src,))
            print('http://openlibrary.org' + e['key'])
            continue
        if line:
            title = ' '.join(get_subfield_values(line, ['a'])).strip('. ')
            break
    if title:
        return title
    if not e.get('work_titles', []):
        return None
    # NOTE(review): this branch is reached when the edition has work_titles
    # but no title came from the source records, so the message below reads
    # as inverted -- confirm the intended wording before changing it.
    print('work title in MARC, but not in OL')
    print('http://openlibrary.org' + e['key'])
    return e['work_titles'][0]
def get_work_subjects(w, do_get_mc=True):
    """Collect and combine the subjects of every edition of work ``w``.

    For each edition in ``w['editions']``:

    * if it has ``source_records``, every record that starts with ``ia:`` or
      ``marc:`` and does not end with ``initial import`` is kept;
    * otherwise, when ``do_get_mc`` is true, ``get_mc`` is queried with the
      ``/b/...`` form of the edition key and a usable result is kept with a
      ``marc:`` prefix (results that end with ``initial import``, start with
      ``amazon:`` or match ``re_ia_marc`` are ignored).

    Subjects are then read for each distinct source and merged by
    ``combine_subjects``, whose result is returned.
    """
    sources = set()
    for edition in w['editions']:
        records = edition.get('source_records', [])
        if records:
            sources.update(
                record for record in records
                if not record.endswith('initial import')
                and record.startswith(('ia:', 'marc:'))
            )
            continue

        # No source records: optionally fall back to the get_mc() lookup.
        if not do_get_mc:
            continue
        key_match = re_edition_key.match(edition['key'])
        mc = get_mc('/b/' + key_match.group(1))
        if not mc or mc.endswith('initial import'):
            continue
        if mc.startswith('amazon:') or re_ia_marc.match(mc):
            continue
        sources.add('marc:' + mc)

    subjects = []
    for source in sources:
        if source.startswith('marc:ia:'):
            # 8 == len('marc:ia:')
            source_subjects = get_subjects_from_ia(source[8:])
        elif source.startswith('marc:'):
            # 5 == len('marc:')
            marc_data = get_from_archive(source[5:])
            source_subjects = read_subjects(MarcBinary(marc_data))
        else:
            assert source.startswith('ia:')
            # 3 == len('ia:')
            source_subjects = get_subjects_from_ia(source[3:])
        subjects.append(source_subjects)
    return combine_subjects(subjects)
def get_work_subjects(w):
    """Collect and combine the subjects of every edition of work ``w``.

    Editions with ``source_records`` contribute each record that starts with
    ``ia:`` or ``marc:``.  Editions without any are looked up through
    ``get_mc`` using the ``/b/...`` form of their key, and a usable result is
    kept with a ``marc:`` prefix (results that start with ``amazon:`` or
    match ``re_ia_marc`` are ignored).  Any record or lookup result that ends
    with ``initial import`` is passed to ``bad_source_record`` and skipped.

    Returns ``combine_subjects`` applied to the list of subjects read from
    each distinct source.
    """
    found = set()
    for edition in w['editions']:
        source_records = edition.get('source_records', [])
        if source_records:
            for record in source_records:
                if record.endswith('initial import'):
                    bad_source_record(edition, record)
                elif record.startswith(('ia:', 'marc:')):
                    found.add(record)
            continue

        # No source records on this edition: fall back to get_mc().
        key_match = re_edition_key.match(edition['key'])
        mc = get_mc('/b/' + key_match.group(1))
        if not mc:
            continue
        if mc.endswith('initial import'):
            bad_source_record(edition, mc)
            continue
        if mc.startswith('amazon:') or re_ia_marc.match(mc):
            continue
        found.add('marc:' + mc)

    def subjects_for(source):
        # Dispatch on the source prefix; the slice offsets are the prefix
        # lengths ('marc:ia:' -> 8, 'marc:' -> 5, 'ia:' -> 3).
        if source.startswith('marc:ia:'):
            return get_subjects_from_ia(source[8:])
        if source.startswith('marc:'):
            return read_subjects(MarcBinary(get_from_archive(source[5:])))
        assert source.startswith('ia:')
        return get_subjects_from_ia(source[3:])

    return combine_subjects([subjects_for(source) for source in found])
def get_record(key, mc):
    """Fetch the record at ``mc`` and build a MARC structure from it.

    The raw data comes from ``get_from_archive(mc)`` and is parsed with
    ``fast_parse.read_edition``.  If parsing raises
    ``fast_parse.SoundRecording``, ``IndexError`` or ``AssertionError``,
    ``mc`` and ``key`` are printed and ``False`` is returned.  Otherwise the
    result of ``marc.build_marc`` is returned; a ``TypeError`` from that call
    is re-raised after printing the parsed record.
    """
    raw = get_from_archive(mc)
    try:
        edition = fast_parse.read_edition(raw)
    except (fast_parse.SoundRecording, IndexError, AssertionError):
        # Report which record failed and which key it belongs to.
        for ident in (mc, key):
            print(ident)
        return False

    try:
        built = marc.build_marc(edition)
    except TypeError:
        # Show the offending parsed record before propagating the error.
        print(edition)
        raise
    return built
# Route everything printed below through a UTF-8 encoding writer.
# NOTE(review): this wrapper assumes a byte-oriented sys.stdout (Python 2
# style) -- revisit before running this script under Python 3.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

# Matches a 0x1f delimiter, a one-character code (group 1) and the run of
# non-0x1f characters that follows it (group 2).
re_subtag = re.compile('\x1f(.)([^\x1f]*)')


def fmt_subfields(line):
    """Format the subfields of a variable-field ``line`` for display.

    ``line`` must end with the 0x1e terminator.  The first two characters
    and the terminator are skipped; each subfield found in between is
    rendered as `` $<code> <value>``, with the ``$<code>`` marker passed
    through ``bold`` and the value passed through ``translate``.
    """
    def bold(s):
        # Emit each character as "c\bc" (character, backspace, character);
        # the helper's name suggests viewers show this as bold text.
        return ''.join(c + "\b" + c for c in s)

    assert line[-1] == '\x1e'
    return ''.join(
        ' ' + bold('$' + m.group(1)) + ' ' + translate(m.group(2))
        for m in re_subtag.finditer(line[2:-1])
    )


def show_book(data):
    """Print the leader and every tag line of the record in ``data``.

    Tags starting with ``00`` are printed with only their trailing
    terminator removed; every other tag is printed with its first two
    characters followed by the formatted subfields.
    """
    # Single-argument print calls behave identically as a Python 2 print
    # statement and as the Python 3 print function.
    print('leader: %s' % (data[:24],))
    for tag, line in get_all_tag_lines(data):
        if tag.startswith('00'):
            print('%s %s' % (tag, line[:-1]))
        else:
            print('%s %s %s' % (tag, line[0:2], fmt_subfields(line)))


if __name__ == '__main__':
    source = sys.argv[1]
    if ':' in source:
        # A colon in the argument means it is handed to get_from_archive.
        data = get_from_archive(source)
    else:
        # Otherwise it is a local path; read it as bytes and close the
        # handle deterministically.
        with open(source, 'rb') as f:
            data = f.read()
    show_book(data)
# Everything printed by this script goes through a UTF-8 encoding writer.
# NOTE(review): the wrapper assumes sys.stdout accepts encoded bytes (Python 2
# style) -- confirm before running this script under Python 3.
sys.stdout = codecs.getwriter("utf-8")(sys.stdout)

# Group 1 is the single character after a 0x1f delimiter; group 2 is the text
# up to the next 0x1f (or the end of the searched string).
re_subtag = re.compile("\x1f(.)([^\x1f]*)")


def fmt_subfields(line):
    """Return a printable rendering of the subfields in ``line``.

    Asserts that ``line`` ends with 0x1e.  Only ``line[2:-1]`` is scanned, so
    the two leading characters and the terminator are left out.  Each match
    of ``re_subtag`` contributes `` $<code> <value>`` where the marker is
    run through ``bold`` and the value through ``translate``.
    """
    def bold(s):
        # "c\bc" per character: the character, a backspace, then the same
        # character again (presumably shown as bold by the viewer).
        return "".join(c + "\b" + c for c in s)

    assert line[-1] == "\x1e"
    parts = (
        " " + bold("$" + m.group(1)) + " " + translate(m.group(2))
        for m in re_subtag.finditer(line[2:-1])
    )
    return "".join(parts)


def show_book(data):
    """Print the first 24 characters of ``data`` and then each tag line.

    Lines whose tag starts with ``00`` are shown without their last
    character; all others are shown as their first two characters plus the
    output of ``fmt_subfields``.
    """
    # Each print takes exactly one pre-formatted argument so the statement
    # means the same thing on Python 2 and Python 3.
    print("leader: %s" % (data[:24],))
    for tag, line in get_all_tag_lines(data):
        if tag.startswith("00"):
            print("%s %s" % (tag, line[:-1]))
        else:
            print("%s %s %s" % (tag, line[0:2], fmt_subfields(line)))


if __name__ == "__main__":
    source = sys.argv[1]
    if ":" in source:
        # Arguments containing a colon are resolved by get_from_archive.
        data = get_from_archive(source)
    else:
        # Plain paths are read from disk in binary mode; the context manager
        # closes the file even if the read fails.
        with open(source, "rb") as f:
            data = f.read()
    show_book(data)