def translate(bytes_in, leader_says_marc8=False):
    """
    Converts encoded bytes to an NFC-normalized unicode string.

    The input is decoded as MARC8 when ``leader_says_marc8`` is true and as
    UTF-8 otherwise.

    :param bytes bytes_in: the encoded value to convert
    :param bool leader_says_marc8: True to decode as MARC8, False for UTF-8
    :rtype: str
    :raises UnicodeDecodeError: on the UTF-8 path, if the input is not valid UTF-8
    """
    if leader_says_marc8:
        # The converter is only needed for MARC8 input, so it is not built
        # at all on the UTF-8 path.
        marc8 = MARC8ToUnicode(quiet=True)
        data = marc8.translate(mnemonics.read(bytes_in))
    else:
        data = bytes_in.decode('utf-8')
    return normalize('NFC', data)
def translate(bytes_in, leader_says_marc8=False):
    """
    Converts encoded bytes to an NFC-normalized unicode string.

    The input is decoded as MARC8 when ``leader_says_marc8`` is true and as
    UTF-8 otherwise. If the conversion fails, the offending input and the
    encoding flag are printed before the original exception is re-raised.

    :param bytes bytes_in: the encoded value to convert
    :param bool leader_says_marc8: True to decode as MARC8, False for UTF-8
    :rtype: str
    :raises Exception: whatever the underlying decode raised, unchanged
    """
    # Only MARC8 input needs a converter; skip building one for UTF-8 input.
    marc8 = MARC8ToUnicode(quiet=True) if leader_says_marc8 else None
    try:
        if leader_says_marc8:
            data = marc8.translate(mnemonics.read(bytes_in))
        else:
            data = bytes_in.decode('utf-8')
        return normalize('NFC', data)
    except Exception:
        # Deliberately not a bare ``except:`` so KeyboardInterrupt and
        # SystemExit propagate without the diagnostic output below.
        print('translate error for:', repr(bytes_in))
        print('marc8:', leader_says_marc8)
        raise
def translate(bytes_in, leader_says_marc8=False):
    """
    Converts a binary MARC field value to unicode str,
    from either MARC8 or UTF8 encoded bytes.

    The result is NFC-normalized.

    :param bytes bytes_in: the encoded field value
    :param bool leader_says_marc8: True to decode as MARC8, False for UTF-8
    :rtype: str
    :raises AssertionError: if ``bytes_in`` is not a ``bytes`` instance
    :raises UnicodeDecodeError: on the UTF-8 path, if the input is not valid UTF-8
    """
    # NOTE: this check is stripped when Python runs with -O; it is kept as an
    # assert so callers continue to see AssertionError for non-bytes input.
    assert isinstance(bytes_in, bytes)
    if leader_says_marc8:
        # The converter is only needed for MARC8 input.
        marc8 = MARC8ToUnicode(quiet=True)
        data = marc8.translate(mnemonics.read(bytes_in))
    else:
        data = bytes_in.decode('utf-8')
    return normalize('NFC', data)
def record_to_xml_node(self, record, quiet=False, namespace=False):
    """
    Converts a record object to an XML element tree.

    Every element tag is written with the literal ``marc:`` prefix. Control
    fields become ``marc:controlfield`` elements; all other fields become
    ``marc:datafield`` elements with one ``marc:subfield`` child per subfield.

    :param record: object exposing ``leader`` and iterating over its fields
    :param bool quiet: forwarded to ``MARC8ToUnicode``
    :param bool namespace: when True, the ``xmlns``, ``xmlns:xsi`` and
        ``xsi:schemaLocation`` attributes are set on the root element
        (default: False)
    :return: the root ``marc:record`` element
    """
    # TODO: maybe should set g0 and g1 appropriately using 066 $a and $b?
    prefix = 'marc:'
    marc8 = MARC8ToUnicode(quiet=quiet)

    def translate(data):
        # Helper for converting non-unicode data to unicode: text is passed
        # through unchanged, anything else goes through the MARC8 converter.
        # isinstance (rather than an exact type comparison) also accepts
        # subclasses of the text type.
        if isinstance(data, six.text_type):
            return data
        return marc8.translate(data)

    root = ET.Element(prefix + 'record')
    if namespace:
        # NOTE(review): only the default namespace is declared here; nothing
        # in this function binds the 'marc' prefix used in the tag names --
        # confirm that consumers of this tree handle that.
        root.set('xmlns', MARC_XML_NS)
        root.set('xmlns:xsi', XSI_NS)
        root.set('xsi:schemaLocation', MARC_XML_SCHEMA)

    leader = ET.SubElement(root, prefix + 'leader')
    leader.text = record.leader

    for field in record:
        if field.is_control_field():
            control_field = ET.SubElement(root, prefix + 'controlfield')
            control_field.set('tag', field.tag)
            control_field.text = translate(field.data)
        else:
            data_field = ET.SubElement(root, prefix + 'datafield')
            data_field.set('tag', field.tag)
            data_field.set('ind1', field.indicators[0])
            data_field.set('ind2', field.indicators[1])
            for subfield in field:
                # Each subfield is indexed as (code, value).
                data_subfield = ET.SubElement(data_field, prefix + 'subfield')
                data_subfield.set('code', subfield[0])
                data_subfield.text = translate(subfield[1])

    return root
def record_to_xml_node(record, quiet=False, namespace=False):
    """Converts a record object to a chunk of XML.

    Control fields become ``controlfield`` elements; all other fields become
    ``datafield`` elements with one ``subfield`` child per subfield.

    If you would like to include the marcxml namespace in the root tag set
    namespace to True.

    :param record: object exposing ``leader`` and iterating over its fields
    :param bool quiet: forwarded to ``MARC8ToUnicode``
    :param bool namespace: when True, the ``xmlns``, ``xmlns:xsi`` and
        ``xsi:schemaLocation`` attributes are set on the root element
    :return: the root ``record`` element
    """
    # TODO: maybe should set g0 and g1 appropriately using 066 $a and $b?
    marc8 = MARC8ToUnicode(quiet=quiet)

    def translate(data):
        # Helper for converting non-unicode data to unicode: str is passed
        # through unchanged, anything else goes through the MARC8 converter.
        # isinstance (rather than an exact type comparison) also accepts
        # str subclasses.
        if isinstance(data, str):
            return data
        return marc8.translate(data)

    root = ET.Element("record")
    if namespace:
        root.set("xmlns", MARC_XML_NS)
        root.set("xmlns:xsi", XSI_NS)
        root.set("xsi:schemaLocation", MARC_XML_SCHEMA)

    leader = ET.SubElement(root, "leader")
    # The leader is coerced to str before being stored as element text.
    leader.text = str(record.leader)

    for field in record:
        if field.is_control_field():
            control_field = ET.SubElement(root, "controlfield")
            control_field.set("tag", field.tag)
            control_field.text = translate(field.data)
        else:
            data_field = ET.SubElement(root, "datafield")
            # Attribute insertion order (ind1, ind2, tag) is kept as-is.
            data_field.set("ind1", field.indicators[0])
            data_field.set("ind2", field.indicators[1])
            data_field.set("tag", field.tag)
            for subfield in field:
                # Each subfield is indexed as (code, value).
                data_subfield = ET.SubElement(data_field, "subfield")
                data_subfield.set("code", subfield[0])
                data_subfield.text = translate(subfield[1])

    return root
from pymarc import MARC8ToUnicode from unicodedata import normalize from openlibrary.catalog.marc import mnemonics from openlibrary.catalog.marc.marc_base import MarcBase, MarcException, BadMARC marc8 = MARC8ToUnicode(quiet=True) class BadLength(MarcException): pass def handle_wrapped_lines(_iter): """ Handles wrapped MARC fields, which appear to be multiple fields with the same field number ending with ++ Have not found an official spec which describe this. """ cur_lines = [] cur_tag = None maybe_wrap = False for t, l in _iter: if len(l) > 500 and l.endswith(b'++\x1e'): assert not cur_tag or cur_tag == t cur_tag = t cur_lines.append(l) continue if cur_lines: yield cur_tag, cur_lines[0][:-3] + b''.join( i[2:-3] for i in cur_lines[1:]) + l[2:]