def translate(self, data):
    """
    Decode raw MARC21 field content into a unicode string.

    :param data bytes: raw MARC21 field data content, in either utf8 or marc8 encoding
    :rtype: str
    :return: A NFC normalized unicode str
    """
    if self.rec.marc8():
        # mnemonics.read() runs first; its output is what marc8.translate()
        # converts to unicode.
        text = marc8.translate(mnemonics.read(data))
    else:
        text = data.decode('utf8')
    # Normalize on both paths so the documented NFC guarantee also holds for
    # MARC8 records. NFC normalization is idempotent, so already-normalized
    # text is returned unchanged.
    return normalize('NFC', text)
def translate(bytes_in, leader_says_marc8=False):
    """
    Convert a binary MARC field value to a NFC normalized unicode str.

    :param bytes_in bytes: raw field content, MARC8 or UTF-8 encoded
    :param leader_says_marc8 bool: when true the content is passed through
        mnemonics.read() and a MARC8 converter; otherwise it is decoded as UTF-8
    :rtype: str
    :return: the NFC normalized text
    """
    if leader_says_marc8:
        # Only the MARC8 path needs a converter, so it is built here instead
        # of on every call.
        converter = MARC8ToUnicode(quiet=True)
        text = converter.translate(mnemonics.read(bytes_in))
    else:
        text = bytes_in.decode('utf-8')
    return normalize('NFC', text)
def translate(self, data):
    """
    Decode raw field content into a NFC normalized unicode string.

    The content is decoded as UTF-8 when position 9 of the record leader is
    'a'. When the leader says otherwise, or the UTF-8 decode fails, the
    content goes through mnemonics.read() and marc8.translate() instead.

    :param data bytes: raw field content
    :return: the NFC normalized text
    """
    is_utf8 = self.rec.leader()[9] == 'a'
    if is_utf8:
        try:
            data = data.decode('utf-8')
        except UnicodeDecodeError:
            # The leader claimed UTF-8 but the bytes are not valid UTF-8:
            # fall back to the MARC8 path. Only decode errors are caught so
            # that KeyboardInterrupt / SystemExit and real bugs still surface.
            is_utf8 = False
    if not is_utf8:
        data = marc8.translate(mnemonics.read(data))
    return normalize('NFC', data)
def translate(data, bad_ia_charset=False, leader_says_marc8=False):
    """
    Convert raw MARC field content to unicode, NFC normalized where possible.

    :param data: raw field content
    :param bad_ia_charset: when true (and leader_says_marc8 is false) the
        content is passed straight to marc8.translate() and normalized
    :param leader_says_marc8: when true the content is treated as MARC8:
        mnemonics are expanded, then the result is converted by
        marc8.translate()
    :return: NFC normalized unicode when the converted value is unicode,
        otherwise the converted value as-is
    """
    if leader_says_marc8:
        # mnemonics.read() must run before marc8.translate(): it substitutes
        # MARC8 bytes for mnemonics, and those bytes still need converting.
        data = marc8.translate(mnemonics.read(data))
        return normalize('NFC', data) if isinstance(data, unicode) else data
    if bad_ia_charset:
        return normalize('NFC', marc8.translate(data))
    data = mnemonics.read(data)
    if not isinstance(data, unicode):
        try:
            data = data.decode('utf-8')
        except UnicodeDecodeError:
            # Not valid UTF-8, so treat the content as MARC8.
            data = marc8.translate(data)
    if isinstance(data, unicode):
        return normalize('NFC', data)
    return data
def translate(bytes_in, leader_says_marc8=False):
    """
    Converts a binary MARC field value to unicode str,
    from either MARC8 or UTF8 encoded bytes.

    :param bytes_in bytes: raw field content
    :param leader_says_marc8 bool: when true the content is passed through
        mnemonics.read() and a MARC8 converter; otherwise it is decoded as UTF-8
    :rtype: str
    :return: the NFC normalized text
    """
    # Kept as an assert so callers see the same AssertionError as before.
    assert isinstance(bytes_in, bytes)
    if leader_says_marc8:
        # Only the MARC8 path needs a converter, so it is built here instead
        # of on every call.
        converter = MARC8ToUnicode(quiet=True)
        text = converter.translate(mnemonics.read(bytes_in))
    else:
        text = bytes_in.decode('utf-8')
    return normalize('NFC', text)
def translate(data, bad_ia_charset=False):
    """
    Convert raw MARC field content to unicode, NFC normalized where possible.

    :param data: raw field content
    :param bad_ia_charset: when true the content is decoded as UTF-8 and the
        decoded text is then passed through marc8.translate()
    :return: NFC normalized unicode when the converted value is unicode,
        otherwise the converted value as-is
    """
    if bad_ia_charset:
        decoded = data.decode("utf-8")
        return normalize("NFC", marc8.translate(decoded))
    data = mnemonics.read(data)
    if not isinstance(data, unicode):
        try:
            data = data.decode("utf8")
        except UnicodeDecodeError:
            # Not valid UTF-8, so treat the content as MARC8.
            data = marc8.translate(data)
    if isinstance(data, unicode):
        return normalize("NFC", data)
    return data
def test_read_no_change():
    """read() must hand this title back exactly as it was given."""
    title = b'El Ing.{eniero} Federico E. Capurro y el nacimiento de la profesi\xe2on bibliotecaria en el Uruguay.'
    result = read(title)
    assert result == title
def test_read_conversion_to_marc8():
    """read() must replace the brace-delimited mnemonics in this name with the expected bytes."""
    with_mnemonics = b'Tha{mllhring}{macr}alib{macr}i, {mllhring}Abd al-Malik ibn Mu{dotb}hammad,'
    expected = b'Tha\xb0\xe5alib\xe5i, \xb0Abd al-Malik ibn Mu\xf2hammad,'
    converted = read(with_mnemonics)
    assert converted == expected