Ejemplo n.º 1
0
 def translate(self, data):
     """
     Decode the raw content of a MARC21 field into text.

     :param data bytes: raw field content, encoded as UTF-8 or MARC8;
         ``self.rec.marc8()`` decides which decoding path is taken
     :rtype: str
     :return: the decoded text. UTF-8 input is NFC normalized here; MARC8
         input is returned exactly as ``marc8.translate`` produces it.
     """
     if not self.rec.marc8():
         text = data.decode('utf8')
         return normalize('NFC', text)
     # MARC8 record: pass the data through mnemonics.read first, then hand
     # the result to the MARC8 converter.
     expanded = mnemonics.read(data)
     return marc8.translate(expanded)
Ejemplo n.º 2
0
def translate(bytes_in, leader_says_marc8=False):
    """
    Convert a raw MARC field value to an NFC normalized unicode string.

    :param bytes_in: field content as bytes; treated as MARC8 when
        ``leader_says_marc8`` is true, otherwise decoded as UTF-8
    :param leader_says_marc8: select the MARC8 conversion path
    :return: the decoded text, NFC normalized
    :raises UnicodeDecodeError: on the UTF-8 path, when ``bytes_in`` is not
        valid UTF-8
    """
    if leader_says_marc8:
        # Build the converter only on the path that uses it, so UTF-8 input
        # does not pay for an unused MARC8ToUnicode instance on every call.
        converter = MARC8ToUnicode(quiet=True)
        text = converter.translate(mnemonics.read(bytes_in))
    else:
        text = bytes_in.decode('utf-8')
    return normalize('NFC', text)
Ejemplo n.º 3
0
 def translate(self, data):
     """
     Decode raw MARC field data into an NFC normalized unicode string.

     When position 9 of the record leader is ``'a'`` the data is first
     tried as UTF-8. If the leader does not claim UTF-8, or the UTF-8
     decode fails, the data is passed through ``mnemonics.read`` and
     ``marc8.translate`` instead.

     :param data: raw field content (bytes)
     :return: the decoded text, NFC normalized
     """
     if self.rec.leader()[9] == 'a':
         try:
             text = data.decode('utf-8')
         except UnicodeDecodeError:
             # The leader claims UTF-8 but the bytes are not valid UTF-8:
             # fall through to the MARC8 path. Only decode failures are
             # caught, so unrelated errors are no longer silently hidden.
             pass
         else:
             return normalize('NFC', text)
     text = marc8.translate(mnemonics.read(data))
     return normalize('NFC', text)
Ejemplo n.º 4
0
 def translate(self, data):
     """
     Decode raw MARC field data into an NFC normalized unicode string.

     When position 9 of the record leader is ``'a'`` the data is first
     tried as UTF-8. If the leader does not claim UTF-8, or the UTF-8
     decode fails, the data is passed through ``mnemonics.read`` and
     ``marc8.translate`` instead.

     :param data: raw field content (bytes)
     :return: the decoded text, NFC normalized
     """
     if self.rec.leader()[9] == 'a':
         try:
             text = data.decode('utf-8')
         except UnicodeDecodeError:
             # The leader claims UTF-8 but the bytes are not valid UTF-8:
             # fall through to the MARC8 path. Only decode failures are
             # caught, so unrelated errors are no longer silently hidden.
             pass
         else:
             return normalize('NFC', text)
     text = marc8.translate(mnemonics.read(data))
     return normalize('NFC', text)
Ejemplo n.º 5
0
def translate(data, bad_ia_charset=False, leader_says_marc8=False):
    """
    Convert raw MARC field data to unicode text.

    Three paths, checked in this order:

    * ``leader_says_marc8``: the data is passed through ``mnemonics.read``
      and then ``marc8.translate``; the result is returned as-is.
    * ``bad_ia_charset``: the data goes straight to ``marc8.translate`` and
      the result is NFC normalized.
    * otherwise: after ``mnemonics.read`` the data is tried as UTF-8, with
      ``marc8.translate`` as the fallback when the UTF-8 decode fails.

    NOTE(review): ``unicode`` is assumed to be the Python 2 text type (or an
    equivalent alias in scope) -- confirm before running under Python 3.

    :param data: raw field content
    :param bad_ia_charset: force MARC8 conversion without mnemonic handling
    :param leader_says_marc8: treat the data as MARC8 with mnemonics
    :return: the converted text, NFC normalized whenever the result is a
        ``unicode`` object (the ``leader_says_marc8`` path returns the
        converter output unchanged)
    """
    if leader_says_marc8:
        # mnemonics.read produces MARC8 bytes, so it has to run before the
        # MARC8 -> unicode conversion; the reverse order would leave raw
        # MARC8 bytes in the already-converted output.
        return marc8.translate(mnemonics.read(data))
    if bad_ia_charset:
        return normalize('NFC', marc8.translate(data))
    data = mnemonics.read(data)
    if not isinstance(data, unicode):
        try:
            data = data.decode('utf-8')
        except UnicodeDecodeError:
            # Not valid UTF-8: treat the bytes as MARC8 instead.
            data = marc8.translate(data)
    if isinstance(data, unicode):
        return normalize('NFC', data)
    return data
Ejemplo n.º 6
0
def translate(bytes_in, leader_says_marc8=False):
    """
    Converts a binary MARC field value to unicode str,
    from either MARC8 or UTF8 encoded bytes.

    :param bytes_in bytes: raw field content
    :param leader_says_marc8 bool: when true the bytes are converted as
        MARC8 (after ``mnemonics.read``); otherwise they are decoded as UTF-8
    :rtype: str
    :return: the decoded text, NFC normalized
    :raises UnicodeDecodeError: on the UTF-8 path, when ``bytes_in`` is not
        valid UTF-8
    """
    assert isinstance(bytes_in, bytes)
    if leader_says_marc8:
        # Build the converter only on the path that uses it, so UTF-8 input
        # does not pay for an unused MARC8ToUnicode instance on every call.
        converter = MARC8ToUnicode(quiet=True)
        text = converter.translate(mnemonics.read(bytes_in))
    else:
        text = bytes_in.decode('utf-8')
    return normalize('NFC', text)
Ejemplo n.º 7
0
def translate(data, bad_ia_charset=False):
    """
    Convert raw MARC field data to unicode text.

    With ``bad_ia_charset`` the data is decoded as UTF-8 and the decoded
    text is then run through ``marc8.translate``. Otherwise the data goes
    through ``mnemonics.read`` and is tried as UTF-8, with
    ``marc8.translate`` as the fallback when the UTF-8 decode fails.

    NOTE(review): ``unicode`` is assumed to be the Python 2 text type (or an
    equivalent alias in scope) -- confirm before running under Python 3.

    :param data: raw field content
    :param bad_ia_charset: take the decode-then-MARC8 path described above
    :return: the converted text, NFC normalized whenever the result is a
        ``unicode`` object; otherwise whatever ``marc8.translate`` returned
    """
    if bad_ia_charset:
        decoded = data.decode("utf-8")
        return normalize("NFC", marc8.translate(decoded))
    data = mnemonics.read(data)
    if not isinstance(data, unicode):
        try:
            data = data.decode("utf8")
        except UnicodeDecodeError:
            # Not valid UTF-8: treat the bytes as MARC8 instead.
            data = marc8.translate(data)
    if isinstance(data, unicode):
        return normalize("NFC", data)
    return data
Ejemplo n.º 8
0
def test_read_no_change():
    """``read`` must hand back this input byte-for-byte.

    The sample contains a brace-delimited token (``{eniero}``) and a
    non-ASCII byte; neither may be altered.
    """
    untouched = b'El Ing.{eniero} Federico E. Capurro y el nacimiento de la profesi\xe2on bibliotecaria en el Uruguay.'
    result = read(untouched)
    assert result == untouched
Ejemplo n.º 9
0
def test_read_conversion_to_marc8():
    """``read`` replaces the brace-delimited mnemonics with single bytes.

    ``{mllhring}``, ``{macr}`` and ``{dotb}`` in the input are expected to
    come back as ``\\xb0``, ``\\xe5`` and ``\\xf2`` respectively.
    """
    with_mnemonics = b'Tha{mllhring}{macr}alib{macr}i, {mllhring}Abd al-Malik ibn Mu{dotb}hammad,'
    expected = b'Tha\xb0\xe5alib\xe5i, \xb0Abd al-Malik ibn Mu\xf2hammad,'
    result = read(with_mnemonics)
    assert result == expected