#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Translate between encodings on the fly with codecs.EncodedFile.

Modernized to Python 3: print() function, io.BytesIO instead of the
removed cStringIO module, and plain str literals (str is Unicode).
"""
from codecs_to_hex import to_hex

import codecs
import io

# Raw version of the original data.
data = 'pi: \u03c0'

# Manually encode it as UTF-8.
utf8 = data.encode('utf-8')
print('Inizia come UTF-8 :', to_hex(utf8, 1))

# Set up an in-memory output buffer, then wrap it as an EncodedFile so
# UTF-8 writes are transcoded to UTF-16 before landing in the buffer.
output = io.BytesIO()
encoded_file = codecs.EncodedFile(output, data_encoding='utf-8',
                                  file_encoding='utf-16')
encoded_file.write(utf8)

# Fetch the buffer contents as a UTF-16 encoded byte string.
utf16 = output.getvalue()
print('Codificato in UTF-16:', to_hex(utf16, 2))

# Set up another buffer with the UTF-16 data for reading,
# and wrap it with another EncodedFile.
buffer = io.BytesIO(utf16)
encoded_file = codecs.EncodedFile(buffer, data_encoding='utf-8',
                                  file_encoding='utf-16')
def codecs_bom():
    """Print each BOM constant named in BOM_TYPES with its hex bytes."""
    for bom_name in BOM_TYPES:
        bom_bytes = getattr(codecs, bom_name)
        print('{:12} : {}'.format(bom_name, to_hex(bom_bytes, 2)))
"""Print the byte-order marker constants defined by codecs.

Modernized to Python 3 (print() function).
"""
import codecs

from codecs_to_hex import to_hex

# Show each BOM as hex, grouped in 2-byte units so the UTF-16/32
# markers read naturally.
for name in ['BOM', 'BOM_BE', 'BOM_LE',
             'BOM_UTF8',
             'BOM_UTF16', 'BOM_UTF16_BE', 'BOM_UTF16_LE',
             'BOM_UTF32', 'BOM_UTF32_BE', 'BOM_UTF32_LE',
             ]:
    print('{:12} : {}'.format(name, to_hex(getattr(codecs, name), 2)))
"""Show UTF-8 and UTF-16 byte representations of a Unicode string.

Fixes: the Greek letter pi had been mangled to an ASCII 'p' (the
sibling examples use \u03c0); restored via an escape so the file is
encoding-agnostic.  Also modernized to Python 3 print().
"""
from codecs_to_hex import to_hex

text = 'pi: \u03c0'

print('Raw :', repr(text))
print('UTF-8 :', to_hex(text.encode('utf-8'), 1))
print('UTF-16:', to_hex(text.encode('utf-16'), 2))
"""Demonstrate a UnicodeDecodeError by reading with the wrong codec.

Usage: pass the error handler name (strict/replace/ignore) as argv[1].
Modernized to Python 3: print() function and "except ... as" syntax
(the old comma form is a syntax error in Python 3).
"""
import codecs
import sys

from codecs_to_hex import to_hex

error_handling = sys.argv[1]

text = 'pi: \u03c0'

print('Originale :', repr(text))

# Save the data with one encoding (UTF-16).
with codecs.open('decode_error.txt', 'w', encoding='utf-16') as f:
    f.write(text)

# Dump the raw bytes from the file.
with open('decode_error.txt', 'rb') as f:
    print('Contenuto del file:', to_hex(f.read(), 1))

# Try to read the data back with the wrong encoding.
with codecs.open('decode_error.txt', 'r',
                 encoding='utf-8',
                 errors=error_handling) as f:
    try:
        data = f.read()
    except UnicodeDecodeError as err:
        print('ERRORE:', err)
    else:
        print('Letto :', repr(data))
# codecs_bom_detection.py
"""Let the utf-16 codec detect and consume the BOM when decoding."""
import codecs

from codecs_to_hex import to_hex

# Dump the raw bytes first so the BOM is visible.
with open('nonnative-encoded.txt', mode='rb') as f:
    raw_bytes = f.read()
print('Grezzo :', to_hex(raw_bytes, 2))

# Re-open with the generic utf-16 codec, which inspects the BOM itself
# to choose the byte order and strips it from the decoded text.
with codecs.open(
        'nonnative-encoded.txt',
        mode='r',
        encoding='utf-16',
) as f:
    decoded_text = f.read()
print('Decodificato:', repr(decoded_text))
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2008 Doug Hellmann All rights reserved.
#
"""Print the byte-order marker constants defined by codecs.

Modernized to Python 3 (print() function).
"""
import codecs

from codecs_to_hex import to_hex

# Show each BOM as hex, grouped in 2-byte units.
for name in ['BOM', 'BOM_BE', 'BOM_LE',
             'BOM_UTF8',
             'BOM_UTF16', 'BOM_UTF16_BE', 'BOM_UTF16_LE',
             'BOM_UTF32', 'BOM_UTF32_BE', 'BOM_UTF32_LE',
             ]:
    print('{:12} : {}'.format(name, to_hex(getattr(codecs, name), 2)))
# -*- coding: utf-8 -*- """Translating between encodings on the fly. """ from codecs_to_hex import to_hex import codecs from cStringIO import StringIO # Raw version of the original data. data = u'pi: \u03c0' # Manually encode it as UTF-8. utf8 = data.encode('utf-8') print 'Start as UTF-8 :', to_hex(utf8, 1) # Set up an output buffer, then wrap it as an EncodedFile. output = StringIO() encoded_file = codecs.EncodedFile(output, data_encoding='utf-8', file_encoding='utf-16') encoded_file.write(utf8) # Fetch the buffer contents as a UTF-16 encoded byte string utf16 = output.getvalue() print 'Encoded to UTF-16:', to_hex(utf16, 2) # Set up another buffer with the UTF-16 data for reading, # and wrap it with another EncodedFile. buffer = StringIO(utf16) encoded_file = codecs.EncodedFile(buffer, data_encoding='utf-8',
"""Demonstrate a UnicodeDecodeError by reading with the wrong codec.

Usage: pass the error handler name (strict/replace/ignore) as argv[1].
Modernized to Python 3: print() function and "except ... as" syntax
(the old comma form is a syntax error in Python 3).
"""
import codecs
import sys

from codecs_to_hex import to_hex

error_handling = sys.argv[1]

text = 'pi: \u03c0'

print('Original :', repr(text))

# Save the data with one encoding.
with codecs.open('decode_error.txt', 'w', encoding='utf-16') as f:
    f.write(text)

# Dump the bytes from the file.
with open('decode_error.txt', 'rb') as f:
    print('File contents:', to_hex(f.read(), 1))

# Try to read the data with the wrong encoding.
with codecs.open('decode_error.txt', 'r',
                 encoding='utf-8',
                 errors=error_handling) as f:
    try:
        data = f.read()
    except UnicodeDecodeError as err:
        print('ERROR:', err)
    else:
        print('Read :', repr(data))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Translating between encodings on the fly.

Modernized to Python 3: print() function and io.BytesIO in place of
the removed cStringIO module.
"""
from codecs_to_hex import to_hex

import codecs
import io

# Raw version of the original data.
data = 'pi: \u03c0'

# Manually encode it as UTF-8.
utf8 = data.encode('utf-8')
print('Start as UTF-8 :', to_hex(utf8, 1))

# Set up an output buffer, then wrap it as an EncodedFile that
# transcodes UTF-8 writes to UTF-16.
output = io.BytesIO()
encoded_file = codecs.EncodedFile(output, data_encoding='utf-8',
                                  file_encoding='utf-16')
encoded_file.write(utf8)

# Fetch the buffer contents as a UTF-16 encoded byte string.
utf16 = output.getvalue()
print('Encoded to UTF-16:', to_hex(utf16, 2))

# Set up another buffer with the UTF-16 data for reading,
# and wrap it with another EncodedFile.
buffer = io.BytesIO(utf16)
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2008 Doug Hellmann All rights reserved.
#
"""Show UTF-8 and UTF-16 byte representations of a Unicode string.

Modernized to Python 3 (print() function; str is Unicode).
"""
from codecs_to_hex import to_hex

text = 'pi: π'

print('Raw :', repr(text))
print('UTF-8 :', to_hex(text.encode('utf-8'), 1))
print('UTF-16:', to_hex(text.encode('utf-16'), 2))
"""Write a Unicode string with codecs.open and dump the raw bytes."""
import codecs
import sys

from codecs_to_hex import to_hex

encoding = sys.argv[1]
filename = encoding + '.txt'

print('writing to >', filename)
with codecs.open(filename, mode='w', encoding=encoding) as out:
    out.write('français')

# Byte grouping for to_hex(): 1 for utf-8, 2 for utf-16, 4 for utf-32,
# defaulting to single bytes for anything else.
nbytes = {'utf-8': 1, 'utf-16': 2, 'utf-32': 4}.get(encoding, 1)

# Reopen in binary mode to show exactly what landed on disk.
print('File contents:')
with open(filename, mode='rb') as raw:
    print(to_hex(raw.read(), nbytes))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Demonstrate the representations of values using different encodings.

Modernized to Python 3: print() function; str is already Unicode and
encode() returns bytes.
"""
from codecs_to_hex import to_hex

text = 'pi: π'
encoded = text.encode('utf-8')
decoded = encoded.decode('utf-8')

print('Original :', repr(text))
print('Encoded :', to_hex(encoded, 1), type(encoded))
print('Decoded :', repr(decoded), type(decoded))
# -*- coding: utf-8 -*-
"""Create a UTF-16 file in the non-native byte order with an explicit
BOM.  Modernized to Python 3 (print() function; str is Unicode).
"""
import codecs

from codecs_to_hex import to_hex

# Pick the non-native version of UTF-16 encoding.
if codecs.BOM_UTF16 == codecs.BOM_UTF16_BE:
    bom = codecs.BOM_UTF16_LE
    encoding = 'utf_16_le'
else:
    bom = codecs.BOM_UTF16_BE
    encoding = 'utf_16_be'

print('Native order: ', to_hex(codecs.BOM_UTF16, 2))
print('Selected order:', to_hex(bom, 2))

# Encode the text with the explicit-order codec (which emits no BOM).
encoded_text = 'pi: \u03c0'.encode(encoding)
print('{:14}: {}'.format(encoding, to_hex(encoded_text, 2)))

with open('non-native-encoded.txt', mode='wb') as f:
    # Write the selected byte-order marker.  It is not included in the
    # encoded text because we were explicit about the byte order when
    # selecting the encoding.
    f.write(bom)
    # Write the byte string for the encoded text.
    f.write(encoded_text)
#
# Copyright (c) 2010 Doug Hellmann. All rights reserved.
#
"""Writing Unicode data to a file.

Fixes: the raw-byte dump opened the file in text mode ('rt'), which
can corrupt the bytes via newline translation and hands to_hex() text
instead of bytes -- it now uses binary mode ('rb'), matching the
sibling examples.  Also modernized to Python 3: print() and
codecs.open mode 'w' (codecs.open appends 'b' itself, so 'wt' would
be rejected).
"""
#end_pymotw_header
from codecs_to_hex import to_hex

import codecs
import sys

encoding = sys.argv[1]
filename = encoding + '.txt'

print('Writing to', filename)
with codecs.open(filename, mode='w', encoding=encoding) as f:
    f.write('pi: \u03c0')

# Determine the byte grouping to use for to_hex().
nbytes = {
    'utf-8': 1,
    'utf-16': 2,
    'utf-32': 4,
}.get(encoding, 1)

# Show the raw bytes in the file.
print('File contents:')
with open(filename, mode='rb') as f:
    print(to_hex(f.read(), nbytes))
# codecs_open_write.py
"""Write Unicode text with codecs.open, then dump the file's bytes."""
import codecs
import sys

from codecs_to_hex import to_hex

encoding = sys.argv[1]
filename = encoding + '.txt'

print('Scrittura verso', filename)
with codecs.open(filename, mode='w', encoding=encoding) as out:
    out.write('français')

# Byte grouping for to_hex(): 1 for utf-8, 2 for utf-16, 4 for utf-32,
# defaulting to single bytes for anything else.
nbytes = {'utf-8': 1, 'utf-16': 2, 'utf-32': 4}.get(encoding, 1)

# Reopen in binary mode to show exactly what landed on disk.
print('Contenuto del file:')
with open(filename, mode='rb') as raw:
    print(to_hex(raw.read(), nbytes))
passed as the first argument, and data_encoding value refers to the
encoding in use by the data passing through the read() and write()
calls.
"""
# NOTE(review): this chunk is truncated at both ends -- the docstring
# opens before the text above, and the final EncodedFile() call is cut
# off after data_encoding='utf-8'.  Python 2 example (print statements,
# cStringIO); it will not run under Python 3 as written.
from codecs_to_hex import to_hex
import codecs
from cStringIO import StringIO

# Raw version of the original data.
data = u'pi: \u03c0'

# Manually encode it as utf-8
utf8 = data.encode('utf-8')
print 'Start as utf-8 :', to_hex(utf8, 1)

# Set up an output buffer, then wrap it as EncodedFile.  Writes of
# utf-8 data are transcoded to utf-16 before landing in the buffer.
output = StringIO()
encoded_file = codecs.EncodedFile(output, data_encoding='utf-8',
                                  file_encoding='utf-16')
encoded_file.write(utf8)

# Fetch the buffer contents as a utf-16 encoded byte string
utf16 = output.getvalue()
print 'Encoded to utf-16:', to_hex(utf16, 2)

# Set up another buffer with the utf-16 data from reading
# and wrap it with another EncodedFile
buffer = StringIO(utf16)
encoded_file = codecs.EncodedFile(buffer, data_encoding='utf-8',
#!/usr/bin/env python
# encoding: utf-8
#
# Copyright (c) 2010 Doug Hellmann. All rights reserved.
#
"""Demonstrate the representations of values using different encodings.

Modernized to Python 3: print() function; str is already Unicode and
encode() returns bytes.
"""
#end_pymotw_header
from codecs_to_hex import to_hex

text = 'pi: π'
encoded = text.encode('utf-8')
decoded = encoded.decode('utf-8')

print('Original :', repr(text))
print('Encoded :', to_hex(encoded, 1), type(encoded))
print('Decoded :', repr(decoded), type(decoded))
#
"""Translating between encodings on the fly.
"""
# end_pymotw_header
# Modernized to Python 3: print() function and io.BytesIO in place of
# the removed cStringIO module.
from codecs_to_hex import to_hex

import codecs
import io

# Raw version of the original data.
data = "pi: \u03c0"

# Manually encode it as UTF-8.
utf8 = data.encode("utf-8")
print("Start as UTF-8 :", to_hex(utf8, 1))

# Set up an output buffer, then wrap it as an EncodedFile that
# transcodes UTF-8 writes to UTF-16.
output = io.BytesIO()
encoded_file = codecs.EncodedFile(output, data_encoding="utf-8",
                                  file_encoding="utf-16")
encoded_file.write(utf8)

# Fetch the buffer contents as a UTF-16 encoded byte string.
utf16 = output.getvalue()
print("Encoded to UTF-16:", to_hex(utf16, 2))

# Set up another buffer with the UTF-16 data for reading,
# and wrap it with another EncodedFile.
buffer = io.BytesIO(utf16)
encoded_file = codecs.EncodedFile(buffer, data_encoding="utf-8",
                                  file_encoding="utf-16")
#end_pymotw_header
"""Demonstrate a UnicodeDecodeError by reading with the wrong codec.

Usage: pass the error handler name (strict/replace/ignore) as argv[1].
Modernized to Python 3: print() function and "except ... as" syntax
(the old comma form is a syntax error in Python 3).
"""
import codecs
import sys

from codecs_to_hex import to_hex

error_handling = sys.argv[1]

text = 'pi: \u03c0'

print('Original :', repr(text))

# Save the data with one encoding.
with codecs.open('decode_error.txt', 'w', encoding='utf-16') as f:
    f.write(text)

# Dump the bytes from the file.
with open('decode_error.txt', 'rb') as f:
    print('File contents:', to_hex(f.read(), 1))

# Try to read the data with the wrong encoding.
with codecs.open('decode_error.txt', 'r',
                 encoding='utf-8',
                 errors=error_handling) as f:
    try:
        data = f.read()
    except UnicodeDecodeError as err:
        print('ERROR:', err)
    else:
        print('Read :', repr(data))
# codecs_bom_create_file.py
"""Write a UTF-16 file in the non-native byte order, emitting the
matching BOM by hand."""
import codecs

from codecs_to_hex import to_hex

# Choose whichever UTF-16 variant is NOT the native one on this host.
if codecs.BOM_UTF16 == codecs.BOM_UTF16_BE:
    bom, encoding = codecs.BOM_UTF16_LE, 'utf_16_le'
else:
    bom, encoding = codecs.BOM_UTF16_BE, 'utf_16_be'

print('Ordina nativo :', to_hex(codecs.BOM_UTF16, 2))
print('Ordine selezionato:', to_hex(bom, 2))

# Encode the sample text with the explicit-order codec.
encoded_text = 'français'.encode(encoding)
print('{:14}: {}'.format(encoding, to_hex(encoded_text, 2)))

with open('nonnative-encoded.txt', mode='wb') as out:
    # The explicit-order codec emits no BOM of its own, so write the
    # marker first, then the encoded bytes.
    out.write(bom)
    out.write(encoded_text)
# encoding: utf-8
#
# Copyright (c) 2010 Doug Hellmann. All rights reserved.
#
"""Writing Unicode data to a file.

Fixes: the raw-byte dump opened the file in text mode ('rt'), which
can corrupt the bytes via newline translation and hands to_hex() text
instead of bytes -- it now uses binary mode ('rb'), matching the
sibling examples.  Also modernized to Python 3: print() and
codecs.open mode 'w' (codecs.open appends 'b' itself, so 'wt' would
be rejected).
"""
#end_pymotw_header
from codecs_to_hex import to_hex

import codecs
import sys

encoding = sys.argv[1]
filename = encoding + '.txt'

print('Writing to', filename)
with codecs.open(filename, mode='w', encoding=encoding) as f:
    f.write('pi: \u03c0')

# Determine the byte grouping to use for to_hex().
nbytes = {
    'utf-8': 1,
    'utf-16': 2,
    'utf-32': 4,
}.get(encoding, 1)

# Show the raw bytes in the file.
print('File contents:')
with open(filename, mode='rb') as f:
    print(to_hex(f.read(), nbytes))
#
"""Create a file with nonnative BOM.
"""
# end_pymotw_header
# Modernized to Python 3 (print() function; str literals are Unicode).
import codecs

from codecs_to_hex import to_hex

# Pick the nonnative version of UTF-16 encoding.
if codecs.BOM_UTF16 == codecs.BOM_UTF16_BE:
    bom = codecs.BOM_UTF16_LE
    encoding = "utf_16_le"
else:
    bom = codecs.BOM_UTF16_BE
    encoding = "utf_16_be"

print("Native order :", to_hex(codecs.BOM_UTF16, 2))
print("Selected order:", to_hex(bom, 2))

# Encode the text.
encoded_text = "pi: \u03c0".encode(encoding)
print("{:14}: {}".format(encoding, to_hex(encoded_text, 2)))

with open("nonnative-encoded.txt", mode="wb") as f:
    # Write the selected byte-order marker.  It is not included
    # in the encoded text because the byte order was given
    # explicitly when selecting the encoding.
    f.write(bom)
    # Write the byte string for the encoded text.
    f.write(encoded_text)
"""Show encoded and decoded representations of a Unicode string.

Fixes: the Greek letter pi had been mangled to an ASCII 'p' (the
sibling examples use \u03c0); restored via an escape so the file is
encoding-agnostic.  Also modernized to Python 3 print().
"""
from codecs_to_hex import to_hex

text = 'pi: \u03c0'
encoded = text.encode('utf-8')
decoded = encoded.decode('utf-8')

print('Originali :', repr(text))
print('Codificati :', to_hex(encoded, 1), type(encoded))
print('Decodificati:', repr(decoded), type(decoded))
# codecs_encodings.py
"""Show a string's code points and its UTF-8/UTF-16 byte encodings."""
import unicodedata

from codecs_to_hex import to_hex

text = 'français'

print(f'Raw : {text!r}')
# Name every code point; fall back to the character itself when
# unicodedata has no name for it.
for ch in text:
    print(f' {ch!r}: {unicodedata.name(ch, ch)}')

print(f"UTF-8 : {to_hex(text.encode('utf-8'), 1)!r}")
print(f"UTF-16: {to_hex(text.encode('utf-16'), 2)!r}")
# codecs_encodedfile.py
"""Transcode between UTF-8 and UTF-16 on the fly with EncodedFile."""
import codecs
import io

from codecs_to_hex import to_hex

# The original text, as a plain (Unicode) string.
data = 'français'

# Encode it to UTF-8 by hand.
utf8 = data.encode('utf-8')
print('Parte com UTF-8 :', to_hex(utf8, 1))

# Wrap an in-memory byte buffer so UTF-8 writes land as UTF-16.
output = io.BytesIO()
encoded_file = codecs.EncodedFile(output, data_encoding='utf-8',
                                  file_encoding='utf-16')
encoded_file.write(utf8)

# The buffer now holds the UTF-16 byte string.
utf16 = output.getvalue()
print('Codificato in UTF-16:', to_hex(utf16, 2))

# Wrap a second buffer holding the UTF-16 bytes for reading back.
buffer = io.BytesIO(utf16)
encoded_file = codecs.EncodedFile(buffer, data_encoding='utf-8',
                                  file_encoding='utf-16')
# codecs_bom.py
"""Print every BOM constant exposed by codecs with its hex bytes."""
import codecs

from codecs_to_hex import to_hex

BOM_TYPES = [
    'BOM',
    'BOM_BE',
    'BOM_LE',
    'BOM_UTF8',
    'BOM_UTF16',
    'BOM_UTF16_BE',
    'BOM_UTF16_LE',
    'BOM_UTF32',
    'BOM_UTF32_BE',
    'BOM_UTF32_LE',
]

# Two-byte grouping keeps the multi-byte markers readable.
for name in BOM_TYPES:
    marker = getattr(codecs, name)
    print('{:12} : {}'.format(name, to_hex(marker, 2)))
#!/usr/bin/env python
# encoding: utf-8
#
# Copyright (c) 2010 Doug Hellmann. All rights reserved.
#
"""Detecting the BOM.

Modernized to Python 3: print() function, and codecs.open mode 'r'
instead of 'rt' (codecs.open appends 'b' to the mode itself, so 'rt'
is rejected by Python 3's open()).
"""
#end_pymotw_header
import codecs

from codecs_to_hex import to_hex

# Look at the raw data.
with open('non-native-encoded.txt', mode='rb') as f:
    raw_bytes = f.read()
print('Raw :', to_hex(raw_bytes, 2))

# Re-open the file and let codecs detect the BOM.
with codecs.open('non-native-encoded.txt',
                 mode='r',
                 encoding='utf-16') as f:
    decoded_text = f.read()
print('Decoded:', repr(decoded_text))
# codecs_bom_detection.py
"""Show that the utf-16 codec detects and strips the BOM on read."""
import codecs

from codecs_to_hex import to_hex

# Dump the raw bytes first so the BOM is visible.
with open('nonnative-encoded.txt', mode='rb') as f:
    raw_bytes = f.read()
print('Raw :', to_hex(raw_bytes, 2))

# Re-open with the generic utf-16 codec, which inspects the BOM itself
# to choose the byte order and strips it from the decoded text.
with codecs.open(
        'nonnative-encoded.txt',
        mode='r',
        encoding='utf-16',
) as f:
    decoded_text = f.read()
print('Decoded:', repr(decoded_text))