Code Example #1
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from codecs_to_hex import to_hex

import codecs
from cStringIO import StringIO

# Raw version of the original data.
data = u'pi: \u03c0'

# Manually encode it as UTF-8.
utf8 = data.encode('utf-8')
print 'Start as UTF-8   :', to_hex(utf8, 1)

# Set up an output buffer, then wrap it as an EncodedFile.
output = StringIO()
encoded_file = codecs.EncodedFile(output, data_encoding='utf-8',
                                  file_encoding='utf-16')
encoded_file.write(utf8)

# Fetch the buffer contents as a UTF-16 encoded byte string
utf16 = output.getvalue()
print 'Encoded to UTF-16:', to_hex(utf16, 2)

# Set up another buffer with the UTF-16 data for reading,
# and wrap it with another EncodedFile.
buffer = StringIO(utf16)
encoded_file = codecs.EncodedFile(buffer, data_encoding='utf-8',
                                  file_encoding='utf-16')
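
Every example in this listing imports to_hex from a companion codecs_to_hex module that is not reproduced on this page. The sketch below shows one plausible implementation, assuming only what the call sites imply (the second argument is the number of bytes per hex group); it is an illustration, not the original module.

# codecs_to_hex.py -- minimal sketch of the helper imported by the examples;
# the exact behaviour of the original module is an assumption.
import binascii


def to_hex(t, nbytes):
    """Format the byte string t as hex digits grouped nbytes at a time."""
    chars_per_item = nbytes * 2       # two hex digits per byte
    hex_version = binascii.hexlify(t)
    return b' '.join(
        hex_version[start:start + chars_per_item]
        for start in range(0, len(hex_version), chars_per_item)
    )
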
Code Example #2
# Requires: import codecs, from codecs_to_hex import to_hex, and the
# BOM_TYPES list shown in Code Example #27.
def codecs_bom():
    for name in BOM_TYPES:
        print('{:12} : {}'.format(name, to_hex(getattr(codecs, name), 2)))
Code Example #3
import codecs
from codecs_to_hex import to_hex

for name in [ 'BOM', 'BOM_BE', 'BOM_LE',
              'BOM_UTF8',
              'BOM_UTF16', 'BOM_UTF16_BE', 'BOM_UTF16_LE',
              'BOM_UTF32', 'BOM_UTF32_BE', 'BOM_UTF32_LE',
              ]:
    print '{:12} : {}'.format(name, to_hex(getattr(codecs, name), 2))
Code Example #4
from codecs_to_hex import to_hex

text = u'pi: \u03c0'

print 'Raw   :', repr(text)
print 'UTF-8 :', to_hex(text.encode('utf-8'), 1)
print 'UTF-16:', to_hex(text.encode('utf-16'), 2)
Code Example #5
import codecs
import sys

from codecs_to_hex import to_hex

error_handling = sys.argv[1]

text = u'pi: \u03c0'
print 'Original     :', repr(text)

# Save the data with one encoding
with codecs.open('decode_error.txt', 'w', encoding='utf-16') as f:
    f.write(text)

# Dump the bytes from the file
with open('decode_error.txt', 'rb') as f:
    print 'File contents:', to_hex(f.read(), 1)

# Try to read the data with the wrong encoding
with codecs.open('decode_error.txt', 'r',
                 encoding='utf-8',
                 errors=error_handling) as f:
    try:
        data = f.read()
    except UnicodeDecodeError, err:
        print 'ERROR:', err
    else:
        print 'Read         :', repr(data)
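
The example above takes the error handler name as a command-line argument. As a quick illustration of what the standard handlers do in this wrong-encoding scenario, the following sketch (Python 3 syntax, not part of the original example) decodes the same UTF-16 bytes as UTF-8 with each handler:

# Sketch (Python 3): decode UTF-16 bytes as UTF-8 with each standard
# error handler ('strict' raises, 'ignore' drops bad bytes, 'replace'
# substitutes U+FFFD).
utf16_bytes = 'pi: \u03c0'.encode('utf-16')

for handler in ['strict', 'ignore', 'replace']:
    try:
        decoded = utf16_bytes.decode('utf-8', errors=handler)
        print('{:8}: {!r}'.format(handler, decoded))
    except UnicodeDecodeError as err:
        print('{:8}: ERROR {}'.format(handler, err))
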
Code Example #6
# codecs_bom_detection.py
import codecs
from codecs_to_hex import to_hex

# Read the raw data
with open('nonnative-encoded.txt', mode='rb') as f:
    raw_bytes = f.read()

print('Raw    :', to_hex(raw_bytes, 2))

# Re-open the file and let codecs detect the BOM
with codecs.open('nonnative-encoded.txt',
                 mode='r',
                 encoding='utf-16',
                 ) as f:
    decoded_text = f.read()

print('Decoded:', repr(decoded_text))
Code Example #7
File: codecs_bom.py  Project: socmap/hello-python2
#!/usr/bin/python
# -*- coding: utf-8 -*-

#
# Copyright (c) 2008 Doug Hellmann All rights reserved.
#

import codecs
from codecs_to_hex import to_hex

for name in [
        'BOM',
        'BOM_BE',
        'BOM_LE',
        'BOM_UTF8',
        'BOM_UTF16',
        'BOM_UTF16_BE',
        'BOM_UTF16_LE',
        'BOM_UTF32',
        'BOM_UTF32_BE',
        'BOM_UTF32_LE',
]:
    print '{:12} : {}'.format(name, to_hex(getattr(codecs, name), 2))
Code Example #8
# -*- coding: utf-8 -*-

"""Translating between encodings on the fly.
"""

from codecs_to_hex import to_hex

import codecs
from cStringIO import StringIO

# Raw version of the original data.
data = u'pi: \u03c0'

# Manually encode it as UTF-8.
utf8 = data.encode('utf-8')
print 'Start as UTF-8   :', to_hex(utf8, 1)

# Set up an output buffer, then wrap it as an EncodedFile.
output = StringIO()
encoded_file = codecs.EncodedFile(output, data_encoding='utf-8',
                                  file_encoding='utf-16')
encoded_file.write(utf8)

# Fetch the buffer contents as a UTF-16 encoded byte string
utf16 = output.getvalue()
print 'Encoded to UTF-16:', to_hex(utf16, 2)

# Set up another buffer with the UTF-16 data for reading,
# and wrap it with another EncodedFile.
buffer = StringIO(utf16)
encoded_file = codecs.EncodedFile(buffer, data_encoding='utf-8',
                                  file_encoding='utf-16')
Code Example #9
import codecs
import sys

from codecs_to_hex import to_hex

error_handling = sys.argv[1]

text = u'pi: \u03c0'
print 'Original     :', repr(text)

# Save the data with one encoding
with codecs.open('decode_error.txt', 'w', encoding='utf-16') as f:
    f.write(text)

# Dump the bytes from the file
with open('decode_error.txt', 'rb') as f:
    print 'File contents:', to_hex(f.read(), 1)

# Try to read the data with the wrong encoding
with codecs.open('decode_error.txt',
                 'r',
                 encoding='utf-8',
                 errors=error_handling) as f:
    try:
        data = f.read()
    except UnicodeDecodeError, err:
        print 'ERROR:', err
    else:
        print 'Read         :', repr(data)
Code Example #10
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Translating between encodings on the fly.
"""

from codecs_to_hex import to_hex

import codecs
from cStringIO import StringIO

# Raw version of the original data.
data = u'pi: \u03c0'

# Manually encode it as UTF-8.
utf8 = data.encode('utf-8')
print 'Start as UTF-8   :', to_hex(utf8, 1)

# Set up an output buffer, then wrap it as an EncodedFile.
output = StringIO()
encoded_file = codecs.EncodedFile(output,
                                  data_encoding='utf-8',
                                  file_encoding='utf-16')
encoded_file.write(utf8)

# Fetch the buffer contents as a UTF-16 encoded byte string
utf16 = output.getvalue()
print 'Encoded to UTF-16:', to_hex(utf16, 2)

# Set up another buffer with the UTF-16 data for reading,
# and wrap it with another EncodedFile.
buffer = StringIO(utf16)
encoded_file = codecs.EncodedFile(buffer,
                                  data_encoding='utf-8',
                                  file_encoding='utf-16')
Code Example #11
#!/usr/bin/python
# -*- coding: utf-8 -*-

#
# Copyright (c) 2008 Doug Hellmann All rights reserved.
#


from codecs_to_hex import to_hex

text = u'pi: π'

print 'Raw   :', repr(text)
print 'UTF-8 :', to_hex(text.encode('utf-8'), 1)
print 'UTF-16:', to_hex(text.encode('utf-16'), 2)
Code Example #12
from codecs_to_hex import to_hex

import codecs
import sys

encoding = sys.argv[1]
filename = encoding + '.txt'

print('Writing to', filename)
with codecs.open(filename, mode='w', encoding=encoding) as f:
    f.write('français')

nbytes = {
    'utf-8': 1,
    'utf-16': 2,
    'utf-32': 4,
}.get(encoding, 1)

print('File contents:')
with open(filename, mode='rb') as f:
    print(to_hex(f.read(), nbytes))
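
For comparison, in Python 3 the built-in open() also accepts an encoding argument, so codecs.open() is not strictly required for a write like the one above. A minimal sketch (with a hard-coded encoding instead of a command-line argument) could look like this:

# Sketch (Python 3): the same write using the built-in open() instead of
# codecs.open(); the encoding is hard-coded here for brevity.
encoding = 'utf-16'
filename = encoding + '.txt'

with open(filename, mode='w', encoding=encoding) as f:
    f.write('français')

# Read the raw bytes back, including the BOM written for utf-16.
with open(filename, mode='rb') as f:
    print(f.read())
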
Code Example #13
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Demonstrate the representations of values using different encodings.
"""

from codecs_to_hex import to_hex

text = u'pi: π'
encoded = text.encode('utf-8')
decoded = encoded.decode('utf-8')

print 'Original :', repr(text)
print 'Encoded  :', to_hex(encoded, 1), type(encoded)
print 'Decoded  :', repr(decoded), type(decoded)
Code Example #14
# -*- coding: utf-8 -*-

import codecs
from codecs_to_hex import to_hex

# Pick the non-native version of UTF-16 encoding
if codecs.BOM_UTF16 == codecs.BOM_UTF16_BE:
    bom = codecs.BOM_UTF16_LE
    encoding = 'utf_16_le'
else:
    bom = codecs.BOM_UTF16_BE
    encoding = 'utf_16_be'

print 'Native order:  ', to_hex(codecs.BOM_UTF16, 2)
print 'Selected order:', to_hex(bom, 2)

# Encode the text
encoded_text = u'pi: \u03c0'.encode(encoding)
print '{:14}: {}'.format(encoding, to_hex(encoded_text, 2))

with open('non-native-encoded.txt', mode='wb') as f:
    # Write the selected byte-order marker. It is not included in the
    # encoded text because we were explicit about the byte order when
    # selecting the encoding.
    f.write(bom)
    # Write the byte string for the encoded text.
    f.write(encoded_text)
Code Example #15
#
# Copyright (c) 2010 Doug Hellmann.  All rights reserved.
#
"""Writing Unicode data to a file.
"""
#end_pymotw_header

from codecs_to_hex import to_hex

import codecs
import sys

encoding = sys.argv[1]
filename = encoding + '.txt'

print 'Writing to', filename
with codecs.open(filename, mode='wt', encoding=encoding) as f:
    f.write(u'pi: \u03c0')

# Determine the byte grouping to use for to_hex()
nbytes = {
    'utf-8': 1,
    'utf-16': 2,
    'utf-32': 4,
}.get(encoding, 1)

# Show the raw bytes in the file
print 'File contents:'
with open(filename, mode='rt') as f:
    print to_hex(f.read(), nbytes)
Code Example #16
# codecs_open_write.py

from codecs_to_hex import to_hex

import codecs
import sys

encoding = sys.argv[1]
filename = encoding + '.txt'

print('Writing to', filename)
with codecs.open(filename, mode='w', encoding=encoding) as f:
    f.write('français')

# Determine the byte grouping to use for to_hex()
nbytes = {
    'utf-8': 1,
    'utf-16': 2,
    'utf-32': 4,
}.get(encoding, 1)

# Show the raw bytes in the file
print('File contents:')
with open(filename, mode='rb') as f:
    print(to_hex(f.read(), nbytes))
Code Example #17
File: codecs_encodedfile.py  Project: rApeNB/PyMOTW
"""... passed as the first argument, and data_encoding value refers to the
encoding in use by the data passing through the read() and write() calls.
"""

from codecs_to_hex import to_hex

import codecs
from cStringIO import StringIO

# Raw version of the original data.
data = u'pi: \u03c0'

# Manually encode it as utf-8
utf8 = data.encode('utf-8')
print 'Start as utf-8   :', to_hex(utf8, 1)

# Set up an output buffer, then wrap it as EncodedFile
output = StringIO()
encoded_file = codecs.EncodedFile(output, data_encoding='utf-8',
                                  file_encoding='utf-16')
encoded_file.write(utf8)

# Fetch the buffer contents as a utf-16 encoded byte string
utf16 = output.getvalue()
print 'Encoded to utf-16:', to_hex(utf16, 2)

# Set up another buffer with the utf-16 data for reading
# and wrap it with another EncodedFile
buffer = StringIO(utf16)
encoded_file = codecs.EncodedFile(buffer, data_encoding='utf-8',
                                  file_encoding='utf-16')
Code Example #18
#!/usr/bin/env python
# encoding: utf-8
#
# Copyright (c) 2010 Doug Hellmann.  All rights reserved.
#
"""Demonstrate the representations of values using different encodings.
"""
#end_pymotw_header

from codecs_to_hex import to_hex

text = u'pi: π'
encoded = text.encode('utf-8')
decoded = encoded.decode('utf-8')

print 'Original :', repr(text)
print 'Encoded  :', to_hex(encoded, 1), type(encoded)
print 'Decoded  :', repr(decoded), type(decoded)
Code Example #19
#
"""Translating between encodings on the fly.
"""
# end_pymotw_header

from codecs_to_hex import to_hex

import codecs
from cStringIO import StringIO

# Raw version of the original data.
data = u"pi: \u03c0"

# Manually encode it as UTF-8.
utf8 = data.encode("utf-8")
print "Start as UTF-8   :", to_hex(utf8, 1)

# Set up an output buffer, then wrap it as an EncodedFile.
output = StringIO()
encoded_file = codecs.EncodedFile(output, data_encoding="utf-8", file_encoding="utf-16")
encoded_file.write(utf8)

# Fetch the buffer contents as a UTF-16 encoded byte string
utf16 = output.getvalue()
print "Encoded to UTF-16:", to_hex(utf16, 2)

# Set up another buffer with the UTF-16 data for reading,
# and wrap it with another EncodedFile.
buffer = StringIO(utf16)
encoded_file = codecs.EncodedFile(buffer, data_encoding="utf-8", file_encoding="utf-16")
Code Example #20
#end_pymotw_header

import codecs
import sys

from codecs_to_hex import to_hex

error_handling = sys.argv[1]

text = u'pi: \u03c0'
print 'Original     :', repr(text)

# Save the data with one encoding
with codecs.open('decode_error.txt', 'w', encoding='utf-16') as f:
    f.write(text)

# Dump the bytes from the file
with open('decode_error.txt', 'rb') as f:
    print 'File contents:', to_hex(f.read(), 1)

# Try to read the data with the wrong encoding
with codecs.open('decode_error.txt', 'r',
                 encoding='utf-8',
                 errors=error_handling) as f:
    try:
        data = f.read()
    except UnicodeDecodeError, err:
        print 'ERROR:', err
    else:
        print 'Read         :', repr(data)
Code Example #21
# codecs_bom_create_file.py

import codecs
from codecs_to_hex import to_hex

# Pick the nonnative version of UTF-16 encoding
if codecs.BOM_UTF16 == codecs.BOM_UTF16_BE:
    bom = codecs.BOM_UTF16_LE
    encoding = 'utf_16_le'
else:
    bom = codecs.BOM_UTF16_BE
    encoding = 'utf_16_be'

print('Native order  :', to_hex(codecs.BOM_UTF16, 2))
print('Selected order:', to_hex(bom, 2))

# Encode the text
encoded_text = 'français'.encode(encoding)
print('{:14}: {}'.format(encoding, to_hex(encoded_text, 2)))

with open('nonnative-encoded.txt', mode='wb') as f:
    # Write the selected byte-order marker.  It is not included
    # in the encoded text because the byte order was given
    # explicitly when selecting the encoding.
    f.write(bom)
    # Write the byte string for the encoded text.
    f.write(encoded_text)
Code Example #22
# encoding: utf-8
#
# Copyright (c) 2010 Doug Hellmann.  All rights reserved.
#
"""Writing Unicode data to a file.
"""
#end_pymotw_header

from codecs_to_hex import to_hex

import codecs
import sys

encoding = sys.argv[1]
filename = encoding + '.txt'

print 'Writing to', filename
with codecs.open(filename, mode='wt', encoding=encoding) as f:
    f.write(u'pi: \u03c0')

# Determine the byte grouping to use for to_hex()
nbytes = { 'utf-8':1,
           'utf-16':2,
           'utf-32':4,
           }.get(encoding, 1) 

# Show the raw bytes in the file
print 'File contents:'
with open(filename, mode='rt') as f:
    print to_hex(f.read(), nbytes)
Code Example #23
#
"""Create a file with nonnative BOM.
"""
# end_pymotw_header

import codecs
from codecs_to_hex import to_hex

# Pick the nonnative version of UTF-16 encoding
if codecs.BOM_UTF16 == codecs.BOM_UTF16_BE:
    bom = codecs.BOM_UTF16_LE
    encoding = "utf_16_le"
else:
    bom = codecs.BOM_UTF16_BE
    encoding = "utf_16_be"

print "Native order  :", to_hex(codecs.BOM_UTF16, 2)
print "Selected order:", to_hex(bom, 2)

# Encode the text.
encoded_text = u"pi: \u03c0".encode(encoding)
print "{:14}: {}".format(encoding, to_hex(encoded_text, 2))

with open("nonnative-encoded.txt", mode="wb") as f:
    # Write the selected byte-order marker.  It is not included
    # in the encoded text because the byte order was given
    # explicitly when selecting the encoding.
    f.write(bom)
    # Write the byte string for the encoded text.
    f.write(encoded_text)
Code Example #24
from codecs_to_hex import to_hex

text = u'pi: \u03c0'
encoded = text.encode('utf-8')
decoded = encoded.decode('utf-8')

print 'Original :', repr(text)
print 'Encoded  :', to_hex(encoded, 1), type(encoded)
print 'Decoded  :', repr(decoded), type(decoded)
Code Example #25
# codecs_encodings.py

import unicodedata
from codecs_to_hex import to_hex

text = 'français'

print('Raw   : {!r}'.format(text))
for c in text:
    print('  {!r}: {}'.format(c, unicodedata.name(c, c)))
print('UTF-8 : {!r}'.format(to_hex(text.encode('utf-8'), 1)))
print('UTF-16: {!r}'.format(to_hex(text.encode('utf-16'), 2)))
Code Example #26
# codecs_encodedfile.py
from codecs_to_hex import to_hex

import codecs
import io

# Raw version of the original data
data = 'français'

# Manually encode it as UTF-8
utf8 = data.encode('utf-8')
print('Start as UTF-8   :', to_hex(utf8, 1))


# Set up an output buffer, then wrap it as an EncodedFile.
output = io.BytesIO()
encoded_file = codecs.EncodedFile(output, data_encoding='utf-8',
                                  file_encoding='utf-16')
encoded_file.write(utf8)

# Fetch the buffer contents as a UTF-16 encoded byte string
utf16 = output.getvalue()
print('Encoded to UTF-16:', to_hex(utf16, 2))

# Set up another buffer with the UTF-16 data for reading,
# and wrap it with another EncodedFile.
buffer = io.BytesIO(utf16)
encoded_file = codecs.EncodedFile(buffer, data_encoding='utf-8',
                                  file_encoding='utf-16')
Code Example #27
# codecs_bom.py

import codecs
from codecs_to_hex import to_hex

BOM_TYPES = [
    'BOM', 'BOM_BE', 'BOM_LE',
    'BOM_UTF8',
    'BOM_UTF16', 'BOM_UTF16_BE', 'BOM_UTF16_LE',
    'BOM_UTF32', 'BOM_UTF32_BE', 'BOM_UTF32_LE',
]

for name in BOM_TYPES:
    print('{:12} : {}'.format(
        name, to_hex(getattr(codecs, name), 2)))
Code Example #28
#!/usr/bin/env python
# encoding: utf-8
#
# Copyright (c) 2010 Doug Hellmann.  All rights reserved.
#
"""Detecting the BOM.
"""
#end_pymotw_header

import codecs
from codecs_to_hex import to_hex

# Look at the raw data
with open('non-native-encoded.txt', mode='rb') as f:
    raw_bytes = f.read()

print 'Raw    :', to_hex(raw_bytes, 2)

# Re-open the file and let codecs detect the BOM
with codecs.open('non-native-encoded.txt', mode='rt', encoding='utf-16') as f:
    decoded_text = f.read()

print 'Decoded:', repr(decoded_text)
Code Example #29
# codecs_bom_detection.py

import codecs
from codecs_to_hex import to_hex

# Look at the raw data
with open('nonnative-encoded.txt', mode='rb') as f:
    raw_bytes = f.read()

print('Raw    :', to_hex(raw_bytes, 2))

# Re-open the file and let codecs detect the BOM
with codecs.open(
        'nonnative-encoded.txt',
        mode='r',
        encoding='utf-16',
) as f:
    decoded_text = f.read()

print('Decoded:', repr(decoded_text))
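
For reference, the BOM handling that codecs performs for the 'utf-16' codec can also be done by hand by comparing the first bytes of the file against the BOM constants. The sketch below (not part of the original examples) assumes the nonnative-encoded.txt file written by Code Example #21:

# Sketch: manual BOM detection, roughly what the 'utf-16' codec does
# automatically when no byte order is specified.
import codecs

with open('nonnative-encoded.txt', mode='rb') as f:
    raw_bytes = f.read()

if raw_bytes.startswith(codecs.BOM_UTF16_LE):
    text = raw_bytes[len(codecs.BOM_UTF16_LE):].decode('utf_16_le')
elif raw_bytes.startswith(codecs.BOM_UTF16_BE):
    text = raw_bytes[len(codecs.BOM_UTF16_BE):].decode('utf_16_be')
else:
    text = raw_bytes.decode('utf-16')  # no BOM: fall back to native order

print(repr(text))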