Example #1
1
def _fixcp1252(envkey=None):
    """
    Fixup cp1252 codec in order to use it as a real superset of latin-1

    Byte values that the stock cp1252 codec leaves undefined are mapped to
    the code point of the same number. The ``encodings.cp1252`` module is
    patched in place, so the change is visible to the whole process.

    :Parameters:
     - `envkey`: The environment key to lookup. If this variable is set to
       ``1``, the charset definition won't be fixed and this function is a
       no-op. If unset or ``None``, no lookup is made.

    :Types:
     - `envkey`: ``str``
    """
    import os
    if envkey is not None and os.environ.get(envkey) == '1':
        return

    import codecs
    from encodings import cp1252

    try:
        dmap = cp1252.decoding_map # pylint: disable = E1101
    except AttributeError:
        # Table based codec module: undefined slots are marked with U+FFFE.
        try:
            to_char = unichr  # Python 2
        except NameError:
            to_char = chr  # Python 3 has no unichr(); chr() yields text
        dtable = list(cp1252.decoding_table)
        codepoint = 0
        try:
            while True:
                # index() raises ValueError once no marker is left
                codepoint = dtable.index(u'\ufffe', codepoint)
                dtable[codepoint] = to_char(codepoint)
        except ValueError:
            # no more undefined points there
            pass
        dtable = u''.join(dtable)
        cp1252.decoding_table = dtable
        cp1252.encoding_table = codecs.charmap_build(dtable)
    else:
        # Python 2.4
        # items() exists on every Python version; the list() snapshot keeps
        # the loop independent of the dict being written to.
        for key, value in list(dmap.items()):
            if value is None:
                dmap[key] = key
        cp1252.encoding_map = codecs.make_encoding_map(dmap)
Example #2
0
    def __init__(self, locking_shift_decode_map=None, single_shift_decode_map=None):
        """
        Build the combined decode table and the matching encode table.

        :param locking_shift_decode_map: mapping used as the base decode
            table; ``BASIC_CHARACTER_SET`` when ``None``.
        :param single_shift_decode_map: mapping whose entries are added
            under the key ``(self._ESCAPE << 8) | key``;
            ``BASIC_CHARACTER_SET_EXTENSION`` when ``None``.

        NOTE(review): keys are presumably integer code values (they are
        OR-ed with a shifted integer) -- confirm against the table
        definitions.
        """
        if locking_shift_decode_map is None:
            locking_shift_decode_map = BASIC_CHARACTER_SET
        if single_shift_decode_map is None:
            single_shift_decode_map = BASIC_CHARACTER_SET_EXTENSION

        # Work on a copy: updating the argument in place would write the
        # escape-prefixed entries into the caller's mapping (or into the
        # shared default table) on every instantiation.
        self._decode_map = dict(locking_shift_decode_map)
        self._decode_map.update(
            ((self._ESCAPE << 8) | key, value)
            for (key, value) in single_shift_decode_map.items())

        self._encoding_map = codecs.make_encoding_map(self._decode_map)
def find_pdfdocencoding(encoding):
    """ Codec search function for PDFDocEncoding.

        Follows the codec module registration protocol: a
        ``codecs.CodecInfo`` is returned when 'pdfdocencoding' is
        requested, ``None`` for every other name.  The lookup tables are
        only built when such a request actually arrives.

        PDFDocEncoding is described in the PDF 1.7 reference manual.
    """
    if encoding != 'pdfdocencoding':
        return None

    # The byte -> code point table follows section D.2 of the PDF 1.7 manual.

    # Bytes decoding to the code point of the same value: tab, LF, CR,
    # 0x20-0x7E and 0xA1-0xFF with the exception of 0xAD.
    same_value = [0x09, 0x0A, 0x0D]
    same_value.extend(range(0x20, 0x7F))
    same_value.extend(code for code in range(0xA1, 0x100) if code != 0xAD)
    byte_to_char = dict((code, code) for code in same_value)

    # Bytes 0x18-0x1F decode to these characters
    low_specials = (
        0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC)
    for offset, char in enumerate(low_specials):
        byte_to_char[0x18 + offset] = char

    # Bytes 0x80-0x9E decode to these characters
    high_specials = (
        0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,
        0x2039, 0x203A, 0x2212, 0x2030, 0x201E, 0x201C, 0x201D, 0x2018,
        0x2019, 0x201A, 0x2122, 0xFB01, 0xFB02, 0x0141, 0x0152, 0x0160,
        0x0178, 0x017D, 0x0131, 0x0142, 0x0153, 0x0161, 0x017E)
    for offset, char in enumerate(high_specials):
        byte_to_char[0x80 + offset] = char

    byte_to_char[0xA0] = 0x20AC

    # The encoder table is derived before the lenient entries below are
    # added, so encoding stays limited to the entries defined above.
    char_to_byte = codecs.make_encoding_map(byte_to_char)

    # Not every PDF producer follows the spec, so conform to Postel's law
    # and interpret encoded strings if at all possible.  In particular, they
    # might have nulls and form-feeds, judging by random code snippets
    # floating around the internet.
    for code in range(0x18):
        byte_to_char[code] = code

    def encode(input, errors='strict'):
        return codecs.charmap_encode(input, errors, char_to_byte)

    def decode(input, errors='strict'):
        return codecs.charmap_decode(input, errors, byte_to_char)

    return codecs.CodecInfo(encode, decode, name='pdfdocencoding')
Example #4
0
""" Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP874.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API

def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x0080: 0x20ac,  # EURO SIGN
    0x0081: None,  # UNDEFINED
Example #5
0
""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the tables generated from '8859-10.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API

def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x00a1: 0x0104,  # 	LATIN CAPITAL LETTER A WITH OGONEK
    0x00a2: 0x0112,  # 	LATIN CAPITAL LETTER E WITH MACRON
Example #6
0
""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP1006.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API

def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x00a1: 0x06f0,  # 	EXTENDED ARABIC-INDIC DIGIT ZERO
    0x00a2: 0x06f1,  # 	EXTENDED ARABIC-INDIC DIGIT ONE
Example #7
0
""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP852.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
	0x0083: 0x00e2,	# LATIN SMALL LETTER A WITH CIRCUMFLEX
	0x0084: 0x00e4,	# LATIN SMALL LETTER A WITH DIAERESIS
	0x0085: 0x016f,	# LATIN SMALL LETTER U WITH RING ABOVE
Example #8
0
""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from '8859-9.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x00d0: 0x011e,	# 	LATIN CAPITAL LETTER G WITH BREVE
	0x00dd: 0x0130,	# 	LATIN CAPITAL LETTER I WITH DOT ABOVE
	0x00de: 0x015e,	# 	LATIN CAPITAL LETTER S WITH CEDILLA
	0x00f0: 0x011f,	# 	LATIN SMALL LETTER G WITH BREVE
	0x00fd: 0x0131,	# 	LATIN SMALL LETTER DOTLESS I
	0x00fe: 0x015f,	# 	LATIN SMALL LETTER S WITH CEDILLA
Example #9
0
""" Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP424.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0004: 0x009c,	# SELECT
	0x0005: 0x0009,	# HORIZONTAL TABULATION
	0x0006: 0x0086,	# REQUIRED NEW LINE
	0x0007: 0x007f,	# DELETE
	0x0008: 0x0097,	# GRAPHIC ESCAPE
	0x0009: 0x008d,	# SUPERSCRIPT
Example #10
0
""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP1257.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API

def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x0080: 0x20ac,  # EURO SIGN
    0x0081: None,  # UNDEFINED
Example #11
0
""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP856.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x05d0,	# HEBREW LETTER ALEF
	0x0081: 0x05d1,	# HEBREW LETTER BET
	0x0082: 0x05d2,	# HEBREW LETTER GIMEL
	0x0083: 0x05d3,	# HEBREW LETTER DALET
	0x0084: 0x05d4,	# HEBREW LETTER HE
	0x0085: 0x05d5,	# HEBREW LETTER VAV
Example #12
0
""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the tables generated from '8859-6.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API

def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x00a1: None,
    0x00a2: None,
Example #13
0
""" Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP864.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0025: 0x066a,	# ARABIC PERCENT SIGN
	0x0080: 0x00b0,	# DEGREE SIGN
	0x0081: 0x00b7,	# MIDDLE DOT
	0x0082: 0x2219,	# BULLET OPERATOR
	0x0083: 0x221a,	# SQUARE ROOT
	0x0084: 0x2592,	# MEDIUM SHADE
Example #14
0
""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from '8859-6.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x00a1: None,
	0x00a2: None,
	0x00a3: None,
	0x00a5: None,
	0x00a6: None,
	0x00a7: None,
Example #15
0
""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP856.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API

def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x0080: 0x05d0,  # HEBREW LETTER ALEF
    0x0081: 0x05d1,  # HEBREW LETTER BET
Example #16
0
""" Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from '8859-15.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x00a4: 0x20ac,	# 	EURO SIGN
	0x00a6: 0x0160,	# 	LATIN CAPITAL LETTER S WITH CARON
	0x00a8: 0x0161,	# 	LATIN SMALL LETTER S WITH CARON
	0x00b4: 0x017d,	# 	LATIN CAPITAL LETTER Z WITH CARON
	0x00b8: 0x017e,	# 	LATIN SMALL LETTER Z WITH CARON
	0x00bc: 0x0152,	# 	LATIN CAPITAL LIGATURE OE
Example #17
0
""" Python Character Mapping Codec generated from 'CP875.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP875.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0004: 0x009c,	# CONTROL
	0x0005: 0x0009,	# HORIZONTAL TABULATION
	0x0006: 0x0086,	# CONTROL
	0x0007: 0x007f,	# DELETE
	0x0008: 0x0097,	# CONTROL
	0x0009: 0x008d,	# CONTROL
Example #18
0
""" Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP861.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
	0x0083: 0x00e2,	# LATIN SMALL LETTER A WITH CIRCUMFLEX
	0x0084: 0x00e4,	# LATIN SMALL LETTER A WITH DIAERESIS
	0x0085: 0x00e0,	# LATIN SMALL LETTER A WITH GRAVE
Example #19
0
""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP775.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API

def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x0080: 0x0106,  # LATIN CAPITAL LETTER C WITH ACUTE
    0x0081: 0x00fc,  # LATIN SMALL LETTER U WITH DIAERESIS
Example #20
0
""" Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP869.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: None,	# UNDEFINED
	0x0081: None,	# UNDEFINED
	0x0082: None,	# UNDEFINED
	0x0083: None,	# UNDEFINED
	0x0084: None,	# UNDEFINED
	0x0085: None,	# UNDEFINED
Example #21
0
""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP1254.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x20ac,	# EURO SIGN
	0x0081: None,	# UNDEFINED
	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
	0x0083: 0x0192,	# LATIN SMALL LETTER F WITH HOOK
	0x0084: 0x201e,	# DOUBLE LOW-9 QUOTATION MARK
	0x0085: 0x2026,	# HORIZONTAL ELLIPSIS
Example #22
0
""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP1256.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x20ac,	# EURO SIGN
	0x0081: 0x067e,	# ARABIC LETTER PEH
	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
	0x0083: 0x0192,	# LATIN SMALL LETTER F WITH HOOK
	0x0084: 0x201e,	# DOUBLE LOW-9 QUOTATION MARK
	0x0085: 0x2026,	# HORIZONTAL ELLIPSIS
Example #23
0
""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP1254.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API

def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x0080: 0x20ac,  # EURO SIGN
    0x0081: None,  # UNDEFINED
Example #24
0
""" Python Character Mapping Codec generated from 'GREEK.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'GREEK.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x00c4,	# LATIN CAPITAL LETTER A WITH DIAERESIS
	0x0081: 0x00b9,	# SUPERSCRIPT ONE
	0x0082: 0x00b2,	# SUPERSCRIPT TWO
	0x0083: 0x00c9,	# LATIN CAPITAL LETTER E WITH ACUTE
	0x0084: 0x00b3,	# SUPERSCRIPT THREE
	0x0085: 0x00d6,	# LATIN CAPITAL LETTER O WITH DIAERESIS
Example #25
0
""" Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP1253.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x20ac,	# EURO SIGN
	0x0081: None,	# UNDEFINED
	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
	0x0083: 0x0192,	# LATIN SMALL LETTER F WITH HOOK
	0x0084: 0x201e,	# DOUBLE LOW-9 QUOTATION MARK
	0x0085: 0x2026,	# HORIZONTAL ELLIPSIS
Example #26
0
    0x00e7: 0x03b3,  # GREEK SMALL LETTER GAMMA
    0x00e8: 0x03b7,  # GREEK SMALL LETTER ETA
    0x00e9: 0x03b9,  # GREEK SMALL LETTER IOTA
    0x00ea: 0x03be,  # GREEK SMALL LETTER XI
    0x00eb: 0x03ba,  # GREEK SMALL LETTER KAPPA
    0x00ec: 0x03bb,  # GREEK SMALL LETTER LAMBDA
    0x00ed: 0x03bc,  # GREEK SMALL LETTER MU
    0x00ee: 0x03bd,  # GREEK SMALL LETTER NU
    0x00ef: 0x03bf,  # GREEK SMALL LETTER OMICRON
    0x00f0: 0x03c0,  # GREEK SMALL LETTER PI
    0x00f1: 0x03ce,  # GREEK SMALL LETTER OMEGA WITH TONOS
    0x00f2: 0x03c1,  # GREEK SMALL LETTER RHO
    0x00f3: 0x03c3,  # GREEK SMALL LETTER SIGMA
    0x00f4: 0x03c4,  # GREEK SMALL LETTER TAU
    0x00f5: 0x03b8,  # GREEK SMALL LETTER THETA
    0x00f6: 0x03c9,  # GREEK SMALL LETTER OMEGA
    0x00f7: 0x03c2,  # GREEK SMALL LETTER FINAL SIGMA
    0x00f8: 0x03c7,  # GREEK SMALL LETTER CHI
    0x00f9: 0x03c5,  # GREEK SMALL LETTER UPSILON
    0x00fa: 0x03b6,  # GREEK SMALL LETTER ZETA
    0x00fb: 0x03ca,  # GREEK SMALL LETTER IOTA WITH DIALYTIKA
    0x00fc: 0x03cb,  # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
    0x00fd: 0x0390,  # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
    0x00fe: 0x03b0,  # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
    0x00ff: None,  # UNDEFINED
})

### Encoding Map

# Derive the encoder table by inverting decoding_map, so the two tables
# are generated from one definition instead of being maintained separately.
encoding_map = codecs.make_encoding_map(decoding_map)
Example #27
0
""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'LATIN2.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x00c4,	# LATIN CAPITAL LETTER A WITH DIAERESIS
	0x0081: 0x0100,	# LATIN CAPITAL LETTER A WITH MACRON
	0x0082: 0x0101,	# LATIN SMALL LETTER A WITH MACRON
	0x0083: 0x00c9,	# LATIN CAPITAL LETTER E WITH ACUTE
	0x0084: 0x0104,	# LATIN CAPITAL LETTER A WITH OGONEK
	0x0085: 0x00d6,	# LATIN CAPITAL LETTER O WITH DIAERESIS
Example #28
0
""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the tables generated from '8859-9.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API

def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x00d0: 0x011e,  # 	LATIN CAPITAL LETTER G WITH BREVE
    0x00dd: 0x0130,  # 	LATIN CAPITAL LETTER I WITH DOT ABOVE
Example #29
0
""" Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP850.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API
def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
	0x0083: 0x00e2,	# LATIN SMALL LETTER A WITH CIRCUMFLEX
	0x0084: 0x00e4,	# LATIN SMALL LETTER A WITH DIAERESIS
	0x0085: 0x00e0,	# LATIN SMALL LETTER A WITH GRAVE
Example #30
0
""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py.
Written by Marc-Andre Lemburg ([email protected]).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the tables generated from 'CP1256.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer using this module's ``Codec`` methods."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader using this module's ``Codec`` methods."""


### encodings module API

def getregentry():
    """Return the ``(encoder, decoder, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x0080: 0x20ac,  # EURO SIGN
    0x0081: 0x067e,  # ARABIC LETTER PEH
Example #31
0
#!/usr/local/bin/python2.1
""" Python Character Mapping Codec for ROT13.
    See http://ucsub.colorado.edu/~kominek/rot13/ for details.
    Written by Marc-Andre Lemburg (mal@lemburg.com).
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the ROT13 mapping defined in this module."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x0041: 0x004e,
    0x0042: 0x004f,
Example #32
0
""" Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from '8859-3.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x00a1: 0x0126,  # 	LATIN CAPITAL LETTER H WITH STROKE
    0x00a2: 0x02d8,  # 	BREVE
Example #33
0
        0x007f: 0x00e0, #       LATIN SMALL LETTER A WITH GRAVE
        0x1b0a: 0x000c, #       FORM FEED
        0x1b14: 0x005e, #       CIRCUMFLEX ACCENT
        0x1b28: 0x007b, #       LEFT CURLY BRACKET
        0x1b29: 0x007d, #       RIGHT CURLY BRACKET
        0x1b2f: 0x005c, #       REVERSE SOLIDUS
        0x1b3c: 0x005b, #       LEFT SQUARE BRACKET
        0x1b3d: 0x007e, #       TILDE
        0x1b3e: 0x005d, #       RIGHT SQUARE BRACKET
        0x1b40: 0x007c, #       VERTICAL LINE
        0x1b65: 0x20ac, #       EURO SIGN
}

### Encoding Map

# Invert both decoding maps so the encoder can look up the code for a character.
encoding_map = codecs.make_encoding_map(decoding_map)
extra_encoding_map = codecs.make_encoding_map(extra_decoding_map)

# Ad-hoc self test, run only when this file is executed as a script.
# NOTE(review): this section is Python 2 only (print statement, backtick repr,
# ``except Exception, e``, ``unicode`` and ``string.letters``).
if __name__=='__main__':
    import string
    c = Codec()
    def test(s):
        # Round-trip ``s`` through encode then decode; report it if the
        # decoded text differs from the input.
        r = c.decode(c.encode(s))[0]
        if r != s: print 'in %r != out %r'%(s, r)
    test(unicode(string.letters))
    test(u'\u20ac')
    test(u'\xa0')
    try:
        # presumably U+20AD is not encodable by this codec -- TODO confirm;
        # whatever exception is raised is printed rather than propagated.
        test(u'av\u20ad')
    except Exception, e:
        print `u'av\u20ad'`, 'raised', e
Example #34
0
""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from '8859-13.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x00a1: 0x201d,  # 	RIGHT DOUBLE QUOTATION MARK
    0x00a5: 0x201e,  # 	DOUBLE LOW-9 QUOTATION MARK
Example #35
0
""" Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'CP737.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x0080: 0x0391,  # GREEK CAPITAL LETTER ALPHA
    0x0081: 0x0392,  # GREEK CAPITAL LETTER BETA
Example #36
0
""" Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'ICELAND.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x00c4,	# LATIN CAPITAL LETTER A WITH DIAERESIS
	0x0081: 0x00c5,	# LATIN CAPITAL LETTER A WITH RING ABOVE
	0x0082: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
	0x0083: 0x00c9,	# LATIN CAPITAL LETTER E WITH ACUTE
	0x0084: 0x00d1,	# LATIN CAPITAL LETTER N WITH TILDE
	0x0085: 0x00d6,	# LATIN CAPITAL LETTER O WITH DIAERESIS
Example #37
0
# GNU Lesser General Public License for more details.
#####################################################################
"""JIS X 0201 Codec required for JIS8 encoding of SecsVarJIS8"""

import codecs

# Decoding table for the single-byte JIS-8 codec: start from the identity map
# for bytes 0x00-0xFF, then override the code points that differ.
jis8_decoding_map = codecs.make_identity_dict(range(256))
jis8_decoding_map.update({
    0x005C: 0x00A5,  # Yen Sign
    0x007E: 0x203E,  # Overline
})

# Bytes 0xA1-0xDF are shifted by 0xFEC0, i.e. they decode to U+FF61-U+FF9F.
for i in range(0x00A1, 0x00E0):
    jis8_decoding_map[i] = i + 0xFEC0

# Inverse table used by the encoder.
jis8_encoding_map = codecs.make_encoding_map(jis8_decoding_map)

# Spellings under which the codec can be requested.  Since Python 3.9
# ``codecs.lookup`` converts hyphens to underscores before calling search
# functions, so "jis-8" arrives here as "jis_8"; older interpreters pass the
# hyphenated form through unchanged.
_JIS8_CODEC_NAMES = ("jis-8", "jis_8")


def jis_x_0201_encode(data, errors='strict'):
    """Encode text ``data`` with the JIS-8 table.

    Returns the ``(bytes, length_consumed)`` tuple produced by
    ``codecs.charmap_encode``; ``errors`` selects the error handling scheme.
    """
    return codecs.charmap_encode(data, errors, jis8_encoding_map)


def jis_x_0201_decode(data, errors='strict'):
    """Decode byte string ``data`` with the JIS-8 table.

    Returns the ``(text, length_consumed)`` tuple produced by
    ``codecs.charmap_decode``; ``errors`` selects the error handling scheme.
    """
    return codecs.charmap_decode(data, errors, jis8_decoding_map)


def jis_x_0201_search(name):
    """Codec search function: return the JIS-8 ``CodecInfo`` or ``None``.

    ``name`` is the (already normalized) encoding name handed over by
    ``codecs.lookup``.  Both "jis-8" and its normalized form "jis_8" are
    accepted so the codec can be found on every Python version.
    """
    if name in _JIS8_CODEC_NAMES:
        return codecs.CodecInfo(
            encode=jis_x_0201_encode,
            decode=jis_x_0201_decode,
            name="jis-8",
        )

    return None


# register the codec
codecs.register(jis_x_0201_search)
Example #38
0
""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'CP1006.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x00a1: 0x06f0,	# 	EXTENDED ARABIC-INDIC DIGIT ZERO
	0x00a2: 0x06f1,	# 	EXTENDED ARABIC-INDIC DIGIT ONE
	0x00a3: 0x06f2,	# 	EXTENDED ARABIC-INDIC DIGIT TWO
	0x00a4: 0x06f3,	# 	EXTENDED ARABIC-INDIC DIGIT THREE
	0x00a5: 0x06f4,	# 	EXTENDED ARABIC-INDIC DIGIT FOUR
	0x00a6: 0x06f5,	# 	EXTENDED ARABIC-INDIC DIGIT FIVE
Example #39
0
""" Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'CYRILLIC.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x0410,	# CYRILLIC CAPITAL LETTER A
	0x0081: 0x0411,	# CYRILLIC CAPITAL LETTER BE
	0x0082: 0x0412,	# CYRILLIC CAPITAL LETTER VE
	0x0083: 0x0413,	# CYRILLIC CAPITAL LETTER GHE
	0x0084: 0x0414,	# CYRILLIC CAPITAL LETTER DE
	0x0085: 0x0415,	# CYRILLIC CAPITAL LETTER IE
Example #40
0
""" Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'CP860.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
	0x0083: 0x00e2,	# LATIN SMALL LETTER A WITH CIRCUMFLEX
	0x0084: 0x00e3,	# LATIN SMALL LETTER A WITH TILDE
	0x0085: 0x00e0,	# LATIN SMALL LETTER A WITH GRAVE
Example #41
0
""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'KOI8-R.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x0080: 0x2500,  # 	BOX DRAWINGS LIGHT HORIZONTAL
    0x0081: 0x2502,  # 	BOX DRAWINGS LIGHT VERTICAL
Example #42
0
""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'CP1257.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x20ac,	# EURO SIGN
	0x0081: None,	# UNDEFINED
	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
	0x0083: None,	# UNDEFINED
	0x0084: 0x201e,	# DOUBLE LOW-9 QUOTATION MARK
	0x0085: 0x2026,	# HORIZONTAL ELLIPSIS
Example #43
0
def codegen(name, map, encodingname, comments=1):
    """Build the Python source text of a charmap codec module for ``map``.

    ``encodingname`` and ``name`` are interpolated (in that order) into the
    generated module docstring, and ``encodingname`` with ``_`` replaced by
    ``-`` becomes the ``CodecInfo`` name.  ``comments`` is forwarded to the
    map/table code generators.  When ``python_tabledef_code`` produces a
    decoding table, the generated module uses ``decoding_table`` and
    ``encoding_table``; otherwise it falls back to ``decoding_map`` and
    ``encoding_map``.

    Returns the module source as a single string with tabs expanded.
    """
    # Render every candidate section up front; the table result decides which
    # of them end up in the output.
    decoding_map_code = python_mapdef_code(
        'decoding_map', map, comments=comments)
    decoding_table_code = python_tabledef_code(
        'decoding_table', map, comments=comments)
    encoding_map_code = python_mapdef_code(
        'encoding_map', codecs.make_encoding_map(map),
        comments=comments, precisions=(4, 2))

    use_table = bool(decoding_table_code)
    suffix = 'table' if use_table else 'map'

    header_template = '''\
""" Python Character Mapping Codec %s generated from '%s' with gencodec.py.

"""#"

import codecs

### Codec APIs

class Codec(codecs.Codec):

    def encode(self, input, errors='strict'):
        return codecs.charmap_encode(input, errors, encoding_%s)

    def decode(self, input, errors='strict'):
        return codecs.charmap_decode(input, errors, decoding_%s)
'''
    incremental_template = '''\
class IncrementalEncoder(codecs.IncrementalEncoder):
    def encode(self, input, final=False):
        return codecs.charmap_encode(input, self.errors, encoding_%s)[0]

class IncrementalDecoder(codecs.IncrementalDecoder):
    def decode(self, input, final=False):
        return codecs.charmap_decode(input, self.errors, decoding_%s)[0]'''
    registry_template = '''
class StreamWriter(Codec, codecs.StreamWriter):
    pass

class StreamReader(Codec, codecs.StreamReader):
    pass

### encodings module API

def getregentry():
    return codecs.CodecInfo(
        name=%r,
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
'''

    parts = [
        header_template % (encodingname, name, suffix, suffix),
        incremental_template % (suffix, suffix),
        registry_template % encodingname.replace('_', '-'),
    ]

    if use_table:
        # Preferred form: decoding table plus an encoding table built from it.
        parts.append('\n### Decoding Table\n')
        parts.extend(decoding_table_code)
        parts.append(
            '\n### Encoding table\n'
            'encoding_table = codecs.charmap_build(decoding_table)\n')
    else:
        # Fallback form: explicit decoding and encoding maps.
        parts.append('\n### Decoding Map\n')
        parts.extend(decoding_map_code)
        parts.append('\n### Encoding Map\n')
        parts.extend(encoding_map_code)

    # An empty last element gives the joined text its final new-line.
    parts.append('')

    source = '\n'.join(parts)
    return source.expandtabs()
Example #44
0
""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'KOI8-R.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x2500,	# 	BOX DRAWINGS LIGHT HORIZONTAL
	0x0081: 0x2502,	# 	BOX DRAWINGS LIGHT VERTICAL
	0x0082: 0x250c,	# 	BOX DRAWINGS LIGHT DOWN AND RIGHT
	0x0083: 0x2510,	# 	BOX DRAWINGS LIGHT DOWN AND LEFT
	0x0084: 0x2514,	# 	BOX DRAWINGS LIGHT UP AND RIGHT
	0x0085: 0x2518,	# 	BOX DRAWINGS LIGHT UP AND LEFT
Example #45
0
""" Python Character Mapping Codec generated from 'CP037.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'CP037.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0004: 0x009c,	# CONTROL
	0x0005: 0x0009,	# HORIZONTAL TABULATION
	0x0006: 0x0086,	# CONTROL
	0x0007: 0x007f,	# DELETE
	0x0008: 0x0097,	# CONTROL
	0x0009: 0x008d,	# CONTROL
Example #46
0
""" Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'CP1251.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x0080: 0x0402,  # CYRILLIC CAPITAL LETTER DJE
    0x0081: 0x0403,  # CYRILLIC CAPITAL LETTER GJE
Example #47
0
""" Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'CP855.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x0080: 0x0452,  # CYRILLIC SMALL LETTER DJE
    0x0081: 0x0402,  # CYRILLIC CAPITAL LETTER DJE
Example #48
0
""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from '8859-10.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x00a1: 0x0104,	# 	LATIN CAPITAL LETTER A WITH OGONEK
	0x00a2: 0x0112,	# 	LATIN CAPITAL LETTER E WITH MACRON
	0x00a3: 0x0122,	# 	LATIN CAPITAL LETTER G WITH CEDILLA
	0x00a4: 0x012a,	# 	LATIN CAPITAL LETTER I WITH MACRON
	0x00a5: 0x0128,	# 	LATIN CAPITAL LETTER I WITH TILDE
	0x00a6: 0x0136,	# 	LATIN CAPITAL LETTER K WITH CEDILLA
Example #49
0
""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'CP775.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x0080: 0x0106,	# LATIN CAPITAL LETTER C WITH ACUTE
	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
	0x0083: 0x0101,	# LATIN SMALL LETTER A WITH MACRON
	0x0084: 0x00e4,	# LATIN SMALL LETTER A WITH DIAERESIS
	0x0085: 0x0123,	# LATIN SMALL LETTER G WITH CEDILLA
Example #50
0
def codegen(name, map, encodingname, comments=1):
    """Build the Python source text of a charmap codec module for ``map``.

    ``encodingname`` and ``name`` are interpolated (in that order) into the
    generated module docstring, and ``encodingname`` with ``_`` replaced by
    ``-`` becomes the ``CodecInfo`` name.  ``comments`` is forwarded to the
    map/table code generators.  The generated module starts with
    ``from __future__ import unicode_literals``.  When
    ``python_tabledef_code`` produces a decoding table, the generated module
    uses ``decoding_table`` and ``encoding_table``; otherwise it falls back to
    ``decoding_map`` and ``encoding_map``.

    Returns the module source as a single string with tabs expanded.
    """
    # Render every candidate section up front; the table result decides which
    # of them end up in the output.
    decoding_map_code = python_mapdef_code(
        'decoding_map', map, comments=comments)
    decoding_table_code = python_tabledef_code(
        'decoding_table', map, comments=comments)
    encoding_map_code = python_mapdef_code(
        'encoding_map', codecs.make_encoding_map(map),
        comments=comments, precisions=(4, 2))

    use_table = bool(decoding_table_code)
    suffix = 'table' if use_table else 'map'

    header_template = '''\
""" Python Character Mapping Codec %s generated from '%s' with gencodec.py.

"""#"
# Ensure the generated codec works with Python 2.6+.
from __future__ import unicode_literals

import codecs

### Codec APIs

class Codec(codecs.Codec):

    def encode(self,input,errors='strict'):
        return codecs.charmap_encode(input,errors,encoding_%s)

    def decode(self,input,errors='strict'):
        return codecs.charmap_decode(input,errors,decoding_%s)
'''
    incremental_template = '''\
class IncrementalEncoder(codecs.IncrementalEncoder):
    def encode(self, input, final=False):
        return codecs.charmap_encode(input,self.errors,encoding_%s)[0]

class IncrementalDecoder(codecs.IncrementalDecoder):
    def decode(self, input, final=False):
        return codecs.charmap_decode(input,self.errors,decoding_%s)[0]'''
    registry_template = '''
class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass

### encodings module API

def getregentry():
    return codecs.CodecInfo(
        name=%r,
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
'''

    parts = [
        header_template % (encodingname, name, suffix, suffix),
        incremental_template % (suffix, suffix),
        registry_template % encodingname.replace('_', '-'),
    ]

    if use_table:
        # Preferred form: decoding table plus an encoding table built from it.
        parts.append('\n### Decoding Table\n')
        parts.extend(decoding_table_code)
        parts.append(
            '\n### Encoding table\n'
            'encoding_table=codecs.charmap_build(decoding_table)\n')
    else:
        # Fallback form: explicit decoding and encoding maps.
        parts.append('\n### Decoding Map\n')
        parts.extend(decoding_map_code)
        parts.append('\n### Encoding Map\n')
        parts.extend(encoding_map_code)

    # An empty last element gives the joined text its final new-line.
    parts.append('')

    source = '\n'.join(parts)
    return source.expandtabs()
Example #51
0
""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from 'CP852.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x0080: 0x00c7,  # LATIN CAPITAL LETTER C WITH CEDILLA
    0x0081: 0x00fc,  # LATIN SMALL LETTER U WITH DIAERESIS
Example #52
0
    0x007f: 0x00e0,  #		LATIN SMALL LETTER A WITH GRAVE
    0x1b0a: 0x000c,  #		FORM FEED
    0x1b14: 0x005e,  #		CIRCUMFLEX ACCENT
    0x1b28: 0x007b,  #		LEFT CURLY BRACKET
    0x1b29: 0x007d,  #		RIGHT CURLY BRACKET
    0x1b2f: 0x005c,  #		REVERSE SOLIDUS
    0x1b3c: 0x005b,  #		LEFT SQUARE BRACKET
    0x1b3d: 0x007e,  #		TILDE
    0x1b3e: 0x005d,  #		RIGHT SQUARE BRACKET
    0x1b40: 0x007c,  #		VERTICAL LINE
    0x1b65: 0x20ac,  #		EURO SIGN
}

### Encoding Map

# Invert both decoding maps so the encoder can look up the code for a character.
encoding_map = codecs.make_encoding_map(decoding_map)
extra_encoding_map = codecs.make_encoding_map(extra_decoding_map)

# Manual smoke test, run only when this file is executed as a script: decode
# one hard-coded sample and print whatever ``Codec.decode`` returns.
if __name__ == '__main__':
    import string
    c = Codec()
    # NOTE(review): the sample looks like a hex-encoded message -- confirm
    # that ``Codec.decode`` expects its input in this textual form.
    r = c.decode('0001000791282143F5000005E8329BFD06')
    print(r)
    # The encode/decode round-trip checks below are disabled.
    #def test(s):
    #	 r = c.decode(c.encode(s))[0]
    #	 if r != s: print('in %r != out %r'%(s, r))
    #test(unicode(string.letters))
    #test(u'\u20ac')
    #test(u'\xa0')
    #try:
    #	 test(u'av\u20ad')
Example #53
0
""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from '8859-13.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
	0x00a1: 0x201d,	# 	RIGHT DOUBLE QUOTATION MARK
	0x00a5: 0x201e,	# 	DOUBLE LOW-9 QUOTATION MARK
	0x00a8: 0x00d8,	# 	LATIN CAPITAL LETTER O WITH STROKE
	0x00aa: 0x0156,	# 	LATIN CAPITAL LETTER R WITH CEDILLA
	0x00af: 0x00c6,	# 	LATIN CAPITAL LETTER AE
	0x00b4: 0x201c,	# 	LEFT DOUBLE QUOTATION MARK
Example #54
0
        0x00e7: 0x0647, #       ARABIC LETTER HEH
        0x00e8: 0x0648, #       ARABIC LETTER WAW
        0x00e9: 0x0649, #       ARABIC LETTER ALEF MAKSURA
        0x00ea: 0x064a, #       ARABIC LETTER YEH
        0x00eb: 0x064b, #       ARABIC FATHATAN
        0x00ec: 0x064c, #       ARABIC DAMMATAN
        0x00ed: 0x064d, #       ARABIC KASRATAN
        0x00ee: 0x064e, #       ARABIC FATHA
        0x00ef: 0x064f, #       ARABIC DAMMA
        0x00f0: 0x0650, #       ARABIC KASRA
        0x00f1: 0x0651, #       ARABIC SHADDA
        0x00f2: 0x0652, #       ARABIC SUKUN
        0x00f3: None,
        0x00f4: None,
        0x00f5: None,
        0x00f6: None,
        0x00f7: None,
        0x00f8: None,
        0x00f9: None,
        0x00fa: None,
        0x00fb: None,
        0x00fc: None,
        0x00fd: None,
        0x00fe: None,
        0x00ff: None,
})

### Encoding Map

# Derive the encoder's table by inverting decoding_map.
encoding_map = codecs.make_encoding_map(decoding_map)
Example #55
0
""" Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs

class Codec(codecs.Codec):
    """Charmap codec for the mapping generated from '8859-7.TXT'."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through the module-level ``encoding_map``."""
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` through the module-level ``decoding_map``."""
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer whose ``encode`` comes from :class:`Codec`."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader whose ``decode`` comes from :class:`Codec`."""


### encodings module API

def getregentry():
    """Return the ``(encode, decode, StreamReader, StreamWriter)`` tuple."""
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)

### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
    0x00a1: 0x2018,  # 	LEFT SINGLE QUOTATION MARK
    0x00a2: 0x2019,  # 	RIGHT SINGLE QUOTATION MARK