Esempio n. 1
0
def rawwcharp2unicoden(wcp, maxlen):
    """Build a unicode string from the wide-character buffer ``wcp``.

    Elements are read in order and converted with ``code_to_unichr``.
    Reading stops after ``maxlen`` elements, or earlier at the first
    element whose value (cast to ``lltype.Signed``) is zero; that zero
    element is not included.  The built string is passed through
    ``assert_str0`` before being returned.
    """
    builder = UnicodeBuilder(maxlen)
    pos = 0
    while pos < maxlen:
        wch = wcp[pos]
        if rffi.cast(lltype.Signed, wch) == 0:
            # zero terminator reached before the length limit
            break
        builder.append(code_to_unichr(wch))
        pos += 1
    return assert_str0(builder.build())
Esempio n. 2
0
    def get(self, ch, errorchar):
        """Translate the single character ``ch`` through the charmap.

        ``self.mapping_w`` (a pre-extracted sequence) is used when it is
        not None; otherwise ``self.w_mapping`` is indexed through the
        object space.

        Returns the unicode string the mapping yields, the result of
        ``code_to_unichr`` when the mapping yields an integer, or
        ``errorchar`` when the mapping has no entry for ``ch`` or maps it
        to None.

        Raises a TypeError OperationError (via ``oefmt``) when the mapped
        integer is outside ``0 .. 0x10FFFF`` or the mapped value has any
        other type.
        """
        space = self.space
        index = ord(ch)

        # get the character from the mapping
        if self.mapping_w is not None:
            # A sequence too short to hold ``index`` has no entry for this
            # character.  Treat it exactly like the LookupError handled in
            # the generic branch below instead of letting a raw IndexError
            # escape from the fast path.
            if index >= len(self.mapping_w):
                return errorchar
            w_ch = self.mapping_w[index]
        else:
            try:
                w_ch = space.getitem(self.w_mapping, space.newint(index))
            except OperationError as e:
                if not e.match(space, space.w_LookupError):
                    raise
                return errorchar

        if space.isinstance_w(w_ch, space.w_unicode):
            # Charmap may return a unicode string
            return space.unicode_w(w_ch)
        elif space.isinstance_w(w_ch, space.w_int):
            # Charmap may return a number
            x = space.int_w(w_ch)
            if not 0 <= x <= 0x10FFFF:
                raise oefmt(space.w_TypeError,
                            "character mapping must be in range(0x110000)")
            return code_to_unichr(x)
        elif space.is_w(w_ch, space.w_None):
            # Charmap may return None
            return errorchar

        raise oefmt(space.w_TypeError,
                    "character mapping must return integer, None or str")
Esempio n. 3
0
 def lookup(self, space, name):
     """Return the wrapped character whose name is ``name``.

     The name is upper-cased and passed to ``self._lookup``; the code it
     returns is converted with ``code_to_unichr`` and wrapped.  A KeyError
     from ``self._lookup`` is turned into an OperationError carrying
     ``space.w_KeyError`` and a message that quotes the original name.
     """
     try:
         codepoint = self._lookup(name.upper())
     except KeyError:
         w_template = space.wrap("undefined character name '%s'")
         w_name = space.wrap(name)
         raise OperationError(space.w_KeyError, space.mod(w_template, w_name))
     unistr = code_to_unichr(codepoint)
     return space.wrap(unistr)
Esempio n. 4
0
    def get(self, ch, errorchar):
        """Translate the single character ``ch`` through the charmap.

        ``self.mapping_w`` (a pre-extracted sequence) is used when it is
        not None; otherwise ``self.w_mapping`` is indexed through the
        object space.

        Returns the unicode string the mapping yields, the result of
        ``code_to_unichr`` when the mapping yields an integer, or
        ``errorchar`` when the mapping has no entry for ``ch`` or maps it
        to None.

        Raises a TypeError OperationError (via ``oefmt``) when the mapped
        integer is outside ``0 .. 0x10FFFF`` or the mapped value has any
        other type.
        """
        space = self.space
        index = ord(ch)

        # get the character from the mapping
        if self.mapping_w is not None:
            # A sequence too short to hold ``index`` has no entry for this
            # character.  Treat it exactly like the LookupError handled in
            # the generic branch below instead of letting a raw IndexError
            # escape from the fast path.
            if index >= len(self.mapping_w):
                return errorchar
            w_ch = self.mapping_w[index]
        else:
            try:
                w_ch = space.getitem(self.w_mapping, space.newint(index))
            except OperationError as e:
                if not e.match(space, space.w_LookupError):
                    raise
                return errorchar

        if space.isinstance_w(w_ch, space.w_unicode):
            # Charmap may return a unicode string
            return space.unicode_w(w_ch)
        elif space.isinstance_w(w_ch, space.w_int):
            # Charmap may return a number
            x = space.int_w(w_ch)
            if not 0 <= x <= 0x10FFFF:
                raise oefmt(space.w_TypeError,
                    "character mapping must be in range(0x110000)")
            return code_to_unichr(x)
        elif space.is_w(w_ch, space.w_None):
            # Charmap may return None
            return errorchar

        raise oefmt(space.w_TypeError,
            "character mapping must return integer, None or unicode")
Esempio n. 5
0
def rawwcharp2unicoden(wcp, maxlen):
    """Convert up to ``maxlen`` wide characters from ``wcp`` to unicode.

    Each element is converted with ``code_to_unichr`` and appended to a
    ``UnicodeBuilder`` created with ``maxlen`` as its argument.  The scan
    ends at ``maxlen`` elements or at the first element that is zero when
    cast to ``lltype.Signed``, whichever comes first.  The result goes
    through ``assert_str0`` before it is returned.
    """
    out = UnicodeBuilder(maxlen)
    index = 0
    while index < maxlen:
        element = wcp[index]
        if rffi.cast(lltype.Signed, element) == 0:
            # terminator: stop without appending it
            break
        out.append(code_to_unichr(element))
        index += 1
    result = out.build()
    return assert_str0(result)
Esempio n. 6
0
 def lookup(self, space, name):
     """Return the character named ``name`` as a wrapped unicode object.

     The name is upper-cased and passed to ``self._lookup``; the code it
     returns is converted with ``code_to_unichr`` and wrapped with
     ``space.newunicode``.  A KeyError from ``self._lookup`` is turned
     into an OperationError carrying ``space.w_KeyError`` and a message
     that quotes the original name.
     """
     try:
         codepoint = self._lookup(name.upper())
     except KeyError:
         w_template = space.newtext("undefined character name '%s'")
         w_name = space.newtext(name)
         raise OperationError(space.w_KeyError, space.mod(w_template, w_name))
     unistr = code_to_unichr(codepoint)
     return space.newunicode(unistr)
Esempio n. 7
0
    def lookup(self, space, name):
        """Return the character or named sequence called ``name``.

        The upper-cased name is resolved with ``self._lookup`` (named
        sequences enabled).  If ``self._lookup_named_sequence`` yields a
        sequence for the resulting code, that sequence is wrapped and
        returned; otherwise the code is converted with ``code_to_unichr``
        and wrapped.  A KeyError from ``self._lookup`` becomes an
        OperationError carrying ``space.w_KeyError``.
        """
        try:
            code = self._lookup(name.upper(), with_named_sequence=True)
        except KeyError:
            w_template = space.newtext("undefined character name '%s'")
            w_name = space.newtext(name)
            raise OperationError(space.w_KeyError,
                                 space.mod(w_template, w_name))

        # The code may stand for a named sequence rather than one character
        named = self._lookup_named_sequence(code)
        if named is None:
            return space.newunicode(code_to_unichr(code))

        # named sequences only contain UCS2 codes, no surrogates &co.
        return space.newunicode(named)
Esempio n. 8
0
 def decode_escape_sequence_unicode(self, i, builder):
     """Decode one backslash-u escape and append it to ``builder`` as UTF-8.

     ``i`` is the index of the first of the four hex digits.  When the
     decoded value is in the lead-surrogate range, the value is replaced
     by the result of ``self.decode_surrogate_pair`` and six more
     characters are consumed.

     Returns the index just past everything consumed.  When the digits do
     not parse, ``self._raise`` is called with the error message; if that
     call returns, the method returns None.
     """
     # at this point we are just after the 'u' of the \u1234 sequence.
     start = i
     i += 4
     hexdigits = self.getslice(start, i)
     try:
         # NOTE(review): int() may accept more than four plain hex digits
         # (e.g. a sign or whitespace) -- confirm this leniency is intended.
         val = int(hexdigits, 16)
         if (val & 0xfc00) == 0xd800:
             # surrogate pair (value is in 0xD800-0xDBFF)
             val = self.decode_surrogate_pair(i, val)
             i += 6
     except ValueError:
         # The backslash is escaped explicitly: a bare "\u" followed by
         # non-hex characters is a syntax error in unicode string literals.
         self._raise("Invalid \\uXXXX escape (char %d)", i - 1)
         return # help the annotator to know that we'll never go beyond
                # this point
     #
     uchr = runicode.code_to_unichr(val)     # may be a surrogate pair again
     utf8_ch = unicodehelper.encode_utf8(self.space, uchr)
     builder.append(utf8_ch)
     return i
Esempio n. 9
0
 def decode_escape_sequence_unicode(self, i, builder):
     """Decode one backslash-u escape and append it to ``builder`` as UTF-8.

     ``i`` is the index of the first of the four hex digits.  When the
     decoded value is in the lead-surrogate range, the value is replaced
     by the result of ``self.decode_surrogate_pair`` and six more
     characters are consumed.

     Returns the index just past everything consumed.  When the digits do
     not parse, ``self._raise`` is called with the error message; if that
     call returns, the method returns None.
     """
     # at this point we are just after the 'u' of the \u1234 sequence.
     start = i
     i += 4
     hexdigits = self.getslice(start, i)
     try:
         # NOTE(review): int() may accept more than four plain hex digits
         # (e.g. a sign or whitespace) -- confirm this leniency is intended.
         val = int(hexdigits, 16)
         if (val & 0xfc00) == 0xd800:
             # surrogate pair (value is in 0xD800-0xDBFF)
             val = self.decode_surrogate_pair(i, val)
             i += 6
     except ValueError:
         # The backslash is escaped explicitly: a bare "\u" followed by
         # non-hex characters is a syntax error in unicode string literals.
         self._raise("Invalid \\uXXXX escape (char %d)", i - 1)
         return  # help the annotator to know that we'll never go beyond
         # this point
     #
     uchr = runicode.code_to_unichr(val)  # may be a surrogate pair again
     utf8_ch = unicodehelper.encode_utf8(self.space, uchr)
     builder.append(utf8_ch)
     return i
Esempio n. 10
0
class Charmap_Decode:
    """Per-character lookup helper for charmap decoding.

    Holds the object space and the mapping object.  When the mapping is a
    tuple its items are extracted once into ``mapping_w`` so that ``get``
    can index them directly; otherwise ``mapping_w`` is None and every
    lookup goes through ``space.getitem``.
    """

    def __init__(self, space, w_mapping):
        self.space = space
        self.w_mapping = w_mapping

        # fast path for all the stuff in the encodings module
        if space.isinstance_w(w_mapping, space.w_tuple):
            self.mapping_w = space.fixedview(w_mapping)
        else:
            self.mapping_w = None

    def get(self, ch, errorchar):
        """Translate the single character ``ch`` through the charmap.

        Returns the unicode string the mapping yields, the result of
        ``code_to_unichr`` when the mapping yields an integer, or
        ``errorchar`` when the mapping has no entry for ``ch`` or maps it
        to None.

        Raises a TypeError OperationError (via ``oefmt``) when the mapped
        integer is outside ``0 .. 0x10FFFF`` or the mapped value has any
        other type.
        """
        space = self.space
        index = ord(ch)

        # get the character from the mapping
        if self.mapping_w is not None:
            # A tuple too short to hold ``index`` has no entry for this
            # character.  Treat it exactly like the LookupError handled in
            # the generic branch below instead of letting a raw IndexError
            # escape from the fast path.
            if index >= len(self.mapping_w):
                return errorchar
            w_ch = self.mapping_w[index]
        else:
            try:
                w_ch = space.getitem(self.w_mapping, space.newint(index))
            except OperationError as e:
                if not e.match(space, space.w_LookupError):
                    raise
                return errorchar

        if space.isinstance_w(w_ch, space.w_unicode):
            # Charmap may return a unicode string
            return space.unicode_w(w_ch)
        elif space.isinstance_w(w_ch, space.w_int):
            # Charmap may return a number
            x = space.int_w(w_ch)
            if not 0 <= x <= 0x10FFFF:
                raise oefmt(space.w_TypeError,
                            "character mapping must be in range(0x110000)")
            return code_to_unichr(x)
        elif space.is_w(w_ch, space.w_None):
            # Charmap may return None
            return errorchar

        raise oefmt(space.w_TypeError,
                    "character mapping must return integer, None or unicode")
Esempio n. 11
0
 def decode_escape_sequence_unicode(self, i, builder):
     """Decode one backslash-u escape and append it to ``builder`` as UTF-8.

     ``i`` is the index of the first of the four hex digits.  On builds
     where ``sys.maxunicode`` exceeds 65535, a value in 0xD800-0xDFFF that
     is directly followed by another backslash-u is combined through
     ``self.decode_surrogate_pair`` and six more characters are consumed.
     The character is encoded with surrogates allowed.

     Returns the index just past everything consumed; raises DecoderError
     when a ValueError occurs while parsing the digits or the pair.
     """
     # on entry we are just after the 'u' of the \u1234 sequence.
     first_digit = i
     i = first_digit + 4
     hexdigits = self.getslice(first_digit, i)
     try:
         val = int(hexdigits, 16)
         # Only look at the following characters when the value is a
         # surrogate on a wide build; the checks short-circuit in order.
         if (sys.maxunicode > 65535 and 0xd800 <= val <= 0xdfff
                 and self.ll_chars[i] == '\\'
                 and self.ll_chars[i + 1] == 'u'):
             # surrogate pair
             val = self.decode_surrogate_pair(i, val)
             i += 6
     except ValueError:
         raise DecoderError("Invalid \\uXXXX escape", i - 1)
     #
     uchr = runicode.code_to_unichr(val)  # may be a surrogate pair again
     builder.append(unicodehelper.encode_utf8(self.space,
                                              uchr,
                                              allow_surrogates=True))
     return i
Esempio n. 12
0
 def f(c):
     # Convert the code ``c`` with code_to_unichr and return the ordinal
     # of the first element of the resulting string.
     unistr = code_to_unichr(c)
     first = unistr[0]
     return ord(first)
Esempio n. 13
0
 def f(c):
     # Convert the code ``c`` with code_to_unichr and return the result
     # concatenated with an empty unicode literal.
     converted = code_to_unichr(c)
     return converted + u''