Python encode 예제들, pypy.interpreter.unicodehelper.encode Python 예제들

예제 #1

0

파일 보기

파일: tscmp.py 프로젝트: grubermeister/kamina-script

def compare_digest(space, w_a, w_b):
    """compare_digest(a, b) -> bool

    Return 'a == b'.  The comparison is carried out in a way intended to
    resist timing analysis, which makes it suitable for cryptography.
    a and b must both be of the same type: either str (ASCII only), or
    any type that supports the buffer protocol (e.g. bytes).

    Note: If a and b are of different lengths, or if an error occurs, a
    timing attack could theoretically reveal information about the types
    and lengths of a and b--but not their values.
    """
    both_unicode = (space.isinstance_w(w_a, space.w_unicode)
                    and space.isinstance_w(w_b, space.w_unicode))
    if not both_unicode:
        # Anything other than a pair of unicode objects is passed to the
        # buffer comparison untouched.
        return compare_digest_buffer(space, w_a, w_b)

    try:
        w_a_ascii = encode(space, w_a, 'ascii')
        w_b_ascii = encode(space, w_b, 'ascii')
    except OperationError as e:
        if e.match(space, space.w_UnicodeEncodeError):
            # An encode failure is reported to the caller as a TypeError.
            raise oefmt(
                space.w_TypeError,
                "comparing strings with non-ASCII characters is not "
                "supported")
        # Any other application-level error propagates unchanged.
        raise
    return compare_digest_buffer(space, w_a_ascii, w_b_ascii)

예제 #2

0

파일 보기

파일: parsestring.py 프로젝트: zcxowwww/pypy

def decode_utf8_recode(space, s, ps, end, recode_encoding):
    """Re-encode the run of high-bit bytes that starts at s[ps].

    Scans forward from ps (never past end) while the byte has bit 0x80
    set, hands that run to unicodehelper.check_utf8_or_raise, and encodes
    it to recode_encoding.

    Returns a pair: the bytes obtained via space.bytes_w, and the index
    just past the scanned run.
    """
    stop = ps
    while stop < end:
        if not (ord(s[stop]) & 0x80):
            break
        stop += 1
    # The value returned by the check is used as the length argument
    # of newutf8.
    length = unicodehelper.check_utf8_or_raise(space, s, ps, stop)
    w_encoded = unicodehelper.encode(space, space.newutf8(s[ps:stop], length),
                                     recode_encoding)
    return space.bytes_w(w_encoded), stop

예제 #3

0

파일 보기

파일: parsestring.py 프로젝트: charred/pypy

def decode_utf8(space, s, ps, end, encoding):
    """Decode the run of high-bit bytes starting at s[ps] and re-encode it.

    The run extends from ps up to (not including) the first byte whose
    0x80 bit is clear, or up to end.  It is decoded with
    unicodehelper.decode_utf8 and then encoded to the given encoding.

    Returns a pair: the encoded string (via space.str_w) and the index
    just past the run.
    """
    assert ps >= 0
    start = ps
    stop = ps
    # Skip over every consecutive byte that has the high bit set.
    while stop < end:
        if not (ord(s[stop]) & 0x80):
            break
        stop += 1
    w_decoded = space.wrap(unicodehelper.decode_utf8(space, s[start:stop]))
    w_encoded = unicodehelper.encode(space, w_decoded, encoding)
    return space.str_w(w_encoded), stop

예제 #4

0

파일 보기

파일: tscmp.py 프로젝트: Qointum/pypy

def compare_digest(space, w_a, w_b):
    """compare_digest(a, b) -> bool

    Return 'a == b'.  The implementation is meant to prevent timing
    analysis, so it is appropriate for cryptographic comparisons.  a and
    b must both be of the same type: either str (ASCII only), or any
    type that supports the buffer protocol (e.g. bytes).

    Note: If a and b are of different lengths, or if an error occurs, a
    timing attack could theoretically reveal information about the types
    and lengths of a and b--but not their values.
    """
    if space.isinstance_w(w_a, space.w_unicode):
        if space.isinstance_w(w_b, space.w_unicode):
            # Two unicode objects: compare their ASCII encodings instead.
            try:
                w_a = encode(space, w_a, 'ascii')
                w_b = encode(space, w_b, 'ascii')
            except OperationError as e:
                if e.match(space, space.w_UnicodeEncodeError):
                    raise oefmt(space.w_TypeError,
                                "comparing strings with non-ASCII "
                                "characters is not supported")
                # Not an encode error: let it propagate as-is.
                raise
    return compare_digest_buffer(space, w_a, w_b)

예제 #5

0

파일 보기

파일: parsestring.py 프로젝트: charred/pypy

def parsestr(space, encoding, s, unicode_literal=False):
    """Parse a string or unicode literal and return a wrapped value.

    If encoding=iso8859-1, the source string is also in this encoding.
    If encoding=None, the source string is ascii only.
    In other cases, the source string is in utf-8 encoding.

    When a bytes string is returned, it will be encoded with the
    original encoding.

    Yes, it's very inefficient.
    Yes, CPython has very similar code.

    Parameters:
        space: object space; used here for wrap() and passed through to
            every helper that is called.
        encoding: name of the source encoding, or None (see above).
        s: the literal exactly as written in the source, including any
            b/u/r prefix and the surrounding quotes.
        unicode_literal: whether a literal without b/B or u/U prefix is
            treated as unicode; an explicit prefix overrides it.

    Returns:
        space.wrap() of the decoded value, or, for a byte string that
        needs no escape processing but does need re-encoding, the object
        returned by unicodehelper.encode().

    Malformed input (no quote, mismatched quotes) is reported through
    raise_app_valueerror (defined elsewhere; presumably raises an
    application-level ValueError).
    """

    # we use ps as "pointer to s"
    # q is the virtual last char index of the string
    ps = 0
    quote = s[ps]
    rawmode = False

    # string decoration handling
    # An optional b/B or u/U prefix comes first, optionally followed by r/R.
    if quote == 'b' or quote == 'B':
        ps += 1
        quote = s[ps]
        unicode_literal = False
    elif quote == 'u' or quote == 'U':
        ps += 1
        quote = s[ps]
        unicode_literal = True
    if quote == 'r' or quote == 'R':
        ps += 1
        quote = s[ps]
        rawmode = True
    if quote != "'" and quote != '"':
        raise_app_valueerror(space,
                             'Internal error: parser passed unquoted literal')
    # Step past the opening quote; q indexes the closing quote.
    ps += 1
    q = len(s) - 1
    if s[q] != quote:
        raise_app_valueerror(space, 'Internal error: parser passed unmatched '
                                    'quotes in literal')
    if q-ps >= 4 and s[ps] == quote and s[ps+1] == quote:
        # triple quotes
        ps += 2
        if s[q-1] != quote or s[q-2] != quote:
            raise_app_valueerror(space, 'Internal error: parser passed '
                                        'unmatched triple quotes in literal')
        q -= 2
    # From here on, s[ps:q] is the literal's content without prefix and
    # quotes.

    if unicode_literal: # XXX Py_UnicodeFlag is ignored for now
        if encoding is None or encoding == "iso-8859-1":
            # 'unicode_escape' expects latin-1 bytes, string is ready.
            buf = s
            bufp = ps
            bufq = q
            u = None
        else:
            # String is utf8-encoded, but 'unicode_escape' expects
            # latin-1; So multibyte sequences must be escaped.
            lis = [] # using a list to assemble the value
            end = q
            # Worst case: "\XX" may become "\u005c\uHHLL" (12 bytes)
            while ps < end:
                if s[ps] == '\\':
                    # Copy the backslash itself, then look at what follows.
                    lis.append(s[ps])
                    ps += 1
                    if ord(s[ps]) & 0x80:
                        # A multibyte sequence will follow, it will be
                        # escaped like \u1234. To avoid confusion with
                        # the backslash we just wrote, we emit "\u005c"
                        # instead.
                        lis.append("u005c")
                if ord(s[ps]) & 0x80: # XXX inefficient
                    # Non-ASCII run: decode_utf8 is asked for "utf-16-be"
                    # output and also hands back the advanced position.
                    w, ps = decode_utf8(space, s, ps, end, "utf-16-be")
                    rn = len(w)
                    # Two bytes per UTF-16 code unit.
                    assert rn % 2 == 0
                    for i in range(0, rn, 2):
                        # Emit one \uHHLL escape per byte pair; hexbyte is
                        # defined elsewhere (presumably two hex digits per
                        # byte -- confirm).
                        lis.append('\\u')
                        lis.append(hexbyte(ord(w[i])))
                        lis.append(hexbyte(ord(w[i+1])))
                else:
                    # Plain byte: copy through unchanged.
                    lis.append(s[ps])
                    ps += 1
            buf = ''.join(lis)
            bufp = 0
            bufq = len(buf)
        assert 0 <= bufp <= bufq
        substr = buf[bufp:bufq]
        # Raw literals go through the raw-unicode-escape decoder, all
        # others through the regular unicode-escape decoder.
        if rawmode:
            v = unicodehelper.decode_raw_unicode_escape(space, substr)
        else:
            v = unicodehelper.decode_unicode_escape(space, substr)
        return space.wrap(v)

    # Byte-string literal.  Re-encoding is requested only when a source
    # encoding is given and it is none of utf-8 / utf8 / iso-8859-1.
    need_encoding = (encoding is not None and
                     encoding != "utf-8" and encoding != "utf8" and
                     encoding != "iso-8859-1")
    assert 0 <= ps <= q
    substr = s[ps : q]
    # No escape processing when the literal is raw or holds no backslash.
    if rawmode or '\\' not in s[ps:]:
        if need_encoding:
            w_u = space.wrap(unicodehelper.decode_utf8(space, substr))
            w_v = unicodehelper.encode(space, w_u, encoding)
            return w_v
        else:
            return space.wrap(substr)

    # Escapes are present: delegate to PyString_DecodeEscape, passing the
    # encoding only when re-encoding is needed.
    enc = None
    if need_encoding:
        enc = encoding
    v = PyString_DecodeEscape(space, substr, enc)
    return space.wrap(v)

예제 #6

0

파일 보기

파일: parsestring.py 프로젝트: kipras/pypy

def parsestr(space, encoding, s, unicode_literal=False):
    """Turn the source text of a string/unicode literal into a wrapped value.

    Source encoding rules:
      * encoding=iso8859-1: the source string is in that encoding too.
      * encoding=None: the source string is ascii only.
      * anything else: the source string is utf-8 encoded.

    A bytes string result is encoded with the original encoding.

    s is the literal as written, including an optional b/B or u/U prefix,
    an optional r/R prefix and the surrounding (single or triple) quotes.
    An explicit b/B or u/U prefix overrides unicode_literal.

    Yes, it's very inefficient.
    Yes, CPython has very similar code.
    """
    # pos walks forward through s; last indexes the closing quote.
    pos = 0
    quote = s[pos]
    is_raw = False

    # Prefix handling: [bB] or [uU] first, then an optional [rR].
    if quote in ('b', 'B'):
        pos += 1
        quote = s[pos]
        unicode_literal = False
    elif quote in ('u', 'U'):
        pos += 1
        quote = s[pos]
        unicode_literal = True
    if quote in ('r', 'R'):
        pos += 1
        quote = s[pos]
        is_raw = True
    if quote not in ("'", '"'):
        raise_app_valueerror(space,
                             'Internal error: parser passed unquoted literal')

    pos += 1
    last = len(s) - 1
    if s[last] != quote:
        raise_app_valueerror(space, 'Internal error: parser passed unmatched '
                                    'quotes in literal')
    if last - pos >= 4 and s[pos] == quote and s[pos + 1] == quote:
        # Triple-quoted literal: drop two more quotes at each end.
        pos += 2
        if not (s[last - 1] == quote and s[last - 2] == quote):
            raise_app_valueerror(space, 'Internal error: parser passed '
                                        'unmatched triple quotes in literal')
        last -= 2

    if unicode_literal:  # XXX Py_UnicodeFlag is ignored for now
        if encoding is not None and encoding != "iso-8859-1":
            escaped = decode_unicode_utf8(space, s, pos, last)
        else:
            # 'unicode_escape' expects latin-1 bytes, so the slice can be
            # used directly.
            assert 0 <= pos <= last
            escaped = s[pos:last]
        if is_raw:
            decoded = unicodehelper.decode_raw_unicode_escape(space, escaped)
        else:
            decoded = unicodehelper.decode_unicode_escape(space, escaped)
        return space.wrap(decoded)

    # Byte string: re-encode only for encodings other than None, utf-8,
    # utf8 and iso-8859-1.
    needs_recode = not (encoding is None or encoding == "utf-8"
                        or encoding == "utf8" or encoding == "iso-8859-1")
    assert 0 <= pos <= last
    body = s[pos:last]

    if not is_raw and '\\' in s[pos:]:
        # Backslash escapes have to be processed.
        recode_to = None
        if needs_recode:
            recode_to = encoding
        unescaped = PyString_DecodeEscape(space, body, 'strict', recode_to)
        return space.wrap(unescaped)

    if not needs_recode:
        return space.wrap(body)
    w_decoded = space.wrap(unicodehelper.decode_utf8(space, body))
    return unicodehelper.encode(space, w_decoded, encoding)

예제 #7

0

파일 보기

파일: parsestring.py 프로젝트: zcxowwww/pypy

def parsestr(space, encoding, s, unicode_literal=False):
    """Convert the source text of a string literal into a wrapped object.

    How the source bytes are interpreted depends on encoding:
    iso8859-1 means the source string is in that encoding, None means it
    is ascii only, and any other value means it is utf-8 encoded.

    A bytes result is encoded with the original encoding.

    s holds the literal as written: optional b/B or u/U prefix, optional
    r/R prefix, then the quoted (or triple-quoted) text.  An explicit
    b/B or u/U prefix takes precedence over unicode_literal.

    Unicode literals are returned through space.newutf8; byte strings
    through space.newbytes or, when re-encoded without escape
    processing, as returned by unicodehelper.encode.

    Yes, it's very inefficient.
    Yes, CPython has very similar code.
    """
    # idx is the read position in s; close_idx is the closing quote index.
    idx = 0
    quote = s[idx]
    raw = False

    # Strip the literal's prefix letters.
    if quote in ('b', 'B'):
        idx += 1
        quote = s[idx]
        unicode_literal = False
    elif quote in ('u', 'U'):
        idx += 1
        quote = s[idx]
        unicode_literal = True
    if quote in ('r', 'R'):
        idx += 1
        quote = s[idx]
        raw = True
    if not (quote == "'" or quote == '"'):
        raise_app_valueerror(space,
                             'Internal error: parser passed unquoted literal')

    idx += 1
    close_idx = len(s) - 1
    if s[close_idx] != quote:
        raise_app_valueerror(
            space,
            'Internal error: parser passed unmatched quotes in literal')
    if close_idx - idx >= 4 and s[idx] == quote and s[idx + 1] == quote:
        # Triple quotes: two extra quote characters on each side.
        idx += 2
        if not (s[close_idx - 1] == quote and s[close_idx - 2] == quote):
            raise_app_valueerror(
                space,
                'Internal error: parser passed unmatched triple quotes '
                'in literal')
        close_idx -= 2

    if unicode_literal:
        if encoding is not None and encoding != "iso-8859-1":
            unicodehelper.check_utf8_or_raise(space, s, idx, close_idx)
            escaped = decode_unicode_utf8(space, s, idx, close_idx)
        else:
            # 'unicode_escape' expects latin-1 bytes; the slice is usable
            # without conversion.
            assert 0 <= idx <= close_idx
            escaped = s[idx:close_idx]
        if raw:
            result = unicodehelper.decode_raw_unicode_escape(space, escaped)
        else:
            result = unicodehelper.decode_unicode_escape(space, escaped)
        # The decoders return a pair that is forwarded to newutf8.
        utf8, length = result
        return space.newutf8(utf8, length)

    # Byte string: a re-encode is wanted for every encoding except None,
    # utf-8, utf8 and iso-8859-1.
    recode = not (encoding is None or encoding == "utf-8"
                  or encoding == "utf8" or encoding == "iso-8859-1")
    assert 0 <= idx <= close_idx
    payload = s[idx:close_idx]

    if raw or '\\' not in s[idx:]:
        # No escape processing required.
        if not recode:
            return space.newbytes(payload)
        length = unicodehelper.check_utf8_or_raise(space, payload)
        w_text = space.newutf8(payload, length)
        return unicodehelper.encode(space, w_text, encoding)

    target = encoding if recode else None
    unescaped = PyString_DecodeEscape(space, payload, 'strict', target)
    return space.newbytes(unescaped)
예제 #8

0

파일 보기

파일: parsestring.py 프로젝트: Qointum/pypy

def decode_utf8_recode(space, s, ps, end, recode_encoding):
    """Decode part of s via decode_utf8 and re-encode it.

    decode_utf8 (defined elsewhere) is called with (space, s, ps, end)
    and yields a decoded value plus an updated position.  The value is
    wrapped, encoded to recode_encoding and unwrapped with
    space.bytes_w.

    Returns a pair: the re-encoded bytes and the updated position.
    """
    decoded, next_ps = decode_utf8(space, s, ps, end)
    w_recoded = unicodehelper.encode(space, space.wrap(decoded),
                                     recode_encoding)
    return space.bytes_w(w_recoded), next_ps

예제 #9

0

파일 보기

파일: parsestring.py 프로젝트: grubermeister/kamina-script

def decode_utf8_recode(space, s, ps, end, recode_encoding):
    """Decode part of s via decode_utf8 and re-encode it.

    decode_utf8 (defined elsewhere) is called with (space, s, ps, end)
    and yields a decoded value plus an updated position.  The value is
    wrapped with space.newunicode, encoded to recode_encoding and
    unwrapped with space.bytes_w.

    Returns a pair: the re-encoded bytes and the updated position.
    """
    decoded, next_ps = decode_utf8(space, s, ps, end)
    w_recoded = unicodehelper.encode(space, space.newunicode(decoded),
                                     recode_encoding)
    return space.bytes_w(w_recoded), next_ps