def compare_digest(space, w_a, w_b):
    """compare_digest(a, b) -> bool

    Return 'a == b'.  This function uses an approach designed to prevent
    timing analysis, making it appropriate for cryptography.

    a and b must both be of the same type: either str (ASCII only),
    or any type that supports the buffer protocol (e.g. bytes).

    Note: If a and b are of different lengths, or if an error occurs,
    a timing attack could theoretically reveal information about the
    types and lengths of a and b--but not their values.
    """
    both_unicode = (space.isinstance_w(w_a, space.w_unicode) and
                    space.isinstance_w(w_b, space.w_unicode))
    if not both_unicode:
        # Anything that is not a pair of unicode objects goes straight to
        # the buffer comparison.
        return compare_digest_buffer(space, w_a, w_b)

    try:
        w_ascii_a = encode(space, w_a, 'ascii')
        w_ascii_b = encode(space, w_b, 'ascii')
    except OperationError as e:
        # Only an encoding failure is turned into a TypeError; every other
        # application-level error propagates unchanged.
        if e.match(space, space.w_UnicodeEncodeError):
            raise oefmt(space.w_TypeError,
                        "comparing strings with non-ASCII characters is not "
                        "supported")
        raise
    return compare_digest_buffer(space, w_ascii_a, w_ascii_b)
def decode_utf8_recode(space, s, ps, end, recode_encoding):
    """Re-encode the run of high-bit bytes of `s` that starts at `ps`.

    Scans forward from `ps`, stopping at `end` or at the first byte whose
    top bit (0x80) is clear.  That run is passed through
    unicodehelper.check_utf8_or_raise, whose result is used as the length
    for space.newutf8, and the resulting object is encoded to
    `recode_encoding`.

    Returns a pair (encoded bytes, index of the first byte after the run).
    """
    stop = ps
    while stop < end:
        if not (ord(s[stop]) & 0x80):
            break
        stop += 1
    length = unicodehelper.check_utf8_or_raise(space, s, ps, stop)
    w_text = space.newutf8(s[ps:stop], length)
    w_encoded = unicodehelper.encode(space, w_text, recode_encoding)
    return space.bytes_w(w_encoded), stop
def decode_utf8(space, s, ps, end, encoding):
    """Decode the run of high-bit bytes at s[ps:] and re-encode it.

    Starting at `ps`, consumes bytes while the index is below `end` and the
    byte has its top bit (0x80) set.  The consumed slice is decoded with
    unicodehelper.decode_utf8, wrapped, and encoded to `encoding`.

    Returns a pair (encoded string, index of the first unconsumed byte).
    """
    assert ps >= 0
    start = ps
    pos = ps
    # C original: while (s < end && *s != '\\') s++;  (inefficient for u"..")
    while pos < end:
        if not (ord(s[pos]) & 0x80):
            break
        pos += 1
    w_decoded = space.wrap(unicodehelper.decode_utf8(space, s[start:pos]))
    w_encoded = unicodehelper.encode(space, w_decoded, encoding)
    return space.str_w(w_encoded), pos
def compare_digest(space, w_a, w_b):
    """compare_digest(a, b) -> bool

    Return 'a == b'.  This function uses an approach designed to prevent
    timing analysis, making it appropriate for cryptography.

    a and b must both be of the same type: either str (ASCII only),
    or any type that supports the buffer protocol (e.g. bytes).

    Note: If a and b are of different lengths, or if an error occurs,
    a timing attack could theoretically reveal information about the
    types and lengths of a and b--but not their values.
    """
    if not space.isinstance_w(w_a, space.w_unicode):
        return compare_digest_buffer(space, w_a, w_b)
    if not space.isinstance_w(w_b, space.w_unicode):
        return compare_digest_buffer(space, w_a, w_b)

    # Both operands are unicode: compare their ASCII encodings instead.
    try:
        w_left = encode(space, w_a, 'ascii')
        w_right = encode(space, w_b, 'ascii')
    except OperationError as e:
        if not e.match(space, space.w_UnicodeEncodeError):
            raise
        # A failed ASCII encoding is reported as a TypeError.
        raise oefmt(space.w_TypeError,
                    "comparing strings with non-ASCII characters is not "
                    "supported")
    else:
        return compare_digest_buffer(space, w_left, w_right)
def parsestr(space, encoding, s, unicode_literal=False):
    """Parse the source text of a string or unicode literal.

    `s` is the literal as it appears in the source, including any b/u/r
    prefix and the surrounding quotes.  `encoding` describes the bytes of
    `s`: None means ASCII only, "iso8859-1" means the source is in that
    encoding, and anything else means the source is UTF-8.
    `unicode_literal` is the default kind of result; a b/B or u/U prefix
    overrides it.

    Returns a wrapped value.  When a bytes string is returned it is encoded
    with the original encoding.

    Yes, it's very inefficient.
    Yes, CPython has very similar code.
    """
    # pos is the read position in s; last is the index of the closing
    # quote (moved left by two for triple-quoted literals).
    pos = 0
    quote = s[pos]
    rawmode = False

    # Optional prefixes: b/B or u/U, then optionally r/R.
    if quote == 'b' or quote == 'B':
        unicode_literal = False
        pos += 1
        quote = s[pos]
    elif quote == 'u' or quote == 'U':
        unicode_literal = True
        pos += 1
        quote = s[pos]
    if quote == 'r' or quote == 'R':
        rawmode = True
        pos += 1
        quote = s[pos]

    if not (quote == "'" or quote == '"'):
        raise_app_valueerror(space,
                             'Internal error: parser passed unquoted literal')
    pos += 1
    last = len(s) - 1
    if s[last] != quote:
        raise_app_valueerror(space, 'Internal error: parser passed unmatched '
                                    'quotes in literal')
    if last - pos >= 4 and s[pos] == quote and s[pos + 1] == quote:
        # triple quotes
        pos += 2
        if not (s[last - 1] == quote and s[last - 2] == quote):
            raise_app_valueerror(space, 'Internal error: parser passed '
                                        'unmatched triple quotes in literal')
        last -= 2

    if unicode_literal:  # XXX Py_UnicodeFlag is ignored for now
        if encoding is None or encoding == "iso-8859-1":
            # 'unicode_escape' expects latin-1 bytes, string is ready.
            assert 0 <= pos <= last
            substr = s[pos:last]
        else:
            # String is utf8-encoded, but 'unicode_escape' expects
            # latin-1; so multibyte sequences must be escaped.
            pieces = []  # assembled with ''.join() below
            stop = last
            # Worst case: "\XX" may become "\u005c\uHHLL" (12 bytes)
            while pos < stop:
                if s[pos] == '\\':
                    pieces.append('\\')
                    pos += 1
                    if ord(s[pos]) & 0x80:
                        # A multibyte sequence will follow, it will be
                        # escaped like \u1234. To avoid confusion with
                        # the backslash we just wrote, we emit "\u005c"
                        # instead.
                        pieces.append("u005c")
                if ord(s[pos]) & 0x80:  # XXX inefficient
                    encoded, pos = decode_utf8(space, s, pos, stop,
                                               "utf-16-be")
                    count = len(encoded)
                    assert count % 2 == 0
                    i = 0
                    while i < count:
                        # One \uHHLL escape per pair of utf-16-be bytes.
                        pieces.append('\\u' +
                                      hexbyte(ord(encoded[i])) +
                                      hexbyte(ord(encoded[i + 1])))
                        i += 2
                else:
                    pieces.append(s[pos])
                    pos += 1
            substr = ''.join(pieces)
        if rawmode:
            v = unicodehelper.decode_raw_unicode_escape(space, substr)
        else:
            v = unicodehelper.decode_unicode_escape(space, substr)
        return space.wrap(v)

    # Bytes literal from here on.
    need_encoding = not (encoding is None or
                         encoding == "utf-8" or encoding == "utf8" or
                         encoding == "iso-8859-1")
    assert 0 <= pos <= last
    substr = s[pos:last]

    if not rawmode and '\\' in s[pos:]:
        # Escapes are present and must be interpreted.
        if need_encoding:
            enc = encoding
        else:
            enc = None
        v = PyString_DecodeEscape(space, substr, enc)
        return space.wrap(v)

    if not need_encoding:
        return space.wrap(substr)
    # No escapes to process, but the bytes must be recoded from UTF-8.
    w_u = space.wrap(unicodehelper.decode_utf8(space, substr))
    return unicodehelper.encode(space, w_u, encoding)
def parsestr(space, encoding, s, unicode_literal=False):
    """Parse the source text of a string or unicode literal.

    `s` is the literal as it appears in the source, including any b/u/r
    prefix and the surrounding quotes.  `encoding` describes the bytes of
    `s`: None means ASCII only, "iso8859-1" means the source is in that
    encoding, and anything else means the source is UTF-8.
    `unicode_literal` is the default kind of result; a b/B or u/U prefix
    overrides it.

    Returns a wrapped value.  When a bytes string is returned it is encoded
    with the original encoding.

    Yes, it's very inefficient.
    Yes, CPython has very similar code.
    """
    # pos is the read position in s; last is the index of the closing
    # quote (moved left by two for triple-quoted literals).
    pos = 0
    quote = s[pos]
    rawmode = False

    # Optional prefixes: b/B or u/U, then optionally r/R.
    if quote == 'b' or quote == 'B':
        unicode_literal = False
        pos += 1
        quote = s[pos]
    elif quote == 'u' or quote == 'U':
        unicode_literal = True
        pos += 1
        quote = s[pos]
    if quote == 'r' or quote == 'R':
        rawmode = True
        pos += 1
        quote = s[pos]

    if not (quote == "'" or quote == '"'):
        raise_app_valueerror(space,
                             'Internal error: parser passed unquoted literal')
    pos += 1
    last = len(s) - 1
    if s[last] != quote:
        raise_app_valueerror(space, 'Internal error: parser passed unmatched '
                                    'quotes in literal')
    if last - pos >= 4 and s[pos] == quote and s[pos + 1] == quote:
        # triple quotes
        pos += 2
        if not (s[last - 1] == quote and s[last - 2] == quote):
            raise_app_valueerror(space, 'Internal error: parser passed '
                                        'unmatched triple quotes in literal')
        last -= 2

    if unicode_literal:  # XXX Py_UnicodeFlag is ignored for now
        if encoding is None or encoding == "iso-8859-1":
            # 'unicode_escape' expects latin-1 bytes, string is ready.
            assert 0 <= pos <= last
            substr = s[pos:last]
        else:
            substr = decode_unicode_utf8(space, s, pos, last)
        if rawmode:
            v = unicodehelper.decode_raw_unicode_escape(space, substr)
        else:
            v = unicodehelper.decode_unicode_escape(space, substr)
        return space.wrap(v)

    # Bytes literal from here on.
    need_encoding = not (encoding is None or
                         encoding == "utf-8" or encoding == "utf8" or
                         encoding == "iso-8859-1")
    assert 0 <= pos <= last
    substr = s[pos:last]

    if not rawmode and '\\' in s[pos:]:
        # Escapes are present and must be interpreted.
        if need_encoding:
            enc = encoding
        else:
            enc = None
        v = PyString_DecodeEscape(space, substr, 'strict', enc)
        return space.wrap(v)

    if not need_encoding:
        return space.wrap(substr)
    # No escapes to process, but the bytes must be recoded from UTF-8.
    w_u = space.wrap(unicodehelper.decode_utf8(space, substr))
    return unicodehelper.encode(space, w_u, encoding)
def parsestr(space, encoding, s, unicode_literal=False):
    """Parse the source text of a string or unicode literal.

    `s` is the literal as it appears in the source, including any b/u/r
    prefix and the surrounding quotes.  `encoding` describes the bytes of
    `s`: None means ASCII only, "iso8859-1" means the source is in that
    encoding, and anything else means the source is UTF-8.
    `unicode_literal` is the default kind of result; a b/B or u/U prefix
    overrides it.

    Returns a wrapped value.  When a bytes string is returned it is encoded
    with the original encoding.

    Yes, it's very inefficient.
    Yes, CPython has very similar code.
    """
    # pos is the read position in s; last is the index of the closing
    # quote (moved left by two for triple-quoted literals).
    pos = 0
    quote = s[pos]
    rawmode = False

    # Optional prefixes: b/B or u/U, then optionally r/R.
    if quote == 'b' or quote == 'B':
        unicode_literal = False
        pos += 1
        quote = s[pos]
    elif quote == 'u' or quote == 'U':
        unicode_literal = True
        pos += 1
        quote = s[pos]
    if quote == 'r' or quote == 'R':
        rawmode = True
        pos += 1
        quote = s[pos]

    if not (quote == "'" or quote == '"'):
        raise_app_valueerror(space,
                             'Internal error: parser passed unquoted literal')
    pos += 1
    last = len(s) - 1
    if s[last] != quote:
        raise_app_valueerror(
            space, 'Internal error: parser passed unmatched '
            'quotes in literal')
    if last - pos >= 4 and s[pos] == quote and s[pos + 1] == quote:
        # triple quotes
        pos += 2
        if not (s[last - 1] == quote and s[last - 2] == quote):
            raise_app_valueerror(
                space, 'Internal error: parser passed '
                'unmatched triple quotes in literal')
        last -= 2

    if unicode_literal:
        if encoding is None or encoding == "iso-8859-1":
            # 'unicode_escape' expects latin-1 bytes, string is ready.
            assert 0 <= pos <= last
            substr = s[pos:last]
        else:
            unicodehelper.check_utf8_or_raise(space, s, pos, last)
            substr = decode_unicode_utf8(space, s, pos, last)
        # Both decoders hand back a (utf8 string, length) pair.
        if rawmode:
            text, length = unicodehelper.decode_raw_unicode_escape(space,
                                                                   substr)
        else:
            text, length = unicodehelper.decode_unicode_escape(space, substr)
        return space.newutf8(text, length)

    # Bytes literal from here on.
    need_encoding = not (encoding is None or
                         encoding == "utf-8" or encoding == "utf8" or
                         encoding == "iso-8859-1")
    assert 0 <= pos <= last
    substr = s[pos:last]

    if not rawmode and '\\' in s[pos:]:
        # Escapes are present and must be interpreted.
        if need_encoding:
            enc = encoding
        else:
            enc = None
        v = PyString_DecodeEscape(space, substr, 'strict', enc)
        return space.newbytes(v)

    if not need_encoding:
        return space.newbytes(substr)
    # No escapes to process, but the bytes must be recoded from UTF-8.
    length = unicodehelper.check_utf8_or_raise(space, substr)
    w_u = space.newutf8(substr, length)
    return unicodehelper.encode(space, w_u, encoding)
def decode_utf8_recode(space, s, ps, end, recode_encoding):
    """Decode part of `s` via decode_utf8 and re-encode it.

    Calls decode_utf8(space, s, ps, end), wraps the first result with
    space.wrap and encodes it to `recode_encoding`.

    Returns a pair (encoded bytes, second result of decode_utf8), the
    latter presumably being the updated position in `s`.
    """
    decoded, next_pos = decode_utf8(space, s, ps, end)
    w_decoded = space.wrap(decoded)
    w_encoded = unicodehelper.encode(space, w_decoded, recode_encoding)
    return space.bytes_w(w_encoded), next_pos
def decode_utf8_recode(space, s, ps, end, recode_encoding):
    """Decode part of `s` via decode_utf8 and re-encode it.

    Calls decode_utf8(space, s, ps, end), wraps the first result with
    space.newunicode and encodes it to `recode_encoding`.

    Returns a pair (encoded bytes, second result of decode_utf8), the
    latter presumably being the updated position in `s`.
    """
    decoded, next_pos = decode_utf8(space, s, ps, end)
    w_decoded = space.newunicode(decoded)
    w_encoded = unicodehelper.encode(space, w_decoded, recode_encoding)
    return space.bytes_w(w_encoded), next_pos