def read_number_or_id(f, init):
    """Read the remainder of a number-or-identifier token from ``f``.

    ``init`` is the text that was already consumed for this token.  More
    characters are taken from ``f`` for as long as ``idchar`` accepts them.
    The collected text is then interpreted, in order of preference, as a
    fixnum, a bignum (when the integer overflows), a flonum, and finally
    a symbol.
    """
    sofar = StringBuilder(64)
    sofar.append(init)
    while True:
        c = f.peek()
        # Stop when peek() yields the empty string or a character that
        # cannot be part of an identifier.
        if c == "" or not idchar(c):
            break
        v = f.read(1)
        assert v == c
        sofar.append(v)
    got = sofar.build()
    try:
        val = string_to_int(got)
        return values.W_Fixnum.make_or_interned(val)
    except ParseStringOverflowError:
        # Valid integer syntax, but too large for a machine word.
        val = rbigint.fromdecimalstr(got)
        return values.W_Bignum(val)
    except ParseStringError:
        try:
            return values.W_Flonum(float(got))
        except ValueError:
            # Only a malformed float literal should fall back to a symbol;
            # a bare ``except`` would also hide unrelated errors.
            return values.W_Symbol.make(got)
def my_replace(string):
    """Return a copy of ``string`` with every space character dropped."""
    from rpython.rlib.rstring import StringBuilder
    kept = StringBuilder()
    for ch in string:
        if ch == ' ':
            continue
        kept.append(ch)
    return kept.build()
def func():
    """Exercise each StringBuilder append variant and return the result."""
    builder = StringBuilder()
    builder.append("a")
    builder.append("abc")
    # Only the slice [1:2] of the literal is appended here.
    builder.append_slice("abc", 1, 2)
    builder.append_multiple_char('d', 4)
    built = builder.build()
    return built
def direct_read(self, n=-1):
    """Read up to ``n`` bytes from the stream, or everything if ``n < 0``."""
    stream = self.getstream()
    self.check_readable()
    if n < 0:
        return stream.readall()

    collected = StringBuilder(n)
    remaining = n
    while remaining > 0:
        try:
            chunk = stream.read(remaining)
        except OSError as e:
            # a special-case only for read() (similar to CPython, which
            # also loses partial data with other methods): if we get
            # EAGAIN after already some data was received, return it.
            # Note that we can get EAGAIN while there is buffered data
            # waiting; read that too.
            if is_wouldblock_error(e.errno):
                buffered = stream.count_buffered_bytes()
                if buffered > 0:
                    collected.append(stream.read(min(remaining, buffered)))
                partial = collected.build()
                if len(partial) > 0:
                    return partial
            raise
        if not chunk:
            break
        remaining -= len(chunk)
        collected.append(chunk)
    return collected.build()
def bitwise_not(self, space):
    """Return a W_ConstStringObject whose characters are XOR-ed with 0xff."""
    size = self.strlen()
    out = StringBuilder(size)
    for idx in range(size):
        flipped = ord(self.character(idx)) ^ 0xff
        out.append(chr(flipped))
    return W_ConstStringObject(out.build())
def http_build_query(interp, w_data, num_prefix="", arg_sep=None, enctype=1):
    """Build a query string from an array or object value.

    ``arg_sep`` defaults to the ``arg_separator.output`` ini setting.  Each
    entry is rendered by ``_build_query`` and the pieces are concatenated;
    trailing separators are removed from the result.  A warning is emitted
    when ``w_data`` is neither an array nor an object, in which case the
    result is an empty string.
    """
    space = interp.space
    if arg_sep is None:
        arg_sep = interp.config.get_ini_str("arg_separator.output")
    w_data = w_data.deref()
    out = StringBuilder()
    if w_data.tp not in [space.tp_array, space.tp_object]:
        interp.space.ec.warn("http_build_query(): Parameter 1 "
                             "expected to be Array or Object. "
                             "Incorrect value given")
    if w_data.tp == space.tp_array:
        with space.iter(w_data) as itr:
            while not itr.done():
                w_key, w_value = itr.next_item(space)
                key = _get_key(space, num_prefix, w_key)
                res = _build_query(space, [], key, w_value,
                                   num_prefix, arg_sep, enctype)
                out.append(''.join(res))
    if w_data.tp == space.tp_object:
        for key, w_value in w_data.get_instance_attrs(interp).iteritems():
            _, prop = demangle_property(key)
            if prop:
                continue
            res = _build_query(space, [], key, w_value,
                               num_prefix, arg_sep, enctype)
            out.append(''.join(res))
    outstr = out.build()
    # Remove the trailing separator(s) as a whole suffix.  str.rstrip()
    # treats its argument as a *set of characters*, so with a separator
    # such as "&amp;" it would also eat trailing 'a', 'm', 'p', ';' that
    # belong to the last value.
    if arg_sep:
        while outstr.endswith(arg_sep):
            end = len(outstr) - len(arg_sep)
            assert end >= 0
            outstr = outstr[:end]
    return interp.space.newstr(outstr)
class W_StringOutputPort(W_OutputPort):
    """Output port that accumulates everything written into a StringBuilder."""

    errorname = "output-port"
    _attrs_ = ["closed", "str"]

    def __init__(self):
        self.closed = False
        self.str = StringBuilder()

    def write(self, s):
        """Append ``s`` to the accumulated contents."""
        self.str.append(s)

    def contents(self):
        """Return everything written so far as a string."""
        return self.str.build()

    def seek(self, offset, end=False):
        """Move the write position to ``offset``.

        Nothing happens when ``end`` is true or ``offset`` is already the
        current length.  Seeking past the end pads with NUL characters;
        seeking backwards truncates the contents to ``offset``.
        """
        length = self.str.getlength()
        if end or offset == length:
            return
        if offset > length:
            # Pad up to the requested offset.  The count must be
            # ``offset - length``; the reversed subtraction is negative
            # and would append nothing.
            self.str.append_multiple_char("\0", offset - length)
        else:
            # FIXME: this is potentially slow.
            content = self.contents()
            self.str = StringBuilder(offset)
            self.str.append_slice(content, 0, offset)

    def tell(self):
        """Return the current length of the accumulated contents."""
        return self.str.getlength()
def _read_all(self, space):
    "Read the whole file without updating the cache"
    # Caller must hold the lock.
    collected = StringBuilder()

    # Start with whatever is still sitting in the read-ahead buffer.
    total = self._readahead()
    if total:
        stop = self.pos + total
        collected.append(''.join(self.buffer[self.pos:stop]))
        self.pos += total

    # From here on we bypass the buffer, so flush pending writes first.
    if self.writable:
        self._flush_and_rewind_unlocked(space)
    self._reader_reset_buf()

    # Keep reading until EOF or until read() would block.
    while True:
        w_chunk = space.call_method(self.w_raw, "read")
        if space.is_w(w_chunk, space.w_None):
            if total == 0:
                # Nothing at all was read: propagate the None.
                return w_chunk
            break
        chunk = space.str_w(w_chunk)
        got = len(chunk)
        if got == 0:
            break
        collected.append(chunk)
        total += got
        if self.abs_pos != -1:
            self.abs_pos += got

    return space.wrap(collected.build())
def string_append(args):
    """Concatenate W_String arguments into one new W_String.

    The ASCII representation is tried first; on the first ValueError from
    ``as_str_ascii`` the work done so far is moved to a unicode builder
    and the remaining arguments are appended as unicode.
    """
    if jit.isconstant(len(args)):
        return string_append_fastpath(args)
    if not args:
        return W_String.fromascii("")

    count = len(args)
    ascii_out = StringBuilder(count)
    position = 0
    try:
        for position in range(count):
            w_item = args[position]
            if not isinstance(w_item, W_String):
                raise SchemeException("string-append: expected a string")
            ascii_out.append(w_item.as_str_ascii())
    except ValueError:
        # args[position] is the first non-ASCII string: restart from it
        # with a unicode builder seeded with the ASCII prefix.
        wide_out = UnicodeBuilder(count)
        wide_out.append(unicode(ascii_out.build()))
        for j in range(position, count):
            w_item = args[j]
            if not isinstance(w_item, W_String):
                raise SchemeException("string-append: expected a string")
            wide_out.append(w_item.as_unicode())
        return W_String.fromunicode(wide_out.build())
    return W_String.fromascii(ascii_out.build())
def direct_readline(self, size=-1):
    """Read one line, limited to ``size`` characters when ``size >= 0``."""
    stream = self.getstream()
    self.check_readable()
    if size < 0:
        return stream.readline()

    # very inefficient unless there is a peek()
    line = StringBuilder()
    budget = size
    while budget > 0:
        # Look at the stream's buffered data to find out how much can be
        # read without going past an end-of-line.
        start, window = stream.peek()
        assert 0 <= start <= len(window)
        stop = start + budget
        newline_at = window.find("\n", start, stop)
        if newline_at < 0:
            newline_at = min(stop - 1, len(window))
        piece = stream.read(newline_at - start + 1)
        if not piece:
            break
        line.append(piece)
        if piece.endswith('\n'):
            break
        budget -= len(piece)
    return line.build()
def str_translate__String_ANY_ANY(space, w_string, w_table, w_deletechars=''):
    """charfilter - unicode handling is not implemented

    Return a copy of the string in which every character found in the
    optional ``deletechars`` argument is removed and every remaining
    character is mapped through the translation table, which must be a
    string of length 256."""
    if space.is_w(w_table, space.w_None):
        table = DEFAULT_NOOP_TABLE
    else:
        table = space.bufferstr_w(w_table)
        if len(table) != 256:
            raise OperationError(
                space.w_ValueError,
                space.wrap("translation table must be 256 characters long"))

    source = w_string._value
    doomed = space.str_w(w_deletechars)
    if len(doomed) != 0:
        out = StringBuilder()
        # One flag per byte value: True means "drop this character".
        drop = [False] * 256
        for ch in doomed:
            drop[ord(ch)] = True
        for ch in source:
            code = ord(ch)
            if drop[code]:
                continue
            out.append(table[code])
    else:
        # Nothing to delete: the output has exactly the input length.
        out = StringBuilder(len(source))
        for ch in source:
            out.append(table[ord(ch)])
    return W_StringObject(out.build())
def rledecode_hqx(space, hexbin):
    "Decode a hexbin RLE-coded string."
    # The input length is only a guess at the output length.
    out = StringBuilder(len(hexbin))
    size = len(hexbin)
    pos = 0
    previous = -1          # ordinal of the last byte emitted, -1 if none
    while pos < size:
        byte = hexbin[pos]
        pos += 1
        if byte != '\x90':
            out.append(byte)
            previous = ord(byte)
            continue
        # 0x90 is the RLE marker; it must be followed by a count byte.
        if pos == size:
            raise_Incomplete(space, 'String ends with the RLE code \\x90')
        repeat = ord(hexbin[pos]) - 1
        pos += 1
        if repeat < 0:
            # A count of zero stands for a literal 0x90 byte.
            out.append('\x90')
            previous = 0x90
            continue
        if previous < 0:
            raise_Error(space, 'String starts with the RLE code \\x90')
        out.append_multiple_char(chr(previous), repeat)
    return space.newbytes(out.build())
def a2b_hqx(space, ascii): """Decode .hqx coding. Returns (bin, done).""" # overestimate the resulting length res = StringBuilder(len(ascii)) done = 0 pending_value = 0 pending_bits = 0 for c in ascii: n = ord(table_a2b_hqx[ord(c)]) if n <= 0x3F: pending_value = (pending_value << 6) | n pending_bits += 6 if pending_bits == 24: # flush res.append(chr(pending_value >> 16)) res.append(chr((pending_value >> 8) & 0xff)) res.append(chr(pending_value & 0xff)) pending_value = 0 pending_bits = 0 elif n == FAIL: raise_Error(space, 'Illegal character') elif n == DONE: if pending_bits >= 8: res.append(chr(pending_value >> (pending_bits - 8))) if pending_bits >= 16: res.append(chr((pending_value >> (pending_bits - 16)) & 0xff)) done = 1 break #elif n == SKIP: pass else: if pending_bits > 0: raise_Incomplete(space, 'String has incomplete number of bytes') return space.newtuple([space.newbytes(res.build()), space.wrap(done)])
def unwrap(self):
    """Return the contents as a plain string, built character by character."""
    # note: always overridden so far
    size = self.strlen()
    chars = StringBuilder(size)
    idx = 0
    while idx < size:
        chars.append(self.character(idx))
        idx += 1
    return chars.build()
def _charp2str_to_null(cp, index):
    """Copy characters from ``cp`` starting at ``index`` up to a NUL."""
    pos = rffi.cast(lltype.Signed, index)
    collected = StringBuilder()
    while True:
        ch = cp[pos]
        if ch == '\x00':
            break
        collected.append(ch)
        pos += 1
    return collected.build()
def readall_w(self, space):
    """Read from ``self.fd`` until EOF and return the collected data.

    EINTR triggers a signal check and a retry.  Any other OSError ends
    the read when some data was already collected; with no data, EAGAIN
    yields ``space.w_None`` and every other error is re-raised wrapped
    as an IOError.
    """
    self._check_closed(space)
    self._check_readable(space)
    total = 0

    builder = StringBuilder()
    while True:
        newsize = int(new_buffersize(self.fd, total))

        try:
            chunk = os.read(self.fd, newsize - total)
        except OSError as e:
            if e.errno == errno.EINTR:
                space.getexecutioncontext().checksignals()
                continue
            if total > 0:
                # return what we've got so far
                break
            if e.errno == errno.EAGAIN:
                return space.w_None
            raise wrap_oserror(space, e, exception_name='w_IOError')

        if not chunk:
            break
        builder.append(chunk)
        total += len(chunk)
    # Without this return the collected data would be dropped and the
    # caller would always receive None.
    # NOTE(review): wrapped with space.wrap() as the other readers in
    # this file do -- confirm against the surrounding module.
    return space.wrap(builder.build())
def add__StringBuffer_String(space, w_self, w_other):
    """Return a W_StringBufferObject holding ``w_self`` followed by ``w_other``."""
    if w_self.builder.getlength() == w_self.length:
        # The builder holds exactly this object's contents: extend it.
        target = w_self.builder
    else:
        # The builder has grown past this object's length, so start a
        # fresh one from the forced value.
        target = StringBuilder()
        target.append(w_self.force())
    target.append(w_other._value)
    return W_StringBufferObject(target)
def sanitize(self, s):
    """Return ``s`` with every character in CONTROL_CHARS replaced by '_'."""
    cleaned = StringBuilder(len(s))
    for ch in s:
        cleaned.append('_' if ch in CONTROL_CHARS else ch)
    return cleaned.build()
def text_build(self, space, frame, bytecode, no):
    """Pop ``no`` values, concatenate their string forms, push the text."""
    pieces = [None] * no
    # The stack holds the pieces in reverse order, so fill from the back.
    slot = no
    while slot > 0:
        slot -= 1
        pieces[slot] = space.str(frame.pop())
    joined = StringBuilder()
    for piece in pieces:
        joined.append(piece)
    frame.push(space.newtext(joined.build()))
def fn(_):
    """Build after each append, collecting in between; join the snapshots."""
    builder = StringBuilder(4)
    snapshots = []
    for offset in range(50):
        builder.append(chr(33 + offset))
        snapshots.append(builder.build())
        gc.collect()
    return ' '.join(snapshots)
def hexdigest(self, space):
    "Return the digest as a wrapped string of lowercase hex digits."
    raw = self._digest(space)
    alphabet = '0123456789abcdef'
    out = StringBuilder(self.digest_size * 2)
    for byte in raw:
        value = ord(byte)
        # High nibble first, then low nibble.
        out.append(alphabet[(value >> 4) & 0xf])
        out.append(alphabet[value & 0xf])
    return space.wrap(out.build())
def fn(_):
    """Build strings of growing length with a collection after each append."""
    results = []
    for length in range(3, 76, 5):
        builder = StringBuilder()
        for offset in range(length):
            builder.append(chr(33 + offset))
            gc.collect()
        results.append(builder.build())
    return ' '.join(results)
def string_func(space, w_left, w_right):
    """Apply ``bitwise_op`` character-wise over the common prefix length."""
    lhs = w_left.unwrap()
    rhs = w_right.unwrap()
    common = min(len(lhs), len(rhs))
    out = StringBuilder(common)
    for pos in range(common):
        combined = bitwise_op(ord(lhs[pos]), ord(rhs[pos]))
        out.append(chr(combined))
    return space.newstr(out.build())
def test_prebuilt_string_builder(self):
    """A builder filled before interpretation is still usable inside it."""
    prebuilt = StringBuilder(100)
    prebuilt.append("abc")

    def f():
        return len(prebuilt.build())

    assert self.interpret(f, []) == 3
def str2hexstr(arr, size):
    """Return the first ``size`` characters of ``arr`` as lowercase hex."""
    digits = "0123456789abcdef"
    out = StringBuilder(size)
    for pos in range(size):
        byte = ord(arr[pos])
        out.append(digits[byte >> 4])
        out.append(digits[byte & 15])
    return out.build()
def _parse_plain_flags(source):
    """Collect characters from ``source`` up to (not including) a ':'."""
    flags = StringBuilder(4)
    ch = source.get()
    while ch != ":":
        flags.append(ch)
        ch = source.get()
    return flags.build()
def readline_w(self, space, w_limit=None):
    """Read and return one line, at most ``w_limit`` bytes if it is given.

    When the object has a ``peek`` method it is used to find out how many
    bytes can be read without going past a newline; otherwise the data is
    read one byte at a time.  Raises IOError when ``peek()`` or ``read()``
    returns something that is not a bytes object.
    """
    # For backwards compatibility, a (slowish) readline().
    limit = convert_size(space, w_limit)

    has_peek = space.findattr(self, space.wrap("peek"))

    builder = StringBuilder()
    size = 0

    while limit < 0 or size < limit:
        nreadahead = 1

        if has_peek:
            w_readahead = space.call_method(self, "peek", space.wrap(1))
            if not space.isinstance_w(w_readahead, space.w_str):
                raise operationerrfmt(
                    space.w_IOError,
                    "peek() should have returned a bytes object, not '%T'",
                    w_readahead)
            length = space.len_w(w_readahead)
            if length > 0:
                # Count the bytes up to and including the first newline,
                # bounded by the peeked length (and by limit, if any).
                n = 0
                buf = space.str_w(w_readahead)
                if limit >= 0:
                    while True:
                        if n >= length or n >= limit:
                            break
                        n += 1
                        if buf[n-1] == '\n':
                            break
                else:
                    while True:
                        if n >= length:
                            break
                        n += 1
                        if buf[n-1] == '\n':
                            break
                nreadahead = n

        w_read = space.call_method(self, "read", space.wrap(nreadahead))
        if not space.isinstance_w(w_read, space.w_str):
            # This check is on the result of read(), so the message must
            # name read() rather than peek().
            raise operationerrfmt(
                space.w_IOError,
                "read() should have returned a bytes object, not '%T'",
                w_read)
        read = space.str_w(w_read)
        if not read:
            break

        size += len(read)
        builder.append(read)

        if read[-1] == '\n':
            break

    return space.wrap(builder.build())
class W_StringBuilder(W_Root):
    """Wrapped object exposing append/build on top of a StringBuilder."""

    def __init__(self, length):
        builder = StringBuilder(length)
        self._s = builder

    @unwrap_spec(txt='utf8')
    def append(self, txt):
        """Append the unwrapped text to the underlying builder."""
        self._s.append(txt)

    def build(self, space):
        """Return the accumulated text as a wrapped text object."""
        built = self._s.build()
        return space.newtext(built)
def _flush_codes(self, space):
    """Write one record per pending code object to the file, then reset."""
    packet = StringBuilder()
    for code in self.current_codes:
        full_name = code._get_full_name()
        # Record layout: marker byte, unique id, name length, name.
        packet.append('\x02')
        write_long_to_string_builder(code._unique_id, packet)
        write_long_to_string_builder(len(full_name), packet)
        packet.append(full_name)
    payload = packet.build()
    os.write(self.fileno, payload)
    self.current_codes = []
def f(n):
    """Rebuild JOINED from IN ``n`` times; raise ValueError on mismatch."""
    while n > 0:
        jitdriver.jit_merge_point(n=n)
        builder = StringBuilder(36)
        for piece in IN:
            builder.append(piece)
        joined = builder.build()
        if joined != JOINED:
            raise ValueError
        n -= 1
    return n
def b2a_base64(space, bin, __kwonly__, newline=True):
    "Base64-encode a line of data."
    groups = (len(bin) + 2) // 3
    try:
        capacity = ovfcheck(groups * 4)
    except OverflowError:
        raise OperationError(space.w_MemoryError, space.w_None)
    capacity += 1
    out = StringBuilder(capacity)

    acc = 0          # bit accumulator
    nbits = 0        # number of not-yet-emitted bits in acc
    for byte in bin:
        # Shift the byte in, then emit every complete 6-bit group.  After
        # adding 8 bits there are 8, 10 or 12 pending, i.e. one or two
        # groups.
        acc = (acc << 8) | ord(byte)
        nbits += 8
        while nbits >= 6:
            out.append(table_b2a_base64[(acc >> (nbits - 6)) & 0x3f])
            nbits -= 6
    #
    if nbits == 4:
        out.append(table_b2a_base64[(acc & 0xf) << 2])
        out.append(PAD)
    elif nbits == 2:
        out.append(table_b2a_base64[(acc & 3) << 4])
        out.append(PAD)
        out.append(PAD)
    if newline:
        out.append('\n')
    return space.newbytes(out.build())
def format_number(digits, buflen, sign, decpt, code, precision, flags, upper):
    """Format a buffer of decimal digits as a float string.

    ``digits`` is a character pointer from which ``buflen`` characters are
    read; ``decpt`` is the position of the decimal point relative to them.
    ``sign == 1`` produces a leading '-'; otherwise a '+' is produced when
    ``rfloat.DTSF_SIGN`` is set in ``flags``.  ``code`` selects the format
    ('e', 'f', 'g' or 'r'); any other value raises ValueError.  ``upper``
    selects 'E' instead of 'e' for the exponent marker.  Returns the
    formatted string.
    """
    # We got digits back, format them.  We may need to pad 'digits'
    # either on the left or right (or both) with extra zeros, so in
    # general the resulting string has the form
    #
    #   [<sign>]<zeros><digits><zeros>[<exponent>]
    #
    # where either of the <zeros> pieces could be empty, and there's a
    # decimal point that could appear either in <digits> or in the
    # leading or trailing <zeros>.
    #
    # Imagine an infinite 'virtual' string vdigits, consisting of the
    # string 'digits' (starting at index 0) padded on both the left
    # and right with infinite strings of zeros.  We want to output a
    # slice
    #
    #   vdigits[vdigits_start : vdigits_end]
    #
    # of this virtual string.  Thus if vdigits_start < 0 then we'll
    # end up producing some leading zeros; if vdigits_end > digits_len
    # there will be trailing zeros in the output.  The next section of
    # code determines whether to use an exponent or not, figures out
    # the position 'decpt' of the decimal point, and computes
    # 'vdigits_start' and 'vdigits_end'.
    builder = StringBuilder(20)

    use_exp = False
    vdigits_end = buflen
    if code == 'e':
        use_exp = True
        vdigits_end = precision
    elif code == 'f':
        vdigits_end = decpt + precision
    elif code == 'g':
        if decpt <= -4:
            use_exp = True
        elif decpt > precision:
            use_exp = True
        elif flags & rfloat.DTSF_ADD_DOT_0 and decpt == precision:
            use_exp = True
        if flags & rfloat.DTSF_ALT:
            vdigits_end = precision
    elif code == 'r':
        # convert to exponential format at 1e16.  We used to convert
        # at 1e17, but that gives odd-looking results for some values
        # when a 16-digit 'shortest' repr is padded with bogus zeros.
        # For example, repr(2e16+8) would give 20000000000000010.0;
        # the true value is 20000000000000008.0.
        if decpt <= -4 or decpt > 16:
            use_exp = True
    else:
        raise ValueError

    # if using an exponent, reset decimal point position to 1 and
    # adjust exponent accordingly.
    if use_exp:
        exp = decpt - 1
        decpt = 1
    else:
        exp = 0

    # ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
    # decpt < vdigits_end if add_dot_0_if_integer and no exponent
    if decpt <= 0:
        vdigits_start = decpt-1
    else:
        vdigits_start = 0
    if vdigits_end <= decpt:
        if not use_exp and flags & rfloat.DTSF_ADD_DOT_0:
            vdigits_end = decpt + 1
        else:
            vdigits_end = decpt

    # double check inequalities
    assert vdigits_start <= 0
    assert 0 <= buflen <= vdigits_end
    # decimal point should be in (vdigits_start, vdigits_end]
    assert vdigits_start < decpt <= vdigits_end

    if sign == 1:
        builder.append('-')
    elif flags & rfloat.DTSF_SIGN:
        builder.append('+')

    # note that exactly one of the three 'if' conditions is true, so
    # we include exactly one decimal point
    # 1. Zero padding on left of digit string
    if decpt <= 0:
        builder.append_multiple_char('0', decpt - vdigits_start)
        builder.append('.')
        builder.append_multiple_char('0', 0 - decpt)
    else:
        builder.append_multiple_char('0', 0 - vdigits_start)

    # 2. Digits, with included decimal point
    if 0 < decpt <= buflen:
        builder.append(rffi.charpsize2str(digits, decpt - 0))
        builder.append('.')
        ptr = rffi.ptradd(digits, decpt)
        builder.append(rffi.charpsize2str(ptr, buflen - decpt))
    else:
        builder.append(rffi.charpsize2str(digits, buflen))

    # 3. And zeros on the right
    if buflen < decpt:
        builder.append_multiple_char('0', decpt - buflen)
        builder.append('.')
        builder.append_multiple_char('0', vdigits_end - decpt)
    else:
        builder.append_multiple_char('0', vdigits_end - buflen)

    s = builder.build()

    # Delete a trailing decimal pt unless using alternative formatting.
    if not flags & rfloat.DTSF_ALT:
        last = len(s) - 1
        if last >= 0 and s[last] == '.':
            s = s[:last]

    # Now that we've done zero padding, add an exponent if needed.
    if use_exp:
        if upper:
            e = 'E'
        else:
            e = 'e'

        # The exponent is written with at least two digits unless
        # DTSF_CUT_EXP_0 is set.
        if exp >= 0:
            exp_str = str(exp)
            if len(exp_str) < 2 and not (flags & rfloat.DTSF_CUT_EXP_0):
                s += e + '+0' + exp_str
            else:
                s += e + '+' + exp_str
        else:
            exp_str = str(-exp)
            if len(exp_str) < 2 and not (flags & rfloat.DTSF_CUT_EXP_0):
                s += e + '-0' + exp_str
            else:
                s += e + '-' + exp_str

    return s
last_pos = ctx.match_end if filter_is_callable: w_match = self.getmatch(ctx, True) # make a copy of 'ctx'; see test_sub_matches_stay_valid ctx = ctx.fresh_copy( start) # match_start/match_end dropped w_piece = space.call_function(w_filter, w_match) if not space.is_w(w_piece, space.w_None): assert strbuilder is None and unicodebuilder is None assert not use_builder sublist_w.append(w_piece) else: if use_builder: if strbuilder is not None: assert filter_as_string is not None strbuilder.append(filter_as_string) else: assert unicodebuilder is not None assert filter_as_unicode is not None unicodebuilder.append(filter_as_unicode) else: sublist_w.append(w_filter) n += 1 elif last_pos >= ctx.end: break # empty match at the end: finished ctx.reset(start) if last_pos < ctx.end: _sub_append_slice(ctx, space, use_builder, sublist_w, strbuilder, unicodebuilder, last_pos, ctx.end) if use_builder:
def raise_mismatch_err(args):
    """Raise a SchemeException whose message is assembled from ``args``.

    ``args`` holds a symbol naming the origin, then one mandatory
    (message string, value) pair, followed by any number of further
    pairs.  The text is "<name>: " followed by each message and the
    ``tostring()`` of its value, in order.
    """
    who = args[0]
    assert isinstance(who, values.W_Symbol)
    first_message = args[1]
    assert isinstance(first_message, values_string.W_String)
    first_value = args[2]
    assert isinstance(first_value, values.W_Object)
    from rpython.rlib.rstring import StringBuilder
    text = StringBuilder()
    text.append(who.utf8value)
    text.append(": ")
    text.append(first_message.as_str_utf8())
    text.append(first_value.tostring())
    # Remaining arguments come in (message, value) pairs; a trailing
    # unpaired argument is ignored.
    pos = 3
    while pos + 1 < len(args):
        w_message = args[pos]
        assert isinstance(w_message, values_string.W_String)
        text.append(w_message.as_str_utf8())
        w_value = args[pos + 1]
        assert isinstance(w_value, values.W_Object)
        text.append(w_value.tostring())
        pos += 2
    raise SchemeException(text.build())
def PyString_DecodeEscape(space, s, errors, recode_encoding):
    """
    Unescape a backslash-escaped string. If recode_encoding is non-zero,
    the string is UTF-8 encoded and should be re-encoded in the
    specified encoding.

    ``errors`` selects what happens on an invalid ``\\x`` escape:
    'strict' raises a ValueError, 'replace' emits '?', 'ignore' emits
    nothing, and any other value raises a ValueError about the unknown
    error handling code.  Returns the decoded string.
    """
    builder = StringBuilder(len(s))
    ps = 0
    end = len(s)
    while ps < end:
        if s[ps] != '\\':
            # note that the C code has a label here.
            # the logic is the same.
            if recode_encoding and ord(s[ps]) & 0x80:
                w, ps = decode_utf8_recode(space, s, ps, end, recode_encoding)
                # Append bytes to output buffer.
                builder.append(w)
            else:
                builder.append(s[ps])
                ps += 1
            continue

        ps += 1
        if ps == end:
            raise_app_valueerror(space, 'Trailing \\ in string')
        # prevps marks the character right after the backslash.
        prevps = ps
        ch = s[ps]
        ps += 1
        # XXX This assumes ASCII!
        if ch == '\n':
            # backslash-newline produces no output
            pass
        elif ch == '\\':
            builder.append('\\')
        elif ch == "'":
            builder.append("'")
        elif ch == '"':
            builder.append('"')
        elif ch == 'b':
            builder.append("\010")
        elif ch == 'f':
            builder.append('\014')  # FF
        elif ch == 't':
            builder.append('\t')
        elif ch == 'n':
            builder.append('\n')
        elif ch == 'r':
            builder.append('\r')
        elif ch == 'v':
            builder.append('\013')  # VT
        elif ch == 'a':
            builder.append('\007')  # BEL, not classic C
        elif ch in '01234567':
            # Look for up to two more octal digits
            span = ps
            span += (span < end) and (s[span] in '01234567')
            span += (span < end) and (s[span] in '01234567')
            octal = s[prevps:span]
            # emulate a strange wrap-around behavior of CPython:
            # \400 is the same as \000 because 0400 == 256
            num = int(octal, 8) & 0xFF
            builder.append(chr(num))
            ps = span
        elif ch == 'x':
            if ps + 2 <= end and isxdigit(s[ps]) and isxdigit(s[ps + 1]):
                hexa = s[ps:ps + 2]
                num = int(hexa, 16)
                builder.append(chr(num))
                ps += 2
            else:
                if errors == 'strict':
                    raise_app_valueerror(
                        space, "invalid \\x escape at position %d" % (ps - 2))
                elif errors == 'replace':
                    builder.append('?')
                elif errors == 'ignore':
                    pass
                else:
                    raise oefmt(
                        space.w_ValueError, "decoding error; "
                        "unknown error handling code: %s", errors)
                # Skip a single hex digit that may follow the bad escape.
                if ps + 1 <= end and isxdigit(s[ps]):
                    ps += 1
        else:
            # this was not an escape, so the backslash
            # has to be added, and we start over in
            # non-escape mode.
            builder.append('\\')
            ps -= 1
            assert ps >= 0
            continue
            # an arbitrary number of unescaped UTF-8 bytes may follow.

    buf = builder.build()
    return buf
def escape(self):
    """Return the command followed by its space-separated, escaped args.

    Inside an argument, backslash, ';', newline and space are written as
    ``\\\\``, ``\\;``, ``\\n`` and ``\\_`` respectively.  With no
    argument list at all the bare command is returned.
    """
    if self.args is None:
        return self.command
    out = StringBuilder()
    out.append(self.command)
    for arg in self.args:
        out.append(" ")
        for ch in arg:
            if ch == '\\':
                replacement = '\\\\'
            elif ch == ';':
                replacement = '\\;'
            elif ch == "\n":
                replacement = "\\n"
            elif ch == " ":
                replacement = "\\_"
            else:
                replacement = ch
            out.append(replacement)
    return out.build()
def raw_encode_basestring_ascii(space, w_string):
    """Return the ASCII-only, JSON-style escaped form of ``w_string``.

    A byte string made only of printable ASCII other than '"' and '\\'
    is returned unchanged.  Otherwise quotes and backslashes are
    backslash-escaped, characters below space are taken from
    ESCAPE_BEFORE_SPACE, and characters above '~' become ``\\uXXXX``
    escapes (a pair of them for code points above U+FFFF).
    """
    if space.isinstance_w(w_string, space.w_str):
        s = space.str_w(w_string)
        for i in range(len(s)):
            c = s[i]
            if c >= ' ' and c <= '~' and c != '"' and c != '\\':
                pass
            else:
                # index of the first character that needs attention
                first = i
                break
        else:
            # the input is a string with only non-special ascii chars
            return w_string

        eh = unicodehelper.decode_error_handler(space)
        u = str_decode_utf_8(
            s, len(s), None, final=True, errorhandler=eh,
            allow_surrogates=True)[0]
        sb = StringBuilder(len(u))
        # Everything before 'first' is plain ASCII and is copied as-is;
        # for that prefix byte indexes in 's' equal character indexes
        # in 'u'.
        sb.append_slice(s, 0, first)
    else:
        # We used to check if 'u' contains only safe characters, and return
        # 'w_string' directly.  But this requires an extra pass over all
        # characters, and the expected use case of this function, from
        # json.encoder, will anyway re-encode a unicode result back to
        # a string (with the ascii encoding).  This requires two passes
        # over the characters.  So we may as well directly turn it into a
        # string here --- only one pass.
        u = space.unicode_w(w_string)
        sb = StringBuilder(len(u))
        first = 0

    for i in range(first, len(u)):
        c = u[i]
        if c <= u'~':
            if c == u'"' or c == u'\\':
                sb.append('\\')
            elif c < u' ':
                sb.append(ESCAPE_BEFORE_SPACE[ord(c)])
                continue
            sb.append(chr(ord(c)))
        else:
            if c <= u'\uffff':
                sb.append('\\u')
                sb.append(HEX[ord(c) >> 12])
                sb.append(HEX[(ord(c) >> 8) & 0x0f])
                sb.append(HEX[(ord(c) >> 4) & 0x0f])
                sb.append(HEX[ord(c) & 0x0f])
            else:
                # surrogate pair
                n = ord(c) - 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                # the leading hex digit of both halves is always 'd'
                sb.append('\\ud')
                sb.append(HEX[(s1 >> 8) & 0x0f])
                sb.append(HEX[(s1 >> 4) & 0x0f])
                sb.append(HEX[s1 & 0x0f])
                s2 = 0xdc00 | (n & 0x3ff)
                sb.append('\\ud')
                sb.append(HEX[(s2 >> 8) & 0x0f])
                sb.append(HEX[(s2 >> 4) & 0x0f])
                sb.append(HEX[s2 & 0x0f])

    res = sb.build()
    return space.wrap(res)
def unicode_escape(s):
    """Return an escaped form of the string ``s``.

    The behavior depends on the names ``quotes``, ``prefix`` and
    ``pass_printable``, which are not defined in this function and come
    from an enclosing scope.  With ``quotes`` set, the result is wrapped
    in quote characters (double quotes only when ``s`` contains a single
    quote and no double quote) and preceded by ``prefix`` if it is
    non-empty.
    """
    size = len(s)
    result = StringBuilder(size)

    if quotes:
        if prefix:
            result.append(prefix)
        if s.find('\'') != -1 and s.find('\"') == -1:
            quote = ord('\"')
            result.append('"')
        else:
            quote = ord('\'')
            result.append('\'')
    else:
        quote = 0

        # NOTE(review): the nesting of this early return under the
        # 'else' branch was reconstructed from a flattened source --
        # confirm against the original file.
        if size == 0:
            return ''

    pos = 0
    while pos < size:
        oc = codepoint_at_pos(s, pos)
        ch = s[pos]

        # Escape quotes
        if quotes and (oc == quote or ch == '\\'):
            result.append('\\')
            next_pos = next_codepoint_pos(s, pos)
            result.append_slice(s, pos, next_pos)
            pos = next_pos
            continue

        # The following logic is enabled only if MAXUNICODE == 0xffff, or
        # for testing on top of a host Python where sys.maxunicode == 0xffff
        if (not we_are_translated() and sys.maxunicode == 0xFFFF and
                0xD800 <= oc < 0xDC00 and pos + 3 < size):
            # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
            pos += 3
            oc2 = codepoint_at_pos(s, pos)

            if 0xDC00 <= oc2 <= 0xDFFF:
                ucs = (((oc & 0x03FF) << 10) | (oc2 & 0x03FF)) + 0x00010000
                char_escape_helper(result, ucs)
                pos += 3
                continue
            # Fall through: isolated surrogates are copied as-is
            pos -= 3

        # Map special whitespace to '\t', \n', '\r'
        if ch == '\t':
            result.append('\\t')
        elif ch == '\n':
            result.append('\\n')
        elif ch == '\r':
            result.append('\\r')
        elif ch == '\\':
            result.append('\\\\')

        # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
        elif pass_printable and not (oc <= 0x10ffff and unicodedb.isprintable(oc)):
            char_escape_helper(result, oc)
        elif not pass_printable and (oc < 32 or oc >= 0x7F):
            char_escape_helper(result, oc)

        # Copy everything else as-is
        else:
            if oc < 128:
                result.append(ch)
            else:
                next_pos = next_codepoint_pos(s, pos)
                result.append_slice(s, pos, next_pos)
        pos = next_codepoint_pos(s, pos)

    if quotes:
        result.append(chr(quote))
    return result.build()
def string_escape_encode(s, quote):
    """Return ``s`` escaped and wrapped in the ``quote`` character.

    Backslash and the quote character are backslash-escaped; tab,
    carriage return and newline become ``\\t``, ``\\r``, ``\\n``; any
    other character outside 0x20..0x7e becomes ``\\xHH``.
    """
    out = StringBuilder(len(s) + 2)
    out.append(quote)
    run_start = 0     # start of the run of plain chars not yet copied
    for pos in range(len(s)):
        ch = s[pos]
        as_hex = False
        letter = ch
        if ch == '\\' or ch == quote:
            letter = ch
        elif ch == '\t':
            letter = 't'
        elif ch == '\r':
            letter = 'r'
        elif ch == '\n':
            letter = 'n'
        elif '\x20' <= ch < '\x7f':
            # Plain printable character: leave it in the pending run.
            continue
        else:
            as_hex = True

        # Flush the plain characters seen since the previous escape.
        if pos != run_start:
            out.append_slice(s, run_start, pos)
        run_start = pos + 1

        if as_hex:
            code = ord(ch)
            out.append('\\x')
            out.append("0123456789abcdef"[code >> 4])
            out.append("0123456789abcdef"[code & 0xF])
        else:
            out.append('\\')
            out.append(letter)

    if len(s) != run_start:
        out.append_slice(s, run_start, len(s))
    out.append(quote)
    return out.build()
def lower(string):
    """Return the lowercase version of the string in the current locale."""
    out = StringBuilder(len(string))
    for pos in range(len(string)):
        out.append(lower_char(string[pos]))
    return out.build()
def fstring_find_literal(astbuilder, fstr, atom_node, rec):
    """Extract the next literal part of an f-string.

    Scanning starts at ``fstr.current_index`` in ``fstr.unparsed`` and
    stops at an un-doubled brace or at the end of the string;
    ``fstr.current_index`` is updated to the stopping position.  Doubled
    braces are collapsed only when ``rec == 0``.  Returns the literal as
    a wrapped text object; errors are reported via ``astbuilder.error``.
    """
    space = astbuilder.space
    raw = fstr.raw_mode

    # Return the next literal part.  Updates the current index inside 'fstr'.
    # Differs from CPython: this version handles double-braces on its own.
    s = fstr.unparsed
    literal_start = fstr.current_index
    assert literal_start >= 0

    # Get any literal string. It ends when we hit an un-doubled left
    # brace (which isn't part of a unicode name escape such as
    # "\N{EULER CONSTANT}"), or the end of the string.
    i = literal_start
    builder = StringBuilder()
    while i < len(s):
        ch = s[i]
        i += 1
        if not raw and ch == '\\' and i < len(s):
            # 'ch' becomes the character following the backslash
            ch = s[i]
            i += 1
            if ch == 'N':
                if i < len(s) and s[i] == '{':
                    # skip over the whole "{...}" name
                    while i < len(s) and s[i] != '}':
                        i += 1
                    if i < len(s):
                        i += 1
                    continue
                elif i < len(s):
                    i += 1
                break
            if ch == '{':
                msg = "invalid escape sequence '%s'"
                try:
                    space.warn(space.newtext(msg % ch), space.w_DeprecationWarning)
                except error.OperationError as e:
                    # a warning that was turned into an error is
                    # reported through the ast builder
                    if e.match(space, space.w_DeprecationWarning):
                        astbuilder.error(msg % ch, atom_node)
                    else:
                        raise
        if ch == '{' or ch == '}':
            # Check for doubled braces, but only at the top level. If
            # we checked at every level, then f'{0:{3}}' would fail
            # with the two closing braces.
            if rec == 0 and i < len(s) and s[i] == ch:
                assert 0 <= i <= len(s)
                builder.append(s[literal_start:i])
                i += 1  # skip over the second brace
                literal_start = i
            elif rec == 0 and ch == '}':
                i -= 1
                assert i >= 0
                fstr.current_index = i
                # Where a single '{' is the start of a new expression, a
                # single '}' is not allowed.
                astbuilder.error("f-string: single '}' is not allowed",
                                 atom_node)
            else:
                # We're either at a '{', which means we're starting another
                # expression; or a '}', which means we're at the end of this
                # f-string (for a nested format_spec).
                i -= 1
                break
    assert 0 <= i <= len(s)
    assert i == len(s) or s[i] == '{' or s[i] == '}'
    builder.append(s[literal_start:i])

    fstr.current_index = i
    literal = builder.build()
    lgt = codepoints_in_utf8(literal)
    # Backslash escapes are only decoded outside raw mode.
    if not raw and '\\' in literal:
        literal = parsestring.decode_unicode_utf8(space, literal, 0,
                                                  len(literal))
        literal, lgt, pos = unicodehelper.decode_unicode_escape(space, literal)
    return space.newtext(literal, lgt)
def escapeshellarg(interp, arg):
    """Return ``arg`` wrapped in single quotes, with inner quotes escaped."""
    quoted = StringBuilder(len(arg) + 2)
    quoted.append("'")
    for ch in arg:
        if ch != "'":
            quoted.append(ch)
            continue
        # Close the quoted run, emit an escaped quote, reopen the run.
        quoted.append("'\\''")
    quoted.append("'")
    return interp.space.wrap(quoted.build())
def array_var_export(dct_w, space, indent, recursion, w_reckey,
                     header, prefix=' ', suffix='', arr_in_arr=False):
    """Return the var_export-style text for the entries of ``dct_w``.

    ``recursion`` is a dict used as the set of containers currently being
    exported; ``w_reckey`` is this container's key in it.  When
    ``w_reckey`` is already present a '*RECURSION*' marker line is
    returned instead.  A value that is ``w_reckey`` itself triggers a
    warning and an empty string result.
    """
    acc = StringBuilder()
    if w_reckey in recursion:
        return '%s*RECURSION*\n' % indent
    recursion[w_reckey] = None
    if arr_in_arr:
        acc.append('%s%s%s(\n' % (' ', header, prefix))
    else:
        acc.append('%s%s%s(\n' % (indent, header, prefix))

    if indent.endswith('&'):
        indent = indent[:-1]
    subindent = indent + ' '
    for key, w_value in dct_w.iteritems():
        w_value = w_value.deref_temp()
        # case where the attribute is protected...
        if key.startswith('\x00') and len(key) > 1:
            key = key[3:]
        if w_value is w_reckey:
            # space.ec.error("Nesting level too deep - recursive dependency?")
            space.ec.warn("var_export does not handle circular references")
            return ""
        # Keys that convert to an integer are printed as numbers, all
        # other keys as exported strings.
        try:
            index = try_convert_str_to_int(key)
            s = '%s%d =>' % (subindent, index)
        except ValueError:
            key = string_var_export(key)
            s = '%s%s =>' % (subindent, key)
        acc.append(s)
        if isinstance(w_value, W_ArrayObject):
            acc.append(
                array_var_export(w_value.as_rdict(), space, ' ', recursion,
                                 w_value, '\n array', suffix=',',
                                 arr_in_arr=True))
        elif w_value.tp == space.tp_object:
            acc.append('\n')
            acc.append(w_value.var_export(space, ' ', recursion, suffix='),'))
        else:
            acc.append(w_value.var_export(space, ' ', recursion, suffix=','))
        acc.append('\n')

    acc.append('%s)%s' % (indent, suffix))
    # This container is done: allow it to be exported again elsewhere.
    del recursion[w_reckey]
    return acc.build()
def decode_w(self, space, w_input, final=False):
    """Decode ``w_input`` and handle newlines in the result.

    The input goes through ``self.w_decoder`` unless that is None at
    application level.  A trailing '\\r' is held back in
    ``self.pendingcr`` until the next call (unless ``final``).  The kinds
    of newline seen are accumulated into ``self.seennl``; when
    ``self.translate`` is set, '\\r' and '\\r\\n' are replaced by '\\n'.
    Raises ValueError if the decoder was never set up and TypeError if
    the decoder does not return a unicode string.
    """
    if self.w_decoder is None:
        raise oefmt(space.w_ValueError,
                    "IncrementalNewlineDecoder.__init__ not called")

    # decode input (with the eventual \r from a previous pass)
    if not space.is_w(self.w_decoder, space.w_None):
        w_output = space.call_method(self.w_decoder, "decode",
                                     w_input, space.newbool(bool(final)))
    else:
        w_output = w_input

    if not space.isinstance_w(w_output, space.w_unicode):
        raise oefmt(space.w_TypeError,
                    "decoder should return a string result")

    output, output_len = space.utf8_len_w(w_output)
    # from here on output_len counts bytes of 'output'
    output_len = len(output)
    if self.pendingcr and (final or output_len):
        output = '\r' + output
        self.pendingcr = False
        output_len += 1

    # retain last \r even when not translating data:
    # then readline() is sure to get \r\n in one pass
    if not final and output_len > 0:
        last = len(output) - 1
        assert last >= 0
        if output[last] == '\r':
            output = output[:last]
            self.pendingcr = True
            output_len -= 1

    if output_len == 0:
        return space.newutf8("", 0)

    # Record which newlines are read and do newline translation if
    # desired, all in one pass.
    seennl = self.seennl

    if output.find('\r') < 0:
        # If no \r, quick scan for a possible "\n" character.
        # (there's nothing else to be done, even when in translation mode)
        if output.find('\n') >= 0:
            seennl |= SEEN_LF
        # Finished: we have scanned for newlines, and none of them
        # need translating.
    elif not self.translate:
        i = 0
        while i < len(output):
            # stop early once every kind of newline has been seen
            if seennl == SEEN_ALL:
                break
            c = output[i]
            i += 1
            if c == '\n':
                seennl |= SEEN_LF
            elif c == '\r':
                if i < len(output) and output[i] == '\n':
                    seennl |= SEEN_CRLF
                    i += 1
                else:
                    seennl |= SEEN_CR
    elif output.find('\r') >= 0:
        # Translate!
        builder = StringBuilder(len(output))
        i = 0
        while i < output_len:
            c = output[i]
            i += 1
            if c == '\n':
                seennl |= SEEN_LF
            elif c == '\r':
                if i < len(output) and output[i] == '\n':
                    seennl |= SEEN_CRLF
                    i += 1
                else:
                    seennl |= SEEN_CR
                # both '\r' and '\r\n' are emitted as a single '\n'
                builder.append('\n')
                continue
            builder.append(c)
        output = builder.build()

    self.seennl |= seennl
    lgt = check_utf8(output, True)
    return space.newutf8(output, lgt)
def raw_encode_basestring_ascii(space, w_unicode):
    """Return `w_unicode` with everything outside printable ASCII escaped.

    If the utf8 bytes of the string are all in the range 32..126 and
    contain neither a double quote nor a backslash, the very same
    wrapped object is returned.  Otherwise a new wrapped text is built
    in which quotes and backslashes are backslash-escaped, code points
    below the space character are replaced through ESCAPE_BEFORE_SPACE,
    and code points above '~' are written as \\uXXXX (as a surrogate
    pair when above U+FFFF).
    """
    u = space.utf8_w(w_unicode)
    for byte in u:
        code = ord(byte)
        if not (32 <= code <= 126) or code == ord('\\') or code == ord('"'):
            break
    else:
        # The unicode string 'u' contains only safe characters.
        return w_unicode

    sb = StringBuilder(len(u) + 20)
    for cp in Utf8StringIterator(u):
        if cp > ord('~'):
            if cp > ord(u'\uffff'):
                # surrogate pair
                n = cp - 0x10000
                high = 0xd800 | ((n >> 10) & 0x3ff)
                low = 0xdc00 | (n & 0x3ff)
                sb.append('\\ud')
                sb.append(HEX[(high >> 8) & 0x0f])
                sb.append(HEX[(high >> 4) & 0x0f])
                sb.append(HEX[high & 0x0f])
                sb.append('\\ud')
                sb.append(HEX[(low >> 8) & 0x0f])
                sb.append(HEX[(low >> 4) & 0x0f])
                sb.append(HEX[low & 0x0f])
            else:
                sb.append('\\u')
                sb.append(HEX[cp >> 12])
                sb.append(HEX[(cp >> 8) & 0x0f])
                sb.append(HEX[(cp >> 4) & 0x0f])
                sb.append(HEX[cp & 0x0f])
            continue

        if cp < ord(' '):
            # Control character: use the precomputed escape.
            sb.append(ESCAPE_BEFORE_SPACE[cp])
            continue

        if cp == ord('"') or cp == ord('\\'):
            sb.append('\\')
        sb.append(chr(cp))

    res = sb.build()
    return space.newtext(res)
class Serializer(object):
    """Writes bytecode objects into a flat string.

    Every value is appended to `self.builder`; integers are packed with
    the native "l" struct format and every string or list is preceded
    by its length.  Call `finish()` to obtain the accumulated string.
    """

    def __init__(self, space):
        self.builder = StringBuilder()
        self.space = space

    # ---- primitives -------------------------------------------------

    def write_int(self, i):
        """Append `i` packed as a native long."""
        packed = struct.pack("l", i)
        self.builder.append(packed)

    def write_char(self, c):
        """Append the single character `c`."""
        assert len(c) == 1
        self.builder.append(c)

    def write_str(self, s):
        """Append the length of `s` followed by `s` itself."""
        self.write_int(len(s))
        self.builder.append(s)

    def write_wrapped_item(self, w_item):
        """Let the wrapped object serialize itself into the builder."""
        w_item.ll_serialize(self.builder)

    # ---- length-prefixed lists ----------------------------------------

    def write_wrapped_list(self, lst_w):
        """Append a count followed by every wrapped item."""
        self.write_int(len(lst_w))
        for w_entry in lst_w:
            self.write_wrapped_item(w_entry)

    def write_list_of_str(self, lst):
        """Append a count followed by every string."""
        self.write_int(len(lst))
        for text in lst:
            self.write_str(text)

    def write_list_of_int(self, lst):
        """Append a count followed by every integer."""
        self.write_int(len(lst))
        for number in lst:
            self.write_int(number)

    def write_list_of_char(self, lst):
        """Append a count followed by every character."""
        self.write_int(len(lst))
        for char in lst:
            self.write_char(char)

    def write_list_of_functions(self, lst):
        """Append a count followed by tagged entries.

        A Function is tagged "f", a ClassDeclaration is tagged "u";
        anything else raises NotImplementedError.
        """
        from hippy.function import Function
        from hippy.klass import ClassDeclaration
        self.write_int(len(lst))
        for entry in lst:
            if isinstance(entry, Function):
                self.write_char("f")
                self.write_function(entry)
            elif isinstance(entry, ClassDeclaration):
                self.write_char("u")
                self.write_class(entry)
            else:
                raise NotImplementedError

    # ---- composite objects --------------------------------------------

    def write_function(self, func):
        """Append the bytecode, names and types of `func`."""
        self.write_bytecode(func.bytecode)
        self.write_list_of_str(func.names)
        self.write_list_of_char(func.types)
        # closuredecls, defaults_w, typehints are missing

    def write_class(self, klass):
        """Append the name, line number and methods of `klass`."""
        # extends_name, property_decl, all_parents, access_flags,
        # const_decl, constructor_method, constants_w, initial_instance_dct_w,
        # base_interface_names, identifier, properties, methods
        self.write_str(klass.name)
        self.write_int(klass.lineno)
        methods = klass.method_decl
        self.write_int(len(methods))
        for decl in methods.itervalues():
            self.write_str(decl.func.name)
            self.write_int(decl.access_flags)
            self.write_function(decl.func)  # XXX

    def write_bytecode(self, bc):
        """Append every serialized field of the bytecode `bc`.

        Returns the serializer itself.
        """
        self.write_str(bc.code)
        self.write_wrapped_list(bc.consts)
        self.write_str(bc.name)
        self.write_str(bc.filename)
        self.write_int(bc.startlineno)
        # The lists are written from copies, as in the original code.
        self.write_list_of_str(bc.sourcelines[:])
        self.write_list_of_str(bc.names[:])
        self.write_list_of_str(bc.varnames[:])
        self.write_list_of_int(bc.superglobals[:])
        self.write_int(bc.this_var_num)
        self.write_list_of_functions(bc.late_declarations[:])
        self.write_list_of_functions(bc.classes[:])
        self.write_list_of_functions(bc.functions[:])
        self.write_list_of_int(bc.bc_mapping[:])
        return self

    def finish(self):
        """Return everything written so far as one string."""
        return self.builder.build()
def surrogatepass_errors(space, w_exc):
    """Error callback that lets lone surrogates through a codec.

    For a UnicodeEncodeError, every code point between the exception's
    start and end must be a surrogate (0xd800..0xdfff); each one is
    emitted as raw bytes in the exception's encoding and the result is
    a wrapped (bytes, end) tuple.  For a UnicodeDecodeError, a single
    surrogate is decoded at the start position and the result is a
    wrapped (text, new position) tuple.

    The original exception is re-raised when the encoding is unknown
    (encode case) or when the data is not a surrogate.  Any other
    exception type raises a wrapped TypeError.
    """
    check_exception(space, w_exc)

    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        w_obj = space.getattr(w_exc, space.newtext('object'))
        w_obj = space.convert_arg_to_w_unicode(w_obj)
        start = space.int_w(space.getattr(w_exc, space.newtext('start')))
        w_end = space.getattr(w_exc, space.newtext('end'))
        encoding = space.text_w(
            space.getattr(w_exc, space.newtext('encoding')))
        bytelength, code = get_standard_encoding(encoding)
        if code == ENC_UNKNOWN:
            # Not supported, fail with original exception
            raise OperationError(space.type(w_exc), w_exc)
        end = space.int_w(w_end)
        builder = StringBuilder()
        # From here on, positions are byte offsets into the utf8 data.
        start = w_obj._index_to_byte(start)
        end = w_obj._index_to_byte(end)
        utf8 = w_obj._utf8
        pos = start
        while pos < end:
            ch = rutf8.codepoint_at_pos(utf8, pos)
            pos = rutf8.next_codepoint_pos(utf8, pos)
            if not (0xd800 <= ch <= 0xdfff):
                # Not a surrogate, fail with original exception
                raise OperationError(space.type(w_exc), w_exc)
            high = chr(ch >> 8)
            low = chr(ch & 0xff)
            if code == ENC_UTF8:
                builder.append(chr(0xe0 | (ch >> 12)))
                builder.append(chr(0x80 | ((ch >> 6) & 0x3f)))
                builder.append(chr(0x80 | (ch & 0x3f)))
            elif code == ENC_UTF16LE:
                builder.append(low)
                builder.append(high)
            elif code == ENC_UTF16BE:
                builder.append(high)
                builder.append(low)
            elif code == ENC_UTF32LE:
                builder.append(low)
                builder.append(high)
                builder.append(chr(0))
                builder.append(chr(0))
            elif code == ENC_UTF32BE:
                builder.append(chr(0))
                builder.append(chr(0))
                builder.append(high)
                builder.append(low)
        return space.newtuple([space.newbytes(builder.build()), w_end])

    if space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
        start = space.int_w(space.getattr(w_exc, space.newtext('start')))
        obj = space.bytes_w(space.getattr(w_exc, space.newtext('object')))
        encoding = space.text_w(
            space.getattr(w_exc, space.newtext('encoding')))
        bytelength, code = get_standard_encoding(encoding)
        ch = 0
        # Try decoding a single surrogate character. If there are more,
        # let the codec call us again
        size = len(obj)
        # A byte past the end of the input is represented by -1.
        ch0 = ord(obj[start + 0]) if size > start + 0 else -1
        ch1 = ord(obj[start + 1]) if size > start + 1 else -1
        ch2 = ord(obj[start + 2]) if size > start + 2 else -1
        ch3 = ord(obj[start + 3]) if size > start + 3 else -1
        if code == ENC_UTF8:
            if (ch1 != -1 and ch2 != -1 and
                    ch0 & 0xf0 == 0xe0 and
                    ch1 & 0xc0 == 0x80 and
                    ch2 & 0xc0 == 0x80):
                # it's a three-byte code
                ch = (((ch0 & 0x0f) << 12) +
                      ((ch1 & 0x3f) << 6) +
                      (ch2 & 0x3f))
        elif code == ENC_UTF16LE:
            ch = (ch1 << 8) | ch0
        elif code == ENC_UTF16BE:
            ch = (ch0 << 8) | ch1
        elif code == ENC_UTF32LE:
            ch = (ch3 << 24) | (ch2 << 16) | (ch1 << 8) | ch0
        elif code == ENC_UTF32BE:
            ch = (ch0 << 24) | (ch1 << 16) | (ch2 << 8) | ch3
        if ch < 0xd800 or ch > 0xdfff:
            # it's not a surrogate - fail (this also covers ch == 0,
            # i.e. nothing could be decoded)
            raise OperationError(space.type(w_exc), w_exc)
        ch_utf8 = rutf8.unichr_as_utf8(ch, allow_surrogates=True)
        return space.newtuple(
            [space.newtext(ch_utf8, 1), space.newint(start + bytelength)])

    raise oefmt(space.w_TypeError,
                "don't know how to handle %T in error callback", w_exc)
def fn(n):
    """Append to a throwaway StringBuilder and return 1.

    The builder's content is never read; the result comes from a
    one-element list created before the append.  `n` is unused.
    """
    builder = StringBuilder()
    cell = [1]
    builder.append("hello world")
    return cell[0]
def here_doc(self):
    """Lex the opening of a here-document (generator).

    Reads an optional "-" (indent flag), then the terminator marker,
    which is either quoted with ', " or ` or a bare run of alphanumeric
    and "_" characters.  If no valid marker starts here, the characters
    read are pushed back and nothing is yielded.  Otherwise the rest of
    the current line is consumed, `self.str_term` is set to a
    HeredocTerm and one token is yielded: "XSTRING_BEG" for a
    backtick-quoted marker, "STRING_BEG" otherwise.
    """
    c = self.read()
    indent = c == "-"
    expand = True
    regexp = False
    if indent:
        c = self.read()

    if c in "'\"`":
        # Quoted marker: read up to the matching quote (or EOF).
        term = c
        if term == "'":
            expand = False
        elif term == "`":
            regexp = True
        marker = StringBuilder()
        while True:
            c = self.read()
            if c == self.EOF:
                self.unread()
                break
            if c == term:
                break
            marker.append(c)
    else:
        if not c.isalnum() and c != "_":
            # Not a here-document after all: push back what was read.
            self.unread()
            if indent:
                self.unread()
            return
        # Bare marker: a run of identifier characters.
        marker = StringBuilder()
        marker.append(c)
        while True:
            c = self.read()
            if c == self.EOF or not (c.isalnum() or c == "_"):
                self.unread()
                break
            marker.append(c)

    # Collect the remainder of the line that introduced the heredoc.
    rest_of_line = StringBuilder()
    while True:
        c = self.read()
        if c in "\r\n":
            self.newline(c)
            break
        if c == self.EOF:
            self.unread()
            break
        rest_of_line.append(c)

    self.str_term = HeredocTerm(self, marker.build(), rest_of_line.build(),
                                indent=indent, expand=expand)
    if regexp:
        token_name = "XSTRING_BEG"
    else:
        token_name = "STRING_BEG"
    yield self.emit(token_name)
def descr_repr(self, space):
    """Return the wrapped repr, of the form bytearray(b'...').

    Double quotes are used as delimiter when the data contains a single
    quote and no double quote; single quotes otherwise.  Backslashes
    and single quotes are backslash-escaped, tab, carriage return and
    newline use their short escapes, and any other character outside
    0x20..0x7e is written as a two-digit hex escape.
    """
    hexdigits = "0123456789abcdef"
    data = self.data

    # Good default if there are no replacements.
    out = StringBuilder(len("bytearray(b'')") + len(data))
    out.append("bytearray(b")

    # Choose the delimiter; a double quote anywhere forces "'".
    quote = "'"
    for ch in data:
        if ch == '"':
            quote = "'"
            break
        if ch == "'":
            quote = '"'
    out.append(quote)

    for ch in data:
        if ch == '\\' or ch == "'":
            out.append('\\')
            out.append(ch)
        elif ch == '\t':
            out.append('\\t')
        elif ch == '\r':
            out.append('\\r')
        elif ch == '\n':
            out.append('\\n')
        elif ch < '\x20' or ch >= '\x7f':
            value = ord(ch)
            out.append('\\x')
            out.append(hexdigits[value >> 4])
            out.append(hexdigits[value & 0xF])
        else:
            out.append(ch)

    out.append(quote)
    out.append(")")
    return space.wrap(out.build())
def subx(self, w_ptemplate, w_string, count):
    """Replace matches of this pattern in `w_string` using `w_ptemplate`.

    `w_ptemplate` is either a callable (called with each match object)
    or a template.  A template without any backslash is treated as a
    literal; any other template is handed to the `_subx` function of
    the module returned by import_re().  `count` limits the number of
    replacements; a false value means no limit.

    Returns a tuple (w_result, n) where n is the number of replacements
    performed.

    Raises a wrapped TypeError when a bytes pattern is used on a
    unicode string, or a string pattern on a non-unicode object.
    """
    space = self.space
    # use a (much faster) string builder (possibly utf8) if w_ptemplate and
    # w_string are both string or both unicode objects, and if w_ptemplate
    # is a literal
    use_builder = '\x00'  # or 'S'tring or 'U'nicode/UTF8
    is_buffer = False
    filter_as_string = None
    # The kind of w_string must agree with the kind of the pattern.
    if space.isinstance_w(w_string, space.w_unicode):
        if not self.is_known_unicode():
            raise oefmt(
                space.w_TypeError,
                "cannot use a bytes pattern on a string-like object")
    else:
        if self.is_known_unicode():
            raise oefmt(
                space.w_TypeError,
                "cannot use a string pattern on a bytes-like object")
    if space.is_true(space.callable(w_ptemplate)):
        w_filter = w_ptemplate
        filter_is_callable = True
    else:
        if space.isinstance_w(w_ptemplate, space.w_unicode):
            filter_as_string = space.utf8_w(w_ptemplate)
            literal = '\\' not in filter_as_string
            if space.isinstance_w(w_string, space.w_unicode) and literal:
                use_builder = 'U'
        elif space.isinstance_w(w_ptemplate, space.w_bytes):
            filter_as_string = space.bytes_w(w_ptemplate)
            literal = '\\' not in filter_as_string
            if space.isinstance_w(w_string, space.w_bytes) and literal:
                use_builder = 'S'
        else:
            # NOTE(review): the w_bytes case is already handled by the
            # 'elif' above, so this inner check looks unreachable --
            # confirm before simplifying.
            if space.isinstance_w(w_ptemplate, space.w_bytes):
                filter_as_string = space.bytes_w(w_ptemplate)
            else:
                filter_as_string = space.readbuf_w(w_ptemplate).as_str()
                is_buffer = True
            literal = '\\' not in filter_as_string
            if space.isinstance_w(w_string, space.w_bytes) and literal:
                use_builder = 'S'
        if literal:
            w_filter = w_ptemplate
            filter_is_callable = False
        else:
            # not a literal; hand it over to the template compiler
            # FIX for a CPython 3.5 bug: if w_ptemplate is a buffer
            # (e.g. a bytearray), convert it to a byte string here.
            if is_buffer:
                w_ptemplate = space.newbytes(filter_as_string)
            w_re = import_re(space)
            w_filter = space.call_method(w_re, '_subx', self, w_ptemplate)
            # The compiled template may itself turn out to be a callable.
            filter_is_callable = space.is_true(space.callable(w_filter))
    #
    # XXX this is a bit of a mess, but it improves performance a lot
    ctx = self.make_ctx(w_string)
    # Exactly one of the two accumulators is used: 'strbuilder' on the
    # fast path, 'sublist_w' (joined at the end) otherwise.
    sublist_w = strbuilder = None
    if use_builder != '\x00':
        assert filter_as_string is not None
        strbuilder = StringBuilder(ctx.end)
    else:
        sublist_w = []
    n = 0
    last_pos = ctx.ZERO
    while not count or n < count:
        pattern = self.code
        sub_jitdriver.jit_merge_point(
            self=self,
            use_builder=use_builder,
            filter_is_callable=filter_is_callable,
            filter_type=type(w_filter),
            ctx=ctx, pattern=pattern,
            w_filter=w_filter,
            strbuilder=strbuilder,
            filter_as_string=filter_as_string,
            count=count,
            w_string=w_string,
            n=n, last_pos=last_pos, sublist_w=sublist_w)
        space = self.space
        if not searchcontext(space, ctx, pattern):
            break
        # Copy the unmatched text between the previous and this match.
        if last_pos < ctx.match_start:
            _sub_append_slice(
                ctx, space, use_builder, sublist_w,
                strbuilder, last_pos, ctx.match_start)
        if not (last_pos == ctx.match_start
                         == ctx.match_end and n > 0):
            # the above ignores empty matches on latest position
            last_pos = ctx.match_end
            if filter_is_callable:
                w_match = self.getmatch(ctx, True)
                # make a copy of 'ctx'; see test_sub_matches_stay_valid
                ctx = self.fresh_copy(ctx)
                w_piece = space.call_function(w_filter, w_match)
                # A callable returning None contributes nothing.
                if not space.is_w(w_piece, space.w_None):
                    assert strbuilder is None
                    assert use_builder == '\x00'
                    sublist_w.append(w_piece)
            else:
                if use_builder != '\x00':
                    assert filter_as_string is not None
                    assert strbuilder is not None
                    strbuilder.append(filter_as_string)
                else:
                    sublist_w.append(w_filter)
            n += 1
        elif last_pos >= ctx.end:
            break    # empty match at the end: finished

        # Restart the search after this match; step past an empty match
        # so that the loop makes progress.
        start = ctx.match_end
        if start == ctx.match_start:
            if start == ctx.end:
                break
            start = ctx.next_indirect(start)
        ctx.reset(start)

    # Copy whatever follows the last match.
    if last_pos < ctx.end:
        _sub_append_slice(ctx, space, use_builder, sublist_w,
                          strbuilder, last_pos, ctx.end)
    if use_builder != '\x00':
        assert strbuilder is not None
        result_bytes = strbuilder.build()
        if use_builder == 'S':
            assert not isinstance(ctx, rsre_utf8.Utf8MatchContext)
            return space.newbytes(result_bytes), n
        elif use_builder == 'U':
            assert (isinstance(ctx, UnicodeAsciiMatchContext) or
                    isinstance(ctx, rsre_utf8.Utf8MatchContext))
            return space.newutf8(result_bytes,
                                 rutf8.codepoints_in_utf8(result_bytes)), n
        else:
            raise AssertionError(use_builder)
    else:
        # Join the collected pieces with an empty string of the same
        # kind as w_string.
        if space.isinstance_w(w_string, space.w_unicode):
            w_emptystr = space.newutf8('', 0)
        else:
            w_emptystr = space.newbytes('')
        w_item = space.call_method(w_emptystr, 'join',
                                   space.newlist(sublist_w))
        return w_item, n
def readline(self, size=-1):
    """Read and return one line from the file.

    `size` == 0 returns the empty string; a negative `size` means no
    limit; a positive `size` caps the number of characters returned.
    In universal-newline mode a carriage return, or a carriage return
    followed by a newline, is returned as a single newline and the
    kinds of newline seen are recorded on the instance.

    Raises the error built by `_error` if end-of-file was reached and
    `c_ferror` reports an error on the underlying file.
    """
    self._check_closed()
    if size == 0:
        return ""

    if size < 0 and not self._univ_newline:
        # Unlimited, untranslated: read through a fixed-size raw buffer.
        with rffi.scoped_alloc_buffer(BASE_LINE_SIZE) as buf:
            c = self._readline1(buf.raw)
            if c >= 0:
                return buf.str(c)
            # this is the rare case: the line is longer than BASE_LINE_SIZE
            s = StringBuilder()
            # 'c' is negative here, so the body runs at least once.
            while c < 0:
                s.append_charpsize(buf.raw, BASE_LINE_SIZE - 1)
                c = self._readline1(buf.raw)
            s.append_charpsize(buf.raw, c)
        return s.build()

    # size > 0 or self._univ_newline
    LF = ord('\n')
    CR = ord('\r')
    ll_file = self._ll_file
    c = 0
    s = StringBuilder()
    if self._univ_newline:
        newlinetypes = self._newlinetypes
        skipnextlf = self._skipnextlf
        while size < 0 or s.getlength() < size:
            c = c_getc(ll_file)
            if c == EOF:
                break
            if skipnextlf:
                # The previous character was a \r already returned as \n.
                skipnextlf = False
                if c == LF:
                    newlinetypes |= NEWLINE_CRLF
                    c = c_getc(ll_file)
                    if c == EOF:
                        break
                else:
                    newlinetypes |= NEWLINE_CR
            if c == CR:
                skipnextlf = True
                c = LF
            elif c == LF:
                newlinetypes |= NEWLINE_LF
            s.append(chr(c))
            if c == LF:
                break
        if c == EOF and skipnextlf:
            newlinetypes |= NEWLINE_CR
        self._newlinetypes = newlinetypes
        self._skipnextlf = skipnextlf
    else:
        while s.getlength() < size:
            c = c_getc(ll_file)
            if c == EOF:
                break
            s.append(chr(c))
            if c == LF:
                break

    if c == EOF and c_ferror(ll_file):
        raise _error(ll_file)
    return s.build()
def descr_repr(self, space):
    """Return the wrapped text repr, of the form bytearray(b'...').

    Only the slice [start, end) obtained from `_convert_idx_params` is
    rendered.  Double quotes are used as delimiter when that slice
    contains a single quote and no double quote; single quotes
    otherwise.  Backslashes and single quotes are backslash-escaped,
    tab, carriage return and newline use their short escapes, and any
    other character outside 0x20..0x7e is written as a two-digit hex
    escape.
    """
    hexdigits = "0123456789abcdef"
    s, start, end, _ = self._convert_idx_params(space, None, None)

    # Good default if there are no replacements.
    out = StringBuilder(len("bytearray(b'')") + (end - start))
    out.append("bytearray(b")

    # Choose the delimiter; a double quote anywhere forces "'".
    quote = "'"
    pos = start
    while pos < end:
        ch = s[pos]
        pos += 1
        if ch == '"':
            quote = "'"
            break
        if ch == "'":
            quote = '"'
    out.append(quote)

    for pos in range(start, end):
        ch = s[pos]
        if ch == '\\' or ch == "'":
            out.append('\\')
            out.append(ch)
        elif ch == '\t':
            out.append('\\t')
        elif ch == '\r':
            out.append('\\r')
        elif ch == '\n':
            out.append('\\n')
        elif ch < '\x20' or ch >= '\x7f':
            value = ord(ch)
            out.append('\\x')
            out.append(hexdigits[value >> 4])
            out.append(hexdigits[value & 0xF])
        else:
            out.append(ch)

    out.append(quote)
    out.append(")")
    return space.newtext(out.build())
def readline_w(self, space, w_limit=None):
    """Read one line by repeatedly calling this object's read().

    For backwards compatibility, a (slowish) readline().

    If the object has a `peek` attribute, peek(1) is used to look ahead
    so that everything up to and including the next newline (bounded by
    `w_limit`) can be fetched with one read() call; otherwise one byte
    is read at a time.  A negative limit means no limit.

    Returns the line as wrapped bytes.  Raises a wrapped IOError if
    peek() or read() returns something that is not a bytes object.
    Calls for which trap_eintr() returns true are retried.
    """
    limit = convert_size(space, w_limit)
    has_peek = space.findattr(self, space.newtext("peek"))

    builder = StringBuilder()
    size = 0

    while limit < 0 or size < limit:
        nreadahead = 1

        if has_peek:
            try:
                w_readahead = space.call_method(self, "peek",
                                                space.newint(1))
            except OperationError as e:
                if trap_eintr(space, e):
                    continue
                raise
            if not space.isinstance_w(w_readahead, space.w_bytes):
                raise oefmt(
                    space.w_IOError,
                    "peek() should have returned a bytes object, "
                    "not '%T'", w_readahead)
            length = space.len_w(w_readahead)
            if length > 0:
                buf = space.bytes_w(w_readahead)
                # Scan at most 'length' bytes, and no more than 'limit'
                # when a limit was given, stopping just after the first
                # newline.
                scan_end = length
                if 0 <= limit < scan_end:
                    scan_end = limit
                n = 0
                while n < scan_end:
                    n += 1
                    if buf[n - 1] == '\n':
                        break
                nreadahead = n

        try:
            w_read = space.call_method(self, "read",
                                       space.newint(nreadahead))
        except OperationError as e:
            if trap_eintr(space, e):
                continue
            raise
        if not space.isinstance_w(w_read, space.w_bytes):
            # This checks the result of read(); the message used to
            # blame peek() by mistake.
            raise oefmt(
                space.w_IOError,
                "read() should have returned a bytes object, not "
                "'%T'", w_read)
        read = space.bytes_w(w_read)
        if not read:
            # End of stream.
            break

        size += len(read)
        builder.append(read)

        if read[-1] == '\n':
            break

    return space.newbytes(builder.build())
def descr___str__(self, space):
    """Return a wrapped multi-line listing of the flags.

    There is one "NAME : value" entry per flag, in a fixed order, with
    each value rendered by get_tf_str() from `self.flags`.
    """
    # (label, flag constant) pairs in output order; every label except
    # the first starts with the newline that ends the previous entry.
    entries = [
        (' C_CONTIGUOUS : ', NPY.ARRAY_C_CONTIGUOUS),
        ('\n F_CONTIGUOUS : ', NPY.ARRAY_F_CONTIGUOUS),
        ('\n OWNDATA : ', NPY.ARRAY_OWNDATA),
        ('\n WRITEABLE : ', NPY.ARRAY_WRITEABLE),
        ('\n ALIGNED : ', NPY.ARRAY_ALIGNED),
        ('\n UPDATEIFCOPY : ', NPY.ARRAY_UPDATEIFCOPY),
    ]
    out = StringBuilder()
    for label, flag in entries:
        out.append(label)
        out.append(get_tf_str(self.flags, flag))
    return space.wrap(out.build())
def descr_upper_s(s):
    """Return `s` with each character mapped through unicodedb.toupper().

    NOTE(review): each result is converted back with chr(), so this
    presumably expects every upper-cased code to still fit in a single
    character -- confirm against the callers.
    """
    result = StringBuilder(len(s))
    for ch in s:
        upper_code = unicodedb.toupper(ord(ch))
        result.append(chr(upper_code))
    return result.build()