def str_translate__String_ANY_ANY(space, w_string, w_table, w_deletechars=''):
    """charfilter - unicode handling is not implemented

    Return a copy of the string in which every character listed in the
    optional argument deletechars has been dropped, and each remaining
    character has been looked up in the given translation table.  The
    table must be a string of length 256; passing None selects
    DEFAULT_NOOP_TABLE instead."""
    if space.is_w(w_table, space.w_None):
        table = DEFAULT_NOOP_TABLE
    else:
        table = space.bufferstr_w(w_table)
        if len(table) != 256:
            raise OperationError(
                space.w_ValueError,
                space.wrap("translation table must be 256 characters long"))

    source = w_string._value
    to_delete = space.str_w(w_deletechars)
    if len(to_delete) > 0:
        # Mark every character code that has to be dropped, then filter
        # and translate in a single pass.
        out = StringBuilder()
        is_deleted = [False] * 256
        for doomed in to_delete:
            is_deleted[ord(doomed)] = True
        for char in source:
            code = ord(char)
            if not is_deleted[code]:
                out.append(table[code])
    else:
        # Nothing to delete: the result has exactly the input's length.
        out = StringBuilder(len(source))
        for char in source:
            out.append(table[ord(char)])
    return W_StringObject(out.build())
def _read_all(self, space):
    "Read everything that is left in the file, without updating the cache"
    result = StringBuilder()

    # Start with whatever is still sitting in the current buffer.
    current_size = self._readahead()
    if current_size:
        buffered = ''.join(self.buffer[self.pos:self.pos + current_size])
        result.append(buffered)
    self._reader_reset_buf()

    # We are about to read past the buffer's bounds, so flush it first.
    if self.writable:
        self._writer_flush_unlocked(space, restore_pos=True)

    # Keep reading from the raw object until EOF or until read() would block.
    while True:
        w_chunk = space.call_method(self.w_raw, "read")
        if space.is_w(w_chunk, space.w_None):
            if current_size == 0:
                # Nothing was collected at all: hand the None back as-is.
                return w_chunk
            break
        chunk = space.str_w(w_chunk)
        got = len(chunk)
        if got == 0:
            break
        result.append(chunk)
        current_size += got
        if self.abs_pos != -1:
            self.abs_pos += got
    return space.wrap(result.build())
def func():
    """Exercise append, append_slice and append_multiple_char on a
    StringBuilder and return the built string."""
    sb = StringBuilder()
    sb.append("a")
    sb.append("abc")
    sb.append_slice("abc", 1, 2)
    sb.append_multiple_char('d', 4)
    built = sb.build()
    return built
def readline_w(self, space, w_limit=None):
    """Read and return a single line (a slowish fallback kept for
    backwards compatibility).

    w_limit is converted with convert_size(); a negative limit means that
    no size limit is applied, otherwise reading stops once at least that
    many characters have been collected.  When the object has a "peek"
    attribute, peek(1) is used to find how far the next read() may go
    without running past a newline; otherwise one character is read at a
    time.

    Raises the error built by operationerrfmt (space.w_IOError) when
    peek() or read() returns something that is not a str object."""
    limit = convert_size(space, w_limit)

    has_peek = space.findattr(self, space.wrap("peek"))

    builder = StringBuilder()
    size = 0

    while limit < 0 or size < limit:
        nreadahead = 1

        if has_peek:
            w_readahead = space.call_method(self, "peek", space.wrap(1))
            if not space.isinstance_w(w_readahead, space.w_str):
                raise operationerrfmt(
                    space.w_IOError,
                    "peek() should have returned a bytes object, "
                    "not '%s'",
                    space.type(w_readahead).getname(space))
            length = space.len_w(w_readahead)
            if length > 0:
                # Count how many peeked characters can be consumed: up to
                # and including the first newline, capped by the limit.
                n = 0
                buf = space.str_w(w_readahead)
                if limit >= 0:
                    while n < length and n < limit:
                        n += 1
                        if buf[n - 1] == "\n":
                            break
                else:
                    while n < length:
                        n += 1
                        if buf[n - 1] == "\n":
                            break
                nreadahead = n

        w_read = space.call_method(self, "read", space.wrap(nreadahead))
        if not space.isinstance_w(w_read, space.w_str):
            # This checks the result of read(), so the message must name
            # read() rather than peek().
            raise operationerrfmt(
                space.w_IOError,
                "read() should have returned a bytes object, "
                "not '%s'",
                space.type(w_read).getname(space))
        read = space.str_w(w_read)
        if not read:
            # Empty result: nothing more to read.
            break

        size += len(read)
        builder.append(read)

        if read[-1] == "\n":
            break

    return space.wrap(builder.build())
def readline_w(self, space, w_limit=None):
    """Read and return a single line (a slowish fallback kept for
    backwards compatibility).

    w_limit is converted with convert_size(); a negative limit means that
    no size limit is applied, otherwise reading stops once at least that
    many characters have been collected.  When the object has a "peek"
    attribute, peek(1) is used to find how far the next read() may go
    without running past a newline; otherwise one character is read at a
    time.

    Raises the error built by operationerrfmt (space.w_IOError) when
    peek() or read() returns something that is not a str object."""
    limit = convert_size(space, w_limit)

    has_peek = space.findattr(self, space.wrap("peek"))

    builder = StringBuilder()
    size = 0

    while limit < 0 or size < limit:
        nreadahead = 1

        if has_peek:
            w_readahead = space.call_method(self, "peek", space.wrap(1))
            if not space.isinstance_w(w_readahead, space.w_str):
                raise operationerrfmt(
                    space.w_IOError,
                    "peek() should have returned a bytes object, "
                    "not '%s'",
                    space.type(w_readahead).getname(space))
            length = space.len_w(w_readahead)
            if length > 0:
                # Count how many peeked characters can be consumed: up to
                # and including the first newline, capped by the limit.
                n = 0
                buf = space.str_w(w_readahead)
                if limit >= 0:
                    while n < length and n < limit:
                        n += 1
                        if buf[n - 1] == '\n':
                            break
                else:
                    while n < length:
                        n += 1
                        if buf[n - 1] == '\n':
                            break
                nreadahead = n

        w_read = space.call_method(self, "read", space.wrap(nreadahead))
        if not space.isinstance_w(w_read, space.w_str):
            # This checks the result of read(), so the message must name
            # read() rather than peek().
            raise operationerrfmt(
                space.w_IOError,
                "read() should have returned a bytes object, "
                "not '%s'",
                space.type(w_read).getname(space))
        read = space.str_w(w_read)
        if not read:
            # Empty result: nothing more to read.
            break

        size += len(read)
        builder.append(read)

        if read[-1] == '\n':
            break

    return space.wrap(builder.build())
def str_join__String_ANY(space, w_self, w_list):
    """Join the items of w_list using the value of w_self as separator.

    An empty sequence gives W_StringObject.EMPTY.  If a unicode item is
    found the work is delegated to the "join" method of the unicode
    version of w_self; any other non-string item raises the TypeError
    built by operationerrfmt."""
    list_w = space.listview(w_list)
    if not list_w:
        return W_StringObject.EMPTY

    sep = w_self._value
    count = len(list_w)

    # First pass: type-check every item and add up the exact result size.
    total = 0
    for i in range(count):
        w_item = list_w[i]
        if not space.is_true(space.isinstance(w_item, space.w_str)):
            if space.is_true(space.isinstance(w_item, space.w_unicode)):
                # w_list has to be rebuilt here: the original object may
                # have been an iterable that is already consumed.
                w_list = space.newlist(list_w)
                w_u = space.call_function(space.w_unicode, w_self)
                return space.call_method(w_u, "join", w_list)
            raise operationerrfmt(
                space.w_TypeError,
                "sequence item %d: expected string, %s "
                "found", i, space.type(w_item).getname(space, "?"))
        total += len(space.str_w(w_item))
    total += len(sep) * (count - 1)

    # Second pass: emit the items with the separator in between.
    sb = StringBuilder(total)
    for i in range(count):
        if sep and i != 0:
            sb.append(sep)
        sb.append(space.str_w(list_w[i]))
    return space.wrap(sb.build())
def str_join__String_ANY(space, w_self, w_list):
    """Join the items of w_list using the value of w_self as separator.

    An empty sequence gives W_StringObject.EMPTY.  If a unicode item is
    found the work is delegated to the "join" method of the unicode
    version of w_self; any other non-string item raises the TypeError
    built by operationerrfmt."""
    list_w = space.listview(w_list)
    if not list_w:
        return W_StringObject.EMPTY

    sep = w_self._value
    count = len(list_w)

    # First pass: type-check every item and add up the exact result size.
    total = 0
    for i in range(count):
        w_item = list_w[i]
        if not space.is_true(space.isinstance(w_item, space.w_str)):
            if space.is_true(space.isinstance(w_item, space.w_unicode)):
                # w_list has to be rebuilt here: the original object may
                # have been an iterable that is already consumed.
                w_list = space.newlist(list_w)
                w_u = space.call_function(space.w_unicode, w_self)
                return space.call_method(w_u, "join", w_list)
            raise operationerrfmt(
                space.w_TypeError,
                "sequence item %d: expected string, %s "
                "found", i, space.type(w_item).getname(space, '?'))
        total += len(space.str_w(w_item))
    total += len(sep) * (count - 1)

    # Second pass: emit the items with the separator in between.
    sb = StringBuilder(total)
    for i in range(count):
        if sep and i != 0:
            sb.append(sep)
        sb.append(space.str_w(list_w[i]))
    return space.wrap(sb.build())
def rledecode_hqx(space, hexbin):
    "Decode hexbin RLE-coded string."
    total = len(hexbin)
    # The input length is only a rough guess at the output length.
    out = StringBuilder(total)

    pos = 0
    last_code = -1      # code of the last literal character, -1 if none yet
    while pos < total:
        ch = hexbin[pos]
        pos += 1
        if ch != '\x90':
            # Ordinary character: copy it and remember it for later runs.
            out.append(ch)
            last_code = ord(ch)
            continue

        # '\x90' is the RLE marker and must be followed by a count byte.
        if pos == total:
            raise_Incomplete(space, 'String ends with the RLE code \x90')
        repeat = ord(hexbin[pos]) - 1
        pos += 1
        if repeat < 0:
            # A zero count byte stands for a literal '\x90'.
            out.append('\x90')
            last_code = 0x90
        else:
            if last_code < 0:
                raise_Error(space, 'String starts with the RLE code \x90')
            out.append_multiple_char(chr(last_code), repeat)
    return space.wrap(out.build())
def a2b_hqx(space, ascii):
    """Decode .hqx coding.  Returns (bin, done)."""
    # The input length is an overestimate of the output length.
    out = StringBuilder(len(ascii))
    done = 0
    acc = 0         # bits collected so far
    acc_bits = 0    # how many bits of 'acc' are in use

    for c in ascii:
        n = ord(table_a2b_hqx[ord(c)])
        if n <= 0x3F:
            # A regular 6-bit value: shift it in, and flush three bytes
            # once 24 bits have been gathered.
            acc = (acc << 6) | n
            acc_bits += 6
            if acc_bits == 24:
                out.append(chr(acc >> 16))
                out.append(chr((acc >> 8) & 0xff))
                out.append(chr(acc & 0xff))
                acc = 0
                acc_bits = 0
        elif n == FAIL:
            raise_Error(space, 'Illegal character')
        elif n == DONE:
            # End marker: emit the whole bytes that are still pending.
            if acc_bits >= 8:
                out.append(chr(acc >> (acc_bits - 8)))
            if acc_bits >= 16:
                out.append(chr((acc >> (acc_bits - 16)) & 0xff))
            done = 1
            break
        # any other value of n is ignored (e.g. SKIP)
    else:
        # The input ran out without reaching the end marker.
        if acc_bits > 0:
            raise_Incomplete(space, 'String has incomplete number of bytes')
    return space.newtuple([space.wrap(out.build()), space.wrap(done)])
def fn(_):
    """Exercise append, append_slice and append_multiple_char on a
    StringBuilder and return the built string; the argument is ignored."""
    sb = StringBuilder()
    sb.append("a")
    sb.append("abc")
    sb.append_slice("abc", 1, 2)
    sb.append_multiple_char('d', 4)
    built = sb.build()
    return built
def fn(_):
    """Append 50 characters one at a time to a StringBuilder created with
    an initial size of 4, calling build() and gc.collect() after each
    append.  Returns the space-joined snapshots; the argument is ignored."""
    sb = StringBuilder(4)
    snapshots = []
    for offset in range(50):
        sb.append(chr(33 + offset))
        snapshot = sb.build()
        snapshots.append(snapshot)
        gc.collect()
    return ' '.join(snapshots)
def add__StringBuffer_String(space, w_self, w_other):
    """Return a new W_StringBufferObject holding the content of w_self
    followed by the value of w_other."""
    if w_self.builder.getlength() == w_self.length:
        # The builder holds exactly w_self.length characters: append to it
        # directly.
        builder = w_self.builder
    else:
        # The builder's length differs from w_self.length: start a fresh
        # builder from the forced value of w_self.
        builder = StringBuilder()
        builder.append(w_self.force())
    builder.append(w_other._value)
    return W_StringBufferObject(builder)
def hexdigest(self, space):
    "Return the digest value as a string of hexadecimal digits."
    hexdigits = '0123456789abcdef'
    raw = self._digest(space)
    out = StringBuilder(self.digest_size * 2)
    for c in raw:
        code = ord(c)
        # high nibble first, then low nibble
        out.append(hexdigits[(code >> 4) & 0xf])
        out.append(hexdigits[code & 0xf])
    return space.wrap(out.build())
def hexdigest(self, space):
    "Return the digest value as a string of hexadecimal digits."
    hexdigits = '0123456789abcdef'
    raw = self._digest(space)
    out = StringBuilder(self.digest_size * 2)
    for c in raw:
        code = ord(c)
        # high nibble first, then low nibble
        out.append(hexdigits[(code >> 4) & 0xf])
        out.append(hexdigits[code & 0xf])
    return space.wrap(out.build())
def main(n):
    """Loop n + 1 times (for n >= 0); each iteration builds "Hello!" with a
    StringBuilder, concatenates "xyz" and adds the code of the first
    character to the running total, which is returned."""
    result = 0
    while n >= 0:
        jitdriver.jit_merge_point(n=n, result=result)
        sb = StringBuilder(6)
        sb.append("Hello!")
        combined = sb.build() + "xyz"
        result += ord(combined[0])
        n -= 1
    return result
def fn():
    """Grow a StringBuilder well past its initial size of 4, with garbage
    collections in between, and return the code of the character at
    index 1000 of the built string."""
    sb = StringBuilder(4)
    sb.append("abcd")
    sb.append("defg")
    sb.append("rty")
    sb.append_multiple_char('y', 1000)
    gc.collect()
    sb.append_multiple_char('y', 1000)
    picked = sb.build()[1000]
    gc.collect()
    return ord(picked)
def hexlify(space, data):
    '''Hexadecimal representation of binary data.
This function is also available as "hexlify()".'''
    # The result is twice as long as the input; report an overflow of
    # that computation as a MemoryError.
    try:
        newlength = ovfcheck(len(data) * 2)
    except OverflowError:
        raise OperationError(space.w_MemoryError, space.w_None)
    out = StringBuilder(newlength)
    for c in data:
        code = ord(c)
        out.append(_value2char(code >> 4))
        out.append(_value2char(code & 0xf))
    return space.wrap(out.build())
def unhexlify(space, hexstr):
    '''Binary data of hexadecimal representation.
hexstr must contain an even number of hex digits (upper or lower case).
This function is also available as "unhexlify()".'''
    length = len(hexstr)
    if length & 1:
        raise OperationError(space.w_TypeError,
                             space.wrap('Odd-length string'))
    out = StringBuilder(length >> 1)
    # Each pair of hex digits produces one output character.
    for pos in range(0, length, 2):
        high = _char2value(space, hexstr[pos])
        low = _char2value(space, hexstr[pos + 1])
        out.append(chr((high << 4) | low))
    return space.wrap(out.build())
def direct_read(self, n=-1):
    """Read from the underlying stream.

    With a negative n the result of stream.readall() is returned.
    Otherwise stream.read() is called repeatedly until n characters have
    been collected or it returns an empty result."""
    stream = self.getstream()
    if n < 0:
        return stream.readall()

    collected = StringBuilder(n)
    remaining = n
    while remaining > 0:
        piece = stream.read(remaining)
        if not piece:
            break
        remaining -= len(piece)
        collected.append(piece)
    return collected.build()
def readall_w(self, space):
    """Call read(DEFAULT_BUFFER_SIZE) on self until it returns an empty
    string and return everything that was read, wrapped.

    Raises a wrapped TypeError if read() returns a non-str object."""
    collected = StringBuilder()
    while True:
        w_chunk = space.call_method(self, "read",
                                    space.wrap(DEFAULT_BUFFER_SIZE))
        if not space.isinstance_w(w_chunk, space.w_str):
            raise OperationError(
                space.w_TypeError,
                space.wrap("read() should return bytes"))
        chunk = space.str_w(w_chunk)
        if not chunk:
            break
        collected.append(chunk)
    return space.wrap(collected.build())
class StringBuilderWithOneCharCancellable(object):
    """A StringBuilder wrapper that holds back the most recently appended
    character in 'pending' (as a character code, or -1 when there is
    none) instead of writing it to the builder immediately."""

    def __init__(self, crlf, initial):
        self.crlf = crlf
        self.builder = StringBuilder(initial)
        self.pending = -1

    def _flush(self):
        # Write the held-back character, if any, to the real builder.
        if self.pending >= 0:
            self.builder.append(chr(self.pending))
            self.pending = -1
    _flush._always_inline_ = True

    def append(self, c):
        """Flush the previous pending character and hold back c."""
        self._flush()
        self.pending = ord(c)

    def newline(self):
        """Emit a line ending: '\\r' is written first when crlf is set,
        and the '\\n' becomes the pending character."""
        self._flush()
        if self.crlf:
            self.builder.append('\r')
        self.pending = ord('\n')

    def to_hex(self, c):
        """Write the code of c as two uppercase hexadecimal digits."""
        self._flush()
        digits = "0123456789ABCDEF"
        uvalue = ord(c)
        self.builder.append(digits[uvalue >> 4])
        self.builder.append(digits[uvalue & 0xf])

    def build(self):
        """Flush the pending character and return the built string."""
        self._flush()
        return self.builder.build()
def rlecode_hqx(space, data):
    "Binhex RLE-code binary data."
    end = len(data)
    # The input length is only a rough guess at the output length.
    out = StringBuilder(end)

    pos = 0
    while pos < end:
        ch = data[pos]
        out.append(ch)
        if ch == '\x90':
            # The marker character itself is escaped with a zero count,
            # and its repetitions are deliberately not compressed (*).
            out.append('\x00')
            pos += 1
            continue

        # Measure the run of identical characters, at most 255 long.
        run_end = pos + 1
        while run_end < end and data[run_end] == ch and run_end < pos + 255:
            run_end += 1
        run = run_end - pos
        if run > 3:
            # More than 3 in a row: emit the marker and the run length.
            # Runs longer than 255 are simply split up, see (*).
            out.append('\x90')
            out.append(chr(run))
            pos = run_end
        else:
            pos += 1
    # (*) Simplicity is preferred over compactness here, like CPython.
    # Producing the smallest possible string that rledecode_hqx() would
    # expand back to 'data' risks making some programs somewhere fail
    # obscurely in rare cases.
    return space.wrap(out.build())
def here_doc(self):
    """Lex the introducer of a here-document.

    Reads an optional "-", then a marker that is either quoted with ', "
    or ` or made of alphanumeric/underscore characters, then the rest of
    the current line.  A HeredocTerm is stored in self.str_term and one
    token is yielded: XSTRING_BEG for a backquoted marker, STRING_BEG
    otherwise.  If no marker can be read, the characters are pushed back
    and nothing is yielded."""
    ch = self.read()

    indent = ch == "-"
    if indent:
        ch = self.read()

    expand = True
    regexp = False
    marker = StringBuilder()
    if ch in "'\"`":
        # Quoted marker: collect everything up to the closing quote.
        term = ch
        if term == "'":
            expand = False
        elif term == "`":
            regexp = True

        while True:
            ch = self.read()
            if ch == self.EOF:
                self.unread()
                break
            if ch == term:
                break
            marker.append(ch)
    else:
        # Bare marker: a run of alphanumeric or underscore characters.
        if not (ch.isalnum() or ch == "_"):
            self.unread()
            if indent:
                self.unread()
            return

        marker.append(ch)
        while True:
            ch = self.read()
            if ch == self.EOF or not (ch.isalnum() or ch == "_"):
                self.unread()
                break
            marker.append(ch)

    # Collect whatever follows the marker on the same line.
    last_line = StringBuilder()
    while True:
        ch = self.read()
        if ch == "\n":
            break
        if ch == self.EOF:
            self.unread()
            break
        last_line.append(ch)

    self.str_term = HeredocTerm(self, marker.build(), last_line.build(),
                                indent=indent, expand=expand)
    if regexp:
        yield self.emit("XSTRING_BEG")
    else:
        yield self.emit("STRING_BEG")
def str_title__String(space, w_self):
    """Return a wrapped copy of the string in which a character that
    follows an alphabetic character goes through _lower() and every other
    character goes through _upper()."""
    text = w_self._value
    out = StringBuilder(len(text))
    previous = ' '
    for raw in text:
        if previous.isalpha():
            converted = _lower(raw)
        else:
            converted = _upper(raw)
        out.append(converted)
        # The decision for the next character is based on the converted
        # character, not on the raw one.
        previous = converted
    return space.wrap(out.build())
def str_title__String(space, w_self):
    """Return a wrapped copy of the string in which a character that
    follows an alphabetic character goes through _lower() and every other
    character goes through _upper()."""
    text = w_self._value
    out = StringBuilder(len(text))
    previous = ' '
    for raw in text:
        if previous.isalpha():
            converted = _lower(raw)
        else:
            converted = _upper(raw)
        out.append(converted)
        # The decision for the next character is based on the converted
        # character, not on the raw one.
        previous = converted
    return space.wrap(out.build())
def _string_replace(space, input, sub, by, maxsplit):
    """Return a wrapped copy of 'input' with occurrences of 'sub' replaced
    by 'by'.

    maxsplit == 0 returns the input unchanged; a positive maxsplit bounds
    the number of replacements and a negative one means no bound.  An
    empty 'sub' inserts 'by' before each character and after the last
    replaced one.  Raises a wrapped OverflowError when the size of the
    result cannot be computed without overflow."""
    if maxsplit == 0:
        return space.wrap(input)

    input_len = len(input)
    if not sub:
        # Empty pattern: 'by' goes in front of each of the first 'upper'
        # characters and once more after them.
        upper = input_len
        if 0 < maxsplit < upper + 2:
            upper = maxsplit - 1
            assert upper >= 0

        try:
            result_size = ovfcheck(upper * len(by))
            result_size = ovfcheck(result_size + upper)
            result_size = ovfcheck(result_size + len(by))
            remaining_size = input_len - upper
            result_size = ovfcheck(result_size + remaining_size)
        except OverflowError:
            raise OperationError(
                space.w_OverflowError,
                space.wrap("replace string is too long"))
        out = StringBuilder(result_size)
        for i in range(upper):
            out.append(by)
            out.append(input[i])
        out.append(by)
        out.append_slice(input, upper, input_len)
    else:
        # Work out the exact size of the result before building it.
        count = input.count(sub)
        if maxsplit > 0 and count > maxsplit:
            count = maxsplit
        diff_len = len(by) - len(sub)
        try:
            result_size = ovfcheck(diff_len * count)
            result_size = ovfcheck(result_size + input_len)
        except OverflowError:
            raise OperationError(
                space.w_OverflowError,
                space.wrap("replace string is too long"))

        out = StringBuilder(result_size)
        start = 0
        sublen = len(sub)

        while maxsplit != 0:
            found = input.find(sub, start)
            if found < 0:
                break
            out.append_slice(input, start, found)
            out.append(by)
            start = found + sublen
            # NB. a maxsplit that is already negative stays negative
            maxsplit -= 1

        out.append_slice(input, start, input_len)

    return space.wrap(out.build())
def _string_replace(space, input, sub, by, maxsplit):
    """Return a wrapped copy of 'input' with occurrences of 'sub' replaced
    by 'by'.

    maxsplit == 0 returns the input unchanged; a positive maxsplit bounds
    the number of replacements and a negative one means no bound.  An
    empty 'sub' inserts 'by' before each character and after the last
    replaced one.  Raises a wrapped OverflowError when the size of the
    result cannot be computed without overflow."""
    if maxsplit == 0:
        return space.wrap(input)

    input_len = len(input)
    if not sub:
        # Empty pattern: 'by' goes in front of each of the first 'upper'
        # characters and once more after them.
        upper = input_len
        if 0 < maxsplit < upper + 2:
            upper = maxsplit - 1
            assert upper >= 0

        try:
            result_size = ovfcheck(upper * len(by))
            result_size = ovfcheck(result_size + upper)
            result_size = ovfcheck(result_size + len(by))
            remaining_size = input_len - upper
            result_size = ovfcheck(result_size + remaining_size)
        except OverflowError:
            raise OperationError(
                space.w_OverflowError,
                space.wrap("replace string is too long"))
        out = StringBuilder(result_size)
        for i in range(upper):
            out.append(by)
            out.append(input[i])
        out.append(by)
        out.append_slice(input, upper, input_len)
    else:
        # Work out the exact size of the result before building it.
        count = input.count(sub)
        if maxsplit > 0 and count > maxsplit:
            count = maxsplit
        diff_len = len(by) - len(sub)
        try:
            result_size = ovfcheck(diff_len * count)
            result_size = ovfcheck(result_size + input_len)
        except OverflowError:
            raise OperationError(
                space.w_OverflowError,
                space.wrap("replace string is too long"))

        out = StringBuilder(result_size)
        start = 0
        sublen = len(sub)

        while maxsplit != 0:
            found = input.find(sub, start)
            if found < 0:
                break
            out.append_slice(input, start, found)
            out.append(by)
            start = found + sublen
            # NB. a maxsplit that is already negative stays negative
            maxsplit -= 1

        out.append_slice(input, start, input_len)

    return space.wrap(out.build())
def str_zfill__String_ANY(space, w_self, w_width):
    """Return the string left-padded with '0' characters up to the given
    width; a leading '+' or '-' stays in front of the padding."""
    text = w_self._value
    width = space.int_w(w_width)

    padding = width - len(text)
    if padding <= 0:
        # w_self cannot be returned directly: it may be an instance of a
        # subclass of str.
        return space.wrap(text)

    out = StringBuilder(width)
    start = 0
    if len(text) > 0 and (text[0] == '+' or text[0] == '-'):
        # Keep the sign before the zeros.
        out.append(text[0])
        start = 1

    out.append_multiple_char('0', padding)
    out.append_slice(text, start, len(text))
    return space.wrap(out.build())
def b2a_uu(space, bin):
    "Uuencode a line of data."
    length = len(bin)
    if length > 45:
        raise_Error(space, 'At most 45 bytes at once')
    groups = (length + 2) // 3
    # one length character, four characters per group, one newline
    out = StringBuilder(2 + groups * 4)
    out.append(chr(0x20 + length))

    for start in range(0, length, 3):
        first = _b2a_read(bin, start)
        second = _b2a_read(bin, start + 1)
        third = _b2a_read(bin, start + 2)
        # Split the three values into four 6-bit pieces, each offset by
        # 0x20.
        out.append(chr(0x20 + (first >> 2)))
        out.append(chr(0x20 + (((first & 0x3) << 4) | (second >> 4))))
        out.append(chr(0x20 + (((second & 0xF) << 2) | (third >> 6))))
        out.append(chr(0x20 + (third & 0x3F)))

    out.append('\n')
    return space.wrap(out.build())
def b2a_uu(space, bin):
    "Uuencode a line of data."
    length = len(bin)
    if length > 45:
        raise_Error(space, 'At most 45 bytes at once')
    groups = (length + 2) // 3
    # one length character, four characters per group, one newline
    out = StringBuilder(2 + groups * 4)
    out.append(chr(0x20 + length))

    for start in range(0, length, 3):
        first = _b2a_read(bin, start)
        second = _b2a_read(bin, start + 1)
        third = _b2a_read(bin, start + 2)
        # Split the three values into four 6-bit pieces, each offset by
        # 0x20.
        out.append(chr(0x20 + (first >> 2)))
        out.append(chr(0x20 + (((first & 0x3) << 4) | (second >> 4))))
        out.append(chr(0x20 + (((second & 0xF) << 2) | (third >> 6))))
        out.append(chr(0x20 + (third & 0x3F)))

    out.append('\n')
    return space.wrap(out.build())
def test_string_builder():
    """Check the result of mixing append, append_slice and
    append_multiple_char on one StringBuilder."""
    sb = StringBuilder()
    sb.append("a")
    sb.append("abc")
    sb.append("a")
    sb.append_slice("abc", 1, 2)
    sb.append_multiple_char('d', 4)
    built = sb.build()
    assert built == "aabcabdddd"
def readall_w(self, space):
    """Read from self.fd until end of file and return the data, wrapped.

    Each os.read() is sized from new_buffersize(self.fd, total).  If a
    read fails with EAGAIN, the data collected so far is returned, or
    space.w_None when nothing has been read yet.  Any other OSError is
    re-raised through wrap_oserror with exception_name='w_IOError'."""
    self._check_closed(space)
    self._check_readable(space)
    total = 0

    builder = StringBuilder()
    while True:
        newsize = int(new_buffersize(self.fd, total))

        try:
            chunk = os.read(self.fd, newsize - total)
        except OSError as e:
            if e.errno == errno.EAGAIN:
                if total > 0:
                    # return what we've got so far
                    break
                return space.w_None
            raise wrap_oserror(space, e,
                               exception_name='w_IOError')

        if not chunk:
            # an empty read ends the loop
            break
        builder.append(chunk)
        total += len(chunk)
    # Without this return the collected data would be thrown away and the
    # caller would get an implicit None.
    return space.wrap(builder.build())
def readall_w(self, space):
    """Read from self.fd until end of file and return the data, wrapped.

    Each os.read() is sized from new_buffersize(self.fd, total).  If a
    read fails with EAGAIN, the data collected so far is returned, or
    space.w_None when nothing has been read yet.  Any other OSError is
    re-raised through wrap_oserror with exception_name="w_IOError"."""
    self._check_closed(space)
    self._check_readable(space)
    total = 0

    builder = StringBuilder()
    while True:
        newsize = int(new_buffersize(self.fd, total))

        try:
            chunk = os.read(self.fd, newsize - total)
        except OSError as e:
            if e.errno == errno.EAGAIN:
                if total > 0:
                    # return what we've got so far
                    break
                return space.w_None
            raise wrap_oserror(space, e,
                               exception_name="w_IOError")

        if not chunk:
            # an empty read ends the loop
            break
        builder.append(chunk)
        total += len(chunk)
    # Without this return the collected data would be thrown away and the
    # caller would get an implicit None.
    return space.wrap(builder.build())
def a2b_base64(space, ascii):
    "Decode a line of base64 data."
    # (len // 4) * 3 is the maximum estimate of the output length.
    out = StringBuilder((len(ascii) // 4) * 3)
    quad_pos = 0        # position inside the current group of four
    leftchar = 0        # bits not yet written out
    leftbits = 0        # number of valid bits in 'leftchar'
    last_char_was_a_pad = False

    for c in ascii:
        if c == PAD:
            # Stop on 'xxx=' or on 'xx=='.
            if quad_pos > 2 or (quad_pos == 2 and last_char_was_a_pad):
                break
            last_char_was_a_pad = True
            continue

        n = ord(table_a2b_base64[ord(c)])
        if n == 0xff:
            # strange characters are ignored
            continue

        # Shift the value in on the low end, then see whether a full
        # byte is ready for output.
        quad_pos = (quad_pos + 1) & 3
        leftchar = (leftchar << 6) | n
        leftbits += 6

        if leftbits >= 8:
            leftbits -= 8
            out.append(chr(leftchar >> leftbits))
            leftchar &= ((1 << leftbits) - 1)

        last_char_was_a_pad = False
    else:
        # The input ended without the loop being stopped at a pad.
        if leftbits != 0:
            raise_Error(space, "Incorrect padding")

    return space.wrap(out.build())
def _str_join_many_items(space, w_self, list_w, size):
    """Join the first 'size' items of list_w using the value of w_self as
    separator and return the wrapped result.

    If a unicode item is found the work is delegated to the "join" method
    of the unicode version of w_self; any other non-string item raises
    the TypeError built by operationerrfmt."""
    sep = w_self._value

    # First pass: type-check every item and add up the exact result size.
    total = len(sep) * (size - 1)
    for i in range(size):
        w_item = list_w[i]
        if not space.isinstance_w(w_item, space.w_str):
            if space.isinstance_w(w_item, space.w_unicode):
                # w_list has to be rebuilt here: the original object may
                # have been an iterable that is already consumed.
                w_list = space.newlist(list_w)
                w_u = space.call_function(space.w_unicode, w_self)
                return space.call_method(w_u, "join", w_list)
            raise operationerrfmt(
                space.w_TypeError,
                "sequence item %d: expected string, %s "
                "found", i, space.type(w_item).getname(space))
        total += len(space.str_w(w_item))

    # Second pass: emit the items with the separator in between.
    sb = StringBuilder(total)
    for i in range(size):
        if sep and i != 0:
            sb.append(sep)
        sb.append(space.str_w(list_w[i]))
    return space.wrap(sb.build())
def a2b_qp(space, data, header=0):
    "Decode a string of qp-encoded data."
    size = len(data)
    # The output is allocated at the same size as the input, which is
    # overkill.
    out = StringBuilder(size)
    pos = 0

    while pos < size:
        c = data[pos]
        pos += 1
        if c != '=':
            # Plain character; in header mode '_' stands for a space.
            if header and c == '_':
                c = ' '
            out.append(c)
            continue

        if pos >= size:
            # a trailing '=' is dropped
            break
        c = data[pos]
        if c == '\n' or c == '\r':
            # Soft line break: skip through the end of the line.
            if c != '\n':
                while pos < size and data[pos] != '\n':
                    pos += 1
            pos += 1     # may go beyond len(data)
        elif c == '=':
            # broken case from broken python qp
            out.append('=')
            pos += 1
        elif (pos + 1 < size and
              ('A' <= c <= 'F' or
               'a' <= c <= 'f' or
               '0' <= c <= '9') and
              ('A' <= data[pos+1] <= 'F' or
               'a' <= data[pos+1] <= 'f' or
               '0' <= data[pos+1] <= '9')):
            # '=' followed by two hex digits: decode them as one character
            decoded = chr(hexval(c) << 4 | hexval(data[pos+1]))
            pos += 2
            out.append(decoded)
        else:
            out.append('=')

    return space.wrap(out.build())
def str_capitalize__String(space, w_self):
    """Return a wrapped copy of the string in which a lowercase first
    character has its code lowered by 32 and every later uppercase
    character has its code raised by 32."""
    text = w_self._value
    size = len(text)
    out = StringBuilder(size)
    if size > 0:
        first = text[0]
        if first.islower():
            first = chr(ord(first) - 32)
        out.append(first)

        for pos in range(1, size):
            ch = text[pos]
            if ch.isupper():
                ch = chr(ord(ch) + 32)
            out.append(ch)

    return space.wrap(out.build())
def a2b_qp(space, data, header=0):
    "Decode a string of qp-encoded data."
    size = len(data)
    # The output is allocated at the same size as the input, which is
    # overkill.
    out = StringBuilder(size)
    pos = 0

    while pos < size:
        c = data[pos]
        pos += 1
        if c != '=':
            # Plain character; in header mode '_' stands for a space.
            if header and c == '_':
                c = ' '
            out.append(c)
            continue

        if pos >= size:
            # a trailing '=' is dropped
            break
        c = data[pos]
        if c == '\n' or c == '\r':
            # Soft line break: skip through the end of the line.
            if c != '\n':
                while pos < size and data[pos] != '\n':
                    pos += 1
            pos += 1     # may go beyond len(data)
        elif c == '=':
            # broken case from broken python qp
            out.append('=')
            pos += 1
        elif (pos + 1 < size and
              ('A' <= c <= 'F' or
               'a' <= c <= 'f' or
               '0' <= c <= '9') and
              ('A' <= data[pos + 1] <= 'F' or
               'a' <= data[pos + 1] <= 'f' or
               '0' <= data[pos + 1] <= '9')):
            # '=' followed by two hex digits: decode them as one character
            decoded = chr(hexval(c) << 4 | hexval(data[pos + 1]))
            pos += 2
            out.append(decoded)
        else:
            out.append('=')

    return space.wrap(out.build())
def str_swapcase__String(space, w_self):
    """Return a wrapped copy of the string in which every uppercase
    character has its code raised by 32 and every lowercase character has
    its code lowered by 32; other characters are copied unchanged."""
    text = w_self._value
    out = StringBuilder(len(text))
    for ch in text:
        if ch.isupper():
            ch = chr(ord(ch) + 32)
        elif ch.islower():
            ch = chr(ord(ch) - 32)
        out.append(ch)
    return space.wrap(out.build())