def string_append(args):
    """Concatenate a list of W_String values into one W_String.

    The result is built as an ASCII string for as long as possible.  If a
    ValueError is raised while appending (presumably by as_str_ascii() on
    non-ASCII content), the prefix built so far is moved into a
    UnicodeBuilder and the remaining arguments, starting with the one
    that failed, are appended as unicode.

    Raises SchemeException if any argument is not a W_String.
    """
    if not args:
        return W_String.fromascii("")
    total = len(args)
    ascii_builder = StringBuilder()
    uni_builder = None
    ascii_idx = 0
    try:
        for ascii_idx in range(total):
            w_item = args[ascii_idx]
            if not isinstance(w_item, W_String):
                raise SchemeException("string-append: expected a string")
            ascii_builder.append(w_item.as_str_ascii())
    except ValueError:
        # args[ascii_idx] could not be appended as ASCII: keep the prefix
        # accumulated so far and continue in unicode from that argument.
        uni_builder = UnicodeBuilder()
        uni_builder.append(unicode(ascii_builder.build()))
        ascii_builder = None
        for i in range(ascii_idx, total):
            w_item = args[i]
            if not isinstance(w_item, W_String):
                raise SchemeException("string-append: expected a string")
            uni_builder.append(w_item.as_unicode())
    if uni_builder is not None:
        return W_String.fromunicode(uni_builder.build())
    assert ascii_builder is not None
    return W_String.fromascii(ascii_builder.build())
def string_append(args):
    """Concatenate a list of W_String values into one W_String.

    When the JIT reports the argument count as a constant, the work is
    delegated to string_append_fastpath().  Otherwise the result is built
    as ASCII until a ValueError is raised while appending, at which point
    the prefix is moved into a UnicodeBuilder and the remaining arguments
    (starting with the failing one) are appended as unicode.

    Raises SchemeException if any argument is not a W_String.
    """
    if jit.isconstant(len(args)):
        return string_append_fastpath(args)
    if not args:
        return W_String.fromascii("")
    count = len(args)
    str_builder = StringBuilder(count)
    uni_builder = None
    ascii_idx = 0
    try:
        for ascii_idx in range(count):
            w_str = args[ascii_idx]
            if not isinstance(w_str, W_String):
                raise SchemeException("string-append: expected a string")
            str_builder.append(w_str.as_str_ascii())
    except ValueError:
        # Switch to unicode, carrying over what was already collected.
        uni_builder = UnicodeBuilder(count)
        uni_builder.append(unicode(str_builder.build()))
        str_builder = None
        for j in range(ascii_idx, count):
            w_str = args[j]
            if not isinstance(w_str, W_String):
                raise SchemeException("string-append: expected a string")
            uni_builder.append(w_str.as_unicode())
    if uni_builder is not None:
        return W_String.fromunicode(uni_builder.build())
    assert str_builder is not None
    return W_String.fromascii(str_builder.build())
def direct_read(self, n=-1):
    """Read from the underlying stream.

    With n < 0 the whole stream is read via readall().  Otherwise
    stream.read() is called repeatedly until n bytes were collected or
    an empty read occurs, and the collected data is returned.
    """
    stream = self.getstream()
    self.check_readable()
    if n < 0:
        return stream.readall()
    remaining = n
    collected = StringBuilder(n)
    while remaining > 0:
        try:
            chunk = stream.read(remaining)
        except OSError as e:
            # Special case only for read() (similar to CPython, which
            # also loses partial data with other methods): on a
            # would-block error after some data was already received,
            # return that data.  Buffered data may still be waiting
            # when the error occurs, so read that too.
            if is_wouldblock_error(e.errno):
                buffered = stream.count_buffered_bytes()
                if buffered > 0:
                    collected.append(stream.read(min(remaining, buffered)))
                partial = collected.build()
                if len(partial) > 0:
                    return partial
            raise
        if not chunk:
            break
        remaining -= len(chunk)
        collected.append(chunk)
    return collected.build()
def direct_read(self, n=-1):
    """Read from the underlying stream.

    With n < 0 the whole stream is read via readall().  Otherwise
    stream.read() is called repeatedly until n bytes were collected or
    an empty read occurs, and the collected data is returned.

    An OSError from stream.read() is re-raised unless it is a
    would-block error and some data can still be returned.
    """
    stream = self.getstream()
    self.check_readable()
    if n < 0:
        return stream.readall()
    else:
        result = StringBuilder(n)
        while n > 0:
            try:
                data = stream.read(n)
            # 'as' form: valid on Python 2.6+ as well as Python 3,
            # unlike the comma form.
            except OSError as e:
                # a special-case only for read() (similar to CPython, which
                # also loses partial data with other methods): if we get
                # EAGAIN after already some data was received, return it.
                # Note that we can get EAGAIN while there is buffered data
                # waiting; read that too.
                if is_wouldblock_error(e.errno):
                    m = stream.count_buffered_bytes()
                    if m > 0:
                        result.append(stream.read(min(n, m)))
                    got = result.build()
                    if len(got) > 0:
                        return got
                raise
            if not data:
                break
            n -= len(data)
            result.append(data)
        return result.build()
def here_doc(self):
    # Generator that lexes the start of a heredoc: reads the optional
    # '-' flag and the terminator marker, collects the rest of the
    # current line, installs a HeredocTerm in self.str_term and yields
    # one begin token.  If no valid marker follows, the characters read
    # are pushed back and nothing is yielded.
    ch = self.read()
    # A leading '-' sets the indent flag passed on to HeredocTerm.
    indent = ch == "-"
    expand = True
    regexp = False
    if indent:
        ch = self.read()
    if ch in "'\"`":
        # Quoted marker: read up to the matching quote character.
        term = ch
        if term == "'":
            expand = False
        elif term == "`":
            # NOTE(review): despite its name, this flag only selects the
            # XSTRING_BEG token below.
            regexp = True
        marker = StringBuilder()
        while True:
            ch = self.read()
            if ch == self.EOF:
                self.unread()
                break
            elif ch == term:
                break
            else:
                marker.append(ch)
    else:
        # Bare marker: must start with an alphanumeric character or '_'.
        if not (ch.isalnum() or ch == "_"):
            # Not a heredoc: push back what was read (including the '-').
            self.unread()
            if indent:
                self.unread()
            return
        marker = StringBuilder()
        marker.append(ch)
        while True:
            ch = self.read()
            if ch == self.EOF or not (ch.isalnum() or ch == "_"):
                self.unread()
                break
            marker.append(ch)
    # Collect whatever follows the marker on the same line.
    last_line = StringBuilder()
    while True:
        ch = self.read()
        if ch in "\r\n":
            self.newline(ch)
            break
        elif ch == self.EOF:
            self.unread()
            break
        last_line.append(ch)
    self.str_term = HeredocTerm(self, marker.build(), last_line.build(), indent=indent, expand=expand)
    if regexp:
        yield self.emit("XSTRING_BEG")
    else:
        yield self.emit("STRING_BEG")
def unicode_encode_utf_16_helper(s, size, errors, errorhandler=None, allow_surrogates=True, byteorder='little', public_encoding_name='utf16'):
    """Encode the first `size` characters of `s` as UTF-16 bytes.

    With byteorder == 'native' a BOM (0xFEFF) is written first and
    BYTEORDER is used for the rest.  Code points >= 0x10000 are written
    as a surrogate pair.  Code points in the surrogate range are written
    as-is when allow_surrogates is true; otherwise `errorhandler` is
    called (default_unicode_error_encode if none is given).
    """
    if errorhandler is None:
        errorhandler = default_unicode_error_encode
    if size == 0:
        # Empty input: only the BOM (if requested) is produced.
        if byteorder == 'native':
            result = StringBuilder(2)
            _STORECHAR(result, 0xFEFF, BYTEORDER)
            return result.build()
        return ""
    # Two bytes per input character plus room for the BOM.
    result = StringBuilder(size * 2 + 2)
    if byteorder == 'native':
        _STORECHAR(result, 0xFEFF, BYTEORDER)
        byteorder = BYTEORDER
    pos = 0
    while pos < size:
        ch = ord(s[pos])
        pos += 1
        if ch < 0xD800:
            _STORECHAR(result, ch, byteorder)
        elif ch >= 0x10000:
            # Split into high and low surrogate.
            _STORECHAR(result, 0xD800 | ((ch - 0x10000) >> 10), byteorder)
            _STORECHAR(result, 0xDC00 | ((ch - 0x10000) & 0x3FF), byteorder)
        elif ch >= 0xE000 or allow_surrogates:
            _STORECHAR(result, ch, byteorder)
        else:
            # Lone surrogate that is not allowed: ask the error handler,
            # which returns a replacement (unicode `ru` or bytes `rs`)
            # and the position to resume from.
            ru, rs, pos = errorhandler(errors, public_encoding_name, 'surrogates not allowed', s, pos - 1, pos)
            if rs is not None:
                # py3k only
                if len(rs) % 2 != 0:
                    # A bytes replacement must be a whole number of
                    # 16-bit units.
                    errorhandler('strict', public_encoding_name, 'surrogates not allowed', s, pos - 1, pos)
                result.append(rs)
                continue
            for ch in ru:
                if ord(ch) < 0xD800:
                    _STORECHAR(result, ord(ch), byteorder)
                else:
                    errorhandler('strict', public_encoding_name, 'surrogates not allowed', s, pos - 1, pos)
            continue
    return result.build()
def unicode_encode_utf_32_helper(s, size, errors, errorhandler=None, allow_surrogates=True, byteorder='little', public_encoding_name='utf32'):
    """Encode the first `size` characters of `s` as UTF-32 bytes.

    With byteorder == 'native' a BOM (0xFEFF) is written first and
    BYTEORDER is used for the rest.  When allow_surrogates is false,
    code points in the surrogate range are passed to `errorhandler`
    (default_unicode_error_encode if none is given).  When MAXUNICODE is
    below 65536, a high surrogate followed by a low surrogate is
    combined into a single code point.
    """
    if errorhandler is None:
        errorhandler = default_unicode_error_encode
    if size == 0:
        # Empty input: only the BOM (if requested) is produced.
        if byteorder == 'native':
            result = StringBuilder(4)
            _STORECHAR32(result, 0xFEFF, BYTEORDER)
            return result.build()
        return ""
    # Four bytes per input character plus room for the BOM.
    result = StringBuilder(size * 4 + 4)
    if byteorder == 'native':
        _STORECHAR32(result, 0xFEFF, BYTEORDER)
        byteorder = BYTEORDER
    pos = 0
    while pos < size:
        ch = ord(s[pos])
        pos += 1
        ch2 = 0
        if not allow_surrogates and 0xD800 <= ch < 0xE000:
            # The error handler returns a replacement (unicode `ru` or
            # bytes `rs`) and the position to resume from.
            ru, rs, pos = errorhandler(errors, public_encoding_name, 'surrogates not allowed', s, pos - 1, pos)
            if rs is not None:
                # py3k only
                if len(rs) % 4 != 0:
                    # A bytes replacement must be a whole number of
                    # 32-bit units.
                    errorhandler('strict', public_encoding_name, 'surrogates not allowed', s, pos - 1, pos)
                result.append(rs)
                continue
            for ch in ru:
                if ord(ch) < 0xD800:
                    _STORECHAR32(result, ord(ch), byteorder)
                else:
                    errorhandler('strict', public_encoding_name, 'surrogates not allowed', s, pos - 1, pos)
            continue
        if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size:
            # Narrow build: try to merge a surrogate pair.
            ch2 = ord(s[pos])
            if 0xDC00 <= ch2 < 0xE000:
                ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000
                pos += 1
        _STORECHAR32(result, ch, byteorder)
    return result.build()
def test_string_builder():
    """Check append, append_slice, append_multiple_char and getlength,
    and that build() can be called repeatedly and appended after."""
    sb = StringBuilder()
    for piece in ("a", "abc"):
        sb.append(piece)
    assert sb.getlength() == len('aabc')
    sb.append("a")
    sb.append_slice("abc", 1, 2)
    sb.append_multiple_char('d', 4)
    built = sb.build()
    assert built == "aabcabdddd"
    # build() is repeatable and does not freeze the builder.
    assert built == sb.build()
    sb.append("x")
    assert sb.build() == built + "x"
def my_replace(string):
    """Return a copy of `string` with every space character removed."""
    from rpython.rlib.rstring import StringBuilder
    kept = StringBuilder()
    for ch in string:
        if ch == ' ':
            continue
        kept.append(ch)
    return kept.build()
def rledecode_hqx(space, hexbin):
    "Decode hexbin RLE-coded string."
    # The input length is only a first guess at the output length.
    total = len(hexbin)
    out = StringBuilder(total)
    pos = 0
    # Ordinal of the last byte emitted, or -1 if nothing was emitted yet.
    previous = -1
    while pos < total:
        byte = hexbin[pos]
        pos += 1
        if byte != '\x90':
            out.append(byte)
            previous = ord(byte)
            continue
        # '\x90' is the RLE marker and must be followed by a count byte.
        if pos == total:
            raise_Incomplete(space, 'String ends with the RLE code \\x90')
        repeat = ord(hexbin[pos]) - 1
        pos += 1
        if repeat < 0:
            # A count byte of zero stands for a literal '\x90'.
            out.append('\x90')
            previous = 0x90
        else:
            if previous < 0:
                raise_Error(space, 'String starts with the RLE code \\x90')
            out.append_multiple_char(chr(previous), repeat)
    return space.newbytes(out.build())
def draw(self):
    """Return the rendering of all segments, followed by self.reset and
    a single space."""
    out = StringBuilder()
    for index in range(len(self.segments)):
        out.append(self.draw_segment(index))
    out.append(self.reset)
    out.append(' ')
    return out.build()
def _operate(stream, data, flush, max_length, cfunc, while_doing): """Common code for compress() and decompress(). """ # Prepare the input buffer for the stream with lltype.scoped_alloc(rffi.CCHARP.TO, len(data)) as inbuf: for i in xrange(len(data)): inbuf[i] = data[i] stream.c_next_in = rffi.cast(Bytefp, inbuf) rffi.setintfield(stream, 'c_avail_in', len(data)) # Prepare the output buffer with lltype.scoped_alloc(rffi.CCHARP.TO, OUTPUT_BUFFER_SIZE) as outbuf: # Strategy: we call deflate() to get as much output data as fits in # the buffer, then accumulate all output into a StringBuffer # 'result'. result = StringBuilder() while True: stream.c_next_out = rffi.cast(Bytefp, outbuf) bufsize = OUTPUT_BUFFER_SIZE if max_length < bufsize: if max_length <= 0: err = Z_OK break bufsize = max_length max_length -= bufsize rffi.setintfield(stream, 'c_avail_out', bufsize) err = cfunc(stream, flush) if err == Z_OK or err == Z_STREAM_END: # accumulate data into 'result' avail_out = rffi.cast(lltype.Signed, stream.c_avail_out) result.append_charpsize(outbuf, bufsize - avail_out) # if the output buffer is full, there might be more data # so we need to try again. Otherwise, we're done. if avail_out > 0: break # We're also done if we got a Z_STREAM_END (which should # only occur when flush == Z_FINISH). if err == Z_STREAM_END: break else: continue elif err == Z_BUF_ERROR: avail_out = rffi.cast(lltype.Signed, stream.c_avail_out) # When compressing, we will only get Z_BUF_ERROR if # the output buffer was full but there wasn't more # output when we tried again, so it is not an error # condition. if avail_out == bufsize: break # fallback case: report this error raise RZlibError.fromstream(stream, err, while_doing) # When decompressing, if the compressed stream of data was truncated, # then the zlib simply returns Z_OK and waits for more. If it is # complete it returns Z_STREAM_END. return (result.build(), err, rffi.cast(lltype.Signed, stream.c_avail_in))
def http_build_query(interp, w_data, num_prefix="", arg_sep=None, enctype=1):
    """Build a query string from a PHP array or object.

    interp     -- the interpreter; its space and ini configuration are used
    w_data     -- wrapped array or object; dereferenced before use
    num_prefix -- passed on to _get_key() and _build_query()
    arg_sep    -- separator between arguments; defaults to the ini
                  setting "arg_separator.output"
    enctype    -- passed on to _build_query()

    Returns the query as a wrapped string without a trailing separator.
    A warning is emitted if w_data is neither an array nor an object.
    """
    space = interp.space
    if arg_sep is None:
        arg_sep = interp.config.get_ini_str("arg_separator.output")
    w_data = w_data.deref()
    out = StringBuilder()
    if not w_data.tp in [space.tp_array, space.tp_object]:
        interp.space.ec.warn("http_build_query(): Parameter 1 "
                             "expected to be Array or Object. "
                             "Incorrect value given")
    if w_data.tp == space.tp_array:
        with space.iter(w_data) as itr:
            while not itr.done():
                w_key, w_value = itr.next_item(space)
                key = _get_key(space, num_prefix, w_key)
                res = _build_query(space, [], key, w_value, num_prefix, arg_sep, enctype)
                out.append(''.join(res))
    if w_data.tp == space.tp_object:
        for key, w_value in w_data.get_instance_attrs(interp).iteritems():
            _, prop = demangle_property(key)
            # Attributes for which demangle_property() reports a truthy
            # second value are skipped.
            if prop:
                continue
            res = _build_query(space, [], key, w_value, num_prefix, arg_sep, enctype)
            out.append(''.join(res))
    outstr = out.build()
    # Drop trailing separators as whole units.  str.rstrip(arg_sep) would
    # treat arg_sep as a *set of characters*, so a multi-character
    # separator such as "&amp;" could also eat trailing characters of the
    # last value (e.g. "a=map&amp;" -> "a=").
    if arg_sep:
        while outstr.endswith(arg_sep):
            end = len(outstr) - len(arg_sep)
            assert end >= 0
            outstr = outstr[:end]
    return interp.space.newstr(outstr)
def xmlcharrefreplace_errors(space, w_exc):
    # Error callback: for a UnicodeEncodeError, replace each code point
    # in the exception's [start, end) range by its "&#<decimal>;"
    # character reference.  Returns the tuple (replacement, end).
    # Any other exception type raises TypeError.
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc)
    w_obj = space.getattr(w_exc, space.newtext('object'))
    # Result is unused; presumably called for its validation side
    # effect -- TODO confirm.
    space.realutf8_w(w_obj)
    w_obj = space.convert_arg_to_w_unicode(w_obj)
    start_index = space.int_w(space.getattr(w_exc, space.newtext('start')))
    w_end = space.getattr(w_exc, space.newtext('end'))
    end_index = space.int_w(w_end)
    # Convert the indices into byte offsets in the utf8 string.
    byte_pos = w_obj._index_to_byte(start_index)
    byte_end = w_obj._index_to_byte(end_index)
    utf8 = w_obj._utf8
    out = StringBuilder()
    while byte_pos < byte_end:
        codepoint = rutf8.codepoint_at_pos(utf8, byte_pos)
        out.append("&#")
        out.append(str(codepoint))
        out.append(";")
        byte_pos = rutf8.next_codepoint_pos(utf8, byte_pos)
    replacement = out.build()
    length = rutf8.check_utf8(replacement, True)
    return space.newtuple([space.newutf8(replacement, length), w_end])
def a2b_uu(space, ascii):
    "Decode a line of uuencoded data."
    # The first character encodes the number of decoded bytes.
    if len(ascii) == 0:
        # obscure case, for compatibility with CPython
        expected = (-0x20) & 0x3F
    else:
        expected = (ord(ascii[0]) - 0x20) & 0x3F
    out = StringBuilder(expected)
    # Every group of four input characters yields up to three bytes.
    for base in range(1, len(ascii), 4):
        q0 = _a2b_read(space, ascii, base)
        q1 = _a2b_read(space, ascii, base + 1)
        q2 = _a2b_read(space, ascii, base + 2)
        q3 = _a2b_read(space, ascii, base + 3)
        # Once `expected` bytes were produced, any further non-zero
        # input is an error.
        if out.getlength() < expected:
            out.append(chr(q0 << 2 | q1 >> 4))
        elif q0 != 0 or q1 != 0:
            raise_Error(space, "Trailing garbage")
        if out.getlength() < expected:
            out.append(chr((q1 & 0xF) << 4 | q2 >> 2))
        elif q2 != 0:
            raise_Error(space, "Trailing garbage")
        if out.getlength() < expected:
            out.append(chr((q2 & 0x3) << 6 | q3))
        elif q3 != 0:
            raise_Error(space, "Trailing garbage")
    # Pad with NUL bytes if the line was shorter than announced.
    missing = expected - out.getlength()
    if missing > 0:
        out.append_multiple_char("\x00", missing)
    return space.wrap(out.build())
def repr(self, numbers=True):
    """Return a listing of self.bytecode, one instruction per line.

    Each opcode is followed by zero, one or two 16-bit big-endian
    arguments.  With `numbers` true, every line is prefixed by the
    instruction's byte offset formatted with "%3d".
    """
    code = self.bytecode
    out = StringBuilder()
    pc = 0
    while pc < len(code):
        op = opcodes.opcodes[ord(code[pc])]
        offset = pc
        nargs = op.numargs
        if nargs == 0:
            text = " " + op.name
            pc += 1
        elif nargs == 1:
            operand = (ord(code[pc + 1]) << 8) + ord(code[pc + 2])
            text = " %s %d" % (op.name, operand)
            pc += 3
        else:
            assert nargs == 2
            first = (ord(code[pc + 1]) << 8) + ord(code[pc + 2])
            second = (ord(code[pc + 3]) << 8) + ord(code[pc + 4])
            text = " %s %d %d" % (op.name, first, second)
            pc += 5
        if numbers:
            out.append("%3d" % offset + text)
        else:
            out.append(text)
        out.append("\n")
    return out.build()
class W_BytesBuilder(W_Root):
    # Interpreter-level wrapper around an RPython StringBuilder.

    def __init__(self, space, size):
        # A negative size means "no size hint".
        if size < 0:
            self.builder = StringBuilder()
        else:
            self.builder = StringBuilder(size)

    @unwrap_spec(size=int)
    def descr__new__(space, w_subtype, size=-1):
        # NOTE: w_subtype is ignored; a W_BytesBuilder is always created.
        return W_BytesBuilder(space, size)

    @unwrap_spec(s='bytes')
    def descr_append(self, space, s):
        # Append the whole byte string s.
        self.builder.append(s)

    @unwrap_spec(s='bytes', start=int, end=int)
    def descr_append_slice(self, space, s, start, end):
        # Append s[start:end]; raises ValueError unless
        # 0 <= start <= end <= len(s).
        if not 0 <= start <= end <= len(s):
            raise oefmt(space.w_ValueError, "bad start/stop")
        self.builder.append_slice(s, start, end)

    def descr_build(self, space):
        # Return the accumulated bytes as a wrapped bytes object.
        w_s = space.newbytes(self.builder.build())
        # after build(), we can continue to append more strings
        # to the same builder. This is supported since
        # 2ff5087aca28 in RPython.
        return w_s

    def descr_len(self, space):
        # Return the number of bytes accumulated so far.
        # NOTE(review): nothing in this class sets self.builder to None,
        # so this check looks unreachable here -- confirm before removing.
        if self.builder is None:
            raise oefmt(space.w_ValueError, "no length of built builder")
        return space.newint(self.builder.getlength())
def repr__Bytearray(space, w_bytearray):
    # Build the "bytearray(b'...')" representation: backslash and single
    # quote are backslash-escaped, tab/CR/LF use their short escapes,
    # other characters outside '\x20'..'\x7e' become \xNN, and the rest
    # is copied unchanged.
    data = w_bytearray.data
    hexdigits = "0123456789abcdef"
    # Good default if there are no replacements.
    out = StringBuilder(len("bytearray(b'')") + len(data))
    out.append("bytearray(b'")
    for ch in data:
        if ch == '\\' or ch == "'":
            out.append('\\')
            out.append(ch)
        elif ch == '\t':
            out.append('\\t')
        elif ch == '\r':
            out.append('\\r')
        elif ch == '\n':
            out.append('\\n')
        elif not '\x20' <= ch < '\x7f':
            code = ord(ch)
            out.append('\\x')
            out.append(hexdigits[code >> 4])
            out.append(hexdigits[code & 0xF])
        else:
            out.append(ch)
    out.append("')")
    return space.wrap(out.build())
class W_StringOutputPort(W_OutputPort):
    """Output port that accumulates everything written to it in memory."""
    errorname = "output-port"

    def __init__(self):
        self.closed = False
        self.str = StringBuilder()

    def write(self, s):
        """Append the string s to the port's contents."""
        self.str.append(s)

    def contents(self):
        """Return everything written so far as a string."""
        return self.str.build()

    def seek(self, offset, end=False):
        """Move the write position to `offset`.

        Nothing happens if `end` is true or `offset` equals the current
        length.  Seeking past the end pads the contents with NUL
        characters up to `offset`; seeking backwards truncates the
        contents to `offset` characters.
        """
        length = self.str.getlength()
        if end or offset == length:
            return
        if offset > length:
            # Pad by (offset - length) characters.  The operands used to
            # be the other way round, giving a negative count, so no
            # padding was ever written.
            self.str.append_multiple_char("\0", offset - length)
        else:
            # FIXME: this is potentially slow.
            content = self.contents()
            self.str = StringBuilder(offset)
            self.str.append_slice(content, 0, offset)

    def tell(self):
        """Return the current length of the contents."""
        return self.str.getlength()
def bitwise_not(self, space):
    """Return a new W_ConstStringObject in which every character of this
    string is XOR-ed with 0xff."""
    size = self.strlen()
    out = StringBuilder(size)
    for idx in range(size):
        flipped = ord(self.character(idx)) ^ 0xff
        out.append(chr(flipped))
    return W_ConstStringObject(out.build())
def read_number_or_id(f, init):
    """Read a number or identifier token from the stream f.

    `init` is the text already consumed; further characters are read
    for as long as idchar() accepts them.  The token is returned as the
    first of these that applies: a W_Fixnum, a W_Bignum (when the
    integer overflows), a W_Flonum (when it parses as a float), or
    otherwise a W_Symbol.
    """
    sofar = StringBuilder(64)
    sofar.append(init)
    while True:
        c = f.peek()
        if c == "":
            # End of input.
            break
        if idchar(c):
            v = f.read(1)
            assert v == c
            sofar.append(v)
        else:
            break
    got = sofar.build()
    try:
        val = string_to_int(got)
        return values.W_Fixnum.make_or_interned(val)
    except ParseStringOverflowError:
        val = rbigint.fromdecimalstr(got)
        return values.W_Bignum(val)
    except ParseStringError:
        try:
            return values.W_Flonum(float(got))
        except ValueError:
            # Only a failed float() conversion means "not a number"; a
            # bare except would also hide unrelated errors.
            return values.W_Symbol.make(got)
class W_StringOutputPort(W_OutputPort):
    """Output port that accumulates everything written to it in memory."""
    errorname = "output-port"
    _attrs_ = ["closed", "str"]

    def __init__(self):
        self.closed = False
        self.str = StringBuilder()

    def write(self, s):
        """Append the string s to the port's contents."""
        self.str.append(s)

    def contents(self):
        """Return everything written so far as a string."""
        return self.str.build()

    def seek(self, offset, end=False):
        """Move the write position to `offset`.

        Nothing happens if `end` is true or `offset` equals the current
        length.  Seeking past the end pads the contents with NUL
        characters up to `offset`; seeking backwards truncates the
        contents to `offset` characters.
        """
        length = self.str.getlength()
        if end or offset == length:
            return
        if offset > length:
            # Pad by (offset - length) characters.  The operands used to
            # be the other way round, giving a negative count, so no
            # padding was ever written.
            self.str.append_multiple_char("\0", offset - length)
        else:
            # FIXME: this is potentially slow.
            content = self.contents()
            self.str = StringBuilder(offset)
            self.str.append_slice(content, 0, offset)

    def tell(self):
        """Return the current length of the contents."""
        return self.str.getlength()
def a2b_hqx(space, ascii): """Decode .hqx coding. Returns (bin, done).""" # overestimate the resulting length res = StringBuilder(len(ascii)) done = 0 pending_value = 0 pending_bits = 0 for c in ascii: n = ord(table_a2b_hqx[ord(c)]) if n <= 0x3F: pending_value = (pending_value << 6) | n pending_bits += 6 if pending_bits == 24: # flush res.append(chr(pending_value >> 16)) res.append(chr((pending_value >> 8) & 0xff)) res.append(chr(pending_value & 0xff)) pending_value = 0 pending_bits = 0 elif n == FAIL: raise_Error(space, 'Illegal character') elif n == DONE: if pending_bits >= 8: res.append(chr(pending_value >> (pending_bits - 8))) if pending_bits >= 16: res.append(chr((pending_value >> (pending_bits - 16)) & 0xff)) done = 1 break #elif n == SKIP: pass else: if pending_bits > 0: raise_Incomplete(space, 'String has incomplete number of bytes') return space.newtuple([space.newbytes(res.build()), space.wrap(done)])
def read(self, size=-1):
    """Read from the file.

    size == 0 returns "" at once; size < 0 reads until end of file;
    size > 0 performs a single read of at most `size` bytes.  Raises the
    error built by _error() when a read returns nothing and the file is
    not at end of file.
    """
    # XXX CPython uses a more delicate logic here
    self._check_closed()
    ll_file = self._ll_file
    if size == 0:
        return ""
    elif size < 0:
        # read the entire contents
        buf = lltype.malloc(rffi.CCHARP.TO, BASE_BUF_SIZE, flavor='raw')
        try:
            s = StringBuilder()
            while True:
                returned_size = self._fread(buf, BASE_BUF_SIZE, ll_file)
                returned_size = intmask(returned_size)  # is between 0 and BASE_BUF_SIZE
                if returned_size == 0:
                    if c_feof(ll_file):
                        # ok, finished
                        return s.build()
                    raise _error(ll_file)
                s.append_charpsize(buf, returned_size)
        finally:
            # The raw buffer is freed on every exit path.
            lltype.free(buf, flavor='raw')
    else:  # size > 0
        with rffi.scoped_alloc_buffer(size) as buf:
            returned_size = self._fread(buf.raw, size, ll_file)
            returned_size = intmask(returned_size)  # is between 0 and size
            if returned_size == 0:
                if not c_feof(ll_file):
                    raise _error(ll_file)
            s = buf.str(returned_size)
            assert s is not None
        return s
def unwrap(self):
    """Return the contents as a plain string, built one character at a
    time from character()."""
    # note: always overriden so far
    size = self.strlen()
    out = StringBuilder(size)
    idx = 0
    while idx < size:
        out.append(self.character(idx))
        idx += 1
    return out.build()
class StringBuilderWithOneCharCancellable(object):
    """StringBuilder wrapper that holds back the most recent character.

    The character passed to append() or produced by newline() is kept in
    `pending` (as an ordinal, -1 when empty) and only written to the
    underlying builder by the next operation or by build().
    """

    def __init__(self, crlf, initial):
        self.crlf = crlf
        self.builder = StringBuilder(initial)
        self.pending = -1

    def _flush(self):
        # Write out the held-back character, if there is one.
        held = self.pending
        if held < 0:
            return
        self.builder.append(chr(held))
        self.pending = -1
    _flush._always_inline_ = True

    def append(self, c):
        """Hold back c, after writing out the previously held character."""
        self._flush()
        self.pending = ord(c)

    def newline(self):
        """Emit a line break: '\\r' is written at once when crlf is set,
        and '\\n' becomes the held-back character."""
        self._flush()
        if self.crlf:
            self.builder.append('\r')
        self.pending = ord('\n')

    def to_hex(self, c):
        """Write c as two uppercase hexadecimal digits."""
        self._flush()
        hexdigits = "0123456789ABCDEF"
        code = ord(c)
        self.builder.append(hexdigits[code >> 4])
        self.builder.append(hexdigits[code & 0xf])

    def build(self):
        """Write out any held-back character and return the result."""
        self._flush()
        return self.builder.build()
def b2a_base64(space, bin):
    "Base64-code line of data."
    # Four output characters per three input bytes, plus the newline.
    out_len = (len(bin) + 2) // 3
    try:
        out_len = ovfcheck(out_len * 4)
    except OverflowError:
        raise OperationError(space.w_MemoryError, space.w_None)
    out_len += 1
    out = StringBuilder(out_len)
    acc = 0      # bit accumulator
    nbits = 0    # number of bits in acc not yet emitted
    for byte in bin:
        # Shift the byte in and emit every complete group of 6 bits.
        acc = (acc << 8) | ord(byte)
        nbits += 8
        out.append(table_b2a_base64[(acc >> (nbits-6)) & 0x3f])
        nbits -= 6
        if nbits >= 6:
            out.append(table_b2a_base64[(acc >> (nbits-6)) & 0x3f])
            nbits -= 6
    # Emit the leftover bits, padded to a full group of four characters.
    if nbits == 2:
        out.append(table_b2a_base64[(acc & 3) << 4])
        out.append(PAD)
        out.append(PAD)
    elif nbits == 4:
        out.append(table_b2a_base64[(acc & 0xf) << 2])
        out.append(PAD)
    out.append('\n')
    return space.newbytes(out.build())
def func():
    """Build and return a string using append, append_slice and
    append_multiple_char on a StringBuilder."""
    sb = StringBuilder()
    for piece in ("a", "abc"):
        sb.append(piece)
    sb.append_slice("abc", 1, 2)
    sb.append_multiple_char('d', 4)
    return sb.build()
def _read_all(self, space): "Read all the file, don't update the cache" # Must run with the lock held! builder = StringBuilder() # First copy what we have in the current buffer current_size = self._readahead() data = None if current_size: data = ''.join(self.buffer[self.pos:self.pos + current_size]) builder.append(data) self.pos += current_size # We're going past the buffer's bounds, flush it if self.writable: self._flush_and_rewind_unlocked(space) self._reader_reset_buf() while True: # Read until EOF or until read() would block w_data = space.call_method(self.w_raw, "read") if space.is_w(w_data, space.w_None): if current_size == 0: return w_data break data = space.str_w(w_data) size = len(data) if size == 0: break builder.append(data) current_size += size if self.abs_pos != -1: self.abs_pos += size return space.wrap(builder.build())
def a2b_hqx(space, ascii):
    """Decode .hqx coding. Returns (bin, done)."""
    # The input length overestimates the resulting length.
    out = StringBuilder(len(ascii))
    done = 0
    acc = 0      # bit accumulator, 6 bits per accepted character
    nbits = 0    # number of bits currently held in acc
    for ch in ascii:
        code = ord(table_a2b_hqx[ord(ch)])
        if code <= 0x3F:
            acc = (acc << 6) | code
            nbits += 6
            if nbits == 24:
                # Four characters collected: flush three whole bytes.
                out.append(chr(acc >> 16))
                out.append(chr((acc >> 8) & 0xff))
                out.append(chr(acc & 0xff))
                acc = 0
                nbits = 0
        elif code == FAIL:
            raise_Error(space, 'Illegal character')
        elif code == DONE:
            # Terminator: emit the whole bytes still pending and stop.
            if nbits >= 8:
                out.append(chr(acc >> (nbits - 8)))
            if nbits >= 16:
                out.append(chr((acc >> (nbits - 16)) & 0xff))
            done = 1
            break
        # any other code (SKIP) is ignored
    else:
        # Input ended without a DONE marker: leftover bits are an error.
        if nbits > 0:
            raise_Incomplete(space, 'String has incomplete number of bytes')
    w_bin = space.newbytes(out.build())
    return space.newtuple([w_bin, space.newint(done)])
def _charp2str_to_null(cp, index):
    """Return the characters of cp from position index up to, but not
    including, the first '\\x00'."""
    pos = rffi.cast(lltype.Signed, index)
    out = StringBuilder()
    while True:
        ch = cp[pos]
        if ch == '\x00':
            break
        out.append(ch)
        pos += 1
    return out.build()
def rlecode_hqx(space, data):
    "Binhex RLE-code binary data."
    # The input length is only a guess at the resulting length.
    total = len(data)
    out = StringBuilder(total)
    pos = 0
    while pos < total:
        byte = data[pos]
        out.append(byte)
        if byte == '\x90':
            # The marker byte itself is escaped; repetitions of it are
            # not run-length encoded (*).
            out.append('\x00')
            pos += 1
            continue
        # Find the end of the run of identical bytes, capped at 255.
        run_end = pos + 1
        while run_end < total and data[run_end] == byte and run_end < pos + 255:
            run_end += 1
        run = run_end - pos
        if run > 3:
            # More than 3 in a row: output RLE.  For runs longer than
            # 255, see (*) below.
            out.append('\x90')
            out.append(chr(run))
            pos = run_end
        else:
            pos += 1
    # (*) Simplicity is preferred over compactness here, like CPython.
    # Trying harder to produce the smallest string that rledecode_hqx()
    # would expand back to 'data' risks obscure failures in programs
    # relying on the current output.
    return space.newbytes(out.build())
def descr_buffer__new__(space, w_subtype, w_object, offset=0, size=-1):
    # Create a buffer over w_object, optionally restricted to the slice
    # described by offset and size.  Raises TypeError if w_subtype is
    # not exactly 'buffer', and ValueError if offset < 0 or size < -1.
    # w_subtype can only be exactly 'buffer' for now
    if not space.is_w(w_subtype, space.gettypefor(Buffer)):
        raise OperationError(space.w_TypeError, space.wrap("argument 1 must be 'buffer'"))
    if space.isinstance_w(w_object, space.w_unicode):
        # unicode objects support the old buffer interface
        # but not the new buffer interface (change in python 2.7)
        from rpython.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
        unistr = space.unicode_w(w_object)
        # Pack each unicode character into the byte string that backs
        # the buffer.
        builder = StringBuilder(len(unistr) * UNICODE_SIZE)
        for unich in unistr:
            pack_unichar(unich, builder)
        from pypy.interpreter.buffer import StringBuffer
        w_buffer = space.wrap(StringBuffer(builder.build()))
    else:
        w_buffer = space.buffer(w_object)
    buffer = space.interp_w(Buffer, w_buffer)  # type-check
    # The defaults mean "the whole buffer": return it unchanged.
    if offset == 0 and size == -1:
        return w_buffer
    # handle buffer slices
    if offset < 0:
        raise OperationError(space.w_ValueError, space.wrap("offset must be zero or positive"))
    if size < -1:
        raise OperationError(space.w_ValueError, space.wrap("size must be zero or positive"))
    if isinstance(buffer, RWBuffer):
        buffer = RWSubBuffer(buffer, offset, size)
    else:
        buffer = SubBuffer(buffer, offset, size)
    return space.wrap(buffer)
def read(self, size=-1):
    """Read from the file.

    size < 0 reads until end of file; otherwise a single fread of at
    most `size` bytes is performed.  Raises ValueError if the file is
    closed, and the error built by _error() when a read returns nothing
    and the file is not at end of file.
    """
    # XXX CPython uses a more delicate logic here
    ll_file = self.ll_file
    if not ll_file:
        raise ValueError("I/O operation on closed file")
    if size < 0:
        # read the entire contents
        buf = lltype.malloc(rffi.CCHARP.TO, BASE_BUF_SIZE, flavor='raw')
        try:
            s = StringBuilder()
            while True:
                returned_size = c_fread(buf, 1, BASE_BUF_SIZE, ll_file)
                returned_size = intmask(returned_size)  # is between 0 and BASE_BUF_SIZE
                if returned_size == 0:
                    if c_feof(ll_file):
                        # ok, finished
                        return s.build()
                    raise _error(ll_file)
                s.append_charpsize(buf, returned_size)
        finally:
            # The raw buffer is freed on every exit path.
            lltype.free(buf, flavor='raw')
    else:
        raw_buf, gc_buf = rffi.alloc_buffer(size)
        try:
            returned_size = c_fread(raw_buf, 1, size, ll_file)
            returned_size = intmask(returned_size)  # is between 0 and size
            if returned_size == 0:
                if not c_feof(ll_file):
                    raise _error(ll_file)
            s = rffi.str_from_buffer(raw_buf, gc_buf, size, returned_size)
        finally:
            rffi.keep_buffer_alive_until_here(raw_buf, gc_buf)
        return s
def str_translate__String_ANY_ANY(space, w_string, w_table, w_deletechars=''):
    """charfilter - unicode handling is not implemented

    Return a copy of the string where all characters occurring in the
    optional argument deletechars are removed, and the remaining
    characters have been mapped through the given translation table,
    which must be a string of length 256"""
    if space.is_w(w_table, space.w_None):
        # No table given: use the table that changes nothing.
        table = DEFAULT_NOOP_TABLE
    else:
        table = space.bufferstr_w(w_table)
        if len(table) != 256:
            raise OperationError(
                space.w_ValueError,
                space.wrap("translation table must be 256 characters long"))
    source = w_string._value
    deletechars = space.str_w(w_deletechars)
    if len(deletechars) == 0:
        # Nothing to delete, so the output has the input's length.
        out = StringBuilder(len(source))
        for ch in source:
            out.append(table[ord(ch)])
    else:
        out = StringBuilder()
        # Per-character-code flag: True means "drop this character".
        drop = [False] * 256
        for ch in deletechars:
            drop[ord(ch)] = True
        for ch in source:
            code = ord(ch)
            if drop[code]:
                continue
            out.append(table[code])
    return W_StringObject(out.build())
def a2b_uu(space, ascii):
    "Decode a line of uuencoded data."
    # The first character announces how many bytes the line decodes to.
    if len(ascii) == 0:
        # obscure case, for compatibility with CPython
        wanted = (-0x20) & 0x3f
    else:
        wanted = (ord(ascii[0]) - 0x20) & 0x3f
    decoded = StringBuilder(wanted)
    nchars = len(ascii)
    # Each group of four characters carries three bytes.
    for start in range(1, nchars, 4):
        c0 = _a2b_read(space, ascii, start)
        c1 = _a2b_read(space, ascii, start + 1)
        c2 = _a2b_read(space, ascii, start + 2)
        c3 = _a2b_read(space, ascii, start + 3)
        # Byte 1; after `wanted` bytes only zero input is tolerated.
        if decoded.getlength() < wanted:
            decoded.append(chr(c0 << 2 | c1 >> 4))
        elif c0 != 0 or c1 != 0:
            raise_Error(space, "Trailing garbage")
        # Byte 2.
        if decoded.getlength() < wanted:
            decoded.append(chr((c1 & 0xf) << 4 | c2 >> 2))
        elif c2 != 0:
            raise_Error(space, "Trailing garbage")
        # Byte 3.
        if decoded.getlength() < wanted:
            decoded.append(chr((c2 & 0x3) << 6 | c3))
        elif c3 != 0:
            raise_Error(space, "Trailing garbage")
    # A line shorter than announced is padded with NUL bytes.
    shortfall = wanted - decoded.getlength()
    if shortfall > 0:
        decoded.append_multiple_char('\x00', shortfall)
    return space.wrap(decoded.build())
def direct_readline(self, size=-1):
    """Read one line from the stream.

    With size < 0 the stream's own readline() is used.  Otherwise at
    most `size` characters are returned, stopping after the first
    newline.
    """
    stream = self.getstream()
    self.check_readable()
    if size < 0:
        return stream.readline()
    else:
        # very inefficient unless there is a peek()
        result = StringBuilder()
        while size > 0:
            # "peeks" on the underlying stream to see how many chars
            # we can safely read without reading past an end-of-line
            startindex, peeked = stream.peek()
            assert 0 <= startindex <= len(peeked)
            endindex = startindex + size
            pn = peeked.find("\n", startindex, endindex)
            if pn < 0:
                # No newline in the peeked window: read up to the size
                # limit, or one character past the peeked data.
                pn = min(endindex - 1, len(peeked))
            c = stream.read(pn - startindex + 1)
            if not c:
                break
            result.append(c)
            if c.endswith('\n'):
                break
            size -= len(c)
        return result.build()
def readall_w(self, space):
    # Read from self.fd until os.read() returns an empty chunk and
    # return the data as wrapped bytes.  Returns None (wrapped) on
    # EAGAIN when nothing was read yet; any other OSError with nothing
    # read yet is raised via wrap_oserror() as an IOError.
    self._check_closed(space)
    self._check_readable(space)
    total = 0
    builder = StringBuilder()
    while True:
        # new_buffersize() gives the target total size; only the
        # difference to what we already have is requested.
        newsize = int(new_buffersize(self.fd, total))
        try:
            chunk = os.read(self.fd, newsize - total)
        except OSError as e:
            if e.errno == errno.EINTR:
                # Interrupted: run pending signal handlers and retry.
                space.getexecutioncontext().checksignals()
                continue
            if total > 0:
                # return what we've got so far
                break
            if e.errno == errno.EAGAIN:
                return space.w_None
            raise wrap_oserror(space, e, exception_name='w_IOError')
        if not chunk:
            break
        builder.append(chunk)
        total += len(chunk)
    return space.newbytes(builder.build())
def test_deflate_set_dictionary():
    """Compress with a preset dictionary, then check that inflating
    first reports Z_NEED_DICT and succeeds once the same dictionary has
    been set."""
    text = 'abcabc'
    zdict = 'abc'
    stream = rzlib.deflateInit()
    rzlib.deflateSetDictionary(stream, zdict)
    bytes = rzlib.compress(stream, text, rzlib.Z_FINISH)
    rzlib.deflateEnd(stream)
    stream2 = rzlib.inflateInit()
    from rpython.rtyper.lltypesystem import lltype, rffi, rstr
    from rpython.rtyper.annlowlevel import llstr
    from rpython.rlib.rstring import StringBuilder
    # Drive the inflate stream by hand so that the intermediate
    # Z_NEED_DICT return code can be observed.
    with lltype.scoped_alloc(rffi.CCHARP.TO, len(bytes)) as inbuf:
        rstr.copy_string_to_raw(llstr(bytes), inbuf, 0, len(bytes))
        stream2.c_next_in = rffi.cast(rzlib.Bytefp, inbuf)
        rffi.setintfield(stream2, 'c_avail_in', len(bytes))
        with lltype.scoped_alloc(rffi.CCHARP.TO, 100) as outbuf:
            stream2.c_next_out = rffi.cast(rzlib.Bytefp, outbuf)
            bufsize = 100
            rffi.setintfield(stream2, 'c_avail_out', bufsize)
            err = rzlib._inflate(stream2, rzlib.Z_SYNC_FLUSH)
            assert err == rzlib.Z_NEED_DICT
            rzlib.inflateSetDictionary(stream2, zdict)
            rzlib._inflate(stream2, rzlib.Z_SYNC_FLUSH)
            # The bytes produced are what is no longer available in
            # outbuf.
            avail_out = rffi.cast(lltype.Signed, stream2.c_avail_out)
            result = StringBuilder()
            result.append_charpsize(outbuf, bufsize - avail_out)
    rzlib.inflateEnd(stream2)
    assert result.build() == text
def serialize(self, w_obj):
    """Return the serialized form of w_obj as a string.

    w_obj must not be a W_Reference.  The work is done by
    w_obj.serialize(), which writes into a fresh StringBuilder using a
    fresh SerializerMemo.
    """
    from hippy.module.serialize import SerializerMemo
    assert not isinstance(w_obj, W_Reference)
    out = StringBuilder()
    memo = SerializerMemo()
    w_obj.serialize(self, out, memo)
    return out.build()
def b2a_base64(space, bin):
    "Base64-code line of data."
    # Size hint: 4 characters per 3 input bytes, plus the final newline.
    groups = (len(bin) + 2) // 3
    try:
        size_hint = ovfcheck(groups * 4)
    except OverflowError:
        raise OperationError(space.w_MemoryError, space.w_None)
    size_hint += 1
    encoded = StringBuilder(size_hint)
    bitbuf = 0     # accumulated input bits
    bitcount = 0   # bits in bitbuf that were not emitted yet
    for char in bin:
        # Add 8 bits, then emit one or two 6-bit groups.
        bitbuf = (bitbuf << 8) | ord(char)
        bitcount += 8
        encoded.append(table_b2a_base64[(bitbuf >> (bitcount - 6)) & 0x3f])
        bitcount -= 6
        if bitcount >= 6:
            encoded.append(table_b2a_base64[(bitbuf >> (bitcount - 6)) & 0x3f])
            bitcount -= 6
    # Flush the remaining 2 or 4 bits and pad the final group.
    if bitcount == 2:
        encoded.append(table_b2a_base64[(bitbuf & 3) << 4])
        encoded.append(PAD)
        encoded.append(PAD)
    elif bitcount == 4:
        encoded.append(table_b2a_base64[(bitbuf & 0xf) << 2])
        encoded.append(PAD)
    encoded.append('\n')
    return space.wrapbytes(encoded.build())
def decode_string_escaped(self, start):
    """Decode a string that contains escape sequences or needs copying.

    The text between `start` and self.pos is copied as-is; decoding
    then continues from self.pos until the closing double quote.
    Returns the wrapped unicode string and leaves self.pos just past the
    closing quote.  Errors are reported through self._raise().
    """
    i = self.pos
    builder = StringBuilder((i - start) * 2)  # just an estimate
    assert start >= 0
    assert i >= 0
    builder.append_slice(self.s, start, i)
    while True:
        ch = self.ll_chars[i]
        i += 1
        if ch == '"':
            # Closing quote: the collected bytes are decoded as UTF-8.
            content_utf8 = builder.build()
            content_unicode = unicodehelper.decode_utf8(
                self.space, content_utf8)
            self.last_type = TYPE_STRING
            self.pos = i
            return self.space.newunicode(content_unicode)
        elif ch == '\\':
            # decode_escape_sequence() appends to the builder and
            # returns the position to continue from.
            i = self.decode_escape_sequence(i, builder)
        elif ch < '\x20':
            # A NUL character is reported as an unterminated string.
            if ch == '\0':
                self._raise("Unterminated string starting at char %d", start - 1)
            else:
                self._raise("Invalid control character at char %d", i - 1)
        else:
            builder.append(ch)
def _read_all(self, space):
    "Read all the file, don't update the cache"
    # Must run with the lock held!
    out = StringBuilder()
    # Start with whatever is still unread in the current buffer.
    total = self._readahead()
    data = None
    if total:
        data = self.buffer[self.pos:self.pos + total]
        out.append(data)
        self.pos += total
    # We are about to go past the buffer's bounds, so flush it.
    if self.writable:
        self._flush_and_rewind_unlocked(space)
    self._reader_reset_buf()
    while True:
        # Keep reading until EOF or until read() would block.
        w_chunk = space.call_method(self.w_raw, "read")
        if space.is_w(w_chunk, space.w_None):
            # None from the raw stream: pass it on only if nothing was
            # collected at all.
            if total == 0:
                return w_chunk
            break
        data = space.bytes_w(w_chunk)
        nread = len(data)
        if nread == 0:
            break
        out.append(data)
        total += nread
        if self.abs_pos != -1:
            self.abs_pos += nread
    return space.newbytes(out.build())
def reencode_utf8_with_surrogates(utf8):
    """ Receiving valid UTF8 which contains surrogates, combine surrogate
    pairs into correct UTF8 with pairs collapsed. This is a rare case
    and you should not be using surrogate pairs in the first place,
    so the performance here is a bit secondary
    """
    s = StringBuilder(len(utf8))
    stop = len(utf8)
    i = 0
    while i < stop:
        uchr = codepoint_at_pos(utf8, i)
        if 0xD800 <= uchr <= 0xDBFF:
            # High surrogate: look at the following code point.
            high = uchr
            i = next_codepoint_pos(utf8, i)
            if i >= stop:
                # Lone high surrogate at the very end: emit it as-is.
                unichr_as_utf8_append(s, uchr, True)
                break
            low = codepoint_at_pos(utf8, i)
            if 0xDC00 <= low <= 0xDFFF:
                # Valid pair: combine into a single code point and skip
                # the low surrogate.
                uchr = 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00)
                i = next_codepoint_pos(utf8, i)
            # else not really a surrogate pair, just append high
        else:
            i = next_codepoint_pos(utf8, i)
        unichr_as_utf8_append(s, uchr, True)
    return s.build()
def var_dump(self, space, indent, recursion):
    """Return the var_dump text for this object.

    indent    -- prefix for each line; a trailing '&' is kept on the
                 header line but dropped for the body
    recursion -- dict of the objects currently being dumped; an object
                 found in it is rendered as '*RECURSION*'
    """
    if self in recursion:
        return '%s*RECURSION*\n' % indent
    s = StringBuilder()
    # Mark this object as being dumped for the duration of the call.
    recursion[self] = None
    header = 'object(%s)#%d ' % (self.getclass().name, self.get_instance_number())
    orig_indent = indent
    if indent.endswith('&'):
        indent = indent[:-1]
    subindent = indent + ' '
    counter = 0
    all_names = []
    all_values_w = []
    # enum_properties() fills the two lists in parallel.
    self.enum_properties(space.ec.interpreter, all_names, all_values_w)
    # Keyed by the dumped property name: a later property with the same
    # key replaces the earlier entry, keeping the first position.
    properties = OrderedDict()
    for i in range(len(all_names)):
        name, access = demangle_property(all_names[i])
        key = dump_property(name, access)
        properties[key] = '%s[%s]=>\n%s' % (
            subindent, key, all_values_w[i].var_dump(
                space, subindent, recursion))
    for part in properties.itervalues():
        counter += 1
        s.append(part)
    s.append('%s}\n' % indent)
    del recursion[self]
    return '%s%s(%d) {\n' % (orig_indent, header, counter) + s.build()