def str_replace__Rope_Rope_Rope_ANY(space, w_self, w_sub, w_by, w_maxsplit=-1):
    """Rope implementation of ``replace(sub, by[, maxsplit])``.

    A ``maxsplit`` of 0 returns the receiver unchanged (via
    ``create_if_subclassed``).  For a non-empty ``sub`` the rope is split
    on ``sub`` and re-joined with ``by``.  For an empty ``sub`` a copy of
    ``by`` is placed in front of each of the leading characters and one
    more after them; a positive ``maxsplit`` caps the total number of
    inserted copies.

    Raises an app-level OverflowError("string too long") when building the
    result rope overflows.
    """
    source = w_self._node
    source_len = source.length()
    pattern = w_sub._node
    replacement = w_by._node
    limit = space.int_w(w_maxsplit)
    if limit == 0:
        return w_self.create_if_subclassed()

    if pattern.length():
        # Ordinary case: split on the pattern, glue back with the replacement.
        pieces = rope.split(source, pattern, limit)
        if not pieces:
            return w_self.create_if_subclassed()
        try:
            return W_RopeObject(rope.join(replacement, pieces))
        except OverflowError:
            raise OperationError(space.w_OverflowError,
                                 space.wrap("string too long"))

    # Empty pattern: interleave the replacement with single characters.
    if 0 < limit < source_len + 2:
        # `limit` insertions in total means `limit - 1` characters are
        # individually separated before the untouched tail.
        separated = limit - 1
        assert separated >= 0
    else:
        separated = source_len
    pieces = [replacement]
    items = rope.ItemIterator(source)
    for _ in range(separated):
        pieces.append(items.nextrope())
        pieces.append(replacement)
    pieces.append(rope.getslice_one(source, separated, source_len))
    try:
        return W_RopeObject(rope.rebalance(pieces))
    except OverflowError:
        raise OperationError(space.w_OverflowError,
                             space.wrap("string too long"))
def unicode_translate__RopeUnicode_ANY(space, w_self, w_table):
    """Rope implementation of ``unicode.translate(table)``.

    Each character's ordinal is looked up in ``w_table`` with ``getitem``:

    * lookup fails with a LookupError -> the character is kept as is;
    * ``None``                        -> the character is dropped;
    * an integer in ``0..sys.maxunicode`` -> replaced by that code point;
    * a unicode object                -> replaced by that string.

    Raises an app-level TypeError for an out-of-range integer or for any
    other kind of mapped value.  Any other error raised by the table
    lookup is propagated unchanged.
    """
    node = w_self._node
    w_sys = space.getbuiltinmodule('sys')
    maxunicode = space.int_w(space.getattr(w_sys, space.wrap("maxunicode")))
    result = []
    items = rope.ItemIterator(node)
    for i in range(node.length()):
        crope = items.nextrope()
        char = crope.getint(0)
        try:
            w_newval = space.getitem(w_table, space.wrap(char))
        except OperationError as e:
            if not e.match(space, space.w_LookupError):
                raise
            # Unmapped character: copy it through untouched.
            result.append(crope)
            continue

        if space.is_w(w_newval, space.w_None):
            # Mapped to None: delete the character.
            continue
        elif space.isinstance_w(w_newval, space.w_int):
            newval = space.int_w(w_newval)
            if newval < 0 or newval > maxunicode:
                raise OperationError(
                    space.w_TypeError,
                    space.wrap("character mapping must be in range(0x%x)" %
                               (maxunicode + 1, )))
            result.append(rope.rope_from_unichar(unichr(newval)))
        elif space.isinstance_w(w_newval, space.w_unicode):
            result.append(ropeunicode_w(space, w_newval))
        else:
            raise OperationError(
                space.w_TypeError,
                space.wrap(
                    "character mapping must return integer, None or unicode"
                ))
    # The collected pieces were previously never returned, so the function
    # implicitly returned None.
    # NOTE(review): rope.rebalance is used here the same way the str replace
    # implementation combines a list of nodes -- confirm it is the intended
    # combiner for unicode nodes as well.
    return W_RopeUnicodeObject(rope.rebalance(result))
def unicode_to_decimal_w(space, w_unistr):
    """Convert a rope unicode object to a byte string of decimal text.

    Whitespace characters become ' ', characters that have a decimal value
    in ``unicodedb`` become the corresponding ASCII digit, and any other
    character with an ordinal in 1..255 is copied through as a byte.

    Raises an app-level TypeError if ``w_unistr`` is not a
    W_RopeUnicodeObject, and an app-level UnicodeEncodeError for the first
    character that cannot be converted.
    """
    if not isinstance(w_unistr, W_RopeUnicodeObject):
        raise OperationError(space.w_TypeError,
                             space.wrap("expected unicode"))
    node = w_unistr._node
    size = node.length()
    out = ['\0'] * size
    ascii_digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    codepoints = rope.ItemIterator(node)
    for pos in range(size):
        code = codepoints.nextint()
        if unicodedb.isspace(code):
            out[pos] = ' '
            continue
        try:
            out[pos] = ascii_digits[unicodedb.decimal(code)]
        except KeyError:
            # No decimal value: only non-NUL single-byte ordinals may pass.
            if not 0 < code < 256:
                w_encoding = space.wrap('decimal')
                w_start = space.wrap(pos)
                w_end = space.wrap(pos + 1)
                w_reason = space.wrap('invalid decimal Unicode string')
                raise OperationError(
                    space.w_UnicodeEncodeError,
                    space.newtuple(
                        [w_encoding, w_unistr, w_start, w_end, w_reason]))
            out[pos] = chr(code)
    return ''.join(out)
def str_translate__Rope_ANY_ANY(space, w_string, w_table, w_deletechars=''):
    """charfilter - unicode handling is not implemented

    Return a copy of the string where all characters occurring
    in the optional argument deletechars are removed, and the
    remaining characters have been mapped through the given translation
    table, which must be a string of length 256.

    Raises an app-level ValueError if the table is not exactly 256
    characters long.
    """
    # XXX CPython accepts buffers, too, not sure what we should do
    table = space.str_w(w_table)
    if len(table) != 256:
        raise OperationError(
            space.w_ValueError,
            space.wrap("translation table must be 256 characters long"))

    node = w_string._node
    chars = []
    items = rope.ItemIterator(node)
    while True:
        # Only the iterator may end the loop: keep the try block limited to
        # nextchar() so a StopIteration escaping from the membership test
        # below cannot be mistaken for the end of the string.
        try:
            c = items.nextchar()
        except StopIteration:
            break
        code = ord(c)
        w_char = W_RopeObject.PREBUILT[code]
        if not space.is_true(space.contains(w_deletechars, w_char)):
            chars.append(table[code])
    return W_RopeObject(rope.rope_from_charlist(chars))
def _is_generic(space, w_self, fun):
    """Return w_True if the rope is non-empty and ``fun`` holds for every char.

    ``fun`` is called with each character in order; the first falsy result
    (or an empty rope) yields ``space.w_False``.
    """
    node = w_self._node
    remaining = node.length()
    if remaining == 0:
        return space.w_False
    chars = rope.ItemIterator(node)
    while remaining > 0:
        if not fun(chars.nextchar()):
            return space.w_False
        remaining -= 1
    return space.w_True
def func(space, w_self):
    """Return w_True if the rope is non-empty and every code point passes
    the ``unicodedb`` predicate named by ``funcname``.

    ``funcname`` is a free variable that must be supplied by the enclosing
    scope (not visible in this block).
    """
    node = w_self._node
    length = node.length()
    if length == 0:
        return space.w_False
    # The predicate does not change between iterations: resolve it once
    # instead of doing the getattr for every character.
    predicate = getattr(unicodedb, funcname)
    items = rope.ItemIterator(node)
    for idx in range(length):
        if not predicate(items.nextint()):
            return space.w_False
    return space.w_True
def _local_transform(node, transform):
    """Build a new W_RopeObject by applying ``transform`` to every character.

    ``transform`` receives one character at a time, in order, and its
    result is stored at the same position of the output.
    """
    size = node.length()
    out = [' '] * size
    chars = rope.ItemIterator(node)
    pos = 0
    while pos < size:
        out[pos] = transform(chars.nextchar())
        pos += 1
    return W_RopeObject(rope.rope_from_charlist(out))
def buffer__RopeUnicode(space, w_unicode):
    """Return a wrapped StringBuffer holding the packed characters.

    Every code point of the rope is converted with ``unichr`` and handed to
    ``pack_unichar`` together with a list that collects the output; the
    joined list becomes the buffer's content.
    """
    from pypy.rlib.rstruct.unichar import pack_unichar
    from pypy.interpreter.buffer import StringBuffer
    node = w_unicode._node
    packed = []
    codepoints = rope.ItemIterator(node)
    remaining = node.length()
    while remaining > 0:
        pack_unichar(unichr(codepoints.nextint()), packed)
        remaining -= 1
    return space.wrap(StringBuffer(''.join(packed)))
def buffer__RopeUnicode(space, w_unicode):
    """Return a wrapped StringBuffer holding the packed characters.

    A StringBuilder is created with an initial size of
    ``length * UNICODE_SIZE``; every code point of the rope is converted
    with ``unichr`` and handed to ``pack_unichar`` together with the
    builder, whose final content becomes the buffer.
    """
    from pypy.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
    from pypy.interpreter.buffer import StringBuffer
    node = w_unicode._node
    count = node.length()
    codepoints = rope.ItemIterator(node)
    out = StringBuilder(count * UNICODE_SIZE)
    for _ in range(count):
        pack_unichar(unichr(codepoints.nextint()), out)
    return space.wrap(StringBuffer(out.build()))
def unicode_capitalize__RopeUnicode(space, w_self):
    """Return a copy with the first character uppercased and the rest
    lowercased (per ``unicodedb``).  An empty rope is returned as is.
    """
    node = w_self._node
    size = node.length()
    if size == 0:
        return w_self
    codepoints = rope.ItemIterator(node)
    # Head of the string goes through toupper, everything after through
    # tolower.
    out = [unichr(unicodedb.toupper(codepoints.nextint()))]
    for _ in range(1, size):
        out.append(unichr(unicodedb.tolower(codepoints.nextint())))
    return W_RopeUnicodeObject(rope.rope_from_unicharlist(out))
def unicode_isupper__RopeUnicode(space, w_unicode):
    """Return w_True if no character is lowercase or titlecase and at least
    one character is uppercase; w_False otherwise.
    """
    seen_upper = False
    codepoints = rope.ItemIterator(w_unicode._node)
    while True:
        try:
            code = codepoints.nextint()
        except StopIteration:
            break
        if unicodedb.islower(code) or unicodedb.istitle(code):
            return space.w_False
        # Short-circuits once an uppercase character has been found.
        seen_upper = seen_upper or unicodedb.isupper(code)
    return space.newbool(seen_upper)
def repr__Rope(space, w_str):
    """Return the quoted, escaped representation of a rope string.

    Single quotes delimit the result unless the string contains a single
    quote and no double quote, in which case double quotes are used.  The
    backslash and the active quote are backslash-escaped, tab / carriage
    return / newline use their letter escapes, and any other character
    outside the range 0x20..0x7e is emitted as a two-digit hex escape.
    """
    node = w_str._node
    hexdigits = "0123456789abcdef"

    quote = "'"
    if (rope.find_int(node, ord(quote)) != -1 and
            rope.find_int(node, ord('"')) == -1):
        quote = '"'

    out = [quote]
    chars = rope.ItemIterator(node)
    while True:
        try:
            c = chars.nextchar()
        except StopIteration:
            break

        if c == '\\' or c == quote:
            out.append('\\')
            out.append(c)
        elif c == '\t':
            out.append('\\')
            out.append('t')
        elif c == '\r':
            out.append('\\')
            out.append('r')
        elif c == '\n':
            out.append('\\')
            out.append('n')
        elif not '\x20' <= c < '\x7f':
            n = ord(c)
            out.append('\\')
            out.append('x')
            out.append(hexdigits[n >> 4])
            out.append(hexdigits[n & 0xF])
        else:
            out.append(c)

    out.append(quote)
    return W_RopeObject(rope.rope_from_charlist(out))
def str_isupper__Rope(space, w_self):
    """Return True if all cased characters in S are uppercase and there is
    at least one cased character in S, False otherwise."""
    node = w_self._node
    size = node.length()
    if size == 0:
        return space.w_False
    found_upper = False
    chars = rope.ItemIterator(node)
    for _ in range(size):
        ch = chars.nextchar()
        if ch.islower():
            return space.w_False
        # Stop testing for uppercase once one has been seen.
        found_upper = found_upper or ch.isupper()
    return space.newbool(found_upper)
def unicode_title__RopeUnicode(space, w_self):
    """Return a titlecased copy: a character following a cased character is
    lowercased, any other character is titlecased (per ``unicodedb``).
    An empty rope is returned as is.
    """
    node = w_self._node
    size = node.length()
    if size == 0:
        return w_self
    out = []
    codepoints = rope.ItemIterator(node)
    after_cased = False
    for _ in range(size):
        code = codepoints.nextint()
        if after_cased:
            converted = unicodedb.tolower(code)
        else:
            converted = unicodedb.totitle(code)
        out.append(unichr(converted))
        # Casedness is decided on the original code point, not the result.
        after_cased = unicodedb.iscased(code)
    return W_RopeUnicodeObject(rope.rope_from_unicharlist(out))
def str_title__Rope(space, w_self):
    """Return a titlecased copy: a character is passed through ``_upper``
    when the previously written character is not alphabetic, and through
    ``_lower`` otherwise.
    """
    node = w_self._node
    size = node.length()
    out = [' '] * size
    chars = rope.ItemIterator(node)
    # Starts as a non-letter so the very first character gets uppercased.
    last_written = ' '
    for idx in range(size):
        ch = chars.nextchar()
        if last_written.isalpha():
            last_written = _lower(ch)
        else:
            last_written = _upper(ch)
        out[idx] = last_written
    return W_RopeObject(rope.rope_from_charlist(out))
def unicode_istitle__RopeUnicode(space, w_unicode):
    """Return w_True if the string is titlecased and has at least one cased
    character: uppercase/titlecase characters may only follow uncased
    characters, lowercase characters only cased ones.
    """
    seen_cased = False
    prev_cased = False
    codepoints = rope.ItemIterator(w_unicode._node)
    while True:
        try:
            code = codepoints.nextint()
        except StopIteration:
            break
        if unicodedb.isupper(code) or unicodedb.istitle(code):
            if prev_cased:
                return space.w_False
        elif unicodedb.islower(code):
            if not prev_cased:
                return space.w_False
        else:
            # Uncased character: the next cased one starts a new word.
            prev_cased = False
            continue
        prev_cased = True
        seen_cased = True
    return space.newbool(seen_cased)
def str_istitle__Rope(space, w_self):
    """Return True if S is a titlecased string and there is at least one
    character in S, i.e. uppercase characters may only follow uncased
    characters and lowercase characters only cased ones. Return False
    otherwise."""
    node = w_self._node
    chars = rope.ItemIterator(node)
    seen_cased = False
    in_word = False
    remaining = node.length()
    while remaining > 0:
        remaining -= 1
        ch = chars.nextchar()
        if ch.isupper():
            if in_word:
                return space.w_False
            in_word = True
            seen_cased = True
        elif ch.islower():
            if not in_word:
                return space.w_False
            seen_cased = True
        else:
            in_word = False
    return space.newbool(seen_cased)
def str_capitalize__Rope(space, w_self):
    """Return a copy with a lowercase first character shifted to uppercase
    and every uppercase character after it shifted to lowercase.

    An empty rope yields ``W_RopeObject.EMPTY``.
    """
    node = w_self._node
    size = node.length()
    if size <= 0:
        return W_RopeObject.EMPTY

    out = [' '] * size
    chars = rope.ItemIterator(node)

    # Case conversion is done by shifting the ordinal by 32.
    first = chars.nextchar()
    if first.islower():
        first = chr(ord(first) - 32)
    out[0] = first

    for idx in range(1, size):
        ch = chars.nextchar()
        if ch.isupper():
            ch = chr(ord(ch) + 32)
        out[idx] = ch

    return W_RopeObject(rope.rope_from_charlist(out))
def repr__RopeUnicode(space, w_unicode):
    """Return the ``u'...'`` representation of a rope unicode string.

    Single quotes delimit the result unless the string contains a single
    quote and no double quote, in which case double quotes are used.
    Escaping rules, in order of precedence:

    * code points >= 0x10000, and a high surrogate immediately followed by
      a low surrogate (combined into one code point): 8-digit ``U`` escape;
    * other code points >= 0x100: 4-digit ``u`` escape;
    * backslash and the active quote: backslash-escaped;
    * tab, carriage return, newline: their letter escapes;
    * other code points below 0x20 or >= 0x7f: 2-digit ``x`` escape;
    * everything else is copied verbatim.
    """
    node = w_unicode._node
    size = node.length()

    # First pass: find out which quote characters occur in the string.
    singlequote = doublequote = False
    items = rope.ItemIterator(node)
    for i in range(size):
        c = items.nextunichar()
        if singlequote and doublequote:
            break
        if c == u'\'':
            singlequote = True
        elif c == u'"':
            doublequote = True
    if singlequote and not doublequote:
        quote = '"'
    else:
        quote = '\''

    result = ['u', quote]
    items = rope.ItemIterator(node)
    # Code unit read ahead while probing for a surrogate pair but not
    # consumed by one; -1 means "nothing pending".  Without this slot the
    # look-ahead unit was dropped and the iterator ran out before j
    # reached size.
    pending = -1
    j = 0
    while j < size:
        if pending >= 0:
            code = pending
            pending = -1
        else:
            code = items.nextint()

        if code >= 0x10000:
            _append_hex_escape(result, 'U', code, 8)
            j += 1
            continue

        if code >= 0xD800 and code < 0xDC00 and j < size - 1:
            # High surrogate with something after it: peek at the next unit.
            code2 = items.nextint()
            if code2 >= 0xDC00 and code2 <= 0xDFFF:
                code = (((code & 0x03FF) << 10) |
                        (code2 & 0x03FF)) + 0x00010000
                _append_hex_escape(result, 'U', code, 8)
                j += 2
                continue
            # Not a pair: emit the lone surrogate below and handle the
            # peeked unit on the next iteration.
            pending = code2

        if code >= 0x100:
            _append_hex_escape(result, 'u', code, 4)
        elif code == ord('\\') or code == ord(quote):
            result.append('\\')
            result.append(chr(code))
        elif code == ord('\t'):
            result.append('\\')
            result.append('t')
        elif code == ord('\r'):
            result.append('\\')
            result.append('r')
        elif code == ord('\n'):
            result.append('\\')
            result.append('n')
        elif code < ord(' ') or code >= 0x7f:
            _append_hex_escape(result, 'x', code, 2)
        else:
            result.append(chr(code))
        j += 1

    result.append(quote)
    return W_RopeObject(rope.rope_from_charlist(result))


def _append_hex_escape(result, kind, code, ndigits):
    """Append a backslash, ``kind`` and ``code`` as ``ndigits`` lowercase
    hex digits (most significant first) to the list ``result``."""
    hexdigits = "0123456789abcdef"
    result.append('\\')
    result.append(kind)
    shift = (ndigits - 1) * 4
    while shift >= 0:
        result.append(hexdigits[(code >> shift) & 0xf])
        shift -= 4
def __init__(w_self, w_rope, index=0):
    """Store the rope's node, a fresh item iterator over it, and ``index``.

    NOTE(review): the item iterator is always created from the node alone,
    whatever ``index`` is -- confirm callers only pass a non-zero index
    when that is intended.
    """
    node = w_rope._node
    w_self.node = node
    w_self.item_iter = rope.ItemIterator(node)
    w_self.index = index