def upper(self, w_str):
    """Return a new W_MutableString whose characters are the upper-cased
    characters of w_str's unerased storage."""
    # copy paste from above, but the types are different
    chars = self.unerase(w_str.get_storage())
    out = UnicodeBuilder(len(chars))
    for ch in chars:
        upper_code = unicodedb.toupper(ord(ch))
        out.append(unichr(upper_code))
    uppercased = list(out.build())
    return W_MutableString(self, self.erase(uppercased))
def read_w(self, space, w_size=None):
    """Read decoded text and return it as a wrapped unicode object.

    A negative size (as produced by convert_size) reads everything that
    is left; otherwise at most `size` characters are returned.  Raises an
    app-level IOError("not readable") when no decoder is set.
    """
    self._check_attached(space)
    self._check_closed(space)
    if not self.w_decoder:
        raise oefmt(space.w_IOError, "not readable")

    size = convert_size(space, w_size)
    self._writeflush(space)

    if size < 0:
        # Read everything
        w_raw = space.call_method(self.w_buffer, "read")
        w_tail = space.call_method(self.w_decoder, "decode",
                                   w_raw, space.w_True)
        check_decoded(space, w_tail)
        # Already-decoded characters come first, then the freshly decoded
        # remainder.
        w_head = space.newunicode(self.decoded.get_chars(-1))
        w_everything = space.add(w_head, w_tail)
        self.snapshot = None
        return w_everything

    # Keep reading chunks until we have n characters to return
    left = size
    out = UnicodeBuilder(size)
    while left > 0 and self._ensure_data(space):
        chunk = self.decoded.get_chars(left)
        out.append(chunk)
        left -= len(chunk)
    return space.newunicode(out.build())
def rawwcharp2unicoden(wcp, maxlen):
    """Build a unicode string from the items of `wcp`.

    Copying stops at the first item whose signed value is zero, or after
    `maxlen` items, whichever comes first.
    """
    out = UnicodeBuilder(maxlen)
    idx = 0
    while idx < maxlen:
        if rffi.cast(lltype.Signed, wcp[idx]) == 0:
            break
        out.append(code_to_unichr(wcp[idx]))
        idx += 1
    return assert_str0(out.build())
def char_utf_8_length(char):
    """Return, as a W_Fixnum, the length of the UTF-8 char list of `char`."""
    # same as (bytes-length (string->bytes/utf-8 (string char)))
    single = UnicodeBuilder()
    single.append(char.value)
    w_text = W_String.fromunicode(single.build())
    utf8_chars = w_text.as_charlist_utf8()
    w_encoded = values.W_Bytes.from_charlist(utf8_chars)
    return values.W_Fixnum(w_encoded.length())
def toLowerCase(self):
    """Return self._s with every character mapped through
    unicodedb.tolower."""
    text = self._s
    # Use current size as a size hint. In the best case, characters
    # are one-to-one; in the next-best case, we overestimate and end
    # up with a couple bytes of slop.
    lowered = UnicodeBuilder(len(text))
    for ch in text:
        lower_code = unicodedb.tolower(ord(ch))
        lowered.append(unichr(lower_code))
    return lowered.build()
def f(n):
    """Count n down to zero; each pass checks that a fresh, untouched
    UnicodeBuilder builds the empty unicode string."""
    while n > 0:
        jitdriver.jit_merge_point(n=n)
        builder = UnicodeBuilder()
        if builder.build() != u"":
            raise ValueError
        n -= 1
    return n
def f(n):
    """Count n down to zero; each pass appends an empty unicode string to
    a fresh UnicodeBuilder and checks the built result has length 0."""
    while n > 0:
        jitdriver.jit_merge_point(n=n)
        builder = UnicodeBuilder()
        builder.append(u"")
        built = builder.build()
        if len(built) != 0:
            raise ValueError
        n -= 1
    return n
def test_prebuilt_unicode_builder(self):
    """A UnicodeBuilder filled before interpretation must still build its
    three appended characters when used from the interpreted function."""
    prebuilt = UnicodeBuilder(100)
    prebuilt.append(u"abc")

    def built_length():
        return len(prebuilt.build())

    length = self.interpret(built_length, [])
    assert length == 3
def _parse_plain_flags(source):
    """Collect characters read from `source` until a u":" is read.

    The colon itself is not included in the returned string.  Reading
    also stops when `source.get()` yields an empty value, so input that
    never contains a colon can no longer keep this loop spinning forever.
    (Assumes an empty result from `source.get()` means end of input --
    TODO confirm against the source reader.)
    """
    b = UnicodeBuilder(4)
    while True:
        ch = source.get()
        # Stop on the terminator, or on an empty read (end of input).
        if not ch or ch == u":":
            break
        b.append(ch)
    return b.build()
def ll_decode_utf8(self, llvalue):
    """Decode the low-level string `llvalue` as UTF-8 in 'strict' mode
    (final=True, surrogates not allowed) and return the result converted
    with self.ll.llunicode."""
    from rpython.rtyper.annlowlevel import hlstr
    data = hlstr(llvalue)
    assert data is not None
    size = len(data)
    decoded = UnicodeBuilder(size)
    self.rstr_decode_utf_8(
        data, size, 'strict', final=True,
        errorhandler=self.ll_raise_unicode_exception_decode,
        allow_surrogates=False, result=decoded)
    return self.ll.llunicode(decoded.build())
def from_char_code(this, args):
    """Return a unicode string with one character per argument, each
    produced by unichr() of the argument's ToInt16() value."""
    # NOTE(review): ToInt16 can presumably yield negative values, which
    # unichr() would reject -- confirm whether an unsigned conversion
    # was intended.
    out = UnicodeBuilder(len(args))
    for arg in args:
        out.append(unichr(arg.ToInt16()))
    return out.build()
def to_upper_case(this, args):
    """Return this.to_string() with every character mapped through
    unicodedb.toupper.  `args` is not used."""
    from rpython.rlib.unicodedata import unicodedb
    text = this.to_string()
    out = UnicodeBuilder(len(text))
    for ch in text:
        upper_code = unicodedb.toupper(ord(ch))
        out.append(unichr(upper_code))
    return out.build()
def f(n):
    """Count n down to zero; each pass appends the slices [1:n] and [0:n]
    of a ten-character literal and checks the total length is 2*n - 1."""
    while n > 0:
        jitdriver.jit_merge_point(n=n)
        builder = UnicodeBuilder()
        builder.append_slice(u"abcdefghij", 1, n)
        builder.append_slice(u"abcdefghij", 0, n)
        joined = builder.build()
        if len(joined) != 2 * n - 1:
            raise ValueError
        n -= 1
    return n
def namereplace_errors(space, w_exc):
    """Error callback replacing each character in the exception's
    [start, end) range by u'\\N{<name>}'.

    Characters for which unicodedb.name() raises KeyError are written by
    raw_unicode_escape_helper_unicode instead.  Returns the wrapped tuple
    (replacement, end); raises an app-level TypeError when `w_exc` is not
    a UnicodeEncodeError.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        raise oefmt(space.w_TypeError,
                    "don't know how to handle %T in error callback", w_exc)

    w_obj = space.getattr(w_exc, space.newtext('object'))
    text = space.realunicode_w(w_obj)
    w_start = space.getattr(w_exc, space.newtext('start'))
    start = space.int_w(w_start)
    w_end = space.getattr(w_exc, space.newtext('end'))
    end = space.int_w(w_end)

    out = UnicodeBuilder()
    for idx in range(start, end):
        code = ord(text[idx])
        try:
            name = unicodedb.name(code)
        except KeyError:
            raw_unicode_escape_helper_unicode(out, code)
        else:
            out.append(u'\\N{')
            out.append(unicode(name))
            out.append(u'}')
    return space.newtuple([space.newunicode(out.build()), w_end])
def xmlcharrefreplace_errors(space, w_exc):
    """Error callback replacing each character in the exception's
    [start, end) range by a decimal reference u"&#<code>;".

    When MAXUNICODE is 0xffff, a high surrogate followed (inside the
    range) by a low surrogate is combined into one code point.  Returns
    the wrapped tuple (replacement, end); raises an app-level TypeError
    when `w_exc` is not a UnicodeEncodeError.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        raise oefmt(space.w_TypeError,
                    "don't know how to handle %T in error callback", w_exc)

    w_obj = space.getattr(w_exc, space.newtext('object'))
    text = space.realunicode_w(w_obj)
    w_start = space.getattr(w_exc, space.newtext('start'))
    start = space.int_w(w_start)
    w_end = space.getattr(w_exc, space.newtext('end'))
    end = space.int_w(w_end)

    out = UnicodeBuilder()
    idx = start
    while idx < end:
        code = ord(text[idx])
        if (MAXUNICODE == 0xffff and 0xD800 <= code <= 0xDBFF and
                idx + 1 < end and 0xDC00 <= ord(text[idx + 1]) <= 0xDFFF):
            # Surrogate pair: merge both halves and skip the low one.
            low = ord(text[idx + 1])
            code = (((code & 0x03FF) << 10) | (low & 0x03FF)) + 0x10000
            idx += 1
        out.append(u"&#")
        out.append(unicode(str(code)))
        out.append(u";")
        idx += 1
    return space.newtuple([space.newunicode(out.build()), w_end])
def escape_string(string):
    """Return `string` wrapped in double quotes with escapes applied.

    Code points 0x20..0x7E and those above 0xFF are copied through, except
    that backslash and double quote get a backslash prefix.  Every other
    code point becomes a backslash followed by its entry in
    character_escapes, or by 'x' plus two hex digits when it has none.
    """
    out = UnicodeBuilder()
    out.append(u'"')
    for ch in string:
        n = ord(ch)
        # Remove the last part of the condition if you don't want unicode
        # printed out for some reason.
        if 0x20 <= n and n <= 0x7E or 0xFF < n:
            if ch == u'\\':
                ch = u'\\\\'
            elif ch == u'"':
                ch = u'\\"'
        else:
            # Fallback escape: 'x' followed by the two low hex digits of n.
            #if n <= 0xFF:
            c = u"0123456789abcdef"[n >> 4 & 15]
            d = u"0123456789abcdef"[n & 15]
            ch = u'x' + c + d
            #else: # for unicode escapes.
            #    a = u"0123456789abcdef"[n >> 12]
            #    b = u"0123456789abcdef"[n >> 8 & 15]
            #    c = u"0123456789abcdef"[n >> 4 & 15]
            #    d = u"0123456789abcdef"[n & 15]
            #    ch = u'u' + a + b + c + d
            # A named escape from character_escapes wins over the hex form.
            ch = u'\\' + character_escapes.get(n, ch)
        out.append(ch)
    out.append(u'"')
    return out.build()
def f(n):
    """Count n down to zero; each pass appends the slice [1:3] of u"fOo!"
    and checks that the built result is exactly u"O" followed by u"o"."""
    while n > 0:
        jitdriver.jit_merge_point(n=n)
        builder = UnicodeBuilder()
        builder.append_slice(u"fOo!", 1, 3)
        piece = builder.build()
        if len(piece) != 2:
            raise ValueError
        if piece[0] != u"O":
            raise ValueError
        if piece[1] != u"o":
            raise ValueError
        n -= 1
    return n
def _parse_count(source):
    """Read a run of digit characters from `source` and return it.

    On the first read that is not a digit, `source.pos` is put back to
    its value from before that read and the collected digits (possibly
    an empty string) are returned.  An empty read is handled the same
    way, instead of failing on `ch[0]`.  (Assumes an empty result from
    `source.get()` means end of input -- TODO confirm against the source
    reader.)
    """
    b = UnicodeBuilder(2)
    while True:
        here = source.pos
        ch = source.get()
        # `ch` must be non-empty before its first character is inspected.
        if ch and is_digit(ord(ch[0])):
            b.append(ch)
        else:
            source.pos = here
            break
    return b.build()
def string(args):
    """Build a W_String from `args`, each of which must be a
    values.W_Character; raises SchemeException otherwise.  With no
    arguments the empty ascii string is returned."""
    if len(args) == 0:
        return W_String.fromascii("")
    assert len(args) > 0
    chars = UnicodeBuilder()
    # XXX could do one less copy in the ascii case
    for w_char in args:
        if isinstance(w_char, values.W_Character):
            chars.append(w_char.value)
        else:
            raise SchemeException("string: expected a character")
    return W_String.fromunicode(chars.build())
def f(n):
    """Count n down to zero; each pass appends u"ab" to a fresh
    UnicodeBuilder and checks the built result character by character."""
    while n > 0:
        jitdriver.jit_merge_point(n=n)
        builder = UnicodeBuilder()
        builder.append(u"ab")
        built = builder.build()
        if len(built) != 2:
            raise ValueError
        if built[0] != u"a":
            raise ValueError
        if built[1] != u"b":
            raise ValueError
        n -= 1
    return n
def configured_stringify(obj, config):
    """Stringify `obj` and return the built text.

    Without a config, quick_stringify writes straight into a fresh
    UnicodeBuilder.  With a config, a Scanner is set up from the
    u"indent" (default 2) and u"sort_keys" (default false) entries and
    the text is taken from its printer's result builder.
    """
    if config is not None:
        scanner = Scanner()
        w_indent = get_config(config, u"indent", space.Integer(2))
        scanner.indent = space.to_int(w_indent)
        w_sort_keys = get_config(config, u"sort_keys", space.false)
        scanner.sort_keys = space.is_true(w_sort_keys)
        stringify(scanner, obj)
        scanner.finish()
        return scanner.printer.result.build()

    out = UnicodeBuilder()
    quick_stringify(out, obj)
    return out.build()
def list_to_string(w_list):
    """Build a W_String from a proper list of values.W_Character.

    Raises SchemeException when `w_list` is not a proper list or holds a
    non-character element.  A proper list that is not a W_Cons gives the
    empty ascii string.
    """
    if not w_list.is_proper_list():
        raise SchemeException("list->string: expected proper list")
    if not isinstance(w_list, values.W_Cons):
        return W_String.fromascii("")

    chars = UnicodeBuilder()
    while isinstance(w_list, values.W_Cons):
        w_char = w_list.car()
        w_list = w_list.cdr()
        if not isinstance(w_char, values.W_Character):
            raise SchemeException("list->string: expected list of characters")
        chars.append(w_char.value)
    return W_String.fromunicode(chars.build())
def configured_stringify(obj, config):
    """Return the text form of `obj`.

    A None config takes the quick path through quick_stringify.
    Otherwise the u"indent" (default 2) and u"sort_keys" (default false)
    entries configure a Scanner, and the text comes from its printer's
    result builder.
    """
    if config is not None:
        scanner = Scanner()
        w_indent = get_config(config, u"indent", space.Integer(2))
        scanner.indent = space.to_int(w_indent)
        w_sort_keys = get_config(config, u"sort_keys", space.false)
        scanner.sort_keys = space.is_true(w_sort_keys)
        stringify(scanner, obj)
        scanner.finish()
        return scanner.printer.result.build()

    quick = UnicodeBuilder()
    quick_stringify(quick, obj)
    return quick.build()
def f(n):
    """Count n down to zero; each pass appends u"x" 35 times with
    append_multiple_char and checks the length and every character."""
    while n > 0:
        jitdriver.jit_merge_point(n=n)
        builder = UnicodeBuilder()
        builder.append_multiple_char(u"x", 35)
        filled = builder.build()
        if len(filled) != 35:
            raise ValueError
        for ch in filled:
            if ch != u"x":
                raise ValueError
        n -= 1
    return n
def _parse_name(source):
    """Collect characters from `source` until u")" or u">" is read.

    When one of those delimiters is read, `source.pos` is put back to its
    value from before that read.  An empty read also ends the loop.
    """
    name = UnicodeBuilder(5)
    while True:
        mark = source.pos
        ch = source.get()
        if ch in u")>":
            # Rewind so the delimiter is still there for the caller.
            source.pos = mark
            break
        # NOTE(review): in plain Python an empty string is `in` every
        # string, so an empty read may already be caught by the branch
        # above -- confirm whether this check is ever reached.
        if not ch:
            break
        name.append(ch)
    return name.build()
def _read(self, space, size):
    """Return up to `size` decoded characters as a wrapped unicode object,
    stopping early when self._ensure_data reports no more data."""
    # Keep reading chunks until we have n characters to return
    left = size
    out = UnicodeBuilder(size)
    while left > 0 and self._ensure_data(space):
        chunk = self.decoded.get_chars(left)
        out.append(chunk)
        left -= len(chunk)
    return space.newunicode(out.build())
def ll_decode_utf8(self, llvalue):
    """Decode the low-level string `llvalue` as UTF-8 in 'strict' mode
    (final=False, surrogates not allowed) and return the result converted
    with self.ll.llunicode."""
    from rpython.rtyper.annlowlevel import hlstr
    data = hlstr(llvalue)
    assert data is not None
    size = len(data)
    decoded = UnicodeBuilder(size)
    self.rstr_decode_utf_8(
        data, size, 'strict', final=False,
        errorhandler=self.ll_raise_unicode_exception_decode,
        allow_surrogates=False, result=decoded)
    return self.ll.llunicode(decoded.build())
def f(n):
    """Count n down to zero; each pass appends u"x" five times with
    append_multiple_char and checks each of the five positions."""
    while n > 0:
        jitdriver.jit_merge_point(n=n)
        builder = UnicodeBuilder()
        builder.append_multiple_char(u"x", 5)
        filled = builder.build()
        if len(filled) != 5:
            raise ValueError
        # Each position is checked with its own constant index.
        if filled[0] != u"x":
            raise ValueError
        if filled[1] != u"x":
            raise ValueError
        if filled[2] != u"x":
            raise ValueError
        if filled[3] != u"x":
            raise ValueError
        if filled[4] != u"x":
            raise ValueError
        n -= 1
    return n
def xmlcharrefreplace_errors(space, w_exc):
    """Error callback replacing each character in the exception's
    [start, end) range by a decimal reference u"&#<code>;".

    When MAXUNICODE is 0xffff, a high surrogate followed (inside the
    range) by a low surrogate is combined into one code point.  Returns
    the wrapped tuple (replacement, end); raises an app-level TypeError
    when `w_exc` is not a UnicodeEncodeError.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        raise oefmt(space.w_TypeError,
                    "don't know how to handle %T in error callback", w_exc)

    w_obj = space.getattr(w_exc, space.wrap('object'))
    text = space.realunicode_w(w_obj)
    w_start = space.getattr(w_exc, space.wrap('start'))
    start = space.int_w(w_start)
    w_end = space.getattr(w_exc, space.wrap('end'))
    end = space.int_w(w_end)

    out = UnicodeBuilder()
    idx = start
    while idx < end:
        code = ord(text[idx])
        if (MAXUNICODE == 0xffff and 0xD800 <= code <= 0xDBFF and
                idx + 1 < end and 0xDC00 <= ord(text[idx + 1]) <= 0xDFFF):
            # Surrogate pair: merge both halves and skip the low one.
            low = ord(text[idx + 1])
            code = (((code & 0x03FF) << 10) | (low & 0x03FF)) + 0x10000
            idx += 1
        out.append(u"&#")
        out.append(unicode(str(code)))
        out.append(u";")
        idx += 1
    return space.newtuple([space.wrap(out.build()), w_end])
def fn():
    """Return a 6-tuple of compute_unique_id results for a str, a unicode,
    a list, a dict, a StringBuilder and a UnicodeBuilder."""
    id_str = compute_unique_id("foo")
    id_unicode = compute_unique_id(u"bar")
    id_list = compute_unique_id([1])
    id_dict = compute_unique_id({"foo": 3})
    id_strbuilder = compute_unique_id(StringBuilder())
    id_unibuilder = compute_unique_id(UnicodeBuilder())
    return (id_str, id_unicode, id_list, id_dict,
            id_strbuilder, id_unibuilder)
def __init__(self, ub=None, newline=StrObject(u"\n"), context=None):
    """Store the output builder, the newline object and the context.

    A falsy `ub` is replaced by a fresh UnicodeBuilder, and a None
    `context` by a new empty dict.
    """
    if not ub:
        ub = UnicodeBuilder()
    self.ub = ub
    self.newline = newline
    self.context = {} if context is None else context
def writerow(self, w_fields):
    """Construct and write a CSV record from a sequence of fields.
    Non-string elements will be converted to string."""
    # NOTE(review): only the beginning of this method is visible here; the
    # code that consumes `rec`, `field` and `quoted` is not shown.
    space = self.space
    fields_w = space.listview(w_fields)
    dialect = self.dialect
    rec = UnicodeBuilder(80)
    #
    for field_index in range(len(fields_w)):
        w_field = fields_w[field_index]
        # None becomes the empty string, floats use their repr(), and
        # everything else goes through str().
        if space.is_w(w_field, space.w_None):
            field = u""
        elif space.isinstance_w(w_field, space.w_float):
            field = space.unicode_w(space.repr(w_field))
        else:
            field = space.unicode_w(space.str(w_field))
        #
        if dialect.quoting == QUOTE_NONNUMERIC:
            # A field is left unquoted only if float_w() accepts it.
            try:
                space.float_w(w_field)    # is it an int/long/float?
                quoted = False
            except OperationError, e:
                # Errors for which e.async(space) is true are re-raised
                # rather than being read as "not a number".
                if e.async(space):
                    raise
                quoted = True
def decode_json(action, ch, ctx):
    """Run one decoder action, selected by the code `action`, on `ctx`.

    The actions read and modify three fields of `ctx`: the list `ds`, the
    string builder `ss` and the builder `es` used for hex escape digits.
    `ch` is the character appended or looked up by actions 0xB to 0xD.
    An unknown action code fails an assertion.
    """
    if action == 0x1:                # push list
        ctx.ds.append(space.List([]))
    elif action == 0x2:              # push object
        ctx.ds.append(space.Dict())
    elif action == 0x3:              # pop & append
        item = ctx.ds.pop()
        w_list = ctx.ds[len(ctx.ds) - 1]
        assert isinstance(w_list, List)  # we can trust this.
        w_list.contents.append(item)
    elif action == 0x4:              # pop pop & setitem
        item = ctx.ds.pop()
        key = ctx.ds.pop()
        w_dict = ctx.ds[len(ctx.ds) - 1]
        assert isinstance(w_dict, Dict)  # again..
        w_dict.data[key] = item
    elif action == 0x5:              # push null
        ctx.ds.append(space.null)
    elif action == 0x6:              # push true
        ctx.ds.append(space.true)
    elif action == 0x7:              # push false
        ctx.ds.append(space.false)
    elif action == 0x8:              # push string
        text = ctx.ss.build()
        ctx.ds.append(space.String(text))
        ctx.ss = UnicodeBuilder()
        ctx.es = UnicodeBuilder()
    elif action == 0x9:              # push int
        int_digits = ctx.ss.build().encode('utf-8')
        ctx.ds.append(space.Integer(int(int_digits)))
        ctx.ss = UnicodeBuilder()
    elif action == 0xA:              # push float
        float_digits = ctx.ss.build().encode('utf-8')
        ctx.ds.append(space.Float(float(float_digits)))
        ctx.ss = UnicodeBuilder()
    elif action == 0xB:              # push ch to ss
        ctx.ss.append(ch)
    elif action == 0xC:              # push ch to es
        ctx.es.append(ch)
    elif action == 0xD:              # push escape
        ctx.ss.append(unichr(escape_characters[ch]))
    elif action == 0xE:              # push unicode point
        hex_digits = ctx.es.build().encode('utf-8')
        ctx.ss.append(unichr(int(hex_digits, 16)))
        ctx.es = UnicodeBuilder()
    else:
        # This is very unlikely to happen.
        assert False, "JSON decoder bug"
def entry_point(argv):
    """Drive a UnicodeBuilder from the numbers in `to_do` and print its
    contents at intervals; always returns 0.  `argv` is not used."""
    b = UnicodeBuilder(32)
    for x in to_do:
        if x < 1500:
            # Print what has been built so far; for x below 1000 also
            # start over with a fresh builder.
            print "``%s''" % str(b.build())
            if x < 1000:
                b = UnicodeBuilder(32)
        elif x < 20000:
            # Append one character with a code in the range 32..95.
            b.append(unichr(32 + (x & 63)))
        elif x < 30000:
            # Append that character x % 93 times.
            b.append_multiple_char(unichr(32 + (x & 63)), x % 93)
        else:
            # Append the decimal text of x.
            b.append(unicode(str(x)))
    return 0
def quoteStr(s):
    """ Quote an entire string. """
    # The length hint is the length of the incoming string, plus two for the
    # quote marks. This will never overshoot, and in the common case, will not
    # undershoot either.
    quoted = UnicodeBuilder(len(s) + 2)
    quoted.append(u'"')
    for ch in s:
        if ch != u'"':
            quoted.append(quoteCommon(ch))
        else:
            # Double quotes get a backslash; the rest goes to quoteCommon.
            quoted.append(u'\\"')
    quoted.append(u'"')
    return quoted.build()
def unicode_swapcase__Unicode(space, w_self):
    """Return a W_UnicodeObject in which lowercase characters of
    w_self._value are upper-cased, uppercase characters are lower-cased,
    and all other characters are copied unchanged."""
    text = w_self._value
    swapped = UnicodeBuilder(len(text))
    for ch in text:
        code = ord(ch)
        if unicodedb.islower(code):
            swapped.append(unichr(unicodedb.toupper(code)))
        elif unicodedb.isupper(code):
            swapped.append(unichr(unicodedb.tolower(code)))
        else:
            swapped.append(ch)
    return W_UnicodeObject(swapped.build())
def read_w(self, space, w_size=None):
    """Read decoded text and return it as a wrapped object.

    A negative size (as produced by convert_size) reads everything that
    is left; otherwise at most `size` characters are returned.  Raises an
    app-level IOError("not readable") when no decoder is set.
    """
    self._check_attached(space)
    self._check_closed(space)
    if not self.w_decoder:
        raise oefmt(space.w_IOError, "not readable")

    size = convert_size(space, w_size)
    self._writeflush(space)

    if size < 0:
        # Read everything
        w_raw = space.call_method(self.w_buffer, "read")
        w_tail = space.call_method(self.w_decoder, "decode",
                                   w_raw, space.w_True)
        check_decoded(space, w_tail)
        # Already-decoded characters come first, then the freshly decoded
        # remainder.
        w_head = space.wrap(self._get_decoded_chars(-1))
        w_everything = space.add(w_head, w_tail)
        self.snapshot = None
        return w_everything

    # Keep reading chunks until we have n characters to return
    left = size
    out = UnicodeBuilder(size)
    while True:
        chunk = self._get_decoded_chars(left)
        out.append(chunk)
        left -= len(chunk)
        if left <= 0:
            # Done
            break
        try:
            if not self._read_chunk(space):
                # EOF
                break
        except OperationError as e:
            # When trap_eintr() returns true the loop simply goes round
            # again; any other error propagates.
            if not trap_eintr(space, e):
                raise
    return space.wrap(out.build())
def _parse_property(source, info, positive, in_set):
    """Parse a braced property name that may start with u"^".

    Returns a Property when a closing u"}" is found and the name is a key
    of PROPERTIES.  Otherwise `source.pos` is restored to its value on
    entry and a character for "p" (when `positive`) or "P" is returned
    via make_character.
    """
    start = source.pos
    if source.match(u"{"):
        negate = source.match(u"^")
        collected = UnicodeBuilder(5)
        while True:
            ch = source.get()
            # Stop at the closing brace or on an empty read.
            if not ch or ch == u"}":
                break
            collected.append(ch)
        # The name only counts if the loop ended on the closing brace.
        if ch == u"}":
            name = collected.build()
            if name in PROPERTIES:
                return Property(PROPERTIES[name], positive != negate)
    source.pos = start
    letter = "p" if positive else "P"
    return make_character(info, ord(letter), in_set)
def string_append(args):
    """Concatenate the W_String values in `args` into a new W_String.

    The result is first built as ascii.  When as_str_ascii() raises
    ValueError for an argument, what was built so far is moved into a
    UnicodeBuilder and the rest, starting with that argument, is appended
    as unicode.  Raises SchemeException for a non-W_String argument.
    """
    if jit.isconstant(len(args)):
        return string_append_fastpath(args)
    if not args:
        return W_String.fromascii("")

    count = len(args)
    ascii_out = StringBuilder(count)
    uni_out = None
    failed_at = 0
    try:
        for failed_at in range(count):
            w_arg = args[failed_at]
            if not isinstance(w_arg, W_String):
                raise SchemeException("string-append: expected a string")
            ascii_out.append(w_arg.as_str_ascii())
    except ValueError:
        # `failed_at` is the index of the argument that was not ascii.
        uni_out = UnicodeBuilder(count)
        uni_out.append(unicode(ascii_out.build()))
        ascii_out = None
        for idx in range(failed_at, count):
            w_arg = args[idx]
            if not isinstance(w_arg, W_String):
                raise SchemeException("string-append: expected a string")
            uni_out.append(w_arg.as_unicode())

    if uni_out is not None:
        assert uni_out is not None
        return W_String.fromunicode(uni_out.build())
    assert ascii_out is not None
    return W_String.fromascii(ascii_out.build())
class Printer:
    """Consumes Left, Right, Blank and Text items one at a time and
    appends the resulting text to the UnicodeBuilder in `result`."""

    def __init__(self):
        self.margin = 80
        self.spaces = 80
        self.spaceleft = 80
        self.layout = Layout(None, 80, False)
        self.result = UnicodeBuilder()

    def scan(self, x):
        """Process one item and return len(x)."""
        if isinstance(x, Left):
            # Open a nested layout; it is marked as forced to break when
            # the group has a negative size or does not fit in what is
            # left of the line.
            force_break = x.size < 0 or self.spaceleft < x.size
            self.layout = Layout(self.layout, self.spaces, force_break)
        elif isinstance(x, Right):
            parent = self.layout.parent
            if parent:
                self.layout = parent
        elif isinstance(x, Blank):
            must_break = (x.size < 0 or self.spaceleft < x.size
                          or self.layout.force_break)
            if must_break:
                # Start a new line, indented by margin - spaces blanks.
                spaces = self.layout.spaces - x.indent
                self.spaces = spaces
                self.spaceleft = spaces
                padding = u' ' * (self.margin - spaces)
                self.result.append(u'\n' + padding)
            else:
                self.result.append(x.text)
                self.spaceleft -= len(x.text)
        elif isinstance(x, Text):
            self.result.append(x.text)
            self.spaceleft -= len(x.text)
        return len(x)
def unicode_capitalize__Unicode(space, w_self):
    """Return a W_UnicodeObject with the first character of w_self._value
    upper-cased and every following character lower-cased.  An empty
    value yields W_UnicodeObject.EMPTY."""
    text = w_self._value
    size = len(text)
    if size == 0:
        return W_UnicodeObject.EMPTY
    out = UnicodeBuilder(size)
    first_code = unicodedb.toupper(ord(text[0]))
    out.append(unichr(first_code))
    for idx in range(1, size):
        lower_code = unicodedb.tolower(ord(text[idx]))
        out.append(unichr(lower_code))
    return W_UnicodeObject(out.build())