Esempio n. 1
0
def read_number_or_id(f, init):
    """Read the remainder of a number or identifier token from ``f``.

    ``init`` is the text already consumed by the caller.  Further characters
    are taken from ``f`` for as long as ``idchar`` accepts them.  The token
    is then returned as the first of these that applies: a fixnum, a bignum
    (when integer parsing overflows), a flonum, or a symbol.
    """
    token = StringBuilder(64)
    token.append(init)
    while True:
        c = f.peek()
        # Stop when peek() returns the empty string or a character that
        # idchar() rejects.
        if c == "" or not idchar(c):
            break
        v = f.read(1)
        assert v == c
        token.append(v)
    got = token.build()
    try:
        val = string_to_int(got)
        return values.W_Fixnum.make_or_interned(val)
    except ParseStringOverflowError:
        val = rbigint.fromdecimalstr(got)
        return values.W_Bignum(val)
    except ParseStringError:
        try:
            return values.W_Flonum(float(got))
        except ValueError:
            # Not numeric at all: fall back to a symbol.  Only the failed
            # float conversion is caught, so unrelated errors still surface
            # instead of being silently turned into a symbol.
            return values.W_Symbol.make(got)
Esempio n. 2
0
def my_replace(string):
    """Return ``string`` with every space character removed."""
    from rpython.rlib.rstring import StringBuilder
    kept = StringBuilder()
    for ch in string:
        if ch == ' ':
            continue
        kept.append(ch)
    return kept.build()
Esempio n. 3
0
 def func():
     """Exercise each StringBuilder append variant and return the result."""
     builder = StringBuilder()
     builder.append("a")
     builder.append("abc")
     builder.append_slice("abc", 1, 2)
     builder.append_multiple_char('d', 4)
     return builder.build()
Esempio n. 4
0
 def direct_read(self, n=-1):
     """Read from the underlying stream.

     A negative ``n`` reads everything via ``readall()``.  Otherwise up to
     ``n`` characters are collected, stopping early as soon as the stream
     returns no data.
     """
     stream = self.getstream()
     self.check_readable()
     if n < 0:
         return stream.readall()
     collected = StringBuilder(n)
     while n > 0:
         try:
             chunk = stream.read(n)
         except OSError as e:
             # Special case for read() only (similar to CPython, which
             # also loses partial data with other methods): when the
             # would-block error arrives after some data was already
             # received, return that data.  Buffered data may still be
             # waiting at that point, so it is read as well.
             if is_wouldblock_error(e.errno):
                 buffered = stream.count_buffered_bytes()
                 if buffered > 0:
                     collected.append(stream.read(min(n, buffered)))
                 partial = collected.build()
                 if len(partial) > 0:
                     return partial
             raise
         if not chunk:
             break
         collected.append(chunk)
         n -= len(chunk)
     return collected.build()
Esempio n. 5
0
 def bitwise_not(self, space):
     """Return a new string object with each character XOR-ed with 0xff."""
     size = self.strlen()
     flipped = StringBuilder(size)
     for index in range(size):
         code = ord(self.character(index))
         flipped.append(chr(code ^ 0xff))
     return W_ConstStringObject(flipped.build())
Esempio n. 6
0
def http_build_query(interp,  w_data,
                     num_prefix="",  arg_sep=None,  enctype=1):
    """Build a query string from the entries of an array or object.

    ``arg_sep`` defaults to the ``arg_separator.output`` ini setting.  A
    warning is emitted when ``w_data`` is neither an array nor an object;
    in that case the result is the empty string.  The pieces produced by
    ``_build_query`` are concatenated and a single trailing separator, if
    present, is removed.
    """
    space = interp.space
    if arg_sep is None:
        arg_sep = interp.config.get_ini_str("arg_separator.output")
    w_data = w_data.deref()
    out = StringBuilder()
    if w_data.tp not in [space.tp_array,  space.tp_object]:
        interp.space.ec.warn("http_build_query(): Parameter 1 "
                             "expected to be Array or Object.  "
                             "Incorrect value given")
    if w_data.tp == space.tp_array:
        with space.iter(w_data) as itr:
            while not itr.done():
                w_key,  w_value = itr.next_item(space)
                key = _get_key(space,  num_prefix,  w_key)
                res = _build_query(space,  [],  key,  w_value,
                                   num_prefix,  arg_sep,  enctype)
                out.append(''.join(res))

    if w_data.tp == space.tp_object:
        for key, w_value in w_data.get_instance_attrs(interp).iteritems():
            # NOTE(review): presumably a non-empty second result marks a
            # mangled (non-public) property that must be skipped -- confirm
            # against demangle_property.
            _,  prop = demangle_property(key)
            if prop:
                continue
            res = _build_query(space, [], key, w_value, num_prefix,
                               arg_sep, enctype)
            out.append(''.join(res))

    outstr = out.build()
    # Remove exactly one trailing separator.  str.rstrip() treats its
    # argument as a *set* of characters, so with a multi-character separator
    # such as "&amp;" it would also eat trailing characters of the last
    # value ("x=amp&amp;" would become "x=").
    if arg_sep and outstr.endswith(arg_sep):
        end = len(outstr) - len(arg_sep)
        assert end >= 0
        outstr = outstr[:end]
    return interp.space.newstr(outstr)
Esempio n. 7
0
class W_StringOutputPort(W_OutputPort):
    """Output port that accumulates everything written in a StringBuilder."""
    errorname = "output-port"
    _attrs_ = ["closed", "str"]

    def __init__(self):
        self.closed = False
        self.str = StringBuilder()

    def write(self, s):
        """Append ``s`` to the accumulated contents."""
        self.str.append(s)

    def contents(self):
        """Return everything written so far as one string."""
        return self.str.build()

    def seek(self, offset, end=False):
        """Move the write position to ``offset``.

        Nothing happens when ``end`` is true or ``offset`` already equals
        the current length.  Seeking past the end pads the contents with NUL
        characters up to ``offset``; seeking backwards truncates the
        contents to ``offset`` characters.
        """
        length = self.str.getlength()
        if end or offset == length:
            return
        if offset > length:
            # The pad width is offset - length.  The reverse subtraction is
            # negative here and would append nothing, leaving tell() short
            # of the requested offset.
            self.str.append("\0" * (offset - length))
        else:
            # FIXME: this is potentially slow.
            content = self.contents()
            self.str = StringBuilder(offset)
            self.str.append_slice(content, 0, offset)

    def tell(self):
        """Return the current length of the accumulated contents."""
        return self.str.getlength()
0
    def _read_all(self, space):
        """Read everything up to EOF without updating the cache.

        Returns the raw stream's None result when nothing at all was read
        and the raw read would block; otherwise returns the wrapped data.
        """
        # Must run with the lock held!
        result = StringBuilder()
        # Start with whatever is still available in the current buffer.
        total = self._readahead()
        if total:
            stop = self.pos + total
            result.append(''.join(self.buffer[self.pos:stop]))
            self.pos = stop
        # From here on we read past the buffer's bounds, so flush it.
        if self.writable:
            self._flush_and_rewind_unlocked(space)
        self._reader_reset_buf()

        while True:
            # Keep reading until EOF or until read() would block.
            w_chunk = space.call_method(self.w_raw, "read")
            if space.is_w(w_chunk, space.w_None):
                if total == 0:
                    return w_chunk
                break
            chunk = space.str_w(w_chunk)
            nread = len(chunk)
            if nread == 0:
                break
            result.append(chunk)
            total += nread
            if self.abs_pos != -1:
                self.abs_pos += nread
        return space.wrap(result.build())
Esempio n. 9
0
def string_append(args):
    """Concatenate the W_String values in ``args`` into a new W_String.

    The result is built as ASCII for as long as possible.  When a
    ``ValueError`` interrupts the ASCII pass (presumably raised by
    ``as_str_ascii()`` for non-ASCII content), the prefix collected so far
    is carried over and the remaining arguments are appended as unicode.
    Raises SchemeException when an argument is not a W_String.
    """
    if jit.isconstant(len(args)):
        return string_append_fastpath(args)
    if not args:
        return W_String.fromascii("")
    count = len(args)
    ascii_builder = StringBuilder(count)
    uni_builder = None
    ascii_idx = 0
    try:
        for ascii_idx in range(count):
            item = args[ascii_idx]
            if not isinstance(item, W_String):
                raise SchemeException("string-append: expected a string")
            ascii_builder.append(item.as_str_ascii())
    except ValueError:
        # Switch to unicode, restarting at the argument that failed.
        uni_builder = UnicodeBuilder(count)
        uni_builder.append(unicode(ascii_builder.build()))
        ascii_builder = None
        for j in range(ascii_idx, count):
            item = args[j]
            if not isinstance(item, W_String):
                raise SchemeException("string-append: expected a string")
            uni_builder.append(item.as_unicode())
    if uni_builder is not None:
        return W_String.fromunicode(uni_builder.build())
    assert ascii_builder is not None
    return W_String.fromascii(ascii_builder.build())
Esempio n. 10
0
 def direct_readline(self, size=-1):
     """Read one line from the underlying stream.

     A negative ``size`` delegates to the stream's ``readline()``.
     Otherwise at most ``size`` characters are read, stopping after the
     first newline or when the stream returns no data.
     """
     stream = self.getstream()
     self.check_readable()
     if size < 0:
         return stream.readline()
     # very inefficient unless there is a peek()
     line = StringBuilder()
     while size > 0:
         # Peek at the underlying stream to find out how many characters
         # can safely be read without going past an end-of-line.
         start, peeked = stream.peek()
         assert 0 <= start <= len(peeked)
         stop = start + size
         newline_at = peeked.find("\n", start, stop)
         if newline_at < 0:
             newline_at = min(stop - 1, len(peeked))
         piece = stream.read(newline_at - start + 1)
         if not piece:
             break
         line.append(piece)
         if piece.endswith('\n'):
             break
         size -= len(piece)
     return line.build()
Esempio n. 11
0
def str_translate__String_ANY_ANY(space, w_string, w_table, w_deletechars=''):
    """charfilter - unicode handling is not implemented

    Return a copy of the string in which every character listed in the
    optional deletechars argument has been dropped and every remaining
    character has been mapped through the translation table.  The table
    must be 256 characters long; passing None maps through
    DEFAULT_NOOP_TABLE instead."""

    if space.is_w(w_table, space.w_None):
        table = DEFAULT_NOOP_TABLE
    else:
        table = space.bufferstr_w(w_table)
        if len(table) != 256:
            raise OperationError(
                space.w_ValueError,
                space.wrap("translation table must be 256 characters long"))

    source = w_string._value
    deletechars = space.str_w(w_deletechars)
    if len(deletechars) > 0:
        out = StringBuilder()
        # One flag per possible character code: True means "drop it".
        dropped = [False] * 256
        for ch in deletechars:
            dropped[ord(ch)] = True
        for ch in source:
            code = ord(ch)
            if dropped[code]:
                continue
            out.append(table[code])
    else:
        # Nothing is deleted, so the output is as long as the input.
        out = StringBuilder(len(source))
        for ch in source:
            out.append(table[ord(ch)])
    return W_StringObject(out.build())
Esempio n. 12
0
def rledecode_hqx(space, hexbin):
    """Decode hexbin RLE-coded string.

    The byte 0x90 is the run-length marker.  Followed by a zero count it
    stands for a literal 0x90; otherwise the previously emitted byte is
    appended ``count - 1`` more times.
    """
    # The input length is only a guess at the size of the result.
    out = StringBuilder(len(hexbin))

    size = len(hexbin)
    pos = 0
    previous = -1
    while pos < size:
        ch = hexbin[pos]
        pos += 1
        if ch != '\x90':
            out.append(ch)
            previous = ord(ch)
            continue
        if pos == size:
            raise_Incomplete(space, 'String ends with the RLE code \\x90')
        repeat = ord(hexbin[pos]) - 1
        pos += 1
        if repeat < 0:
            out.append('\x90')
            previous = 0x90
            continue
        if previous < 0:
            raise_Error(space, 'String starts with the RLE code \\x90')
        out.append_multiple_char(chr(previous), repeat)
    return space.newbytes(out.build())
Esempio n. 13
0
def a2b_hqx(space, ascii):
    """Decode .hqx coding.  Returns (bin, done).

    ``done`` is 1 when the DONE marker was seen and 0 otherwise.  Input
    that ends without a DONE marker while bits are still pending is
    reported through ``raise_Incomplete``.
    """

    # The input length overestimates the size of the result.
    out = StringBuilder(len(ascii))
    finished = 0
    acc = 0
    nbits = 0

    for ch in ascii:
        code = ord(table_a2b_hqx[ord(ch)])
        if code <= 0x3F:
            acc = (acc << 6) | code
            nbits += 6
            if nbits == 24:
                # Four 6-bit groups collected: emit three bytes.
                out.append(chr(acc >> 16))
                out.append(chr((acc >> 8) & 0xff))
                out.append(chr(acc & 0xff))
                acc = 0
                nbits = 0
        elif code == FAIL:
            raise_Error(space, 'Illegal character')
        elif code == DONE:
            # Emit whatever whole bytes are still pending.
            if nbits >= 8:
                out.append(chr(acc >> (nbits - 8)))
            if nbits >= 16:
                out.append(chr((acc >> (nbits - 16)) & 0xff))
            finished = 1
            break
        # any other code (SKIP) is ignored
    else:
        if nbits > 0:
            raise_Incomplete(space, 'String has incomplete number of bytes')
    return space.newtuple([space.newbytes(out.build()), space.wrap(finished)])
Esempio n. 14
0
 def unwrap(self):
     """Return the contents as a string, built one character at a time."""
     # note: always overridden so far
     size = self.strlen()
     chars = StringBuilder(size)
     index = 0
     while index < size:
         chars.append(self.character(index))
         index += 1
     return chars.build()
Esempio n. 15
0
def _charp2str_to_null(cp, index):
    """Return the characters of ``cp`` from ``index`` up to the first NUL.

    The terminating ``'\\x00'`` is not included in the result.
    """
    index = rffi.cast(lltype.Signed, index)
    chars = StringBuilder()
    while True:
        ch = cp[index]
        if ch == '\x00':
            break
        chars.append(ch)
        index += 1
    return chars.build()
Esempio n. 16
0
    def readall_w(self, space):
        """Read from ``self.fd`` until EOF, accumulating the chunks.

        Returns ``space.w_None`` when the very first read fails with EAGAIN.
        Other OS errors raised before any data was read are re-raised via
        ``wrap_oserror``.
        """
        # NOTE(review): no return statement is visible after the loop in
        # this excerpt -- confirm that the accumulated data is returned.
        self._check_closed(space)
        self._check_readable(space)
        total = 0

        builder = StringBuilder()
        while True:
            # Ask the helper for the next total buffer size; only the
            # difference to what was already read is requested below.
            newsize = int(new_buffersize(self.fd, total))

            try:
                chunk = os.read(self.fd, newsize - total)
            except OSError, e:
                if e.errno == errno.EINTR:
                    # Interrupted: run pending signal checks and retry.
                    space.getexecutioncontext().checksignals()
                    continue
                if total > 0:
                    # return what we've got so far
                    break
                if e.errno == errno.EAGAIN:
                    return space.w_None
                raise wrap_oserror(space, e,
                                   exception_name='w_IOError')

            # An empty chunk ends the loop.
            if not chunk:
                break
            builder.append(chunk)
            total += len(chunk)
Esempio n. 17
0
def add__StringBuffer_String(space, w_self, w_other):
    """Return a new string-buffer object holding ``w_self + w_other``.

    The existing builder is reused when its length still equals
    ``w_self.length``; otherwise a fresh builder is seeded with the forced
    contents of ``w_self``.
    """
    builder = w_self.builder
    if builder.getlength() != w_self.length:
        builder = StringBuilder()
        builder.append(w_self.force())
    builder.append(w_other._value)
    return W_StringBufferObject(builder)
Esempio n. 18
0
 def sanitize(self, s):
     """Return ``s`` with each character found in CONTROL_CHARS replaced by '_'."""
     cleaned = StringBuilder(len(s))
     for ch in s:
         cleaned.append('_' if ch in CONTROL_CHARS else ch)
     return cleaned.build()
Esempio n. 19
0
 def text_build(self, space, frame, bytecode, no):
     """Pop ``no`` values off ``frame``, stringify them and push the joined text.

     The value popped last ends up first in the resulting text.
     """
     pieces = [space.str(frame.pop()) for _ in range(no)]
     # The pops arrive in reverse order of the final text.
     pieces.reverse()
     joined = StringBuilder()
     for piece in pieces:
         joined.append(piece)
     frame.push(space.newtext(joined.build()))
Esempio n. 20
0
 def fn(_):
     """Grow one builder a character at a time, snapshotting it after each append.

     A garbage collection is forced after every snapshot; the snapshots are
     returned joined by single spaces.
     """
     builder = StringBuilder(4)
     snapshots = []
     for offset in range(50):
         builder.append(chr(33 + offset))
         snapshots.append(builder.build())
         gc.collect()
     return ' '.join(snapshots)
Esempio n. 21
0
 def hexdigest(self, space):
     """Return the digest value as a wrapped string of hexadecimal digits."""
     raw = self._digest(space)
     alphabet = '0123456789abcdef'
     # Two hex digits are produced per digest character.
     out = StringBuilder(self.digest_size * 2)
     for ch in raw:
         byte = ord(ch)
         out.append(alphabet[(byte >> 4) & 0xf])
         out.append(alphabet[byte & 0xf])
     return space.wrap(out.build())
Esempio n. 22
0
 def fn(_):
     """Build strings of lengths 3, 8, ..., 73, forcing a GC after every append.

     The finished strings are returned joined by single spaces.
     """
     results = []
     for length in range(3, 76, 5):
         builder = StringBuilder()
         for offset in range(length):
             builder.append(chr(33 + offset))
             gc.collect()
         results.append(builder.build())
     return ' '.join(results)
Esempio n. 23
0
 def string_func(space, w_left, w_right):
     """Apply ``bitwise_op`` character-wise to two strings.

     The result is as long as the shorter of the two operands.
     """
     lhs = w_left.unwrap()
     rhs = w_right.unwrap()
     size = min(len(lhs), len(rhs))
     out = StringBuilder(size)
     for pos in range(size):
         out.append(chr(bitwise_op(ord(lhs[pos]), ord(rhs[pos]))))
     return space.newstr(out.build())
Esempio n. 24
0
    def test_prebuilt_string_builder(self):
        """A builder filled before interpretation can be built from the interpreted function."""
        prebuilt = StringBuilder(100)
        prebuilt.append("abc")

        def f():
            return len(prebuilt.build())

        assert self.interpret(f, []) == 3
Esempio n. 25
0
def str2hexstr(arr, size):
    """Return the lowercase hex rendering of the first ``size`` characters of ``arr``.

    Each input character yields two hexadecimal digits, high nibble first.
    """
    HEXCHARS = ['0', '1', '2', '3', '4', '5', '6', '7',
                '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']

    # Two digits are appended per input character, so size the builder for
    # the full output rather than for half of it.
    s = StringBuilder(size * 2)
    for i in range(size):
        byte = ord(arr[i])
        s.append(HEXCHARS[byte >> 4])
        s.append(HEXCHARS[byte & 15])
    return s.build()
Esempio n. 26
0
def _parse_plain_flags(source):
    """Collect characters from ``source`` up to the next ':'.

    The ':' is consumed but not included in the returned string.
    """
    # NOTE(review): there is no end-of-input check here; presumably
    # source.get() raises at the end of the input -- confirm.
    flags = StringBuilder(4)
    ch = source.get()
    while ch != ":":
        flags.append(ch)
        ch = source.get()
    return flags.build()
Esempio n. 27
0
    def readline_w(self, space, w_limit=None):
        """Read and return one line, built on top of ``read()``.

        When the object has a ``peek`` attribute, the peeked data is scanned
        for a newline so that several characters can be read per call;
        otherwise one character is read at a time.  Reading stops after a
        newline, at EOF (empty read), or once ``limit`` characters have been
        collected (a negative limit means no limit).

        Raises IOError when ``peek()`` or ``read()`` returns something that
        is not a bytes object.
        """
        # For backwards compatibility, a (slowish) readline().
        limit = convert_size(space, w_limit)

        has_peek = space.findattr(self, space.wrap("peek"))

        builder = StringBuilder()
        size = 0

        while limit < 0 or size < limit:
            nreadahead = 1

            if has_peek:
                w_readahead = space.call_method(self, "peek", space.wrap(1))
                if not space.isinstance_w(w_readahead, space.w_str):
                    raise operationerrfmt(
                        space.w_IOError,
                        "peek() should have returned a bytes object, not '%T'",
                        w_readahead)
                length = space.len_w(w_readahead)
                if length > 0:
                    buf = space.str_w(w_readahead)
                    # Count the characters up to and including the first
                    # newline, bounded by the peeked length and, when a
                    # limit is set, by that limit.
                    n = 0
                    while n < length and (limit < 0 or n < limit):
                        n += 1
                        if buf[n-1] == '\n':
                            break
                    nreadahead = n

            w_read = space.call_method(self, "read", space.wrap(nreadahead))
            if not space.isinstance_w(w_read, space.w_str):
                # This reports a bad read() result, so the message must
                # name read(), not peek().
                raise operationerrfmt(
                    space.w_IOError,
                    "read() should have returned a bytes object, not '%T'",
                    w_read)
            read = space.str_w(w_read)
            if not read:
                break

            size += len(read)
            builder.append(read)

            if read[-1] == '\n':
                break

        return space.wrap(builder.build())
Esempio n. 28
0
File: text.py Progetto: fijal/quill
class W_StringBuilder(W_Root):
    """Wrapper object that exposes a StringBuilder."""
    def __init__(self, length):
        # ``length`` is handed straight to StringBuilder as its constructor
        # argument.
        self._s = StringBuilder(length)

    @unwrap_spec(txt='utf8')
    def append(self, txt):
        """Append ``txt`` to the underlying builder."""
        self._s.append(txt)

    def build(self, space):
        """Return the accumulated text wrapped with ``space.newtext``."""
        return space.newtext(self._s.build())
Esempio n. 29
0
 def _flush_codes(self, space):
     """Write one record per pending code object to ``self.fileno``.

     Each record is the marker byte ``'\\x02'``, the code's unique id, the
     length of its full name and the full name itself.  The list of pending
     codes is cleared afterwards.
     """
     out = StringBuilder()
     for code in self.current_codes:
         full_name = code._get_full_name()
         out.append('\x02')
         write_long_to_string_builder(code._unique_id, out)
         write_long_to_string_builder(len(full_name), out)
         out.append(full_name)
     payload = out.build()
     os.write(self.fileno, payload)
     self.current_codes = []
Esempio n. 30
0
 def f(n):
     """Join the strings of IN ``n`` times and check each result against JOINED.

     Raises ValueError on a mismatch; returns the final value of ``n``.
     """
     while n > 0:
         jitdriver.jit_merge_point(n=n)
         builder = StringBuilder(36)
         for piece in IN:
             builder.append(piece)
         joined = builder.build()
         if joined != JOINED:
             raise ValueError
         n -= 1
     return n
Esempio n. 31
0
def b2a_base64(space, bin, __kwonly__, newline=True):
    """Base64-code line of data.

    A trailing newline is appended unless ``newline`` is false.  Raises
    MemoryError when the output length computation overflows.
    """

    groups = (len(bin) + 2) // 3
    try:
        capacity = ovfcheck(groups * 4)
    except OverflowError:
        raise OperationError(space.w_MemoryError, space.w_None)
    capacity += 1
    out = StringBuilder(capacity)

    acc = 0
    nbits = 0
    for ch in bin:
        # Shift the new byte into the accumulator, then emit every complete
        # 6-bit group (one or two per input byte).
        acc = (acc << 8) | ord(ch)
        nbits += 8
        nbits -= 6
        out.append(table_b2a_base64[(acc >> nbits) & 0x3f])
        if nbits >= 6:
            nbits -= 6
            out.append(table_b2a_base64[(acc >> nbits) & 0x3f])
    #
    # Flush the leftover bits and pad the final group.
    if nbits == 2:
        out.append(table_b2a_base64[(acc & 3) << 4])
        out.append(PAD)
        out.append(PAD)
    elif nbits == 4:
        out.append(table_b2a_base64[(acc & 0xf) << 2])
        out.append(PAD)
    if newline:
        out.append('\n')
    return space.newbytes(out.build())
Esempio n. 32
0
def format_number(digits, buflen, sign, decpt, code, precision, flags, upper):
    """Format the raw digit buffer ``digits`` according to ``code``.

    The result has the general shape

        [<sign>]<zeros><digits><zeros>[<exponent>]

    where either run of zeros may be empty and the decimal point may fall
    inside <digits> or inside one of the zero runs.

    Think of an infinite virtual string ``vdigits``: the ``buflen`` digits
    starting at index 0, padded on both sides with endless zeros.  The
    output is the slice ``vdigits[vdigits_start:vdigits_end]``, so a
    negative ``vdigits_start`` produces leading zeros and a
    ``vdigits_end`` beyond ``buflen`` produces trailing zeros.

    ``code`` is one of 'e', 'f', 'g' or 'r'; anything else raises
    ValueError.  ``upper`` selects 'E' instead of 'e' for the exponent.
    """
    out = StringBuilder(20)

    # Decide whether an exponent is used and where the digit window ends.
    use_exp = False
    vdigits_end = buflen
    if code == 'e':
        use_exp = True
        vdigits_end = precision
    elif code == 'f':
        vdigits_end = decpt + precision
    elif code == 'g':
        if (decpt <= -4 or decpt > precision or
                (flags & rfloat.DTSF_ADD_DOT_0 and decpt == precision)):
            use_exp = True
        if flags & rfloat.DTSF_ALT:
            vdigits_end = precision
    elif code == 'r':
        # Switch to exponential format at 1e16.  The switch used to be at
        # 1e17, but that gives odd-looking results for some values when a
        # 16-digit 'shortest' repr is padded with bogus zeros.  For
        # example, repr(2e16+8) would give 20000000000000010.0; the true
        # value is 20000000000000008.0.
        use_exp = decpt <= -4 or decpt > 16
    else:
        raise ValueError

    # With an exponent the decimal point moves to position 1 and the
    # exponent absorbs the difference.
    exp = 0
    if use_exp:
        exp = decpt - 1
        decpt = 1

    # Ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
    # decpt < vdigits_end when a ".0" must be added and no exponent is used.
    vdigits_start = 0
    if decpt <= 0:
        vdigits_start = decpt - 1
    if vdigits_end <= decpt:
        vdigits_end = decpt
        if not use_exp and flags & rfloat.DTSF_ADD_DOT_0:
            vdigits_end += 1

    # Double check the inequalities.
    assert vdigits_start <= 0
    assert 0 <= buflen <= vdigits_end
    # The decimal point lies in (vdigits_start, vdigits_end].
    assert vdigits_start < decpt <= vdigits_end

    if sign == 1:
        out.append('-')
    elif flags & rfloat.DTSF_SIGN:
        out.append('+')

    # Exactly one of the three sections below emits the decimal point.
    # 1. Zero padding on the left of the digit string.
    if decpt <= 0:
        out.append_multiple_char('0', decpt - vdigits_start)
        out.append('.')
        out.append_multiple_char('0', -decpt)
    else:
        out.append_multiple_char('0', -vdigits_start)

    # 2. The digits themselves, with the decimal point if it falls inside.
    if 0 < decpt <= buflen:
        out.append(rffi.charpsize2str(digits, decpt))
        out.append('.')
        tail = rffi.ptradd(digits, decpt)
        out.append(rffi.charpsize2str(tail, buflen - decpt))
    else:
        out.append(rffi.charpsize2str(digits, buflen))

    # 3. Zero padding on the right.
    if buflen < decpt:
        out.append_multiple_char('0', decpt - buflen)
        out.append('.')
        out.append_multiple_char('0', vdigits_end - decpt)
    else:
        out.append_multiple_char('0', vdigits_end - buflen)

    s = out.build()

    # Drop a trailing decimal point unless alternative formatting is on.
    if not flags & rfloat.DTSF_ALT:
        last = len(s) - 1
        if last >= 0 and s[last] == '.':
            s = s[:last]

    # With the zero padding done, add the exponent if one is needed.
    if use_exp:
        marker = 'E' if upper else 'e'
        if exp >= 0:
            exp_sign = '+'
            exp_str = str(exp)
        else:
            exp_sign = '-'
            exp_str = str(-exp)
        # Single-digit exponents get a leading zero unless it is cut.
        if len(exp_str) < 2 and not (flags & rfloat.DTSF_CUT_EXP_0):
            s += marker + exp_sign + '0' + exp_str
        else:
            s += marker + exp_sign + exp_str

    return s
Esempio n. 33
0
                last_pos = ctx.match_end
                if filter_is_callable:
                    w_match = self.getmatch(ctx, True)
                    # make a copy of 'ctx'; see test_sub_matches_stay_valid
                    ctx = ctx.fresh_copy(
                        start)  # match_start/match_end dropped
                    w_piece = space.call_function(w_filter, w_match)
                    if not space.is_w(w_piece, space.w_None):
                        assert strbuilder is None and unicodebuilder is None
                        assert not use_builder
                        sublist_w.append(w_piece)
                else:
                    if use_builder:
                        if strbuilder is not None:
                            assert filter_as_string is not None
                            strbuilder.append(filter_as_string)
                        else:
                            assert unicodebuilder is not None
                            assert filter_as_unicode is not None
                            unicodebuilder.append(filter_as_unicode)
                    else:
                        sublist_w.append(w_filter)
                n += 1
            elif last_pos >= ctx.end:
                break  # empty match at the end: finished
            ctx.reset(start)

        if last_pos < ctx.end:
            _sub_append_slice(ctx, space, use_builder, sublist_w, strbuilder,
                              unicodebuilder, last_pos, ctx.end)
        if use_builder:
Esempio n. 34
0
def raise_mismatch_err(args):
    """Raise a SchemeException built from a name, a message and values.

    ``args`` holds a symbol, then alternating message strings and values.
    The error text is ``<name>: <message><value>`` followed by every
    further message/value pair; a trailing unpaired argument is ignored.
    """
    name = args[0]
    assert isinstance(name, values.W_Symbol)
    message = args[1]
    assert isinstance(message, values_string.W_String)
    v = args[2]
    assert isinstance(v, values.W_Object)
    from rpython.rlib.rstring import StringBuilder
    text = StringBuilder()
    text.append(name.utf8value)
    text.append(": ")
    text.append(message.as_str_utf8())
    text.append(v.tostring())
    # Remaining arguments come in (message, value) pairs.
    for i in range(3, len(args) - 1, 2):
        extra_message = args[i]
        assert isinstance(extra_message, values_string.W_String)
        text.append(extra_message.as_str_utf8())
        extra_value = args[i + 1]
        assert isinstance(extra_value, values.W_Object)
        text.append(extra_value.tostring())
    raise SchemeException(text.build())
Esempio n. 35
0
def PyString_DecodeEscape(space, s, errors, recode_encoding):
    """
    Unescape a backslash-escaped string. If recode_encoding is non-zero,
    the string is UTF-8 encoded and should be re-encoded in the
    specified encoding.

    ``errors`` selects how an invalid ``\\x`` escape is handled: 'strict'
    reports an application-level ValueError, 'replace' emits '?', 'ignore'
    emits nothing, and any other value raises ValueError.  A backslash
    followed by an unrecognised character is kept as-is.

    Returns the unescaped string.
    """
    builder = StringBuilder(len(s))
    # ``ps`` is the read cursor into ``s``; ``end`` is one past the last
    # character.
    ps = 0
    end = len(s)
    while ps < end:
        if s[ps] != '\\':
            # note that the C code has a label here.
            # the logic is the same.
            if recode_encoding and ord(s[ps]) & 0x80:
                w, ps = decode_utf8_recode(space, s, ps, end, recode_encoding)
                # Append bytes to output buffer.
                builder.append(w)
            else:
                builder.append(s[ps])
                ps += 1
            continue

        # Skip the backslash; ``ch`` is the character that follows it.
        ps += 1
        if ps == end:
            raise_app_valueerror(space, 'Trailing \\ in string')
        prevps = ps
        ch = s[ps]
        ps += 1
        # XXX This assumes ASCII!
        if ch == '\n':
            # Backslash-newline produces no output.
            pass
        elif ch == '\\':
            builder.append('\\')
        elif ch == "'":
            builder.append("'")
        elif ch == '"':
            builder.append('"')
        elif ch == 'b':
            builder.append("\010")
        elif ch == 'f':
            builder.append('\014')  # FF
        elif ch == 't':
            builder.append('\t')
        elif ch == 'n':
            builder.append('\n')
        elif ch == 'r':
            builder.append('\r')
        elif ch == 'v':
            builder.append('\013')  # VT
        elif ch == 'a':
            builder.append('\007')  # BEL, not classic C
        elif ch in '01234567':
            # Look for up to two more octal digits
            span = ps
            span += (span < end) and (s[span] in '01234567')
            span += (span < end) and (s[span] in '01234567')
            octal = s[prevps:span]
            # emulate a strange wrap-around behavior of CPython:
            # \400 is the same as \000 because 0400 == 256
            num = int(octal, 8) & 0xFF
            builder.append(chr(num))
            ps = span
        elif ch == 'x':
            # A valid escape needs exactly two hex digits after the 'x'.
            if ps + 2 <= end and isxdigit(s[ps]) and isxdigit(s[ps + 1]):
                hexa = s[ps:ps + 2]
                num = int(hexa, 16)
                builder.append(chr(num))
                ps += 2
            else:
                if errors == 'strict':
                    raise_app_valueerror(
                        space, "invalid \\x escape at position %d" % (ps - 2))
                elif errors == 'replace':
                    builder.append('?')
                elif errors == 'ignore':
                    pass
                else:
                    raise oefmt(
                        space.w_ValueError, "decoding error; "
                        "unknown error handling code: %s", errors)
                # Skip a lone hex digit that followed the 'x'.
                if ps + 1 <= end and isxdigit(s[ps]):
                    ps += 1
        else:
            # this was not an escape, so the backslash
            # has to be added, and we start over in
            # non-escape mode.
            builder.append('\\')
            ps -= 1
            assert ps >= 0
            continue
            # an arbitrary number of unescaped UTF-8 bytes may follow.

    buf = builder.build()
    return buf
Esempio n. 36
0
 def escape(self):
     """Return the command followed by its escaped, space-separated arguments.

     Without arguments (``self.args`` is None) the bare command is
     returned.  Inside arguments, backslash, ';', newline and space are
     written as ``\\\\``, ``\\;``, ``\\n`` and ``\\_`` respectively.
     """
     if self.args is None:
         return self.command
     out = StringBuilder()
     out.append(self.command)
     for arg in self.args:
         out.append(" ")
         for ch in arg:
             if ch == '\\':
                 piece = '\\\\'
             elif ch == ';':
                 piece = '\\;'
             elif ch == "\n":
                 piece = "\\n"
             elif ch == " ":
                 piece = "\\_"
             else:
                 piece = ch
             out.append(piece)
     return out.build()
Esempio n. 37
0
def raw_encode_basestring_ascii(space, w_string):
    """Escape a wrapped str/unicode object into pure-ASCII text.

    A byte string made only of printable ASCII other than the double quote
    and the backslash is returned unchanged (the very same wrapped object).
    Any other byte string is first decoded as UTF-8; unicode input is always
    processed.  The escaped text is wrapped with ``space.wrap``; no
    surrounding quote characters are added here.
    """
    if space.isinstance_w(w_string, space.w_str):
        raw = space.str_w(w_string)
        for idx in range(len(raw)):
            byte = raw[idx]
            if byte < ' ' or byte > '~' or byte == '"' or byte == '\\':
                # First character needing attention: everything before it
                # is plain ASCII and can be copied over verbatim below.
                first = idx
                break
        else:
            # Only non-special ASCII characters: nothing to do.
            return w_string

        handler = unicodehelper.decode_error_handler(space)
        decoded = str_decode_utf_8(raw, len(raw), None, final=True,
                                   errorhandler=handler,
                                   allow_surrogates=True)
        u = decoded[0]
        sb = StringBuilder(len(u))
        sb.append_slice(raw, 0, first)
    else:
        # No "is it already safe?" shortcut for unicode input: that would
        # cost an extra pass, and the expected caller (json.encoder)
        # re-encodes a unicode result to an ASCII string anyway.  Producing
        # the byte string directly keeps it to a single pass.
        u = space.unicode_w(w_string)
        sb = StringBuilder(len(u))
        first = 0

    for idx in range(first, len(u)):
        code = ord(u[idx])
        if code < 0x20:
            # Control characters have a precomputed escape.
            sb.append(ESCAPE_BEFORE_SPACE[code])
        elif code <= 0x7e:
            if code == 0x22 or code == 0x5c:
                # '"' and '\\' are kept, preceded by a backslash.
                sb.append('\\')
            sb.append(chr(code))
        elif code <= 0xffff:
            sb.append('\\u')
            sb.append(HEX[code >> 12])
            sb.append(HEX[(code >> 8) & 0x0f])
            sb.append(HEX[(code >> 4) & 0x0f])
            sb.append(HEX[code & 0x0f])
        else:
            # Outside the BMP: emit a surrogate pair.  Both halves start
            # with the hex digit 'd', which is folded into the prefix.
            offset = code - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            sb.append('\\ud')
            sb.append(HEX[(high >> 8) & 0x0f])
            sb.append(HEX[(high >> 4) & 0x0f])
            sb.append(HEX[high & 0x0f])
            sb.append('\\ud')
            sb.append(HEX[(low >> 8) & 0x0f])
            sb.append(HEX[(low >> 4) & 0x0f])
            sb.append(HEX[low & 0x0f])

    return space.wrap(sb.build())
Esempio n. 38
0
    def unicode_escape(s):
        """Return an escaped copy of ``s``, optionally wrapped in quotes.

        ``s`` is walked one code point at a time with ``codepoint_at_pos``
        and ``next_codepoint_pos`` (presumably a UTF-8 encoded string --
        TODO confirm against the caller).  ``quotes``, ``prefix`` and
        ``pass_printable`` are not parameters: they are read from the
        enclosing scope, which is not visible in this block.

        With ``quotes`` false and an empty ``s`` the empty string is
        returned directly; otherwise the text accumulated in the builder is
        returned.
        """
        size = len(s)
        result = StringBuilder(size)

        if quotes:
            if prefix:
                result.append(prefix)
            # Use double quotes only when the text contains a single quote
            # and no double quote; otherwise use single quotes.
            if s.find('\'') != -1 and s.find('\"') == -1:
                quote = ord('\"')
                result.append('"')
            else:
                quote = ord('\'')
                result.append('\'')
        else:
            # 0 never needs escaping as a quote: the check below is also
            # guarded by 'quotes'.
            quote = 0

            if size == 0:
                return ''

        pos = 0
        while pos < size:
            oc = codepoint_at_pos(s, pos)
            ch = s[pos]

            # Escape quotes
            # (only in quoted mode: the active quote character and the
            # backslash are emitted with a leading backslash)
            if quotes and (oc == quote or ch == '\\'):
                result.append('\\')
                next_pos = next_codepoint_pos(s, pos)
                result.append_slice(s, pos, next_pos)
                pos = next_pos
                continue

            # The following logic is enabled only if MAXUNICODE == 0xffff, or
            # for testing on top of a host Python where sys.maxunicode == 0xffff
            if (not we_are_translated() and sys.maxunicode == 0xFFFF
                    and 0xD800 <= oc < 0xDC00 and pos + 3 < size):
                # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
                # (the look-ahead steps 3 positions forward to reach the
                # next code point)
                pos += 3
                oc2 = codepoint_at_pos(s, pos)

                if 0xDC00 <= oc2 <= 0xDFFF:
                    ucs = (((oc & 0x03FF) << 10) | (oc2 & 0x03FF)) + 0x00010000
                    char_escape_helper(result, ucs)
                    pos += 3
                    continue
                # Fall through: isolated surrogates are copied as-is
                # (undo the look-ahead first)
                pos -= 3

            # Map special whitespace to '\t', \n', '\r'
            if ch == '\t':
                result.append('\\t')
            elif ch == '\n':
                result.append('\\n')
            elif ch == '\r':
                result.append('\\r')
            elif ch == '\\':
                # Only reachable when 'quotes' is false; in quoted mode the
                # backslash was already handled above.
                result.append('\\\\')

            # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
            elif pass_printable and not (oc <= 0x10ffff
                                         and unicodedb.isprintable(oc)):
                char_escape_helper(result, oc)
            elif not pass_printable and (oc < 32 or oc >= 0x7F):
                char_escape_helper(result, oc)

            # Copy everything else as-is
            else:
                if oc < 128:
                    result.append(ch)
                else:
                    next_pos = next_codepoint_pos(s, pos)
                    result.append_slice(s, pos, next_pos)
            pos = next_codepoint_pos(s, pos)

        if quotes:
            # Closing quote, matching the one chosen at the top.
            result.append(chr(quote))
        return result.build()
Esempio n. 39
0
def string_escape_encode(s, quote):
    """Return ``s`` wrapped in ``quote`` with special characters escaped.

    The backslash and the quote character get a leading backslash; tab,
    carriage return and newline become their one-letter backslash escapes;
    any other character outside the printable ASCII range is written as a
    two-digit lowercase hex escape.  Runs of ordinary characters are copied
    in one slice each.
    """
    digits = "0123456789abcdef"
    out = StringBuilder(len(s) + 2)
    out.append(quote)

    # Index of the first character not yet copied into 'out'.
    pending = 0
    for pos in range(len(s)):
        ch = s[pos]
        as_hex = False
        letter = ch  # character written after the backslash
        if ch == '\\' or ch == quote:
            pass
        elif ch == '\t':
            letter = 't'
        elif ch == '\r':
            letter = 'r'
        elif ch == '\n':
            letter = 'n'
        elif ch < '\x20' or ch >= '\x7f':
            as_hex = True
        else:
            # Ordinary character: leave it in the pending run.
            continue

        # Flush the ordinary characters that precede this special one.
        if pos != pending:
            out.append_slice(s, pending, pos)
        pending = pos + 1

        if as_hex:
            code = ord(ch)
            out.append('\\x')
            out.append(digits[code >> 4])
            out.append(digits[code & 0xF])
        else:
            out.append('\\')
            out.append(letter)

    if len(s) != pending:
        out.append_slice(s, pending, len(s))

    out.append(quote)
    return out.build()
Esempio n. 40
0
def lower(string):
    """Return a lowercased copy of the string, following the current locale.

    Each character is converted individually with ``lower_char``.
    """
    length = len(string)
    out = StringBuilder(length)
    for idx in range(length):
        out.append(lower_char(string[idx]))
    return out.build()
Esempio n. 41
0
def fstring_find_literal(astbuilder, fstr, atom_node, rec):
    """Scan the next literal chunk of an f-string and return it wrapped.

    Scanning starts at ``fstr.current_index`` inside ``fstr.unparsed`` and
    stops at an un-doubled brace or at the end of the input;
    ``fstr.current_index`` is updated to the stop position.  ``rec`` is the
    nesting level: doubled braces are only collapsed when it is 0.  Unless
    ``fstr.raw_mode`` is set, a collected literal containing a backslash is
    run through the escape decoders before being wrapped with
    ``space.newtext``.  Problems are reported through ``astbuilder.error``
    using ``atom_node``.
    """
    space = astbuilder.space
    raw = fstr.raw_mode

    # Return the next literal part.  Updates the current index inside 'fstr'.
    # Differs from CPython: this version handles double-braces on its own.
    s = fstr.unparsed
    literal_start = fstr.current_index
    assert literal_start >= 0

    # Get any literal string. It ends when we hit an un-doubled left
    # brace (which isn't part of a unicode name escape such as
    # "\N{EULER CONSTANT}"), or the end of the string.
    i = literal_start
    builder = StringBuilder()
    while i < len(s):
        ch = s[i]
        i += 1
        if not raw and ch == '\\' and i < len(s):
            # Look at the character that follows the backslash.
            ch = s[i]
            i += 1
            if ch == 'N':
                if i < len(s) and s[i] == '{':
                    # Skip the whole {...} name so that its braces are not
                    # taken for expression delimiters.
                    while i < len(s) and s[i] != '}':
                        i += 1
                    if i < len(s):
                        i += 1
                    continue
                elif i < len(s):
                    i += 1
                break
            if ch == '{':
                # Backslash followed by '{': warn, or report an error when
                # the warning itself is raised as an exception.
                msg = "invalid escape sequence '%s'"
                try:
                    space.warn(space.newtext(msg % ch),
                               space.w_DeprecationWarning)
                except error.OperationError as e:
                    if e.match(space, space.w_DeprecationWarning):
                        astbuilder.error(msg % ch, atom_node)
                    else:
                        raise
        if ch == '{' or ch == '}':
            # Check for doubled braces, but only at the top level. If
            # we checked at every level, then f'{0:{3}}' would fail
            # with the two closing braces.
            if rec == 0 and i < len(s) and s[i] == ch:
                assert 0 <= i <= len(s)
                # Keep everything up to and including the first brace, then
                # restart the literal after the second one.
                builder.append(s[literal_start:i])
                i += 1  # skip over the second brace
                literal_start = i
            elif rec == 0 and ch == '}':
                i -= 1
                assert i >= 0
                fstr.current_index = i
                # Where a single '{' is the start of a new expression, a
                # single '}' is not allowed.
                astbuilder.error("f-string: single '}' is not allowed",
                                 atom_node)
            else:
                # We're either at a '{', which means we're starting another
                # expression; or a '}', which means we're at the end of this
                # f-string (for a nested format_spec).
                i -= 1
                break
    assert 0 <= i <= len(s)
    assert i == len(s) or s[i] == '{' or s[i] == '}'
    builder.append(s[literal_start:i])

    fstr.current_index = i
    literal = builder.build()
    lgt = codepoints_in_utf8(literal)
    if not raw and '\\' in literal:
        # Escapes are decoded only now, on the assembled literal; 'lgt' is
        # replaced by the length reported by the decoder.
        literal = parsestring.decode_unicode_utf8(space, literal, 0,
                                                  len(literal))
        literal, lgt, pos = unicodehelper.decode_unicode_escape(space, literal)
    return space.newtext(literal, lgt)
Esempio n. 42
0
def escapeshellarg(interp, arg):
    """Return ``arg`` wrapped in single quotes, as a wrapped string.

    Every single quote inside ``arg`` is replaced by the four characters
    quote, backslash, quote, quote: close the quoted section, emit an
    escaped quote, and open a new quoted section.
    """
    quoted = StringBuilder(len(arg) + 2)
    quoted.append("'")
    for idx in range(len(arg)):
        ch = arg[idx]
        if ch != "'":
            quoted.append(ch)
            continue
        quoted.append("'\\''")
    quoted.append("'")
    return interp.space.wrap(quoted.build())
Esempio n. 43
0
def array_var_export(dct_w,
                     space,
                     indent,
                     recursion,
                     w_reckey,
                     header,
                     prefix=' ',
                     suffix='',
                     arr_in_arr=False):
    """Render the entries of ``dct_w`` in var_export style and return the text.

    ``recursion`` is a dict used as a set of the containers currently being
    exported; ``w_reckey`` is the key of this container.  If it is already
    present, a ``*RECURSION*`` line is returned.  If one of the values is
    ``w_reckey`` itself, a warning is issued through ``space.ec.warn`` and
    the empty string is returned.  ``header``, ``prefix`` and ``suffix``
    surround the parenthesised body; with ``arr_in_arr`` the opening line is
    indented by a single space instead of ``indent``.
    """
    if w_reckey in recursion:
        return '%s*RECURSION*\n' % indent
    recursion[w_reckey] = None

    out = StringBuilder()
    opening_indent = ' ' if arr_in_arr else indent
    out.append('%s%s%s(\n' % (opening_indent, header, prefix))

    # A trailing '&' on the indent is only used for the opening line.
    if indent.endswith('&'):
        indent = indent[:-1]
    subindent = indent + '  '

    for key, w_entry in dct_w.iteritems():
        w_value = w_entry.deref_temp()
        # Keys starting with a NUL byte (protected attributes) lose their
        # first three characters.
        if len(key) > 1 and key.startswith('\x00'):
            key = key[3:]
        if w_value is w_reckey:
            space.ec.warn("var_export does not handle circular references")
            return ""

        # Integer-like keys are printed as numbers, all others as exported
        # strings.
        try:
            index = try_convert_str_to_int(key)
        except ValueError:
            label = '%s%s =>' % (subindent, string_var_export(key))
        else:
            label = '%s%d =>' % (subindent, index)
        out.append(label)

        if isinstance(w_value, W_ArrayObject):
            nested = array_var_export(w_value.as_rdict(), space, '  ',
                                      recursion, w_value, '\n  array',
                                      suffix=',', arr_in_arr=True)
            out.append(nested)
        elif w_value.tp == space.tp_object:
            out.append('\n')
            out.append(w_value.var_export(space, ' ', recursion, suffix='),'))
        else:
            out.append(w_value.var_export(space, ' ', recursion, suffix=','))
        out.append('\n')

    out.append('%s)%s' % (indent, suffix))
    del recursion[w_reckey]
    return out.build()
Esempio n. 44
0
    def decode_w(self, space, w_input, final=False):
        """Decode ``w_input`` and track (optionally translate) its newlines.

        The input is passed to ``self.w_decoder.decode`` unless that decoder
        is None at application level, in which case it is used as is.  A
        trailing CR is held back in ``self.pendingcr`` when ``final`` is
        false and put back in front of the next chunk.  The kinds of
        newline seen are OR-ed into ``self.seennl``; when ``self.translate``
        is set, CR and CR LF are both replaced by LF.

        Raises ValueError if ``self.w_decoder`` was never set and TypeError
        if the decoder does not return a unicode object.
        """
        if self.w_decoder is None:
            raise oefmt(space.w_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called")

        # decode input (with the eventual \r from a previous pass)
        if not space.is_w(self.w_decoder, space.w_None):
            w_output = space.call_method(self.w_decoder, "decode", w_input,
                                         space.newbool(bool(final)))
        else:
            w_output = w_input

        if not space.isinstance_w(w_output, space.w_unicode):
            raise oefmt(space.w_TypeError,
                        "decoder should return a string result")

        output, output_len = space.utf8_len_w(w_output)
        # NOTE(review): the length returned above is immediately replaced by
        # the byte length of 'output'; everything below indexes 'output' by
        # byte position.
        output_len = len(output)
        if self.pendingcr and (final or output_len):
            # Re-attach the \r held back by the previous call.
            output = '\r' + output
            self.pendingcr = False
            output_len += 1

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if not final and output_len > 0:
            last = len(output) - 1
            assert last >= 0
            if output[last] == '\r':
                output = output[:last]
                self.pendingcr = True
                output_len -= 1

        if output_len == 0:
            return space.newutf8("", 0)

        # Record which newlines are read and do newline translation if
        # desired, all in one pass.
        seennl = self.seennl

        if output.find('\r') < 0:
            # If no \r, quick scan for a possible "\n" character.
            # (there's nothing else to be done, even when in translation mode)
            if output.find('\n') >= 0:
                seennl |= SEEN_LF
                # Finished: we have scanned for newlines, and none of them
                # need translating.
        elif not self.translate:
            # Only classify the newlines; stop early once every kind has
            # been seen.
            i = 0
            while i < len(output):
                if seennl == SEEN_ALL:
                    break
                c = output[i]
                i += 1
                if c == '\n':
                    seennl |= SEEN_LF
                elif c == '\r':
                    if i < len(output) and output[i] == '\n':
                        seennl |= SEEN_CRLF
                        i += 1
                    else:
                        seennl |= SEEN_CR
        elif output.find('\r') >= 0:
            # (this condition always holds here: the first branch already
            # handled the case without any \r)
            # Translate!
            builder = StringBuilder(len(output))
            i = 0
            while i < output_len:
                c = output[i]
                i += 1
                if c == '\n':
                    seennl |= SEEN_LF
                elif c == '\r':
                    if i < len(output) and output[i] == '\n':
                        seennl |= SEEN_CRLF
                        i += 1
                    else:
                        seennl |= SEEN_CR
                    # Both \r and \r\n collapse to a single \n.
                    builder.append('\n')
                    continue
                builder.append(c)
            output = builder.build()

        self.seennl |= seennl
        # Recompute the length from the final text before wrapping it.
        lgt = check_utf8(output, True)
        return space.newutf8(output, lgt)
Esempio n. 45
0
def raw_encode_basestring_ascii(space, w_unicode):
    """Escape a wrapped unicode object into pure-ASCII text.

    If the UTF-8 form of ``w_unicode`` consists only of printable ASCII
    other than the double quote and the backslash, ``w_unicode`` itself is
    returned.  Otherwise a new text object is built in which control
    characters, the quote, the backslash and all non-ASCII code points are
    escaped.  No surrounding quote characters are added here.
    """
    utf8 = space.utf8_w(w_unicode)
    for byte in utf8:
        if byte < ' ' or byte > '~' or byte == '\\' or byte == '"':
            break
    else:
        # Nothing needs escaping.
        return w_unicode

    sb = StringBuilder(len(utf8) + 20)

    for code in Utf8StringIterator(utf8):
        if code < 0x20:
            # Control characters have a precomputed escape.
            sb.append(ESCAPE_BEFORE_SPACE[code])
        elif code <= 0x7e:
            if code == 0x22 or code == 0x5c:
                # '"' and '\\' are kept, preceded by a backslash.
                sb.append('\\')
            sb.append(chr(code))
        elif code <= 0xffff:
            sb.append('\\u')
            sb.append(HEX[code >> 12])
            sb.append(HEX[(code >> 8) & 0x0f])
            sb.append(HEX[(code >> 4) & 0x0f])
            sb.append(HEX[code & 0x0f])
        else:
            # Outside the BMP: emit a surrogate pair.  Both halves start
            # with the hex digit 'd', which is folded into the prefix.
            offset = code - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            sb.append('\\ud')
            sb.append(HEX[(high >> 8) & 0x0f])
            sb.append(HEX[(high >> 4) & 0x0f])
            sb.append(HEX[high & 0x0f])
            sb.append('\\ud')
            sb.append(HEX[(low >> 8) & 0x0f])
            sb.append(HEX[(low >> 4) & 0x0f])
            sb.append(HEX[low & 0x0f])

    return space.newtext(sb.build())
Esempio n. 46
0
class Serializer(object):
    """Accumulates a flat dump of bytecode objects in a StringBuilder.

    Integers are written with ``struct.pack("l", ...)``; strings and lists
    are written as a length followed by their contents.  ``finish()``
    returns everything written so far.
    """

    def __init__(self, space):
        self.space = space
        self.builder = StringBuilder()

    def write_int(self, i):
        """Append ``i`` packed in the native "l" format."""
        packed = struct.pack("l", i)
        self.builder.append(packed)

    def write_char(self, c):
        """Append the single character ``c``."""
        assert len(c) == 1
        self.builder.append(c)

    def write_str(self, s):
        """Append the length of ``s`` followed by ``s`` itself."""
        length = len(s)
        self.write_int(length)
        self.builder.append(s)

    def write_wrapped_item(self, w_item):
        """Let ``w_item`` serialize itself into the builder."""
        w_item.ll_serialize(self.builder)

    def write_wrapped_list(self, lst_w):
        """Append a count followed by each wrapped item."""
        self.write_int(len(lst_w))
        for w_entry in lst_w:
            self.write_wrapped_item(w_entry)

    def write_list_of_str(self, lst):
        """Append a count followed by each string."""
        self.write_int(len(lst))
        for text in lst:
            self.write_str(text)

    def write_list_of_int(self, lst):
        """Append a count followed by each integer."""
        self.write_int(len(lst))
        for number in lst:
            self.write_int(number)

    def write_list_of_char(self, lst):
        """Append a count followed by each character."""
        self.write_int(len(lst))
        for char in lst:
            self.write_char(char)

    def write_list_of_functions(self, lst):
        """Append a count followed by each declaration, tagged by kind.

        Functions are tagged "f" and class declarations "u"; anything else
        raises NotImplementedError.
        """
        from hippy.function import Function
        from hippy.klass import ClassDeclaration

        self.write_int(len(lst))
        for entry in lst:
            if isinstance(entry, Function):
                self.write_char("f")
                self.write_function(entry)
                continue
            if isinstance(entry, ClassDeclaration):
                self.write_char("u")
                self.write_class(entry)
                continue
            raise NotImplementedError

    def write_function(self, func):
        """Append the bytecode, names and types of ``func``."""
        # closuredecls, defaults_w, typehints are missing
        self.write_bytecode(func.bytecode)
        self.write_list_of_str(func.names)
        self.write_list_of_char(func.types)

    def write_class(self, klass):
        """Append the name, line number and methods of ``klass``."""
        # Attributes listed by the original author next to this method
        # (presumably the ones not serialized yet -- TODO confirm):
        # extends_name, property_decl, all_parents, access_flags,
        # const_decl, constructor_method, constants_w, initial_instance_dct_w,
        # base_interface_names, identifier, properties, methods
        self.write_str(klass.name)
        self.write_int(klass.lineno)
        methods = klass.method_decl
        self.write_int(len(methods))
        for method in methods.itervalues():
            self.write_str(method.func.name)
            self.write_int(method.access_flags)
            self.write_function(method.func)
        # XXX

    def write_bytecode(self, bc):
        """Append every serialized field of ``bc`` in order; return self."""
        self.write_str(bc.code)
        self.write_wrapped_list(bc.consts)
        self.write_str(bc.name)
        self.write_str(bc.filename)
        self.write_int(bc.startlineno)
        # The list attributes are passed as copies.
        self.write_list_of_str(bc.sourcelines[:])
        self.write_list_of_str(bc.names[:])
        self.write_list_of_str(bc.varnames[:])
        self.write_list_of_int(bc.superglobals[:])
        self.write_int(bc.this_var_num)
        self.write_list_of_functions(bc.late_declarations[:])
        self.write_list_of_functions(bc.classes[:])
        self.write_list_of_functions(bc.functions[:])
        self.write_list_of_int(bc.bc_mapping[:])
        return self

    def finish(self):
        """Return the accumulated output."""
        return self.builder.build()
Esempio n. 47
0
def surrogatepass_errors(space, w_exc):
    """Codec error callback that lets surrogate code points through.

    For a UnicodeEncodeError, every code point in the failing range must be
    a surrogate (0xd800-0xdfff); each one is written out in the byte layout
    of the exception's encoding, and ``(bytes, end)`` is returned.  For a
    UnicodeDecodeError, a single surrogate is decoded from the bytes at
    ``start`` and ``(text, start + bytelength)`` is returned.  In both
    cases the original exception is re-raised when the data is not a
    surrogate or the encoding is not one of the handled ones.  Any other
    exception type raises TypeError.
    """
    check_exception(space, w_exc)
    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        w_obj = space.getattr(w_exc, space.newtext('object'))
        w_obj = space.convert_arg_to_w_unicode(w_obj)
        start = space.int_w(space.getattr(w_exc, space.newtext('start')))
        w_end = space.getattr(w_exc, space.newtext('end'))
        encoding = space.text_w(space.getattr(w_exc,
                                              space.newtext('encoding')))
        bytelength, code = get_standard_encoding(encoding)
        if code == ENC_UNKNOWN:
            # Not supported, fail with original exception
            raise OperationError(space.type(w_exc), w_exc)
        end = space.int_w(w_end)
        builder = StringBuilder()
        # Convert the code point indexes into byte offsets in the utf8 data.
        start = w_obj._index_to_byte(start)
        end = w_obj._index_to_byte(end)
        obj = w_obj._utf8
        pos = start
        while pos < end:
            ch = rutf8.codepoint_at_pos(obj, pos)
            pos = rutf8.next_codepoint_pos(obj, pos)
            if ch < 0xd800 or ch > 0xdfff:
                # Not a surrogate, fail with original exception
                raise OperationError(space.type(w_exc), w_exc)
            if code == ENC_UTF8:
                # Three-byte UTF-8 sequence.
                builder.append(chr(0xe0 | (ch >> 12)))
                builder.append(chr(0x80 | ((ch >> 6) & 0x3f)))
                builder.append(chr(0x80 | (ch & 0x3f)))
            elif code == ENC_UTF16LE:
                builder.append(chr(ch & 0xff))
                builder.append(chr(ch >> 8))
            elif code == ENC_UTF16BE:
                builder.append(chr(ch >> 8))
                builder.append(chr(ch & 0xff))
            elif code == ENC_UTF32LE:
                # A surrogate fits in 16 bits, so the two high bytes are 0.
                builder.append(chr(ch & 0xff))
                builder.append(chr(ch >> 8))
                builder.append(chr(0))
                builder.append(chr(0))
            elif code == ENC_UTF32BE:
                builder.append(chr(0))
                builder.append(chr(0))
                builder.append(chr(ch >> 8))
                builder.append(chr(ch & 0xff))
        return space.newtuple([space.newbytes(builder.build()), w_end])
    elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
        start = space.int_w(space.getattr(w_exc, space.newtext('start')))
        obj = space.bytes_w(space.getattr(w_exc, space.newtext('object')))
        encoding = space.text_w(space.getattr(w_exc,
                                              space.newtext('encoding')))
        bytelength, code = get_standard_encoding(encoding)
        ch = 0
        # Try decoding a single surrogate character. If there are more,
        # let the codec call us again
        # (bytes past the end of the input are represented by -1)
        ch0 = ord(obj[start + 0]) if len(obj) > start + 0 else -1
        ch1 = ord(obj[start + 1]) if len(obj) > start + 1 else -1
        ch2 = ord(obj[start + 2]) if len(obj) > start + 2 else -1
        ch3 = ord(obj[start + 3]) if len(obj) > start + 3 else -1
        if code == ENC_UTF8:
            if (ch1 != -1 and ch2 != -1 and ch0 & 0xf0 == 0xe0
                    and ch1 & 0xc0 == 0x80 and ch2 & 0xc0 == 0x80):
                # it's a three-byte code
                ch = ((ch0 & 0x0f) << 12) + ((ch1 & 0x3f) << 6) + (ch2 & 0x3f)
        elif code == ENC_UTF16LE:
            ch = (ch1 << 8) | ch0
        elif code == ENC_UTF16BE:
            ch = (ch0 << 8) | ch1
        elif code == ENC_UTF32LE:
            ch = (ch3 << 24) | (ch2 << 16) | (ch1 << 8) | ch0
        elif code == ENC_UTF32BE:
            ch = (ch0 << 24) | (ch1 << 16) | (ch2 << 8) | ch3
        if ch < 0xd800 or ch > 0xdfff:
            # it's not a surrogate - fail
            ch = 0
        if ch == 0:
            # Also reached when 'code' matched none of the branches above,
            # since 'ch' then keeps its initial value of 0.
            raise OperationError(space.type(w_exc), w_exc)
        ch_utf8 = rutf8.unichr_as_utf8(ch, allow_surrogates=True)
        return space.newtuple(
            [space.newtext(ch_utf8, 1),
             space.newint(start + bytelength)])
    else:
        raise oefmt(space.w_TypeError,
                    "don't know how to handle %T in error callback", w_exc)
Esempio n. 48
0
 def fn(n):
     """Append to a throw-away StringBuilder and return the list's element.

     ``n`` is not used; the returned value is always 1.
     """
     builder = StringBuilder()
     numbers = [1]
     builder.append("hello world")
     first = numbers[0]
     return first
Esempio n. 49
0
    def here_doc(self):
        """Lex the opening of a here-document and yield its begin token.

        Reads an optional "-" flag, then the terminator marker (either
        enclosed in one of the three quote characters or a bare run of
        alphanumerics/underscores), then the rest of the current line.  A
        ``HeredocTerm`` built from these is stored in ``self.str_term`` and
        one token is emitted: "XSTRING_BEG" for a backquoted marker,
        "STRING_BEG" otherwise.  If no valid marker follows, the characters
        read are pushed back and nothing is yielded.
        """
        ch = self.read()

        # A leading "-" sets the 'indent' flag passed to HeredocTerm.
        indent = ch == "-"
        expand = True
        regexp = False
        if indent:
            ch = self.read()

        if ch in "'\"`":
            # Quoted marker: single quotes turn off 'expand', backquotes
            # select the XSTRING_BEG token.
            term = ch
            if term == "'":
                expand = False
            elif term == "`":
                regexp = True

            marker = StringBuilder()
            while True:
                ch = self.read()
                if ch == self.EOF:
                    self.unread()
                    break
                elif ch == term:
                    break
                else:
                    marker.append(ch)
        else:
            if not (ch.isalnum() or ch == "_"):
                # Not a here-document after all: undo every read done so
                # far (including the "-" if there was one).
                self.unread()
                if indent:
                    self.unread()
                return

            marker = StringBuilder()
            marker.append(ch)
            while True:
                ch = self.read()
                if ch == self.EOF or not (ch.isalnum() or ch == "_"):
                    self.unread()
                    break
                marker.append(ch)

        # Collect the remainder of the line that follows the marker; it is
        # handed to HeredocTerm together with the marker.
        last_line = StringBuilder()
        while True:
            ch = self.read()
            if ch in "\r\n":
                self.newline(ch)
                break
            elif ch == self.EOF:
                self.unread()
                break
            last_line.append(ch)

        self.str_term = HeredocTerm(self,
                                    marker.build(),
                                    last_line.build(),
                                    indent=indent,
                                    expand=expand)
        if regexp:
            yield self.emit("XSTRING_BEG")
        else:
            yield self.emit("STRING_BEG")
Esempio n. 50
0
    def descr_repr(self, space):
        """Return the wrapped repr text, of the form bytearray(b'...').

        Double quotes are used as delimiter only when the data contains a
        single quote and no double quote.  The backslash and the single
        quote are always written with a leading backslash; tab, carriage
        return and newline use their one-letter escapes; other characters
        outside printable ASCII are written as two-digit hex escapes.
        """
        data = self.data
        digits = "0123456789abcdef"

        # Good default size if there are no replacements.
        out = StringBuilder(len("bytearray(b'')") + len(data))
        out.append("bytearray(b")

        # Choose the delimiter: a double quote anywhere forces single
        # quotes, otherwise any single quote switches to double quotes.
        quote = "'"
        for ch in data:
            if ch == "'":
                quote = '"'
            elif ch == '"':
                quote = "'"
                break
        out.append(quote)

        for ch in data:
            if ch == '\\' or ch == "'":
                out.append('\\')
                out.append(ch)
            elif '\x20' <= ch < '\x7f':
                out.append(ch)
            elif ch == '\t':
                out.append('\\t')
            elif ch == '\r':
                out.append('\\r')
            elif ch == '\n':
                out.append('\\n')
            else:
                code = ord(ch)
                out.append('\\x')
                out.append(digits[code >> 4])
                out.append(digits[code & 0xF])

        out.append(quote)
        out.append(")")

        return space.wrap(out.build())
Esempio n. 51
0
    def subx(self, w_ptemplate, w_string, count):
        """Replace matches of this pattern in ``w_string``.

        ``w_ptemplate`` is either a callable, invoked with each match
        object, or a template string/bytes/buffer.  A template containing a
        backslash is handed to the ``_subx`` function of the ``re`` module;
        one without is used literally.  ``count`` limits the number of
        replacements; a false value (0) means no limit.

        Returns a pair ``(w_result, n)`` where ``n`` is the number of
        replacements performed.  Raises TypeError when a bytes pattern is
        used on a unicode ``w_string`` or the other way round.
        """
        space = self.space
        # use a (much faster) string builder (possibly utf8) if w_ptemplate and
        # w_string are both string or both unicode objects, and if w_ptemplate
        # is a literal
        use_builder = '\x00'  # or 'S'tring or 'U'nicode/UTF8
        is_buffer = False
        filter_as_string = None
        if space.isinstance_w(w_string, space.w_unicode):
            if not self.is_known_unicode():
                raise oefmt(
                    space.w_TypeError,
                    "cannot use a bytes pattern on a string-like object")
        else:
            if self.is_known_unicode():
                raise oefmt(
                    space.w_TypeError,
                    "cannot use a string pattern on a bytes-like object")
        if space.is_true(space.callable(w_ptemplate)):
            w_filter = w_ptemplate
            filter_is_callable = True
        else:
            if space.isinstance_w(w_ptemplate, space.w_unicode):
                filter_as_string = space.utf8_w(w_ptemplate)
                literal = '\\' not in filter_as_string
                if space.isinstance_w(w_string, space.w_unicode) and literal:
                    use_builder = 'U'
            elif space.isinstance_w(w_ptemplate, space.w_bytes):
                filter_as_string = space.bytes_w(w_ptemplate)
                literal = '\\' not in filter_as_string
                if space.isinstance_w(w_string, space.w_bytes) and literal:
                    use_builder = 'S'
            else:
                # NOTE(review): the w_bytes test below cannot succeed here,
                # because the preceding 'elif' already handled w_bytes; in
                # practice only the buffer path is taken.
                if space.isinstance_w(w_ptemplate, space.w_bytes):
                    filter_as_string = space.bytes_w(w_ptemplate)
                else:
                    filter_as_string = space.readbuf_w(w_ptemplate).as_str()
                    is_buffer = True
                literal = '\\' not in filter_as_string
                if space.isinstance_w(w_string, space.w_bytes) and literal:
                    use_builder = 'S'
            if literal:
                w_filter = w_ptemplate
                filter_is_callable = False
            else:
                # not a literal; hand it over to the template compiler
                # FIX for a CPython 3.5 bug: if w_ptemplate is a buffer
                # (e.g. a bytearray), convert it to a byte string here.
                if is_buffer:
                    w_ptemplate = space.newbytes(filter_as_string)
                w_re = import_re(space)
                w_filter = space.call_method(w_re, '_subx', self, w_ptemplate)
                filter_is_callable = space.is_true(space.callable(w_filter))
        #
        # XXX this is a bit of a mess, but it improves performance a lot
        ctx = self.make_ctx(w_string)
        # Exactly one of the two accumulators is used: 'strbuilder' on the
        # fast path, 'sublist_w' (a list of wrapped pieces joined at the
        # end) otherwise.
        sublist_w = strbuilder = None
        if use_builder != '\x00':
            assert filter_as_string is not None
            strbuilder = StringBuilder(ctx.end)
        else:
            sublist_w = []
        n = 0
        last_pos = ctx.ZERO
        while not count or n < count:
            pattern = self.code
            sub_jitdriver.jit_merge_point(
                self=self,
                use_builder=use_builder,
                filter_is_callable=filter_is_callable,
                filter_type=type(w_filter),
                ctx=ctx,
                pattern=pattern,
                w_filter=w_filter,
                strbuilder=strbuilder,
                filter_as_string=filter_as_string,
                count=count,
                w_string=w_string,
                n=n,
                last_pos=last_pos,
                sublist_w=sublist_w)
            space = self.space
            if not searchcontext(space, ctx, pattern):
                break
            # Copy the unmatched text between the previous match and this
            # one.
            if last_pos < ctx.match_start:
                _sub_append_slice(ctx, space, use_builder, sublist_w,
                                  strbuilder, last_pos, ctx.match_start)
            if not (last_pos == ctx.match_start == ctx.match_end and n > 0):
                # the above ignores empty matches on latest position
                last_pos = ctx.match_end
                if filter_is_callable:
                    w_match = self.getmatch(ctx, True)
                    # make a copy of 'ctx'; see test_sub_matches_stay_valid
                    ctx = self.fresh_copy(ctx)
                    w_piece = space.call_function(w_filter, w_match)
                    # A None result from the callable contributes nothing.
                    if not space.is_w(w_piece, space.w_None):
                        assert strbuilder is None
                        assert use_builder == '\x00'
                        sublist_w.append(w_piece)
                else:
                    if use_builder != '\x00':
                        assert filter_as_string is not None
                        assert strbuilder is not None
                        strbuilder.append(filter_as_string)
                    else:
                        sublist_w.append(w_filter)
                n += 1
            elif last_pos >= ctx.end:
                break  # empty match at the end: finished

            # Restart the search after this match; after an empty match,
            # step forward by one position so the loop makes progress.
            start = ctx.match_end
            if start == ctx.match_start:
                if start == ctx.end:
                    break
                start = ctx.next_indirect(start)
            ctx.reset(start)

        # Copy whatever follows the last match.
        if last_pos < ctx.end:
            _sub_append_slice(ctx, space, use_builder, sublist_w, strbuilder,
                              last_pos, ctx.end)
        if use_builder != '\x00':
            assert strbuilder is not None
            result_bytes = strbuilder.build()
            if use_builder == 'S':
                assert not isinstance(ctx, rsre_utf8.Utf8MatchContext)
                return space.newbytes(result_bytes), n
            elif use_builder == 'U':
                assert (isinstance(ctx, UnicodeAsciiMatchContext)
                        or isinstance(ctx, rsre_utf8.Utf8MatchContext))
                return space.newutf8(result_bytes,
                                     rutf8.codepoints_in_utf8(result_bytes)), n
            else:
                raise AssertionError(use_builder)
        else:
            # Slow path: join the collected pieces with an empty string of
            # the same type as w_string.
            if space.isinstance_w(w_string, space.w_unicode):
                w_emptystr = space.newutf8('', 0)
            else:
                w_emptystr = space.newbytes('')
            w_item = space.call_method(w_emptystr, 'join',
                                       space.newlist(sublist_w))
            return w_item, n
Esempio n. 52
0
    def readline(self, size=-1):
        """Read one line from the underlying file and return it as a string.

        ``size == 0`` returns "" without reading.  ``size > 0`` caps the
        result at ``size`` characters.  ``size < 0`` means no cap.

        On the character-by-character paths (``size > 0`` or universal
        newlines enabled) a newline that ends the line is kept in the
        result, and after EOF the error built by ``_error(ll_file)`` is
        raised if ``c_ferror`` reports a stream error.
        """
        self._check_closed()
        if size == 0:
            return ""
        elif size < 0 and not self._univ_newline:
            # Fast path: unlimited size and no newline translation, so let
            # _readline1 fill a fixed-size raw buffer in one call.
            with rffi.scoped_alloc_buffer(BASE_LINE_SIZE) as buf:
                c = self._readline1(buf.raw)
                if c >= 0:
                    # Non-negative result: c is passed to buf.str() as the
                    # amount of buffer content to return.
                    return buf.str(c)

                # this is the rare case: the line is longer than BASE_LINE_SIZE
                s = StringBuilder()
                while True:
                    # A negative result means the buffer could not hold the
                    # whole line: keep BASE_LINE_SIZE - 1 chars and read on.
                    s.append_charpsize(buf.raw, BASE_LINE_SIZE - 1)
                    c = self._readline1(buf.raw)
                    if c >= 0:
                        break
                # Final, partially filled chunk.
                s.append_charpsize(buf.raw, c)
            return s.build()
        else:  # size > 0 or self._univ_newline
            ll_file = self._ll_file
            c = 0
            s = StringBuilder()
            if self._univ_newline:
                # Work on local copies of the newline bookkeeping; they are
                # stored back on self once the loop is done.
                newlinetypes = self._newlinetypes
                skipnextlf = self._skipnextlf
                while size < 0 or s.getlength() < size:
                    c = c_getc(ll_file)
                    if c == EOF:
                        break
                    if skipnextlf:
                        # The previous character was '\r'.
                        skipnextlf = False
                        if c == ord('\n'):
                            # '\r\n' pair: the '\r' was already emitted as
                            # '\n', so drop this '\n' and fetch the next char.
                            newlinetypes |= NEWLINE_CRLF
                            c = c_getc(ll_file)
                            if c == EOF:
                                break
                        else:
                            # The earlier '\r' stood alone.
                            newlinetypes |= NEWLINE_CR
                    if c == ord('\r'):
                        # Translate '\r' to '\n'; remember it so that a
                        # directly following '\n' can be swallowed.
                        skipnextlf = True
                        c = ord('\n')
                    elif c == ord('\n'):
                        newlinetypes |= NEWLINE_LF
                    s.append(chr(c))
                    if c == ord('\n'):
                        break
                if c == EOF:
                    # EOF right after a '\r': count it as a lone CR.
                    if skipnextlf:
                        newlinetypes |= NEWLINE_CR
                self._newlinetypes = newlinetypes
                self._skipnextlf = skipnextlf
            else:
                # Plain bounded read: stop at size chars, EOF, or after '\n'.
                while s.getlength() < size:
                    c = c_getc(ll_file)
                    if c == EOF:
                        break
                    s.append(chr(c))
                    if c == ord('\n'):
                        break
            if c == EOF:
                # EOF from getc can also signal a read error; check for it.
                if c_ferror(ll_file):
                    raise _error(ll_file)
            return s.build()
Esempio n. 53
0
    def descr_repr(self, space):
        """Build the ``bytearray(b'...')`` style representation.

        The delimiter is a double quote only when the data contains a
        single quote and no double quote; otherwise it is a single quote.
        Backslashes and single quotes are backslash-escaped, tab, carriage
        return and line feed use their short escapes, and any other
        character outside 0x20..0x7e becomes a two-digit lowercase hex
        escape.  The result is wrapped with ``space.newtext``.
        """
        data, lo, hi, _ = self._convert_idx_params(space, None, None)
        hexdigits = "0123456789abcdef"

        # Pick the delimiter.  The first double quote found ends the scan
        # and forces the single-quote delimiter.
        delim = "'"
        for pos in range(lo, hi):
            ch = data[pos]
            if ch == "'":
                delim = '"'
            elif ch == '"':
                delim = "'"
                break

        # Capacity is sized for the case where nothing needs escaping.
        out = StringBuilder(len("bytearray(b'')") + (hi - lo))
        out.append("bytearray(b")
        out.append(delim)

        for pos in range(lo, hi):
            ch = data[pos]
            if ch == "'" or ch == '\\':
                out.append('\\')
                out.append(ch)
                continue
            if ch == '\n':
                out.append('\\n')
            elif ch == '\r':
                out.append('\\r')
            elif ch == '\t':
                out.append('\\t')
            elif ch < '\x20' or ch >= '\x7f':
                code = ord(ch)
                out.append('\\x')
                out.append(hexdigits[code >> 4])
                out.append(hexdigits[code & 0xF])
            else:
                out.append(ch)

        out.append(delim)
        out.append(")")
        return space.newtext(out.build())
Esempio n. 54
0
    def readline_w(self, space, w_limit=None):
        """Read one line using only this object's ``read()`` and ``peek()``.

        A (slowish) generic readline() kept for backwards compatibility.
        Data is accumulated until a newline has been read, ``read()``
        returns an empty bytes object, or the size limit is reached.

        ``w_limit`` is converted with ``convert_size``; a negative result
        means "no limit".  When the object has a ``peek`` attribute, the
        peeked data is scanned for a newline so that several bytes can be
        requested in one ``read()`` call; otherwise one byte is read at a
        time.

        Returns the accumulated data via ``space.newbytes``.

        Raises an app-level IOError if ``peek()`` or ``read()`` returns a
        non-bytes object.  An OperationError from either call is retried
        when ``trap_eintr`` reports it as handled and re-raised otherwise.
        """
        limit = convert_size(space, w_limit)

        has_peek = space.findattr(self, space.newtext("peek"))

        builder = StringBuilder()
        size = 0

        while limit < 0 or size < limit:
            # Without peek() only a single byte can be consumed safely.
            nreadahead = 1

            if has_peek:
                try:
                    w_readahead = space.call_method(self, "peek",
                                                    space.newint(1))
                except OperationError as e:
                    if trap_eintr(space, e):
                        continue
                    raise
                if not space.isinstance_w(w_readahead, space.w_bytes):
                    raise oefmt(
                        space.w_IOError,
                        "peek() should have returned a bytes object, "
                        "not '%T'", w_readahead)
                length = space.len_w(w_readahead)
                if length > 0:
                    # Count bytes up to and including the first newline,
                    # bounded by the peeked data and by the limit (if any).
                    # NOTE(review): the bound is `limit` itself, not the
                    # remaining `limit - size`; this appears to mirror
                    # CPython's _io implementation -- confirm before changing.
                    buf = space.bytes_w(w_readahead)
                    n = 0
                    while n < length and (limit < 0 or n < limit):
                        n += 1
                        if buf[n - 1] == '\n':
                            break
                    nreadahead = n

            try:
                w_read = space.call_method(self, "read",
                                           space.newint(nreadahead))
            except OperationError as e:
                if trap_eintr(space, e):
                    continue
                raise
            if not space.isinstance_w(w_read, space.w_bytes):
                # This check is about the result of read(), so say so.
                raise oefmt(
                    space.w_IOError,
                    "read() should have returned a bytes object, not "
                    "'%T'", w_read)
            read = space.bytes_w(w_read)
            if not read:
                # An empty result ends the line.
                break

            size += len(read)
            builder.append(read)

            if read[-1] == '\n':
                break

        return space.newbytes(builder.build())
Esempio n. 55
0
 def descr___str__(self, space):
      """Return a wrapped multi-line listing of the array flags.

      Each line is a fixed label followed by whatever ``get_tf_str``
      returns for ``self.flags`` and the matching ``NPY.ARRAY_*``
      constant.  Lines are separated by newlines, with none after the
      last entry.
      """
      # Labels carry their own leading newline, so the first entry has none.
      rows = [
          ('  C_CONTIGUOUS : ', NPY.ARRAY_C_CONTIGUOUS),
          ('\n  F_CONTIGUOUS : ', NPY.ARRAY_F_CONTIGUOUS),
          ('\n  OWNDATA : ', NPY.ARRAY_OWNDATA),
          ('\n  WRITEABLE : ', NPY.ARRAY_WRITEABLE),
          ('\n  ALIGNED : ', NPY.ARRAY_ALIGNED),
          ('\n  UPDATEIFCOPY : ', NPY.ARRAY_UPDATEIFCOPY),
      ]
      out = StringBuilder()
      for label, flag_const in rows:
          out.append(label)
          out.append(get_tf_str(self.flags, flag_const))
      return space.wrap(out.build())
Esempio n. 56
0
def descr_upper_s(s):
    """Return a copy of ``s`` with each character passed through
    ``unicodedb.toupper``.

    Every character is converted to its ordinal, mapped with
    ``unicodedb.toupper`` and converted back with ``chr``.

    NOTE(review): ``chr`` only accepts byte-range values here, so this
    presumably expects input whose mapped ordinals stay in that range
    (e.g. ASCII) -- confirm against callers.
    """
    out = StringBuilder(len(s))
    for ch in s:
        upper_code = unicodedb.toupper(ord(ch))
        out.append(chr(upper_code))
    return out.build()