Esempio n. 1
0
File: rstr.py Progetto: charred/pypy
def _new_copy_contents_fun(SRC_TP, DST_TP, CHAR_TP, name):
    """Build the raw-memory copy helpers for one kind of low-level string.

    SRC_TP and DST_TP are the source and destination container types (both
    are accessed through their 'chars' array field), CHAR_TP is the item
    type used to compute byte sizes, and 'name' is spliced into the names
    of the generated functions.

    Returns the pair (copy_<name>_to_raw, copy_<name>_contents).
    """
    @specialize.arg(0)
    def _str_ofs(TP, item):
        # Offset of chars[item] relative to the start of a TP object.
        return (llmemory.offsetof(TP, 'chars') +
                llmemory.itemoffsetof(TP.chars, 0) +
                llmemory.sizeof(CHAR_TP) * item)

    @signature(types.any(), types.any(), types.int(), returns=types.any())
    @specialize.arg(0)
    def _get_raw_buf(TP, src, ofs):
        """Return the raw address of src.chars[ofs].

        'src' must point to a TP and 'ofs' must be non-negative.  The
        result is a plain address: it does not keep 'src' alive.
        """
        assert typeOf(src).TO == TP
        assert ofs >= 0
        return llmemory.cast_ptr_to_adr(src) + _str_ofs(TP, ofs)
    _get_raw_buf._always_inline_ = True

    @jit.oopspec('stroruni.copy_contents(src, dst, srcstart, dststart, length)')
    @signature(types.any(), types.any(), types.int(), types.int(), types.int(), returns=types.none())
    def copy_string_contents(src, dst, srcstart, dststart, length):
        """Copies 'length' characters from the 'src' string to the 'dst'
        string, starting at position 'srcstart' and 'dststart'."""
        # xxx Warning: don't try to do this at home.  It relies on a lot
        # of details to be sure that it works correctly in all cases.
        # Notably: no GC operation at all from the first cast_ptr_to_adr()
        # because it might move the strings.  The keepalive_until_here()
        # are obscurely essential to make sure that the strings stay alive
        # longer than the raw_memcopy().
        assert length >= 0
        # from here, no GC operations can happen
        # The raw addresses get their own names: 'src' and 'dst' must keep
        # referring to the GC objects so that the keepalive_until_here()
        # calls below really apply to the strings and not to addresses.
        asrc = _get_raw_buf(SRC_TP, src, srcstart)
        adst = _get_raw_buf(DST_TP, dst, dststart)
        llmemory.raw_memcopy(asrc, adst, llmemory.sizeof(CHAR_TP) * length)
        # end of "no GC" section
        keepalive_until_here(src)
        keepalive_until_here(dst)
    copy_string_contents._always_inline_ = True
    copy_string_contents = func_with_new_name(copy_string_contents,
                                              'copy_%s_contents' % name)

    @jit.oopspec('stroruni.copy_string_to_raw(src, ptrdst, srcstart, length)')
    def copy_string_to_raw(src, ptrdst, srcstart, length):
        """
        Copies 'length' characters from the 'src' string to the 'ptrdst'
        buffer, starting at position 'srcstart'.
        'ptrdst' must be a non-gc Array of Char.
        """
        # xxx Warning: same note as above apply: don't do this at home
        assert length >= 0
        # from here, no GC operations can happen
        # As above, keep 'src' bound to the GC string for the keepalive.
        asrc = _get_raw_buf(SRC_TP, src, srcstart)
        adst = llmemory.cast_ptr_to_adr(ptrdst)
        adst = adst + llmemory.itemoffsetof(typeOf(ptrdst).TO, 0)
        llmemory.raw_memcopy(asrc, adst, llmemory.sizeof(CHAR_TP) * length)
        # end of "no GC" section
        keepalive_until_here(src)
    copy_string_to_raw._always_inline_ = True
    copy_string_to_raw = func_with_new_name(copy_string_to_raw, 'copy_%s_to_raw' % name)

    return copy_string_to_raw, copy_string_contents
Esempio n. 2
0
def test_return_any():
    """returns=types.any() leaves the inferred return type in place, and a
    misuse of that result is still reported by the annotator."""
    @signature(types.int(), returns=types.any())
    def f(x):
        return x
    inferred = getsig(f)
    assert inferred == [model.SomeInteger(), model.SomeInteger()]

    @signature(types.str(), returns=types.any())
    def cannot_add_string(x):
        return f(3) + x
    failure = py.test.raises(Exception, annotate_at, cannot_add_string).value
    details = repr(failure.args)
    assert 'Blocked block' in details
    assert 'cannot_add_string' in details
Esempio n. 3
0
def test_return_any():
    """returns=types.any() leaves the inferred return type in place, and a
    misuse of that result raises an AnnotatorError naming the caller."""
    @signature(types.int(), returns=types.any())
    def f(x):
        return x

    inferred = getsig(f)
    assert inferred == [model.SomeInteger(), model.SomeInteger()]

    @signature(types.str(), returns=types.any())
    def cannot_add_string(x):
        return f(3) + x

    failure = py.test.raises(model.AnnotatorError, annotate_at,
                             cannot_add_string).value
    message = str(failure)
    assert "Blocked block" in message
    assert "cannot_add_string" in message
Esempio n. 4
0
def test_any_as_argument():
    """An argument declared as types.any() accepts ints and floats from
    callers, while a caller passing a string is rejected."""
    @signature(types.any(), types.int(), returns=types.float())
    def f(x, y):
        return x + y

    # caller passing an int through the any() slot
    @signature(types.int(), returns=types.float())
    def g(x):
        return f(x, x)

    int_caller_sig = getsig(g)
    assert int_caller_sig == [model.SomeInteger(), model.SomeFloat()]

    # caller passing a float through the any() slot
    @signature(types.float(), returns=types.float())
    def g(x):
        return f(x, 4)

    float_caller_sig = getsig(g)
    assert float_caller_sig == [model.SomeFloat(), model.SomeFloat()]

    # a string cannot be added to an int: annotation must fail
    @signature(types.str(), returns=types.int())
    def cannot_add_string(x):
        return f(x, 2)

    failure = py.test.raises(model.AnnotatorError, annotate_at,
                             cannot_add_string).value
    message = str(failure)
    assert "Blocked block" in message
Esempio n. 5
0
    def prepare_const(self, n):
        """Return malloc(self.LIST, n, immortal=True)."""
        return malloc(self.LIST, n, immortal=True)


# ____________________________________________________________
#
#  Low-level methods.  These can be run for testing, but are meant to
#  be direct_call'ed from rtyped flow graphs, which means that they will
#  get flowed and annotated, mostly with SomePtr.

# adapted C code

# The JIT only looks inside when both the current length of l.items and
# the requested size are constants.
@jit.look_inside_iff(lambda l, newsize, overallocate: jit.isconstant(len(l.items)) and jit.isconstant(newsize))
@signature(types.any(), types.int(), types.bool(), returns=types.none())
def _ll_list_resize_hint_really(l, newsize, overallocate):
    """
    Ensure l.items has room for at least newsize elements.  Note that
    l.items may change, and even if newsize is less than l.length on
    entry.
    """
    # This over-allocates proportional to the list size, making room
    # for additional growth.  The over-allocation is mild, but is
    # enough to give linear-time amortized behavior over a long
    # sequence of appends() in the presence of a poorly-performing
    # system malloc().
    # The growth pattern is:  0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
    # NOTE(review): only the newsize <= 0 case is handled by the code
    # visible here; the over-allocation described above and any use of
    # 'overallocate' are not shown -- confirm against the full source.
    if newsize <= 0:
        # A negative size is a caller error; zero simply empties the list.
        ll_assert(newsize == 0, "negative list length")
        l.length = 0
Esempio n. 6
0
def _new_copy_contents_fun(SRC_TP, DST_TP, CHAR_TP, name):
    """Build the raw-memory copy helpers for one kind of low-level string.

    SRC_TP and DST_TP are the source and destination container types (both
    are accessed through their 'chars' array field), CHAR_TP is the item
    type used to compute byte sizes, and 'name' is spliced into the names
    of the generated functions.

    Returns the triple
    (copy_<name>_to_raw, copy_raw_to_<name>, copy_<name>_contents).
    """
    @specialize.arg(0)
    def _str_ofs(TP, item):
        # Offset of chars[item] relative to the start of a TP object.
        return (llmemory.offsetof(TP, 'chars') +
                llmemory.itemoffsetof(TP.chars, 0) +
                llmemory.sizeof(CHAR_TP) * item)

    @signature(types.any(), types.any(), types.int(), returns=types.any())
    @specialize.arg(0)
    def _get_raw_buf(TP, src, ofs):
        """
        WARNING: dragons ahead.
        Return the address of the internal char* buffer of the low level
        string. The return value is valid as long as no GC operation occurs,
        so you must ensure that it will be used inside a "GC safe" section,
        for example by marking your function with @rgc.no_collect
        """
        assert typeOf(src).TO == TP
        assert ofs >= 0
        return llmemory.cast_ptr_to_adr(src) + _str_ofs(TP, ofs)
    _get_raw_buf._always_inline_ = True

    @jit.oopspec('stroruni.copy_contents(src, dst, srcstart, dststart, length)')
    @signature(types.any(), types.any(), types.int(), types.int(), types.int(), returns=types.none())
    def copy_string_contents(src, dst, srcstart, dststart, length):
        """Copies 'length' characters from the 'src' string to the 'dst'
        string, starting at position 'srcstart' and 'dststart'."""
        # xxx Warning: don't try to do this at home.  It relies on a lot
        # of details to be sure that it works correctly in all cases.
        # Notably: no GC operation at all from the first cast_ptr_to_adr()
        # because it might move the strings.  The keepalive_until_here()
        # are obscurely essential to make sure that the strings stay alive
        # longer than the raw_memcopy().
        assert length >= 0
        # Both ranges must lie inside their respective 'chars' arrays.
        ll_assert(srcstart >= 0, "copystrc: negative srcstart")
        ll_assert(srcstart + length <= len(src.chars), "copystrc: src ovf")
        ll_assert(dststart >= 0, "copystrc: negative dststart")
        ll_assert(dststart + length <= len(dst.chars), "copystrc: dst ovf")
        # from here, no GC operations can happen
        # The raw addresses use separate names so that 'src' and 'dst'
        # still refer to the GC objects for the keepalives below.
        asrc = _get_raw_buf(SRC_TP, src, srcstart)
        adst = _get_raw_buf(DST_TP, dst, dststart)
        llmemory.raw_memcopy(asrc, adst, llmemory.sizeof(CHAR_TP) * length)
        # end of "no GC" section
        keepalive_until_here(src)
        keepalive_until_here(dst)
    copy_string_contents._always_inline_ = True
    copy_string_contents = func_with_new_name(copy_string_contents,
                                              'copy_%s_contents' % name)

    @jit.oopspec('stroruni.copy_string_to_raw(src, ptrdst, srcstart, length)')
    def copy_string_to_raw(src, ptrdst, srcstart, length):
        """
        Copies 'length' characters from the 'src' string to the 'ptrdst'
        buffer, starting at position 'srcstart'.
        'ptrdst' must be a non-gc Array of Char.
        """
        # xxx Warning: same note as above apply: don't do this at home
        assert length >= 0
        # from here, no GC operations can happen
        asrc = _get_raw_buf(SRC_TP, src, srcstart)
        # Destination address: start of item 0 of the raw array.
        adst = llmemory.cast_ptr_to_adr(ptrdst)
        adst = adst + llmemory.itemoffsetof(typeOf(ptrdst).TO, 0)
        llmemory.raw_memcopy(asrc, adst, llmemory.sizeof(CHAR_TP) * length)
        # end of "no GC" section
        keepalive_until_here(src)
    copy_string_to_raw._always_inline_ = True
    copy_string_to_raw = func_with_new_name(copy_string_to_raw, 'copy_%s_to_raw' % name)

    @jit.dont_look_inside
    @signature(types.any(), types.any(), types.int(), types.int(),
               returns=types.none())
    def copy_raw_to_string(ptrsrc, dst, dststart, length):
        """
        Copies 'length' characters from item 0 of the 'ptrsrc' buffer into
        the 'dst' string, starting at position 'dststart'.
        """
        # xxx Warning: same note as above apply: don't do this at home
        assert length >= 0
        # from here, no GC operations can happen
        # NOTE(review): the destination is checked against SRC_TP, not
        # DST_TP -- presumably intended for the SRC_TP == DST_TP case;
        # confirm against the callers.
        adst = _get_raw_buf(SRC_TP, dst, dststart)
        asrc = llmemory.cast_ptr_to_adr(ptrsrc)

        asrc = asrc + llmemory.itemoffsetof(typeOf(ptrsrc).TO, 0)
        llmemory.raw_memcopy(asrc, adst, llmemory.sizeof(CHAR_TP) * length)
        # end of "no GC" section
        keepalive_until_here(dst)
    copy_raw_to_string._always_inline_ = True
    copy_raw_to_string = func_with_new_name(copy_raw_to_string,
                                              'copy_raw_to_%s' % name)

    return copy_string_to_raw, copy_raw_to_string, copy_string_contents
Esempio n. 7
0
    entries[i].f_valid = False


def ll_mark_deleted_in_key(entries, i):
    """Overwrite the key of entries[i] with the dummy value taken from the
    type of 'entries'."""
    entries_type = lltype.typeOf(entries).TO
    entries[i].key = entries_type.dummy_obj.ll_dummy_value


def ll_mark_deleted_in_value(entries, i):
    """Overwrite the value of entries[i] with the dummy value taken from
    the type of 'entries'."""
    entries_type = lltype.typeOf(entries).TO
    entries[i].value = entries_type.dummy_obj.ll_dummy_value


@signature(types.any(), types.int(), returns=types.any())
def ll_hash_from_cache(entries, i):
    """Return the hash stored in the f_hash field of entries[i]."""
    entry = entries[i]
    return entry.f_hash


@signature(types.any(), types.int(), returns=types.any())
def ll_hash_recomputed(entries, i):
    """Return the hash of entries[i].key, computed with the fasthashfn
    attached to the type of 'entries'."""
    hashfn = lltype.typeOf(entries).TO.fasthashfn
    return hashfn(entries[i].key)


def ll_keyhash_custom(d, key):
    """Hash 'key' by invoking d.fnkeyhash through objectmodel.hlinvoke,
    using the r_rdict_hashfn attached to the type of 'd'."""
    dict_type = lltype.typeOf(d).TO
    hashfn_repr = dict_type.r_rdict_hashfn
    return objectmodel.hlinvoke(hashfn_repr, d.fnkeyhash, key)

Esempio n. 8
0
class LLHelpers(AbstractLLHelpers):
    from rpython.rtyper.annlowlevel import llstr, llunicode

    @staticmethod
    @jit.elidable
    def ll_str_mul(s, times):
        """Return a new string holding the contents of 's' repeated 'times'
        times.

        A negative 'times' is treated as zero.  Raises MemoryError when the
        total length overflows.
        """
        if times < 0:
            times = 0
        unit = len(s.chars)
        try:
            size = ovfcheck(unit * times)
        except OverflowError:
            raise MemoryError
        newstr = s.malloc(size)
        filled = 0
        if size > 0:
            # seed the result with a single copy of the source
            s.copy_contents(s, newstr, 0, 0, unit)
            filled = unit
        while filled < size:
            # duplicate the already-filled prefix, clamped to what is left
            chunk = size - filled
            if filled <= chunk:
                chunk = filled
            s.copy_contents(newstr, newstr, 0, filled, chunk)
            filled += chunk
        return newstr

    @staticmethod
    @jit.elidable
    def ll_char_mul(ch, times):
        """Return a new string made of 'ch' repeated 'times' times.

        The result is allocated with mallocstr when 'ch' is a Char and with
        mallocunicode otherwise.  A negative 'times' is treated as zero.
        """
        alloc = mallocstr if typeOf(ch) is Char else mallocunicode
        if times < 0:
            times = 0
        newstr = alloc(times)
        # XXX we can use memset here, not sure how useful this is
        for j in range(times):
            newstr.chars[j] = ch
        return newstr

    @staticmethod
    def ll_strlen(s):
        """Return the number of items in s.chars."""
        chars = s.chars
        return len(chars)

    @staticmethod
    @signature(types.any(), types.int(), returns=types.any())
    def ll_stritem_nonneg(s, i):
        """Return s.chars[i]; 'i' is asserted to be a valid non-negative
        index."""
        data = s.chars
        ll_assert(i >= 0, "negative str getitem index")
        ll_assert(i < len(data), "str getitem index out of bound")
        return data[i]

    @staticmethod
    def ll_chr2str(ch):
        """Return a new one-item string containing 'ch'.

        The result is allocated with mallocstr when 'ch' is a Char and with
        mallocunicode otherwise.
        """
        alloc = mallocstr if typeOf(ch) is Char else mallocunicode
        single = alloc(1)
        single.chars[0] = ch
        return single

    # @jit.look_inside_iff(lambda str: jit.isconstant(len(str.chars)) and len(str.chars) == 1)
    @staticmethod
    @jit.oopspec("str.str2unicode(str)")
    def ll_str2unicode(str):
        """Return a new unicode string with the items of 'str' cast to
        UniChar.

        Raises UnicodeDecodeError as soon as an item with ord() > 127 is
        met.
        """
        size = len(str.chars)
        result = mallocunicode(size)
        i = 0
        while i < size:
            if ord(str.chars[i]) > 127:
                raise UnicodeDecodeError
            result.chars[i] = cast_primitive(UniChar, str.chars[i])
            i += 1
        return result

    @staticmethod
    def ll_str2bytearray(str):
        """Return a newly malloc'ed BYTEARRAY holding a copy of the items of
        'str'."""
        from rpython.rtyper.lltypesystem.rbytearray import BYTEARRAY

        size = len(str.chars)
        result = malloc(BYTEARRAY, size)
        i = 0
        while i < size:
            result.chars[i] = str.chars[i]
            i += 1
        return result

    @staticmethod
    def ll_strhash(s):
        """Return the hash of 's', or 0 when 's' is false (NULL).

        The value goes through jit.conditional_call_elidable with s.hash
        and LLHelpers._ll_strhash.
        """
        if not s:
            return 0
        return jit.conditional_call_elidable(s.hash,
                                             LLHelpers._ll_strhash, s)

    @staticmethod
    @dont_inline
    @jit.dont_look_inside
    def _ll_strhash(s):
        """Compute the hash of 's', store it in s.hash and return it.

        A computed value of 0 is replaced by 29872897.
        """
        # unlike CPython, there is no reason to avoid to return -1
        # but our malloc initializes the memory to zero, so we use zero as the
        # special non-computed-yet value.  Also, jit.conditional_call_elidable
        # always checks for zero, for now.
        computed = ll_hash_string(s)
        if computed == 0:
            computed = 29872897
        s.hash = computed
        return computed

    @staticmethod
    def ll_length(s):
        """Return the number of items in s.chars."""
        chars = s.chars
        return len(chars)

    @staticmethod
    def ll_strfasthash(s):
        """Return s.hash, which is assumed to be already computed (it is
        asserted to be non-zero)."""
        cached = s.hash
        ll_assert(cached != 0, "ll_strfasthash: hash==0")
        return cached

    @staticmethod
    @jit.elidable
    @jit.oopspec('stroruni.concat(s1, s2)')
    def ll_strconcat(s1, s2):
        """Return a new object holding the contents of 's1' followed by the
        contents of 's2'.

        When 's1' is a Ptr(STR) the result is allocated through 's2',
        otherwise through 's1'; operands that are Ptr(STR) are copied with
        copy_contents_from_str, the others with copy_contents.
        """
        len1 = s1.length()
        len2 = s2.length()
        # a single '+' like this is allowed to overflow: it gets
        # a negative result, and the gc will complain
        total = len1 + len2
        # the typechecks below are if TP == BYTEARRAY
        if typeOf(s1) == Ptr(STR):
            newstr = s2.malloc(total)
            newstr.copy_contents_from_str(s1, newstr, 0, 0, len1)
        else:
            newstr = s1.malloc(total)
            newstr.copy_contents(s1, newstr, 0, 0, len1)
        if typeOf(s2) == Ptr(STR):
            newstr.copy_contents_from_str(s2, newstr, 0, len1, len2)
        else:
            newstr.copy_contents(s2, newstr, 0, len1, len2)
        return newstr

    @staticmethod
    @jit.elidable
    def ll_strip(s, ch, left, right):
        """Return a copy of 's' without its leading and/or trailing 'ch'
        items.

        'left' and 'right' select which ends are stripped.  An empty input
        or an empty result yields s.empty().
        """
        total = len(s.chars)
        if total == 0:
            return s.empty()
        first = 0
        last = total - 1
        if left:
            while first <= last and s.chars[first] == ch:
                first += 1
        if right:
            while first <= last and s.chars[last] == ch:
                last -= 1
        if last < first:
            return s.empty()
        kept = last - first + 1
        stripped = s.malloc(kept)
        s.copy_contents(s, stripped, first, 0, kept)
        return stripped

    @staticmethod
    @jit.elidable
    def ll_strip_default(s, left, right):
        """Return a copy of 's' without the leading and/or trailing items
        for which isspace() is true.

        'left' and 'right' select which ends are stripped.  An empty input
        or an empty result yields s.empty().
        """
        total = len(s.chars)
        if total == 0:
            return s.empty()
        first = 0
        last = total - 1
        if left:
            while first <= last and s.chars[first].isspace():
                first += 1
        if right:
            while first <= last and s.chars[last].isspace():
                last -= 1
        if last < first:
            return s.empty()
        kept = last - first + 1
        stripped = s.malloc(kept)
        s.copy_contents(s, stripped, first, 0, kept)
        return stripped

    @staticmethod
    @jit.elidable
    def ll_strip_multiple(s, s2, left, right):
        """Return a copy of 's' without the leading and/or trailing items
        that LLHelpers.ll_contains reports as members of 's2'.

        'left' and 'right' select which ends are stripped.  An empty input
        or an empty result yields s.empty().
        """
        total = len(s.chars)
        if total == 0:
            return s.empty()
        first = 0
        last = total - 1
        if left:
            while first <= last and LLHelpers.ll_contains(s2, s.chars[first]):
                first += 1
        if right:
            while first <= last and LLHelpers.ll_contains(s2, s.chars[last]):
                last -= 1
        if last < first:
            return s.empty()
        kept = last - first + 1
        stripped = s.malloc(kept)
        s.copy_contents(s, stripped, first, 0, kept)
        return stripped

    @staticmethod
    @jit.elidable
    def ll_upper(s):
        """Return a new string in which every item of 's' has been mapped
        through LLHelpers.ll_upper_char; an empty 's' yields s.empty()."""
        source = s.chars
        size = len(source)
        if size == 0:
            return s.empty()
        converted = mallocstr(size)
        #           ^^^^^^^^^ specifically to explode on unicode
        for i in range(size):
            converted.chars[i] = LLHelpers.ll_upper_char(source[i])
        return converted

    @staticmethod
    @jit.elidable
    def ll_lower(s):
        """Return a new string in which every item of 's' has been mapped
        through LLHelpers.ll_lower_char; an empty 's' yields s.empty()."""
        source = s.chars
        size = len(source)
        if size == 0:
            return s.empty()
        converted = mallocstr(size)
        #           ^^^^^^^^^ specifically to explode on unicode
        for i in range(size):
            converted.chars[i] = LLHelpers.ll_lower_char(source[i])
        return converted

    @staticmethod
    def ll_join(s, length, items):
        """Join the first 'length' strings of 'items', using 's' as the
        separator.

        Returns s.empty() when 'length' is 0.  Raises MemoryError when the
        summed item lengths or the summed separator lengths overflow.
        """
        sep_len = len(s.chars)
        if length == 0:
            return s.empty()
        items_len = 0
        for k in range(length):
            try:
                items_len = ovfcheck(items_len + len(items[k].chars))
            except OverflowError:
                raise MemoryError
        try:
            seps_len = ovfcheck(sep_len * (length - 1))
        except OverflowError:
            raise MemoryError
        # a single '+' at the end is allowed to overflow: it gets
        # a negative result, and the gc will complain
        joined = s.malloc(items_len + seps_len)
        # first item goes in without a separator in front of it
        write_at = len(items[0].chars)
        s.copy_contents(items[0], joined, 0, 0, write_at)
        for k in range(1, length):
            s.copy_contents(s, joined, 0, write_at, sep_len)
            write_at += sep_len
            piece_len = len(items[k].chars)
            s.copy_contents(items[k], joined, 0, write_at, piece_len)
            write_at += piece_len
        return joined

    @staticmethod
    @jit.elidable
    @jit.oopspec('stroruni.cmp(s1, s2)')
    def ll_strcmp(s1, s2):
        """Compare 's1' and 's2' item by item.

        Returns the difference of the ord() values at the first position
        where they differ, or else len(s1.chars) - len(s2.chars).
        """
        # NOTE(review): the two NULL cases below return booleans (True when
        # both are false, False when only one is) while every other path
        # returns an integer difference -- confirm this is what callers
        # expect.
        if not s1 and not s2:
            return True
        if not s1 or not s2:
            return False
        left = s1.chars
        right = s2.chars
        len1 = len(left)
        len2 = len(right)

        common = len1 if len1 < len2 else len2
        for i in range(common):
            diff = ord(left[i]) - ord(right[i])
            if diff != 0:
                return diff
        return len1 - len2

    @staticmethod
    @jit.elidable
    @jit.oopspec('stroruni.equal(s1, s2)')
    def ll_streq(s1, s2):
        """Return True when 's1' and 's2' are the same object (including
        both NULL) or hold the same items; False otherwise."""
        if s1 == s2:       # also if both are NULLs
            return True
        if not s1 or not s2:
            return False
        size = len(s1.chars)
        if size != len(s2.chars):
            return False
        left = s1.chars
        right = s2.chars
        for k in range(size):
            if left[k] != right[k]:
                return False
        return True

    @staticmethod
    @jit.elidable
    def ll_startswith(s1, s2):
        """Return True when the items of 's2' are a prefix of 's1'."""
        prefix_len = len(s2.chars)
        if len(s1.chars) < prefix_len:
            return False
        text = s1.chars
        prefix = s2.chars
        for k in range(prefix_len):
            if text[k] != prefix[k]:
                return False
        return True

    @staticmethod
    def ll_startswith_char(s, ch):
        """Return True when 's' is non-empty and its first item equals
        'ch'."""
        if len(s.chars) == 0:
            return False
        return s.chars[0] == ch

    @staticmethod
    @jit.elidable
    def ll_endswith(s1, s2):
        """Return True when the items of 's2' are a suffix of 's1'."""
        text_len = len(s1.chars)
        suffix_len = len(s2.chars)
        if text_len < suffix_len:
            return False
        text = s1.chars
        suffix = s2.chars
        # index in 'text' where the candidate suffix begins
        base = text_len - suffix_len
        for k in range(suffix_len):
            if text[base + k] != suffix[k]:
                return False
        return True

    @staticmethod
    def ll_endswith_char(s, ch):
        """Return True when 's' is non-empty and its last item equals
        'ch'."""
        size = len(s.chars)
        if size == 0:
            return False
        return s.chars[size - 1] == ch

    @staticmethod
    @jit.elidable
    @signature(types.any(), types.any(), types.int(), types.int(), returns=types.int())
    def ll_find_char(s, ch, start, end):
        """Return the lowest index in [start, end) whose item equals 'ch',
        or -1.  'end' is clamped to len(s.chars); 'start' is used as is."""
        limit = len(s.chars)
        if end > limit:
            end = limit
        for i in range(start, end):
            if s.chars[i] == ch:
                return i
        return -1

    @staticmethod
    @jit.elidable
    @signature(types.any(), types.any(), types.int(), types.int(), returns=types.int())
    def ll_rfind_char(s, ch, start, end):
        """Return the highest index in [start, end) whose item equals 'ch',
        or -1.  'end' is clamped to len(s.chars); 'start' is used as is."""
        limit = len(s.chars)
        if end > limit:
            end = limit
        idx = end
        while idx > start:
            idx -= 1
            if s.chars[idx] == ch:
                return idx
        return -1

    @staticmethod
    @jit.elidable
    def ll_count_char(s, ch, start, end):
        """Return how many items in [start, end) equal 'ch'.  'end' is
        clamped to len(s.chars); 'start' is used as is."""
        limit = len(s.chars)
        if end > limit:
            end = limit
        hits = 0
        for i in range(start, end):
            if s.chars[i] == ch:
                hits += 1
        return hits

    @staticmethod
    @signature(types.any(), types.any(), types.int(), types.int(), returns=types.int())
    def ll_find(s1, s2, start, end):
        """Return the index of the first occurrence of 's2' within
        s1[start:end], or -1.

        'start' is clamped to 0 and 'end' to len(s1.chars).  A one-item
        's2' is delegated to ll_find_char, anything else to ll_search.
        """
        if start < 0:
            start = 0
        size = len(s1.chars)
        if end > size:
            end = size
        if end - start < 0:
            return -1

        if len(s2.chars) == 1:
            return LLHelpers.ll_find_char(s1, s2.chars[0], start, end)
        return LLHelpers.ll_search(s1, s2, start, end, FAST_FIND)

    @staticmethod
    @signature(types.any(), types.any(), types.int(), types.int(), returns=types.int())
    def ll_rfind(s1, s2, start, end):
        """Return the index of the last occurrence of 's2' within
        s1[start:end], or -1.

        'start' is clamped to 0 and 'end' to len(s1.chars).  A one-item
        's2' is delegated to ll_rfind_char, anything else to ll_search.
        """
        if start < 0:
            start = 0
        size = len(s1.chars)
        if end > size:
            end = size
        if end - start < 0:
            return -1

        if len(s2.chars) == 1:
            return LLHelpers.ll_rfind_char(s1, s2.chars[0], start, end)
        return LLHelpers.ll_search(s1, s2, start, end, FAST_RFIND)

    @classmethod
    def ll_count(cls, s1, s2, start, end):
        """Return the number of occurrences of 's2' counted within
        s1[start:end].

        'start' is clamped to 0 and 'end' to len(s1.chars); an inverted
        range gives 0.  A one-item 's2' is delegated to ll_count_char,
        anything else to ll_search in FAST_COUNT mode.
        """
        if start < 0:
            start = 0
        size = len(s1.chars)
        if end > size:
            end = size
        if end - start < 0:
            return 0

        if len(s2.chars) == 1:
            return cls.ll_count_char(s1, s2.chars[0], start, end)

        found = cls.ll_search(s1, s2, start, end, FAST_COUNT)
        assert found >= 0
        return found

    @staticmethod
    @jit.elidable
    def ll_search(s1, s2, start, end, mode):
        """Search for 's2' inside s1[start:end].

        'mode' selects the behaviour:
          * FAST_COUNT: return the number of matches counted while
            scanning forward (a match advances the scan past itself);
          * FAST_RFIND: scan backward and return the index of the first
            match met, or -1;
          * any other value: scan forward and return the index of the
            first match met, or -1.

        An empty 's2' returns end - start + 1 for FAST_COUNT, 'end' for
        FAST_RFIND and 'start' otherwise.
        """
        count = 0
        n = end - start
        m = len(s2.chars)
        # NUL stands in for "the item just past the end of s1" below.
        tp = typeOf(s1)
        if tp == string_repr.lowleveltype or tp == Char:
            NUL = '\0'
        else:
            NUL = u'\0'

        if m == 0:
            if mode == FAST_COUNT:
                return end - start + 1
            elif mode == FAST_RFIND:
                return end
            else:
                return start

        # w is the largest offset from 'start' at which s2 can still fit.
        w = n - m

        if w < 0:
            if mode == FAST_COUNT:
                return 0
            return -1

        mlast = m - 1
        skip = mlast - 1
        mask = 0

        if mode != FAST_RFIND:
            # Forward scan.  Build the mask from every item of s2 and
            # derive 'skip' from the last earlier position holding the
            # same item as s2's last one.
            # presumably bloom_add/bloom implement a bloom-filter style
            # membership mask -- they are defined outside this block.
            for i in range(mlast):
                mask = bloom_add(mask, s2.chars[i])
                if s2.chars[i] == s2.chars[mlast]:
                    skip = mlast - i - 1
            mask = bloom_add(mask, s2.chars[mlast])

            i = start - 1
            while i + 1 <= start + w:
                i += 1
                # Compare the last item first, then the rest of s2.
                if s1.chars[i + m - 1] == s2.chars[m - 1]:
                    for j in range(mlast):
                        if s1.chars[i + j] != s2.chars[j]:
                            break
                    else:
                        # full match at index i
                        if mode != FAST_COUNT:
                            return i
                        count += 1
                        i += mlast
                        continue

                    # Mismatch: look at the item following the window to
                    # decide how far the window can jump.
                    if i + m < len(s1.chars):
                        c = s1.chars[i + m]
                    else:
                        c = NUL
                    if not bloom(mask, c):
                        i += m
                    else:
                        i += skip
                else:
                    if i + m < len(s1.chars):
                        c = s1.chars[i + m]
                    else:
                        c = NUL
                    if not bloom(mask, c):
                        i += m
        else:
            # Backward scan: same scheme mirrored, anchored on s2's first
            # item and looking at the item preceding the window.
            mask = bloom_add(mask, s2.chars[0])
            for i in range(mlast, 0, -1):
                mask = bloom_add(mask, s2.chars[i])
                if s2.chars[i] == s2.chars[0]:
                    skip = i - 1

            i = start + w + 1
            while i - 1 >= start:
                i -= 1
                if s1.chars[i] == s2.chars[0]:
                    for j in xrange(mlast, 0, -1):
                        if s1.chars[i + j] != s2.chars[j]:
                            break
                    else:
                        return i
                    if i - 1 >= 0 and not bloom(mask, s1.chars[i - 1]):
                        i -= m
                    else:
                        i -= skip
                else:
                    if i - 1 >= 0 and not bloom(mask, s1.chars[i - 1]):
                        i -= m

        # No match returned from inside the loops.
        if mode != FAST_COUNT:
            return -1
        return count

    @staticmethod
    @signature(types.int(), types.any(), returns=types.any())
    @jit.look_inside_iff(lambda length, items: jit.loop_unrolling_heuristic(
        items, length))
    def ll_join_strs(length, items):
        """Concatenate the first 'length' strings of 'items'.

        With length == 1 the single item itself is returned, not a copy.
        Raises MemoryError when the summed lengths overflow.
        """
        # Special case for length 1 items, helps both the JIT and other code
        if length == 1:
            return items[0]

        total_len = 0
        for k in range(length):
            try:
                total_len = ovfcheck(total_len + len(items[k].chars))
            except OverflowError:
                raise MemoryError
        # pick the allocator and copier matching the item type
        if typeOf(items).TO.OF.TO == STR:
            alloc = mallocstr
            copy = copy_string_contents
        else:
            alloc = mallocunicode
            copy = copy_unicode_contents
        joined = alloc(total_len)
        write_at = 0
        for k in range(length):
            piece_len = len(items[k].chars)
            copy(items[k], joined, 0, write_at, piece_len)
            write_at += piece_len
        return joined

    @staticmethod
    @jit.look_inside_iff(lambda length, chars, RES: jit.isconstant(length) and jit.isvirtual(chars))
    def ll_join_chars(length, chars, RES):
        """Build a new string from the first 'length' items of 'chars'.

        When RES is StringRepr.lowleveltype the result comes from mallocstr
        and items are cast to Char; otherwise it comes from mallocunicode
        and items are cast to UniChar.
        """
        # no need to optimize this, will be replaced by string builder
        # at some point soon
        if RES is StringRepr.lowleveltype:
            item_type = Char
            alloc = mallocstr
        else:
            item_type = UniChar
            alloc = mallocunicode
        joined = alloc(length)
        out = joined.chars
        for k in range(length):
            out[k] = cast_primitive(item_type, chars[k])
        return joined

    @staticmethod
    @jit.oopspec('stroruni.slice(s1, start, stop)')
    @signature(types.any(), types.int(), types.int(), returns=types.any())
    @jit.elidable
    def _ll_stringslice(s1, start, stop):
        """Return a new string holding s1[start:stop]; 'start' must be
        non-negative.  When stop < start, s1.empty() is returned."""
        assert start >= 0
        size = stop - start
        # If start > stop, return a empty string. This can happen if the start
        # is greater than the length of the string. Use < instead of <= to avoid
        # creating another path for the JIT when start == stop.
        if size < 0:
            return s1.empty()
        piece = s1.malloc(size)
        s1.copy_contents(s1, piece, start, 0, size)
        return piece

    @staticmethod
    def ll_stringslice_startonly(s1, start):
        """Return the slice of 's1' from 'start' up to len(s1.chars)."""
        stop = len(s1.chars)
        return LLHelpers._ll_stringslice(s1, start, stop)

    @staticmethod
    @signature(types.any(), types.int(), types.int(), returns=types.any())
    def ll_stringslice_startstop(s1, start, stop):
        """Return the slice s1[start:stop], with 'stop' clamped to
        len(s1.chars).

        Outside the JIT, a slice covering the whole string (start == 0 and
        stop at or past the end) returns 's1' itself instead of a copy.
        """
        if jit.we_are_jitted():
            size = len(s1.chars)
            if stop > size:
                stop = size
        elif stop >= len(s1.chars):
            if start == 0:
                return s1
            stop = len(s1.chars)
        return LLHelpers._ll_stringslice(s1, start, stop)

    @staticmethod
    def ll_stringslice_minusone(s1):
        """Return the slice of 's1' from 0 up to len(s1.chars) - 1."""
        stop = len(s1.chars) - 1
        return LLHelpers._ll_stringslice(s1, 0, stop)

    @staticmethod
    def ll_split_chr(LIST, s, c, max):
        """Split 's' around the single item 'c', scanning from the left.

        A non-negative 'max' limits the number of splits; a negative 'max'
        means no limit.  Returns a new LIST (from LIST.ll_newlist) whose
        items are freshly malloc'ed pieces of 's'.
        """
        chars = s.chars
        strlen = len(chars)
        # First pass: count how many pieces the result will have.
        count = 1
        i = 0
        if max == 0:
            # no split allowed: skip the scan entirely
            i = strlen
        while i < strlen:
            if chars[i] == c:
                count += 1
                if max >= 0 and count > max:
                    break
            i += 1
        res = LIST.ll_newlist(count)
        items = res.ll_items()
        # Second pass: i is the start of the current piece, j scans for
        # the next separator.
        i = 0
        j = 0
        resindex = 0
        if max == 0:
            j = strlen
        while j < strlen:
            if chars[j] == c:
                item = items[resindex] = s.malloc(j - i)
                item.copy_contents(s, item, i, 0, j - i)
                resindex += 1
                i = j + 1
                if max >= 0 and resindex >= max:
                    # split limit reached: the rest becomes the last piece
                    j = strlen
                    break
            j += 1
        # last piece: everything from i up to j
        item = items[resindex] = s.malloc(j - i)
        item.copy_contents(s, item, i, 0, j - i)
        return res

    @staticmethod
    def ll_split(LIST, s, c, max):
        """Split 's' around the separator string 'c', scanning from the
        left.

        At most 'max' splits are made; max == -1 is replaced by
        len(s.chars).  Returns a new LIST (from LIST.ll_newlist) holding
        the pieces; when 'c' is not found the single item is 's' itself
        rather than a copy.
        """
        if max == -1:
            max = len(s.chars)
        last = len(s.chars)
        markerlen = len(c.chars)
        # First pass: count the pieces.
        count = 1
        pos = s.find(c, 0, last)
        while pos >= 0 and count <= max:
            pos = s.find(c, pos + markerlen, last)
            count += 1
        res = LIST.ll_newlist(count)
        items = res.ll_items()
        # Second pass: copy each piece out.
        count = 0
        prev_pos = 0
        pos = s.find(c, 0, last)
        if pos < 0:
            items[0] = s
            return res
        while pos >= 0 and count < max:
            piece_len = pos - prev_pos
            item = items[count] = s.malloc(piece_len)
            item.copy_contents(s, item, prev_pos, 0, piece_len)
            count += 1
            prev_pos = pos + markerlen
            pos = s.find(c, prev_pos, last)
        # whatever follows the last separator used
        tail_len = last - prev_pos
        item = items[count] = s.malloc(tail_len)
        item.copy_contents(s, item, prev_pos, 0, tail_len)
        return res

    @staticmethod
    def ll_rsplit_chr(LIST, s, c, max):
        """Split 's' around the single item 'c', filling the result from
        the right.

        A non-negative 'max' limits the number of splits; a negative 'max'
        means no limit.  Returns a new LIST (from LIST.ll_newlist) whose
        items are freshly malloc'ed pieces of 's'.
        """
        chars = s.chars
        strlen = len(chars)
        # First pass: count how many pieces the result will have.
        count = 1
        i = 0
        if max == 0:
            # no split allowed: skip the scan entirely
            i = strlen
        while i < strlen:
            if chars[i] == c:
                count += 1
                if max >= 0 and count > max:
                    break
            i += 1
        res = LIST.ll_newlist(count)
        items = res.ll_items()
        # Second pass, right to left: i is the end of the current piece,
        # j scans backward for the previous separator, and the result list
        # is filled from its last slot down to slot 0.
        i = strlen
        j = strlen
        resindex = count - 1
        assert resindex >= 0
        if max == 0:
            j = 0
        while j > 0:
            j -= 1
            if chars[j] == c:
                item = items[resindex] = s.malloc(i - j - 1)
                item.copy_contents(s, item, j + 1, 0, i - j - 1)
                resindex -= 1
                i = j
                if resindex == 0:
                    # only slot 0 is left: it takes the whole remainder
                    j = 0
                    break
        # first piece: everything from j up to i
        item = items[resindex] = s.malloc(i - j)
        item.copy_contents(s, item, j, 0, i - j)
        return res

    @staticmethod
    def ll_rsplit(LIST, s, c, max):
        """Split 's' on the separator string 'c' from the right.

        At most 'max' splits are done; a 'max' of -1 means "no limit".
        A first pass counts the pieces so that the list is allocated at
        its final size, a second pass fills it from the last slot down to
        slot 0.  When the separator does not occur at all, the single item
        of the result is 's' itself, not a copy.
        """
        strlen = len(s.chars)
        markerlen = len(c.chars)
        if max == -1:
            max = strlen
        # First pass: count the pieces.  Every search ends where the
        # previous match starts, exactly like the copying pass below.
        # Ending it 'markerlen' characters earlier would miss separators
        # that have fewer than 'markerlen' characters between them, and the
        # list would then be too short for the pieces actually present.
        count = 1
        pos = s.rfind(c, 0, strlen)
        while pos >= 0 and count <= max:
            pos = s.rfind(c, 0, pos)
            count += 1
        res = LIST.ll_newlist(count)
        items = res.ll_items()
        # Second pass: 'prev_pos' is the exclusive end of the piece being
        # copied, 'pos' the start of the separator to its left.
        prev_pos = strlen
        pos = s.rfind(c, 0, strlen)
        if pos < 0:
            items[0] = s
            return res
        count -= 1
        while pos >= 0 and count > 0:
            item = items[count] = s.malloc(prev_pos - pos - markerlen)
            item.copy_contents(s, item, pos + markerlen, 0,
                               prev_pos - pos - markerlen)
            count -= 1
            prev_pos = pos
            pos = s.rfind(c, 0, pos)
        # slot 0 gets everything left of the last separator used
        item = items[count] = s.malloc(prev_pos)
        item.copy_contents(s, item, 0, 0, prev_pos)
        return res

    @staticmethod
    @jit.elidable
    def ll_replace_chr_chr(s, c1, c2):
        """Return a new string: a copy of 's' with every 'c1' replaced by 'c2'."""
        src = s.chars
        length = len(src)
        newstr = s.malloc(length)
        dst = newstr.chars
        for j in range(length):
            ch = src[j]
            if ch == c1:
                dst[j] = c2
            else:
                dst[j] = ch
        return newstr

    @staticmethod
    @jit.elidable
    def ll_contains(s, c):
        """Tell whether the character 'c' occurs anywhere in 's'."""
        chars = s.chars
        for i in range(len(chars)):
            if chars[i] == c:
                return True
        return False

    @staticmethod
    @jit.elidable
    def ll_int(s, base):
        """Parse 's' as an integer written in 'base' (2 to 36).

        Accepted layout: optional spaces, an optional '+' or '-' sign,
        optional spaces, at least one digit valid in 'base' (letters in
        either case stand for 10 and up), optional trailing spaces.
        Anything else, or a base outside 2..36, raises ValueError.
        """
        if base < 2 or base > 36:
            raise ValueError
        chars = s.chars
        strlen = len(chars)
        pos = 0
        #XXX: only space is allowed as white space for now
        while pos < strlen and chars[pos] == ' ':
            pos += 1
        if pos >= strlen:
            raise ValueError
        # optional sign
        sign = 1
        first = chars[pos]
        if first == '-':
            sign = -1
            pos += 1
        elif first == '+':
            pos += 1
        # spaces are tolerated between the sign and the digits
        while pos < strlen and chars[pos] == ' ':
            pos += 1
        # accumulate the digits
        digits_start = pos
        val = 0
        while pos < strlen:
            code = ord(chars[pos])
            if ord('0') <= code <= ord('9'):
                digit = code - ord('0')
            elif ord('a') <= code <= ord('z'):
                digit = code - ord('a') + 10
            elif ord('A') <= code <= ord('Z'):
                digit = code - ord('A') + 10
            else:
                break
            if digit >= base:
                break
            val = val * base + digit
            pos += 1
        if pos == digits_start:
            raise ValueError # catch strings like '+' and '+  '
        # only trailing spaces may follow the digits
        while pos < strlen and chars[pos] == ' ':
            pos += 1
        if pos != strlen:
            raise ValueError
        return sign * val

    # interface to build strings:
    #   x = ll_build_start(n)
    #   ll_build_push(x, next_string, 0)
    #   ll_build_push(x, next_string, 1)
    #   ...
    #   ll_build_push(x, next_string, n-1)
    #   s = ll_build_finish(x)

    @staticmethod
    def ll_build_start(parts_count):
        """Allocate the TEMP array that will hold 'parts_count' strings."""
        builder = malloc(TEMP, parts_count)
        return builder

    @staticmethod
    def ll_build_push(builder, next_string, index):
        """Store 'next_string' as part number 'index' of 'builder'."""
        builder[index] = next_string

    @staticmethod
    def ll_build_finish(builder):
        """Join all the parts stored in 'builder' into a single string."""
        parts_count = len(builder)
        return LLHelpers.ll_join_strs(parts_count, builder)

    @staticmethod
    @specialize.memo()
    def ll_constant(s):
        """Convert the constant 's' with string_repr.convert_const()."""
        ll_s = string_repr.convert_const(s)
        return ll_s

    @staticmethod
    @specialize.memo()
    def ll_constant_unicode(s):
        """Convert the constant 's' with unicode_repr.convert_const()."""
        ll_u = unicode_repr.convert_const(s)
        return ll_u

    @classmethod
    def do_stringformat(cls, hop, sourcevarsrepr):
        """Generate the operations for 'constant_format_string % values'.

        The format string (hop.args_s[0]) must be a constant.  It is cut
        into pieces by cls.parse_fmt_string(); a GC array with one slot per
        piece is allocated, each slot is filled either with a constant
        (literal text) or with the result of converting the next value of
        'sourcevarsrepr', and the array is finally joined with
        cls.ll_join_strs.

        'sourcevarsrepr' yields (variable, repr) pairs, one per conversion
        specifier, in order.  TyperError is raised for a repr without
        'll_str' and for a specifier that is not handled below.
        """
        s_str = hop.args_s[0]
        assert s_str.is_constant()
        is_unicode = isinstance(s_str, annmodel.SomeUnicodeString)
        # the temporary array type depends on the kind of format string
        if is_unicode:
            TEMPBUF = TEMP_UNICODE
        else:
            TEMPBUF = TEMP
        s = s_str.const
        things = cls.parse_fmt_string(s)
        size = inputconst(Signed, len(things)) # could be unsigned?
        cTEMP = inputconst(Void, TEMPBUF)
        cflags = inputconst(Void, {'flavor': 'gc'})
        vtemp = hop.genop("malloc_varsize", [cTEMP, cflags, size],
                          resulttype=Ptr(TEMPBUF))

        argsiter = iter(sourcevarsrepr)

        from rpython.rtyper.rclass import InstanceRepr
        for i, thing in enumerate(things):
            if isinstance(thing, tuple):
                # a conversion specifier: it consumes the next value
                code = thing[0]
                vitem, r_arg = argsiter.next()
                if not hasattr(r_arg, 'll_str'):
                    raise TyperError("ll_str unsupported for: %r" % r_arg)
                if code == 's':
                    if is_unicode:
                        # only UniCharRepr and UnicodeRepr has it so far
                        vchunk = hop.gendirectcall(r_arg.ll_unicode, vitem)
                    else:
                        vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
                elif code == 'r' and isinstance(r_arg, InstanceRepr):
                    vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
                elif code == 'd':
                    assert isinstance(r_arg, IntegerRepr)
                    #vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
                    vchunk = hop.gendirectcall(ll_str.ll_int2dec, vitem)
                elif code == 'f':
                    #assert isinstance(r_arg, FloatRepr)
                    vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
                elif code == 'x':
                    assert isinstance(r_arg, IntegerRepr)
                    vchunk = hop.gendirectcall(ll_str.ll_int2hex, vitem,
                                               inputconst(Bool, False))
                elif code == 'o':
                    assert isinstance(r_arg, IntegerRepr)
                    vchunk = hop.gendirectcall(ll_str.ll_int2oct, vitem,
                                               inputconst(Bool, False))
                else:
                    raise TyperError("%%%s is not RPython" % (code,))
            else:
                # literal text between the specifiers becomes a constant
                if is_unicode:
                    vchunk = inputconst(unicode_repr, thing)
                else:
                    vchunk = inputconst(string_repr, thing)
            # from here on 'i' is the constant index of the slot to fill
            i = inputconst(Signed, i)
            if is_unicode and vchunk.concretetype != Ptr(UNICODE):
                # if we are here, one of the ll_str.* functions returned some
                # STR, so we convert it to unicode. It's a bit suboptimal
                # because we do one extra copy.
                vchunk = hop.gendirectcall(cls.ll_str2unicode, vchunk)
            hop.genop('setarrayitem', [vtemp, i, vchunk])

        hop.exception_cannot_occur()   # to ignore the ZeroDivisionError of '%'
        return hop.gendirectcall(cls.ll_join_strs, size, vtemp)

    @staticmethod
    @jit.dont_look_inside
    def ll_string2list(RESLIST, src):
        """Return a new RESLIST holding the characters of 'src', in order.

        The list is allocated with len(src.chars) items.  Its item type
        must be the character type of 'src' (asserted below).  The
        characters are copied one by one when
        rgc.must_split_gc_address_space() is true, and with a single
        raw_memcopy() otherwise.
        """
        length = len(src.chars)
        lst = RESLIST.ll_newlist(length)
        dst = lst.ll_items()
        SRC = typeOf(src).TO     # STR or UNICODE
        DST = typeOf(dst).TO     # GcArray
        assert DST.OF is SRC.chars.OF
        #
        # If the 'split_gc_address_space' option is set, we must copy
        # manually, character-by-character
        if rgc.must_split_gc_address_space():
            i = 0
            while i < length:
                dst[i] = src.chars[i]
                i += 1
            return lst
        #
        # from here, no GC operations can happen
        # address of the first character of 'src'
        asrc = llmemory.cast_ptr_to_adr(src) + (
            llmemory.offsetof(SRC, 'chars') +
            llmemory.itemoffsetof(SRC.chars, 0))
        # address of the first item of the destination array
        adst = llmemory.cast_ptr_to_adr(dst) + llmemory.itemoffsetof(DST, 0)
        llmemory.raw_memcopy(asrc, adst, llmemory.sizeof(DST.OF) * length)
        # end of "no GC" section
        keepalive_until_here(src)
        keepalive_until_here(dst)
        return lst
Esempio n. 9
0
class SubBuffer(Buffer):
    """
    A window of 'size' bytes starting at 'offset' inside another buffer.

    A negative 'size' means "up to the end of the underlying buffer".
    Every access is forwarded to the underlying buffer with 'offset'
    added to the position.
    """
    _attrs_ = ['buffer', 'offset', 'size', 'readonly']
    _immutable_ = True

    @signature(types.any(), types.instance(Buffer), types.int(), types.int(),
               returns=types.none())
    def __init__(self, buffer, offset, size):
        self.readonly = buffer.readonly
        if isinstance(buffer, SubBuffer):     # don't nest them
            # The result must be a window (offset, size) over the window
            # (buffer.offset, buffer.size) over buffer.buffer, so the
            # requested size is first clamped to what the inner window can
            # provide.  Either '.size' can be -1 to mean 'up to the end'.
            available = buffer.getlength() - offset
            if size < 0 or size > available:
                if available < 0:
                    available = 0
                size = available
            # then address the innermost buffer directly
            offset += buffer.offset
            buffer = buffer.buffer
        #
        self.buffer = buffer
        self.offset = offset
        self.size = size

    def getlength(self):
        available = self.buffer.getlength() - self.offset
        size = self.size
        if 0 <= size <= available:
            return size
        if available >= 0:
            return available
        return 0

    def getitem(self, index):
        return self.buffer.getitem(self.offset + index)

    def getslice(self, start, stop, step, size):
        if start == stop:
            # empty slice: answer directly, because adding self.offset
            # might make the bounds fall outside the underlying buffer
            return ''
        shift = self.offset
        return self.buffer.getslice(shift + start, shift + stop, step, size)

    def setitem(self, index, char):
        self.buffer.setitem(self.offset + index, char)

    def setslice(self, start, string):
        if len(string) == 0:
            # nothing to write; adding self.offset might make 'start'
            # fall outside the underlying buffer
            return
        self.buffer.setslice(self.offset + start, string)

    def get_raw_address(self):
        from rpython.rtyper.lltypesystem import rffi
        base = self.buffer.get_raw_address()
        return rffi.ptradd(base, self.offset)

    @specialize.ll_and_arg(1)
    def typed_read(self, TP, byte_offset):
        return self.buffer.typed_read(TP, self.offset + byte_offset)

    @specialize.ll_and_arg(1)
    def typed_write(self, TP, byte_offset, value):
        return self.buffer.typed_write(TP, self.offset + byte_offset, value)
Esempio n. 10
0
class Buffer(object):
    """
    Abstract base class for buffers of bytes.

    You most probably do NOT want to inherit from this class alone:
    inherit from RawBuffer or GCBuffer instead, so that you automatically
    get the proper implementation of typed_read and typed_write.
    """
    _attrs_ = ['readonly']
    _immutable_ = True

    def getlength(self):
        """Return the size of the buffer in bytes."""
        raise NotImplementedError

    def __len__(self):
        length = self.getlength()
        assert length >= 0
        return length

    def as_str(self):
        "Return the whole content of the buffer as an interp-level string."
        # May be overridden.
        length = self.getlength()
        return self.getslice(0, length, 1, length)

    def getitem(self, index):
        "Return the character at position 'index' of the buffer."
        raise NotImplementedError   # Must be overridden.  No bounds checks.

    def __getitem__(self, i):
        return self.getitem(i)

    def getslice(self, start, stop, step, size):
        # May be overridden.  No bounds checks.
        pieces = []
        for position in range(start, stop, step):
            pieces.append(self.getitem(position))
        return ''.join(pieces)

    @signature(types.any(), types.int(), types.int(), returns=types.str())
    def __getslice__(self, start, stop):
        size = stop - start
        return self.getslice(start, stop, 1, size)

    def setitem(self, index, char):
        "Store the character 'char' at position 'index' of the buffer."
        raise NotImplementedError   # Must be overridden.  No bounds checks.

    def __setitem__(self, i, char):
        return self.setitem(i, char)

    def setslice(self, start, string):
        # May be overridden.  No bounds checks.
        length = len(string)
        for k in range(length):
            self.setitem(start + k, string[k])

    @jit.look_inside_iff(lambda self, index, count:
                         jit.isconstant(count) and count <= 8)
    def setzeros(self, index, count):
        stop = index + count
        for position in range(index, stop):
            self.setitem(position, '\x00')

    @specialize.ll_and_arg(1)
    def typed_read(self, TP, byte_offset):
        """
        Read a value of type TP located at byte_offset. No bounds checks
        """
        raise CannotRead

    @specialize.ll_and_arg(1)
    def typed_write(self, TP, byte_offset, value):
        """
        Write 'value', of type TP, at byte_offset. No bounds checks
        """
        raise CannotWrite

    def get_raw_address(self):
        msg = "cannot take the raw address of this buffer"
        if not we_are_translated():
            # the repr of the buffer is only appended when running untranslated
            msg = msg + " '%s'" % (self,)
        raise ValueError(msg)
Esempio n. 11
0
class ObjectSpace(object):
    def __init__(self, config=''):
        """Build the object space and bootstrap the core classes.

        The order of the statements matters: the true/false/nil singletons
        and the BasicObject/Object/Class triple are created first, then the
        remaining built-in classes and modules are built and registered as
        constants, and only then is Kernel included into Object.
        """
        self.config = config

        self.cache = SpaceCache(self)
        self.symbol_cache = {}
        self._executioncontexts = ExecutionContextHolder()
        self.globals = GlobalsDict()
        # stays True until Kernel has been included into Object below
        self.bootstrap = True
        self.exit_handlers_w = []

        self.w_true = W_TrueObject(self)
        self.w_false = W_FalseObject(self)
        self.w_nil = W_NilObject(self)

        # Force the setup of a few key classes, we create a fake "Class" class
        # for the initial bootstrap.
        self.w_class = self.newclass("FakeClass", None)
        cls_reference = weakref.ref(self.w_class)
        self.w_basicobject = self.getclassfor(W_BaseObject)
        self.w_object = self.getclassfor(W_Object)
        self.w_class = self.getclassfor(W_ClassObject)
        # We replace the one reference to our FakeClass with the real class.
        self.w_basicobject.klass.superclass = self.w_class

        # check that nothing references the FakeClass any more
        gc.collect()
        assert cls_reference() is None

        self.w_symbol = self.getclassfor(W_SymbolObject)
        self.w_array = self.getclassfor(W_ArrayObject)
        self.w_proc = self.getclassfor(W_ProcObject)
        self.w_binding = self.getclassfor(W_BindingObject)
        self.w_numeric = self.getclassfor(W_NumericObject)
        self.w_fixnum = self.getclassfor(W_FixnumObject)
        self.w_float = self.getclassfor(W_FloatObject)
        self.w_bignum = self.getclassfor(W_BignumObject)
        self.w_integer = self.getclassfor(W_IntegerObject)
        self.w_module = self.getclassfor(W_ModuleObject)
        self.w_string = self.getclassfor(W_StringObject)
        self.w_regexp = self.getclassfor(W_RegexpObject)
        self.w_hash = self.getclassfor(W_HashObject)
        self.w_method = self.getclassfor(W_MethodObject)
        self.w_unbound_method = self.getclassfor(W_UnboundMethodObject)
        self.w_io = self.getclassfor(W_IOObject)
        self.w_NoMethodError = self.getclassfor(W_NoMethodError)
        self.w_ArgumentError = self.getclassfor(W_ArgumentError)
        self.w_LocalJumpError = self.getclassfor(W_LocalJumpError)
        self.w_NameError = self.getclassfor(W_NameError)
        self.w_NotImplementedError = self.getclassfor(W_NotImplementedError)
        self.w_IndexError = self.getclassfor(W_IndexError)
        self.w_KeyError = self.getclassfor(W_KeyError)
        self.w_IOError = self.getclassfor(W_IOError)
        self.w_EOFError = self.getclassfor(W_EOFError)
        self.w_FiberError = self.getclassfor(W_FiberError)
        self.w_LoadError = self.getclassfor(W_LoadError)
        self.w_RangeError = self.getclassfor(W_RangeError)
        self.w_FloatDomainError = self.getclassfor(W_FloatDomainError)
        self.w_RegexpError = self.getclassfor(W_RegexpError)
        self.w_RuntimeError = self.getclassfor(W_RuntimeError)
        self.w_StandardError = self.getclassfor(W_StandardError)
        self.w_StopIteration = self.getclassfor(W_StopIteration)
        self.w_SyntaxError = self.getclassfor(W_SyntaxError)
        self.w_SystemCallError = self.getclassfor(W_SystemCallError)
        self.w_SystemExit = self.getclassfor(W_SystemExit)
        self.w_SystemStackError = self.getclassfor(W_SystemStackError)
        self.w_TypeError = self.getclassfor(W_TypeError)
        self.w_ZeroDivisionError = self.getclassfor(W_ZeroDivisionError)
        self.w_kernel = self.getmoduleobject(Kernel.moduledef)

        self.w_topaz = self.getmoduleobject(Topaz.moduledef)

        # Register every class and module below as a constant of Object,
        # under the name it answers to "name".
        for w_cls in [
            self.w_basicobject, self.w_object, self.w_array, self.w_proc,
            self.w_numeric, self.w_fixnum, self.w_bignum, self.w_float,
            self.w_string, self.w_symbol, self.w_class, self.w_module,
            self.w_hash, self.w_regexp, self.w_method, self.w_unbound_method,
            self.w_io, self.w_binding,

            self.w_NoMethodError, self.w_ArgumentError, self.w_TypeError,
            self.w_ZeroDivisionError, self.w_SystemExit, self.w_RangeError,
            self.w_RegexpError, self.w_RuntimeError, self.w_SystemCallError,
            self.w_LoadError, self.w_StopIteration, self.w_SyntaxError,
            self.w_NameError, self.w_StandardError, self.w_LocalJumpError,
            self.w_IndexError, self.w_IOError, self.w_NotImplementedError,
            self.w_EOFError, self.w_FloatDomainError, self.w_FiberError,
            self.w_SystemStackError, self.w_KeyError,

            self.w_kernel, self.w_topaz,

            self.getclassfor(W_NilObject),
            self.getclassfor(W_TrueObject),
            self.getclassfor(W_FalseObject),
            self.getclassfor(W_RangeObject),
            self.getclassfor(W_FileObject),
            self.getclassfor(W_DirObject),
            self.getclassfor(W_EncodingObject),
            self.getclassfor(W_IntegerObject),
            self.getclassfor(W_RandomObject),
            self.getclassfor(W_ThreadObject),
            self.getclassfor(W_TimeObject),
            self.getclassfor(W_MethodObject),
            self.getclassfor(W_UnboundMethodObject),
            self.getclassfor(W_FiberObject),
            self.getclassfor(W_MatchDataObject),

            self.getclassfor(W_ExceptionObject),
            self.getclassfor(W_ThreadError),

            self.getmoduleobject(Comparable.moduledef),
            self.getmoduleobject(Enumerable.moduledef),
            self.getmoduleobject(Marshal.moduledef),
            self.getmoduleobject(Math.moduledef),
            self.getmoduleobject(Fcntl.moduledef),
            self.getmoduleobject(FFI.moduledef),
            self.getmoduleobject(Process.moduledef),
            self.getmoduleobject(Signal.moduledef),
            self.getmoduleobject(ObjectSpaceModule.moduledef),
        ]:
            self.set_const(
                self.w_object,
                self.str_w(self.send(w_cls, "name")),
                w_cls
            )

        # These classes are registered as constants of the Topaz module.
        for w_cls in [
            self.getclassfor(W_EnvObject), self.getclassfor(W_HashIterator),
        ]:
            self.set_const(
                self.w_topaz,
                self.str_w(self.send(w_cls, "name")),
                w_cls
            )

        self.set_const(self.w_basicobject, "BasicObject", self.w_basicobject)

        # This is bootstrap. We have to delay sending until true, false and nil
        # are defined
        self.send(self.w_object, "include", [self.w_kernel])
        self.bootstrap = False

        # The virtual globals read their attribute from the space each time
        # the lambda is called.
        self.w_load_path = self.newarray([])
        self.globals.define_virtual(
            "$LOAD_PATH", lambda space: space.w_load_path)
        self.globals.define_virtual("$:", lambda space: space.w_load_path)

        self.globals.define_virtual(
            "$$", lambda space: space.send(space.getmoduleobject(Process.moduledef), "pid"))

        self.w_loaded_features = self.newarray([])
        self.globals.define_virtual(
            "$LOADED_FEATURES", lambda space: space.w_loaded_features)
        self.globals.define_virtual(
            '$"', lambda space: space.w_loaded_features)

        self.w_main_thread = W_ThreadObject(self)

        # NOTE(review): w_load_path was already set to a new array above;
        # this second assignment replaces it with another empty array.
        self.w_load_path = self.newarray([])
        # "lib-ruby" directory next to this source file
        self.base_lib_path = os.path.abspath(os.path.join(
            os.path.dirname(__file__), "lib-ruby"))

    def _freeze_(self):
        self._executioncontexts.clear()
        return True

    def find_executable(self, executable):
        """Locate 'executable', looking it up in $PATH when it is a bare name.

        A name that already contains a path separator (or a ':' on
        Windows) is returned unchanged.  Otherwise the first $PATH entry
        in which the name is a regular file is used; the result, or the
        bare name when nothing matched, is passed through rpath.rabspath().
        """
        if os.sep in executable:
            return executable
        if system.IS_WINDOWS and ":" in executable:
            return executable
        search_path = os.environ.get("PATH")
        if search_path:
            for directory in search_path.split(os.pathsep):
                candidate = os.path.join(directory, executable)
                if os.path.isfile(candidate):
                    executable = candidate
                    break
        return rpath.rabspath(executable)

    def setup(self, executable):
        """
        Performs runtime setup.

        Looks for a "lib-ruby" directory in the ancestors of the
        executable's location, puts the library path at the front of the
        load path, loads the kernel and defines the RUBY_* constants.
        """
        current = rpath.rabspath(self.find_executable(executable))
        # Fallback to a path relative to the compiled location.
        lib_path = self.base_lib_path
        kernel_path = os.path.join(
            os.path.join(lib_path, os.path.pardir), "lib-topaz")
        # Walk up until the parent of a directory is the directory itself.
        parent = rpath.rabspath(os.path.join(current, os.path.pardir))
        while parent != current:
            current = parent
            if isdir(os.path.join(current, "lib-ruby")):
                lib_path = os.path.join(current, "lib-ruby")
                kernel_path = os.path.join(current, "lib-topaz")
                break
            parent = rpath.rabspath(os.path.join(current, os.path.pardir))
        w_lib_path = self.newstr_fromstr(lib_path)
        self.send(self.w_load_path, "unshift", [w_lib_path])
        self.load_kernel(kernel_path)

        w_object = self.w_object
        self.set_const(
            w_object, "RUBY_ENGINE",
            self.newstr_fromstr(system.RUBY_ENGINE))
        self.set_const(
            w_object, "RUBY_VERSION",
            self.newstr_fromstr(system.RUBY_VERSION))
        self.set_const(
            w_object, "RUBY_PATCHLEVEL",
            self.newint(system.RUBY_PATCHLEVEL))
        self.set_const(
            w_object, "RUBY_PLATFORM",
            self.newstr_fromstr(system.RUBY_PLATFORM))
        self.set_const(
            w_object, "RUBY_DESCRIPTION",
            self.newstr_fromstr(system.RUBY_DESCRIPTION))
        self.set_const(
            w_object, "RUBY_REVISION",
            self.newstr_fromstr(system.RUBY_REVISION))

    def load_kernel(self, kernel_path):
        """Send "load" to Kernel with the 'bootstrap.rb' found in 'kernel_path'."""
        bootstrap_file = os.path.join(kernel_path, "bootstrap.rb")
        w_bootstrap_file = self.newstr_fromstr(bootstrap_file)
        self.send(self.w_kernel, "load", [w_bootstrap_file])

    @specialize.memo()
    def fromcache(self, key):
        """Return the space cache's entry for 'key' (via getorbuild)."""
        cache = self.cache
        return cache.getorbuild(key)

    # Methods for dealing with source code.

    def parse(self, source, initial_lineno=1, symtable=None):
        """Parse 'source' and return its AST.

        A new SymbolTable is used when 'symtable' is None.  ParsingError
        and LexerError are both turned into a SyntaxError built with
        self.error().
        """
        if symtable is None:
            symtable = SymbolTable()
        lexer = Lexer(
            source, initial_lineno=initial_lineno, symtable=symtable)
        parser = Parser(lexer)
        try:
            return parser.parse().getast()
        except ParsingError as e:
            source_pos = e.getsourcepos()
            token = e.message
            if source_pos is None:
                msg = ""
            else:
                msg = "line %d (unexpected %s)" % (source_pos.lineno, token)
            raise self.error(self.w_SyntaxError, msg)
        except LexerError as e:
            msg = "line %d (%s)" % (e.pos.lineno, e.msg)
            raise self.error(self.w_SyntaxError, msg)

    def compile(self, source, filepath, initial_lineno=1, symtable=None):
        """Parse 'source' and compile it to bytecode under the name "<main>".

        A new SymbolTable is used when 'symtable' is None.  A
        CompilerError is turned into a SyntaxError built with self.error().
        """
        if symtable is None:
            symtable = SymbolTable()
        tree = self.parse(
            source, initial_lineno=initial_lineno, symtable=symtable)
        compiler_ctx = CompilerContext(self, "<main>", symtable, filepath)
        with compiler_ctx.set_lineno(initial_lineno):
            try:
                tree.compile(compiler_ctx)
            except CompilerError as e:
                raise self.error(self.w_SyntaxError, "%s" % e.msg)
        return compiler_ctx.create_bytecode(
            initial_lineno, [], [], None, None)

    def execute(self, source, w_self=None, lexical_scope=None, filepath="-e",
                initial_lineno=1):
        """Compile 'source' and run it in a new frame; return the result."""
        bytecode = self.compile(
            source, filepath, initial_lineno=initial_lineno)
        frame = self.create_frame(
            bytecode, w_self=w_self, lexical_scope=lexical_scope)
        ec = self.getexecutioncontext()
        with ec.visit_frame(frame):
            return self.execute_frame(frame, bytecode)

    @jit.loop_invariant
    def getexecutioncontext(self):
        """Return the execution context stored in the holder.

        When the holder has none, a new ExecutionContext is created and
        stored first.
        """
        holder = self._executioncontexts
        ec = holder.get()
        if ec is not None:
            return ec
        ec = ExecutionContext()
        holder.set(ec)
        return ec

    def create_frame(self, bc, w_self=None, lexical_scope=None, block=None,
                     parent_interp=None, top_parent_interp=None,
                     regexp_match_cell=None):
        """Build a Frame for the bytecode 'bc'.

        'w_self' defaults to self.w_top_self and 'regexp_match_cell' to a
        new ClosureCell(None).
        """
        if w_self is None:
            w_self = self.w_top_self
        if regexp_match_cell is None:
            regexp_match_cell = ClosureCell(None)
        promoted_bc = jit.promote(bc)
        return Frame(
            promoted_bc, w_self, lexical_scope, block, parent_interp,
            top_parent_interp, regexp_match_cell
        )

    def execute_frame(self, frame, bc):
        """Interpret the bytecode 'bc' in 'frame' with a new Interpreter."""
        interpreter = Interpreter()
        return interpreter.interpret(self, frame, bc)

    # Methods for allocating new objects.

    @signature(types.any(), types.bool(), returns=types.instance(W_Root))
    def newbool(self, boolvalue):
        """Return self.w_true or self.w_false according to 'boolvalue'."""
        return self.w_true if boolvalue else self.w_false

    @signature(types.any(), types.int(),
               returns=types.instance(W_FixnumObject))
    def newint(self, intvalue):
        """Wrap 'intvalue' in a new W_FixnumObject."""
        w_int = W_FixnumObject(self, intvalue)
        return w_int

    def newbigint_fromint(self, intvalue):
        """Delegate to W_BignumObject.newbigint_fromint()."""
        w_bigint = W_BignumObject.newbigint_fromint(self, intvalue)
        return w_bigint

    def newbigint_fromfloat(self, floatvalue):
        """Delegate to W_BignumObject.newbigint_fromfloat()."""
        w_bigint = W_BignumObject.newbigint_fromfloat(self, floatvalue)
        return w_bigint

    def newbigint_fromrbigint(self, bigint):
        """Delegate to W_BignumObject.newbigint_fromrbigint()."""
        w_bigint = W_BignumObject.newbigint_fromrbigint(self, bigint)
        return w_bigint

    @specialize.argtype(1)
    def newint_or_bigint(self, someinteger):
        """Wrap 'someinteger' as a fixnum when it fits, as a bignum otherwise."""
        if -sys.maxint <= someinteger <= sys.maxint:
            # The smallest int -sys.maxint - 1 has to be a Bignum,
            # because parsing gives a Bignum in that case
            return self.newint(intmask(someinteger))
        as_rbigint = rbigint.fromrarith_int(someinteger)
        return self.newbigint_fromrbigint(as_rbigint)

    @specialize.argtype(1)
    def newint_or_bigint_fromunsigned(self, someunsigned):
        """Wrap 'someunsigned' as a fixnum when it is in 0..sys.maxint,
        as a bignum otherwise."""
        # XXX somehow combine with above
        if 0 <= someunsigned <= sys.maxint:
            return self.newint(intmask(someunsigned))
        as_rbigint = rbigint.fromrarith_int(someunsigned)
        return self.newbigint_fromrbigint(as_rbigint)

    def newfloat(self, floatvalue):
        """Wrap 'floatvalue' in a new W_FloatObject."""
        w_float = W_FloatObject(self, floatvalue)
        return w_float

    @jit.elidable
    def newsymbol(self, symbol):
        """Return the W_SymbolObject for 'symbol'.

        Symbols are kept in self.symbol_cache: the object is created and
        cached on the first request, and the cached one is returned after
        that.
        """
        cache = self.symbol_cache
        try:
            return cache[symbol]
        except KeyError:
            w_sym = W_SymbolObject(self, symbol)
            cache[symbol] = w_sym
            return w_sym

    def newstr_fromchars(self, chars):
        """Delegate to W_StringObject.newstr_fromchars()."""
        w_str = W_StringObject.newstr_fromchars(self, chars)
        return w_str

    def newstr_fromstr(self, strvalue):
        """Delegate to W_StringObject.newstr_fromstr(); 'strvalue' must not
        be None."""
        assert strvalue is not None
        w_str = W_StringObject.newstr_fromstr(self, strvalue)
        return w_str

    def newstr_fromstrs(self, strs_w):
        """Delegate to W_StringObject.newstr_fromstrs()."""
        w_str = W_StringObject.newstr_fromstrs(self, strs_w)
        return w_str

    def newarray(self, items_w):
        """Build a new W_ArrayObject from the list 'items_w'."""
        w_array = W_ArrayObject(self, items_w)
        return w_array

    def newhash(self):
        """Build a new W_HashObject."""
        w_hash = W_HashObject(self)
        return w_hash

    def newrange(self, w_start, w_end, exclusive):
        """Build a new W_RangeObject from 'w_start', 'w_end' and 'exclusive'."""
        w_range = W_RangeObject(self, w_start, w_end, exclusive)
        return w_range

    def newregexp(self, regexp, flags):
        """Build a new W_RegexpObject from 'regexp' and 'flags'."""
        w_regexp = W_RegexpObject(self, regexp, flags)
        return w_regexp

    def newmodule(self, name, w_scope=None):
        """Build a W_ModuleObject named by buildname(name, w_scope)."""
        qualified_name = self.buildname(name, w_scope)
        w_module = W_ModuleObject(self, qualified_name)
        return w_module

    def newclass(self, name, superclass, is_singleton=False, w_scope=None,
                 attached=None):
        """Build a W_ClassObject named by buildname(name, w_scope)."""
        qualified_name = self.buildname(name, w_scope)
        w_cls = W_ClassObject(
            self, qualified_name, superclass,
            is_singleton=is_singleton, attached=attached)
        return w_cls

    def newfunction(self, w_name, w_code, lexical_scope, visibility):
        """Build a W_UserFunction; 'w_name' is unwrapped with symbol_w() and
        'w_code' must be a W_CodeObject."""
        function_name = self.symbol_w(w_name)
        assert isinstance(w_code, W_CodeObject)
        w_function = W_UserFunction(
            function_name, w_code, lexical_scope, visibility)
        return w_function

    def newmethod(self, name, w_cls):
        """Return a W_UnboundMethodObject for the method 'name' of 'w_cls'.

        A NameError built with self.error() is raised when
        w_cls.find_method() finds nothing.
        """
        w_function = w_cls.find_method(self, name)
        if w_function is not None:
            return W_UnboundMethodObject(self, w_cls, w_function)
        msg = "undefined method `%s' for class `%s'" % (
            name, self.obj_to_s(w_cls))
        raise self.error(self.w_NameError, msg)

    def newproc(self, bytecode, w_self, lexical_scope, cells, block,
                parent_interp, top_parent_interp, regexp_match_cell,
                is_lambda=False):
        """Build a new W_ProcObject from the given pieces.

        All the arguments are passed through to W_ProcObject, including
        'is_lambda'.  The flag used to be dropped: False was always passed
        whatever the caller asked for.
        """
        return W_ProcObject(
            self, bytecode, w_self, lexical_scope, cells, block, parent_interp,
            top_parent_interp, regexp_match_cell, is_lambda=is_lambda
        )

    @jit.unroll_safe
    def newbinding_fromframe(self, frame):
        """Build a W_BindingObject from 'frame'.

        The names are the frame bytecode's cellvars followed by its
        freevars; each cell is the result of upgrade_to_closure() on the
        matching frame cell.
        """
        bytecode = frame.bytecode
        names = bytecode.cellvars + bytecode.freevars
        ncells = len(frame.cells)
        cells = [None] * ncells
        for i in xrange(ncells):
            cells[i] = frame.cells[i].upgrade_to_closure(self, frame, i)
        return W_BindingObject(
            self, names, cells, frame.w_self, frame.lexical_scope)

    @jit.unroll_safe
    def newbinding_fromblock(self, block):
        """Build a W_BindingObject from 'block'.

        The names are the block bytecode's cellvars followed by its
        freevars; the cells are a copy of the block's list of cells.
        """
        bytecode = block.bytecode
        names = bytecode.cellvars + bytecode.freevars
        cells = block.cells[:]
        return W_BindingObject(
            self, names, cells, block.w_self, block.lexical_scope)

    def buildname(self, name, w_scope):
        """Return 'name' qualified by its scope.

        The result is "<scope>::<name>" when 'w_scope' is given and is not
        self.w_object, and plain 'name' otherwise.
        """
        if w_scope is None:
            return name
        assert isinstance(w_scope, W_ModuleObject)
        if w_scope is self.w_object:
            return name
        return "%s::%s" % (self.obj_to_s(w_scope), name)

    def int_w(self, w_obj):
        """Return the result of w_obj.int_w() for this space."""
        value = w_obj.int_w(self)
        return value

    def bigint_w(self, w_obj):
        """Return the result of w_obj.bigint_w() for this space."""
        value = w_obj.bigint_w(self)
        return value

    def float_w(self, w_obj):
        """Return the result of w_obj.float_w() for this space."""
        value = w_obj.float_w(self)
        return value

    def symbol_w(self, w_obj):
        """Return the result of w_obj.symbol_w() for this space."""
        value = w_obj.symbol_w(self)
        return value

    def str_w(self, w_obj):
        """Unwrap a string object: return the result of w_obj.str_w()."""
        value = w_obj.str_w(self)
        return value

    def str0_w(self, w_obj):
        """Unwrap a string object that must not contain a null byte.

        An ArgumentError built with self.error() is raised when it does.
        """
        string = w_obj.str_w(self)
        if "\x00" not in string:
            return string
        raise self.error(self.w_ArgumentError, "string contains null byte")

    def listview(self, w_obj):
        """Return the result of w_obj.listview() for this space."""
        items_w = w_obj.listview(self)
        return items_w

    # Methods for implementing the language semantics.

    def is_true(self, w_obj):
        """Return the result of w_obj.is_true() for this space."""
        truth = w_obj.is_true(self)
        return truth

    def getclass(self, w_receiver):
        """Return the result of w_receiver.getclass() for this space."""
        w_cls = w_receiver.getclass(self)
        return w_cls

    def getsingletonclass(self, w_receiver):
        """Return the result of w_receiver.getsingletonclass() for this space."""
        w_cls = w_receiver.getsingletonclass(self)
        return w_cls

    def getscope(self, w_receiver):
        """Return 'w_receiver' itself when it is a W_ModuleObject, and its
        class otherwise."""
        if isinstance(w_receiver, W_ModuleObject):
            return w_receiver
        return self.getclass(w_receiver)

    @jit.unroll_safe
    def getnonsingletonclass(self, w_receiver):
        """Return the first class of ``w_receiver`` that is not a singleton.

        Starts at ``self.getclass(w_receiver)`` and follows ``superclass``
        links for as long as ``is_singleton`` is true.
        """
        w_cls = self.getclass(w_receiver)
        while True:
            if not w_cls.is_singleton:
                return w_cls
            w_cls = w_cls.superclass

    def getclassfor(self, cls):
        """Return the class object for ``cls.classdef`` via ``getclassobject``."""
        classdef = cls.classdef
        return self.getclassobject(classdef)

    def getclassobject(self, classdef):
        """Return ``getorbuild(classdef)`` from the ``ClassCache`` held by
        ``self.fromcache``."""
        cache = self.fromcache(ClassCache)
        return cache.getorbuild(classdef)

    def getmoduleobject(self, moduledef):
        """Return ``getorbuild(moduledef)`` from the ``ModuleCache`` held by
        ``self.fromcache``."""
        cache = self.fromcache(ModuleCache)
        return cache.getorbuild(moduledef)

    def find_const(self, w_module, name):
        """Look up constant ``name`` on ``w_module`` (with autoload enabled).

        When the module's ``find_const`` returns None, the result of sending
        ``const_missing`` (with the name as a symbol) to the module is
        returned instead.
        """
        w_found = w_module.find_const(self, name, autoload=True)
        if w_found is not None:
            return w_found
        return self.send(w_module, "const_missing", [self.newsymbol(name)])

    @jit.elidable
    def _valid_const_name(self, name):
        """Return True if ``name`` is an acceptable constant name.

        A valid name is non-empty, starts with an uppercase character, and
        every following character is alphanumeric, an underscore, or has an
        ordinal above 127.
        """
        # An empty name has no first character; indexing it would be an
        # out-of-range access, so report it as invalid instead.
        if not name or not name[0].isupper():
            return False
        for i in range(1, len(name)):
            ch = name[i]
            if not (ch.isalnum() or ch == "_" or ord(ch) > 127):
                return False
        return True

    def _check_const_name(self, name):
        if not self._valid_const_name(name):
            raise self.error(self.w_NameError, "wrong constant name %s" % name)

    def set_const(self, module, name, w_value):
        """Validate ``name`` and then store ``w_value`` under it on ``module``.

        The name check runs first, so an invalid name raises before the
        module is touched.
        """
        self._check_const_name(name)
        # Only reached when the name check did not raise.
        module.set_const(self, name, w_value)

    @jit.unroll_safe
    def _find_lexical_const(self, lexical_scope, name, autoload=True):
        """Look up constant ``name`` starting from ``lexical_scope``.

        The search runs in three stages and returns the first non-None
        result:

        1. ``find_local_const`` on the module of each scope along the
           ``backscope`` chain, stopping early when a scope's module is
           ``self.w_top_self``.
        2. If ``lexical_scope`` is not None: ``find_const`` on its module
           and then, while the current module is a ``W_ClassObject``, on
           each ``superclass`` in turn.
        3. ``find_const`` on ``self.w_object``, unless stage 2 reached
           ``self.w_basicobject`` without having visited ``self.w_object``.

        ``autoload`` is forwarded unchanged to every lookup.  Returns None
        when no stage finds the constant.
        """
        w_res = None
        scope = lexical_scope
        # perform lexical search but skip Object
        while scope is not None:
            w_mod = scope.w_mod
            if w_mod is self.w_top_self:
                break
            w_res = w_mod.find_local_const(self, name, autoload=autoload)
            if w_res is not None:
                return w_res
            scope = scope.backscope

        # Tracks whether the superclass walk below has passed through Object.
        object_seen = False
        fallback_scope = self.w_object

        if lexical_scope is not None:
            w_mod = lexical_scope.w_mod
            while w_mod is not None:
                object_seen = object_seen or w_mod is self.w_object
                # BasicObject was our starting point, do not use Object
                # as fallback
                if w_mod is self.w_basicobject and not object_seen:
                    fallback_scope = None
                w_res = w_mod.find_const(self, name, autoload=autoload)
                if w_res is not None:
                    return w_res
                # Only class objects have a superclass to continue with.
                if isinstance(w_mod, W_ClassObject):
                    w_mod = w_mod.superclass
                else:
                    break

        if fallback_scope is not None:
            w_res = fallback_scope.find_const(self, name, autoload=autoload)
        return w_res

    @jit.unroll_safe
    def find_lexical_const(self, lexical_scope, name):
        """Look up constant ``name`` lexically, falling back to
        ``const_missing``.

        When ``_find_lexical_const`` returns None, ``const_missing`` is sent
        to the module of ``lexical_scope`` (or to ``self.w_object`` when
        there is no lexical scope) and that result is returned.
        """
        w_found = self._find_lexical_const(lexical_scope, name)
        if w_found is not None:
            return w_found
        if lexical_scope is None:
            w_target = self.w_object
        else:
            w_target = lexical_scope.w_mod
        return self.send(w_target, "const_missing", [self.newsymbol(name)])

    def find_instance_var(self, w_obj, name):
        """Return instance variable ``name`` of ``w_obj``, or ``self.w_nil``
        when the object's lookup returns None."""
        w_found = w_obj.find_instance_var(self, name)
        if w_found is None:
            return self.w_nil
        return w_found

    def set_instance_var(self, w_obj, name, w_value):
        """Store ``w_value`` as instance variable ``name`` by delegating to
        ``w_obj.set_instance_var``."""
        # The object owns its own variable storage.
        w_obj.set_instance_var(self, name, w_value)

    def find_class_var(self, w_module, name):
        """Return class variable ``name`` of ``w_module``.

        Raises a NameError-typed error mentioning the variable and the
        module's ``to_s`` when the module's lookup returns None.
        """
        w_found = w_module.find_class_var(self, name)
        if w_found is not None:
            return w_found
        module_name = self.obj_to_s(w_module)
        message = "uninitialized class variable %s in %s" % (
            name, module_name)
        raise self.error(self.w_NameError, message)

    def set_class_var(self, w_module, name, w_value):
        """Store ``w_value`` as class variable ``name`` by delegating to
        ``w_module.set_class_var``."""
        # The module owns its own class-variable storage.
        w_module.set_class_var(self, name, w_value)

    def send(self, w_receiver, name, args_w=None, block=None):
        """Call method ``name`` on ``w_receiver``.

        The method is looked up on ``self.getclass(w_receiver)`` and handed
        to ``_send_raw`` together with the receiver, its class, the
        arguments (an empty list when ``args_w`` is None) and ``block``.
        """
        if args_w is None:
            args_w = []
        w_cls = self.getclass(w_receiver)
        return self._send_raw(
            name, w_cls.find_method(self, name), w_receiver, w_cls, args_w,
            block)

    def send_super(self, w_cls, w_receiver, name, args_w, block=None):
        """Call method ``name`` on ``w_receiver`` using the method found by
        ``w_cls.find_method_super``, dispatching through ``_send_raw``."""
        super_method = w_cls.find_method_super(self, name)
        return self._send_raw(
            name, super_method, w_receiver, w_cls, args_w, block)

    def _send_raw(self, name, raw_method, w_receiver, w_cls, args_w, block):
        if raw_method is None:
            method_missing = w_cls.find_method(self, "method_missing")
            if method_missing is None:
                class_name = self.str_w(self.send(w_cls, "to_s"))
                raise self.error(
                    self.w_NoMethodError,
                    "undefined method `%s' for %s" % (name, class_name))
            else:
                args_w = [self.newsymbol(name)] + args_w
                return method_missing.call(self, w_receiver, args_w, block)
        return raw_method.call(self, w_receiver, args_w, block)

    def respond_to(self, w_receiver, name):
        """Return True if the class of ``w_receiver`` finds a method called
        ``name``."""
        found = self.getclass(w_receiver).find_method(self, name)
        return found is not None

    def is_kind_of(self, w_obj, w_cls):
        """Return ``w_obj.is_kind_of(self, w_cls)``."""
        matches = w_obj.is_kind_of(self, w_cls)
        return matches

    @jit.unroll_safe
    def invoke_block(self, block, args_w, block_arg=None):
        """Run ``block`` with ``args_w`` and return the frame's result.

        A new frame is created from the block's bytecode and captured
        context.  Lambdas bind arguments with ``handle_args``; other blocks
        use ``handle_block_args``, and only when the bytecode declares
        positional, splat or block arguments.  The block's cells are then
        installed after the frame's own cellvars before the frame is
        executed.
        """
        bc = block.bytecode
        frame = self.create_frame(
            bc, w_self=block.w_self, lexical_scope=block.lexical_scope,
            block=block.block, parent_interp=block.parent_interp,
            top_parent_interp=block.top_parent_interp,
            regexp_match_cell=block.regexp_match_cell,
        )
        if block.is_lambda:
            frame.handle_args(self, bc, args_w, block_arg)
        elif (len(bc.arg_pos) != 0 or bc.splat_arg_pos != -1 or
                bc.block_arg_pos != -1):
            frame.handle_block_args(self, bc, args_w, block_arg)

        assert len(block.cells) == len(bc.freevars)
        # Free variables live right after the frame's own cell variables.
        first_free = len(bc.cellvars)
        for idx in xrange(len(bc.freevars)):
            frame.cells[first_free + idx] = block.cells[idx]

        with self.getexecutioncontext().visit_frame(frame):
            return self.execute_frame(frame, bc)

    def invoke_function(self, w_function, w_receiver, args_w, block):
        """Call ``w_function`` on ``w_receiver`` through ``_send_raw``, using
        the function's own ``name`` and the receiver's class."""
        w_cls = self.getclass(w_receiver)
        return self._send_raw(
            w_function.name, w_function, w_receiver, w_cls, args_w, block)

    def error(self, w_type, msg="", optargs=None):
        """Build a ``RubyError`` wrapping a new instance of ``w_type``.

        The exception is created by sending ``new`` to ``w_type`` with the
        wrapped message followed by ``optargs`` (if any).  The error is
        returned, not raised; callers raise it themselves.
        """
        if optargs:
            extra_w = optargs
        else:
            extra_w = []
        w_exc = self.send(
            w_type, "new", [self.newstr_fromstr(msg)] + extra_w)
        assert isinstance(w_exc, W_ExceptionObject)
        return RubyError(w_exc)

    def hash_w(self, w_obj):
        """Send ``hash`` to ``w_obj`` and return the result unwrapped with
        ``int_w``."""
        w_hash = self.send(w_obj, "hash")
        return self.int_w(w_hash)

    def eq_w(self, w_obj1, w_obj2):
        """Return the truth value of sending ``eql?`` to ``w_obj2`` with
        ``w_obj1`` as the argument."""
        w_equal = self.send(w_obj2, "eql?", [w_obj1])
        return self.is_true(w_equal)

    def register_exit_handler(self, w_proc):
        """Append ``w_proc`` to ``self.exit_handlers_w``."""
        handlers_w = self.exit_handlers_w
        handlers_w.append(w_proc)

    def run_exit_handlers(self):
        """Run the registered exit handlers, most recently added first.

        Each handler is popped and sent ``call``.  A ``RubyError`` carrying
        a ``W_SystemExit`` records that exception's status; any other
        ``RubyError`` has its traceback printed.  Returns the last recorded
        status, or -1 if none was recorded.
        """
        status = -1
        while len(self.exit_handlers_w) > 0:
            w_proc = self.exit_handlers_w.pop()
            try:
                self.send(w_proc, "call")
            except RubyError as e:
                w_exc = e.w_value
                if not isinstance(w_exc, W_SystemExit):
                    print_traceback(self, e.w_value)
                else:
                    status = w_exc.status
        return status

    def subscript_access(self, length, w_idx, w_count):
        """Normalize a subscript against a sequence of ``length`` elements.

        ``w_idx`` is either a ``W_RangeObject`` (used only when ``w_count``
        is falsy) or an index; ``w_count`` is an optional element count.
        All values are converted with ``convert_type(..., "to_int")``.

        Returns a 4-tuple ``(start, end, as_range, nil)``:

        * ``start`` -- the start index, with ``length`` added once if it was
          negative.
        * ``end`` -- the exclusive end index for range-style access, clamped
          to ``start <= end <= length``; 0 for plain index access.
        * ``as_range`` -- True when a range or a non-negative count was
          given.
        * ``nil`` -- True when the access is out of bounds.

        A negative count returns ``(start, count, False, True)`` right away.
        """
        inclusive = False
        as_range = False
        end = 0
        nil = False

        if isinstance(w_idx, W_RangeObject) and not w_count:
            start = self.int_w(self.convert_type(
                w_idx.w_start, self.w_fixnum, "to_int"))
            end = self.int_w(self.convert_type(
                w_idx.w_end, self.w_fixnum, "to_int"))
            inclusive = not w_idx.exclusive
            as_range = True
        else:
            start = self.int_w(self.convert_type(
                w_idx, self.w_fixnum, "to_int"))
            if w_count:
                # Until it is offset by ``start`` below, ``end`` holds the
                # requested count.
                end = self.int_w(self.convert_type(
                    w_count, self.w_fixnum, "to_int"))
                if end >= 0:
                    as_range = True
                else:
                    # Negative count: report "nil" immediately.
                    if start < 0:
                        start += length
                    return (start, end, False, True)

        # Negative indices count from the end of the sequence.
        if start < 0:
            start += length

        if as_range:
            if w_count:
                # Turn the count into an end index.
                end += start
            if end < 0:
                end += length
            if inclusive:
                # Convert an inclusive end into an exclusive one.
                end += 1
            if end < start:
                end = start
            elif end > length:
                end = length
            # ``start == length`` is still in bounds for range access.
            nil = start < 0 or end < 0 or start > length
        else:
            nil = start < 0 or start >= length

        return (start, end, as_range, nil)

    def convert_type(self, w_obj, w_cls, method, raise_error=True,
                     reraise_error=False):
        """Convert ``w_obj`` to an instance of ``w_cls`` by sending ``method``.

        ``w_obj`` is returned unchanged when it already is a kind of
        ``w_cls``.  Otherwise ``method`` is sent to it and:

        * if the send raises ``RubyError``: the error is re-raised when
          ``reraise_error`` is true; otherwise ``self.w_nil`` is returned
          when ``raise_error`` is false, else a TypeError-typed error
          ("can't convert X into Y") is raised;
        * if the result is falsy, or is ``self.w_nil`` while ``raise_error``
          is false, ``self.w_nil`` is returned;
        * if the result is not a kind of ``w_cls``, a TypeError-typed error
          describing the bad conversion is raised;
        * otherwise the result is returned.
        """
        if self.is_kind_of(w_obj, w_cls):
            return w_obj

        try:
            w_res = self.send(w_obj, method)
        except RubyError as e:
            if reraise_error:
                raise e
            # The original error is being swallowed or replaced below.
            self.mark_topframe_not_escaped()
            if not raise_error:
                return self.w_nil
            src_cls_name = self.obj_to_s(self.getclass(w_obj))
            w_cls_name = self.obj_to_s(w_cls)
            raise self.error(
                self.w_TypeError,
                "can't convert %s into %s" % (src_cls_name, w_cls_name))

        # NOTE: ``and`` binds tighter than ``or``, so this reads as
        # ``(not w_res) or (w_res is self.w_nil and not raise_error)``.
        if not w_res or w_res is self.w_nil and not raise_error:
            return self.w_nil
        elif not self.is_kind_of(w_res, w_cls):
            src_cls = self.obj_to_s(self.getclass(w_obj))
            res_cls = self.obj_to_s(self.getclass(w_res))
            w_cls_name = self.obj_to_s(w_cls)
            raise self.error(
                self.w_TypeError,
                "can't convert %s to %s (%s#%s gives %s)" % (
                    src_cls, w_cls_name, src_cls, method, res_cls))
        else:
            return w_res

    def mark_topframe_not_escaped(self):
        """Set ``escaped`` to False on the execution context's top frame."""
        top_frame = self.getexecutioncontext().gettopframe()
        top_frame.escaped = False

    def infect(self, w_dest, w_src, taint=True, untrust=True, freeze=False):
        """
        Copy flag state from ``w_src`` to ``w_dest``.

        By default the tainted and untrusted flags are copied.  The frozen
        flag is only copied when ``freeze`` is true, as this is the rarer
        case in MRI.  A flag is only ever set on the destination, never
        cleared.
        """
        if taint:
            if self.is_true(w_src.get_flag(self, "tainted?")):
                w_dest.set_flag(self, "tainted?")
        if untrust:
            if self.is_true(w_src.get_flag(self, "untrusted?")):
                w_dest.set_flag(self, "untrusted?")
        if freeze:
            if self.is_true(w_src.get_flag(self, "frozen?")):
                w_dest.set_flag(self, "frozen?")

    def getaddrstring(self, w_obj):
        """Return the unique id of ``w_obj`` as a fixed-width lowercase hex
        string.

        The width is two characters per byte of ``llmemory.Address``.  The
        id is consumed four bits at a time, using the wrapped ``&`` and
        ``>>`` operations, filling the string from its last character
        towards its first.
        """
        w_id = self.newint_or_bigint(compute_unique_id(w_obj))
        w_4 = self.newint(4)
        w_0x0F = self.newint(0x0F)
        i = 2 * rffi.sizeof(llmemory.Address)
        addrstring = [" "] * i
        while True:
            # Lowest remaining nibble of the id.
            n = self.int_w(self.send(w_id, "&", [w_0x0F]))
            n += ord("0")
            if n > ord("9"):
                # Values above 9 map onto 'a'..'f'.
                n += (ord("a") - ord("9") - 1)
            i -= 1
            addrstring[i] = chr(n)
            if i == 0:
                break
            w_id = self.send(w_id, ">>", [w_4])
        return "".join(addrstring)

    def any_to_s(self, w_obj):
        """Return a generic ``#<ClassName:0xADDR>`` description of ``w_obj``.

        The class name is the ``to_s`` of the object's non-singleton class
        and the address part comes from ``getaddrstring``.
        """
        class_name = self.obj_to_s(self.getnonsingletonclass(w_obj))
        address = self.getaddrstring(w_obj)
        return "#<%s:0x%s>" % (class_name, address)

    def obj_to_s(self, w_obj):
        """Send ``to_s`` to ``w_obj`` and return the result unwrapped with
        ``str_w``."""
        w_string = self.send(w_obj, "to_s")
        return self.str_w(w_string)

    def compare(self, w_a, w_b, block=None):
        """Compare ``w_a`` with ``w_b`` and return the wrapped result.

        Without a block, ``<=>`` is sent to ``w_a``; with one, the block is
        invoked with both objects.  A result of ``self.w_nil`` raises an
        ArgumentError-typed error naming the classes of both operands.
        """
        if block is not None:
            w_cmp_res = self.invoke_block(block, [w_a, w_b])
        else:
            w_cmp_res = self.send(w_a, "<=>", [w_b])

        if w_cmp_res is not self.w_nil:
            return w_cmp_res

        left_name = self.obj_to_s(self.getclass(w_a))
        right_name = self.obj_to_s(self.getclass(w_b))
        raise self.error(
            self.w_ArgumentError,
            "comparison of %s with %s failed" % (left_name, right_name)
        )
Esempio n. 12
0
        # Windows: re-use the last separator character (/ or \\) when
        # appending the __pycache__ path.
        lastsep = pathname[lastpos - 1]
    else:
        lastsep = os.sep
    ext = fname
    for i in range(len(fname)):
        if fname[i] == '.':
            ext = fname[:i + 1]

    result = (pathname[:lastpos] + "__pycache__" + lastsep + ext + PYC_TAG +
              '.pyc')
    return result


@signature(types.str0(), returns=types.any())
def make_source_pathname(pathname):
    "Given the path to a .pyc file, return the path to its .py file."
    # (...)/__pycache__/foo.<tag>.pyc -> (...)/foo.py

    right = rightmost_sep(pathname)
    if right < 0:
        return None
    left = rightmost_sep(pathname[:right]) + 1
    assert left >= 0
    if pathname[left:right] != '__pycache__':
        return None

    # Now verify that the path component to the right of the last
    # slash has two dots in it.
    rightpart = pathname[right + 1:]
Esempio n. 13
0
class BufferedMixin:
    _mixin_ = True

    def __init__(self, space):
        """Set up an uninitialized buffered stream: no buffer, no lock,
        neither readable nor writable."""
        W_IOBase.__init__(self, space)
        self.state = STATE_ZERO

        self.buffer = None

        # Absolute position inside the raw stream (-1 if unknown).
        self.abs_pos = 0
        # Current logical position in the buffer.
        self.pos = 0
        # Position of the raw stream in the buffer.
        self.raw_pos = 0

        # Just after the last buffered byte in the buffer, or -1 if the
        # buffer isn't ready for reading.
        self.read_end = -1

        # Just after the last byte actually written.
        self.write_pos = 0
        # Just after the last byte waiting to be written, or -1 if the
        # buffer isn't ready for writing.
        self.write_end = -1

        self.lock = None

        self.readable = False
        self.writable = False

    def _reader_reset_buf(self):
        """Mark the buffer as not ready for reading."""
        # -1 is the "not ready for reading" marker for ``read_end``.
        self.read_end = -1

    def _writer_reset_buf(self):
        """Mark the buffer as not ready for writing and rewind the write
        position."""
        self.write_pos = 0
        # -1 is the "not ready for writing" marker for ``write_end``.
        self.write_end = -1

    def _init(self, space):
        """Allocate the buffer and lock, then try to cache the raw position.

        Raises ValueError when ``self.buffer_size`` is not strictly
        positive.  A failing ``tell`` on the raw stream is ignored.
        """
        if self.buffer_size <= 0:
            raise oefmt(space.w_ValueError,
                        "buffer size must be strictly positive")

        if not space.config.translation.split_gc_address_space:
            # TODO: test whether using the raw buffer is faster
            self.buffer = ByteBuffer(self.buffer_size)
        else:
            # When using split GC address space, it is not possible to get the
            # raw address of a GC buffer. Therefore we use a buffer backed by
            # raw memory.
            self.buffer = RawByteBuffer(self.buffer_size)

        self.lock = TryLock(space)

        # Best effort only: an OperationError from tell() is swallowed.
        try:
            self._raw_tell(space)
        except OperationError:
            pass

    def _check_init(self, space):
        """Raise ValueError if the object is uninitialized or detached."""
        state = self.state
        if state == STATE_ZERO:
            raise oefmt(space.w_ValueError,
                        "I/O operation on uninitialized object")
        if state == STATE_DETACHED:
            raise oefmt(space.w_ValueError, "raw stream has been detached")

    def _check_closed(self, space, message=None):
        """Run the initialization check, then ``W_IOBase._check_closed``
        with the optional ``message``."""
        # An uninitialized or detached object is reported first.
        self._check_init(space)
        W_IOBase._check_closed(self, space, message)

    def _raw_tell(self, space):
        """Call ``tell`` on the raw stream, cache the result in ``abs_pos``
        and return it.

        Raises IOError when the raw stream reports a negative position.
        """
        pos = space.r_longlong_w(space.call_method(self.w_raw, "tell"))
        if pos >= 0:
            self.abs_pos = pos
            return pos
        raise oefmt(space.w_IOError,
                    "raw stream returned invalid position")

    def closed_get_w(self, space):
        """Return the raw stream's ``closed`` attribute."""
        self._check_init(space)
        w_attrname = space.newtext("closed")
        return space.getattr(self.w_raw, w_attrname)

    def name_get_w(self, space):
        """Return the raw stream's ``name`` attribute."""
        self._check_init(space)
        w_attrname = space.newtext("name")
        return space.getattr(self.w_raw, w_attrname)

    def mode_get_w(self, space):
        """Return the raw stream's ``mode`` attribute."""
        self._check_init(space)
        w_attrname = space.newtext("mode")
        return space.getattr(self.w_raw, w_attrname)

    def readable_w(self, space):
        """Return the result of calling ``readable()`` on the raw stream."""
        self._check_init(space)
        w_result = space.call_method(self.w_raw, "readable")
        return w_result

    def writable_w(self, space):
        """Return the result of calling ``writable()`` on the raw stream."""
        self._check_init(space)
        w_result = space.call_method(self.w_raw, "writable")
        return w_result

    def seekable_w(self, space):
        """Return the result of calling ``seekable()`` on the raw stream."""
        self._check_init(space)
        w_result = space.call_method(self.w_raw, "seekable")
        return w_result

    def isatty_w(self, space):
        """Return the result of calling ``isatty()`` on the raw stream."""
        self._check_init(space)
        w_result = space.call_method(self.w_raw, "isatty")
        return w_result

    def repr_w(self, space):
        """Return ``<TypeName name=REPR>``, or just ``<TypeName>`` when
        reading the ``name`` attribute raises an error matching
        ``Exception``.

        Errors that do not match ``space.w_Exception`` propagate.
        """
        typename = space.type(self).name
        try:
            w_name = space.getattr(self, space.newtext("name"))
        except OperationError as e:
            if not e.match(space, space.w_Exception):
                raise
            return space.newtext("<%s>" % (typename, ))
        name_repr = space.text_w(space.repr(w_name))
        return space.newtext("<%s name=%s>" % (typename, name_repr))

    # ______________________________________________

    @signature(types.any(), returns=types.int())
    def _readahead(self):
        """Return the number of buffered bytes that can still be read, or 0
        when the stream is not readable or the read buffer is not ready."""
        if not self.readable or self.read_end == -1:
            return 0
        available = self.read_end - self.pos
        assert available >= 0
        return available

    def _raw_offset(self):
        """Return ``raw_pos - pos`` when ``raw_pos`` is non-negative and a
        read or write buffer is active; otherwise return 0."""
        if self.raw_pos < 0:
            return 0
        if self.readable and self.read_end != -1:
            return self.raw_pos - self.pos
        if self.writable and self.write_end != -1:
            return self.raw_pos - self.pos
        return 0

    def tell_w(self, space):
        """Return the logical stream position as a wrapped int: the raw
        stream's position minus the buffer offset."""
        self._check_init(space)
        raw_position = self._raw_tell(space)
        return space.newint(raw_position - self._raw_offset())

    @unwrap_spec(pos=r_longlong, whence=int)
    def seek_w(self, space, pos, whence=0):
        """Seek to ``pos`` relative to ``whence`` and return the new
        position as a wrapped int.

        Raises ValueError when ``whence`` is not 0, 1 or 2, and the error
        from ``_check_closed`` when the file is closed.

        Fast path (no lock taken): for a readable stream with buffered data
        and ``whence`` of 0 or 1, a target that falls inside the current
        buffer only moves ``self.pos``.

        Slow path (under the lock): pending writes are flushed, the raw
        stream is asked to seek, and the buffer state is invalidated.
        """
        self._check_init(space)
        if whence not in (0, 1, 2):
            raise oefmt(space.w_ValueError,
                        "whence must be between 0 and 2, not %d", whence)
        self._check_closed(space, "seek of closed file")
        if whence != 2 and self.readable:
            # Check if seeking leaves us inside the current buffer, so as to
            # return quickly if possible. Also, we needn't take the lock in
            # this fast path.
            if self.abs_pos == -1:
                # Cached raw position is unknown; refresh it.
                self._raw_tell(space)
            current = self.abs_pos
            available = self._readahead()
            if available > 0:
                if whence == 0:
                    # Convert the absolute target into an offset from the
                    # current logical position.
                    offset = pos - (current - self._raw_offset())
                else:
                    offset = pos
                if -self.pos <= offset <= available:
                    newpos = self.pos + int(offset)
                    assert newpos >= 0
                    self.pos = newpos
                    return space.newint(current - available + offset)

        # Fallback: invoke raw seek() method and clear buffer
        with self.lock:
            if self.writable:
                self._writer_flush_unlocked(space)
                self._writer_reset_buf()

            if whence == 1:
                # A relative seek is relative to the logical position, so
                # subtract the buffer offset from the raw position.
                pos -= self._raw_offset()
            n = self._raw_seek(space, pos, whence)
            self.raw_pos = -1
            if self.readable:
                self._reader_reset_buf()
            return space.newint(n)

    def _raw_seek(self, space, pos, whence):
        """Call ``seek(pos, whence)`` on the raw stream, cache the resulting
        position in ``abs_pos`` and return it.

        Raises IOError when the raw stream reports a negative position.
        """
        w_new_pos = space.call_method(
            self.w_raw, "seek", space.newint(pos), space.newint(whence))
        new_pos = space.r_longlong_w(w_new_pos)
        if new_pos >= 0:
            self.abs_pos = new_pos
            return new_pos
        raise oefmt(space.w_IOError,
                    "Raw stream returned invalid position")

    def _closed(self, space):
        """Return the truth value of the raw stream's ``closed`` attribute."""
        w_closed = space.getattr(self.w_raw, space.newtext("closed"))
        return space.is_true(w_closed)

    def close_w(self, space):
        """Flush this object and close the raw stream.

        Returns immediately when the raw stream already reports itself as
        closed.  The raw ``close`` runs in a ``finally`` clause, so it is
        still called when ``flush`` raises.
        """
        self._check_init(space)
        with self.lock:
            if self._closed(space):
                return
        # The flush is issued without holding the lock; the lock is taken
        # again only around the raw close.
        try:
            space.call_method(self, "flush")
        finally:
            with self.lock:
                space.call_method(self.w_raw, "close")

    def simple_flush_w(self, space):
        """Return the result of calling ``flush()`` on the raw stream."""
        self._check_init(space)
        w_result = space.call_method(self.w_raw, "flush")
        return w_result

    def _writer_flush_unlocked(self, space):
        """Write the pending bytes ``buffer[write_pos:write_end]`` to the raw
        stream and reset the write buffer.

        Does nothing when there is no pending data.  The raw stream is
        first repositioned so that it points at ``write_pos``.  A
        ``BlockingIOError`` from the raw write is converted with
        ``make_write_blocking_error``.  Signal handlers are run after every
        raw write.

        The name suggests the caller is expected to hold the lock; this
        method does not take it itself.
        """
        if self.write_end == -1 or self.write_pos == self.write_end:
            return
        # First, rewind
        rewind = self._raw_offset() + (self.pos - self.write_pos)
        if rewind != 0:
            self._raw_seek(space, -rewind, 1)
            self.raw_pos -= rewind

        # NOTE(review): ``written`` is accumulated but not read afterwards
        # in this method.
        written = 0
        while self.write_pos < self.write_end:
            try:
                n = self._raw_write(space, self.write_pos, self.write_end)
            except BlockingIOError:
                raise make_write_blocking_error(space, 0)
            self.write_pos += n
            self.raw_pos = self.write_pos
            written += n
            # Partial writes can return successfully when interrupted by a
            # signal (see write(2)).  We must run signal handlers before
            # blocking another time, possibly indefinitely.
            space.getexecutioncontext().checksignals()

        self._writer_reset_buf()

    def _write(self, space, data):
        """Write ``data`` to the raw stream once and return the byte count
        it reports.

        The raw ``write`` is retried for as long as ``trap_eintr`` accepts
        the raised error.  Raises ``BlockingIOError`` when the raw stream
        returns None, and IOError when the reported count lies outside
        ``0..len(data)``.  ``abs_pos`` is advanced when it is known.
        """
        w_data = space.newbytes(data)
        while True:
            try:
                w_written = space.call_method(self.w_raw, "write", w_data)
            except OperationError as e:
                if not trap_eintr(space, e):
                    raise
                # otherwise fall through and try again
            else:
                break

        if space.is_w(w_written, space.w_None):
            # Non-blocking stream would have blocked.
            raise BlockingIOError()

        written = space.getindex_w(w_written, space.w_IOError)
        if written < 0 or written > len(data):
            raise oefmt(space.w_IOError, "raw write() returned invalid length")
        if self.abs_pos != -1:
            self.abs_pos += written
        return written

    def _raw_write(self, space, start, end):
        """Write ``self.buffer[start:end]`` with ``_write`` and return the
        number of bytes written."""
        chunk = self.buffer[start:end]
        return self._write(space, chunk)

    def detach_w(self, space):
        """Flush, separate the raw stream from this object and return it.

        Afterwards ``self.w_raw`` is None and the state is
        ``STATE_DETACHED``.
        """
        self._check_init(space)
        space.call_method(self, "flush")
        w_detached = self.w_raw
        self.w_raw = None
        self.state = STATE_DETACHED
        return w_detached

    def fileno_w(self, space):
        """Return the result of calling ``fileno()`` on the raw stream."""
        self._check_init(space)
        w_result = space.call_method(self.w_raw, "fileno")
        return w_result

    @unwrap_spec(w_size=WrappedDefault(None))
    def truncate_w(self, space, w_size):
        """Call ``truncate(w_size)`` on the raw stream and return its result.

        Runs under the lock.  A writable stream is flushed and rewound
        first, and the cached absolute position is invalidated before the
        raw call.
        """
        self._check_init(space)
        with self.lock:
            if self.writable:
                self._flush_and_rewind_unlocked(space)
            # The cached position is no longer trustworthy after this.
            self.abs_pos = -1

            w_result = space.call_method(self.w_raw, "truncate", w_size)
            return w_result

    # ________________________________________________________________
    # Read methods

    def read_w(self, space, w_size=None):
        """Read up to ``w_size`` bytes and return them as wrapped bytes.

        A size of -1 (as produced by ``convert_size``) reads until the end
        of the stream.  Sizes below -1 raise ValueError.  Non-negative
        sizes are served from the buffer without locking when possible,
        falling back to the locked generic path.
        """
        self._check_init(space)
        self._check_closed(space, "read of closed file")
        size = convert_size(space, w_size)

        if size < -1:
            raise oefmt(space.w_ValueError,
                        "read length must be positive or -1")
        if size == -1:
            # read until the end of stream
            with self.lock:
                return self._read_all(space)

        res = self._read_fast(size)
        if res is None:
            with self.lock:
                res = self._read_generic(space, size)
        return space.newbytes(res)

    @unwrap_spec(size=int)
    def peek_w(self, space, size=0):
        """Return buffered bytes without advancing the position.

        If any bytes are buffered, all of them are returned.  Otherwise the
        buffer is refilled once from the raw stream and whatever was read
        is returned (empty bytes if the raw read would block).  The
        ``size`` argument is not consulted; the local name is reused for
        the number of bytes read.
        """
        self._check_init(space)
        self._check_closed(space, "peek of closed file")
        with self.lock:
            if self.writable:
                self._flush_and_rewind_unlocked(space)
            # Constraints:
            # 1. we don't want to advance the file position.
            # 2. we don't want to lose block alignment, so we can't shift the
            #    buffer to make some place.
            # Therefore, we either return `have` bytes (if > 0), or a full
            # buffer.
            have = self._readahead()
            if have > 0:
                data = self.buffer[self.pos:self.pos + have]
                return space.newbytes(data)

            # Fill the buffer from the raw stream, and copy it to the result
            self._reader_reset_buf()
            try:
                size = self._fill_buffer(space)
            except BlockingIOError:
                size = 0
            self.pos = 0
            data = self.buffer[0:size]
            return space.newbytes(data)

    @unwrap_spec(size=int)
    def read1_w(self, space, size):
        """Return up to ``size`` bytes using at most one raw read.

        Raises ValueError for a negative ``size``; a ``size`` of 0 returns
        empty bytes without taking the lock.  When the buffer is empty it
        is refilled once (after flushing pending writes); a raw read that
        would block counts as zero bytes.
        """
        self._check_init(space)
        self._check_closed(space, "read of closed file")

        if size < 0:
            raise oefmt(space.w_ValueError, "read length must be positive")
        if size == 0:
            return space.newbytes("")

        with self.lock:
            # Return up to n bytes.  If at least one byte is buffered, we only
            # return buffered bytes.  Otherwise, we do one raw read.

            # XXX: this mimicks the io.py implementation but is probably
            # wrong. If we need to read from the raw stream, then we could
            # actually read all `n` bytes asked by the caller (and possibly
            # more, so as to fill our buffer for the next reads).

            have = self._readahead()
            if have == 0:
                if self.writable:
                    self._flush_and_rewind_unlocked(space)

                # Fill the buffer from the raw stream
                self._reader_reset_buf()
                self.pos = 0
                try:
                    have = self._fill_buffer(space)
                except BlockingIOError:
                    have = 0
            # Never hand out more than what is buffered.
            if size > have:
                size = have
            endpos = self.pos + size
            data = self.buffer[self.pos:endpos]
            self.pos = endpos
            return space.newbytes(data)

    def _read_all(self, space):
        "Read all the file, don't update the cache"
        # Must run with the lock held!
        #
        # Returns wrapped bytes: the currently buffered data followed by
        # everything the raw stream's read() yields until it returns empty
        # bytes or None.  If the raw stream returns None before any byte was
        # collected, that None object is returned as-is.
        builder = StringBuilder()
        # First copy what we have in the current buffer
        current_size = self._readahead()
        data = None
        if current_size:
            data = self.buffer[self.pos:self.pos + current_size]
            builder.append(data)
            self.pos += current_size
        # We're going past the buffer's bounds, flush it
        if self.writable:
            self._flush_and_rewind_unlocked(space)
        self._reader_reset_buf()

        while True:
            # Read until EOF or until read() would block
            w_data = space.call_method(self.w_raw, "read")
            if space.is_w(w_data, space.w_None):
                if current_size == 0:
                    return w_data
                break
            data = space.bytes_w(w_data)
            size = len(data)
            if size == 0:
                break
            builder.append(data)
            current_size += size
            # Keep the cached absolute position in sync when it is known.
            if self.abs_pos != -1:
                self.abs_pos += size
        return space.newbytes(builder.build())

    def _raw_read(self, space, buffer, start, length):
        """Call ``readinto`` on the raw stream for the window
        ``[start, start + length)`` of ``buffer`` and return the number of
        bytes it reports.

        The call is retried for as long as ``trap_eintr`` accepts the
        raised error.  Raises ``BlockingIOError`` when the raw stream
        returns None, and IOError when the reported size lies outside
        ``0..length``.  ``abs_pos`` is advanced when it is known.
        """
        assert buffer is not None
        length = intmask(length)
        start = intmask(start)
        # Wrapped view over the sub-range the raw stream may fill.
        w_view = SimpleView(SubBuffer(buffer, start, length)).wrap(space)
        while True:
            try:
                w_size = space.call_method(self.w_raw, "readinto", w_view)
            except OperationError as e:
                if trap_eintr(space, e):
                    continue  # try again
                raise
            else:
                break

        if space.is_w(w_size, space.w_None):
            raise BlockingIOError()
        size = space.int_w(w_size)
        if size < 0 or size > length:
            raise oefmt(
                space.w_IOError,
                "raw readinto() returned invalid length %d (should "
                "have been between 0 and %d)", size, length)
        if self.abs_pos != -1:
            self.abs_pos += size
        return size

    def _fill_buffer(self, space):
        """Read from the raw stream into the free tail of the buffer and
        return the number of bytes read.

        Filling starts at ``read_end`` (or at 0 when the read buffer is not
        ready).  When bytes were read, ``read_end`` and ``raw_pos`` are both
        moved to the new end of the data.
        """
        start = 0 if self.read_end == -1 else self.read_end
        room = self.buffer_size - start
        size = self._raw_read(space, self.buffer, start, room)
        if size > 0:
            new_end = start + size
            self.read_end = new_end
            self.raw_pos = new_end
        return size

    def _read_generic(self, space, n):
        """Generic read function: read from the stream until enough bytes are
           read, or until an EOF occurs or until read() would block.

           Returns the bytes read (possibly fewer than ``n``), or None when
           the raw stream would block before any byte was obtained."""
        # Must run with the lock held!
        current_size = self._readahead()
        if n <= current_size:
            return self._read_fast(n)

        result_buffer = ByteBuffer(n)
        remaining = n
        written = 0
        if current_size:
            # Start with whatever is already buffered.
            # NOTE(review): output_slice is defined elsewhere; presumably it
            # copies the slice into result_buffer at offset ``written``.
            self.output_slice(space, result_buffer, written,
                              self.buffer[self.pos:self.pos + current_size])
            remaining -= current_size
            written += current_size
            self.pos += current_size

        # Flush the write buffer if necessary
        if self.writable:
            self._flush_and_rewind_unlocked(space)
        self._reader_reset_buf()

        # Read whole blocks, and don't buffer them
        while remaining > 0:
            # Largest multiple of buffer_size not exceeding ``remaining``.
            r = self.buffer_size * (remaining // self.buffer_size)
            if r == 0:
                break
            try:
                size = self._raw_read(space, result_buffer, written, r)
            except BlockingIOError:
                if written == 0:
                    return None
                size = 0
            if size == 0:
                return result_buffer[0:written]
            remaining -= size
            written += size

        # The rest goes through the internal buffer, starting from empty.
        self.pos = 0
        self.raw_pos = 0
        self.read_end = 0

        while remaining > 0 and self.read_end < self.buffer_size:
            try:
                size = self._fill_buffer(space)
            except BlockingIOError:
                # EOF or read() would block
                if written == 0:
                    return None
                size = 0
            if size == 0:
                break

            if remaining > 0:
                # Hand out at most ``remaining`` bytes; any surplus stays
                # buffered.
                if size > remaining:
                    size = remaining
                self.output_slice(space, result_buffer, written,
                                  self.buffer[self.pos:self.pos + size])
                self.pos += size
                written += size
                remaining -= size

        return result_buffer[0:written]

    def _read_fast(self, n):
        """Serve ``n`` bytes straight from the buffer, or return None when
           fewer than ``n`` bytes are buffered.
           This function is simple enough that it can run unlocked."""
        if n > self._readahead():
            return None
        begin = self.pos
        endpos = begin + n
        chunk = self.buffer[begin:endpos]
        self.pos = endpos
        return chunk

    def readline_w(self, space, w_limit=None):
        """Read one line and return it as wrapped bytes.

        The line ends after the first ``'\\n'``, or after ``limit`` bytes
        when ``convert_size(w_limit)`` is non-negative, or when the raw
        stream yields no more data.  The buffered bytes are searched first
        without the lock; only if that does not complete the line is the
        raw stream read, under the lock.
        """
        self._check_init(space)
        self._check_closed(space, "readline of closed file")

        limit = convert_size(space, w_limit)

        # First, try to find a line in the buffer. This can run
        # unlocked because the calls to the C API are simple enough
        # that they can't trigger any thread switch.
        have = self._readahead()
        if limit >= 0 and have > limit:
            have = limit
        for pos in range(self.pos, self.pos + have):
            if self.buffer[pos] == '\n':
                break
        else:
            # No newline in the buffered part being considered.
            pos = -1
        if pos >= 0:
            w_res = space.newbytes(self.buffer[self.pos:pos + 1])
            self.pos = pos + 1
            return w_res
        if have == limit:
            # The limit is satisfied by buffered data alone.
            w_res = space.newbytes(self.buffer[self.pos:self.pos + have])
            self.pos += have
            return w_res

        # NOTE(review): ``written`` is updated below but not read in this
        # method.
        written = 0
        with self.lock:
            # Now we try to get some more from the raw stream
            chunks = []
            if have > 0:
                chunks.append(self.buffer[self.pos:self.pos + have])
                written += have
                self.pos += have
                if limit >= 0:
                    limit -= have
            if self.writable:
                self._flush_and_rewind_unlocked(space)

            while True:
                self._reader_reset_buf()
                have = self._fill_buffer(space)
                if have == 0:
                    break
                if limit >= 0 and have > limit:
                    have = limit
                pos = 0
                found = False
                # Scan the freshly filled data; ``pos`` ends up just past
                # the newline, or at ``have`` when none was found.
                while pos < have:
                    c = self.buffer.getitem(pos)
                    pos += 1
                    if c == '\n':
                        self.pos = pos
                        found = True
                        break
                chunks.append(self.buffer[0:pos])
                if found:
                    break
                if have == limit:
                    self.pos = have
                    break
                written += have
                if limit >= 0:
                    limit -= have
            return space.newbytes(''.join(chunks))

    # ____________________________________________________
    # Write methods

    def _adjust_position(self, new_pos):
        assert new_pos >= 0
        self.pos = new_pos
        if self.readable and self.read_end != -1 and self.read_end < new_pos:
            self.read_end = self.pos

    def write_w(self, space, w_data):
        """Write the bytes of w_data through the internal buffer.

        Returns space.newint(count), where count is the number of bytes
        accepted (buffered and/or handed to self._write()).

        If the raw stream would block before everything could be written or
        buffered, the error built by make_write_blocking_error(space, n) is
        raised, with n being the number of bytes accepted so far.  Other
        errors from flushing are re-raised unchanged.
        """
        self._check_init(space)
        self._check_closed(space, "write to closed file")
        data = space.getarg_w('s*', w_data).as_str()
        size = len(data)

        with self.lock:
            # read_end / write_end equal to -1 appear to mark an unused
            # read / write buffer (TODO confirm); when neither buffer is in
            # use, start again from the beginning of the buffer.
            if (not (self.readable and self.read_end != -1)
                    and not (self.writable and self.write_end != -1)):
                self.pos = 0
                self.raw_pos = 0
            available = self.buffer_size - self.pos
            # Fast path: the data to write can be fully buffered
            if size <= available:
                for i in range(size):
                    self.buffer[self.pos + i] = data[i]
                # Extend the pending-write window [write_pos, write_end) so
                # that it covers the bytes just stored.
                if self.write_end == -1 or self.write_pos > self.pos:
                    self.write_pos = self.pos
                self._adjust_position(self.pos + size)
                if self.pos > self.write_end:
                    self.write_end = self.pos
                return space.newint(size)

            # First write the current buffer
            try:
                self._writer_flush_unlocked(space)
            except OperationError as e:
                # Only a blocking-IO error is handled here; anything else
                # is propagated unchanged.
                if not e.match(space,
                               space.gettypeobject(W_BlockingIOError.typedef)):
                    raise
                w_exc = e.get_w_value(space)
                assert isinstance(w_exc, W_BlockingIOError)
                if self.readable:
                    self._reader_reset_buf()
                # Make some place by shifting the buffer
                for i in range(self.write_pos, self.write_end):
                    self.buffer.setitem(i - self.write_pos,
                                        self.buffer.getitem(i))
                # All offsets move down by the old write_pos.
                self.write_end -= self.write_pos
                self.raw_pos -= self.write_pos
                newpos = self.pos - self.write_pos
                assert newpos >= 0
                self.pos = newpos
                self.write_pos = 0
                available = self.buffer_size - self.write_end
                assert available >= 0
                if size <= available:
                    # Everything can be buffered
                    for i in range(size):
                        self.buffer[self.write_end + i] = data[i]
                    self.write_end += size
                    self.pos += size
                    return space.newint(size)
                # Buffer as much as possible
                for i in range(available):
                    self.buffer[self.write_end + i] = data[i]
                self.write_end += available
                self.pos += available
                # Modifying the existing exception will change
                # e.characters_written but not e.args[2].  Therefore
                # we just replace with a new error.
                raise make_write_blocking_error(space, available)

            # Adjust the raw stream position if it is away from the logical
            # stream position. This happens if the read buffer has been filled
            # but not modified (and therefore _writer_flush_unlocked()
            # didn't rewind the raw stream by itself).
            offset = self._raw_offset()
            if offset:
                self._raw_seek(space, -offset, 1)
                self.raw_pos -= offset

            # Then write buf itself. At this point the buffer has been emptied
            remaining = size
            written = 0
            # Hand data to self._write() for as long as the rest does not
            # fit in the buffer.
            while remaining > self.buffer_size:
                try:
                    n = self._write(space, data[written:])
                except BlockingIOError:
                    # Write failed because raw file is non-blocking
                    if remaining > self.buffer_size:
                        # Can't buffer everything, still buffer as much as
                        # possible
                        for i in range(self.buffer_size):
                            self.buffer[i] = data[written + i]
                        self.raw_pos = 0
                        self._adjust_position(self.buffer_size)
                        self.write_end = self.buffer_size
                        written += self.buffer_size
                        raise make_write_blocking_error(space, written)
                    # NOTE(review): the loop condition already guarantees
                    # remaining > self.buffer_size, so this break looks
                    # unreachable -- confirm.
                    break
                written += n
                remaining -= n
                # Partial writes can return successfully when interrupted by a
                # signal (see write(2)).  We must run signal handlers before
                # blocking another time, possibly indefinitely.
                space.getexecutioncontext().checksignals()

            if self.readable:
                self._reader_reset_buf()
            # Whatever is left fits in the buffer: store it at offset 0 and
            # mark it as pending write data.
            if remaining > 0:
                for i in range(remaining):
                    self.buffer[i] = data[written + i]
                written += remaining
            self.write_pos = 0
            self.write_end = remaining
            self._adjust_position(remaining)
            self.raw_pos = 0
        return space.newint(written)

    def flush_w(self, space):
        """Flush the buffer while holding self.lock.

        Runs the init and closed checks first, then delegates to
        _flush_and_rewind_unlocked().
        """
        self._check_init(space)
        self._check_closed(space, "flush of closed file")
        with self.lock:
            self._flush_and_rewind_unlocked(space)

    def _flush_and_rewind_unlocked(self, space):
        self._writer_flush_unlocked(space)
        if self.readable:
            # Rewind the raw stream so that its position corresponds to
            # the current logical position.
            try:
                self._raw_seek(space, -self._raw_offset(), 1)
            finally:
                self._reader_reset_buf()
Esempio n. 14
0
def _new_copy_contents_fun(SRC_TP, DST_TP, CHAR_TP, name):
    """Build the raw-memory copy helpers for one family of low-level strings.

    SRC_TP and DST_TP are the low-level types of the source and destination
    strings (their 'chars' field is what gets copied), CHAR_TP is the item
    type used to compute byte sizes, and 'name' is spliced into the names of
    the generated functions.

    Returns the tuple (copy_string_to_raw, copy_raw_to_string,
    copy_string_contents, _get_raw_buf).
    """
    @specialize.arg(0)
    def _str_ofs(TP, item):
        # Offset of chars[item] from the start of a TP structure.
        return (llmemory.offsetof(TP, 'chars') +
                llmemory.itemoffsetof(TP.chars, 0) +
                llmemory.sizeof(CHAR_TP) * item)

    @signature(types.any(), types.any(), types.int(), returns=types.any())
    @specialize.arg(0)
    def _get_raw_buf(TP, src, ofs):
        """
        WARNING: dragons ahead.
        Return the address of the internal char* buffer of the low level
        string. The return value is valid as long as no GC operation occurs,
        so you must ensure that it will be used inside a "GC safe" section,
        for example by marking your function with @rgc.no_collect
        """
        assert typeOf(src).TO == TP
        assert ofs >= 0
        return llmemory.cast_ptr_to_adr(src) + _str_ofs(TP, ofs)
    _get_raw_buf._always_inline_ = True

    @jit.oopspec('stroruni.copy_contents(src, dst, srcstart, dststart, length)')
    @signature(types.any(), types.any(), types.int(), types.int(), types.int(), returns=types.none())
    def copy_string_contents(src, dst, srcstart, dststart, length):
        """Copies 'length' characters from the 'src' string to the 'dst'
        string, starting at position 'srcstart' and 'dststart'."""
        # xxx Warning: don't try to do this at home.  It relies on a lot
        # of details to be sure that it works correctly in all cases.
        # Notably: no GC operation at all from the first cast_ptr_to_adr()
        # because it might move the strings.  The keepalive_until_here()
        # are obscurely essential to make sure that the strings stay alive
        # longer than the raw_memcopy().
        assert length >= 0
        ll_assert(srcstart >= 0, "copystrc: negative srcstart")
        ll_assert(srcstart + length <= len(src.chars), "copystrc: src ovf")
        ll_assert(dststart >= 0, "copystrc: negative dststart")
        ll_assert(dststart + length <= len(dst.chars), "copystrc: dst ovf")
        # from here, no GC operations can happen
        asrc = _get_raw_buf(SRC_TP, src, srcstart)
        adst = _get_raw_buf(DST_TP, dst, dststart)
        llmemory.raw_memcopy(asrc, adst, llmemory.sizeof(CHAR_TP) * length)
        # end of "no GC" section
        keepalive_until_here(src)
        keepalive_until_here(dst)
    copy_string_contents._always_inline_ = True
    copy_string_contents = func_with_new_name(copy_string_contents,
                                              'copy_%s_contents' % name)

    @jit.oopspec('stroruni.copy_string_to_raw(src, ptrdst, srcstart, length)')
    def copy_string_to_raw(src, ptrdst, srcstart, length):
        """
        Copies 'length' characters from the 'src' string to the 'ptrdst'
        buffer, starting at position 'srcstart'.
        'ptrdst' must be a non-gc Array of Char.
        """
        # xxx Warning: same note as above apply: don't do this at home
        assert length >= 0
        # from here, no GC operations can happen
        asrc = _get_raw_buf(SRC_TP, src, srcstart)
        adst = llmemory.cast_ptr_to_adr(ptrdst)
        adst = adst + llmemory.itemoffsetof(typeOf(ptrdst).TO, 0)
        llmemory.raw_memcopy(asrc, adst, llmemory.sizeof(CHAR_TP) * length)
        # end of "no GC" section
        keepalive_until_here(src)
    copy_string_to_raw._always_inline_ = True
    copy_string_to_raw = func_with_new_name(copy_string_to_raw, 'copy_%s_to_raw' % name)

    @jit.dont_look_inside
    @signature(types.any(), types.any(), types.int(), types.int(),
               returns=types.none())
    def copy_raw_to_string(ptrsrc, dst, dststart, length):
        """
        Copies 'length' characters from the raw 'ptrsrc' buffer into the
        'dst' string, starting at position 'dststart' of 'dst'.
        """
        # xxx Warning: same note as above apply: don't do this at home
        assert length >= 0
        # from here, no GC operations can happen
        # 'dst' is the destination string, so its layout is described by
        # DST_TP, exactly as in copy_string_contents() above.
        adst = _get_raw_buf(DST_TP, dst, dststart)
        asrc = llmemory.cast_ptr_to_adr(ptrsrc)

        asrc = asrc + llmemory.itemoffsetof(typeOf(ptrsrc).TO, 0)
        llmemory.raw_memcopy(asrc, adst, llmemory.sizeof(CHAR_TP) * length)
        # end of "no GC" section
        keepalive_until_here(dst)
    copy_raw_to_string._always_inline_ = True
    copy_raw_to_string = func_with_new_name(copy_raw_to_string,
                                              'copy_raw_to_%s' % name)

    return (copy_string_to_raw, copy_raw_to_string, copy_string_contents,
            _get_raw_buf)
Esempio n. 15
0
        result = malloc(self.LIST, n, immortal=True)
        return result


# ____________________________________________________________
#
#  Low-level methods.  These can be run for testing, but are meant to
#  be direct_call'ed from rtyped flow graphs, which means that they will
#  get flowed and annotated, mostly with SomePtr.

# adapted C code


@jit.look_inside_iff(lambda l, newsize, overallocate: jit.isconstant(
    len(l.items)) and jit.isconstant(newsize))
@signature(types.any(), types.int(), types.bool(), returns=types.none())
def _ll_list_resize_hint_really(l, newsize, overallocate):
    """
    Ensure l.items has room for at least newsize elements.  Note that
    l.items may change, and even if newsize is less than l.length on
    entry.
    """
    # This over-allocates proportional to the list size, making room
    # for additional growth.  The over-allocation is mild, but is
    # enough to give linear-time amortized behavior over a long
    # sequence of appends() in the presence of a poorly-performing
    # system malloc().
    # The growth pattern is:  0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
    if newsize <= 0:
        ll_assert(newsize == 0, "negative list length")
        l.length = 0