Example #1
0
 def test_basic(self):
     buf = ByteBuffer(4)
     assert buf.getlength() == 4
     assert buf.getitem(2) == '\x00'
     buf.setitem(0, 'A')
     buf.setitem(3, 'Z')
     assert buf.as_str() == 'A\x00\x00Z'
Example #2
0
class BufferedMixin:
    _mixin_ = True

    def __init__(self, space):
        W_IOBase.__init__(self, space)
        self.state = STATE_ZERO

        self.buffer = None

        self.abs_pos = 0  # Absolute position inside the raw stream (-1 if
        # unknown).
        self.pos = 0  # Current logical position in the buffer
        self.raw_pos = 0  # Position of the raw stream in the buffer.

        self.read_end = -1  # Just after the last buffered byte in the buffer,
        # or -1 if the buffer isn't ready for reading

        self.write_pos = 0  # Just after the last byte actually written
        self.write_end = -1  # Just after the last byte waiting to be written,
        # or -1 if the buffer isn't ready for writing.

        self.lock = None

        self.readable = False
        self.writable = False

    def _reader_reset_buf(self):
        self.read_end = -1

    def _writer_reset_buf(self):
        self.write_pos = 0
        self.write_end = -1

    def _init(self, space):
        if self.buffer_size <= 0:
            raise oefmt(space.w_ValueError,
                        "buffer size must be strictly positive")

        if space.config.translation.split_gc_address_space:
            # When using split GC address space, it is not possible to get the
            # raw address of a GC buffer. Therefore we use a buffer backed by
            # raw memory.
            self.buffer = RawByteBuffer(self.buffer_size)
        else:
            # TODO: test whether using the raw buffer is faster
            self.buffer = ByteBuffer(self.buffer_size)

        self.lock = TryLock(space)

        try:
            self._raw_tell(space)
        except OperationError:
            pass

    def _check_init(self, space):
        if self.state == STATE_ZERO:
            raise oefmt(space.w_ValueError,
                        "I/O operation on uninitialized object")
        elif self.state == STATE_DETACHED:
            raise oefmt(space.w_ValueError, "raw stream has been detached")

    def _check_closed(self, space, message=None):
        self._check_init(space)
        W_IOBase._check_closed(self, space, message)

    def _raw_tell(self, space):
        w_pos = space.call_method(self.w_raw, "tell")
        pos = space.r_longlong_w(w_pos)
        if pos < 0:
            raise oefmt(space.w_IOError,
                        "raw stream returned invalid position")

        self.abs_pos = pos
        return pos

    def closed_get_w(self, space):
        self._check_init(space)
        return space.getattr(self.w_raw, space.newtext("closed"))

    def name_get_w(self, space):
        self._check_init(space)
        return space.getattr(self.w_raw, space.newtext("name"))

    def mode_get_w(self, space):
        self._check_init(space)
        return space.getattr(self.w_raw, space.newtext("mode"))

    def readable_w(self, space):
        self._check_init(space)
        return space.call_method(self.w_raw, "readable")

    def writable_w(self, space):
        self._check_init(space)
        return space.call_method(self.w_raw, "writable")

    def seekable_w(self, space):
        self._check_init(space)
        return space.call_method(self.w_raw, "seekable")

    def isatty_w(self, space):
        self._check_init(space)
        return space.call_method(self.w_raw, "isatty")

    def repr_w(self, space):
        typename = space.type(self).name
        try:
            w_name = space.getattr(self, space.newtext("name"))
        except OperationError as e:
            if not e.match(space, space.w_Exception):
                raise
            return space.newtext("<%s>" % (typename, ))
        else:
            name_repr = space.text_w(space.repr(w_name))
            return space.newtext("<%s name=%s>" % (typename, name_repr))

    # ______________________________________________

    @signature(types.any(), returns=types.int())
    def _readahead(self):
        if self.readable and self.read_end != -1:
            available = self.read_end - self.pos
            assert available >= 0
            return available
        return 0

    def _raw_offset(self):
        if self.raw_pos >= 0 and ((self.readable and self.read_end != -1) or
                                  (self.writable and self.write_end != -1)):
            return self.raw_pos - self.pos
        return 0

    def tell_w(self, space):
        self._check_init(space)
        pos = self._raw_tell(space) - self._raw_offset()
        return space.newint(pos)

    @unwrap_spec(pos=r_longlong, whence=int)
    def seek_w(self, space, pos, whence=0):
        self._check_init(space)
        if whence not in (0, 1, 2):
            raise oefmt(space.w_ValueError,
                        "whence must be between 0 and 2, not %d", whence)
        self._check_closed(space, "seek of closed file")
        if whence != 2 and self.readable:
            # Check if seeking leaves us inside the current buffer, so as to
            # return quickly if possible. Also, we needn't take the lock in
            # this fast path.
            if self.abs_pos == -1:
                self._raw_tell(space)
            current = self.abs_pos
            available = self._readahead()
            if available > 0:
                if whence == 0:
                    offset = pos - (current - self._raw_offset())
                else:
                    offset = pos
                if -self.pos <= offset <= available:
                    newpos = self.pos + int(offset)
                    assert newpos >= 0
                    self.pos = newpos
                    return space.newint(current - available + offset)

        # Fallback: invoke raw seek() method and clear buffer
        with self.lock:
            if self.writable:
                self._writer_flush_unlocked(space)
                self._writer_reset_buf()

            if whence == 1:
                pos -= self._raw_offset()
            n = self._raw_seek(space, pos, whence)
            self.raw_pos = -1
            if self.readable:
                self._reader_reset_buf()
            return space.newint(n)

    def _raw_seek(self, space, pos, whence):
        w_pos = space.call_method(self.w_raw, "seek", space.newint(pos),
                                  space.newint(whence))
        pos = space.r_longlong_w(w_pos)
        if pos < 0:
            raise oefmt(space.w_IOError,
                        "Raw stream returned invalid position")
        self.abs_pos = pos
        return pos

    def _closed(self, space):
        return space.is_true(space.getattr(self.w_raw,
                                           space.newtext("closed")))

    def close_w(self, space):
        self._check_init(space)
        with self.lock:
            if self._closed(space):
                return
        try:
            space.call_method(self, "flush")
        finally:
            with self.lock:
                space.call_method(self.w_raw, "close")

    def simple_flush_w(self, space):
        self._check_init(space)
        return space.call_method(self.w_raw, "flush")

    def _writer_flush_unlocked(self, space):
        if self.write_end == -1 or self.write_pos == self.write_end:
            return
        # First, rewind
        rewind = self._raw_offset() + (self.pos - self.write_pos)
        if rewind != 0:
            self._raw_seek(space, -rewind, 1)
            self.raw_pos -= rewind

        written = 0
        while self.write_pos < self.write_end:
            try:
                n = self._raw_write(space, self.write_pos, self.write_end)
            except BlockingIOError:
                raise make_write_blocking_error(space, 0)
            self.write_pos += n
            self.raw_pos = self.write_pos
            written += n
            # Partial writes can return successfully when interrupted by a
            # signal (see write(2)).  We must run signal handlers before
            # blocking another time, possibly indefinitely.
            space.getexecutioncontext().checksignals()

        self._writer_reset_buf()

    def _write(self, space, data):
        w_data = space.newbytes(data)
        while True:
            try:
                w_written = space.call_method(self.w_raw, "write", w_data)
            except OperationError as e:
                if trap_eintr(space, e):
                    continue  # try again
                raise
            else:
                break

        if space.is_w(w_written, space.w_None):
            # Non-blocking stream would have blocked.
            raise BlockingIOError()

        written = space.getindex_w(w_written, space.w_IOError)
        if not 0 <= written <= len(data):
            raise oefmt(space.w_IOError, "raw write() returned invalid length")
        if self.abs_pos != -1:
            self.abs_pos += written
        return written

    def _raw_write(self, space, start, end):
        return self._write(space, self.buffer[start:end])

    def detach_w(self, space):
        self._check_init(space)
        space.call_method(self, "flush")
        w_raw = self.w_raw
        self.w_raw = None
        self.state = STATE_DETACHED
        return w_raw

    def fileno_w(self, space):
        self._check_init(space)
        return space.call_method(self.w_raw, "fileno")

    @unwrap_spec(w_size=WrappedDefault(None))
    def truncate_w(self, space, w_size):
        self._check_init(space)
        with self.lock:
            if self.writable:
                self._flush_and_rewind_unlocked(space)
            # invalidate cached position
            self.abs_pos = -1

            return space.call_method(self.w_raw, "truncate", w_size)

    # ________________________________________________________________
    # Read methods

    def read_w(self, space, w_size=None):
        self._check_init(space)
        self._check_closed(space, "read of closed file")
        size = convert_size(space, w_size)

        if size == -1:
            # read until the end of stream
            with self.lock:
                return self._read_all(space)
        elif size >= 0:
            res = self._read_fast(size)
            if res is None:
                with self.lock:
                    res = self._read_generic(space, size)
        else:
            raise oefmt(space.w_ValueError,
                        "read length must be positive or -1")
        return space.newbytes(res)

    @unwrap_spec(size=int)
    def peek_w(self, space, size=0):
        self._check_init(space)
        self._check_closed(space, "peek of closed file")
        with self.lock:
            if self.writable:
                self._flush_and_rewind_unlocked(space)
            # Constraints:
            # 1. we don't want to advance the file position.
            # 2. we don't want to lose block alignment, so we can't shift the
            #    buffer to make some place.
            # Therefore, we either return `have` bytes (if > 0), or a full
            # buffer.
            have = self._readahead()
            if have > 0:
                data = self.buffer[self.pos:self.pos + have]
                return space.newbytes(data)

            # Fill the buffer from the raw stream, and copy it to the result
            self._reader_reset_buf()
            try:
                size = self._fill_buffer(space)
            except BlockingIOError:
                size = 0
            self.pos = 0
            data = self.buffer[0:size]
            return space.newbytes(data)

    @unwrap_spec(size=int)
    def read1_w(self, space, size):
        self._check_init(space)
        self._check_closed(space, "read of closed file")

        if size < 0:
            raise oefmt(space.w_ValueError, "read length must be positive")
        if size == 0:
            return space.newbytes("")

        with self.lock:
            # Return up to n bytes.  If at least one byte is buffered, we only
            # return buffered bytes.  Otherwise, we do one raw read.

            # XXX: this mimicks the io.py implementation but is probably
            # wrong. If we need to read from the raw stream, then we could
            # actually read all `n` bytes asked by the caller (and possibly
            # more, so as to fill our buffer for the next reads).

            have = self._readahead()
            if have == 0:
                if self.writable:
                    self._flush_and_rewind_unlocked(space)

                # Fill the buffer from the raw stream
                self._reader_reset_buf()
                self.pos = 0
                try:
                    have = self._fill_buffer(space)
                except BlockingIOError:
                    have = 0
            if size > have:
                size = have
            endpos = self.pos + size
            data = self.buffer[self.pos:endpos]
            self.pos = endpos
            return space.newbytes(data)

    def _read_all(self, space):
        "Read all the file, don't update the cache"
        # Must run with the lock held!
        builder = StringBuilder()
        # First copy what we have in the current buffer
        current_size = self._readahead()
        data = None
        if current_size:
            data = self.buffer[self.pos:self.pos + current_size]
            builder.append(data)
            self.pos += current_size
        # We're going past the buffer's bounds, flush it
        if self.writable:
            self._flush_and_rewind_unlocked(space)
        self._reader_reset_buf()

        while True:
            # Read until EOF or until read() would block
            w_data = space.call_method(self.w_raw, "read")
            if space.is_w(w_data, space.w_None):
                if current_size == 0:
                    return w_data
                break
            data = space.bytes_w(w_data)
            size = len(data)
            if size == 0:
                break
            builder.append(data)
            current_size += size
            if self.abs_pos != -1:
                self.abs_pos += size
        return space.newbytes(builder.build())

    def _raw_read(self, space, buffer, start, length):
        assert buffer is not None
        length = intmask(length)
        start = intmask(start)
        w_view = SimpleView(SubBuffer(buffer, start, length)).wrap(space)
        while True:
            try:
                w_size = space.call_method(self.w_raw, "readinto", w_view)
            except OperationError as e:
                if trap_eintr(space, e):
                    continue  # try again
                raise
            else:
                break

        if space.is_w(w_size, space.w_None):
            raise BlockingIOError()
        size = space.int_w(w_size)
        if size < 0 or size > length:
            raise oefmt(
                space.w_IOError,
                "raw readinto() returned invalid length %d (should "
                "have been between 0 and %d)", size, length)
        if self.abs_pos != -1:
            self.abs_pos += size
        return size

    def _fill_buffer(self, space):
        start = self.read_end
        if start == -1:
            start = 0
        length = self.buffer_size - start
        size = self._raw_read(space, self.buffer, start, length)
        if size > 0:
            self.read_end = self.raw_pos = start + size
        return size

    def _read_generic(self, space, n):
        """Generic read function: read from the stream until enough bytes are
           read, or until an EOF occurs or until read() would block."""
        # Must run with the lock held!
        current_size = self._readahead()
        if n <= current_size:
            return self._read_fast(n)

        result_buffer = ByteBuffer(n)
        remaining = n
        written = 0
        if current_size:
            self.output_slice(space, result_buffer, written,
                              self.buffer[self.pos:self.pos + current_size])
            remaining -= current_size
            written += current_size
            self.pos += current_size

        # Flush the write buffer if necessary
        if self.writable:
            self._flush_and_rewind_unlocked(space)
        self._reader_reset_buf()

        # Read whole blocks, and don't buffer them
        while remaining > 0:
            r = self.buffer_size * (remaining // self.buffer_size)
            if r == 0:
                break
            try:
                size = self._raw_read(space, result_buffer, written, r)
            except BlockingIOError:
                if written == 0:
                    return None
                size = 0
            if size == 0:
                return result_buffer[0:written]
            remaining -= size
            written += size

        self.pos = 0
        self.raw_pos = 0
        self.read_end = 0

        while remaining > 0 and self.read_end < self.buffer_size:
            try:
                size = self._fill_buffer(space)
            except BlockingIOError:
                # EOF or read() would block
                if written == 0:
                    return None
                size = 0
            if size == 0:
                break

            if remaining > 0:
                if size > remaining:
                    size = remaining
                self.output_slice(space, result_buffer, written,
                                  self.buffer[self.pos:self.pos + size])
                self.pos += size
                written += size
                remaining -= size

        return result_buffer[0:written]

    def _read_fast(self, n):
        """Read n bytes from the buffer if it can, otherwise return None.
           This function is simple enough that it can run unlocked."""
        current_size = self._readahead()
        if n <= current_size:
            endpos = self.pos + n
            res = self.buffer[self.pos:endpos]
            self.pos = endpos
            return res
        return None

    def readline_w(self, space, w_limit=None):
        self._check_init(space)
        self._check_closed(space, "readline of closed file")

        limit = convert_size(space, w_limit)

        # First, try to find a line in the buffer. This can run
        # unlocked because the calls to the C API are simple enough
        # that they can't trigger any thread switch.
        have = self._readahead()
        if limit >= 0 and have > limit:
            have = limit
        for pos in range(self.pos, self.pos + have):
            if self.buffer[pos] == '\n':
                break
        else:
            pos = -1
        if pos >= 0:
            w_res = space.newbytes(self.buffer[self.pos:pos + 1])
            self.pos = pos + 1
            return w_res
        if have == limit:
            w_res = space.newbytes(self.buffer[self.pos:self.pos + have])
            self.pos += have
            return w_res

        written = 0
        with self.lock:
            # Now we try to get some more from the raw stream
            chunks = []
            if have > 0:
                chunks.append(self.buffer[self.pos:self.pos + have])
                written += have
                self.pos += have
                if limit >= 0:
                    limit -= have
            if self.writable:
                self._flush_and_rewind_unlocked(space)

            while True:
                self._reader_reset_buf()
                have = self._fill_buffer(space)
                if have == 0:
                    break
                if limit >= 0 and have > limit:
                    have = limit
                pos = 0
                found = False
                while pos < have:
                    c = self.buffer.getitem(pos)
                    pos += 1
                    if c == '\n':
                        self.pos = pos
                        found = True
                        break
                chunks.append(self.buffer[0:pos])
                if found:
                    break
                if have == limit:
                    self.pos = have
                    break
                written += have
                if limit >= 0:
                    limit -= have
            return space.newbytes(''.join(chunks))

    # ____________________________________________________
    # Write methods

    def _adjust_position(self, new_pos):
        assert new_pos >= 0
        self.pos = new_pos
        if self.readable and self.read_end != -1 and self.read_end < new_pos:
            self.read_end = self.pos

    def write_w(self, space, w_data):
        self._check_init(space)
        self._check_closed(space, "write to closed file")
        data = space.getarg_w('s*', w_data).as_str()
        size = len(data)

        with self.lock:
            if (not (self.readable and self.read_end != -1)
                    and not (self.writable and self.write_end != -1)):
                self.pos = 0
                self.raw_pos = 0
            available = self.buffer_size - self.pos
            # Fast path: the data to write can be fully buffered
            if size <= available:
                for i in range(size):
                    self.buffer[self.pos + i] = data[i]
                if self.write_end == -1 or self.write_pos > self.pos:
                    self.write_pos = self.pos
                self._adjust_position(self.pos + size)
                if self.pos > self.write_end:
                    self.write_end = self.pos
                return space.newint(size)

            # First write the current buffer
            try:
                self._writer_flush_unlocked(space)
            except OperationError as e:
                if not e.match(space,
                               space.gettypeobject(W_BlockingIOError.typedef)):
                    raise
                w_exc = e.get_w_value(space)
                assert isinstance(w_exc, W_BlockingIOError)
                if self.readable:
                    self._reader_reset_buf()
                # Make some place by shifting the buffer
                for i in range(self.write_pos, self.write_end):
                    self.buffer.setitem(i - self.write_pos,
                                        self.buffer.getitem(i))
                self.write_end -= self.write_pos
                self.raw_pos -= self.write_pos
                newpos = self.pos - self.write_pos
                assert newpos >= 0
                self.pos = newpos
                self.write_pos = 0
                available = self.buffer_size - self.write_end
                assert available >= 0
                if size <= available:
                    # Everything can be buffered
                    for i in range(size):
                        self.buffer[self.write_end + i] = data[i]
                    self.write_end += size
                    self.pos += size
                    return space.newint(size)
                # Buffer as much as possible
                for i in range(available):
                    self.buffer[self.write_end + i] = data[i]
                self.write_end += available
                self.pos += available
                # Modifying the existing exception will will change
                # e.characters_written but not e.args[2].  Therefore
                # we just replace with a new error.
                raise make_write_blocking_error(space, available)

            # Adjust the raw stream position if it is away from the logical
            # stream position. This happens if the read buffer has been filled
            # but not modified (and therefore _bufferedwriter_flush_unlocked()
            # didn't rewind the raw stream by itself).
            offset = self._raw_offset()
            if offset:
                self._raw_seek(space, -offset, 1)
                self.raw_pos -= offset

            # Then write buf itself. At this point the buffer has been emptied
            remaining = size
            written = 0
            while remaining > self.buffer_size:
                try:
                    n = self._write(space, data[written:])
                except BlockingIOError:
                    # Write failed because raw file is non-blocking
                    if remaining > self.buffer_size:
                        # Can't buffer everything, still buffer as much as
                        # possible
                        for i in range(self.buffer_size):
                            self.buffer[i] = data[written + i]
                        self.raw_pos = 0
                        self._adjust_position(self.buffer_size)
                        self.write_end = self.buffer_size
                        written += self.buffer_size
                        raise make_write_blocking_error(space, written)
                    break
                written += n
                remaining -= n
                # Partial writes can return successfully when interrupted by a
                # signal (see write(2)).  We must run signal handlers before
                # blocking another time, possibly indefinitely.
                space.getexecutioncontext().checksignals()

            if self.readable:
                self._reader_reset_buf()
            if remaining > 0:
                for i in range(remaining):
                    self.buffer[i] = data[written + i]
                written += remaining
            self.write_pos = 0
            self.write_end = remaining
            self._adjust_position(remaining)
            self.raw_pos = 0
        return space.newint(written)

    def flush_w(self, space):
        self._check_init(space)
        self._check_closed(space, "flush of closed file")
        with self.lock:
            self._flush_and_rewind_unlocked(space)

    def _flush_and_rewind_unlocked(self, space):
        self._writer_flush_unlocked(space)
        if self.readable:
            # Rewind the raw stream so that its position corresponds to
            # the current logical position.
            try:
                self._raw_seek(space, -self._raw_offset(), 1)
            finally:
                self._reader_reset_buf()