Example 1
    async def readlines(self) -> AsyncIterator[str]:
        while not self.at_eof:
            # StreamReader only supports the LF line separator. This leads to
            # a buffer overrun when the stream contains only CR-terminated
            # lines. Thus, we read blocks manually and then split them into
            # lines using universal newline handling.
            block = await self.reader.read(self.blocksize)
            # An empty read means that the stream is closed.
            self.at_eof = len(block) == 0

            # checking max buffer size
            buffered = len(self.buffer) + len(block)
            if buffered > self.bufsize:
                raise asyncio.LimitOverrunError("buffer overrun", buffered)

            self.buffer += block
            if self.buffer:
                # Split the buffer into lines on any of the CR, LF, or CRLF
                # separators. The last line is kept in the buffer to handle the
                # case where a CRLF sequence is split across subsequent reads.
                [*lines, self.buffer] = self.buffer.splitlines(keepends=True)
                for line in lines:
                    yield line.decode(self.encoding)
        # The loop above leaves the last (possibly unterminated) line in the
        # buffer, but if the stream was empty the buffer is empty too, and we
        # don't want to yield an empty line.
        if self.buffer:
            yield self.buffer.decode(self.encoding)
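
As a side note, the splitlines(keepends=True) trick above is what makes CR, LF, and CRLF all work even when a CRLF pair is split across two reads. A minimal standalone sketch of that buffering behaviour (not taken from the original project):

buf = b""
for block in (b"one\nmixed\r", b"\ntwo\rpartial"):  # CRLF split across blocks
    buf += block
    # Keep the last (possibly incomplete) line in the buffer for the next read.
    *lines, buf = buf.splitlines(keepends=True)
    for line in lines:
        print(repr(line))  # b'one\n', then b'mixed\r\n' and b'two\r'
print(repr(buf))           # b'partial' -- yielded only once the stream ends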
Example 2
    def test_LimitOverrunError_pickleable(self):
        e = asyncio.LimitOverrunError('message', 10)
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            with self.subTest(pickle_protocol=proto):
                e2 = pickle.loads(pickle.dumps(e, protocol=proto))
                self.assertEqual(str(e), str(e2))
                self.assertEqual(e.consumed, e2.consumed)
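
What the test exercises is simply that LimitOverrunError keeps both its message and its consumed byte count across a pickle round trip. A quick standalone equivalent (the round trip only succeeds on Python versions that include the fix this test guards):

import asyncio
import pickle

exc = asyncio.LimitOverrunError(
    "Separator is not found, and chunk exceed the limit", 65536)
copy = pickle.loads(pickle.dumps(exc))
assert str(copy) == str(exc)
assert copy.consumed == exc.consumed == 65536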
Example 3
async def test_read_failure(serial):
    """Test serial transport read failure."""
    mock_reader, mock_writer = serial.return_value
    transport = SerialTransport("/test", 123456)

    await transport.connect()

    assert serial.call_count == 1
    assert serial.call_args == call(url="/test", baudrate=123456)

    mock_reader.readuntil.side_effect = asyncio.LimitOverrunError("Boom",
                                                                  consumed=2)

    with pytest.raises(TransportReadError):
        await transport.read()

    mock_reader.readuntil.side_effect = asyncio.IncompleteReadError(
        partial=b"partial_test", expected=20)

    with pytest.raises(TransportReadError) as exc_info:
        await transport.read()

    assert exc_info.value.partial_bytes == b"partial_test"

    mock_reader.readuntil.side_effect = OSError("Boom")

    with pytest.raises(TransportFailedError):
        await transport.read()

    await transport.disconnect()

    assert mock_writer.close.call_count == 1
    assert mock_writer.wait_closed.call_count == 1
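
The transport implementation itself is not shown; a hypothetical sketch of the read() behaviour the test pins down (the exception class names come from the test, everything else is assumed) could look like this:

import asyncio

class TransportReadError(Exception):
    """Hypothetical: raised for recoverable read problems."""
    def __init__(self, message, partial_bytes=b""):
        super().__init__(message)
        self.partial_bytes = partial_bytes

class TransportFailedError(Exception):
    """Hypothetical: raised when the underlying serial link fails."""

async def read_line(reader, separator=b"\r\n"):
    # Sketch only: map stream-level errors to the transport-level errors that
    # the test above asserts.
    try:
        return await reader.readuntil(separator)
    except asyncio.IncompleteReadError as err:
        # EOF before the separator: expose the partial data (checked via
        # exc_info.value.partial_bytes in the test).
        raise TransportReadError("incomplete read",
                                 partial_bytes=err.partial) from err
    except asyncio.LimitOverrunError as err:
        raise TransportReadError(
            f"line too long ({err.consumed} bytes buffered)") from err
    except OSError as err:
        raise TransportFailedError(str(err)) from err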
Example 4
    def test_elt_tap_line_length_limit_error(
        self,
        google_tracker,
        cli_runner,
        project,
        tap,
        target,
        tap_process,
        target_process,
        project_plugins_service,
    ):
        job_id = "pytest_test_elt"
        args = ["elt", "--job_id", job_id, tap.name, target.name]

        # Raise a ValueError wrapping a LimitOverrunError, like StreamReader.readline does:
        # https://github.com/python/cpython/blob/v3.8.7/Lib/asyncio/streams.py#L549
        try:  # noqa: WPS328
            raise asyncio.LimitOverrunError(
                "Separator is not found, and chunk exceed the limit", 0)
        except asyncio.LimitOverrunError as err:
            try:  # noqa: WPS328, WPS505
                # `ValueError` needs to be raised from inside the except block
                # for `LimitOverrunError` so that `__context__` is set.
                raise ValueError(str(err))
            except ValueError as wrapper_err:
                tap_process.stdout.readline.side_effect = wrapper_err

        # Have `tap_process.wait` take 1s to make sure the LimitOverrunError exception can be raised before tap finishes
        async def wait_mock():
            await asyncio.sleep(1)
            return tap_process.wait.return_value

        tap_process.wait.side_effect = wait_mock

        invoke_async = CoroutineMock(side_effect=(tap_process, target_process))
        with mock.patch.object(PluginInvoker, "invoke_async",
                               new=invoke_async) as invoke_async, mock.patch(
                                   "meltano.cli.elt.ProjectPluginsService",
                                   return_value=project_plugins_service,
                               ):
            result = cli_runner.invoke(cli, args)
            assert result.exit_code == 1
            assert "Output line length limit exceeded" in str(result.exception)

            assert_lines(
                result.stdout,
                "meltano     | Running extract & load...\n",
                "meltano     | The extractor generated a message exceeding the message size limit of 5.0MiB (half the buffer size of 10.0MiB).\n",
                "meltano     | ELT could not be completed: Output line length limit exceeded\n",
            )
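
Because StreamReader.readline swallows the LimitOverrunError and re-raises a plain ValueError with __context__ set (exactly as the comment in the test explains), caller code that wants to report the friendlier message has to look at the exception's context. A minimal illustrative sketch, not Meltano's actual implementation:

import asyncio

async def read_output_line(reader):
    try:
        return await reader.readline()
    except ValueError as err:
        # readline wraps LimitOverrunError in a ValueError, so the original
        # exception is only reachable through __context__.
        if isinstance(err.__context__, asyncio.LimitOverrunError):
            raise RuntimeError("Output line length limit exceeded") from err
        raise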
Example 5
async def readuntil(self, separators=(b'\n', b'\r')):
    """Read data from the stream until one of the ``separators`` is found.

    On success, the data and separator will be removed from the
    internal buffer (consumed). Returned data will include the
    separator at the end.

    The configured stream limit is used to check the result. The limit
    sets the maximal length of data that can be returned, not counting
    the separator.

    If an EOF occurs and the complete separator is still not found,
    an IncompleteReadError exception will be raised, and the internal
    buffer will be reset. The IncompleteReadError.partial attribute
    may contain a portion of the separator.

    If the data cannot be read because the limit is exceeded, a
    LimitOverrunError exception will be raised, and the data
    will be left in the internal buffer, so it can be read again.
    """
    assert separators
    # seplen is fixed at 1 (instead of len(separator) as in the stock
    # implementation); this is fine as long as every element of
    # `separators` is a non-empty, one-byte separator.
    seplen = 1
    if any(len(sep) != seplen for sep in separators):
        raise ValueError('Separators should be one-byte strings')

    if self._exception is not None:
        raise self._exception

    # Consume the whole buffer except the last bytes, whose length is
    # one less than seplen. Let's check corner cases with
    # separator='SEPARATOR':
    # * we have received an almost complete separator (without the last
    #   byte), i.e. buffer='some textSEPARATO'. In this case we
    #   can safely consume len(separator) - 1 bytes.
    # * the last byte of the buffer is the first byte of the separator,
    #   i.e. buffer='abcdefghijklmnopqrS'. We may safely consume
    #   everything except that last byte, but this requires analyzing
    #   the bytes of the buffer that match a partial separator.
    #   This is slow and/or requires an FSM. For this case our
    #   implementation is not optimal, since it requires rescanning
    #   data that is known not to belong to the separator. In the
    #   real world, the separator will not be long enough to notice
    #   performance problems. Even when reading MIME-encoded
    #   messages :)

    # `offset` is the number of bytes from the beginning of the buffer
    # where there is no occurrence of `separator`.
    offset = 0

    # Loop until we find `separator` in the buffer, exceed the buffer size,
    # or an EOF has happened.
    while True:
        buflen = len(self._buffer)

        # Check if we now have enough data in the buffer for `separator` to
        # fit.
        if buflen - offset >= seplen:
            # The separators are checked in order: LF first, then CR. A bare CR
            # immediately followed by LF is skipped so that the CRLF sequence
            # is matched (via the LF) as a single line ending.
            for separator in separators:
                isep = self._buffer.find(separator, offset)
                if (separator == b'\r' and isep != -1
                        and isep + 1 < buflen
                        and self._buffer[isep + 1] == 0x0A):  # next byte is LF
                    continue
                if isep != -1:
                    # `separator` is in the buffer. `isep` will be used later
                    # to retrieve the data.
                    break
            else:
                # No separator was found; see the upper comment for
                # explanation.
                offset = buflen + 1 - seplen
                if offset > self._limit:
                    raise asyncio.LimitOverrunError(
                        'Separator is not found, and chunk exceed the limit',
                        offset)
            if isep != -1:
                # A separator was found: stop waiting for data and slice the
                # chunk below.
                break

        # A complete message (with the full separator) may be present in the
        # buffer even when the EOF flag is set. This may happen when the last
        # chunk adds data which makes the separator be found. That's why we
        # check for EOF *after* inspecting the buffer.
        if self._eof:
            chunk = bytes(self._buffer)
            self._buffer.clear()
            raise asyncio.IncompleteReadError(chunk, None)

        # _wait_for_data() will resume reading if stream was paused.
        await self._wait_for_data('readuntil')

    if isep > self._limit:
        raise asyncio.LimitOverrunError(
            'Separator is found, but chunk is longer than limit', isep)

    chunk = self._buffer[:isep + seplen]
    del self._buffer[:isep + seplen]
    self._maybe_resume_transport()
    return bytes(chunk)
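
A minimal usage sketch, assuming the coroutine above is attached as a method of an asyncio.StreamReader subclass (the surrounding class is not shown in the excerpt, so the wiring below is an assumption):

import asyncio

class CRLFStreamReader(asyncio.StreamReader):
    pass

CRLFStreamReader.readuntil = readuntil  # the multi-separator variant above

async def demo():
    reader = CRLFStreamReader()
    reader.feed_data(b"PONG\r")                    # bare-CR line, no LF in buffer
    assert await reader.readuntil() == b"PONG\r"   # matched via the b'\r' separator
    reader.feed_data(b"OK\r\n")
    reader.feed_eof()
    assert await reader.readuntil() == b"OK\r\n"   # CRLF consumed as one line ending

asyncio.run(demo())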