コード例 #1
0
ファイル: iostream_test.py プロジェクト: lowks/wpull
    def test_basic(self):
        '''Fetch ``/`` over a raw stream and check header, body and closure.

        This is a generator: every ``yield`` hands a stream operation to
        whatever coroutine runner drives the test (the decorator is not
        visible in this excerpt).
        '''
        socket_obj = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        stream = IOStream(socket_obj)

        yield stream.connect(('127.0.0.1', self.get_http_port()))
        yield stream.write(b'GET / HTTP/1.0\r\n\r\n')

        # Everything up to and including the blank line is the header block.
        headers = yield stream.read_until(b'\r\n\r\n')

        self.assertIn(b'OK', headers)

        # Read the body in two pieces: up to the first space, then the rest
        # until the peer closes the connection.
        body_1 = yield stream.read_until(b' ')
        body_2 = yield stream.read_until_close()

        self.assertEqual(b'hello world!', body_1 + body_2)

        # ``closed`` has to be called: a bare bound method is always truthy,
        # which would make this assertion pass unconditionally.
        self.assertTrue(stream.closed())
コード例 #2
0
    def test_basic(self):
        '''Fetch ``/`` over a raw stream and check header, body and closure.

        This is a generator: every ``yield`` hands a stream operation to
        whatever coroutine runner drives the test (the decorator is not
        visible in this excerpt).
        '''
        socket_obj = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        stream = IOStream(socket_obj)

        yield stream.connect(('127.0.0.1', self.get_http_port()))
        yield stream.write(b'GET / HTTP/1.0\r\n\r\n')

        # Everything up to and including the blank line is the header block.
        headers = yield stream.read_until(b'\r\n\r\n')

        self.assertIn(b'OK', headers)

        # Read the body in two pieces: up to the first space, then the rest
        # until the peer closes the connection.
        body_1 = yield stream.read_until(b' ')
        body_2 = yield stream.read_until_close()

        self.assertEqual(b'hello world!', body_1 + body_2)

        # ``closed`` has to be called: a bare bound method is always truthy,
        # which would make this assertion pass unconditionally.
        self.assertTrue(stream.closed())
コード例 #3
0
ファイル: connection.py プロジェクト: nwpu063291/wpull
class Connection(object):
    '''A single HTTP connection.

    Args:
        address (tuple): The hostname (str) and port number (int).
        resolver (:class:`.network.Resolver`): The DNS resolver. A default
            ``Resolver()`` is created when omitted.
        ssl_enable (bool): If True, SSL is used.
        params (:class:`ConnectionParams`): Parameters that tweak the
            connection. A default ``ConnectionParams()`` is used when
            omitted.

    '''
    class ConnectionEvents(object):
        '''The set of events fired while a request is being processed.'''
        def __init__(self):
            self.pre_request = Event()
            self.request = Event()
            self.pre_response = Event()
            self.response = Event()
            self.request_data = Event()
            self.response_data = Event()

        def attach(self, recorder_session):
            '''Register the recorder session's same-named callbacks.'''
            self.pre_request.handle(recorder_session.pre_request)
            self.request.handle(recorder_session.request)
            self.pre_response.handle(recorder_session.pre_response)
            self.response.handle(recorder_session.response)
            self.request_data.handle(recorder_session.request_data)
            self.response_data.handle(recorder_session.response_data)

        def clear(self):
            '''Clear every event.'''
            self.pre_request.clear()
            self.request.clear()
            self.pre_response.clear()
            self.response.clear()
            self.request_data.clear()
            self.response_data.clear()

    def __init__(self, address, resolver=None, ssl_enable=False, params=None):
        self._original_address = address
        self._resolver = resolver or Resolver()
        self._ssl = ssl_enable
        self._params = params or ConnectionParams()
        # The socket and stream are created lazily by _make_socket().
        self._resolved_address = None
        self._socket = None
        self._io_stream = None
        self._events = Connection.ConnectionEvents()
        # True only while fetch() is running.
        self._active = False
        # Chosen per response by _setup_decompressor().
        self._decompressor = None

    @tornado.gen.coroutine
    def _make_socket(self):
        '''Make and wrap the socket with an IOStream.

        Resolves the original address, creates a TCP socket of the resolved
        family, optionally binds it, wraps it in an SSL or plain stream and
        registers the close callback.
        '''
        host, port = self._original_address

        family, self._resolved_address = yield self._resolver.resolve(
            host, port)

        self._socket = socket.socket(family, socket.SOCK_STREAM)

        _logger.debug(__('Socket to {0}/{1}.', family, self._resolved_address))

        if self._params.bind_address:
            _logger.debug(__(
                'Binding socket to {0}', self._params.bind_address
            ))
            self._socket.bind(self._params.bind_address)

        if self._ssl:
            self._io_stream = SSLIOStream(
                self._socket,
                max_buffer_size=self._params.buffer_size,
                rw_timeout=self._params.read_timeout,
                ssl_options=self._params.ssl_options or {},
                server_hostname=host,
            )
        else:
            self._io_stream = IOStream(
                self._socket,
                rw_timeout=self._params.read_timeout,
                max_buffer_size=self._params.buffer_size,
            )

        self._io_stream.set_close_callback(self._stream_closed_callback)

    @tornado.gen.coroutine
    def _connect(self):
        '''Connect the socket if not already connected.

        Raises:
            SSLVerficationError: On a certificate error.
            ConnectionRefused: When the error number is ``ECONNREFUSED``.
            NetworkError: On any other SSL or socket error.
        '''
        if self.connected:
            # Reset the callback so the context does not leak to another
            # (presumably the next fetch on this reused connection).
            self._io_stream.set_close_callback(self._stream_closed_callback)
            return

        yield self._make_socket()

        _logger.debug(__('Connecting to {0}.', self._resolved_address))
        try:
            yield self._io_stream.connect(
                self._resolved_address, timeout=self._params.connect_timeout
            )
        except (tornado.netutil.SSLCertificateError,
                SSLVerficationError) as error:
            raise SSLVerficationError('Certificate error: {error}'.format(
                error=error)) from error
        except (ssl.SSLError, socket.error) as error:
            if error.errno == errno.ECONNREFUSED:
                raise ConnectionRefused('Connection refused: {error}'.format(
                    error=error)) from error
            else:
                raise NetworkError('Connection error: {error}'.format(
                    error=error)) from error
        else:
            _logger.debug('Connected.')

    @tornado.gen.coroutine
    def fetch(self, request, recorder=None, response_factory=Response):
        '''Fetch a document.

        Args:
            request: :class:`Request`
            recorder: :class:`.recorder.BaseRecorder`
            response_factory: a callable object that makes a :class:`Response`.

        If an exception occurs, this function will close the connection
        automatically.

        Returns:
            Response: An instance of :class:`Response`

        Raises:
            Exception: Exceptions specified in :mod:`.errors`.
        '''
        _logger.debug(__('Request {0}.', request))

        assert not self._active

        self._active = True

        try:
            if recorder:
                with recorder.session() as recorder_session:
                    self._events.attach(recorder_session)
                    response = yield self._process_request(request,
                                                           response_factory)
            else:
                response = yield self._process_request(request,
                                                       response_factory)

            response.url_info = request.url_info
        except BaseException:
            # Deliberately catches everything: the connection is in an
            # unknown state, so close it and let the error propagate.
            _logger.debug('Fetch exception.')
            self.close()
            raise
        finally:
            # Always drop the recorder handlers and release the connection.
            self._events.clear()
            self._active = False

        if not self._params.keep_alive and self.connected:
            _logger.debug('Not keep-alive. Closing connection.')
            self.close()

        _logger.debug('Fetching done.')

        raise tornado.gen.Return(response)

    @tornado.gen.coroutine
    def _process_request(self, request, response_factory):
        '''Fulfill a single request.

        Connects if needed, sends the request header and body, reads the
        response header and body, then either closes the connection or
        monitors it for a remote close.

        Returns:
            Response

        Raises:
            NetworkError: On a connection, stream-closed or SSL error while
                sending or reading.
            ProtocolError: When the stream reports ``BufferFullError``.
        '''
        yield self._connect()

        request.address = self._resolved_address
        # Fired through .fire() like every other event in this class.
        self._events.pre_request.fire(request)

        # ConnectionError only exists on Python 3.3 and later.
        if sys.version_info < (3, 3):
            error_class = (socket.error, StreamClosedError, ssl.SSLError)
        else:
            error_class = (ConnectionError, StreamClosedError, ssl.SSLError)

        if not self._params.keep_alive and 'Connection' not in request.fields:
            request.fields['Connection'] = 'close'

        try:
            yield self._send_request_header(request)
            yield self._send_request_body(request)
            self._events.request.fire(request)

            response = yield self._read_response_header(response_factory)
            # TODO: handle 100 Continue

            yield self._read_response_body(request, response)
        except error_class as error:
            raise NetworkError('Network error: {0}'.format(error)) from error
        except BufferFullError as error:
            raise ProtocolError(*error.args) from error

        self._events.response.fire(response)

        if self.should_close(request.version,
                             response.fields.get('Connection')):
            _logger.debug('HTTP connection close.')
            self.close()
        else:
            self._io_stream.monitor_for_close()

        raise tornado.gen.Return(response)

    @classmethod
    def should_close(cls, http_version, connection_field):
        '''Return whether the connection should be closed after a response.

        Args:
            http_version (str): The request's HTTP version string.
            connection_field (str): Value of the response ``Connection``
                field; may be None or empty.

        For ``HTTP/1.0`` the connection is closed unless the field,
        lower-cased and with hyphens removed, equals ``keepalive``. For any
        other version it is closed only when the field equals ``close``.

        Returns:
            bool
        '''
        connection_field = (connection_field or '').lower()

        if http_version == 'HTTP/1.0':
            return connection_field.replace('-', '') != 'keepalive'
        else:
            return connection_field == 'close'

    @tornado.gen.coroutine
    def _send_request_header(self, request):
        '''Send the request's HTTP status line and header fields.'''
        _logger.debug('Sending headers.')
        data = request.header()
        self._events.request_data.fire(data)
        yield self._io_stream.write(data)

    @tornado.gen.coroutine
    def _send_request_body(self, request):
        '''Send the request's content body.

        Each item yielded by ``request.body`` is reported through the
        ``request_data`` event and written to the stream. A falsy body sends
        nothing.
        '''
        _logger.debug('Sending body.')
        for data in request.body or ():
            self._events.request_data.fire(data)
            yield self._io_stream.write(data)

    @tornado.gen.coroutine
    def _read_response_header(self, response_factory):
        '''Read the response's HTTP status line and header fields.

        Returns:
            The object made by ``response_factory`` with its fields parsed
            from the header data.
        '''
        _logger.debug('Reading header.')

        # The header ends at the first blank line; bare LF is tolerated.
        response_header_data = yield self._io_stream.read_until_regex(
            br'\r?\n\r?\n'
        )

        self._events.response_data.fire(response_header_data)

        status_line, header = response_header_data.split(b'\n', 1)
        version, status_code, status_reason = Response.parse_status_line(
            status_line)
        response = response_factory(version, status_code, status_reason)
        response.fields.parse(header, strict=False)
        self._events.pre_response.fire(response)

        raise tornado.gen.Return(response)

    @tornado.gen.coroutine
    def _read_response_body(self, request, response):
        '''Read the response's content body.

        Nothing is read when the response has neither ``Content-Length`` nor
        ``Transfer-Encoding`` and either its status code is one of the
        configured no-content codes or the request method was ``HEAD``.
        Otherwise the body is read by chunk, by length, or until close, and
        the content file is rewound to the start.
        '''
        if 'Content-Length' not in response.fields \
           and 'Transfer-Encoding' not in response.fields \
           and (
               response.status_code in self._params.no_content_codes
               or request.method.upper() == 'HEAD'
           ):
            return

        self._setup_decompressor(response)

        # Transfer-coding names are case-insensitive, so "Chunked" must be
        # treated the same as "chunked".
        if re.match(r'chunked($|;)',
                    response.fields.get('Transfer-Encoding', ''),
                    re.IGNORECASE):
            yield self._read_response_by_chunk(response)
        elif 'Content-Length' in response.fields \
             and not self._params.ignore_length:
            yield self._read_response_by_length(response)
        else:
            yield self._read_response_until_close(response)

        response.body.content_file.seek(0)

    def _setup_decompressor(self, response):
        '''Set up the content encoding decompressor.

        Selects a gzip or deflate decompressor from the lower-cased
        ``Content-Encoding`` field, or none for any other value.
        '''
        encoding = response.fields.get('Content-Encoding', '').lower()

        if encoding == 'gzip':
            self._decompressor = wpull.decompression.GzipDecompressor()
        elif encoding == 'deflate':
            self._decompressor = wpull.decompression.DeflateDecompressor()
        else:
            self._decompressor = None

    def _decompress_data(self, data):
        '''Decompress the given data and return the uncompressed data.

        The data is returned unchanged when no decompressor is set.

        Raises:
            ProtocolError: When zlib reports an error.
        '''
        if self._decompressor:
            try:
                return self._decompressor.decompress(data)
            except zlib.error as error:
                raise ProtocolError(
                    'zlib error: {0}.'.format(error)
                ) from error
        else:
            return data

    def _flush_decompressor(self):
        '''Return any data left in the decompressor.

        Returns ``b''`` when no decompressor is set.

        Raises:
            ProtocolError: When zlib reports an error.
        '''
        if self._decompressor:
            try:
                return self._decompressor.flush()
            except zlib.error as error:
                raise ProtocolError(
                    'zlib flush error: {0}.'.format(error)
                ) from error
        else:
            return b''

    @tornado.gen.coroutine
    def _read_response_by_length(self, response):
        '''Read the connection specified by a length.

        A ``Content-Length`` that is not an integer or is negative is logged
        as a warning and the body is read until close instead.
        '''
        _logger.debug('Reading body by length.')

        try:
            body_size = int(response.fields['Content-Length'])

            if body_size < 0:
                raise ValueError('Content length cannot be negative.')

        except ValueError as error:
            _logger.warning(__(
                _('Invalid content length: {error}'), error=error
            ))

            yield self._read_response_until_close(response)
            return

        def callback(data):
            # Report the raw data, then store the decompressed form.
            self._events.response_data.fire(data)
            response.body.content_file.write(self._decompress_data(data))

        yield self._io_stream.read_bytes(
            body_size, streaming_callback=callback,
        )

        response.body.content_file.write(self._flush_decompressor())

    @tornado.gen.coroutine
    def _read_response_by_chunk(self, response):
        '''Read the connection using chunked transfer encoding.

        Chunks are read until one of size zero is reported; the trailer is
        then read and parsed into the response fields.
        '''
        stream_reader = ChunkedTransferStreamReader(self._io_stream)
        # Raw data goes to the response_data event; content is decompressed
        # and written to the body file.
        stream_reader.data_event.handle(self._events.response_data.fire)
        stream_reader.content_event.handle(
            lambda data:
                response.body.content_file.write(self._decompress_data(data))
        )

        while True:
            chunk_size = yield stream_reader.read_chunk()

            if chunk_size == 0:
                break

        trailer_data = yield stream_reader.read_trailer()
        response.fields.parse(trailer_data)

        response.body.content_file.write(self._flush_decompressor())

    @tornado.gen.coroutine
    def _read_response_until_close(self, response):
        '''Read the response until the connection closes.'''
        _logger.debug('Reading body until close.')

        def callback(data):
            # Report the raw data, then store the decompressed form.
            self._events.response_data.fire(data)
            response.body.content_file.write(self._decompress_data(data))

        yield self._io_stream.read_until_close(streaming_callback=callback)

        response.body.content_file.write(self._flush_decompressor())

    @property
    def active(self):
        '''Return whether the connection is in use due to a fetch in progress.
        '''
        return self._active

    @property
    def connected(self):
        '''Return whether the connection is connected.

        Always a bool: False when no stream has been made yet or when the
        stream reports itself closed.
        '''
        return bool(self._io_stream and not self._io_stream.closed())

    def close(self):
        '''Close the connection if open.'''
        if self._io_stream:
            self._io_stream.close()

    def _stream_closed_callback(self):
        '''Log the connection state when the stream reports it has closed.'''
        _logger.debug(__(
            'Stream closed. active={0} connected={1} closed={2}',
            self._active,
            self.connected,
            self._io_stream.closed(),
        ))
コード例 #4
0
class Connection(object):
    '''A single HTTP connection.

    Args:
        address (tuple): The hostname (str) and port number (int).
        resolver (:class:`.network.Resolver`): The DNS resolver. A default
            ``Resolver()`` is created when omitted.
        ssl_enable (bool): If True, SSL is used.
        params (:class:`ConnectionParams`): Parameters that tweak the
            connection. A default ``ConnectionParams()`` is used when
            omitted.

    '''
    class ConnectionEvents(object):
        '''The set of events fired while a request is being processed.'''
        def __init__(self):
            self.pre_request = Event()
            self.request = Event()
            self.pre_response = Event()
            self.response = Event()
            self.request_data = Event()
            self.response_data = Event()

        def attach(self, recorder_session):
            '''Register the recorder session's same-named callbacks.'''
            self.pre_request.handle(recorder_session.pre_request)
            self.request.handle(recorder_session.request)
            self.pre_response.handle(recorder_session.pre_response)
            self.response.handle(recorder_session.response)
            self.request_data.handle(recorder_session.request_data)
            self.response_data.handle(recorder_session.response_data)

        def clear(self):
            '''Clear every event.'''
            self.pre_request.clear()
            self.request.clear()
            self.pre_response.clear()
            self.response.clear()
            self.request_data.clear()
            self.response_data.clear()

    def __init__(self, address, resolver=None, ssl_enable=False, params=None):
        self._original_address = address
        self._resolver = resolver or Resolver()
        self._ssl = ssl_enable
        self._params = params or ConnectionParams()
        # The socket and stream are created lazily by _make_socket().
        self._resolved_address = None
        self._socket = None
        self._io_stream = None
        self._events = Connection.ConnectionEvents()
        # True only while fetch() is running.
        self._active = False
        # Chosen per response by _setup_decompressor().
        self._decompressor = None

    @tornado.gen.coroutine
    def _make_socket(self):
        '''Make and wrap the socket with an IOStream.

        Resolves the original address, creates a TCP socket of the resolved
        family, optionally binds it, wraps it in an SSL or plain stream and
        registers the close callback.
        '''
        host, port = self._original_address

        family, self._resolved_address = yield self._resolver.resolve(
            host, port)

        self._socket = socket.socket(family, socket.SOCK_STREAM)

        _logger.debug('Socket to {0}/{1}.'.format(family,
                                                  self._resolved_address))

        if self._params.bind_address:
            _logger.debug('Binding socket to {0}'.format(
                self._params.bind_address))
            self._socket.bind(self._params.bind_address)

        if self._ssl:
            self._io_stream = SSLIOStream(
                self._socket,
                max_buffer_size=self._params.buffer_size,
                rw_timeout=self._params.read_timeout,
                ssl_options=self._params.ssl_options or {},
                server_hostname=host,
            )
        else:
            self._io_stream = IOStream(
                self._socket,
                rw_timeout=self._params.read_timeout,
                max_buffer_size=self._params.buffer_size,
            )

        self._io_stream.set_close_callback(self._stream_closed_callback)

    @tornado.gen.coroutine
    def _connect(self):
        '''Connect the socket if not already connected.

        Raises:
            SSLVerficationError: On a certificate error.
            ConnectionRefused: When the error number is ``ECONNREFUSED``.
            NetworkError: On any other SSL or socket error.
        '''
        if self.connected:
            # Reset the callback so the context does not leak to another
            # (presumably the next fetch on this reused connection).
            self._io_stream.set_close_callback(self._stream_closed_callback)
            return

        yield self._make_socket()

        _logger.debug('Connecting to {0}.'.format(self._resolved_address))
        try:
            yield self._io_stream.connect(self._resolved_address,
                                          timeout=self._params.connect_timeout)
        except (tornado.netutil.SSLCertificateError,
                SSLVerficationError) as error:
            raise SSLVerficationError(
                'Certificate error: {error}'.format(error=error)) from error
        except (ssl.SSLError, socket.error) as error:
            if error.errno == errno.ECONNREFUSED:
                raise ConnectionRefused('Connection refused: {error}'.format(
                    error=error)) from error
            else:
                raise NetworkError(
                    'Connection error: {error}'.format(error=error)) from error
        else:
            _logger.debug('Connected.')

    @tornado.gen.coroutine
    def fetch(self, request, recorder=None, response_factory=Response):
        '''Fetch a document.

        Args:
            request: :class:`Request`
            recorder: :class:`.recorder.BaseRecorder`
            response_factory: a callable object that makes a :class:`Response`.

        If an exception occurs, this function will close the connection
        automatically.

        Returns:
            Response: An instance of :class:`Response`

        Raises:
            Exception: Exceptions specified in :mod:`.errors`.
        '''
        _logger.debug('Request {0}.'.format(request))

        assert not self._active

        self._active = True

        try:
            if recorder:
                with recorder.session() as recorder_session:
                    self._events.attach(recorder_session)
                    response = yield self._process_request(
                        request, response_factory)
            else:
                response = yield self._process_request(request,
                                                       response_factory)

            response.url_info = request.url_info
        except BaseException:
            # Deliberately catches everything: the connection is in an
            # unknown state, so close it and let the error propagate.
            _logger.debug('Fetch exception.')
            self.close()
            raise
        finally:
            # Always drop the recorder handlers and release the connection.
            self._events.clear()
            self._active = False

        if not self._params.keep_alive and self.connected:
            _logger.debug('Not keep-alive. Closing connection.')
            self.close()

        _logger.debug('Fetching done.')

        raise tornado.gen.Return(response)

    @tornado.gen.coroutine
    def _process_request(self, request, response_factory):
        '''Fulfill a single request.

        Connects if needed, sends the request header and body, reads the
        response header and body, then either closes the connection or
        monitors it for a remote close.

        Returns:
            Response

        Raises:
            NetworkError: On a connection, stream-closed or SSL error while
                sending or reading.
            ProtocolError: When the stream reports ``BufferFullError``.
        '''
        yield self._connect()

        request.address = self._resolved_address
        # Fired through .fire() like every other event in this class.
        self._events.pre_request.fire(request)

        # ConnectionError only exists on Python 3.3 and later.
        if sys.version_info < (3, 3):
            error_class = (socket.error, StreamClosedError, ssl.SSLError)
        else:
            error_class = (ConnectionError, StreamClosedError, ssl.SSLError)

        if not self._params.keep_alive and 'Connection' not in request.fields:
            request.fields['Connection'] = 'close'

        try:
            yield self._send_request_header(request)
            yield self._send_request_body(request)
            self._events.request.fire(request)

            response = yield self._read_response_header(response_factory)
            # TODO: handle 100 Continue

            yield self._read_response_body(request, response)
        except error_class as error:
            raise NetworkError('Network error: {0}'.format(error)) from error
        except BufferFullError as error:
            raise ProtocolError(*error.args) from error

        self._events.response.fire(response)

        if self.should_close(request.version,
                             response.fields.get('Connection')):
            _logger.debug('HTTP connection close.')
            self.close()
        else:
            self._io_stream.monitor_for_close()

        raise tornado.gen.Return(response)

    @classmethod
    def should_close(cls, http_version, connection_field):
        '''Return whether the connection should be closed after a response.

        Args:
            http_version (str): The request's HTTP version string.
            connection_field (str): Value of the response ``Connection``
                field; may be None or empty.

        For ``HTTP/1.0`` the connection is closed unless the field,
        lower-cased and with hyphens removed, equals ``keepalive``. For any
        other version it is closed only when the field equals ``close``.

        Returns:
            bool
        '''
        connection_field = (connection_field or '').lower()

        if http_version == 'HTTP/1.0':
            return connection_field.replace('-', '') != 'keepalive'
        else:
            return connection_field == 'close'

    @tornado.gen.coroutine
    def _send_request_header(self, request):
        '''Send the request's HTTP status line and header fields.'''
        _logger.debug('Sending headers.')
        data = request.header()
        self._events.request_data.fire(data)
        yield self._io_stream.write(data)

    @tornado.gen.coroutine
    def _send_request_body(self, request):
        '''Send the request's content body.

        Each item yielded by ``request.body`` is reported through the
        ``request_data`` event and written to the stream. A falsy body sends
        nothing.
        '''
        _logger.debug('Sending body.')
        for data in request.body or ():
            self._events.request_data.fire(data)
            yield self._io_stream.write(data)

    @tornado.gen.coroutine
    def _read_response_header(self, response_factory):
        '''Read the response's HTTP status line and header fields.

        Returns:
            The object made by ``response_factory`` with its fields parsed
            from the header data.
        '''
        _logger.debug('Reading header.')

        # The header ends at the first blank line; bare LF is tolerated.
        response_header_data = yield self._io_stream.read_until_regex(
            br'\r?\n\r?\n')

        self._events.response_data.fire(response_header_data)

        status_line, header = response_header_data.split(b'\n', 1)
        version, status_code, status_reason = Response.parse_status_line(
            status_line)
        response = response_factory(version, status_code, status_reason)
        response.fields.parse(header, strict=False)
        self._events.pre_response.fire(response)

        raise tornado.gen.Return(response)

    @tornado.gen.coroutine
    def _read_response_body(self, request, response):
        '''Read the response's content body.

        Nothing is read when the response has neither ``Content-Length`` nor
        ``Transfer-Encoding`` and either its status code is one of the
        configured no-content codes or the request method was ``HEAD``.
        Otherwise the body is read by chunk, by length, or until close, and
        the content file is rewound to the start.
        '''
        if 'Content-Length' not in response.fields \
           and 'Transfer-Encoding' not in response.fields \
           and (
               response.status_code in self._params.no_content_codes
               or request.method.upper() == 'HEAD'
           ):
            return

        self._setup_decompressor(response)

        # Transfer-coding names are case-insensitive, so "Chunked" must be
        # treated the same as "chunked".
        if re.match(r'chunked($|;)',
                    response.fields.get('Transfer-Encoding', ''),
                    re.IGNORECASE):
            yield self._read_response_by_chunk(response)
        elif 'Content-Length' in response.fields \
             and not self._params.ignore_length:
            yield self._read_response_by_length(response)
        else:
            yield self._read_response_until_close(response)

        response.body.content_file.seek(0)

    def _setup_decompressor(self, response):
        '''Set up the content encoding decompressor.

        Selects a gzip or deflate decompressor from the lower-cased
        ``Content-Encoding`` field, or none for any other value.
        '''
        encoding = response.fields.get('Content-Encoding', '').lower()

        if encoding == 'gzip':
            self._decompressor = wpull.decompression.GzipDecompressor()
        elif encoding == 'deflate':
            self._decompressor = wpull.decompression.DeflateDecompressor()
        else:
            self._decompressor = None

    def _decompress_data(self, data):
        '''Decompress the given data and return the uncompressed data.

        The data is returned unchanged when no decompressor is set.

        Raises:
            ProtocolError: When zlib reports an error.
        '''
        if self._decompressor:
            try:
                return self._decompressor.decompress(data)
            except zlib.error as error:
                raise ProtocolError(
                    'zlib error: {0}.'.format(error)) from error
        else:
            return data

    def _flush_decompressor(self):
        '''Return any data left in the decompressor.

        Returns ``b''`` when no decompressor is set.

        Raises:
            ProtocolError: When zlib reports an error.
        '''
        if self._decompressor:
            try:
                return self._decompressor.flush()
            except zlib.error as error:
                raise ProtocolError(
                    'zlib flush error: {0}.'.format(error)) from error
        else:
            return b''

    @tornado.gen.coroutine
    def _read_response_by_length(self, response):
        '''Read the connection specified by a length.

        A ``Content-Length`` that is not an integer or is negative is logged
        as a warning and the body is read until close instead.
        '''
        _logger.debug('Reading body by length.')

        try:
            body_size = int(response.fields['Content-Length'])

            if body_size < 0:
                raise ValueError('Content length cannot be negative.')

        except ValueError as error:
            _logger.warning(
                _('Invalid content length: {error}').format(error=error))

            yield self._read_response_until_close(response)
            return

        def callback(data):
            # Report the raw data, then store the decompressed form.
            self._events.response_data.fire(data)
            response.body.content_file.write(self._decompress_data(data))

        yield self._io_stream.read_bytes(
            body_size,
            streaming_callback=callback,
        )

        response.body.content_file.write(self._flush_decompressor())

    @tornado.gen.coroutine
    def _read_response_by_chunk(self, response):
        '''Read the connection using chunked transfer encoding.

        Chunks are read until one of size zero is reported; the trailer is
        then read and parsed into the response fields.
        '''
        stream_reader = ChunkedTransferStreamReader(self._io_stream)
        # Raw data goes to the response_data event; content is decompressed
        # and written to the body file.
        stream_reader.data_event.handle(self._events.response_data.fire)
        stream_reader.content_event.handle(
            lambda data: response.body.content_file.write(
                self._decompress_data(data)))

        while True:
            chunk_size = yield stream_reader.read_chunk()

            if chunk_size == 0:
                break

        trailer_data = yield stream_reader.read_trailer()
        response.fields.parse(trailer_data)

        response.body.content_file.write(self._flush_decompressor())

    @tornado.gen.coroutine
    def _read_response_until_close(self, response):
        '''Read the response until the connection closes.'''
        _logger.debug('Reading body until close.')

        def callback(data):
            # Report the raw data, then store the decompressed form.
            self._events.response_data.fire(data)
            response.body.content_file.write(self._decompress_data(data))

        yield self._io_stream.read_until_close(streaming_callback=callback)

        response.body.content_file.write(self._flush_decompressor())

    @property
    def active(self):
        '''Return whether the connection is in use due to a fetch in progress.
        '''
        return self._active

    @property
    def connected(self):
        '''Return whether the connection is connected.

        Always a bool: False when no stream has been made yet or when the
        stream reports itself closed.
        '''
        return bool(self._io_stream and not self._io_stream.closed())

    def close(self):
        '''Close the connection if open.'''
        if self._io_stream:
            self._io_stream.close()

    def _stream_closed_callback(self):
        '''Log the connection state when the stream reports it has closed.'''
        _logger.debug(
            'Stream closed. active={0} connected={1} closed={2}'.format(
                self._active,
                self.connected,
                self._io_stream.closed(),
            ))