Esempio n. 1
0
    def test_name_value_fallback(self):
        # The record is given KOI8-R bytes but no encoding; the value is
        # expected to equal those same bytes decoded as Latin-1.
        raw_bytes = 'Name: Кракозябры'.encode('koi8-r')
        expected_value = 'Кракозябры'.encode('koi8-r').decode('latin1')

        parsed = NameValueRecord()
        parsed.parse(raw_bytes)

        self.assertEqual(expected_value, parsed['name'])
Esempio n. 2
0
 def test_name_value_str_format(self):
     # str() of the parsed fixture must give these CRLF-terminated lines.
     expected_text = (
         'Entry:\r\n'
         'Who: Gilbert, W.S. | Sullivan, Arthur\r\n'
         'What: The Yeomen of the Guard\r\n'
         'When/Created: 1888\r\n'
     )

     parsed = NameValueRecord()
     parsed.parse(self.RECORD_STR_1)

     self.assertEqual(expected_text, str(parsed))
Esempio n. 3
0
    def test_name_value_fallback(self):
        # No encoding is passed to the record, so the KOI8-R input is
        # expected to come back as if its bytes were Latin-1.
        koi8_input = 'Name: Кракозябры'.encode('koi8-r')

        rec = NameValueRecord()
        rec.parse(koi8_input)

        latin1_view = 'Кракозябры'.encode('koi8-r').decode('latin1')
        self.assertEqual(latin1_view, rec['name'])
Esempio n. 4
0
 def __init__(self, method=None, resource_path=None, version='HTTP/1.1'):
     '''Store the request-line parts and create an empty field record.

     Args:
         method: Value stored as ``self.method``.
         resource_path: Value stored as ``self.resource_path``.
         version: Value stored as ``self.version``.
     '''
     super().__init__()

     # Request-line components, exactly as given by the caller.
     self.method, self.resource_path, self.version = (
         method, resource_path, version)

     # The field record is created with the Latin-1 encoding.
     self.fields = NameValueRecord(encoding='latin-1')

     # No payload is attached at construction time.
     self.body = None
     self.encoding = 'latin-1'
Esempio n. 5
0
    def test_name_value_encoding(self):
        # With the encoding declared up front, the parsed value must match
        # the original text exactly.
        source_bytes = 'Name: Кракозябры'.encode('koi8-r')

        rec = NameValueRecord(encoding='koi8-r')
        rec.parse(source_bytes)

        decoded_name = rec['name']
        self.assertEqual('Кракозябры', decoded_name)
Esempio n. 6
0
    def test_name_value_record_setters(self):
        rec = NameValueRecord()

        # Nothing stored yet: membership is false and lookup raises.
        self.assertNotIn('cache', rec)
        with self.assertRaises(KeyError):
            rec['cache']

        rec['cache'] = 'value1'

        # The value is reachable under other casings of the name, and the
        # name is reported back as 'Cache'.
        self.assertEqual('value1', rec['CACHE'])
        self.assertEqual(['value1'], rec.get_list('Cache'))
        stored_pairs = list(rec.get_all())
        self.assertEqual([('Cache', 'value1')], stored_pairs)
Esempio n. 7
0
 def test_name_value_str_format(self):
     # Parse the fixture and compare its string form with the expected
     # CRLF-terminated text.
     rec = NameValueRecord()
     rec.parse(self.RECORD_STR_1)
     rendered = str(rec)

     wanted = ('Entry:\r\n'
               'Who: Gilbert, W.S. | Sullivan, Arthur\r\n'
               'What: The Yeomen of the Guard\r\n'
               'When/Created: 1888\r\n')
     self.assertEqual(wanted, rendered)
Esempio n. 8
0
    def _build_phantomjs_coprocessor(cls, session: AppSession,
                                     proxy_port: int):
        '''Assemble the PhantomJS coprocessor from the session arguments.

        Args:
            session: Application session supplying ``args``, ``factory`` and
                ``default_user_agent``.
            proxy_port: Port placed in PhantomJS's ``--proxy`` argument.

        Returns:
            The object the session factory creates for
            ``PhantomJSCoprocessor``.
        '''
        args = session.args

        # Gather the user-supplied header lines into one record.
        header_record = NameValueRecord()
        for header_line in args.header:
            header_record.parse(header_line)

        # PhantomJS only accepts a one-to-one mapping. These defaults are
        # added last because NameValueRecord.items() uses only the first
        # value added for each key.
        header_record.add('Accept-Language', '*')
        if not args.http_compression:
            header_record.add('Accept-Encoding', 'identity')
        custom_headers = dict(header_record.items())

        page_settings = {}
        if args.read_timeout:
            page_settings['resourceTimeout'] = args.read_timeout * 1000
        page_settings['userAgent'] = (
            args.user_agent or session.default_user_agent)

        # Test early for executable
        wpull.driver.phantomjs.get_version(args.phantomjs_exe)

        params = PhantomJSParams(
            wait_time=args.phantomjs_wait,
            num_scrolls=args.phantomjs_scroll,
            smart_scroll=args.phantomjs_smart_scroll,
            snapshot=args.phantomjs_snapshot,
            custom_headers=custom_headers,
            page_settings=page_settings,
            load_time=args.phantomjs_max_time,
        )

        proxy_address = '{}:{}'.format(args.proxy_server_address, proxy_port)
        driver_factory = functools.partial(
            session.factory.class_map['PhantomJSDriver'],
            exe_path=args.phantomjs_exe,
            extra_args=['--proxy', proxy_address, '--ignore-ssl-errors=true'],
        )

        return session.factory.new(
            'PhantomJSCoprocessor',
            driver_factory,
            session.factory['ProcessingRule'],
            params,
            root_path=args.directory_prefix,
            warc_recorder=session.factory.get('WARCRecorder'),
        )
Esempio n. 9
0
    def test_missing_colon(self):
        malformed = 'text:hello\nhi\n'

        # Without strict=False the line lacking a colon must be rejected.
        strict_rec = NameValueRecord()
        with self.assertRaises(ValueError):
            strict_rec.parse(malformed)

        # With strict=False the valid field is kept and 'hi' is not added.
        lenient_rec = NameValueRecord()
        lenient_rec.parse(malformed, strict=False)

        self.assertEqual('hello', lenient_rec['text'])
        self.assertNotIn('hi', lenient_rec)
Esempio n. 10
0
    def test_name_value_record_setters(self):
        fields = NameValueRecord()

        # An empty record does not contain the key and raises on lookup.
        self.assertNotIn('cache', fields)
        with self.assertRaises(KeyError):
            fields['cache']

        # After assignment the value is visible through every accessor.
        fields['cache'] = 'value1'
        self.assertEqual('value1', fields['CACHE'])
        self.assertEqual(['value1'], fields.get_list('Cache'))

        expected_pairs = [('Cache', 'value1')]
        self.assertEqual(expected_pairs, list(fields.get_all()))
Esempio n. 11
0
    def _build_phantomjs_coprocessor(cls, session: AppSession, proxy_port: int):
        '''Create the PhantomJS coprocessor described by the session.

        Args:
            session: Application session; its ``args``, ``factory`` and
                ``default_user_agent`` are read.
            proxy_port: Port number combined with
                ``args.proxy_server_address`` for the ``--proxy`` option.

        Returns:
            The ``PhantomJSCoprocessor`` instance built by the factory.
        '''
        options = session.args
        factory = session.factory

        # Parse every user-provided header string into a single record.
        headers = NameValueRecord()
        for raw_header in options.header:
            headers.parse(raw_header)

        # Since only a one-to-one mapping can be handed to PhantomJS, the
        # defaults go in last: NameValueRecord.items() uses only the first
        # value added for each key.
        headers.add('Accept-Language', '*')
        if not options.http_compression:
            headers.add('Accept-Encoding', 'identity')
        header_mapping = dict(headers.items())

        settings = {}
        if options.read_timeout:
            settings['resourceTimeout'] = options.read_timeout * 1000
        settings['userAgent'] = \
            options.user_agent or session.default_user_agent

        # Test early for executable
        wpull.driver.phantomjs.get_version(options.phantomjs_exe)

        phantomjs_params = PhantomJSParams(
            wait_time=options.phantomjs_wait,
            num_scrolls=options.phantomjs_scroll,
            smart_scroll=options.phantomjs_smart_scroll,
            snapshot=options.phantomjs_snapshot,
            custom_headers=header_mapping,
            page_settings=settings,
            load_time=options.phantomjs_max_time,
        )

        command_line_extras = [
            '--proxy',
            '{}:{}'.format(options.proxy_server_address, proxy_port),
            '--ignore-ssl-errors=true'
        ]
        make_driver = functools.partial(
            factory.class_map['PhantomJSDriver'],
            exe_path=options.phantomjs_exe,
            extra_args=command_line_extras,
        )

        coprocessor = factory.new(
            'PhantomJSCoprocessor',
            make_driver,
            factory['ProcessingRule'],
            phantomjs_params,
            root_path=options.directory_prefix,
            warc_recorder=factory.get('WARCRecorder'),
        )
        return coprocessor
Esempio n. 12
0
    def test_mixed_line_ending(self):
        # Every field of the mixed-line-ending fixture must be readable,
        # checked in the same order as listed here.
        expected_values = (
            ('dog', 'woof'),
            ('cat', 'meow'),
            ('bird', 'tweet'),
            ('mouse', 'squeak'),
            ('cow', 'moo'),
            ('frog', 'croak'),
            ('elephant', 'toot'),
            ('duck', 'quack'),
            ('fish', 'blub'),
            ('seal', 'ow ow ow'),
            ('fox', '???'),
        )

        parsed = NameValueRecord()
        parsed.parse(self.MIXED_LINE_ENDING_STR_1)

        for field_name, field_value in expected_values:
            self.assertEqual(field_value, parsed[field_name])
Esempio n. 13
0
 def __init__(self, version, status_code, status_reason):
     '''Store the status-line parts and create empty fields and body.

     Args:
         version: Value stored as ``self.version``.
         status_code: Value stored as ``self.status_code``.
         status_reason: Value stored as ``self.status_reason``.
     '''
     # Status-line components, exactly as given by the caller.
     self.version, self.status_code, self.status_reason = (
         version, status_code, status_reason)

     # Fresh, empty containers for the header fields and the payload.
     self.fields = NameValueRecord()
     self.body = Body()

     # Not known at construction time.
     self.url_info = None
Esempio n. 14
0
    def _build_phantomjs_controller(self):
        '''Build the proxy server, the PhantomJS client and its controller.

        Returns:
            The ``PhantomJSController`` created by the factory, or ``None``
            when ``self._args.phantomjs`` is falsy.
        '''
        args = self._args

        if not args.phantomjs:
            return

        # Attach the proxy server to a free local port.
        proxy_server = self._factory.new(
            'HTTPProxyServer', self.factory['Client'])
        proxy_socket, proxy_port = tornado.testing.bind_unused_port()
        proxy_server.add_socket(proxy_socket)

        page_settings = {}
        header_record = NameValueRecord()
        for header_line in args.header:
            header_record.parse(header_line)

        # PhantomJS only accepts a one-to-one mapping. These defaults are
        # added last because NameValueRecord.items() uses only the first
        # value added for each key.
        header_record.add('Accept-Language', '*')
        if not args.http_compression:
            header_record.add('Accept-Encoding', 'identity')
        header_mapping = dict(header_record.items())

        if args.read_timeout:
            page_settings['resourceTimeout'] = args.read_timeout * 1000
        page_settings['userAgent'] = (
            args.user_agent or self.default_user_agent)

        client = self._factory.new(
            'PhantomJSClient',
            'localhost:{0}'.format(proxy_port),
            page_settings=page_settings,
            default_headers=header_mapping,
        )
        client.test_client_exe()

        return self._factory.new(
            'PhantomJSController',
            client,
            wait_time=args.phantomjs_wait,
            num_scrolls=args.phantomjs_scroll,
            warc_recorder=self.factory.get('WARCRecorder'),
            smart_scroll=args.phantomjs_smart_scroll,
        )
Esempio n. 15
0
    def __init__(self,
                 status_code=None,
                 reason=None,
                 version='HTTP/1.1',
                 request=None):
        '''Store the status-line parts and create an empty field record.

        Args:
            status_code: Optional status code; when given it is asserted
                to be an ``int`` and ``reason`` is asserted to be set.
            reason: Value stored as ``self.reason``.
            version: Value stored as ``self.version``.
            request: Value stored as ``self.request``.
        '''
        status_given = status_code is not None
        if status_given:
            type_message = 'Expect int, got {}'.format(type(status_code))
            assert isinstance(status_code, int), type_message
            assert reason is not None

        # Status-line components.
        self.status_code, self.reason, self.version = (
            status_code, reason, version)

        # The field record is created with the Latin-1 encoding.
        self.fields = NameValueRecord(encoding='latin-1')
        self.body = None
        self.request = request
        self.encoding = 'latin-1'
Esempio n. 16
0
 def __init__(self, method, resource_url, version='HTTP/1.1'):
     '''Store the request-line parts and create empty fields and body.

     Args:
         method: Value stored as ``self.method``.
         resource_url: Value stored as ``self.resource_url``.
         version: Value stored as ``self.version``.
     '''
     # Request-line components, exactly as given by the caller.
     self.method, self.resource_url = method, resource_url

     # Not known at construction time.
     self.url_info = None

     self.version = version

     # Fresh, empty containers for the header fields and the payload.
     self.fields = NameValueRecord()
     self.body = Body()

     self.address = None
Esempio n. 17
0
 def __init__(self, method=None, resource_path=None, version='HTTP/1.1'):
     '''Initialise the request with its request-line parts.

     Args:
         method: Value stored as ``self.method``.
         resource_path: Value stored as ``self.resource_path``.
         version: Value stored as ``self.version``.
     '''
     super().__init__()

     self.method = method
     self.resource_path = resource_path
     self.version = version

     # Header fields start empty; the record is given the Latin-1 encoding.
     empty_fields = NameValueRecord(encoding='latin-1')
     self.fields = empty_fields

     # No payload yet.
     self.body = None
     self.encoding = 'latin-1'
Esempio n. 18
0
    def _build_phantomjs_controller(self):
        '''Build the proxy server, the PhantomJS client and its controller.

        Returns:
            The ``PhantomJSController`` created by the factory, or ``None``
            when ``self._args.phantomjs`` is falsy.
        '''
        options = self._args

        if not options.phantomjs:
            return

        # Attach the proxy server to a free local port.
        proxy_server = self._factory.new(
            'HTTPProxyServer', self.factory['Client'])
        listen_socket, listen_port = tornado.testing.bind_unused_port()
        proxy_server.add_socket(listen_socket)

        settings = {}
        headers = NameValueRecord()
        for raw_header in options.header:
            headers.parse(raw_header)

        # Since only a one-to-one mapping can be handed to PhantomJS, the
        # defaults go in last: NameValueRecord.items() uses only the first
        # value added for each key.
        headers.add('Accept-Language', '*')
        if not options.http_compression:
            headers.add('Accept-Encoding', 'identity')
        header_mapping = dict(headers.items())

        if options.read_timeout:
            settings['resourceTimeout'] = options.read_timeout * 1000
        settings['userAgent'] = \
            options.user_agent or self.default_user_agent

        proxy_address = 'localhost:{0}'.format(listen_port)
        client = self._factory.new(
            'PhantomJSClient',
            proxy_address,
            page_settings=settings,
            default_headers=header_mapping,
            exe_path=options.phantomjs_exe
        )
        client.test_client_exe()

        controller = self._factory.new(
            'PhantomJSController',
            client,
            wait_time=options.phantomjs_wait,
            num_scrolls=options.phantomjs_scroll,
            warc_recorder=self.factory.get('WARCRecorder'),
            smart_scroll=options.phantomjs_smart_scroll,
            snapshot=options.phantomjs_snapshot,
        )
        return controller
Esempio n. 19
0
    def _populate_warcinfo(self, extra_fields=None):
        '''Fill the Warcinfo record with its metadata block.

        Args:
            extra_fields: Optional iterable of ``(name, value)`` pairs that
                are appended after the standard fields.
        '''
        warcinfo = self._warcinfo_record
        warcinfo.set_common_fields(
            WARCRecord.WARCINFO, WARCRecord.WARC_FIELDS)

        software = 'Wpull/{0} Python/{1}'.format(
            wpull.version.__version__, wpull.util.python_version())

        metadata = NameValueRecord()
        metadata['Software'] = software
        metadata['format'] = 'WARC File Format 1.0'
        metadata['conformsTo'] = \
            'http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf'

        # A falsy extra_fields (None or empty) contributes nothing.
        for name, value in extra_fields or ():
            metadata.add(name, value)

        # The block is the serialised fields followed by one extra CRLF.
        block_bytes = bytes(metadata) + b'\r\n'
        warcinfo.block_file = io.BytesIO(block_bytes)
        warcinfo.compute_checksum()
Esempio n. 20
0
    def _populate_warcinfo(self, extra_fields=None):
        '''Fill the Warcinfo record with its metadata block.

        Args:
            extra_fields: Optional iterable of ``(name, value)`` pairs that
                are appended after the standard fields.
        '''
        warcinfo = self._warcinfo_record
        warcinfo.set_common_fields(
            WARCRecord.WARCINFO, WARCRecord.WARC_FIELDS)

        # A falsy configured software string falls back to the class default.
        software = (self._params.software_string
                    or self.DEFAULT_SOFTWARE_STRING)

        metadata = NameValueRecord()
        metadata['Software'] = software
        metadata['format'] = 'WARC File Format 1.0'
        metadata['conformsTo'] = \
            'http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf'

        # A falsy extra_fields (None or empty) contributes nothing.
        for name, value in extra_fields or ():
            metadata.add(name, value)

        # The block is the serialised fields followed by one extra CRLF.
        block_bytes = bytes(metadata) + b'\r\n'
        warcinfo.block_file = io.BytesIO(block_bytes)
        warcinfo.compute_checksum()
Esempio n. 21
0
    def test_wrap_width(self):
        # A value of ten 'hello ' repetitions in a record created with
        # wrap_width=24 must serialise to the continuation lines below.
        wrapped = NameValueRecord(wrap_width=24)
        wrapped['blah'] = 'hello ' * 10

        expected_text = (
            'Blah: hello hello hello hello\r\n'
            '  hello hello hello \r\n'
            ' hello hello hello \r\n'
        )
        rendered = str(wrapped)

        self.assertEqual(expected_text, rendered)
Esempio n. 22
0
    def _populate_warcinfo(self, extra_fields=None):
        '''Write the metadata fields into the Warcinfo record.

        Args:
            extra_fields: Optional iterable of ``(name, value)`` pairs added
                after the standard fields.
        '''
        record = self._warcinfo_record
        record.set_common_fields(WARCRecord.WARCINFO, WARCRecord.WARC_FIELDS)

        info = NameValueRecord()

        # Use the configured software string unless it is falsy.
        configured_software = self._params.software_string
        info['Software'] = configured_software or self.DEFAULT_SOFTWARE_STRING
        info['format'] = 'WARC File Format 1.0'
        info['conformsTo'] = \
            'http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf'

        if extra_fields:
            for field_name, field_value in extra_fields:
                info.add(field_name, field_value)

        # Serialised fields plus one extra CRLF form the record block.
        serialised = bytes(info) + b'\r\n'
        record.block_file = io.BytesIO(serialised)
        record.compute_checksum()
Esempio n. 23
0
    def test_with_normalize_overrides(self):
        record = NameValueRecord(normalize_overrides=['WARC-Type'])

        def check_single_warc_type(value):
            # The record must hold exactly one field, spelled 'WARC-Type',
            # carrying ``value``.
            self.assertIn('WARC-Type', record)
            self.assertEqual(value, record['WARC-Type'])
            self.assertEqual([('WARC-Type', value)], list(record.get_all()))
            self.assertEqual([value], record.get_list('Warc-Type'))
            self.assertEqual(['WARC-Type'], list(record.keys()))

        record.add('WARC-Type', 'warcinfo')
        check_single_warc_type('warcinfo')

        # Assigning under a different casing replaces the value and keeps
        # the overridden spelling.
        record['Warc-Type'] = 'resource'
        check_single_warc_type('resource')

        # A name without an override is expected back as 'Warc-Blah'.
        record['WARC-Blah'] = 'blah'
        self.assertEqual(['WARC-Type', 'Warc-Blah'], list(record.keys()))
Esempio n. 24
0
    def __init__(self, status_code=None, reason=None, version='HTTP/1.1',
                 request=None):
        '''Initialise the response with its status-line parts.

        Args:
            status_code: Optional status code; when given it is asserted
                to be an ``int`` and ``reason`` is asserted to be set.
            reason: Value stored as ``self.reason``.
            version: Value stored as ``self.version``.
            request: Value stored as ``self.request``.
        '''
        if status_code is not None:
            wrong_type_message = 'Expect int, got {}'.format(
                type(status_code))
            assert isinstance(status_code, int), wrong_type_message
            assert reason is not None

        self.status_code = status_code
        self.reason = reason
        self.version = version

        # Header fields start empty; the record is given Latin-1 encoding.
        empty_fields = NameValueRecord(encoding='latin-1')
        self.fields = empty_fields

        # No payload yet.
        self.body = None
        self.request = request
        self.encoding = 'latin-1'
Esempio n. 25
0
    def _new_mock_response(self, response, file_path):
        '''Return a shallow copy of ``response`` backed by a file's content.

        Args:
            response: Response to copy; its fields are duplicated.
            file_path: Path opened in binary mode and wrapped in a ``Body``.

        Returns:
            The copied response with a new body, a new field record and a
            ``Content-Type`` of ``text/html; charset="utf-8"``.
        '''
        clone = copy.copy(response)
        clone.body = Body(open(file_path, 'rb'))

        # Copy the fields into a fresh record and set the content type on
        # that record only.
        copied_fields = NameValueRecord()
        clone.fields = copied_fields
        for field_name, field_value in response.fields.get_all():
            copied_fields.add(field_name, field_value)

        copied_fields['Content-Type'] = 'text/html; charset="utf-8"'

        return clone
Esempio n. 26
0
    def test_missing_colon(self):
        text_with_bad_line = 'text:hello\nhi\n'

        # Without strict=False, the line lacking a colon raises.
        rec = NameValueRecord()
        with self.assertRaises(ValueError):
            rec.parse(text_with_bad_line)

        # With strict=False on a fresh record, parsing succeeds; 'text' is
        # stored and 'hi' is not.
        rec = NameValueRecord()
        rec.parse(text_with_bad_line, strict=False)

        self.assertEqual('hello', rec['text'])
        self.assertNotIn('hi', rec)
Esempio n. 27
0
    def _new_phantomjs_response(self, response, content):
        '''Return a shallow copy of ``response`` carrying ``content``.

        Args:
            response: Response to copy; its fields are duplicated.
            content (str): Text written, UTF-8 encoded, into the body's
                content file.

        Returns:
            The copied response with a new field record and a
            ``Content-Type`` of ``text/html; charset="utf-8"``.
        '''
        clone = copy.copy(response)

        # tempfile needed for scripts that need a on-disk filename
        # NOTE(review): copy.copy is shallow, so ``clone.body`` looks like
        # the same object as ``response.body`` and this assignment would
        # also affect the original response -- confirm this is intended.
        content_file = tempfile.SpooledTemporaryFile(max_size=999999999)
        clone.body.content_file = content_file

        clone.body.content_file.write(content.encode('utf-8'))
        clone.body.content_file.seek(0)

        # Copy the fields into a fresh record and set the content type on
        # that record only.
        copied_fields = NameValueRecord()
        clone.fields = copied_fields
        for field_name, field_value in response.fields.get_all():
            copied_fields.add(field_name, field_value)

        copied_fields['Content-Type'] = 'text/html; charset="utf-8"'

        return clone
Esempio n. 28
0
    def test_with_normalize_overrides(self):
        fields = NameValueRecord(normalize_overrides=['WARC-Type'])
        only_warc_type = ['WARC-Type']

        # Adding under the overridden spelling keeps that spelling.
        fields.add('WARC-Type', 'warcinfo')

        self.assertIn('WARC-Type', fields)
        self.assertEqual('warcinfo', fields['WARC-Type'])
        pairs_after_add = list(fields.get_all())
        self.assertEqual([('WARC-Type', 'warcinfo')], pairs_after_add)
        self.assertEqual(['warcinfo'], fields.get_list('Warc-Type'))
        self.assertEqual(only_warc_type, list(fields.keys()))

        # Assigning under a different casing replaces the value and still
        # reports the overridden spelling.
        fields['Warc-Type'] = 'resource'

        self.assertIn('WARC-Type', fields)
        self.assertEqual('resource', fields['WARC-Type'])
        pairs_after_set = list(fields.get_all())
        self.assertEqual([('WARC-Type', 'resource')], pairs_after_set)
        self.assertEqual(['resource'], fields.get_list('Warc-Type'))
        self.assertEqual(only_warc_type, list(fields.keys()))

        # A name without an override is expected back as 'Warc-Blah'.
        fields['WARC-Blah'] = 'blah'
        self.assertEqual(['WARC-Type', 'Warc-Blah'], list(fields.keys()))
Esempio n. 29
0
    def test_copy(self):
        # Smoke test: deep-copying a populated record must not raise.
        populated = NameValueRecord()
        populated['blah'] = 'hello'

        copy.deepcopy(populated)
Esempio n. 30
0
class Response(SerializableMixin, DictableMixin, ProtocolResponseMixin):
    '''Represents the HTTP response.

    Attributes:
        status_code (int): The status code in the status line.
        reason (str): The status reason string in the status line.
        version (str): The HTTP version in the status line. For example,
            ``HTTP/1.1``.
        fields (:class:`.namevalue.NameValueRecord`): The fields in
            the HTTP headers (and trailer, if present).
        body (:class:`.body.Body`, file-like, None): The optional payload
            (without any transfer or content encoding).
        request: The corresponding request.
        encoding (str): The encoding of the status line.
    '''
    def __init__(self, status_code=None, reason=None, version='HTTP/1.1',
                 request=None):
        '''Store the status-line parts and create an empty field record.

        Args:
            status_code: Optional status code; when given it is asserted
                to be an ``int`` and ``reason`` is asserted to be set.
            reason: The status reason string.
            version: The HTTP version string.
            request: The corresponding request.
        '''
        if status_code is not None:
            assert isinstance(status_code, int), \
                'Expect int, got {}'.format(type(status_code))
            assert reason is not None

        self.status_code = status_code
        self.reason = reason
        self.version = version
        self.fields = NameValueRecord(encoding='latin-1')
        self.body = None
        self.request = request
        self.encoding = 'latin-1'

    @property
    def protocol(self):
        '''The protocol name, always ``'http'``.'''
        return 'http'

    def to_dict(self):
        '''Return the response as a plain dictionary.

        ``response_code`` and ``response_message`` repeat ``status_code``
        and ``reason`` under alternate keys.
        '''
        return {
            'protocol': 'http',
            'status_code': self.status_code,
            'reason': self.reason,
            'response_code': self.status_code,
            'response_message': self.reason,
            'version': self.version,
            'fields': list(self.fields.get_all()),
            'body': self.call_to_dict_or_none(self.body),
            'request': self.request.to_dict() if self.request else None,
            'encoding': self.encoding,
        }

    def to_bytes(self):
        '''Serialize the status line and fields.

        Returns:
            bytes: The status line, CRLF, the serialized fields, CRLF.
        '''
        assert self.version
        assert self.status_code is not None
        assert self.reason is not None

        status = '{0} {1} {2}'.format(
            self.version, self.status_code, self.reason
        ).encode(self.encoding)
        fields = self.fields.to_bytes(errors='replace')

        return b'\r\n'.join([status, fields, b''])

    def parse(self, data):
        '''Parse header bytes into this response.

        When no status code has been set yet, the first line of ``data`` is
        split off and parsed as the status line. The remaining data is
        parsed into ``fields`` with ``strict=False``.

        Args:
            data (bytes): The header data.
        '''
        if self.status_code is None:
            line, data = data.split(b'\n', 1)
            self.version, self.status_code, self.reason = \
                self.parse_status_line(line)

        self.fields.parse(data, strict=False)

    @classmethod
    def parse_status_line(cls, data):
        '''Parse the status line bytes.

        Returns:
            tuple: A tuple representing the version, code, and reason.

        Raises:
            ProtocolError: If ``data`` does not match the status line
                pattern.
        '''
        match = re.match(
            br'(HTTP/\d+\.\d+)[ \t]+([0-9]{1,3})[ \t]*([^\r\n]*)',
            data
        )

        if not match:
            # The previous message carried a stray, unbalanced double quote
            # after the offending line.
            raise ProtocolError(
                'Error parsing status line {line}.'.format(line=ascii(data))
            )

        # The pattern has exactly three groups, so unpacking cannot fail.
        version, code, reason = match.groups()

        return wpull.string.to_str(
            (version, int(code), reason),
            encoding='latin-1',
        )

    def __repr__(self):
        return '<Response({version}, {code}, {reason})>'.format(
            version=ascii(self.version), code=self.status_code,
            reason=ascii(self.reason)
        )

    def __str__(self):
        '''Return the serialized header decoded as UTF-8 with replacement.

        The decoded text is passed through ``wpull.string.printable_str``
        with ``keep_newlines=True``.
        '''
        return wpull.string.printable_str(
            self.to_bytes().decode('utf-8', 'replace'), keep_newlines=True
        )

    def response_code(self):
        '''Return the status code.'''
        return self.status_code

    def response_message(self):
        '''Return the reason string.'''
        return self.reason
Esempio n. 31
0
class RawRequest(SerializableMixin, DictableMixin):
    '''An HTTP request described by its request line and header fields.

    Attributes:
        method (str): The HTTP method in the status line. For example,
            ``GET``, ``POST``.
        resource_path (str): The URL or "path" in the status line.
        version (str): The HTTP version in the status line. For example,
            ``HTTP/1.0``.
        fields (:class:`.namevalue.NameValueRecord`): The fields in
            the HTTP header.
        body (:class:`.body.Body`, file-like, None): An optional payload.
        encoding (str): The encoding of the status line.
    '''
    def __init__(self, method=None, resource_path=None, version='HTTP/1.1'):
        super().__init__()
        self.method, self.resource_path, self.version = (
            method, resource_path, version)
        self.fields = NameValueRecord(encoding='latin-1')
        self.body = None
        self.encoding = 'latin-1'

    def to_dict(self):
        '''Return the request as a plain dictionary.'''
        as_dict = {'protocol': 'http'}
        as_dict['method'] = self.method
        as_dict['version'] = self.version
        as_dict['resource_path'] = self.resource_path
        as_dict['fields'] = list(self.fields.get_all())
        as_dict['body'] = self.call_to_dict_or_none(self.body)
        as_dict['encoding'] = self.encoding
        return as_dict

    def to_bytes(self):
        '''Serialize the request line and fields, each followed by CRLF.'''
        assert self.method
        assert self.resource_path
        assert self.version

        request_line = '{0} {1} {2}'.format(
            self.method, self.resource_path, self.version)
        encoded_line = request_line.encode(self.encoding)
        encoded_fields = self.fields.to_bytes(errors='replace')

        return b'\r\n'.join([encoded_line, encoded_fields, b''])

    def parse(self, data):
        '''Parse header bytes into this request.

        When ``resource_path`` is not set, the first line of ``data`` is
        split off and parsed as the request line. The remaining data is
        parsed into ``fields`` with ``strict=False``.
        '''
        if not self.resource_path:
            first_line, data = data.split(b'\n', 1)
            request_line_parts = self.parse_status_line(first_line)
            self.method, self.resource_path, self.version = \
                request_line_parts

        self.fields.parse(data, strict=False)

    def parse_status_line(self, data):
        '''Parse the status line bytes.

        Returns:
            tuple: A tuple representing the method, URI, and
            version.

        Raises:
            ProtocolError: If ``data`` does not match the pattern.
        '''
        pattern = br'([a-zA-Z]+)[ \t]+([^ \t]+)[ \t]+(HTTP/\d+\.\d+)'
        found = re.match(pattern, data)

        if not found:
            raise ProtocolError('Error parsing status line.')

        # The pattern has exactly three groups.
        method, resource, version = found.groups()
        return wpull.string.to_str(
            (method, resource, version),
            encoding=self.encoding,
        )

    def __repr__(self):
        template = '<Request({method}, {url}, {version})>'
        return template.format(
            method=self.method,
            url=self.resource_path,
            version=self.version,
        )

    def copy(self):
        '''Return a deep copy of this request.'''
        duplicate = copy.deepcopy(self)
        return duplicate

    def set_continue(self, offset):
        '''Turn the request into a range request starting at ``offset``.'''
        assert offset >= 0, offset
        range_value = 'bytes={0}-'.format(offset)
        self.fields['Range'] = range_value
Esempio n. 32
0
 def test_name_value_record_parsing(self):
     # The parsed fixture must expose its 'Who' field under the name 'who'.
     parsed = NameValueRecord()
     parsed.parse(self.RECORD_STR_1)

     self.assertIn('who', parsed)
     who_value = parsed['who']
     self.assertEqual('Gilbert, W.S. | Sullivan, Arthur', who_value)
Esempio n. 33
0
 def __init__(self):
     '''Create an empty field record and leave the block file unset.'''
     # The class-level NAME_OVERRIDES are passed as normalize_overrides.
     name_overrides = self.NAME_OVERRIDES
     self.fields = NameValueRecord(normalize_overrides=name_overrides)

     # No block content is attached at construction time.
     self.block_file = None
Esempio n. 34
0
    def test_name_value_utf8(self):
        # A str containing a non-ASCII character must round-trip unchanged.
        source_text = 'Name: dogé'

        parsed = NameValueRecord()
        parsed.parse(source_text)

        self.assertEqual('dogé', parsed['name'])
Esempio n. 35
0
    def test_name_value_utf8(self):
        # Parsing text with an accented character keeps the value intact.
        rec = NameValueRecord()
        rec.parse('Name: dogé')

        stored_name = rec['name']
        self.assertEqual('dogé', stored_name)
Esempio n. 36
0
 def test_name_value_record_parsing(self):
     # After parsing the fixture, 'who' must be present with this value.
     expected_who = 'Gilbert, W.S. | Sullivan, Arthur'

     rec = NameValueRecord()
     rec.parse(self.RECORD_STR_1)

     self.assertIn('who', rec)
     self.assertEqual(expected_who, rec['who'])
Esempio n. 37
0
class RawRequest(BaseRequest, SerializableMixin, DictableMixin):
    '''An HTTP request described by its request line and header fields.

    Attributes:
        method (str): The HTTP method in the status line. For example,
            ``GET``, ``POST``.
        resource_path (str): The URL or "path" in the status line.
        version (str): The HTTP version in the status line. For example,
            ``HTTP/1.0``.
        fields (:class:`.namevalue.NameValueRecord`): The fields in
            the HTTP header.
        body (:class:`.body.Body`, file-like, None): An optional payload.
        encoding (str): The encoding of the status line.
    '''
    def __init__(self, method=None, resource_path=None, version='HTTP/1.1'):
        super().__init__()
        self.method, self.resource_path, self.version = (
            method, resource_path, version)
        self.fields = NameValueRecord(encoding='latin-1')
        self.body = None
        self.encoding = 'latin-1'

    def to_dict(self):
        '''Return the request as a plain dictionary.'''
        field_pairs = list(self.fields.get_all())
        body_dict = self.call_to_dict_or_none(self.body)

        return {
            'protocol': 'http',
            'method': self.method,
            'version': self.version,
            'resource_path': self.resource_path,
            'fields': field_pairs,
            'body': body_dict,
            'encoding': self.encoding,
        }

    def to_bytes(self):
        '''Serialize the request line and fields, each followed by CRLF.'''
        assert self.method
        assert self.resource_path
        assert self.version

        line_text = '{0} {1} {2}'.format(
            self.method, self.resource_path, self.version)
        line_bytes = line_text.encode(self.encoding)
        field_bytes = self.fields.to_bytes(errors='replace')

        return b'\r\n'.join([line_bytes, field_bytes, b''])

    def parse(self, data):
        '''Parse header bytes into this request.

        When ``resource_path`` is not set, the first line of ``data`` is
        split off and parsed as the request line. The remaining data is
        parsed into ``fields`` with ``strict=False``.
        '''
        if not self.resource_path:
            request_line, data = data.split(b'\n', 1)
            parts = self.parse_status_line(request_line)
            self.method, self.resource_path, self.version = parts

        self.fields.parse(data, strict=False)

    def parse_status_line(self, data):
        '''Parse the status line bytes.

        Returns:
            tuple: A tuple representing the method, URI, and
            version.

        Raises:
            ProtocolError: If ``data`` does not match the pattern.
        '''
        request_line_match = re.match(
            br'([a-zA-Z]+)[ \t]+([^ \t]+)[ \t]+(HTTP/\d+\.\d+)',
            data,
        )

        if not request_line_match:
            raise ProtocolError('Error parsing status line.')

        # The pattern has exactly three groups.
        method, resource, version = request_line_match.groups()
        return wpull.string.to_str(
            (method, resource, version),
            encoding=self.encoding,
        )

    def __repr__(self):
        template = '<Request({method}, {url}, {version})>'
        return template.format(
            method=self.method,
            url=self.resource_path,
            version=self.version,
        )

    def copy(self):
        '''Return a deep copy of this request.'''
        duplicate = copy.deepcopy(self)
        return duplicate

    def set_continue(self, offset):
        '''Turn the request into a range request starting at ``offset``.'''
        assert offset >= 0, offset
        range_value = 'bytes={0}-'.format(offset)
        self.fields['Range'] = range_value
Esempio n. 38
0
class Response(BaseResponse, SerializableMixin, DictableMixin):
    '''Represents the HTTP response.

    Attributes:
        status_code (int): The status code in the status line.
        reason (str): The status reason string in the status line.
        version (str): The HTTP version in the status line. For example,
            ``HTTP/1.1``.
        fields (:class:`.namevalue.NameValueRecord`): The fields in
            the HTTP headers (and trailer, if present).
        body (:class:`.body.Body`, file-like, None): The optional payload
            (without any transfer or content encoding).
        request: The corresponding request.
        encoding (str): The encoding of the status line.
    '''
    def __init__(self,
                 status_code=None,
                 reason=None,
                 version='HTTP/1.1',
                 request=None):
        '''Store the status-line parts and create an empty field record.

        Args:
            status_code: Optional status code; when given it is asserted
                to be an ``int`` and ``reason`` is asserted to be set.
            reason: The status reason string.
            version: The HTTP version string.
            request: The corresponding request.
        '''
        super().__init__()

        if status_code is not None:
            assert isinstance(status_code, int), \
                'Expect int, got {}'.format(type(status_code))
            assert reason is not None

        self.status_code = status_code
        self.reason = reason
        self.version = version
        self.fields = NameValueRecord(encoding='latin-1')
        self.request = request
        self.encoding = 'latin-1'

    @property
    def protocol(self):
        '''The protocol name, always ``'http'``.'''
        return 'http'

    def to_dict(self):
        '''Return the response as a plain dictionary.

        ``response_code`` and ``response_message`` repeat ``status_code``
        and ``reason`` under alternate keys.
        '''
        return {
            'protocol': 'http',
            'status_code': self.status_code,
            'reason': self.reason,
            'response_code': self.status_code,
            'response_message': self.reason,
            'version': self.version,
            'fields': list(self.fields.get_all()),
            'body': self.call_to_dict_or_none(self.body),
            'request': self.request.to_dict() if self.request else None,
            'encoding': self.encoding,
        }

    def to_bytes(self):
        '''Serialize the status line and fields.

        Returns:
            bytes: The status line, CRLF, the serialized fields, CRLF.
        '''
        assert self.version
        assert self.status_code is not None
        assert self.reason is not None

        status = '{0} {1} {2}'.format(self.version, self.status_code,
                                      self.reason).encode(self.encoding)
        fields = self.fields.to_bytes(errors='replace')

        return b'\r\n'.join([status, fields, b''])

    def parse(self, data):
        '''Parse header bytes into this response.

        When no status code has been set yet, the first line of ``data`` is
        split off and parsed as the status line. The remaining data is
        parsed into ``fields`` with ``strict=False``.

        Args:
            data (bytes): The header data.
        '''
        if self.status_code is None:
            line, data = data.split(b'\n', 1)
            self.version, self.status_code, self.reason = \
                self.parse_status_line(line)

        self.fields.parse(data, strict=False)

    @classmethod
    def parse_status_line(cls, data):
        '''Parse the status line bytes.

        Returns:
            tuple: A tuple representing the version, code, and reason.

        Raises:
            ProtocolError: If ``data`` does not match the status line
                pattern.
        '''
        match = re.match(br'(HTTP/\d+\.\d+)[ \t]+([0-9]{1,3})[ \t]*([^\r\n]*)',
                         data)

        if not match:
            # The previous message carried a stray, unbalanced double quote
            # after the offending line.
            raise ProtocolError(
                'Error parsing status line {line}.'.format(line=ascii(data)))

        # The pattern has exactly three groups, so unpacking cannot fail.
        version, code, reason = match.groups()

        return wpull.string.to_str(
            (version, int(code), reason),
            encoding='latin-1',
        )

    def __repr__(self):
        return '<Response({version}, {code}, {reason})>'.format(
            version=ascii(self.version),
            code=self.status_code,
            reason=ascii(self.reason))

    def __str__(self):
        '''Return the serialized header decoded as UTF-8 with replacement.

        The decoded text is passed through ``wpull.string.printable_str``
        with ``keep_newlines=True``.
        '''
        return wpull.string.printable_str(
            self.to_bytes().decode('utf-8', 'replace'),
            keep_newlines=True)

    def response_code(self):
        '''Return the status code.'''
        return self.status_code

    def response_message(self):
        '''Return the reason string.'''
        return self.reason