Beispiel #1
0
    def _load_from_json(self):
        """ Fill response attributes from JSON results """

        # response.status
        if 'http_status' in self.data:
            self.status = int(self.data['http_status'])
        elif self._splash_options().get('http_status_from_error_code', False):
            if 'error' in self.data:
                try:
                    error = self.data['info']['error']
                except KeyError:
                    error = ''
                http_code_m = re.match(r'http(\d{3})', error)
                if http_code_m:
                    self.status = int(http_code_m.group(1))

        # response.url
        if 'url' in self.data:
            self._url = self.data['url']

        # response.body
        if 'body' in self.data:
            self._body = base64.b64decode(self.data['body'])
            self._cached_ubody = self._body.decode(self.encoding)
        elif 'html' in self.data:
            self._cached_ubody = self.data['html']
            self._body = self._cached_ubody.encode(self.encoding)
            self.headers[b"Content-Type"] = b"text/html; charset=utf-8"

        # response.headers
        if 'headers' in self.data:
            self.headers = headers_to_scrapy(self.data['headers'])
Beispiel #2
0
    def _load_from_json(self):
        """ Fill response attributes from JSON results """

        # response.status
        if 'http_status' in self.data:
            self.status = int(self.data['http_status'])
        elif self._splash_options().get('http_status_from_error_code', False):
            if 'error' in self.data:
                try:
                    error = self.data['info']['error']
                except KeyError:
                    error = ''
                http_code_m = re.match(r'http(\d{3})', error)
                if http_code_m:
                    self.status = int(http_code_m.group(1))

        # response.url
        if 'url' in self.data:
            self._url = self.data['url']

        # response.body
        if 'body' in self.data:
            self._body = base64.b64decode(self.data['body'])
            self._cached_ubody = self._body.decode(self.encoding)
        elif 'html' in self.data:
            self._cached_ubody = self.data['html']
            self._body = self._cached_ubody.encode(self.encoding)
            self.headers[b"Content-Type"] = b"text/html; charset=utf-8"

        # response.headers
        if 'headers' in self.data:
            self.headers = headers_to_scrapy(self.data['headers'])
def test_headers_to_scrapy():
    """Check the input shapes headers_to_scrapy converts to Headers."""
    # None and empty containers all compare equal to an empty Headers().
    for empty in (None, {}, []):
        assert headers_to_scrapy(empty) == Headers()

    expected = Headers({'Content-Type': 'text/html'})

    # The same single header as a mapping, a list of pairs, and a list of
    # name/value dicts.
    equivalent_inputs = (
        {'Content-Type': 'text/html'},
        [('Content-Type', 'text/html')],
        [{'name': 'Content-Type', 'value': 'text/html'}],
    )
    for raw in equivalent_inputs:
        assert headers_to_scrapy(raw) == expected
Beispiel #4
0
def test_headers_to_scrapy():
    """Verify headers_to_scrapy for empty input and three header layouts."""
    name, value = 'Content-Type', 'text/html'

    # Empty inputs: None, an empty dict and an empty list.
    assert Headers() == headers_to_scrapy(None)
    assert Headers() == headers_to_scrapy({})
    assert Headers() == headers_to_scrapy([])

    wanted = Headers({name: value})

    # One header expressed as a dict, as a (name, value) pair list, and as
    # a list of {'name': ..., 'value': ...} entries.
    from_mapping = headers_to_scrapy({name: value})
    assert from_mapping == wanted

    from_pairs = headers_to_scrapy([(name, value)])
    assert from_pairs == wanted

    from_entries = headers_to_scrapy([{'name': name, 'value': value}])
    assert from_entries == wanted