def test_update(self):
    """Check ``update`` with a single value and with a list of values."""
    new_values = {
        'Content-Type': 'text/html',
        'X-Forwarded-For': ['ip1', 'ip2'],
    }
    headers = Headers()
    headers.update(new_values)

    # Each header is read back through getlist(), in the same order as the
    # values were declared above.
    expected_by_name = (
        ('Content-Type', ['text/html']),
        ('X-Forwarded-For', ['ip1', 'ip2']),
    )
    for name, expected in expected_by_name:
        self.assertEqual(headers.getlist(name), expected)
def test_update(self):
    """Check that values given to ``update`` as str are read back as bytes."""
    headers = Headers()
    headers.update(
        {
            "Content-Type": "text/html",
            "X-Forwarded-For": ["ip1", "ip2"],
        }
    )

    # A single value comes back as a one-element list.
    content_type = headers.getlist("Content-Type")
    self.assertEqual(content_type, [b"text/html"])

    # A list value comes back with every element, in order.
    forwarded_for = headers.getlist("X-Forwarded-For")
    self.assertEqual(forwarded_for, [b"ip1", b"ip2"])
def _headers_from_twisted_response(response):
    """Build a ``Headers`` object from a Twisted response.

    When ``response.length`` differs from ``UNKNOWN_LENGTH``, a
    ``Content-Length`` entry holding that length (as encoded text) is set
    first. The response's raw headers are then merged in with ``update``.
    """
    result = Headers()
    length_is_known = response.length != UNKNOWN_LENGTH
    if length_is_known:
        content_length = str(response.length).encode()
        result[b'Content-Length'] = content_length
    raw_headers = response.headers.getAllRawHeaders()
    result.update(raw_headers)
    return result
def test_update(self):
    """Check that ``update`` stores scalar and list values from a dict."""
    incoming = dict([
        ('Content-Type', 'text/html'),
        ('X-Forwarded-For', ['ip1', 'ip2']),
    ])
    hdrs = Headers()
    hdrs.update(incoming)

    stored_content_type = hdrs.getlist('Content-Type')
    self.assertEqual(stored_content_type, ['text/html'])

    stored_forwarded_for = hdrs.getlist('X-Forwarded-For')
    self.assertEqual(stored_forwarded_for, ['ip1', 'ip2'])
def process_request(self, request, spider):
    """
    Turn an eligible request into a POST request to the AutoExtract server.

    The request is forwarded to AutoExtract only when it is explicitly
    enabled with `{'autoextract': {'enabled': True}}` meta.
    The page type value must also be present, either in the
    AUTOEXTRACT_PAGE_TYPE option, or in
    `{'autoextract': {'pageType': '...'}}` meta.

    Returns ``None`` when the request is not enabled or was already
    processed; otherwise returns the replacement request. Raises
    ``AutoExtractError`` for non-GET requests.
    """
    # Skip requests that are not enabled, or that AutoExtract already handled
    if (not self._is_enabled_for_request(request)
            or request.meta.get(AUTOEXTRACT_META_KEY)):
        return None

    if request.method != 'GET':
        raise AutoExtractError('Only GET requests are supported by AutoExtract')

    # Remember where the request came from and when processing started
    original_url = request.url
    started_at = time.time()
    request.meta[AUTOEXTRACT_META_KEY] = {
        'original_url': original_url,
        'timing': {'start_ts': started_at},
    }

    # The page type may be overridden per request
    page_type = self._check_page_type(request)
    logger.debug('Process AutoExtract request for %s URL %s',
                 page_type,
                 request,
                 extra={'spider': spider})

    # Request timeout
    request.meta['download_timeout'] = self.timeout

    # Concurrency settings
    self._set_download_slot(request, request.meta)

    # Base query, optionally extended with the extra payload from meta
    query = dict(url=request.url, pageType=page_type)
    extra_query = self._get_meta_name(request, 'extra')
    if extra_query:
        query.update(extra_query)

    # Default API headers, optionally extended with headers from meta
    authorization = basic_auth_header(self._api_user, self._api_pass)
    api_headers = Headers({
        'Content-Type': 'application/json',
        'User-Agent': USER_AGENT,
        'Authorization': authorization,
    })
    header_overrides = self._get_meta_name(request, 'headers')
    if header_overrides:
        api_headers.update(header_overrides)

    target_url = self._api_url
    serialized_body = json.dumps([query], sort_keys=True)
    api_request = request.replace(
        url=target_url,
        method='POST',
        headers=api_headers,
        body=serialized_body,
    )

    self.inc_metric('autoextract/request_count')
    return api_request