def _enqueue(uri, method='GET', params=None, body=None, headers=None,
             data=None, **kwargs):  # pragma: no cover
    """DEPRECATED. Queue a threaded, non-blocking HTTP request.

    Callbacks run in the HTTP thread, where exceptions are logged but
    cannot be caught by the caller. The default error handler (when
    enabled) is invoked first, then 'callback' (singular), then every
    entry of 'callbacks' (plural). All callbacks run even when the
    default error handler detects a problem, so each one must inspect
    request.exception before using the response data.

    Note: multiple async requests do not automatically run concurrently;
    they are limited by the number of http threads in L{numthreads},
    which defaults to 1.

    @see: L{requests.Session.request} for parameters.
    @kwarg default_error_handling: Use default error handling
    @type default_error_handling: bool
    @kwarg callback: Method to call once data is fetched
    @type callback: callable
    @kwarg callbacks: Methods to call once data is fetched
    @type callbacks: list of callable
    @rtype: L{threadedhttp.HttpRequest}
    """
    # 'body' and 'data' both map onto requests.Session.request's data
    # parameter; a truthy 'data' takes precedence.
    if data:
        body = data

    use_default_handling = kwargs.pop('default_error_handling', None)
    single_callback = kwargs.pop('callback', None)

    hooks = []
    if use_default_handling:
        hooks.append(error_handling_callback)
    if single_callback:
        hooks.append(single_callback)
    hooks.extend(kwargs.pop('callbacks', []))

    # Merge config-level extra headers with the caller's; caller wins.
    merged_headers = config.extra_headers.copy()
    merged_headers.update(headers or {})

    ua_template = merged_headers.get('user-agent')
    if not ua_template or '{' in ua_template:
        merged_headers['user-agent'] = user_agent(None, ua_template)

    request = threadedhttp.HttpRequest(
        uri, method, params, body, merged_headers, hooks, **kwargs)
    _http_process(session, request)
    return request
def test_no_charset(self):
    """Test decoding when the response declares no charset."""
    req = threadedhttp.HttpRequest(None)
    req._data = ({'content-type': ''}, CharsetTestCase.LATIN1_BYTES[:])
    # No charset requested and none declared -> latin1 fallback encoding.
    self.assertIsNone(req.charset)
    self.assertEqual(req.encoding, 'latin1')
    self.assertEqual(req.raw, CharsetTestCase.LATIN1_BYTES)
    self.assertEqual(req.content, CharsetTestCase.STR)
def _create_request(charset=None, data=UTF8_BYTES):
    """Build an HttpRequest whose response declares charset=utf-8."""
    response = requests.Response()
    response.headers = {'content-type': 'charset=utf-8'}
    response._content = data[:]
    request = threadedhttp.HttpRequest('', charset=charset)
    request._data = response
    return request
def test_no_charset(self):
    """Test decoding when the response declares no charset."""
    response = requests.Response()
    response.headers = {'content-type': ''}
    response._content = CharsetTestCase.LATIN1_BYTES[:]
    req = threadedhttp.HttpRequest('')
    req._data = response
    # No charset requested and none declared -> latin1 fallback encoding.
    self.assertIsNone(req.charset)
    self.assertEqual(req.encoding, 'latin1')
    self.assertEqual(req.raw, CharsetTestCase.LATIN1_BYTES)
    self.assertEqual(req.content, CharsetTestCase.STR)
def test_no_content_type(self):
    """Test decoding without content-type (and then no charset)."""
    response = requests.Response()
    response.headers = {}
    response._content = CharsetTestCase.LATIN1_BYTES[:]
    req = threadedhttp.HttpRequest('')
    req._data = response
    # Missing content-type header -> no charset; encoding falls back
    # to latin1 and content stays the raw bytes.
    self.assertIsNone(req.charset)
    self.assertEqual(req.encoding, 'latin1')
    self.assertEqual(req.content, CharsetTestCase.LATIN1_BYTES)
    self.assertEqual(req.text, CharsetTestCase.STR)
def _enqueue(uri, method='GET', body=None, headers=None, **kwargs):
    """
    Enqueue non-blocking threaded HTTP request with callback.

    Callbacks, including the default error handler if enabled, are run
    in the HTTP thread, where exceptions are logged but are not able
    to be caught.

    The default error handler is called first, then 'callback'
    (singular), followed by each callback in 'callbacks' (plural). All
    callbacks are invoked, even if the default error handler detects a
    problem, so they must check request.exception before using the
    response data.

    Note: multiple async requests do not automatically run
    concurrently, as they are limited by the number of http threads in
    L{numthreads}, which is set to 1 by default.

    @see: L{httplib2.Http.request} for parameters.

    @kwarg default_error_handling: Use default error handling
    @type default_error_handling: bool
    @kwarg callback: Method to call once data is fetched
    @type callback: callable
    @kwarg callbacks: Methods to call once data is fetched
    @type callbacks: list of callable
    @rtype: L{threadedhttp.HttpRequest}
    """
    default_error_handling = kwargs.pop('default_error_handling', None)
    callback = kwargs.pop('callback', None)
    callbacks = []
    if default_error_handling:
        callbacks.append(error_handling_callback)
    if callback:
        callbacks.append(callback)
    callbacks += kwargs.pop('callbacks', [])
    # Work on a copy so the caller's dict is never mutated when the
    # user-agent header is filled in below (matches the behaviour of the
    # requests-based _enqueue, which also merges into a fresh dict).
    headers = dict(headers) if headers else {}
    user_agent_format_string = headers.get('user-agent')
    if not user_agent_format_string or '{' in user_agent_format_string:
        headers['user-agent'] = user_agent(None, user_agent_format_string)
    request = threadedhttp.HttpRequest(
        uri, method, body, headers, callbacks, **kwargs)
    http_queue.put(request)
    return request
def test_threading(self):
    """Test a round trip through the threaded HTTP processor."""
    queue = Queue.Queue()
    cookiejar = threadedhttp.LockableCookieJar()
    connection_pool = threadedhttp.ConnectionPool()
    proc = threadedhttp.HttpProcessor(queue, cookiejar, connection_pool)
    # setDaemon() is deprecated; assign the daemon attribute directly.
    proc.daemon = True
    proc.start()
    try:
        r = threadedhttp.HttpRequest('http://www.wikipedia.org/')
        queue.put(r)
        self.assertNotIsInstance(r.exception, Exception)
        self.assertIsInstance(r.data, tuple)
        self.assertIsInstance(r.response_headers, dict)
        self.assertIn('status', r.response_headers)
        self.assertIsInstance(r.response_headers['status'], str)
        self.assertEqual(r.response_headers['status'], '200')
        self.assertEqual(r.status, 200)
        self.assertIsInstance(r.raw, bytes)
        self.assertIn(b'<html lang="mul"', r.raw)
        self.assertEqual(int(r.response_headers['content-length']),
                         len(r.raw))
    finally:
        # Always stop the http processor thread, even on test failure,
        # so a failing assertion does not leak the worker thread.
        queue.put(None)
def _create_request(charset=None, data=UTF8_BYTES):
    """Return an HttpRequest preloaded with utf-8 tagged response data."""
    request = threadedhttp.HttpRequest(None, charset=charset)
    request._data = ({'content-type': 'charset=utf-8'}, data[:])
    return request
def request(site=None, uri=None, *args, **kwargs):
    """Queue a request to be submitted to Site.

    All parameters not listed below are the same as
    L{httplib2.Http.request}.

    If the site argument is provided, the uri is relative to the site's
    scriptpath.

    If the site argument is None, the uri must be absolute, and is
    used for requests to non wiki pages.

    @param site: The Site to connect to
    @type site: L{pywikibot.site.BaseSite}
    @param uri: the URI to retrieve
    @type uri: str
    @return: The received data (a unicode string).
    """
    # NOTE(review): assert is stripped under ``python -O``; an explicit
    # check raising ValueError would be safer input validation.
    assert(site or uri)
    if site:
        # Build an absolute URI from the site's protocol/host; for https
        # the ssl path prefix is prepended to the relative uri.
        proto = site.protocol()
        if proto == 'https':
            host = site.ssl_hostname()
            uri = site.ssl_pathprefix() + uri
        else:
            host = site.hostname()
        baseuri = urlparse.urljoin("%s://%s" % (proto, host), uri)
        # Respect the site's certificate-error setting unless the caller
        # already supplied one.
        kwargs.setdefault("disable_ssl_certificate_validation",
                          site.ignore_certificate_error())
    else:
        # No site: uri must already be absolute.
        baseuri = uri
        host = urlparse.urlparse(uri).netloc
    # Expand the user-agent format string (if any) into a concrete UA.
    format_string = kwargs.setdefault("headers", {}).get("user-agent")
    kwargs["headers"]["user-agent"] = user_agent(site, format_string)
    request = threadedhttp.HttpRequest(baseuri, *args, **kwargs)
    http_queue.put(request)
    # Block (polling) until the HTTP thread releases the request lock,
    # i.e. until the response has arrived.
    while not request.lock.acquire(False):
        time.sleep(0.1)
    # TODO: do some error correcting stuff
    # request.data holds either an Exception raised in the HTTP thread
    # or a (response_headers, body_bytes) tuple on success.
    if isinstance(request.data, SSLHandshakeError):
        if SSL_CERT_VERIFY_FAILED_MSG in str(request.data):
            raise FatalServerError(str(request.data))
    # if all else fails
    if isinstance(request.data, Exception):
        raise request.data
    if request.data[0].status == 504:
        raise Server504Error("Server %s timed out" % host)
    if request.data[0].status == 414:
        raise Server414Error('Too long GET request')
    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    if request.data[0].status not in (200, 207):
        pywikibot.warning(u"Http response status %(status)s"
                          % {'status': request.data[0].status})
    # Extract the charset from the content-type header; fall back to
    # ascii when none is declared.
    pos = request.data[0]['content-type'].find('charset=')
    if pos >= 0:
        pos += len('charset=')
        encoding = request.data[0]['content-type'][pos:]
    else:
        encoding = 'ascii'
        # Don't warn, many pages don't contain one
        pywikibot.log(u"Http response doesn't contain a charset.")
    return request.data[1].decode(encoding)
def fetch(uri, method='GET', headers=None,
          default_error_handling: bool = True,
          use_fake_user_agent: Union[bool, str] = False, **kwargs):
    """
    HTTP request.

    See L{requests.Session.request} for parameters.

    @param default_error_handling: Use default error handling
    @param use_fake_user_agent: Set to True to use fake UA, False to use
        pywikibot's UA, str to specify own UA. This behaviour might be
        overridden by domain in config.

    @kwarg charset: Either a valid charset (usable for str.decode()) or None
        to automatically chose the charset from the returned header (defaults
        to latin-1)
    @type charset: CodecInfo, str, None
    @kwarg verify: verify the SSL certificate (default is True)
    @type verify: bool or path to certificates
    @kwarg callbacks: Methods to call once data is fetched
    @type callbacks: list of callable
    @rtype: L{threadedhttp.HttpRequest}
    """
    # Work on a copy so the caller's dict is never mutated below.
    # NOTE: config.extra_headers overrides caller-supplied values here,
    # preserving the original precedence.
    headers = dict(headers) if headers else {}
    headers.update(config.extra_headers or {})

    def assign_fake_user_agent(use_fake_user_agent, uri):
        # Per-domain config exceptions take precedence over the argument.
        uri_domain = urlparse(uri).netloc
        use_fake_user_agent = config.fake_user_agent_exceptions.get(
            uri_domain, use_fake_user_agent)

        if use_fake_user_agent is False:
            return user_agent()
        if use_fake_user_agent is True:
            return fake_user_agent()
        if use_fake_user_agent and isinstance(use_fake_user_agent, str):
            return use_fake_user_agent  # Custom UA.
        raise ValueError('Invalid parameter: '
                         'use_fake_user_agent={}'.format(use_fake_user_agent))

    def assign_user_agent(user_agent_format_string):
        # Expand a format string; leave a literal UA untouched.
        if not user_agent_format_string or '{' in user_agent_format_string:
            return user_agent(None, user_agent_format_string)
        else:
            # do nothing, it is already a UA
            return user_agent_format_string

    # If not already specified.
    if 'user-agent' not in headers:
        # Get fake UA exceptions from `fake_user_agent_exceptions` config.
        headers['user-agent'] = assign_fake_user_agent(use_fake_user_agent,
                                                       uri)
    # Already specified.
    else:
        headers['user-agent'] = assign_user_agent(headers.get('user-agent'))

    # Copy so appending the default error handler never mutates a list
    # the caller passed in.
    callbacks = list(kwargs.pop('callbacks', []))
    if default_error_handling:
        callbacks.append(error_handling_callback)

    charset = kwargs.pop('charset', None)
    request = threadedhttp.HttpRequest(charset=charset)

    auth = get_authentication(uri)
    if auth is not None and len(auth) == 4:
        if isinstance(requests_oauthlib, ImportError):
            warn('%s' % requests_oauthlib, ImportWarning)
            error('OAuth authentication not supported: %s'
                  % requests_oauthlib)
            auth = None
        else:
            auth = requests_oauthlib.OAuth1(*auth)

    timeout = config.socket_timeout
    old_validation = kwargs.pop('disable_ssl_certificate_validation', None)
    if old_validation is not None:
        issue_deprecation_warning('disable_ssl_certificate_validation',
                                  instead='verify',
                                  warning_class=FutureWarning,
                                  since='20201220')
        kwargs.update(verify=not old_validation)

    try:
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        response = session.request(method, uri,
                                   headers=headers, auth=auth,
                                   timeout=timeout, **kwargs)
    except Exception as e:
        request.data = e
        response = e
    else:
        request.data = response

    # Callbacks receive the response (or the exception) directly.
    for callback in callbacks:
        callback(response)

    # if there's no data in the answer we're in trouble
    try:
        request.data
    except AssertionError:
        # Re-raise with the original traceback intact.
        raise
    return request