Example #1
def _enqueue(uri,
             method='GET',
             params=None,
             body=None,
             headers=None,
             data=None,
             **kwargs):  # pragma: no cover
    """DEPRECATED.

    Enqueue non-blocking threaded HTTP request with callback.

    Callbacks, including the default error handler if enabled, are run in
    the HTTP thread, where exceptions are logged but cannot be caught by
    the caller.
    The default error handler is called first, then 'callback' (singular),
    followed by each callback in 'callbacks' (plural). All callbacks are
    invoked, even if the default error handler detects a problem, so they
    must check request.exception before using the response data.

    Note: multiple async requests do not automatically run concurrently,
    as they are limited by the number of http threads in L{numthreads},
    which is set to 1 by default.

    @see: L{requests.Session.request} for parameters.

    @kwarg default_error_handling: Use default error handling
    @type default_error_handling: bool
    @kwarg callback: Method to call once data is fetched
    @type callback: callable
    @kwarg callbacks: Methods to call once data is fetched
    @type callbacks: list of callable
    @rtype: L{threadedhttp.HttpRequest}
    """
    # body and data parameters both map to the data parameter of
    # requests.Session.request.
    if data:
        body = data

    default_error_handling = kwargs.pop('default_error_handling', None)
    callback = kwargs.pop('callback', None)

    callbacks = []
    if default_error_handling:
        callbacks.append(error_handling_callback)
    if callback:
        callbacks.append(callback)

    callbacks += kwargs.pop('callbacks', [])

    all_headers = config.extra_headers.copy()
    all_headers.update(headers or {})

    user_agent_format_string = all_headers.get('user-agent')
    if not user_agent_format_string or '{' in user_agent_format_string:
        all_headers['user-agent'] = user_agent(None, user_agent_format_string)

    request = threadedhttp.HttpRequest(uri, method, params, body, all_headers,
                                       callbacks, **kwargs)
    _http_process(session, request)
    return request
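
The docstring above insists that callbacks check request.exception before using the response. Below is a minimal sketch of how a caller might drive this helper, assuming _enqueue is the private helper in pywikibot.comms.http and that the returned threadedhttp.HttpRequest exposes the exception and text attributes seen in the test examples further down (on_done is an illustrative name, not part of the original code):

from pywikibot.comms import http  # assumed import path

def on_done(request):
    # Runs in the single HTTP worker thread; exceptions raised here are
    # only logged, so report problems instead of raising.
    if request.exception:
        print('fetch failed:', request.exception)
        return
    print('fetched', len(request.text), 'characters')

# Fire-and-forget: the request is queued and serviced by the HTTP
# thread(s); numthreads defaults to 1, so requests run sequentially.
req = http._enqueue('https://www.wikipedia.org/', callback=on_done)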
Example #2
def test_no_charset(self):
    """Test decoding without explicit charset."""
    req = threadedhttp.HttpRequest(None)
    req._data = ({'content-type': ''}, CharsetTestCase.LATIN1_BYTES[:])
    self.assertIsNone(req.charset)
    self.assertEqual('latin1', req.encoding)
    self.assertEqual(req.raw, CharsetTestCase.LATIN1_BYTES)
    self.assertEqual(req.content, CharsetTestCase.STR)
Example #3
def _create_request(charset=None, data=UTF8_BYTES):
    """Helper method."""
    req = threadedhttp.HttpRequest('', charset=charset)
    resp = requests.Response()
    resp.headers = {'content-type': 'charset=utf-8'}
    resp._content = data[:]
    req._data = resp
    return req
Example #4
def test_no_charset(self):
    """Test decoding without explicit charset."""
    req = threadedhttp.HttpRequest('')
    resp = requests.Response()
    resp.headers = {'content-type': ''}
    resp._content = CharsetTestCase.LATIN1_BYTES[:]
    req._data = resp
    self.assertIsNone(req.charset)
    self.assertEqual('latin1', req.encoding)
    self.assertEqual(req.raw, CharsetTestCase.LATIN1_BYTES)
    self.assertEqual(req.content, CharsetTestCase.STR)
Example #5
def test_no_content_type(self):
    """Test decoding without content-type (and then no charset)."""
    req = threadedhttp.HttpRequest('')
    resp = requests.Response()
    resp.headers = {}
    resp._content = CharsetTestCase.LATIN1_BYTES[:]
    req._data = resp
    self.assertIsNone(req.charset)
    self.assertEqual('latin1', req.encoding)
    self.assertEqual(req.content, CharsetTestCase.LATIN1_BYTES)
    self.assertEqual(req.text, CharsetTestCase.STR)
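
Taken together, the charset tests pin down the decoding fallback: with no charset argument and no charset in the content-type header, req.charset stays None and the encoding falls back to latin1. The sketch below summarises that precedence; only the latin1 branch is asserted directly above, while the other two branches are an assumption drawn from the charset= keyword and the utf-8 fixture of Example #3 (pick_encoding is an illustrative helper, not the pywikibot implementation):

def pick_encoding(charset, header_charset):
    # Explicit charset wins, then the server-declared one, then latin1.
    if charset:
        return charset
    if header_charset:
        return header_charset
    return 'latin1'

assert pick_encoding(None, None) == 'latin1'    # Examples #2, #4 and #5
assert pick_encoding(None, 'utf-8') == 'utf-8'  # Example #3's fixture
assert pick_encoding('utf-16', 'utf-8') == 'utf-16'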
Example #6
def _enqueue(uri, method="GET", body=None, headers=None, **kwargs):
    """
    Enqueue non-blocking threaded HTTP request with callback.

    Callbacks, including the default error handler if enabled, are run in
    the HTTP thread, where exceptions are logged but cannot be caught by
    the caller.
    The default error handler is called first, then 'callback' (singular),
    followed by each callback in 'callbacks' (plural).  All callbacks are
    invoked, even if the default error handler detects a problem, so they
    must check request.exception before using the response data.

    Note: multiple async requests do not automatically run concurrently,
    as they are limited by the number of http threads in L{numthreads},
    which is set to 1 by default.

    @see: L{httplib2.Http.request} for parameters.

    @kwarg default_error_handling: Use default error handling
    @type default_error_handling: bool
    @kwarg callback: Method to call once data is fetched
    @type callback: callable
    @kwarg callbacks: Methods to call once data is fetched
    @type callbacks: list of callable
    @rtype: L{threadedhttp.HttpRequest}
    """
    default_error_handling = kwargs.pop('default_error_handling', None)
    callback = kwargs.pop('callback', None)

    callbacks = []
    if default_error_handling:
        callbacks.append(error_handling_callback)
    if callback:
        callbacks.append(callback)

    callbacks += kwargs.pop('callbacks', [])

    if not headers:
        headers = {}

    user_agent_format_string = headers.get("user-agent", None)
    if not user_agent_format_string or '{' in user_agent_format_string:
        headers["user-agent"] = user_agent(None, user_agent_format_string)

    request = threadedhttp.HttpRequest(
        uri, method, body, headers, callbacks, **kwargs)
    http_queue.put(request)
    return request
Example #7
def test_threading(self):
    """Run a queued request through HttpProcessor and check the response."""
    queue = Queue.Queue()
    cookiejar = threadedhttp.LockableCookieJar()
    connection_pool = threadedhttp.ConnectionPool()
    proc = threadedhttp.HttpProcessor(queue, cookiejar, connection_pool)
    proc.setDaemon(True)
    proc.start()
    r = threadedhttp.HttpRequest('http://www.wikipedia.org/')
    queue.put(r)

    self.assertNotIsInstance(r.exception, Exception)
    self.assertIsInstance(r.data, tuple)
    self.assertIsInstance(r.response_headers, dict)
    self.assertIn('status', r.response_headers)
    self.assertIsInstance(r.response_headers['status'], str)
    self.assertEqual(r.response_headers['status'], '200')
    self.assertEqual(r.status, 200)

    self.assertIsInstance(r.raw, bytes)
    self.assertIn(b'<html lang="mul"', r.raw)
    self.assertEqual(int(r.response_headers['content-length']), len(r.raw))

    queue.put(None)  # Stop the http processor thread
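
The test relies on the daemon flag so the interpreter can exit even if the worker is still alive; outside a test, one might shut the processor down explicitly, assuming the None sentinel terminates HttpProcessor's loop as the final line above suggests:

queue.put(None)       # sentinel: ask the processor loop to exit
proc.join(timeout=5)  # give the worker thread a moment to finish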
Example #8
def _create_request(charset=None, data=UTF8_BYTES):
    """Helper method."""
    req = threadedhttp.HttpRequest(None, charset=charset)
    req._data = ({'content-type': 'charset=utf-8'}, data[:])
    return req
Example #9
def request(site=None, uri=None, *args, **kwargs):
    """Queue a request to be submitted to Site.

    All parameters not listed below are the same as
    L{httplib2.Http.request}.

    If the site argument is provided, the uri is relative to the site's
    scriptpath.

    If the site argument is None, the uri must be absolute, and is
    used for requests to non-wiki pages.

    @param site: The Site to connect to
    @type site: L{pywikibot.site.BaseSite}
    @param uri: the URI to retrieve
    @type uri: str
    @return: The received data (a unicode string).

    """
    assert(site or uri)
    if site:
        proto = site.protocol()
        if proto == 'https':
            host = site.ssl_hostname()
            uri = site.ssl_pathprefix() + uri
        else:
            host = site.hostname()
        baseuri = urlparse.urljoin("%s://%s" % (proto, host), uri)

        kwargs.setdefault("disable_ssl_certificate_validation",
                          site.ignore_certificate_error())
    else:
        baseuri = uri
        host = urlparse.urlparse(uri).netloc

    format_string = kwargs.setdefault("headers", {}).get("user-agent")
    kwargs["headers"]["user-agent"] = user_agent(site, format_string)

    request = threadedhttp.HttpRequest(baseuri, *args, **kwargs)
    http_queue.put(request)
    while not request.lock.acquire(False):
        time.sleep(0.1)

    # TODO: do some error correcting stuff
    if isinstance(request.data, SSLHandshakeError):
        if SSL_CERT_VERIFY_FAILED_MSG in str(request.data):
            raise FatalServerError(str(request.data))

    # if all else fails
    if isinstance(request.data, Exception):
        raise request.data

    if request.data[0].status == 504:
        raise Server504Error("Server %s timed out" % host)

    if request.data[0].status == 414:
        raise Server414Error('Too long GET request')

    # HTTP status 207 is also a success status for WebDAV PROPFIND,
    # used by the version module.
    if request.data[0].status not in (200, 207):
        pywikibot.warning(u"Http response status %(status)s"
                          % {'status': request.data[0].status})

    pos = request.data[0]['content-type'].find('charset=')
    if pos >= 0:
        pos += len('charset=')
        encoding = request.data[0]['content-type'][pos:]
    else:
        encoding = 'ascii'
        # Don't warn, many pages don't contain one
        pywikibot.log(u"Http response doesn't contain a charset.")

    return request.data[1].decode(encoding)
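
A brief usage sketch for this httplib2-era request() helper, assuming the pywikibot.comms.http import path and a normally configured Site; per the docstring, the return value is the decoded response body:

import pywikibot
from pywikibot.comms import http  # assumed import path

site = pywikibot.Site('en', 'wikipedia')
# Relative URI, resolved against the site's host (and ssl_pathprefix over https).
text = http.request(site, '/w/api.php?action=query&meta=siteinfo&format=json')

# Absolute URI with site=None, for non-wiki pages.
html = http.request(None, 'https://www.wikipedia.org/')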
Example #10
def fetch(uri,
          method='GET',
          headers=None,
          default_error_handling: bool = True,
          use_fake_user_agent: Union[bool, str] = False,
          **kwargs):
    """
    HTTP request.

    See L{requests.Session.request} for parameters.

    @param default_error_handling: Use default error handling
    @param use_fake_user_agent: Set to True to use fake UA, False to use
        pywikibot's UA, str to specify own UA. This behaviour might be
        overridden by domain in config.

    @kwarg charset: Either a valid charset (usable for str.decode()) or None
        to automatically choose the charset from the returned header (defaults
        to latin-1)
    @type charset: CodecInfo, str, None
    @kwarg verify: verify the SSL certificate (default is True)
    @type verify: bool or path to certificates
    @kwarg callbacks: Methods to call once data is fetched
    @type callbacks: list of callable
    @rtype: L{threadedhttp.HttpRequest}
    """
    # Change user agent depending on fake UA settings.
    # Set header to new UA if needed.
    headers = headers or {}
    headers.update(config.extra_headers.copy() or {})

    def assign_fake_user_agent(use_fake_user_agent, uri):
        uri_domain = urlparse(uri).netloc
        use_fake_user_agent = config.fake_user_agent_exceptions.get(
            uri_domain, use_fake_user_agent)

        if use_fake_user_agent is False:
            return user_agent()
        if use_fake_user_agent is True:
            return fake_user_agent()
        if use_fake_user_agent and isinstance(use_fake_user_agent, str):
            return use_fake_user_agent  # Custom UA.
        raise ValueError('Invalid parameter: '
                         'use_fake_user_agent={}'.format(use_fake_user_agent))

    def assign_user_agent(user_agent_format_string):
        if not user_agent_format_string or '{' in user_agent_format_string:
            return user_agent(None, user_agent_format_string)
        else:
            # do nothing, it is already a UA
            return user_agent_format_string

    # If not already specified.
    if 'user-agent' not in headers:
        # Get fake UA exceptions from `fake_user_agent_exceptions` config.
        headers['user-agent'] = assign_fake_user_agent(use_fake_user_agent,
                                                       uri)
    # Already specified.
    else:
        headers['user-agent'] = assign_user_agent(headers.get('user-agent'))

    callbacks = kwargs.pop('callbacks', [])
    if default_error_handling:
        callbacks.append(error_handling_callback)

    charset = kwargs.pop('charset', None)
    request = threadedhttp.HttpRequest(charset=charset)

    auth = get_authentication(uri)
    if auth is not None and len(auth) == 4:
        if isinstance(requests_oauthlib, ImportError):
            warn('%s' % requests_oauthlib, ImportWarning)
            error('OAuth authentication not supported: %s' % requests_oauthlib)
            auth = None
        else:
            auth = requests_oauthlib.OAuth1(*auth)

    timeout = config.socket_timeout
    old_validation = kwargs.pop('disable_ssl_certificate_validation', None)
    if old_validation is not None:
        issue_deprecation_warning('disable_ssl_certificate_validation',
                                  instead='verify',
                                  warning_class=FutureWarning,
                                  since='20201220')
        kwargs.update(verify=not old_validation)

    try:
        # Note that the connections are pooled, which means that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before.
        response = session.request(method,
                                   uri,
                                   headers=headers,
                                   auth=auth,
                                   timeout=timeout,
                                   **kwargs)
    except Exception as e:
        request.data = e
        response = e
    else:
        request.data = response

    for callback in callbacks:
        callback(response)

    # if there's no data in the answer we're in trouble
    try:
        request.data
    except AssertionError as e:
        raise e

    return request
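
Finally, a hedged sketch of calling this requests-based fetch(), assuming it lives in pywikibot.comms.http and that the returned HttpRequest exposes the text attribute used in the earlier test examples; as the body above shows, an exception raised while performing the request is stored in request.data rather than propagated from session.request:

from pywikibot.comms import http  # assumed import path

r = http.fetch('https://www.wikipedia.org/', charset='utf-8')
if isinstance(r.data, Exception):  # fetch() stores a request exception in .data
    raise r.data
print(r.text[:200])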