Exemple #1
0
def error_handling_callback(response):
    """
    Turn a failed or suspicious request outcome into an exception or a log.

    The argument is either a response object or an exception instance that
    stands in for it. Exceptions are re-raised (a failed SSL certificate
    verification becomes a ``FatalServerError``); gateway timeouts and
    over-long request URIs raise dedicated errors; any remaining status
    other than 200 or 207 is only reported as a warning.

    :param response: Response returned by Session.request().
    :type response: :py:obj:`requests.Response`
    :raises FatalServerError: SSL certificate verification failed
    :raises Server504Error: the server answered with 504
    :raises Server414Error: the server answered with 414
    """
    # TODO: do some error correcting stuff
    if isinstance(response, Exception):
        ssl_failure = isinstance(response, requests.exceptions.SSLError)
        if ssl_failure and SSL_CERT_VERIFY_FAILED_MSG in str(response):
            raise FatalServerError(str(response))

        # The exception may or may not carry a request attribute, so the
        # logging is best effort and must never hide the original error.
        with suppress(Exception):
            error('An error occurred for uri ' + response.request.url)
        raise response from None

    status = response.status_code

    if status == HTTPStatus.GATEWAY_TIMEOUT:
        host = urlparse(response.url).netloc
        raise Server504Error('Server {} timed out'.format(host))

    if status == HTTPStatus.REQUEST_URI_TOO_LONG:
        raise Server414Error('Too long GET request')

    # TODO: shall it raise? this might break some code, TBC
    # response.raise_for_status()

    # 207 counts as success too: Webdav FINDPROP answers with it and the
    # version module relies on that.
    accepted = (HTTPStatus.OK, HTTPStatus.MULTI_STATUS)
    if status not in accepted:
        warning('Http response status {}'.format(status))
Exemple #2
0
def error_handling_callback(request):
    """
    Turn a failed or suspicious request outcome into an exception or a log.

    If the request's data is an exception it is re-raised (a failed SSL
    certificate verification becomes a C{FatalServerError}). Status 504 and
    414 raise dedicated errors; any other status except 200 and 207 is only
    reported as a warning.

    @param request: Request that has completed
    @type request: L{threadedhttp.HttpRequest}
    @raise FatalServerError: SSL certificate verification failed
    @raise Server504Error: the server answered with 504
    @raise Server414Error: the server answered with 414
    """
    # TODO: do some error correcting stuff
    outcome = request.data

    if isinstance(outcome, Exception):
        if (isinstance(outcome, requests.exceptions.SSLError)
                and SSL_CERT_VERIFY_FAILED_MSG in str(outcome)):
            raise FatalServerError(str(outcome))
        # if all else fails
        raise outcome

    status = request.status

    if status == 504:
        raise Server504Error("Server %s timed out" % request.hostname)

    if status == 414:
        raise Server414Error('Too long GET request')

    # 207 counts as success too: Webdav FINDPROP answers with it and the
    # version module relies on that.
    if status not in (200, 207):
        warning('Http response status {0}'.format(request.data.status_code))
Exemple #3
0
def error_handling_callback(request):
    """
    Turn a failed or suspicious request outcome into an exception or a log.

    A failed SSL certificate verification becomes a C{FatalServerError}.
    Status 504 and 414 raise dedicated errors. Any other exception stored
    in the request's data is logged and re-raised. Remaining statuses other
    than 200 and 207 are only reported as a warning.

    @param request: Request that has completed
    @type request: L{threadedhttp.HttpRequest}
    @raise FatalServerError: SSL certificate verification failed
    @raise Server504Error: the server answered with 504
    @raise Server414Error: the server answered with 414
    """
    # TODO: do some error correcting stuff
    outcome = request.data

    if (isinstance(outcome, requests.exceptions.SSLError)
            and SSL_CERT_VERIFY_FAILED_MSG in str(outcome)):
        raise FatalServerError(str(outcome))

    # NOTE(review): the status is inspected before the generic exception
    # check below - confirm status_code is safe to read when data holds an
    # exception.
    status = request.status_code

    if status == 504:
        host = urlparse(request.url).netloc
        raise Server504Error('Server {} timed out'.format(host))

    if status == 414:
        raise Server414Error('Too long GET request')

    if isinstance(outcome, Exception):
        error('An error occurred for uri ' + request.url)
        raise outcome from None

    # 207 counts as success too: Webdav FINDPROP answers with it and the
    # version module relies on that.
    if status not in (200, 207):
        warning('Http response status {}'.format(status))
Exemple #4
0
def request(site, uri, ssl=False, *args, **kwargs):
    """Queue a request to be submitted to Site.

    All parameters not listed below are the same as
    L{httplib2.Http.request}, but the uri is relative

    If the site argument is None the uri has to be absolute and is
    taken as is. In this case SSL is ignored. Used for requests to non
    wiki pages.

    The call blocks, polling every 0.1 seconds, until the queued request
    has been processed.

    @param site: The Site to connect to
    @param uri: the URI to retrieve (relative to the site's scriptpath)
    @param ssl: Use HTTPS connection
    @return: The received data (a unicode string).
    @raise FatalServerError: SSL certificate verification failed
    @raise Server504Error: the server answered with status 504

    """
    if site:
        if ssl:
            proto = "https"
            host = site.ssl_hostname()
            uri = site.ssl_pathprefix() + uri
        else:
            proto = site.protocol()
            host = site.hostname()
        baseuri = urlparse.urljoin("%s://%s" % (proto, host), uri)
    else:
        baseuri = uri

    # set default user-agent string
    kwargs.setdefault("headers", {})
    kwargs["headers"].setdefault("user-agent", useragent)
    http_request = threadedhttp.HttpRequest(baseuri, *args, **kwargs)
    http_queue.put(http_request)
    # Poll the lock without blocking until the request has been processed.
    while not http_request.lock.acquire(False):
        time.sleep(0.1)

    # TODO: do some error correcting stuff
    if isinstance(http_request.data, SSLHandshakeError):
        if SSL_CERT_VERIFY_FAILED in str(http_request.data):
            raise FatalServerError(str(http_request.data))

    # if all else fails
    if isinstance(http_request.data, Exception):
        raise http_request.data

    if http_request.data[0].status == 504:
        # Without a site there is no hostname() to ask; take the host from
        # the absolute URI instead of failing with an AttributeError.
        if site:
            timed_out_host = site.hostname()
        else:
            timed_out_host = urlparse.urlparse(baseuri).netloc
        raise Server504Error("Server %s timed out" % timed_out_host)

    if http_request.data[0].status != 200:
        pywikibot.warning(u"Http response status %(status)s"
                          % {'status': http_request.data[0].status})

    return http_request.data[1]
Exemple #5
0
def error_handling_callback(response):
    """
    Turn a failed or suspicious request outcome into an exception or a log.

    The argument is either a response object or an exception instance that
    stands in for it. Exceptions are re-raised (a failed SSL certificate
    verification becomes a C{FatalServerError}); status 504 and 414 raise
    dedicated errors; any other status except 200 and 207 is reported as
    a warning. Finally, a C{UnicodeDecodeError} stored as the response's
    encoding is logged and raised.

    @param response: Response returned by Session.request().
    @type response: L{requests.Response}
    @raise FatalServerError: SSL certificate verification failed
    @raise Server504Error: the server answered with 504
    @raise Server414Error: the server answered with 414
    """
    # TODO: do some error correcting stuff
    if isinstance(response, Exception):
        ssl_failure = isinstance(response, requests.exceptions.SSLError)
        if ssl_failure and SSL_CERT_VERIFY_FAILED_MSG in str(response):
            raise FatalServerError(str(response))

        # The exception may or may not carry a request attribute, so the
        # logging is best effort and must never hide the original error.
        with suppress(Exception):
            error('An error occurred for uri ' + response.request.url)
        raise response from None

    status = response.status_code

    if status == 504:
        host = urlparse(response.url).netloc
        raise Server504Error('Server {} timed out'.format(host))

    if status == 414:
        raise Server414Error('Too long GET request')

    # TODO: shall it raise? this might break some code, TBC
    # response.raise_for_status()

    # 207 counts as success too: Webdav FINDPROP answers with it and the
    # version module relies on that.
    if status not in (200, 207):
        warning('Http response status {}'.format(status))

    detected = response.encoding
    if isinstance(detected, UnicodeDecodeError):
        error('An error occurred for uri {}: '
              'no encoding detected!'.format(response.request.url))
        raise detected from None
Exemple #6
0
    def read_file_content(self, file_url: str):
        """Return name of temp file in which remote file is saved.

        The remote file is streamed into a freshly created temporary file.
        If the connection drops, the download is retried after increasing
        pauses (15 up to 300 seconds) and resumed with a ``Range`` header
        starting at the number of bytes already stored.

        :param file_url: URL of the remote file to download
        :return: path of the temporary file holding the downloaded content
        :raises FatalServerError: the response has a ``text/`` content
            type, the server returned an HTTP error other than an
            unsatisfiable range on a partially downloaded file, or all
            retries are used up
        """
        pywikibot.output('Reading file ' + file_url)

        handle, tempname = tempfile.mkstemp()
        # mkstemp() returns an already open OS-level descriptor. All writes
        # below go through a separate append-mode file object, so release
        # the descriptor right away instead of leaking it.
        os.close(handle)
        path = Path(tempname)
        size = 0

        # pauses (in seconds) between successive download attempts
        dt_gen = iter((15, 30, 45, 60, 120, 180, 240, 300))
        while True:
            file_len = path.stat().st_size
            if file_len:
                pywikibot.output('Download resumed.')
                headers = {'Range': 'bytes={}-'.format(file_len)}
            else:
                headers = {}

            # append mode: resumed chunks always land at the end of the file
            with open(str(path), 'ab') as fd:  # T272345: Python 3.5 needs str
                try:
                    response = http.fetch(file_url,
                                          stream=True,
                                          headers=headers)
                    response.raise_for_status()

                    # get download info, if available
                    # Note: this is not enough to exclude pages
                    #       e.g. 'application/json' is also not a media
                    if 'text/' in response.headers['Content-Type']:
                        raise FatalServerError('The requested URL was not '
                                               'found on server.')
                    size = max(size,
                               int(response.headers.get('Content-Length', 0)))

                    # stream content to temp file (in chunks of 1Mb)
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        fd.write(chunk)

                # raised from connection lost during response.iter_content()
                except requests.ConnectionError:
                    fd.flush()
                    pywikibot.output('Connection closed at byte {}'.format(
                        path.stat().st_size))
                # raised from response.raise_for_status()
                except requests.HTTPError as e:
                    # exit criteria if size is not available
                    # error on last iteration is OK, we're requesting
                    #    {'Range': 'bytes=file_len-'}
                    err = HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE
                    if response.status_code == err and path.stat().st_size:
                        break
                    raise FatalServerError(str(e)) from e

            # done once the stored size matches the announced size
            if size and size == path.stat().st_size:
                break
            try:
                dt = next(dt_gen)
                pywikibot.output('Sleeping for {} seconds ...'.format(dt))
                pywikibot.sleep(dt)
            except StopIteration:
                raise FatalServerError(
                    'Download failed, too many retries!') from None

        pywikibot.output('Downloaded {} bytes'.format(path.stat().st_size))
        return tempname
Exemple #7
0
def request(site=None, uri=None, *args, **kwargs):
    """Queue a request to be submitted to Site.

    All parameters not listed below are the same as
    L{httplib2.Http.request}.

    If the site argument is provided, the uri is relative to the site's
    scriptpath.

    If the site argument is None, the uri must be absolute, and is
    used for requests to non wiki pages.

    The call blocks, polling every 0.1 seconds, until the queued request
    has been processed. The body is decoded with the charset announced in
    the content-type header, or as ascii when none is announced.

    @param site: The Site to connect to
    @type site: L{pywikibot.site.BaseSite}
    @param uri: the URI to retrieve
    @type uri: str
    @return: The received data (a unicode string).
    @raise FatalServerError: SSL certificate verification failed
    @raise Server504Error: the server answered with status 504
    @raise Server414Error: the server answered with status 414

    """
    assert(site or uri)
    if site:
        proto = site.protocol()
        if proto == 'https':
            host = site.ssl_hostname()
            uri = site.ssl_pathprefix() + uri
        else:
            host = site.hostname()
        baseuri = urlparse.urljoin("%s://%s" % (proto, host), uri)

        kwargs.setdefault("disable_ssl_certificate_validation",
                          site.ignore_certificate_error())
    else:
        baseuri = uri
        host = urlparse.urlparse(uri).netloc

    # A caller-supplied user-agent header is passed to user_agent() as
    # its format string.
    format_string = kwargs.setdefault("headers", {}).get("user-agent")
    kwargs["headers"]["user-agent"] = user_agent(site, format_string)

    http_request = threadedhttp.HttpRequest(baseuri, *args, **kwargs)
    http_queue.put(http_request)
    # Poll the lock without blocking until the request has been processed.
    while not http_request.lock.acquire(False):
        time.sleep(0.1)

    # TODO: do some error correcting stuff
    if isinstance(http_request.data, SSLHandshakeError):
        if SSL_CERT_VERIFY_FAILED_MSG in str(http_request.data):
            raise FatalServerError(str(http_request.data))

    # if all else fails
    if isinstance(http_request.data, Exception):
        raise http_request.data

    if http_request.data[0].status == 504:
        raise Server504Error("Server %s timed out" % host)

    if http_request.data[0].status == 414:
        raise Server414Error('Too long GET request')

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    if http_request.data[0].status not in (200, 207):
        pywikibot.warning(u"Http response status %(status)s"
                          % {'status': http_request.data[0].status})

    content_type = http_request.data[0]['content-type']
    encoding = None
    pos = content_type.find('charset=')
    if pos >= 0:
        # Keep only the charset token: further parameters may follow it
        # ("...; charset=utf-8; foo=bar") and the value may be quoted;
        # either would make decode() fail with an unknown codec name.
        charset = content_type[pos + len('charset='):]
        encoding = charset.split(';', 1)[0].strip().strip('"\'')
    if not encoding:
        encoding = 'ascii'
        # Don't warn, many pages don't contain one
        pywikibot.log(u"Http response doesn't contain a charset.")

    return http_request.data[1].decode(encoding)