def error_handling_callback(response):
    """
    Inspect the outcome of a request; raise on failures, warn on oddities.

    An exception instance passed in place of a response is re-raised
    (SSL certificate verification failures as ``FatalServerError``).
    HTTP 504 and 414 are turned into ``Server504Error`` and
    ``Server414Error``; any other status except 200 and 207 only
    produces a warning.

    :param response: Response returned by Session.request().
    :type response: :py:obj:`requests.Response`
    """
    # TODO: do some error correcting stuff
    is_ssl_error = isinstance(response, requests.exceptions.SSLError)
    if is_ssl_error and SSL_CERT_VERIFY_FAILED_MSG in str(response):
        raise FatalServerError(str(response))

    if isinstance(response, Exception):
        # request exception may contain response and request attribute;
        # logging the uri is best effort only
        with suppress(Exception):
            error('An error occurred for uri ' + response.request.url)
        raise response from None

    status = response.status_code
    if status == HTTPStatus.GATEWAY_TIMEOUT:
        host = urlparse(response.url).netloc
        raise Server504Error('Server {} timed out'.format(host))

    if status == HTTPStatus.REQUEST_URI_TOO_LONG:
        raise Server414Error('Too long GET request')

    # TODO: shall it raise? this might break some code, TBC
    # response.raise_for_status()

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    if status in (HTTPStatus.OK, HTTPStatus.MULTI_STATUS):
        return
    warning('Http response status {}'.format(status))
def error_handling_callback(request):
    """
    Raise exceptions and log alerts for a completed request.

    An exception stored in C{request.data} is re-raised (SSL certificate
    verification failures as C{FatalServerError}). Status 504 and 414
    raise C{Server504Error} and C{Server414Error}; any other status
    except 200 and 207 is only logged as a warning.

    @param request: Request that has completed
    @type request: L{threadedhttp.HttpRequest}
    """
    # TODO: do some error correcting stuff
    outcome = request.data
    ssl_failed = isinstance(outcome, requests.exceptions.SSLError)
    if ssl_failed and SSL_CERT_VERIFY_FAILED_MSG in str(outcome):
        raise FatalServerError(str(outcome))

    # if all else fails
    if isinstance(outcome, Exception):
        raise outcome

    status = request.status
    if status == 504:
        raise Server504Error("Server %s timed out" % request.hostname)

    if status == 414:
        raise Server414Error('Too long GET request')

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    if status in (200, 207):
        return
    warning('Http response status {0}'.format(request.data.status_code))
def error_handling_callback(request):
    """
    Raise exceptions and log alerts for a completed request.

    Checks run in this order: SSL certificate verification failure
    (C{FatalServerError}), status 504 (C{Server504Error}), status 414
    (C{Server414Error}), then any exception stored in C{request.data}
    is logged and re-raised. Any remaining status other than 200 and
    207 is only logged as a warning.

    @param request: Request that has completed
    @type request: L{threadedhttp.HttpRequest}
    """
    # TODO: do some error correcting stuff
    ssl_failed = isinstance(request.data, requests.exceptions.SSLError)
    if ssl_failed and SSL_CERT_VERIFY_FAILED_MSG in str(request.data):
        raise FatalServerError(str(request.data))

    # NOTE(review): the status is read before the generic exception check
    # below; presumably status_code is safe to read when request.data
    # holds an exception - confirm against HttpRequest.
    status = request.status_code
    if status == 504:
        host = urlparse(request.url).netloc
        raise Server504Error('Server {} timed out'.format(host))

    if status == 414:
        raise Server414Error('Too long GET request')

    if isinstance(request.data, Exception):
        error('An error occurred for uri ' + request.url)
        raise request.data from None

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    if status in (200, 207):
        return
    warning('Http response status {}'.format(status))
def request(site, uri, ssl=False, *args, **kwargs):
    """Queue a request to be submitted to Site.

    All parameters not listed below are the same as
    L{httplib2.Http.request}, but the uri is relative

    If the site argument is None the uri has to be absolute and is taken.
    In this case SSL is ignored. Used for requests to non wiki pages.

    @param site: The Site to connect to
    @param uri: the URI to retrieve (relative to the site's scriptpath)
    @param ssl: Use HTTPS connection
    @return: The received data (a unicode string).
    @raise FatalServerError: the SSL handshake failed because the
        certificate could not be verified
    @raise Server504Error: the server answered with HTTP status 504
    """
    if site:
        if ssl:
            proto = "https"
            host = site.ssl_hostname()
            uri = site.ssl_pathprefix() + uri
        else:
            proto = site.protocol()
            host = site.hostname()
        baseuri = urlparse.urljoin("%s://%s" % (proto, host), uri)
    else:
        baseuri = uri

    # set default user-agent string
    kwargs.setdefault("headers", {})
    kwargs["headers"].setdefault("user-agent", useragent)

    request = threadedhttp.HttpRequest(baseuri, *args, **kwargs)
    http_queue.put(request)
    # poll the lock until the request has been processed
    while not request.lock.acquire(False):
        time.sleep(0.1)

    # TODO: do some error correcting stuff
    if isinstance(request.data, SSLHandshakeError):
        if SSL_CERT_VERIFY_FAILED in str(request.data):
            raise FatalServerError(str(request.data))

    # if all else fails
    if isinstance(request.data, Exception):
        raise request.data

    status = request.data[0].status
    if status == 504:
        # site is None for absolute-URI requests, so site.hostname() cannot
        # be called there; report the host taken from the URI instead.
        if site:
            timed_out_host = site.hostname()
        else:
            timed_out_host = urlparse.urlparse(baseuri).netloc
        raise Server504Error("Server %s timed out" % timed_out_host)

    if status != 200:
        pywikibot.warning(u"Http response status %(status)s"
                          % {'status': status})

    return request.data[1]
def error_handling_callback(response):
    """
    Inspect the outcome of a request; raise on failures, warn on oddities.

    An exception instance passed in place of a response is re-raised
    (SSL certificate verification failures as C{FatalServerError}).
    Status 504 and 414 raise C{Server504Error} and C{Server414Error};
    any other status except 200 and 207 is logged as a warning. Finally,
    a C{UnicodeDecodeError} found in C{response.encoding} is logged and
    raised.

    @param response: Response returned by Session.request().
    @type response: L{requests.Response}
    """
    # TODO: do some error correcting stuff
    is_ssl_error = isinstance(response, requests.exceptions.SSLError)
    if is_ssl_error and SSL_CERT_VERIFY_FAILED_MSG in str(response):
        raise FatalServerError(str(response))

    if isinstance(response, Exception):
        # request.data exception may contain response and request
        # attribute; logging the uri is best effort only
        with suppress(Exception):
            error('An error occurred for uri ' + response.request.url)
        raise response from None

    status = response.status_code
    if status == 504:
        host = urlparse(response.url).netloc
        raise Server504Error('Server {} timed out'.format(host))

    if status == 414:
        raise Server414Error('Too long GET request')

    # TODO: shall it raise? this might break some code, TBC
    # response.raise_for_status()

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    if status not in (200, 207):
        warning('Http response status {}'.format(status))

    if not isinstance(response.encoding, UnicodeDecodeError):
        return
    error('An error occurred for uri {}: '
          'no encoding detected!'.format(response.request.url))
    raise response.encoding from None
def read_file_content(self, file_url: str) -> str:
    """Return name of temp file in which remote file is saved.

    The file is streamed into a temporary file. When the connection is
    lost the download is resumed with an HTTP ``Range`` request after a
    growing delay, until the file is complete or the retries are used up.

    :param file_url: URL of the remote file to download
    :return: path of the temporary file holding the downloaded content
    :raises FatalServerError: the server returned textual content, an
        HTTP error other than a final "range not satisfiable", or the
        download did not complete within the available retries
    """
    pywikibot.output('Reading file ' + file_url)

    handle, tempname = tempfile.mkstemp()
    # Only the file name is needed: every attempt reopens the file in
    # append mode. Close the descriptor returned by mkstemp right away so
    # it is not leaked.
    os.close(handle)
    path = Path(tempname)
    size = 0

    # delays (in seconds) between successive retries
    dt_gen = iter((15, 30, 45, 60, 120, 180, 240, 300))
    while True:
        file_len = path.stat().st_size
        if file_len:
            pywikibot.output('Download resumed.')
            headers = {'Range': 'bytes={}-'.format(file_len)}
        else:
            headers = {}

        with open(str(path), 'ab') as fd:  # T272345: Python 3.5 needs str
            try:
                response = http.fetch(file_url, stream=True,
                                      headers=headers)
                response.raise_for_status()

                # get download info, if available
                # Note: this is not enough to exclude pages
                #       e.g. 'application/json' is also not a media
                # A missing Content-Type header is treated as non-text.
                if 'text/' in response.headers.get('Content-Type', ''):
                    raise FatalServerError('The requested URL was not '
                                           'found on server.')
                size = max(size,
                           int(response.headers.get('Content-Length', 0)))

                # stream content to temp file (in chunks of 1Mb)
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    fd.write(chunk)

            # raised from connection lost during response.iter_content()
            except requests.ConnectionError:
                fd.flush()
                pywikibot.output('Connection closed at byte {}'.format(
                    path.stat().st_size))
            # raised from response.raise_for_status()
            except requests.HTTPError as e:
                # exit criteria if size is not available
                # error on last iteration is OK, we're requesting
                #    {'Range': 'bytes=file_len-'}
                err = HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE
                if response.status_code == err and path.stat().st_size:
                    break
                raise FatalServerError(str(e)) from e

        if size and size == path.stat().st_size:
            break

        dt = next(dt_gen, None)
        if dt is None:
            raise FatalServerError('Download failed, too many retries!')
        pywikibot.output('Sleeping for {} seconds ...'.format(dt))
        pywikibot.sleep(dt)

    pywikibot.output('Downloaded {} bytes'.format(path.stat().st_size))
    return tempname
def request(site=None, uri=None, *args, **kwargs):
    """Queue a request to be submitted to Site.

    All parameters not listed below are the same as
    L{httplib2.Http.request}.

    If the site argument is provided, the uri is relative to the site's
    scriptpath.

    If the site argument is None, the uri must be absolute, and is
    used for requests to non wiki pages.

    @param site: The Site to connect to
    @type site: L{pywikibot.site.BaseSite}
    @param uri: the URI to retrieve
    @type uri: str
    @return: The received data (a unicode string).
    @raise FatalServerError: the SSL handshake failed because the
        certificate could not be verified
    @raise Server504Error: the server answered with HTTP status 504
    @raise Server414Error: the server answered with HTTP status 414
    """
    assert site or uri
    if site:
        proto = site.protocol()
        if proto == 'https':
            host = site.ssl_hostname()
            uri = site.ssl_pathprefix() + uri
        else:
            host = site.hostname()
        baseuri = urlparse.urljoin("%s://%s" % (proto, host), uri)

        kwargs.setdefault("disable_ssl_certificate_validation",
                          site.ignore_certificate_error())
    else:
        baseuri = uri
        host = urlparse.urlparse(uri).netloc

    # a caller-supplied user-agent header is used as the format string
    format_string = kwargs.setdefault("headers", {}).get("user-agent")
    kwargs["headers"]["user-agent"] = user_agent(site, format_string)

    request = threadedhttp.HttpRequest(baseuri, *args, **kwargs)
    http_queue.put(request)
    # poll the lock until the request has been processed
    while not request.lock.acquire(False):
        time.sleep(0.1)

    # TODO: do some error correcting stuff
    if isinstance(request.data, SSLHandshakeError):
        if SSL_CERT_VERIFY_FAILED_MSG in str(request.data):
            raise FatalServerError(str(request.data))

    # if all else fails
    if isinstance(request.data, Exception):
        raise request.data

    response = request.data[0]
    if response.status == 504:
        raise Server504Error("Server %s timed out" % host)

    if response.status == 414:
        raise Server414Error('Too long GET request')

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    if response.status not in (200, 207):
        pywikibot.warning(u"Http response status %(status)s"
                          % {'status': response.status})

    # Take only the charset parameter of the content-type header: other
    # parameters may follow it and the value may be quoted, and the header
    # may be missing altogether.
    encoding = None
    for segment in response.get('content-type', '').split(';'):
        key, _, value = segment.partition('=')
        if key.strip().lower() == 'charset':
            encoding = value.strip().strip('"\'')
            break

    if not encoding:
        encoding = 'ascii'
        # Don't warn, many pages don't contain one
        pywikibot.log(u"Http response doesn't contain a charset.")

    return request.data[1].decode(encoding)