Beispiel #1
0
def error_handling_callback(response):
    """
    Inspect the outcome of a request, raising or logging as needed.

    An exception passed in place of a response is re-raised; an SSL
    error whose text contains ``SSL_CERT_VERIFY_FAILED_MSG`` is raised
    as ``FatalServerError`` instead. HTTP 504 and 414 are converted to
    dedicated errors, and any other status besides 200 and 207 is only
    logged as a warning.

    :param response: Response returned by Session.request(), or the
        exception caught while requesting.
    :type response: :py:obj:`requests.Response`
    """
    # TODO: do some error correcting stuff
    if isinstance(response, Exception):
        cert_failure = (
            isinstance(response, requests.exceptions.SSLError)
            and SSL_CERT_VERIFY_FAILED_MSG in str(response))
        if cert_failure:
            raise FatalServerError(str(response))

        with suppress(Exception):
            # request exception may contain response and request attribute
            error('An error occurred for uri ' + response.request.url)
        raise response from None

    status = response.status_code
    if status == HTTPStatus.GATEWAY_TIMEOUT:
        host = urlparse(response.url).netloc
        raise Server504Error('Server {} timed out'.format(host))

    if status == HTTPStatus.REQUEST_URI_TOO_LONG:
        raise Server414Error('Too long GET request')

    # TODO: shall it raise? this might break some code, TBC
    # response.raise_for_status()

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    success_codes = (HTTPStatus.OK, HTTPStatus.MULTI_STATUS)
    if status not in success_codes:
        warning('Http response status {}'.format(status))
Beispiel #2
0
def error_handling_callback(request):
    """
    Turn the result of a completed request into exceptions or log output.

    An exception stored in ``request.data`` is re-raised (an SSL error
    containing ``SSL_CERT_VERIFY_FAILED_MSG`` becomes
    ``FatalServerError``); status 504 and 414 raise dedicated errors;
    any other status besides 200 and 207 is logged as a warning.

    @param request: Request that has completed
    @type request: L{threadedhttp.HttpRequest}
    """
    # TODO: do some error correcting stuff
    outcome = request.data
    if isinstance(outcome, Exception):
        if (isinstance(outcome, requests.exceptions.SSLError)
                and SSL_CERT_VERIFY_FAILED_MSG in str(outcome)):
            raise FatalServerError(str(outcome))

        # if all else fails
        error('An error occurred for uri ' + request.uri)
        raise outcome

    status = request.status
    if status == 504:
        raise Server504Error('Server %s timed out' % request.hostname)

    if status == 414:
        raise Server414Error('Too long GET request')

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    if status not in (200, 207):
        warning('Http response status {0}'.format(outcome.status_code))
Beispiel #3
0
def _http_process(session, http_request):
    """
    Send ``http_request`` through ``session`` and record the outcome.

    The response, or the exception raised while performing the request,
    is stored in ``http_request.data``; nothing is returned.
    """
    verb = http_request.method
    target = http_request.uri
    payload = http_request.body
    request_headers = http_request.headers
    if PY2 and request_headers:
        request_headers = {name: str(value)
                           for name, value in request_headers.items()}

    credentials = get_authentication(target)
    # A 4-tuple of credentials is handed to OAuth1.
    if credentials is not None and len(credentials) == 4:
        if isinstance(requests_oauthlib, ImportError):
            warn('%s' % requests_oauthlib, ImportWarning)
            error('OAuth authentication not supported: %s' % requests_oauthlib)
            credentials = None
        else:
            credentials = requests_oauthlib.OAuth1(*credentials)

    socket_timeout = config.socket_timeout
    try:
        skip_verification = http_request.kwargs.pop(
            'disable_ssl_certificate_validation', False)
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        outcome = session.request(verb, target,
                                  data=payload,
                                  headers=request_headers,
                                  auth=credentials,
                                  timeout=socket_timeout,
                                  verify=not skip_verification)
    except Exception as exc:
        outcome = exc
    http_request.data = outcome
Beispiel #4
0
def _http_process(session, http_request):
    """
    Perform ``http_request`` with ``session`` and keep the result on it.

    ``http_request.data`` receives either the response or the exception
    raised during the request; the function itself returns nothing.
    """
    verb = http_request.method
    target = http_request.uri
    query = http_request.params
    payload = http_request.body
    request_headers = http_request.headers
    if PY2 and request_headers:
        request_headers = {name: str(value)
                           for name, value in request_headers.items()}

    credentials = get_authentication(target)
    needs_oauth = credentials is not None and len(credentials) == 4
    if needs_oauth and isinstance(requests_oauthlib, ImportError):
        warn('%s' % requests_oauthlib, ImportWarning)
        error('OAuth authentication not supported: %s'
              % requests_oauthlib)
        credentials = None
    elif needs_oauth:
        credentials = requests_oauthlib.OAuth1(*credentials)

    request_args = {
        'params': query,
        'data': payload,
        'headers': request_headers,
        'auth': credentials,
        'timeout': config.socket_timeout,
    }
    try:
        skip_verification = http_request.kwargs.pop(
            'disable_ssl_certificate_validation', False)
        request_args['verify'] = not skip_verification
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        outcome = session.request(verb, target, **request_args)
    except Exception as exc:
        outcome = exc
    http_request.data = outcome
Beispiel #5
0
def error_handling_callback(request):
    """
    Raise for failed requests and warn about unexpected statuses.

    Exceptions held in ``request.data`` are re-raised, with SSL
    certificate verification failures reported as ``FatalServerError``.
    Status 504 and 414 raise their own errors; statuses other than 200
    and 207 only produce a warning.

    @param request: Request that has completed
    @type request: L{threadedhttp.HttpRequest}
    """
    # TODO: do some error correcting stuff
    result = request.data
    ssl_failure = isinstance(result, requests.exceptions.SSLError)
    if ssl_failure and SSL_CERT_VERIFY_FAILED_MSG in str(result):
        raise FatalServerError(str(result))

    # if all else fails
    if isinstance(result, Exception):
        error('An error occurred for uri ' + request.uri)
        raise result

    status = request.status
    if status == 504:
        raise Server504Error('Server %s timed out' % request.hostname)
    if status == 414:
        raise Server414Error('Too long GET request')

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    if status not in (200, 207):
        warning('Http response status {0}'.format(result.status_code))
Beispiel #6
0
 def number_of_images(self):
     """Return the number of images in the djvu file, caching the result.

     The count is read from the output of ``djvused -e n`` on first use
     and stored in ``self._image_count`` for later calls.
     """
     if hasattr(self, '_image_count'):
         return self._image_count

     command = ['djvused', '-e', 'n', self.file_djvu]
     process = subprocess.Popen(command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
     output, errors = process.communicate()
     if process.returncode != 0:
         # A failure is only logged; the output is still parsed below.
         error('djvulibre library error!\n{0!s}'.format(errors))
     self._image_count = int(output)
     return self._image_count
Beispiel #7
0
 def has_text(self):
     """Tell whether the djvu file has a text layer, caching the result.

     The file is considered to have one when the output of ``djvudump``
     contains ``TXTz``; the answer is kept in ``self._has_text``.
     """
     if hasattr(self, '_has_text'):
         return self._has_text

     process = subprocess.Popen(['djvudump', self.file_djvu],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
     output, errors = process.communicate()
     if process.returncode != 0:
         # A failure is only logged; the output is still inspected below.
         error('djvulibre library error!\n{0!s}'.format(errors))
     self._has_text = 'TXTz' in output.decode('utf-8')
     return self._has_text
 def number_of_images(self):
     """Return the image count of the djvu file, computing it only once.

     On first use the count is parsed from the output of
     ``djvused -e n`` and remembered in ``self._image_count``.
     """
     if hasattr(self, '_image_count'):
         return self._image_count

     djvused = subprocess.Popen(
         ['djvused', '-e', 'n', self.file_djvu],
         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     out, err = djvused.communicate()
     if djvused.returncode != 0:
         # A failure is only logged; the output is still parsed below.
         error('djvulibre library error!\n%s' % err)
     self._image_count = int(out)
     return self._image_count
 def has_text(self):
     """Report whether the djvu file contains a text layer (cached).

     The check looks for ``TXTz`` in the output of ``djvudump`` and
     stores the answer in ``self._has_text``.
     """
     if hasattr(self, '_has_text'):
         return self._has_text

     djvudump = subprocess.Popen(
         ['djvudump', self.file_djvu],
         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     out, err = djvudump.communicate()
     if djvudump.returncode != 0:
         # A failure is only logged; the output is still inspected below.
         error('djvulibre library error!\n%s' % err)
     self._has_text = 'TXTz' in out.decode('utf-8')
     return self._has_text
Beispiel #10
0
 def get_page(self, n):
     """Return the text of page n of the djvu file.

     The page is extracted with ``djvutxt`` and passed through
     ``self._remove_control_chars`` before being returned.

     Raises ValueError if the file has no text layer or if n lies
     outside the range 1..number_of_images().
     """
     if not self.has_text():
         message = 'Djvu file {0!s} has no text layer.'.format(
             self.file_djvu)
         raise ValueError(message)

     if not (1 <= n <= self.number_of_images()):
         details = (n, self.file_djvu, 1, self.number_of_images())
         raise ValueError('Requested page number %d is not in file %s'
                          ' page range [%d-%d]' % details)

     command = ['djvutxt', '--page={0:d}'.format(n), self.file_djvu]
     process = subprocess.Popen(command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
     output, errors = process.communicate()
     if process.returncode != 0:
         # A failure is only logged; the output is still returned below.
         error('djvulibre library error!\n{0!s}'.format(errors))
     return self._remove_control_chars(output)
 def get_page(self, n):
     """Extract the text of page n from the djvu file.

     The text comes from ``djvutxt`` and is cleaned with
     ``self._remove_control_chars`` before it is returned.

     Raises ValueError if the file has no text layer or if n lies
     outside the range 1..number_of_images().
     """
     if not self.has_text():
         message = 'Djvu file %s has no text layer.' % self.file_djvu
         raise ValueError(message)

     if not (1 <= n <= self.number_of_images()):
         details = (n, self.file_djvu, 1, self.number_of_images())
         raise ValueError('Requested page number %d is not in file %s'
                          ' page range [%d-%d]' % details)

     djvutxt = subprocess.Popen(
         ['djvutxt', '--page=%d' % n, self.file_djvu],
         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     out, err = djvutxt.communicate()
     if djvutxt.returncode != 0:
         # A failure is only logged; the output is still returned below.
         error('djvulibre library error!\n%s' % err)
     return self._remove_control_chars(out)
Beispiel #12
0
def _http_process(session, http_request):
    """
    Execute a `threadedhttp.HttpRequest` and store the outcome on it.

    The response, or the exception raised while requesting, ends up in
    `http_request.data`.

    @param session: Session used to perform the `http_request`.
    @type session: L{requests.Session}
    @param http_request: Request to perform.
    @type http_request: L{threadedhttp.HttpRequest}
    @return: None
    @rtype: None
    """
    verb = http_request.method
    target = http_request.uri
    query = http_request.params
    payload = http_request.body
    request_headers = http_request.headers
    if PY2 and request_headers:
        request_headers = {name: str(value)
                           for name, value in request_headers.items()}

    credentials = get_authentication(target)
    # A 4-tuple of credentials is handed to OAuth1.
    if credentials is not None and len(credentials) == 4:
        if isinstance(requests_oauthlib, ImportError):
            warn('%s' % requests_oauthlib, ImportWarning)
            error('OAuth authentication not supported: %s' % requests_oauthlib)
            credentials = None
        else:
            credentials = requests_oauthlib.OAuth1(*credentials)

    socket_timeout = config.socket_timeout
    try:
        skip_verification = http_request.kwargs.pop(
            'disable_ssl_certificate_validation', False)
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        outcome = session.request(verb, target,
                                  params=query,
                                  data=payload,
                                  headers=request_headers,
                                  auth=credentials,
                                  timeout=socket_timeout,
                                  verify=not skip_verification,
                                  **http_request.kwargs)
    except Exception as exc:
        outcome = exc
    http_request.data = outcome
Beispiel #13
0
def _http_process(session, http_request):
    """
    Run a `threadedhttp.HttpRequest` through the given session.

    Whatever the request produces, a response or the exception raised
    on the way, is assigned to `http_request.data`.

    @param session: Session used to perform the `http_request`.
    @type session: L{requests.Session}
    @param http_request: Request to perform.
    @type http_request: L{threadedhttp.HttpRequest}
    @return: None
    @rtype: None
    """
    verb = http_request.method
    target = http_request.uri
    query = http_request.params
    payload = http_request.body
    request_headers = http_request.headers
    if PY2 and request_headers:
        request_headers = dict(
            (name, str(value)) for name, value in request_headers.items())

    credentials = get_authentication(target)
    needs_oauth = credentials is not None and len(credentials) == 4
    if needs_oauth and isinstance(requests_oauthlib, ImportError):
        warn('%s' % requests_oauthlib, ImportWarning)
        error('OAuth authentication not supported: %s'
              % requests_oauthlib)
        credentials = None
    elif needs_oauth:
        credentials = requests_oauthlib.OAuth1(*credentials)

    socket_timeout = config.socket_timeout
    try:
        skip_verification = http_request.kwargs.pop(
            'disable_ssl_certificate_validation', False)
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        outcome = session.request(
            verb, target, params=query, data=payload,
            headers=request_headers, auth=credentials,
            timeout=socket_timeout, verify=not skip_verification,
            **http_request.kwargs)
    except Exception as exc:
        outcome = exc
    http_request.data = outcome
Beispiel #14
0
def error_handling_callback(response):
    """
    Check the outcome of a request, raising or logging as needed.

    An exception passed in place of a response is re-raised; an SSL
    error whose text contains ``SSL_CERT_VERIFY_FAILED_MSG`` is raised
    as ``FatalServerError`` instead. Status 504 and 414 raise dedicated
    errors, any other status besides 200 and 207 is logged as a
    warning, and a ``UnicodeDecodeError`` stored as the response
    encoding is logged and raised.

    @param response: Response returned by Session.request(), or the
        exception caught while requesting.
    @type response: L{requests.Response}
    """
    # TODO: do some error correcting stuff
    if isinstance(response, Exception):
        cert_failure = (
            isinstance(response, requests.exceptions.SSLError)
            and SSL_CERT_VERIFY_FAILED_MSG in str(response))
        if cert_failure:
            raise FatalServerError(str(response))

        with suppress(Exception):
            # request.data exception may contain response and request attribute
            error('An error occurred for uri ' + response.request.url)
        raise response from None

    status = response.status_code
    if status == 504:
        host = urlparse(response.url).netloc
        raise Server504Error('Server {} timed out'.format(host))

    if status == 414:
        raise Server414Error('Too long GET request')

    # TODO: shall it raise? this might break some code, TBC
    # response.raise_for_status()

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    if status not in (200, 207):
        warning('Http response status {}'.format(status))

    encoding = response.encoding
    if isinstance(encoding, UnicodeDecodeError):
        error('An error occurred for uri {}: '
              'no encoding detected!'.format(response.request.url))
        raise encoding from None
Beispiel #15
0
 def print_failures(self, typed_item: BaseType, failed_constraints):
     """Log one error per failed constraint.

     Each message names the constraint and the item it failed for.
     """
     for constraint in failed_constraints:
         message = f"{constraint} failed for {typed_item}"
         botlogging.error(message)
Beispiel #16
0
    def read_user_config():
        """Compile the file at ``_filename`` and run it in ``_exec_globals``."""
        with open(_filename, 'rb') as config_file:
            source = config_file.read()
        exec(compile(source, _filename, 'exec'), _exec_globals)

    # Load the user configuration file, if it exists, after inspecting
    # its ownership and permissions.
    if os.path.exists(_filename):
        _filestatus = os.stat(_filename)
        _filemode = _filestatus[0]  # st_mode
        _fileuid = _filestatus[4]  # st_uid
        # On Windows (OSWIN32) both checks below are bypassed; elsewhere
        # the file must be owned by the current user or by uid 0.
        if OSWIN32 or _fileuid in [os.getuid(), 0]:
            # 0o02 is the "writable by others" permission bit.
            if OSWIN32 or _filemode & 0o02 == 0:
                read_user_config()
            else:
                # NOTE(review): the file is still executed here although
                # it is writable by others; the "Skipped" message is only
                # logged when executing it raises. Confirm this is
                # intended.
                try:
                    read_user_config()
                except Exception as e:
                    error(f"{e}" + "Skipped '%(fn)s': writeable by others." %
                          {'fn': _filename})
        else:
            # NOTE(review): same as above, a file owned by someone else
            # is still executed, and "Skipped" is only logged on failure.
            try:
                read_user_config()
            except Exception as e:
                error(f"{e}" + "Skipped '%(fn)s': owned by someone else." %
                      {'fn': _filename})


class _DifferentTypeError(UserWarning, TypeError):
    """An error when the required type doesn't match the actual type."""
    def __init__(self, name, actual_type, allowed_types):
        super(_DifferentTypeError, self).__init__(
            'Configuration variable "{0}" is defined as "{1.__name__}" in '
            'your user-config.py but expected "{2}".'.format(
                name, actual_type,
Beispiel #17
0
    else:
        userinterface_lang = userinterface_lang.split('_')[0]

# Fix up default site: family 'wikipedia' together with mylang 'language'
# is treated as "not configured", so mylang falls back to 'test'.
if family == 'wikipedia' and mylang == 'language':
    # The warning is not shown when __no_user_config is '2'.
    if __no_user_config != '2':
        warning('family and mylang are not set.\n'
                "Defaulting to family='wikipedia' and mylang='test'.")
    mylang = 'test'

# SECURITY WARNINGS
# Exit with status 1 when private_files_permission grants any group or
# other permission bit, unless ignore_file_security_warnings is set.
if (not ignore_file_security_warnings
        and private_files_permission & (stat.S_IRWXG | stat.S_IRWXO) != 0):
    error("CRITICAL SECURITY WARNING: 'private_files_permission' is set"
          ' to allow access from the group/others which'
          ' could give them access to the sensitive files.'
          ' To avoid giving others access to sensitive files, pywikibot'
          " won't run with this setting. Choose a more restrictive"
          " permission or set 'ignore_file_security_warnings' to true.")
    sys.exit(1)

#
# When called as main program, list all configuration variables
#
if __name__ == '__main__':
    _all = True
    for _arg in sys.argv[1:]:
        if _arg == 'modified':
            _all = False
        else:
            warning('Unknown arg {0} ignored'.format(_arg))
    for _name in sorted(globals().keys()):
Beispiel #18
0
def fetch(uri: str,
          method: str = 'GET',
          headers: Optional[dict] = None,
          default_error_handling: bool = True,
          use_fake_user_agent: Union[bool, str] = False,
          **kwargs):
    """
    HTTP request.

    See :py:obj:`requests.Session.request` for parameters.

    The ``headers`` dictionary and the ``callbacks`` list passed by the
    caller are copied before use and are never modified.

    :param uri: URL to send
    :param method: HTTP method of the request (default: GET)
    :param headers: dictionary of headers of the request
    :param default_error_handling: Use default error handling
    :param use_fake_user_agent: Set to True to use fake UA, False to use
        pywikibot's UA, str to specify own UA. This behaviour might be
        overridden by domain in config.

    :keyword charset: Either a valid charset (usable for str.decode()) or None
        to automatically chose the charset from the returned header (defaults
        to latin-1)
    :type charset: CodecInfo, str, None
    :keyword verify: verify the SSL certificate (default is True)
    :type verify: bool or path to certificates
    :keyword callbacks: Methods to call once data is fetched; each one is
        called with the response, or with the exception caught instead
    :type callbacks: list of callable
    :raises ValueError: use_fake_user_agent is neither a bool nor a
        non-empty str
    :return: the response; if the request raised and no callback re-raised
        it, the caught exception is returned instead
    :rtype: :py:obj:`requests.Response`
    """
    # Change user agent depending on fake UA settings.
    # Set header to new UA if needed.
    # Work on a copy so that the caller's dictionary is left untouched.
    headers = dict(headers or {})
    headers.update(config.extra_headers or {})

    def assign_fake_user_agent(use_fake_user_agent, uri):
        """Return the UA for uri, honouring per-domain config exceptions."""
        uri_domain = urlparse(uri).netloc
        use_fake_user_agent = config.fake_user_agent_exceptions.get(
            uri_domain, use_fake_user_agent)

        if use_fake_user_agent is False:
            return user_agent()
        if use_fake_user_agent is True:
            return fake_user_agent()
        if use_fake_user_agent and isinstance(use_fake_user_agent, str):
            return use_fake_user_agent  # Custom UA.
        raise ValueError('Invalid parameter: '
                         'use_fake_user_agent={}'.format(use_fake_user_agent))

    def assign_user_agent(user_agent_format_string):
        """Expand a UA format string; return a finished UA unchanged."""
        if not user_agent_format_string or '{' in user_agent_format_string:
            return user_agent(None, user_agent_format_string)
        # do nothing, it is already a UA
        return user_agent_format_string

    # If not already specified.
    if 'user-agent' not in headers:
        # Get fake UA exceptions from `fake_user_agent_exceptions` config.
        headers['user-agent'] = assign_fake_user_agent(use_fake_user_agent,
                                                       uri)
    # Already specified.
    else:
        headers['user-agent'] = assign_user_agent(headers.get('user-agent'))

    # Copy the list: inserting into the caller's list would accumulate
    # error_handling_callback whenever that list is reused across calls.
    callbacks = list(kwargs.pop('callbacks', None) or [])
    # error_handling_callback will be executed first.
    if default_error_handling:
        callbacks.insert(0, error_handling_callback)

    charset = kwargs.pop('charset', None)

    auth = get_authentication(uri)
    # A 4-tuple of credentials is handed to OAuth1.
    if auth is not None and len(auth) == 4:
        if isinstance(requests_oauthlib, ImportError):
            warn(str(requests_oauthlib), ImportWarning)
            error('OAuth authentication not supported: {}'.format(
                requests_oauthlib))
            auth = None
        else:
            auth = requests_oauthlib.OAuth1(*auth)

    timeout = config.socket_timeout
    # Translate the deprecated keyword into its replacement `verify`.
    old_validation = kwargs.pop('disable_ssl_certificate_validation', None)
    if old_validation is not None:
        issue_deprecation_warning('disable_ssl_certificate_validation',
                                  instead='verify',
                                  since='20201220')
        kwargs.update(verify=not old_validation)

    try:
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        response = session.request(method,
                                   uri,
                                   headers=headers,
                                   auth=auth,
                                   timeout=timeout,
                                   **kwargs)
    except Exception as e:
        # Hand the exception to the callbacks instead of raising here.
        response = e
    else:
        response.encoding = _decide_encoding(response, charset)

    for callback in callbacks:
        callback(response)

    return response
Beispiel #19
0
def fetch(uri,
          method='GET',
          headers=None,
          default_error_handling: bool = True,
          use_fake_user_agent: Union[bool, str] = False,
          **kwargs):
    """
    HTTP request.

    See L{requests.Session.request} for parameters.

    The C{headers} dictionary and the C{callbacks} list passed by the
    caller are copied before use and are never modified.

    @param default_error_handling: Use default error handling
    @param use_fake_user_agent: Set to True to use fake UA, False to use
        pywikibot's UA, str to specify own UA. This behaviour might be
        overridden by domain in config.

    @kwarg charset: Either a valid charset (usable for str.decode()) or None
        to automatically chose the charset from the returned header (defaults
        to latin-1)
    @type charset: CodecInfo, str, None
    @kwarg verify: verify the SSL certificate (default is True)
    @type verify: bool or path to certificates
    @kwarg callbacks: Methods to call once data is fetched; each one is
        called with the response, or with the exception caught instead
    @type callbacks: list of callable
    @raise ValueError: use_fake_user_agent is neither a bool nor a
        non-empty str
    @rtype: L{threadedhttp.HttpRequest}
    """
    # Change user agent depending on fake UA settings.
    # Set header to new UA if needed.
    # Work on a copy so that the caller's dictionary is left untouched.
    headers = dict(headers or {})
    headers.update(config.extra_headers or {})

    def assign_fake_user_agent(use_fake_user_agent, uri):
        """Return the UA for uri, honouring per-domain config exceptions."""
        uri_domain = urlparse(uri).netloc
        use_fake_user_agent = config.fake_user_agent_exceptions.get(
            uri_domain, use_fake_user_agent)

        if use_fake_user_agent is False:
            return user_agent()
        if use_fake_user_agent is True:
            return fake_user_agent()
        if use_fake_user_agent and isinstance(use_fake_user_agent, str):
            return use_fake_user_agent  # Custom UA.
        raise ValueError('Invalid parameter: '
                         'use_fake_user_agent={}'.format(use_fake_user_agent))

    def assign_user_agent(user_agent_format_string):
        """Expand a UA format string; return a finished UA unchanged."""
        if not user_agent_format_string or '{' in user_agent_format_string:
            return user_agent(None, user_agent_format_string)
        # do nothing, it is already a UA
        return user_agent_format_string

    # If not already specified.
    if 'user-agent' not in headers:
        # Get fake UA exceptions from `fake_user_agent_exceptions` config.
        headers['user-agent'] = assign_fake_user_agent(use_fake_user_agent,
                                                       uri)
    # Already specified.
    else:
        headers['user-agent'] = assign_user_agent(headers.get('user-agent'))

    # Copy the list: appending to the caller's list would accumulate
    # error_handling_callback whenever that list is reused across calls.
    callbacks = list(kwargs.pop('callbacks', None) or [])
    if default_error_handling:
        callbacks.append(error_handling_callback)

    charset = kwargs.pop('charset', None)
    request = threadedhttp.HttpRequest(charset=charset)

    auth = get_authentication(uri)
    # A 4-tuple of credentials is handed to OAuth1.
    if auth is not None and len(auth) == 4:
        if isinstance(requests_oauthlib, ImportError):
            warn('%s' % requests_oauthlib, ImportWarning)
            error('OAuth authentication not supported: %s' % requests_oauthlib)
            auth = None
        else:
            auth = requests_oauthlib.OAuth1(*auth)

    timeout = config.socket_timeout
    # Translate the deprecated keyword into its replacement `verify`.
    old_validation = kwargs.pop('disable_ssl_certificate_validation', None)
    if old_validation is not None:
        issue_deprecation_warning('disable_ssl_certificate_validation',
                                  instead='verify',
                                  warning_class=FutureWarning,
                                  since='20201220')
        kwargs.update(verify=not old_validation)

    try:
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        response = session.request(method,
                                   uri,
                                   headers=headers,
                                   auth=auth,
                                   timeout=timeout,
                                   **kwargs)
    except Exception as e:
        # Hand the exception to the callbacks instead of raising here.
        request.data = e
        response = e
    else:
        request.data = response

    for callback in callbacks:
        callback(response)

    # if there's no data in the answer we're in trouble
    try:
        request.data
    except AssertionError as e:
        raise e

    return request
Beispiel #20
0
    else:
        userinterface_lang = userinterface_lang.split('_')[0]

# Fix up default site: family 'wikipedia' together with mylang 'language'
# is treated as "not configured", so both fall back to 'test'.
if family == 'wikipedia' and mylang == 'language':
    # The warning is not shown when __no_user_config is '2'.
    if __no_user_config != '2':
        warning('family and mylang are not set.\n'
                "Defaulting to family='test' and mylang='test'.")
    family = mylang = 'test'

# SECURITY WARNINGS
# Exit with status 1 when private_files_permission grants any group or
# other permission bit, unless ignore_file_security_warnings is set.
if (not ignore_file_security_warnings and
        private_files_permission & (stat.S_IRWXG | stat.S_IRWXO) != 0):
    error("CRITICAL SECURITY WARNING: 'private_files_permission' is set"
          " to allow access from the group/others which"
          " could give them access to the sensitive files."
          " To avoid giving others access to sensitive files, pywikibot"
          " won't run with this setting. Choose a more restrictive"
          " permission or set 'ignore_file_security_warnings' to true.")
    sys.exit(1)

#
# When called as main program, list all configuration variables
#
if __name__ == "__main__":
    _all = 1
    for _arg in sys.argv[1:]:
        if _arg == "modified":
            _all = 0
        else:
            warning('Unknown arg {0} ignored'.format(_arg))
    _k = list(globals().keys())