def request(self):
    """Run the prepared pycurl transfer and translate pycurl errors
    into the corresponding Grab exception types."""
    try:
        self.curl.perform()
    except pycurl.error as ex:
        code = ex.args[0]
        # CURLE_WRITE_ERROR (23):
        # An error occurred when writing received data to a local file,
        # or an error was returned to libcurl from a write callback.
        # It is expected (and ignored) when the _callback_interrupted
        # flag is set -- that happens when nohead/nobody options abort
        # the transfer on purpose.
        #
        # The same code is also raised when curl receives a
        # KeyboardInterrupt while running a callback
        # (WRITEFUNCTION, HEADERFUNCTION, etc.)
        if code == 23:
            if getattr(self.curl, '_callback_interrupted', None) is True:
                # Deliberate interruption: clear the flag and swallow.
                self.curl._callback_interrupted = False
            else:
                raise error.GrabNetworkError(ex.args[0], ex.args[1])
        else:
            # Map well-known curl error codes onto Grab exceptions;
            # anything unrecognized becomes a generic network error.
            error_map = {
                28: error.GrabTimeoutError,
                7: error.GrabConnectionError,
                67: error.GrabAuthError,
                47: error.GrabTooManyRedirectsError,
            }
            exc_cls = error_map.get(code, error.GrabNetworkError)
            raise exc_cls(ex.args[0], ex.args[1])
def request(self):
    """Execute the prepared request through urllib3, routing through a
    proxy pool when one is configured, and convert urllib3 transport
    errors into Grab exceptions."""
    req = self._request

    # Choose the connection pool: a (SOCKS)ProxyManager when a proxy is
    # configured, otherwise the shared pool owned by this transport.
    if req.proxy:
        proxy_headers = (make_headers(proxy_basic_auth=req.proxy_userpwd)
                         if req.proxy_userpwd else None)
        proxy_url = '%s://%s' % (req.proxy_type, req.proxy)
        if req.proxy_type == 'socks5':
            # NOTE: proxy_headers is not passed to SOCKSProxyManager
            pool = SOCKSProxyManager(proxy_url)
        else:
            pool = ProxyManager(proxy_url, proxy_headers=proxy_headers)
    else:
        pool = self.pool

    try:
        retry = Retry(redirect=False, connect=False, read=False)
        # The read timeout is not a total-response-time timeout:
        # it bounds the wait for the next data chunk from the server.
        # Total response timeout is handled by Grab itself.
        timeout = Timeout(connect=req.connect_timeout, read=req.timeout)
        if six.PY3:
            req_url = make_unicode(req.url)
            req_method = make_unicode(req.method)
        else:
            req_url = make_str(req.url)
            req_method = req.method
        req.op_started = time.time()
        res = pool.urlopen(req_method, req_url,
                           body=req.data, timeout=timeout,
                           retries=retry, headers=req.headers,
                           preload_content=False)
    except exceptions.ReadTimeoutError as ex:
        raise error.GrabTimeoutError('ReadTimeoutError', ex)
    except exceptions.ConnectTimeoutError as ex:
        raise error.GrabConnectionError('ConnectTimeoutError', ex)
    except exceptions.ProtocolError as ex:
        # Do not index into ex.args[1]: it may be a bare OSError,
        # which is not subscriptable -- pass the exception itself.
        raise error.GrabConnectionError('ProtocolError', ex)

    # Raw request dumps are not available with urllib3.
    self.request_head = b''
    self.request_body = b''
    self.request_log = b''
    self._response = res
def request(self):
    """Execute the prepared request through urllib3 and convert
    urllib3 transport errors into Grab exceptions.

    Raises:
        GrabMisuseError: for proxy schemes urllib3 does not support.
        GrabTimeoutError / GrabConnectionError: for transport failures.
    """
    req = self._request
    if req.proxy:
        if req.proxy_userpwd:
            headers = make_headers(proxy_basic_auth=req.proxy_userpwd)
        else:
            headers = None
        proxy_url = '%s://%s' % (req.proxy_type, req.proxy)
        try:
            pool = ProxyManager(proxy_url, proxy_headers=headers)
        except ProxySchemeUnknown:
            raise GrabMisuseError('Urllib3 transport does '
                                  'not support %s proxies' % req.proxy_type)
    else:
        pool = self.pool
    try:
        retry = Retry(redirect=False, connect=False, read=False)
        # The read timeout is not a total-response-time timeout:
        # it bounds the wait for the next data chunk from the server.
        # Total response timeout is handled by Grab itself.
        timeout = Timeout(connect=req.connect_timeout, read=req.timeout)
        if six.PY3:
            req_url = make_unicode(req.url)
            req_method = make_unicode(req.method)
        else:
            req_url = make_str(req.url)
            req_method = req.method
        req.op_started = time.time()
        res = pool.urlopen(req_method, req_url,
                           body=req.data, timeout=timeout,
                           retries=retry, headers=req.headers,
                           preload_content=False)
    # Include the original exception in the Grab error args for
    # consistency with the other transport implementations in this file.
    except exceptions.ReadTimeoutError as ex:
        raise error.GrabTimeoutError('ReadTimeoutError', ex)
    except exceptions.ConnectTimeoutError as ex:
        raise error.GrabConnectionError('ConnectTimeoutError', ex)
    except exceptions.ProtocolError as ex:
        # BUGFIX: the previous code did
        #     raise error.GrabConnectionError(ex.args[1][0], ex.args[1][1])
        # which crashes with
        #     TypeError: 'OSError' object is not subscriptable
        # when ex.args[1] is a bare OSError. Pass the exception itself.
        raise error.GrabConnectionError('ProtocolError', ex)

    # Raw request dumps are not available with urllib3.
    self.request_head = b''
    self.request_body = b''
    self.request_log = b''
    self._response = res
def request(self):
    """Run the pycurl transfer while recording stderr, translating
    pycurl errors into Grab exceptions and recovering KeyboardInterrupts
    that pycurl hides inside callback failures."""
    # Capture stderr during perform(): pycurl may print a traceback of a
    # KeyboardInterrupt raised inside a callback instead of propagating it.
    stderr_proxy = StderrProxy()
    try:
        with stderr_proxy.record():
            self.curl.perform()
    except pycurl.error as ex:
        # CURLE_WRITE_ERROR (23)
        # An error occurred when writing received data to a local file, or
        # an error was returned to libcurl from a write callback.
        # This exception should be ignored if grab_callback_interrupted flag
        # is enabled (this happens when nohead or nobody options enabled)
        #
        # Also this error is raised when curl receives KeyboardInterrupt
        # while it is processing some callback function
        # (WRITEFUNCTION, HEADERFUNCTION, etc)
        # If you think WTF then see details here:
        # https://github.com/pycurl/pycurl/issues/413
        if self.has_pycurl_hidden_sigint(stderr_proxy.get_output()):
            raise KeyboardInterrupt
        if 23 == ex.args[0]:
            if getattr(self.curl, 'grab_callback_interrupted', None) is True:
                # This is expected error caused by
                # interruptted execution of body_processor callback
                # FIXME: is it set automatically?
                self.curl.grab_callback_interrupted = False
            else:
                raise error.GrabNetworkError(ex.args[0], ex.args[1])
        else:
            # Map well-known curl error codes onto Grab exceptions.
            if ex.args[0] == 28:
                raise error.GrabTimeoutError(ex.args[0], ex.args[1])
            elif ex.args[0] == 7:
                raise error.GrabConnectionError(ex.args[0], ex.args[1])
            elif ex.args[0] == 67:
                raise error.GrabAuthError(ex.args[0], ex.args[1])
            elif ex.args[0] == 47:
                raise error.GrabTooManyRedirectsError(ex.args[0], ex.args[1])
            elif ex.args[0] == 6:
                raise error.GrabCouldNotResolveHostError(ex.args[0],
                                                         ex.args[1])
            else:
                raise error.GrabNetworkError(ex.args[0], ex.args[1])
    except Exception as ex:  # pylint: disable=broad-except
        # A hidden SIGINT takes priority over wrapping the error.
        if self.has_pycurl_hidden_sigint(stderr_proxy.get_output()):
            raise KeyboardInterrupt
        # Re-raise with the original traceback preserved (py2/py3 safe).
        six.reraise(error.GrabInternalError, error.GrabInternalError(ex),
                    sys.exc_info()[2])
    else:
        # Even a "successful" perform() may have swallowed a SIGINT
        # inside a callback -- check the captured stderr.
        if self.has_pycurl_hidden_sigint(stderr_proxy.get_output()):
            raise KeyboardInterrupt
def wrap_transport_error(self):
    """Context-manager body that converts urllib3/ssl transport errors
    raised inside the ``with`` block into Grab exceptions."""
    try:
        yield
    except (exceptions.ReadTimeoutError,
            exceptions.ConnectTimeoutError,
            exceptions.ProtocolError,
            exceptions.SSLError,
            ssl.SSLError) as ex:
        # Dispatch in the same order the handlers were checked before.
        if isinstance(ex, exceptions.ReadTimeoutError):
            raise error.GrabTimeoutError('ReadTimeoutError', ex)
        if isinstance(ex, exceptions.ConnectTimeoutError):
            raise error.GrabConnectionError('ConnectTimeoutError', ex)
        if isinstance(ex, exceptions.ProtocolError):
            # Do not index into ex.args[1]: it may be a bare OSError,
            # which is not subscriptable -- pass the exception itself.
            raise error.GrabConnectionError('ProtocolError', ex)
        # Remaining cases: urllib3 SSLError or stdlib ssl.SSLError.
        raise error.GrabConnectionError('SSLError', ex)
def build_grab_exception(ex, curl):
    """
    Build Grab exception from the pycurl exception

    Args:
        ex - the original pycurl exception
        curl - the Curl instance raised the exception

    Returns the Grab exception instance to raise, or None when the
    error should be ignored.
    """
    code = ex.args[0]
    # CURLE_WRITE_ERROR (23)
    # An error occurred when writing received data to a local file, or
    # an error was returned to libcurl from a write callback.
    # It must be ignored when the grab_callback_interrupted flag is set
    # (body_maxsize, nobody and similar options interrupt the
    # body_process callback on purpose).
    #
    # The same code is also raised when curl receives KeyboardInterrupt
    # while running a callback (WRITEFUNCTION, HEADERFUNCTION, etc.)
    # If you think WTF then see details here:
    # https://github.com/pycurl/pycurl/issues/413
    if code == 23:
        if getattr(curl, 'grab_callback_interrupted', None) is True:
            # Expected interruption of the body callback: ignore.
            return None
        return error.GrabNetworkError(ex.args[1], ex)
    # Dispatch table for the remaining well-known curl error codes;
    # anything unrecognized becomes a generic network error.
    error_classes = {
        28: error.GrabTimeoutError,
        7: error.GrabConnectionError,
        67: error.GrabAuthError,
        47: error.GrabTooManyRedirectsError,
        6: error.GrabCouldNotResolveHostError,
        3: error.GrabInvalidUrl,
    }
    exc_cls = error_classes.get(code, error.GrabNetworkError)
    return exc_cls(ex.args[1], ex)
def request(self):
    """Execute the prepared request through urllib3 with certificate
    verification, converting transport errors into Grab exceptions."""
    req = self._request

    # Choose the connection pool: a proxy manager (with CA verification
    # via certifi) when a proxy is configured, else the shared pool.
    if req.proxy:
        proxy_headers = (make_headers(proxy_basic_auth=req.proxy_userpwd)
                         if req.proxy_userpwd else None)
        proxy_url = '%s://%s' % (req.proxy_type, req.proxy)
        if req.proxy_type == 'socks5':
            # NOTE: proxy_headers is not passed to SOCKSProxyManager
            pool = SOCKSProxyManager(
                proxy_url,
                cert_reqs='CERT_REQUIRED',
                ca_certs=certifi.where())
        else:
            pool = ProxyManager(
                proxy_url,
                proxy_headers=proxy_headers,
                cert_reqs='CERT_REQUIRED',
                ca_certs=certifi.where())
    else:
        pool = self.pool

    try:
        # Retries can be disabled by passing False:
        # http://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#module-urllib3.util.retry
        # Do not use False because of warning:
        # Converted retries value: False -> Retry(total=False,
        # connect=None, read=None, redirect=0, status=None)
        retry = Retry(
            total=False,
            connect=False,
            read=False,
            redirect=0,
            status=None,
        )
        # The read timeout is not a total-response-time timeout:
        # it bounds the wait for the next data chunk from the server.
        # Total response timeout is handled by Grab itself.
        timeout = Timeout(connect=req.connect_timeout, read=req.timeout)
        if six.PY3:
            req_url = make_unicode(req.url)
            req_method = make_unicode(req.method)
        else:
            req_url = make_str(req.url)
            req_method = req.method
        req.op_started = time.time()
        try:
            res = pool.urlopen(req_method, req_url,
                               body=req.data, timeout=timeout,
                               retries=retry, headers=req.headers,
                               preload_content=False)
        except UnicodeError as ex:
            raise error.GrabConnectionError('GrabInvalidUrl', ex)
    except exceptions.ReadTimeoutError as ex:
        raise error.GrabTimeoutError('ReadTimeoutError', ex)
    except exceptions.ConnectTimeoutError as ex:
        raise error.GrabConnectionError('ConnectTimeoutError', ex)
    except exceptions.ProtocolError as ex:
        # Do not index into ex.args[1]: it may be a bare OSError,
        # which is not subscriptable -- pass the exception itself.
        raise error.GrabConnectionError('ProtocolError', ex)
    except exceptions.SSLError as ex:
        raise error.GrabConnectionError('SSLError', ex)

    # Raw request dumps are not available with urllib3.
    self.request_head = b''
    self.request_body = b''
    self.request_log = b''
    self._response = res