def urlopener(url_or_request, log, **kwargs):
    """
    Utility function for pulling back a url, with a retry of 3 times,
    increasing the timeout, etc. Re-raises any errors as URLError.

    .. warning:: This is being replaced by requests library.
                 flexget.utils.requests should be used going forward.

    :param str url_or_request: URL or Request object to get.
    :param log: Logger to log debug info and errors to
    :param kwargs: Keyword arguments to be passed to urlopen.
        Recognized keys:

        - ``retries`` (int): number of attempts before giving up (default 3)
        - ``timeout`` (float): socket default timeout in seconds (default 15.0)
        - ``data``: POST data forwarded to the opener
        - ``handlers`` (list): extra urllib2 handlers to install in the opener
    :return: The file-like object returned by urlopen
    :raises urllib2.URLError: if the host is known-unresponsive or all
        retries are exhausted
    """
    # Imported lazily to avoid a circular import at module load time.
    from flexget.utils.requests import is_unresponsive, set_unresponsive

    if isinstance(url_or_request, urllib2.Request):
        url = url_or_request.get_host()
    else:
        url = url_or_request
    if is_unresponsive(url):
        msg = '%s is known to be unresponsive, not trying again.' % urlparse(url).hostname
        log.warning(msg)
        raise urllib2.URLError(msg)

    retries = kwargs.get('retries', 3)
    timeout = kwargs.get('timeout', 15.0)

    # get the old timeout for sockets, so we can set it back to that when done.
    # This is NOT threadsafe by the way.
    # In order to avoid requiring python 2.6, we're not using the urlopen timeout
    # parameter. That really should be used after checking for python 2.6.
    oldtimeout = socket.getdefaulttimeout()
    try:
        socket.setdefaulttimeout(timeout)

        handlers = [SmartRedirectHandler()]
        # Inherit any handlers already installed globally (e.g. by urllib2.install_opener).
        if urllib2._opener:
            handlers.extend(urllib2._opener.handlers)
        if kwargs.get('handlers'):
            handlers.extend(kwargs['handlers'])
        if len(handlers) > 1:
            handler_names = [h.__class__.__name__ for h in handlers]
            log.debug('Additional handlers have been specified for this urlopen: %s' %
                      ', '.join(handler_names))
        opener = urllib2.build_opener(*handlers).open
        for i in range(retries):  # retry getting the url up to `retries` times.
            if i > 0:
                # Back off briefly between attempts.
                time.sleep(3)
            try:
                retrieved = opener(url_or_request, kwargs.get('data'))
            except urllib2.HTTPError as e:
                if e.code < 500:
                    # If it was not a server error, don't keep retrying.
                    log.warning('Could not retrieve url (HTTP %s error): %s' % (e.code, e.url))
                    raise
                log.debug('HTTP error (try %i/%i): %s' % (i + 1, retries, e.code))
            except (urllib2.URLError, socket.timeout) as e:
                # socket.timeout has no `reason` attribute, so probe before reading it.
                if hasattr(e, 'reason'):
                    reason = str(e.reason)
                else:
                    reason = 'N/A'
                if reason == 'timed out':
                    # Remember this host so later calls can fail fast.
                    set_unresponsive(url)
                log.debug('Failed to retrieve url (try %i/%i): %s' % (i + 1, retries, reason))
            except httplib.IncompleteRead:
                # Server sent a short body (python bug 6312); retrying won't help,
                # so bail out and fall through to the final URLError below.
                log.critical('Incomplete read - see python bug 6312')
                break
            else:
                # make the returned instance usable in a with statement by adding
                # __enter__ and __exit__ methods.
                # NOTE(review): this patches the *class*, not the instance, so it
                # affects every object of that type process-wide — kept as-is since
                # callers may rely on it, but it is a known wart of this legacy helper.
                def enter(self):
                    return self

                def exit(self, exc_type, exc_val, exc_tb):
                    self.close()

                retrieved.__class__.__enter__ = enter
                retrieved.__class__.__exit__ = exit
                return retrieved

        log.warning('Could not retrieve url: %s' % url_or_request)
        raise urllib2.URLError('Could not retrieve url after %s tries.' % retries)
    finally:
        # Always restore the process-wide socket timeout we clobbered above.
        socket.setdefaulttimeout(oldtimeout)
def urlopener(url_or_request, log, **kwargs): """ Utility function for pulling back a url, with a retry of 3 times, increasing the timeout, etc. Re-raises any errors as URLError. .. warning:: This is being replaced by requests library. flexget.utils.requests should be used going forward. :param str url_or_request: URL or Request object to get. :param log: Logger to log debug info and errors to :param kwargs: Keyword arguments to be passed to urlopen :return: The file-like object returned by urlopen """ from flexget.utils.requests import is_unresponsive, set_unresponsive if isinstance(url_or_request, urllib2.Request): url = url_or_request.get_host() else: url = url_or_request if is_unresponsive(url): msg = "%s is known to be unresponsive, not trying again." % urlparse(url).hostname log.warning(msg) raise urllib2.URLError(msg) retries = kwargs.get("retries", 3) timeout = kwargs.get("timeout", 15.0) # get the old timeout for sockets, so we can set it back to that when done. This is NOT threadsafe by the way. # In order to avoid requiring python 2.6, we're not using the urlopen timeout parameter. That really should be used # after checking for python 2.6. oldtimeout = socket.getdefaulttimeout() try: socket.setdefaulttimeout(timeout) handlers = [SmartRedirectHandler()] if urllib2._opener: handlers.extend(urllib2._opener.handlers) if kwargs.get("handlers"): handlers.extend(kwargs["handlers"]) if len(handlers) > 1: handler_names = [h.__class__.__name__ for h in handlers] log.debug("Additional handlers have been specified for this urlopen: %s" % ", ".join(handler_names)) opener = urllib2.build_opener(*handlers).open for i in range(retries): # retry getting the url up to 3 times. if i > 0: time.sleep(3) try: retrieved = opener(url_or_request, kwargs.get("data")) except urllib2.HTTPError, e: if e.code < 500: # If it was not a server error, don't keep retrying. 
log.warning("Could not retrieve url (HTTP %s error): %s" % (e.code, e.url)) raise log.debug("HTTP error (try %i/%i): %s" % (i + 1, retries, e.code)) except (urllib2.URLError, socket.timeout), e: if hasattr(e, "reason"): reason = str(e.reason) else: reason = "N/A" if reason == "timed out": set_unresponsive(url) log.debug("Failed to retrieve url (try %i/%i): %s" % (i + 1, retries, reason))