Example #1
File: tools.py Project: Grejeru/Flexget
import httplib
import socket
import time
import urllib2
from urlparse import urlparse

# SmartRedirectHandler is defined elsewhere in tools.py.

def urlopener(url_or_request, log, **kwargs):
    """
    Utility function for fetching a URL, retrying up to 3 times with a configurable socket timeout.
    Re-raises any errors as URLError.

    .. warning:: This is being replaced by requests library.
                 flexget.utils.requests should be used going forward.

    :param str url_or_request: URL or Request object to get.
    :param log: Logger to log debug info and errors to
    :param kwargs: Optional ``retries``, ``timeout``, ``data`` and ``handlers`` arguments (consumed here rather than passed through to urlopen)
    :return: The file-like object returned by urlopen
    """
    from flexget.utils.requests import is_unresponsive, set_unresponsive

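    # A Request object only exposes its host here; a plain URL is used as-is for the unresponsive check.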
    if isinstance(url_or_request, urllib2.Request):
        url = url_or_request.get_host()
    else:
        url = url_or_request
    if is_unresponsive(url):
        msg = '%s is known to be unresponsive, not trying again.' % urlparse(url).hostname
        log.warning(msg)
        raise urllib2.URLError(msg)

    retries = kwargs.get('retries', 3)
    timeout = kwargs.get('timeout', 15.0)

    # Save the current socket default timeout so it can be restored when done. NOTE: this is not thread-safe.
    # The urlopen() timeout parameter is not used here to avoid requiring Python 2.6; once Python 2.6
    # can be assumed, that parameter should be used instead.
    oldtimeout = socket.getdefaulttimeout()
    try:
        socket.setdefaulttimeout(timeout)

        handlers = [SmartRedirectHandler()]
        if urllib2._opener:
            handlers.extend(urllib2._opener.handlers)
        if kwargs.get('handlers'):
            handlers.extend(kwargs['handlers'])
        if len(handlers) > 1:
            handler_names = [h.__class__.__name__ for h in handlers]
            log.debug('Additional handlers have been specified for this urlopen: %s' % ', '.join(handler_names))
        opener = urllib2.build_opener(*handlers).open
        for i in range(retries):  # retry fetching the url up to `retries` times (3 by default).
            if i > 0:
                time.sleep(3)
            try:
                retrieved = opener(url_or_request, kwargs.get('data'))
            except urllib2.HTTPError as e:
                if e.code < 500:
                    # If it was not a server error, don't keep retrying.
                    log.warning('Could not retrieve url (HTTP %s error): %s' % (e.code, e.url))
                    raise
                log.debug('HTTP error (try %i/%i): %s' % (i + 1, retries, e.code))
            except (urllib2.URLError, socket.timeout) as e:
                if hasattr(e, 'reason'):
                    reason = str(e.reason)
                else:
                    reason = 'N/A'
                if reason == 'timed out':
                    set_unresponsive(url)
                log.debug('Failed to retrieve url (try %i/%i): %s' % (i + 1, retries, reason))
            except httplib.IncompleteRead:
                log.critical('Incomplete read - see python bug 6312')
                break
            else:
                # make the returned instance usable in a with statement by adding __enter__ and __exit__ methods

                def enter(self):
                    return self

                def exit(self, exc_type, exc_val, exc_tb):
                    self.close()

                retrieved.__class__.__enter__ = enter
                retrieved.__class__.__exit__ = exit
                return retrieved

        log.warning('Could not retrieve url: %s' % url_or_request)
        raise urllib2.URLError('Could not retrieve url after %s tries.' % retries)
    finally:
        socket.setdefaulttimeout(oldtimeout)
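
A minimal usage sketch (the URL and logger name are hypothetical, not from the Flexget source). The returned file-like object works in a with statement because urlopener patches __enter__/__exit__ onto its class before returning it:

import logging
import urllib2

log = logging.getLogger('example')

try:
    # Default 3 retries; override the default 15-second socket timeout.
    with urlopener('http://example.com/feed.xml', log, timeout=10.0) as page:
        body = page.read()
except urllib2.URLError as e:
    log.error('Giving up: %s' % e)

Note that contextlib.closing(urlopener(...)) would give the same with-statement behaviour without mutating the returned object's class.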
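
The docstring warns that this helper is being superseded by the requests library. A rough sketch of the same retry behaviour with plain requests (the function name is hypothetical, and flexget.utils.requests may differ; this only illustrates the pattern):

import time
import requests

def fetch_with_retries(url, log, retries=3, timeout=15.0):
    """Rough requests-based equivalent of urlopener's retry loop."""
    for attempt in range(retries):
        if attempt > 0:
            time.sleep(3)  # back off between attempts, as urlopener does
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as e:
            log.debug('Failed to retrieve url (try %i/%i): %s' % (attempt + 1, retries, e))
            continue
        if response.status_code < 500:
            # Mirror urlopener: anything other than a server error is not retried.
            response.raise_for_status()
            return response
        log.debug('HTTP error (try %i/%i): %s' % (attempt + 1, retries, response.status_code))
    raise requests.RequestException('Could not retrieve url after %s tries.' % retries)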